diff --git a/src/debug_settings.h b/src/debug_settings.h index 3547cca2a6..08e5d23e4d 100644 --- a/src/debug_settings.h +++ b/src/debug_settings.h @@ -34,6 +34,7 @@ enum NewGRFOptimiserFlags { NGOF_NO_OPT_VARACT2_EXPENSIVE_VARS = 3, NGOF_NO_OPT_VARACT2_SIMPLIFY_STORES = 4, NGOF_NO_OPT_VARACT2_ADJUST_ORDERING = 5, + NGOF_NO_OPT_VARACT2_INSERT_JUMPS = 6, }; inline bool HasGrfOptimiserFlag(NewGRFOptimiserFlags flag) diff --git a/src/newgrf.cpp b/src/newgrf.cpp index 7db7a3c34f..a5046c9cc5 100644 --- a/src/newgrf.cpp +++ b/src/newgrf.cpp @@ -136,7 +136,6 @@ public: btree::btree_map group_temp_store_variable_tracking; UniformArenaAllocator group_temp_store_variable_tracking_storage; std::vector dead_store_elimination_candidates; - std::vector pending_expensive_var_checks; VarAction2GroupVariableTracking *GetVarAction2GroupVariableTracking(const SpriteGroup *group, bool make_new) { @@ -166,7 +165,6 @@ public: this->group_temp_store_variable_tracking.clear(); this->group_temp_store_variable_tracking_storage.EmptyArena(); this->dead_store_elimination_candidates.clear(); - this->pending_expensive_var_checks.clear(); } /** @@ -6625,6 +6623,7 @@ static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSp } if (adjust.operation == DSGA_OP_MUL && adjust.variable != 0x7E) { state.inference |= VA2AIF_MUL_BOOL; + adjust.adjust_flags |= DSGAF_BOOL_MUL_HINT; } } } else { @@ -6924,6 +6923,7 @@ static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSp } if ((prev_inference & VA2AIF_ONE_OR_ZERO) || (non_const_var_inference & VA2AIF_ONE_OR_ZERO)) { state.inference |= VA2AIF_MUL_BOOL; + adjust.adjust_flags |= DSGAF_BOOL_MUL_HINT; } break; } @@ -7199,13 +7199,22 @@ static bool OptimiseVarAction2DeterministicSpriteGroupExpensiveVarsInner(Determi } int insert_pos = start; uint32 and_mask = 0; + uint condition_depth = 0; + bool seen_first = false; for (int j = end; j >= start; j--) { DeterministicSpriteGroupAdjust &adjust = group->adjusts[j]; + if (seen_first && adjust.operation == DSGA_OP_JZ && condition_depth > 0) { + /* Do not insert the STO_NC inside a conditional block when it is also needed outside the block */ + condition_depth--; + insert_pos = j; + } + if (seen_first && adjust.adjust_flags & DSGAF_END_BLOCK) condition_depth++; if (adjust.variable == target_var && adjust.parameter == target_param) { and_mask |= adjust.and_mask << adjust.shift_num; adjust.variable = 0x7D; adjust.parameter = bit; insert_pos = j; + seen_first = true; } } DeterministicSpriteGroupAdjust load = {}; @@ -7386,7 +7395,111 @@ static void OptimiseVarAction2DeterministicSpriteGroupAdjustOrdering(Determinist }); adjust.operation = DSGA_OP_RST; - adjust.adjust_flags &= ~DSGAF_SKIP_ON_ZERO; + adjust.adjust_flags &= ~(DSGAF_SKIP_ON_ZERO | DSGAF_BOOL_MUL_HINT); + } + } + } +} + +static bool TryCombineTempStoreLoadWithStoreSourceAdjust(DeterministicSpriteGroupAdjust &target, const DeterministicSpriteGroupAdjust *var_src, bool inverted) +{ + DeterministicSpriteGroupAdjustType var_src_type = var_src->type; + if (inverted) { + switch (var_src_type) { + case DSGA_TYPE_EQ: + var_src_type = DSGA_TYPE_NEQ; + break; + case DSGA_TYPE_NEQ: + var_src_type = DSGA_TYPE_EQ; + break; + default: + /* Don't try to handle this case */ + return false; + } + } + if (target.type == DSGA_TYPE_NONE && target.shift_num == 0 && (target.and_mask == 0xFFFFFFFF || (IsConstantComparisonAdjustType(var_src_type) && (target.and_mask & 1)))) { + target.type = var_src_type; + target.variable = var_src->variable; + target.shift_num = var_src->shift_num; + target.parameter = var_src->parameter; + target.and_mask = var_src->and_mask; + target.add_val = var_src->add_val; + target.divmod_val = var_src->divmod_val; + return true; + } else if (IsConstantComparisonAdjustType(target.type) && target.shift_num == 0 && (target.and_mask & 1) && target.add_val == 0 && + IsConstantComparisonAdjustType(var_src_type)) { + /* DSGA_TYPE_EQ/NEQ on target are OK if add_val is 0 because this is a boolean invert/convert of the incoming DSGA_TYPE_EQ/NEQ */ + if (target.type == DSGA_TYPE_EQ) { + target.type = InvertConstantComparisonAdjustType(var_src_type); + } else { + target.type = var_src_type; + } + target.variable = var_src->variable; + target.shift_num = var_src->shift_num; + target.parameter = var_src->parameter; + target.and_mask = var_src->and_mask; + target.add_val = var_src->add_val; + target.divmod_val = var_src->divmod_val; + return true; + } else if (var_src_type == DSGA_TYPE_NONE && (target.shift_num + var_src->shift_num) < 32) { + target.variable = var_src->variable; + target.parameter = var_src->parameter; + target.and_mask &= var_src->and_mask >> target.shift_num; + target.shift_num += var_src->shift_num; + return true; + } + return false; +} + +static void OptimiseVarAction2DeterministicSpriteGroupInsertJumps(DeterministicSpriteGroup *group) +{ + if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_INSERT_JUMPS)) return; + + for (int i = (int)group->adjusts.size() - 1; i >= 1; i--) { + DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; + + if (adjust.adjust_flags & DSGAF_BOOL_MUL_HINT) { + std::bitset<256> ok_stores; + int j = i - 1; + while (j >= 0) { + DeterministicSpriteGroupAdjust &prev = group->adjusts[j]; + + /* Don't try to skip over: unpredictable or special stores, procedure calls, permanent stores, or another jump */ + if (prev.operation == DSGA_OP_STO && (prev.type != DSGA_TYPE_NONE || prev.variable != 0x1A || prev.shift_num != 0 || prev.and_mask >= 0x100)) break; + if (prev.operation == DSGA_OP_STO_NC && prev.divmod_val >= 0x100) break; + if (prev.operation == DSGA_OP_STOP) break; + if (prev.operation == DSGA_OP_JZ) break; + if (prev.variable == 0x7E) break; + + /* Reached a store which can't be skipped over because the value is needed later */ + if (prev.operation == DSGA_OP_STO && !ok_stores[prev.and_mask]) break; + if (prev.operation == DSGA_OP_STO_NC && !ok_stores[prev.divmod_val]) break; + + if (prev.variable == 0x7D && (prev.adjust_flags & DSGAF_LAST_VAR_READ)) { + /* The stored value is no longer needed after this, we can skip the corresponding store */ + ok_stores.set(prev.parameter & 0xFF, true); + } + + j--; + } + if (j < i - 1) { + DeterministicSpriteGroupAdjust current = adjust; + current.operation = DSGA_OP_JZ; + current.adjust_flags &= ~(DSGAF_BOOL_MUL_HINT | DSGAF_SKIP_ON_ZERO); + group->adjusts[i - 1].adjust_flags |= DSGAF_END_BLOCK; + group->adjusts.erase(group->adjusts.begin() + i); + if (j >= 0 && current.variable == 0x7D && (current.adjust_flags & DSGAF_LAST_VAR_READ)) { + DeterministicSpriteGroupAdjust &prev = group->adjusts[j]; + if (prev.operation == DSGA_OP_STO_NC && prev.divmod_val == (current.parameter & 0xFF) && + TryCombineTempStoreLoadWithStoreSourceAdjust(current, &prev, false)) { + /* Managed to extract source from immediately prior STO_NC, which can now be removed */ + group->adjusts.erase(group->adjusts.begin() + j); + j--; + i--; + } + } + group->adjusts.insert(group->adjusts.begin() + j + 1, current); + i++; } } } @@ -7526,7 +7639,7 @@ static void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState & if (state.check_expensive_vars && !HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_EXPENSIVE_VARS)) { if (dse_candidate) { - _cur.pending_expensive_var_checks.push_back(group); + group->dsg_flags |= DSGF_CHECK_EXPENSIVE_VARS; } else { OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(group); } @@ -7572,51 +7685,7 @@ static std::bitset<256> HandleVarAction2DeadStoreElimination(DeterministicSprite if (var_src != nullptr && var_src->variable != 0x7C) { /* Don't use variable 7C as we're not checking for store perms which may clobber the value here */ - DeterministicSpriteGroupAdjustType var_src_type = var_src->type; - if (inverted) { - switch (var_src_type) { - case DSGA_TYPE_EQ: - var_src_type = DSGA_TYPE_NEQ; - break; - case DSGA_TYPE_NEQ: - var_src_type = DSGA_TYPE_EQ; - break; - default: - /* Don't try to handle this case */ - return false; - } - } - if (target.type == DSGA_TYPE_NONE && target.shift_num == 0 && (target.and_mask == 0xFFFFFFFF || (IsConstantComparisonAdjustType(var_src_type) && (target.and_mask & 1)))) { - target.type = var_src_type; - target.variable = var_src->variable; - target.shift_num = var_src->shift_num; - target.parameter = var_src->parameter; - target.and_mask = var_src->and_mask; - target.add_val = var_src->add_val; - target.divmod_val = var_src->divmod_val; - return true; - } else if (IsConstantComparisonAdjustType(target.type) && target.shift_num == 0 && (target.and_mask & 1) && target.add_val == 0 && - IsConstantComparisonAdjustType(var_src_type)) { - /* DSGA_TYPE_EQ/NEQ on target are OK if add_val is 0 because this is a boolean invert/convert of the incoming DSGA_TYPE_EQ/NEQ */ - if (target.type == DSGA_TYPE_EQ) { - target.type = InvertConstantComparisonAdjustType(var_src_type); - } else { - target.type = var_src_type; - } - target.variable = var_src->variable; - target.shift_num = var_src->shift_num; - target.parameter = var_src->parameter; - target.and_mask = var_src->and_mask; - target.add_val = var_src->add_val; - target.divmod_val = var_src->divmod_val; - return true; - } else if (var_src_type == DSGA_TYPE_NONE && (target.shift_num + var_src->shift_num) < 32) { - target.variable = var_src->variable; - target.parameter = var_src->parameter; - target.and_mask &= var_src->and_mask >> target.shift_num; - target.shift_num += var_src->shift_num; - return true; - } + if (TryCombineTempStoreLoadWithStoreSourceAdjust(target, var_src, inverted)) return true; } return false; }; @@ -7778,7 +7847,10 @@ static std::bitset<256> HandleVarAction2DeadStoreElimination(DeterministicSprite break; } } - if (add) substitution_candidates.push_back(adjust.parameter | (i << 8)); + if (add) { + substitution_candidates.push_back(adjust.parameter | (i << 8)); + const_cast(adjust).adjust_flags |= DSGAF_LAST_VAR_READ; + } } else { bits.set(adjust.parameter, true); } @@ -7850,10 +7922,12 @@ static void HandleVarAction2OptimisationPasses() OptimiseVarAction2DeterministicSpriteGroupSimplifyStores(group); OptimiseVarAction2DeterministicSpriteGroupAdjustOrdering(group); - } - - for (DeterministicSpriteGroup *group : _cur.pending_expensive_var_checks) { - OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(group); + if (!(group->dsg_flags & DSGF_NO_DSE)) { + OptimiseVarAction2DeterministicSpriteGroupInsertJumps(group); + } + if (group->dsg_flags & DSGF_CHECK_EXPENSIVE_VARS) { + OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(group); + } } } diff --git a/src/newgrf_spritegroup.cpp b/src/newgrf_spritegroup.cpp index b34e0ef0c7..957125ec47 100644 --- a/src/newgrf_spritegroup.cpp +++ b/src/newgrf_spritegroup.cpp @@ -18,6 +18,7 @@ #include "newgrf_extension.h" #include "newgrf_industrytiles_analysis.h" #include "scope.h" +#include "debug_settings.h" #include "safeguards.h" @@ -152,10 +153,16 @@ static inline uint32 GetVariable(const ResolverObject &object, ScopeResolver *sc return &this->default_scope; } +struct ConditionalNestingState { + uint depth = 0; + uint skip_until_depth = 0; + bool skipping = false; +}; + /* Evaluate an adjustment for a variable of the given size. * U is the unsigned type and S is the signed type to use. */ template -static U EvalAdjustT(const DeterministicSpriteGroupAdjust &adjust, ScopeResolver *scope, U last_value, uint32 value) +static U EvalAdjustT(const DeterministicSpriteGroupAdjust &adjust, ScopeResolver *scope, U last_value, uint32 value, ConditionalNestingState *cond_nesting_state = nullptr) { value >>= adjust.shift_num; value &= adjust.and_mask; @@ -201,6 +208,17 @@ static U EvalAdjustT(const DeterministicSpriteGroupAdjust &adjust, ScopeResolver case DSGA_OP_RSUB: return value - last_value; case DSGA_OP_STO_NC: _temp_store.StoreValue(adjust.divmod_val, (S)value); return last_value; case DSGA_OP_ABS: return ((S)last_value < 0) ? -((S)last_value) : (S)last_value; + case DSGA_OP_JZ: { + if (value == 0 && cond_nesting_state != nullptr) { + /* Jump */ + cond_nesting_state->skip_until_depth = cond_nesting_state->depth - 1; + cond_nesting_state->skipping = true; + return 0; + } else { + /* Don't jump */ + return last_value; + } + } default: return value; } } @@ -227,7 +245,19 @@ const SpriteGroup *DeterministicSpriteGroup::Resolve(ResolverObject &object) con ScopeResolver *scope = object.GetScope(this->var_scope); + ConditionalNestingState conditional_nesting = {}; + for (const auto &adjust : this->adjusts) { + if (adjust.adjust_flags & DSGAF_END_BLOCK) { + conditional_nesting.depth--; + if (conditional_nesting.skipping && conditional_nesting.skip_until_depth == conditional_nesting.depth) { + /* End of block that was skipped */ + conditional_nesting.skipping = false; + continue; + } + } + if (adjust.operation == DSGA_OP_JZ) conditional_nesting.depth++; + if (conditional_nesting.skipping) continue; if ((adjust.adjust_flags & DSGAF_SKIP_ON_ZERO) && (last_value == 0)) continue; if ((adjust.adjust_flags & DSGAF_SKIP_ON_LSB_SET) && (last_value & 1) != 0) continue; @@ -256,9 +286,9 @@ const SpriteGroup *DeterministicSpriteGroup::Resolve(ResolverObject &object) con } switch (this->size) { - case DSG_SIZE_BYTE: value = EvalAdjustT (adjust, scope, last_value, value); break; - case DSG_SIZE_WORD: value = EvalAdjustT(adjust, scope, last_value, value); break; - case DSG_SIZE_DWORD: value = EvalAdjustT(adjust, scope, last_value, value); break; + case DSG_SIZE_BYTE: value = EvalAdjustT (adjust, scope, last_value, value, &conditional_nesting); break; + case DSG_SIZE_WORD: value = EvalAdjustT(adjust, scope, last_value, value, &conditional_nesting); break; + case DSG_SIZE_DWORD: value = EvalAdjustT(adjust, scope, last_value, value, &conditional_nesting); break; default: NOT_REACHED(); } last_value = value; @@ -685,6 +715,7 @@ static const char *_dsg_op_special_names[] { "RSUB", "STO_NC", "ABS", + "JZ", }; static_assert(lengthof(_dsg_op_special_names) == DSGA_OP_SPECIAL_END - DSGA_OP_TERNARY); @@ -708,13 +739,18 @@ static char *GetAdjustOperationName(char *str, const char *last, DeterministicSp return str + seprintf(str, last, "\?\?\?(0x%X)", operation); } -static char *DumpSpriteGroupAdjust(char *p, const char *last, const DeterministicSpriteGroupAdjust &adjust, int padding, uint32 &highlight_tag) +static char *DumpSpriteGroupAdjust(char *p, const char *last, const DeterministicSpriteGroupAdjust &adjust, int padding, uint32 &highlight_tag, uint &conditional_indent) { if (adjust.variable == 0x7D) { /* Temp storage load */ highlight_tag = (1 << 16) | (adjust.parameter & 0xFFFF); } + p += seprintf(p, last, "%*s", padding, ""); + for (uint i = 0; i < conditional_indent; i++) { + p += seprintf(p, last, "> "); + } + auto append_flags = [&]() { if (adjust.adjust_flags & DSGAF_SKIP_ON_ZERO) { p += seprintf(p, last, ", skip on zero"); @@ -722,15 +758,31 @@ static char *DumpSpriteGroupAdjust(char *p, const char *last, const Deterministi if (adjust.adjust_flags & DSGAF_SKIP_ON_LSB_SET) { p += seprintf(p, last, ", skip on LSB set"); } + if (adjust.adjust_flags & DSGAF_LAST_VAR_READ && HasBit(_misc_debug_flags, MDF_NEWGRF_SG_DUMP_MORE_DETAIL)) { + p += seprintf(p, last, ", last var read"); + } + if (adjust.adjust_flags & DSGAF_BOOL_MUL_HINT && HasBit(_misc_debug_flags, MDF_NEWGRF_SG_DUMP_MORE_DETAIL)) { + p += seprintf(p, last, ", bool mul hint"); + } + if (adjust.adjust_flags & DSGAF_END_BLOCK) { + p += seprintf(p, last, ", end block"); + } }; + if (adjust.operation == DSGA_OP_JZ) { + conditional_indent++; + } + if (adjust.adjust_flags & DSGAF_END_BLOCK) { + conditional_indent--; + } + if (adjust.operation == DSGA_OP_TERNARY) { - p += seprintf(p, last, "%*sTERNARY: true: %X, false: %X", padding, "", adjust.and_mask, adjust.add_val); + p += seprintf(p, last, "TERNARY: true: %X, false: %X", adjust.and_mask, adjust.add_val); append_flags(); return p; } if (adjust.operation == DSGA_OP_ABS) { - p += seprintf(p, last, "%*sABS", padding, ""); + p += seprintf(p, last, "ABS"); append_flags(); return p; } @@ -738,7 +790,7 @@ static char *DumpSpriteGroupAdjust(char *p, const char *last, const Deterministi /* Temp storage store */ highlight_tag = (1 << 16) | (adjust.and_mask & 0xFFFF); } - p += seprintf(p, last, "%*svar: %X", padding, "", adjust.variable); + p += seprintf(p, last, "var: %X", adjust.variable); if (adjust.variable == A2VRI_VEHICLE_CURRENT_SPEED_SCALED) { p += seprintf(p, last, " (current_speed_scaled)"); } else if (adjust.variable >= 0x100) { @@ -860,12 +912,14 @@ void SpriteGroupDumper::DumpSpriteGroup(const SpriteGroup *sg, int padding, uint if (dsg->dsg_flags & DSGF_DSE_RECURSIVE_DISABLE) p += seprintf(p, lastof(this->buffer), ", DSE_RD"); if (dsg->dsg_flags & DSGF_VAR_TRACKING_PENDING) p += seprintf(p, lastof(this->buffer), ", VAR_PENDING"); if (dsg->dsg_flags & DSGF_REQUIRES_VAR1C) p += seprintf(p, lastof(this->buffer), ", REQ_1C"); + if (dsg->dsg_flags & DSGF_CHECK_EXPENSIVE_VARS) p += seprintf(p, lastof(this->buffer), ", CHECK_EXP_VAR"); } print(); emit_start(); padding += 2; + uint conditional_indent = 0; for (const auto &adjust : (*adjusts)) { - DumpSpriteGroupAdjust(this->buffer, lastof(this->buffer), adjust, padding, highlight_tag); + DumpSpriteGroupAdjust(this->buffer, lastof(this->buffer), adjust, padding, highlight_tag, conditional_indent); print(); if (adjust.variable == 0x7E && adjust.subroutine != nullptr) { this->DumpSpriteGroup(adjust.subroutine, padding + 5, 0); diff --git a/src/newgrf_spritegroup.h b/src/newgrf_spritegroup.h index 84e940899e..2a683efeb0 100644 --- a/src/newgrf_spritegroup.h +++ b/src/newgrf_spritegroup.h @@ -206,6 +206,7 @@ enum DeterministicSpriteGroupAdjustOperation : uint8 { DSGA_OP_RSUB, ///< b - a DSGA_OP_STO_NC, ///< store b into temporary storage, indexed by c. return a DSGA_OP_ABS, ///< abs(a) + DSGA_OP_JZ, ///< jump to adjust after DSGAF_END_BLOCK marker (taking into account nesting) if b is zero. return 0 if jumped, return a if not jumped DSGA_OP_SPECIAL_END, }; @@ -217,6 +218,9 @@ enum DeterministicSpriteGroupAdjustFlags : uint8 { DSGAF_NONE = 0, DSGAF_SKIP_ON_ZERO = 1 << 0, DSGAF_SKIP_ON_LSB_SET = 1 << 1, + DSGAF_LAST_VAR_READ = 1 << 2, + DSGAF_BOOL_MUL_HINT = 1 << 3, + DSGAF_END_BLOCK = 1 << 4, }; DECLARE_ENUM_AS_BIT_SET(DeterministicSpriteGroupAdjustFlags); @@ -441,6 +445,7 @@ enum DeterministicSpriteGroupFlags : uint8 { DSGF_DSE_RECURSIVE_DISABLE = 1 << 1, DSGF_VAR_TRACKING_PENDING = 1 << 2, DSGF_REQUIRES_VAR1C = 1 << 3, + DSGF_CHECK_EXPENSIVE_VARS = 1 << 4, }; DECLARE_ENUM_AS_BIT_SET(DeterministicSpriteGroupFlags)