diff --git a/src/debug_settings.h b/src/debug_settings.h index b72a4f124d..3f28ad89aa 100644 --- a/src/debug_settings.h +++ b/src/debug_settings.h @@ -36,6 +36,7 @@ enum NewGRFOptimiserFlags { NGOF_NO_OPT_VARACT2_ADJUST_ORDERING = 5, NGOF_NO_OPT_VARACT2_INSERT_JUMPS = 6, NGOF_NO_OPT_VARACT2_CB_QUICK_EXIT = 7, + NGOF_NO_OPT_VARACT2_PROC_INLINE = 8, }; inline bool HasGrfOptimiserFlag(NewGRFOptimiserFlags flag) diff --git a/src/newgrf.cpp b/src/newgrf.cpp index 248b5f0d4e..e3727e7ab0 100644 --- a/src/newgrf.cpp +++ b/src/newgrf.cpp @@ -5722,6 +5722,8 @@ static void NewSpriteGroup(ByteReader *buf) if (unlikely(HasBit(_misc_debug_flags, MDF_NEWGRF_SG_SAVE_RAW))) { shadow = &(_deterministic_sg_shadows[group]); } + static std::vector current_adjusts; + current_adjusts.clear(); VarAction2OptimiseState va2_opt_state; /* The initial value is always the constant 0 */ @@ -5731,7 +5733,7 @@ static void NewSpriteGroup(ByteReader *buf) /* Loop through the var adjusts. Unfortunately we don't know how many we have * from the outset, so we shall have to keep reallocing. */ do { - DeterministicSpriteGroupAdjust &adjust = group->adjusts.emplace_back(); + DeterministicSpriteGroupAdjust &adjust = current_adjusts.emplace_back(); /* The first var adjust doesn't have an operation specified, so we set it to add. */ adjust.operation = first_adjust ? DSGA_OP_ADD : (DeterministicSpriteGroupAdjustOperation)buf->ReadByte(); @@ -5784,11 +5786,16 @@ static void NewSpriteGroup(ByteReader *buf) if (adjust.subroutine != nullptr) adjust.subroutine = PruneTargetSpriteGroup(adjust.subroutine); } - OptimiseVarAction2Adjust(va2_opt_state, feature, varsize, group, adjust); + OptimiseVarAction2PreCheckAdjust(va2_opt_state, adjust); /* Continue reading var adjusts while bit 5 is set. */ } while (HasBit(varadjust, 5)); + for (const DeterministicSpriteGroupAdjust &adjust : current_adjusts) { + group->adjusts.push_back(adjust); + OptimiseVarAction2Adjust(va2_opt_state, feature, varsize, group, group->adjusts.back()); + } + std::vector ranges; ranges.resize(buf->ReadByte()); for (uint i = 0; i < ranges.size(); i++) { @@ -5817,7 +5824,7 @@ static void NewSpriteGroup(ByteReader *buf) ProcessDeterministicSpriteGroupRanges(ranges, group->ranges, group->default_group); - OptimiseVarAction2DeterministicSpriteGroup(va2_opt_state, feature, varsize, group); + OptimiseVarAction2DeterministicSpriteGroup(va2_opt_state, feature, varsize, group, current_adjusts); break; } diff --git a/src/newgrf_internal.h b/src/newgrf_internal.h index ce5ef0801c..effc059a70 100644 --- a/src/newgrf_internal.h +++ b/src/newgrf_internal.h @@ -72,6 +72,8 @@ public: UniformArenaAllocator group_temp_store_variable_tracking_storage; btree::btree_map procedure_annotations; UniformArenaAllocator procedure_annotations_storage; + btree::btree_map *> inlinable_adjust_groups; + UniformArenaAllocator), 1024> inlinable_adjust_groups_storage; std::vector dead_store_elimination_candidates; VarAction2GroupVariableTracking *GetVarAction2GroupVariableTracking(const SpriteGroup *group, bool make_new) @@ -98,6 +100,19 @@ public: } } + std::vector *GetInlinableGroupAdjusts(const DeterministicSpriteGroup *group, bool make_new) + { + if (make_new) { + std::vector *&ptr = this->inlinable_adjust_groups[group]; + if (!ptr) ptr = new (this->inlinable_adjust_groups_storage.Allocate()) std::vector(); + return ptr; + } else { + auto iter = this->inlinable_adjust_groups.find(group); + if (iter != this->inlinable_adjust_groups.end()) return iter->second; + return nullptr; + } + } + /** Clear temporary data before processing the next file in the current loading stage */ void ClearDataForNextFile() { @@ -114,6 +129,8 @@ public: this->group_temp_store_variable_tracking_storage.EmptyArena(); this->procedure_annotations.clear(); this->procedure_annotations_storage.EmptyArena(); + this->inlinable_adjust_groups.clear(); + this->inlinable_adjust_groups_storage.EmptyArena(); this->dead_store_elimination_candidates.clear(); } @@ -234,6 +251,7 @@ struct VarAction2OptimiseState { VarAction2InferenceBackup inference_backup; VarAction2GroupVariableTracking *var_tracking = nullptr; bool seen_procedure_call = false; + bool var_1C_present = false; bool check_expensive_vars = false; bool enable_dse = false; uint default_variable_version = 0; @@ -249,9 +267,16 @@ struct VarAction2OptimiseState { } }; +inline void OptimiseVarAction2PreCheckAdjust(VarAction2OptimiseState &state, const DeterministicSpriteGroupAdjust &adjust) +{ + uint16 variable = adjust.variable; + if (variable == 0x7B) variable = adjust.parameter; + if (variable == 0x1C) state.var_1C_present = true; +} + const SpriteGroup *PruneTargetSpriteGroup(const SpriteGroup *result); void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group, DeterministicSpriteGroupAdjust &adjust); -void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group); +void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group, std::vector &saved_adjusts); void HandleVarAction2OptimisationPasses(); #endif /* NEWGRF_INTERNAL_H */ diff --git a/src/newgrf_optimiser.cpp b/src/newgrf_optimiser.cpp index e07f5061a4..4fa0ce392b 100644 --- a/src/newgrf_optimiser.cpp +++ b/src/newgrf_optimiser.cpp @@ -742,6 +742,59 @@ void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSpecFeatu } }; + auto try_inline_procedure = [&]() -> bool { + if (adjust.operation != DSGA_OP_RST || adjust.type != DSGA_TYPE_NONE || state.var_1C_present) return false; + + const SpriteGroup *subroutine = adjust.subroutine; + + if (subroutine == nullptr || subroutine->type != SGT_DETERMINISTIC || subroutine->feature != group->feature) { + return false; + } + + const DeterministicSpriteGroup *dsg = (const DeterministicSpriteGroup*)subroutine; + if (!(dsg->dsg_flags & DSGF_INLINE_CANDIDATE) || dsg->var_scope != group->var_scope || dsg->size != group->size) return false; + + std::vector *proc = _cur.GetInlinableGroupAdjusts(dsg, false); + if (proc == nullptr) return false; + + byte shift_num = adjust.shift_num; + uint32 and_mask = adjust.and_mask; + + // Initial value state is 0 + replace_with_constant_load(0); + + for (const DeterministicSpriteGroupAdjust &proc_adjust : *proc) { + group->adjusts.push_back(proc_adjust); + OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back()); + } + if (shift_num != 0) { + DeterministicSpriteGroupAdjust &adj = group->adjusts.emplace_back(); + adj.operation = DSGA_OP_SHR; + adj.variable = 0x1A; + adj.shift_num = 0; + adj.type = DSGA_TYPE_NONE; + adj.and_mask = shift_num; + adj.add_val = 0; + adj.divmod_val = 0; + OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back()); + } + if (and_mask != 0xFFFFFFFF) { + DeterministicSpriteGroupAdjust &adj = group->adjusts.emplace_back(); + adj.operation = DSGA_OP_AND; + adj.variable = 0x1A; + adj.shift_num = 0; + adj.type = DSGA_TYPE_NONE; + adj.and_mask = and_mask; + adj.add_val = 0; + adj.divmod_val = 0; + OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back()); + } + + group->sg_flags |= SGF_INLINING; + + return true; + }; + /* Special handling of variable 7B, this uses the parameter as the variable number, and the last value as the variable's parameter. * If the last value is a known constant, it can be substituted immediately. */ if (adjust.variable == 0x7B) { @@ -772,6 +825,7 @@ void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSpecFeatu const VarAction2TempStoreInference &store = iter->second; if (store.inference & VA2AIF_HAVE_CONSTANT) { adjust.variable = 0x1A; + adjust.parameter = 0; adjust.and_mask &= (store.store_constant >> adjust.shift_num); } else if ((store.inference & VA2AIF_SINGLE_LOAD) && (store.var_source.variable == 0x7D || IsVariableVeryCheap(store.var_source.variable, feature))) { if (adjust.type == DSGA_TYPE_NONE && adjust.shift_num == 0 && (adjust.and_mask == 0xFFFFFFFF || ((store.inference & VA2AIF_ONE_OR_ZERO) && (adjust.and_mask & 1)))) { @@ -904,6 +958,8 @@ void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSpecFeatu /* Procedure call or complex adjustment */ if (adjust.operation == DSGA_OP_STO) handle_unpredictable_temp_store(); if (adjust.variable == 0x7E) { + if (try_inline_procedure()) return; + std::bitset<256> seen_stores; bool seen_unpredictable_store = false; bool seen_special_store = false; @@ -2311,7 +2367,30 @@ static void OptimiseVarAction2DeterministicSpriteResolveJumps(DeterministicSprit } } -void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group) +static const size_t MAX_PROC_INLINE_ADJUST_COUNT = 5; + +static void OptimiseVarAction2CheckInliningCandidate(DeterministicSpriteGroup *group, std::vector &saved_adjusts) +{ + if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_PROC_INLINE)) return; + if (group->adjusts.size() > MAX_PROC_INLINE_ADJUST_COUNT || !group->calculated_result || group->var_scope != VSG_SCOPE_SELF) return; + + for (const DeterministicSpriteGroupAdjust &adjust : group->adjusts) { + uint variable = adjust.variable; + if (variable == 0x7B) variable = adjust.parameter; + if (variable == 0xC || variable == 0x10 || variable == 0x18 || variable == 0x1A || (variable >= 0x7D && variable <= 0x7F)) { + // OK + } else if (variable == 0x7C) { + if (group->feature != GSF_AIRPORTS && group->feature != GSF_INDUSTRIES) return; + } else { + return; + } + } + + group->dsg_flags |= DSGF_INLINE_CANDIDATE; + *(_cur.GetInlinableGroupAdjusts(group, true)) = std::move(saved_adjusts); +} + +void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group, std::vector &saved_adjusts) { if (unlikely(HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2))) return; @@ -2506,6 +2585,8 @@ void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &state, OptimiseVarAction2DeterministicSpriteGroupAdjustOrdering(group); } + OptimiseVarAction2CheckInliningCandidate(group, saved_adjusts); + if (state.check_expensive_vars && !HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_EXPENSIVE_VARS)) { if (dse_candidate) { group->dsg_flags |= DSGF_CHECK_EXPENSIVE_VARS; diff --git a/src/newgrf_spritegroup.cpp b/src/newgrf_spritegroup.cpp index dfe08abd95..8a18c4c11c 100644 --- a/src/newgrf_spritegroup.cpp +++ b/src/newgrf_spritegroup.cpp @@ -561,6 +561,9 @@ void SpriteGroupDumper::DumpSpriteGroup(const SpriteGroup *sg, const char *paddi char extra_info[64] = ""; if (sg->sg_flags & SGF_ACTION6) strecat(extra_info, " (action 6 modified)", lastof(extra_info)); + if (HasBit(_misc_debug_flags, MDF_NEWGRF_SG_DUMP_MORE_DETAIL)) { + if (sg->sg_flags & SGF_INLINING) strecat(extra_info, " (inlining)", lastof(extra_info)); + } switch (sg->type) { case SGT_REAL: { @@ -648,6 +651,7 @@ void SpriteGroupDumper::DumpSpriteGroup(const SpriteGroup *sg, const char *paddi if (dsg->dsg_flags & DSGF_CHECK_EXPENSIVE_VARS) p += seprintf(p, lastof(this->buffer), ", CHECK_EXP_VAR"); if (dsg->dsg_flags & DSGF_CHECK_INSERT_JUMP) p += seprintf(p, lastof(this->buffer), ", CHECK_INS_JMP"); if (dsg->dsg_flags & DSGF_CB_HANDLER) p += seprintf(p, lastof(this->buffer), ", CB_HANDLER"); + if (dsg->dsg_flags & DSGF_INLINE_CANDIDATE) p += seprintf(p, lastof(this->buffer), ", INLINE_CANDIDATE"); } print(); emit_start(); diff --git a/src/newgrf_spritegroup.h b/src/newgrf_spritegroup.h index 5b94bdbacf..370efbaec0 100644 --- a/src/newgrf_spritegroup.h +++ b/src/newgrf_spritegroup.h @@ -61,6 +61,7 @@ extern SpriteGroupPool _spritegroup_pool; enum SpriteGroupFlags : uint8 { SGF_NONE = 0, SGF_ACTION6 = 1 << 0, + SGF_INLINING = 1 << 1, }; DECLARE_ENUM_AS_BIT_SET(SpriteGroupFlags) @@ -432,6 +433,7 @@ enum DeterministicSpriteGroupFlags : uint8 { DSGF_CHECK_EXPENSIVE_VARS = 1 << 4, DSGF_CHECK_INSERT_JUMP = 1 << 5, DSGF_CB_HANDLER = 1 << 6, + DSGF_INLINE_CANDIDATE = 1 << 7, }; DECLARE_ENUM_AS_BIT_SET(DeterministicSpriteGroupFlags)