diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 50de743e4e..8c0197f801 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -267,12 +267,14 @@ add_files( newgrf_industrytiles.cpp newgrf_industrytiles.h newgrf_industrytiles_analysis.h + newgrf_internal.h newgrf_newlandscape.cpp newgrf_newlandscape.h newgrf_newsignals.cpp newgrf_newsignals.h newgrf_object.cpp newgrf_object.h + newgrf_optimiser.cpp newgrf_profiling.cpp newgrf_profiling.h newgrf_properties.h diff --git a/src/newgrf.cpp b/src/newgrf.cpp index 1d75aff597..0a2805295c 100644 --- a/src/newgrf.cpp +++ b/src/newgrf.cpp @@ -11,6 +11,7 @@ #include +#include "newgrf_internal.h" #include "debug.h" #include "fileio_func.h" #include "engine_func.h" @@ -52,10 +53,6 @@ #include "road.h" #include "newgrf_roadstop.h" #include "debug_settings.h" -#include "core/arena_alloc.hpp" -#include "core/y_combinator.hpp" -#include "core/container_func.hpp" -#include "scope.h" #include "table/strings.h" #include "table/build_industry.h" @@ -93,172 +90,7 @@ static uint32 _ttdpatch_flags[8]; /** Indicates which are the newgrf features currently loaded ingame */ GRFLoadedFeatures _loaded_newgrf_features; -static const uint MAX_SPRITEGROUP = UINT8_MAX; ///< Maximum GRF-local ID for a spritegroup. - -/** Base GRF ID for OpenTTD's base graphics GRFs. */ -static const uint32 OPENTTD_GRAPHICS_BASE_GRF_ID = BSWAP32(0xFF4F5400); - -struct VarAction2GroupVariableTracking { - std::bitset<256> in; - std::bitset<256> out; - std::bitset<256> proc_call_out; -}; - -struct VarAction2ProcedureAnnotation { - std::bitset<256> stores; - uint32 special_register_values[16]; - uint16 special_register_mask = 0; - bool unskippable = false; -}; - -/** Temporary data during loading of GRFs */ -struct GrfProcessingState { -private: - /** Definition of a single Action1 spriteset */ - struct SpriteSet { - SpriteID sprite; ///< SpriteID of the first sprite of the set. - uint num_sprites; ///< Number of sprites in the set. 
- }; - - /** Currently referenceable spritesets */ - btree::btree_map spritesets[GSF_END]; - -public: - /* Global state */ - GrfLoadingStage stage; ///< Current loading stage - SpriteID spriteid; ///< First available SpriteID for loading realsprites. - - /* Local state in the file */ - SpriteFile *file; ///< File of currently processed GRF file. - GRFFile *grffile; ///< Currently processed GRF file. - GRFConfig *grfconfig; ///< Config of the currently processed GRF file. - uint32 nfo_line; ///< Currently processed pseudo sprite number in the GRF. - - /* Kind of return values when processing certain actions */ - int skip_sprites; ///< Number of pseudo sprites to skip before processing the next one. (-1 to skip to end of file) - - /* Currently referenceable spritegroups */ - const SpriteGroup *spritegroups[MAX_SPRITEGROUP + 1]; - - /* VarAction2 temporary storage variable tracking */ - btree::btree_map group_temp_store_variable_tracking; - UniformArenaAllocator group_temp_store_variable_tracking_storage; - btree::btree_map procedure_annotations; - UniformArenaAllocator procedure_annotations_storage; - std::vector dead_store_elimination_candidates; - - VarAction2GroupVariableTracking *GetVarAction2GroupVariableTracking(const SpriteGroup *group, bool make_new) - { - if (make_new) { - VarAction2GroupVariableTracking *&ptr = this->group_temp_store_variable_tracking[group]; - if (!ptr) ptr = new (this->group_temp_store_variable_tracking_storage.Allocate()) VarAction2GroupVariableTracking(); - return ptr; - } else { - auto iter = this->group_temp_store_variable_tracking.find(group); - if (iter != this->group_temp_store_variable_tracking.end()) return iter->second; - return nullptr; - } - } - - std::pair GetVarAction2ProcedureAnnotation(const SpriteGroup *group) - { - VarAction2ProcedureAnnotation *&ptr = this->procedure_annotations[group]; - if (!ptr) { - ptr = new (this->procedure_annotations_storage.Allocate()) VarAction2ProcedureAnnotation(); - return 
std::make_pair(ptr, true); - } else { - return std::make_pair(ptr, false); - } - } - - /** Clear temporary data before processing the next file in the current loading stage */ - void ClearDataForNextFile() - { - this->nfo_line = 0; - this->skip_sprites = 0; - - for (uint i = 0; i < GSF_END; i++) { - this->spritesets[i].clear(); - } - - memset(this->spritegroups, 0, sizeof(this->spritegroups)); - - this->group_temp_store_variable_tracking.clear(); - this->group_temp_store_variable_tracking_storage.EmptyArena(); - this->procedure_annotations.clear(); - this->procedure_annotations_storage.EmptyArena(); - this->dead_store_elimination_candidates.clear(); - } - - /** - * Records new spritesets. - * @param feature GrfSpecFeature the set is defined for. - * @param first_sprite SpriteID of the first sprite in the set. - * @param first_set First spriteset to define. - * @param numsets Number of sets to define. - * @param numents Number of sprites per set to define. - */ - void AddSpriteSets(byte feature, SpriteID first_sprite, uint first_set, uint numsets, uint numents) - { - assert(feature < GSF_END); - for (uint i = 0; i < numsets; i++) { - SpriteSet &set = this->spritesets[feature][first_set + i]; - set.sprite = first_sprite + i * numents; - set.num_sprites = numents; - } - } - - /** - * Check whether there are any valid spritesets for a feature. - * @param feature GrfSpecFeature to check. - * @return true if there are any valid sets. - * @note Spritesets with zero sprites are valid to allow callback-failures. - */ - bool HasValidSpriteSets(byte feature) const - { - assert(feature < GSF_END); - return !this->spritesets[feature].empty(); - } - - /** - * Check whether a specific set is defined. - * @param feature GrfSpecFeature to check. - * @param set Set to check. - * @return true if the set is valid. - * @note Spritesets with zero sprites are valid to allow callback-failures. 
- */ - bool IsValidSpriteSet(byte feature, uint set) const - { - assert(feature < GSF_END); - return this->spritesets[feature].find(set) != this->spritesets[feature].end(); - } - - /** - * Returns the first sprite of a spriteset. - * @param feature GrfSpecFeature to query. - * @param set Set to query. - * @return First sprite of the set. - */ - SpriteID GetSprite(byte feature, uint set) const - { - assert(IsValidSpriteSet(feature, set)); - return this->spritesets[feature].find(set)->second.sprite; - } - - /** - * Returns the number of sprites in a spriteset - * @param feature GrfSpecFeature to query. - * @param set Set to query. - * @return Number of sprites in the set. - */ - uint GetNumEnts(byte feature, uint set) const - { - assert(IsValidSpriteSet(feature, set)); - return this->spritesets[feature].find(set)->second.num_sprites; - } -}; - -static GrfProcessingState _cur; +GrfProcessingState _cur; /** @@ -5719,35 +5551,6 @@ static const CallbackResultSpriteGroup *NewCallbackResultSpriteGroup(uint16 grou return ptr; } -static const SpriteGroup *PruneTargetSpriteGroup(const SpriteGroup *result) -{ - if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2) || HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_GROUP_PRUNE)) return result; - while (result != nullptr) { - if (result->type == SGT_DETERMINISTIC) { - const DeterministicSpriteGroup *sg = static_cast(result); - if (sg->GroupMayBeBypassed()) { - /* Deterministic sprite group can be trivially resolved, skip it */ - uint32 value = (sg->adjusts.size() == 1) ? 
EvaluateDeterministicSpriteGroupAdjust(sg->size, sg->adjusts[0], nullptr, 0, UINT_MAX) : 0; - const SpriteGroup *candidate = sg->default_group; - for (const auto &range : sg->ranges) { - if (range.low <= value && value <= range.high) { - candidate = range.group; - break; - } - } - if (candidate != nullptr && candidate->type == SGT_DETERMINISTIC && static_cast(candidate)->dsg_flags & DSGF_REQUIRES_VAR1C) { - /* Can't skip this group as the child group requires the result of this group for variable 1C */ - return result; - } - result = candidate; - continue; - } - } - break; - } - return result; -} - /* Helper function to either create a callback or link to a previously * defined spritegroup. */ static const SpriteGroup *GetGroupFromGroupID(byte setid, byte type, uint16 groupid) @@ -5801,2780 +5604,6 @@ static const SpriteGroup *CreateGroupFromGroupID(byte feature, byte setid, byte return new ResultSpriteGroup(spriteset_start, num_sprites); } -enum VarAction2AdjustInferenceFlags { - VA2AIF_NONE = 0x00, - - VA2AIF_SIGNED_NON_NEGATIVE = 0x01, - VA2AIF_ONE_OR_ZERO = 0x02, - VA2AIF_PREV_TERNARY = 0x04, - VA2AIF_PREV_MASK_ADJUST = 0x08, - VA2AIF_PREV_STORE_TMP = 0x10, - VA2AIF_HAVE_CONSTANT = 0x20, - VA2AIF_SINGLE_LOAD = 0x40, - VA2AIF_MUL_BOOL = 0x80, - VA2AIF_PREV_SCMP_DEC = 0x100, - - VA2AIF_PREV_MASK = VA2AIF_PREV_TERNARY | VA2AIF_PREV_MASK_ADJUST | VA2AIF_PREV_STORE_TMP | VA2AIF_PREV_SCMP_DEC, -}; -DECLARE_ENUM_AS_BIT_SET(VarAction2AdjustInferenceFlags) - -struct VarAction2TempStoreInferenceVarSource { - DeterministicSpriteGroupAdjustType type; - uint16 variable; - byte shift_num; - uint32 parameter; - uint32 and_mask; - uint32 add_val; - uint32 divmod_val; -}; - -struct VarAction2TempStoreInference { - VarAction2AdjustInferenceFlags inference = VA2AIF_NONE; - uint32 store_constant = 0; - VarAction2TempStoreInferenceVarSource var_source; - uint version = 0; -}; - -struct VarAction2InferenceBackup { - VarAction2AdjustInferenceFlags inference = VA2AIF_NONE; - uint32 
current_constant = 0; - uint adjust_size = 0; -}; - -struct VarAction2OptimiseState { - VarAction2AdjustInferenceFlags inference = VA2AIF_NONE; - uint32 current_constant = 0; - btree::btree_map temp_stores; - VarAction2InferenceBackup inference_backup; - VarAction2GroupVariableTracking *var_tracking = nullptr; - bool seen_procedure_call = false; - bool check_expensive_vars = false; - bool enable_dse = false; - uint default_variable_version = 0; - - inline VarAction2GroupVariableTracking *GetVarTracking(DeterministicSpriteGroup *group) - { - if (this->var_tracking == nullptr) { - this->var_tracking = _cur.GetVarAction2GroupVariableTracking(group, true); - } - return this->var_tracking; - } -}; - -static bool IsExpensiveVehicleVariable(uint16 variable) -{ - switch (variable) { - case 0x45: - case 0x4A: - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0xFE: - case 0xFF: - return true; - - default: - return false; - } -} - -static bool IsExpensiveIndustryTileVariable(uint16 variable) -{ - switch (variable) { - case 0x60: - case 0x61: - case 0x62: - return true; - - default: - return false; - } -} - -static bool IsExpensiveObjectVariable(uint16 variable) -{ - switch (variable) { - case 0x41: - case 0x45: - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case A2VRI_OBJECT_FOUNDATION_SLOPE: - case A2VRI_OBJECT_FOUNDATION_SLOPE_CHANGE: - return true; - - default: - return false; - } -} - -static bool IsExpensiveVariable(uint16 variable, GrfSpecFeature feature, VarSpriteGroupScope var_scope) -{ - if ((feature >= GSF_TRAINS && feature <= GSF_AIRCRAFT) && IsExpensiveVehicleVariable(variable)) return true; - if (feature == GSF_INDUSTRYTILES && var_scope == VSG_SCOPE_SELF && IsExpensiveIndustryTileVariable(variable)) return true; - if (feature == GSF_OBJECTS && var_scope == VSG_SCOPE_SELF && IsExpensiveObjectVariable(variable)) return true; - return false; -} - -static bool IsVariableVeryCheap(uint16 variable, GrfSpecFeature feature) -{ - switch 
(variable) { - case 0x0C: - case 0x10: - case 0x18: - case 0x1C: - return true; - } - return false; -} - -static bool IsFeatureUsableForDSE(GrfSpecFeature feature) -{ - return (feature != GSF_STATIONS); -} - -static bool IsIdenticalValueLoad(const DeterministicSpriteGroupAdjust *a, const DeterministicSpriteGroupAdjust *b) -{ - if (a == nullptr && b == nullptr) return true; - if (a == nullptr || b == nullptr) return false; - - if (a->variable == 0x7B || a->variable == 0x7E) return false; - - return std::tie(a->type, a->variable, a->shift_num, a->parameter, a->and_mask, a->add_val, a->divmod_val) == - std::tie(b->type, b->variable, b->shift_num, b->parameter, b->and_mask, b->add_val, b->divmod_val); -} - -static const DeterministicSpriteGroupAdjust *GetVarAction2PreviousSingleLoadAdjust(const std::vector &adjusts, int start_index, bool *is_inverted) -{ - bool passed_store_perm = false; - if (is_inverted != nullptr) *is_inverted = false; - std::bitset<256> seen_stores; - for (int i = start_index; i >= 0; i--) { - const DeterministicSpriteGroupAdjust &prev = adjusts[i]; - if (prev.variable == 0x7E) { - /* Procedure call, don't use or go past this */ - break; - } - if (prev.operation == DSGA_OP_RST) { - if (prev.variable == 0x7B) { - /* Can't use this previous load as it depends on the last value */ - return nullptr; - } - if (prev.variable == 0x7C && passed_store_perm) { - /* If we passed a store perm then a load from permanent storage is not a valid previous load as we may have clobbered it */ - return nullptr; - } - if (prev.variable == 0x7D && seen_stores[prev.parameter & 0xFF]) { - /* If we passed a store then a load from that same store is not valid */ - return nullptr; - } - return &prev; - } else if (prev.operation == DSGA_OP_STO) { - if (prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask < 0x100) { - /* Temp store */ - seen_stores.set(prev.and_mask, true); - continue; - } else { - /* Special register store or 
unpredictable store, don't try to optimise following load */ - break; - } - } else if (prev.operation == DSGA_OP_STOP) { - /* Permanent storage store */ - passed_store_perm = true; - continue; - } else if (prev.operation == DSGA_OP_XOR && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == 1 && is_inverted != nullptr) { - /* XOR invert */ - *is_inverted = !(*is_inverted); - continue; - } else { - break; - } - } - return nullptr; -} - -static const DeterministicSpriteGroupAdjust *GetVarAction2PreviousSingleStoreAdjust(const std::vector &adjusts, int start_index, bool *is_inverted) -{ - if (is_inverted != nullptr) *is_inverted = false; - for (int i = start_index; i >= 0; i--) { - const DeterministicSpriteGroupAdjust &prev = adjusts[i]; - if (prev.variable == 0x7E) { - /* Procedure call, don't use or go past this */ - break; - } - if (prev.operation == DSGA_OP_STO) { - if (prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask < 0x100) { - /* Temp store */ - return &prev; - } else { - /* Special register store or unpredictable store, don't try to optimise following load */ - break; - } - } else if (prev.operation == DSGA_OP_XOR && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == 1 && is_inverted != nullptr) { - /* XOR invert */ - *is_inverted = !(*is_inverted); - continue; - } else { - break; - } - } - return nullptr; -} - -static int GetVarAction2AdjustOfPreviousTempStoreSource(const DeterministicSpriteGroupAdjust *adjusts, int start_index, uint8 store_var) -{ - for (int i = start_index - 1; i >= 0; i--) { - const DeterministicSpriteGroupAdjust &prev = adjusts[i]; - if (prev.variable == 0x7E) { - /* Procedure call, don't use or go past this */ - return -1; - } - if (prev.operation == DSGA_OP_STO) { - if (prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask < 0x100) { - /* Temp store */ - if 
(prev.and_mask == (store_var & 0xFF)) { - return i; - } - } else { - /* Special register store or unpredictable store, don't use or go past this */ - return -1; - } - } - } - return -1; -} - -struct VarAction2AdjustDescriptor { - DeterministicSpriteGroupAdjust *adjust_array = nullptr; - DeterministicSpriteGroupAdjust *override_first = nullptr; - int index = 0; - - inline bool IsValid() const { return this->adjust_array != nullptr; } - inline const DeterministicSpriteGroupAdjust &GetCurrent() const { return this->override_first != nullptr ? *(this->override_first) : this->adjust_array[this->index]; }; -}; - -static bool AdvanceVarAction2AdjustDescriptor(VarAction2AdjustDescriptor &desc) -{ - const DeterministicSpriteGroupAdjust &adj = desc.GetCurrent(); - if (adj.variable == 0x7E || adj.variable == 0x7B || adj.operation == DSGA_OP_STOP) { - /* Procedure call or load depends on the last value, or a permanent store, don't use or go past this */ - desc.index = -1; - desc.override_first = nullptr; - return true; - } - if (adj.operation == DSGA_OP_STO) { - if (adj.type == DSGA_TYPE_NONE && adj.variable == 0x1A && adj.shift_num == 0 && adj.and_mask < 0x100) { - /* Temp store, skip */ - desc.index--; - } else { - /* Special register store or unpredictable store, don't use or go past this */ - desc.index = -1; - } - desc.override_first = nullptr; - return true; - } - return false; -} - -static bool AreVarAction2AdjustsEquivalent(VarAction2AdjustDescriptor a, VarAction2AdjustDescriptor b) -{ - if (!a.IsValid() || !b.IsValid()) return false; - - while (a.index >= 0 && b.index >= 0) { - if (a.adjust_array == b.adjust_array && a.index == b.index) return true; - - if (AdvanceVarAction2AdjustDescriptor(a)) continue; - if (AdvanceVarAction2AdjustDescriptor(b)) continue; - - const DeterministicSpriteGroupAdjust &adj_a = a.GetCurrent(); - const DeterministicSpriteGroupAdjust &adj_b = b.GetCurrent(); - - if (std::tie(adj_a.operation, adj_a.type, adj_a.variable, adj_a.shift_num, 
adj_a.and_mask, adj_a.add_val, adj_a.divmod_val) != - std::tie(adj_b.operation, adj_b.type, adj_b.variable, adj_b.shift_num, adj_b.and_mask, adj_b.add_val, adj_b.divmod_val)) return false; - - if (adj_a.parameter != adj_b.parameter) { - if (adj_a.variable == 0x7D) { - int store_index_a = GetVarAction2AdjustOfPreviousTempStoreSource(a.adjust_array, a.index - 1, (adj_a.parameter & 0xFF)); - if (store_index_a < 1) { - return false; - } - int store_index_b = GetVarAction2AdjustOfPreviousTempStoreSource(b.adjust_array, b.index - 1, (adj_b.parameter & 0xFF)); - if (store_index_b < 1) { - return false; - } - if (!AreVarAction2AdjustsEquivalent({ a.adjust_array, nullptr, store_index_a - 1 }, { b.adjust_array, nullptr, store_index_b - 1 })) return false; - } else { - return false; - } - } - - if (adj_b.operation == DSGA_OP_RST) return true; - - a.index--; - b.index--; - a.override_first = nullptr; - b.override_first = nullptr; - } - - return false; -} - -enum VarAction2AdjustsBooleanInverseResult { - VA2ABIR_NO, ///< Adjusts are not inverse - VA2ABIR_CCAT, ///< Adjusts are inverse (constant comparison adjust type) - VA2ABIR_XOR_A, ///< Adjusts are inverse (a has an additional XOR 1 or EQ 0 compared to b) - VA2ABIR_XOR_B, ///< Adjusts are inverse (b has an additional XOR 1 or EQ 0 compared to a) -}; - -static VarAction2AdjustsBooleanInverseResult AreVarAction2AdjustsBooleanInverse(VarAction2AdjustDescriptor a, VarAction2AdjustDescriptor b) -{ - if (!a.IsValid() || !b.IsValid()) return VA2ABIR_NO; - - if (a.index < 0 || b.index < 0) return VA2ABIR_NO; - - AdvanceVarAction2AdjustDescriptor(a); - AdvanceVarAction2AdjustDescriptor(b); - - if (a.index < 0 || b.index < 0) return VA2ABIR_NO; - - const DeterministicSpriteGroupAdjust &adj_a = a.GetCurrent(); - const DeterministicSpriteGroupAdjust &adj_b = b.GetCurrent(); - - if (adj_a.operation == DSGA_OP_RST && adj_b.operation == DSGA_OP_RST && - IsConstantComparisonAdjustType(adj_a.type) && 
InvertConstantComparisonAdjustType(adj_a.type) == adj_b.type && - (std::tie(adj_a.variable, adj_a.shift_num, adj_a.parameter, adj_a.and_mask, adj_a.add_val, adj_a.divmod_val) == - std::tie(adj_b.variable, adj_b.shift_num, adj_b.parameter, adj_b.and_mask, adj_b.add_val, adj_b.divmod_val))) { - return VA2ABIR_CCAT; - } - - auto check_inverse = [&]() -> bool { - auto check_inner = [](VarAction2AdjustDescriptor &a, VarAction2AdjustDescriptor &b) -> bool { - if (a.index >= 0) AdvanceVarAction2AdjustDescriptor(a); - if (a.index >= 0) { - const DeterministicSpriteGroupAdjust &a_adj = a.GetCurrent(); - /* Check that the value was bool prior to the XOR */ - if (IsEvalAdjustOperationRelationalComparison(a_adj.operation) || IsConstantComparisonAdjustType(a_adj.type)) { - if (AreVarAction2AdjustsEquivalent(a, b)) return true; - } - } - return false; - }; - const DeterministicSpriteGroupAdjust &adj = a.GetCurrent(); - if (adj.operation == DSGA_OP_XOR && adj.type == DSGA_TYPE_NONE && adj.variable == 0x1A && adj.shift_num == 0 && adj.and_mask == 1) { - VarAction2AdjustDescriptor tmp = { a.adjust_array, nullptr, a.index - 1 }; - if (check_inner(tmp, b)) return true; - } - if (adj.operation == DSGA_OP_RST && adj.type == DSGA_TYPE_EQ && adj.variable == 0x7D && adj.shift_num == 0 && adj.and_mask == 0xFFFFFFFF && adj.add_val == 0) { - int store_index = GetVarAction2AdjustOfPreviousTempStoreSource(a.adjust_array, a.index - 1, (adj.parameter & 0xFF)); - if (store_index >= 1) { - /* Found the referenced temp store, use that */ - VarAction2AdjustDescriptor tmp = { a.adjust_array, nullptr, store_index - 1 }; - if (check_inner(tmp, b)) return true; - } - } - return false; - }; - - if (check_inverse()) return VA2ABIR_XOR_A; - - std::swap(a, b); - - if (check_inverse()) return VA2ABIR_XOR_B; - - return VA2ABIR_NO; -} - -/* - * Find and replace the result of: - * (var * flag) + (var * !flag) with var - * (-var * (var < 0)) + (var * !(var < 0)) with abs(var) - * "+" may be ADD, OR or XOR. 
- */ -static bool TryMergeBoolMulCombineVarAction2Adjust(VarAction2OptimiseState &state, std::vector &adjusts, const int adjust_index) -{ - uint store_var = adjusts[adjust_index].parameter; - - DeterministicSpriteGroupAdjust synth_adjusts[2]; - VarAction2AdjustDescriptor found_adjusts[4] = {}; - uint mul_indices[2] = {}; - - auto find_adjusts = [&](int start_index, uint save_index) { - bool have_mul = false; - for (int i = start_index; i >= 0; i--) { - const DeterministicSpriteGroupAdjust &prev = adjusts[i]; - if (prev.variable == 0x7E || prev.variable == 0x7B) { - /* Procedure call or load depends on the last value, don't use or go past this */ - return; - } - if (prev.operation == DSGA_OP_STO) { - if (prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask < 0x100) { - /* Temp store */ - if (prev.and_mask == (store_var & 0xFF)) return; - } else { - /* Special register store or unpredictable store, don't use or go past this */ - return; - } - } else if (prev.operation == DSGA_OP_MUL && !have_mul) { - /* First source is the variable of mul, if it's a temporary storage load, try to follow it */ - mul_indices[save_index] = i; - if (prev.variable == 0x7D && prev.type == DSGA_TYPE_NONE && prev.shift_num == 0 && prev.and_mask == 0xFFFFFFFF) { - int store_index = GetVarAction2AdjustOfPreviousTempStoreSource(adjusts.data(), i - 1, (prev.parameter & 0xFF)); - if (store_index >= 1) { - /* Found the referenced temp store, use that */ - found_adjusts[save_index * 2] = { adjusts.data(), nullptr, store_index - 1 }; - have_mul = true; - } - } - if (!have_mul) { - /* It's not a temporary storage load which can be followed, synthesise an RST */ - synth_adjusts[save_index] = prev; - synth_adjusts[save_index].operation = DSGA_OP_RST; - synth_adjusts[save_index].adjust_flags = DSGAF_NONE; - found_adjusts[save_index * 2] = { adjusts.data(), synth_adjusts + save_index, i }; - have_mul = true; - } - } else if (prev.operation == DSGA_OP_STOP) { - /* 
Don't try to handle writes to permanent storage */ - return; - } else if (have_mul) { - /* Found second source */ - found_adjusts[(save_index * 2) + 1] = { adjusts.data(), nullptr, i }; - return; - } else { - return; - } - } - }; - - find_adjusts(adjust_index - 1, 0); // A (first, closest) - if (!found_adjusts[0].IsValid() || !found_adjusts[1].IsValid()) return false; - - /* Find offset of referenced store */ - int store_index = GetVarAction2AdjustOfPreviousTempStoreSource(adjusts.data(), adjust_index - 1, (store_var & 0xFF)); - if (store_index < 0) return false; - - find_adjusts(store_index - 1, 1); // B (second, further) - if (!found_adjusts[2].IsValid() || !found_adjusts[3].IsValid()) return false; - - bool is_cond_first[2]; - VarAction2AdjustsBooleanInverseResult found = VA2ABIR_NO; - auto try_find = [&](bool a_first, bool b_first) { - if (found == VA2ABIR_NO) { - found = AreVarAction2AdjustsBooleanInverse(found_adjusts[a_first ? 0 : 1], found_adjusts[b_first ? 2 : 3]); - if (found != VA2ABIR_NO) { - is_cond_first[0] = a_first; - is_cond_first[1] = b_first; - } - } - }; - try_find(true, true); - try_find(true, false); - try_find(false, true); - try_find(false, false); - - if (found == VA2ABIR_NO) return false; - - auto try_erase_from = [&](uint start) -> bool { - for (uint i = start; i < (uint)adjusts.size(); i++) { - const DeterministicSpriteGroupAdjust &adjust = adjusts[i]; - if (adjust.variable == 0x7E || IsEvalAdjustWithSideEffects(adjust.operation)) return false; - } - adjusts.erase(adjusts.begin() + start, adjusts.end()); - return true; - }; - auto try_to_make_rst_from = [&](uint idx) -> bool { - const DeterministicSpriteGroupAdjust &src = adjusts[idx]; - if (src.variable == 0x7D) { - /* Check that variable is still valid */ - for (uint i = idx; i < (uint)adjusts.size(); i++) { - const DeterministicSpriteGroupAdjust &adjust = adjusts[i]; - if (adjust.variable == 0x7E) return false; - if (adjust.operation == DSGA_OP_STO) { - if (adjust.type == 
DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask < 0x100) { - /* Temp store */ - if (adjust.and_mask == (src.parameter & 0xFF)) return false; - } else { - /* Special register store or unpredictable store, don't use or go past this */ - return false; - } - } - } - } - adjusts.push_back(src); - adjusts.back().operation = DSGA_OP_RST; - adjusts.back().adjust_flags = DSGAF_NONE; - return true; - }; - - if (AreVarAction2AdjustsEquivalent(found_adjusts[is_cond_first[0] ? 1 : 0], found_adjusts[is_cond_first[1] ? 3 : 2])) { - /* replace (var * flag) + (var * !flag) with var */ - - if (is_cond_first[0]) { - /* The cond is the mul variable of the first (closest) mul, the actual value is the prior adjust */ - if (try_erase_from(mul_indices[0] + 1)) return true; - } else { - /* The value is the mul variable of the first (closest) mul, the cond is the prior adjust */ - if (try_to_make_rst_from(mul_indices[0])) return true; - } - - if (!is_cond_first[1]) { - /* The value is the mul variable of the second (further) mul, the cond is the prior adjust */ - if (try_to_make_rst_from(mul_indices[1])) return true; - } - - return false; - } - - auto check_rsub = [&](VarAction2AdjustDescriptor &desc) -> bool { - int rsub_offset = desc.index; - if (rsub_offset < 1) return false; - const DeterministicSpriteGroupAdjust &adj = adjusts[rsub_offset]; - if (adj.operation == DSGA_OP_RSUB && adj.type == DSGA_TYPE_NONE && adj.variable == 0x1A && adj.shift_num == 0 && adj.and_mask == 0) { - desc.index--; - return true; - } - return false; - }; - - auto check_abs_cond = [&](VarAction2AdjustDescriptor cond, VarAction2AdjustDescriptor &value) -> bool { - int lt_offset = cond.index; - if (lt_offset < 1) return false; - const DeterministicSpriteGroupAdjust &adj = adjusts[lt_offset]; - if (adj.operation == DSGA_OP_SLT && adj.type == DSGA_TYPE_NONE && adj.variable == 0x1A && adj.shift_num == 0 && adj.and_mask == 0) { - cond.index--; - return 
AreVarAction2AdjustsEquivalent(cond, value); - } - return false; - }; - - auto append_abs = [&]() { - adjusts.emplace_back(); - adjusts.back().operation = DSGA_OP_ABS; - adjusts.back().variable = 0x1A; - state.inference |= VA2AIF_SIGNED_NON_NEGATIVE; - }; - - if (found == VA2ABIR_XOR_A) { - /* Try to find an ABS: - * A has the extra invert, check cond of B - * B is the negative path with the RSUB - */ - VarAction2AdjustDescriptor value_b = found_adjusts[is_cond_first[1] ? 3 : 2]; - const VarAction2AdjustDescriptor &cond_b = found_adjusts[is_cond_first[1] ? 2 : 3]; - - if (check_rsub(value_b) && check_abs_cond(cond_b, value_b) && AreVarAction2AdjustsEquivalent(found_adjusts[is_cond_first[0] ? 1 : 0], value_b)) { - /* Found an ABS, use one of the two value parts */ - - if (is_cond_first[0]) { - /* The cond is the mul variable of the A (first, closest) mul, the actual value is the prior adjust */ - if (try_erase_from(mul_indices[0])) { - append_abs(); - return true; - } - } else { - /* The value is the mul variable of the A (first, closest) mul, the cond is the prior adjust */ - if (try_to_make_rst_from(mul_indices[0])) { - append_abs(); - return true; - } - } - } - } - if (found == VA2ABIR_XOR_B) { - /* Try to find an ABS: - * B has the extra invert, check cond of A - * A is the negative path with the RSUB - */ - VarAction2AdjustDescriptor value_a = found_adjusts[is_cond_first[0] ? 1 : 0]; - const VarAction2AdjustDescriptor &cond_a = found_adjusts[is_cond_first[0] ? 0 : 1]; - - if (check_rsub(value_a) && check_abs_cond(cond_a, value_a) && AreVarAction2AdjustsEquivalent(found_adjusts[is_cond_first[1] ? 
3 : 2], value_a)) { /* NOTE(review): fragment — this function begins before this chunk; body continues below unchanged. */
	/* Found an ABS, use one of the two value parts */

	if (is_cond_first[0]) {
		/* The cond is the mul variable of the A (first, closest) mul, the actual value is the prior adjust, -1 to also remove the RSUB */
		if (try_erase_from(mul_indices[0] - 1)) {
			append_abs();
			return true;
		}
	}

	if (!is_cond_first[1]) {
		/* The value is the mul variable of the B (second, further) mul, the cond is the prior adjust */
		if (try_to_make_rst_from(mul_indices[1])) {
			append_abs();
			return true;
		}
	}
	}
	}

	return false;
}

/**
 * Try to fold two consecutive constant (variable 0x1A, no type/shift) adjusts into one.
 * Handles ADD/SUB (including mixed ADD/SUB pairs), OR, AND and XOR on the previous adjust's and_mask.
 * Returns the number of adjusts to remove: 0: neither, 1: current, 2: prev and current
 */
static uint TryMergeVarAction2AdjustConstantOperations(DeterministicSpriteGroupAdjust &prev, DeterministicSpriteGroupAdjust &current)
{
	/* Both adjusts must be plain constant loads: no value type, variable 0x1A (constant), no shift */
	if (prev.type != DSGA_TYPE_NONE || prev.variable != 0x1A || prev.shift_num != 0) return 0;
	if (current.type != DSGA_TYPE_NONE || current.variable != 0x1A || current.shift_num != 0) return 0;

	switch (current.operation) {
		case DSGA_OP_ADD:
		case DSGA_OP_SUB:
			if (prev.operation == current.operation) {
				prev.and_mask += current.and_mask;
				break;
			}
			/* ADD followed by SUB (or vice versa) folds into a single adjust with the difference */
			if (prev.operation == ((current.operation == DSGA_OP_SUB) ? DSGA_OP_ADD : DSGA_OP_SUB)) {
				prev.and_mask -= current.and_mask;
				break;
			}
			return 0;

		case DSGA_OP_OR:
			if (prev.operation == DSGA_OP_OR) {
				prev.and_mask |= current.and_mask;
				break;
			}
			return 0;

		case DSGA_OP_AND:
			if (prev.operation == DSGA_OP_AND) {
				prev.and_mask &= current.and_mask;
				break;
			}
			return 0;

		case DSGA_OP_XOR:
			if (prev.operation == DSGA_OP_XOR) {
				prev.and_mask ^= current.and_mask;
				break;
			}
			return 0;

		default:
			return 0;
	}

	if (prev.and_mask == 0 && IsEvalAdjustWithZeroRemovable(prev.operation)) {
		/* prev now does nothing, remove it as well */
		return 2;
	}
	return 1;
}

/**
 * Incrementally optimise the most recently appended adjust of @p group (a peephole pass run as
 * adjusts are added during GRF loading).
 * Tracks value inferences (constants, bool-ness, signedness, single-load sources) in @p state,
 * folds/removes redundant adjusts, rewrites recognised operation patterns, and records
 * temporary-storage (variable 0x7D) reads/writes for the later dead-store-elimination pass.
 * May recurse on the new last adjust after removing/rewriting entries. Disabled entirely by the
 * NGOF_NO_OPT_VARACT2 optimiser flag.
 * @param state    per-group optimiser state (inference flags, temp store tracking)
 * @param feature  GRF feature of the group (affects variable cost heuristics)
 * @param varsize  size in bytes of the group's values (1, 2 or 4)
 * @param group    group whose adjust list is being built
 * @param adjust   the adjust just appended (must be group->adjusts.back() on entry)
 */
static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group, DeterministicSpriteGroupAdjust &adjust)
{
	if (unlikely(HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2))) return;

	/* On exit: snapshot the inference state if the final adjust is a procedure call or has side
	 * effects, so a later "convert to load" rewrite can restore it (see try_restore_inference_backup). */
	auto guard = scope_guard([&]() {
		if (!group->adjusts.empty()) {
			const DeterministicSpriteGroupAdjust &adjust = group->adjusts.back();
			if (adjust.variable == 0x7E || IsEvalAdjustWithSideEffects(adjust.operation)) {
				/* save inference state */
				state.inference_backup.adjust_size = (uint)group->adjusts.size();
				state.inference_backup.inference = state.inference;
				state.inference_backup.current_constant = state.current_constant;
			}
		}
	});

	auto try_restore_inference_backup = [&]() {
		if (state.inference_backup.adjust_size != 0 && state.inference_backup.adjust_size == (uint)group->adjusts.size()) {
			state.inference = state.inference_backup.inference;
			state.current_constant = state.inference_backup.current_constant;
		}
	};

	VarAction2AdjustInferenceFlags prev_inference = state.inference;
	state.inference = VA2AIF_NONE;

	/* Sign bit position for the group's value size */
	auto get_sign_bit = [&]() -> uint32 {
		return (1 << ((varsize * 8) - 1));
	};

	/* All-ones mask for the group's value size */
	auto get_full_mask = [&]() -> uint32 {
		return UINT_MAX >> ((4 - varsize) * 8);
	};

	/* Derive signedness/bool inferences from a known result mask */
	auto add_inferences_from_mask = [&](uint32 mask) {
		if (mask == 1) {
			state.inference |= VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
		} else if ((mask & get_sign_bit()) == 0) {
			state.inference |= VA2AIF_SIGNED_NON_NEGATIVE;
		}
	};

	/* Replace the current adjust (and any side-effect-free predecessors) with a plain constant load */
	auto replace_with_constant_load = [&](uint32 constant) {
		group->adjusts.pop_back();
		if ((prev_inference & VA2AIF_HAVE_CONSTANT) && constant == state.current_constant) {
			/* Don't create a new constant load for the same constant as was there previously */
			state.inference = prev_inference;
			return;
		}
		while (!group->adjusts.empty()) {
			const DeterministicSpriteGroupAdjust &prev = group->adjusts.back();
			if (prev.variable != 0x7E && !IsEvalAdjustWithSideEffects(prev.operation)) {
				/* Delete useless operation */
				group->adjusts.pop_back();
			} else {
				break;
			}
		}
		state.inference = VA2AIF_HAVE_CONSTANT;
		add_inferences_from_mask(constant);
		state.current_constant = constant;
		/* Loading 0 into an empty adjust list is implicit, no adjust needed in that case */
		if (constant != 0 || !group->adjusts.empty()) {
			DeterministicSpriteGroupAdjust &replacement = group->adjusts.emplace_back();
			replacement.operation = DSGA_OP_RST;
			replacement.variable = 0x1A;
			replacement.shift_num = 0;
			replacement.type = DSGA_TYPE_NONE;
			replacement.and_mask = constant;
			replacement.add_val = 0;
			replacement.divmod_val = 0;
			state.inference |= VA2AIF_PREV_MASK_ADJUST;
		}
	};

	/* An unpredictable temp-storage read: mark every register without a known local store as a group input */
	auto handle_unpredictable_temp_load = [&]() {
		std::bitset<256> bits;
		bits.set();
		for (auto &it : state.temp_stores) {
			bits.set(it.first, false);
		}
		state.GetVarTracking(group)->in |= bits;
	};
	/* Invalidate all tracked temp-store values (bumping versions so stale 0x7D reads don't match) */
	auto reset_store_values = [&]() {
		for (auto &it : state.temp_stores) {
			it.second.inference = VA2AIF_NONE;
			it.second.version++;
		}
		state.default_variable_version++;
	};
	auto handle_unpredictable_temp_store = [&]() {
		reset_store_values();
	};

	auto try_merge_with_previous = [&]() {
		if (adjust.variable == 0x1A && group->adjusts.size() >= 2) {
			/* Merged this adjust into the previous one */
			uint to_remove = TryMergeVarAction2AdjustConstantOperations(group->adjusts[group->adjusts.size() - 2], adjust);
			if (to_remove > 0) group->adjusts.erase(group->adjusts.end() - to_remove, group->adjusts.end());

			if (to_remove == 1 && group->adjusts.back().and_mask == 0 && IsEvalAdjustWithZeroAlwaysZero(group->adjusts.back().operation)) {
				/* Operation always returns 0, replace it and any useless prior operations */
				replace_with_constant_load(0);
			}
		}
	};

	/* Special handling of variable 7B, this uses the parameter as the variable number, and the last value as the variable's parameter.
	 * If the last value is a known constant, it can be substituted immediately. */
	if (adjust.variable == 0x7B) {
		if (prev_inference & VA2AIF_HAVE_CONSTANT) {
			adjust.variable = adjust.parameter;
			adjust.parameter = state.current_constant;
		} else if (adjust.parameter == 0x7D) {
			handle_unpredictable_temp_load();
		} else if (adjust.parameter == 0x1C) {
			/* This is to simplify tracking of variable 1C, the parameter is never used for anything */
			adjust.variable = adjust.parameter;
			adjust.parameter = 0;
		}
	}
	if (adjust.variable == 0x1C && !state.seen_procedure_call) {
		group->dsg_flags |= DSGF_REQUIRES_VAR1C;
	}

	/* Inferences about a non-constant value read from temp storage (variable 0x7D), if any */
	VarAction2AdjustInferenceFlags non_const_var_inference = VA2AIF_NONE;
	/* while + continue/break rather than if: substituting a 0x7D read with its source may yield
	 * another 0x7D read which can itself be substituted */
	while (adjust.variable == 0x7D) {
		non_const_var_inference = VA2AIF_NONE;
		auto iter = state.temp_stores.find(adjust.parameter & 0xFF);
		if (iter == state.temp_stores.end()) {
			/* Read without any previous store */
			state.GetVarTracking(group)->in.set(adjust.parameter & 0xFF, true);
			adjust.parameter |= (state.default_variable_version << 8);
		} else {
			const VarAction2TempStoreInference &store = iter->second;
			if (store.inference & VA2AIF_HAVE_CONSTANT) {
				/* The stored value is a known constant: read the constant directly */
				adjust.variable = 0x1A;
				adjust.and_mask &= (store.store_constant >> adjust.shift_num);
			} else if ((store.inference & VA2AIF_SINGLE_LOAD) && (store.var_source.variable == 0x7D || IsVariableVeryCheap(store.var_source.variable, feature))) {
				/* The stored value is a plain load of a cheap variable: re-load the source instead */
				if (adjust.type == DSGA_TYPE_NONE && adjust.shift_num == 0 && (adjust.and_mask == 0xFFFFFFFF || ((store.inference & VA2AIF_ONE_OR_ZERO) && (adjust.and_mask & 1)))) {
					adjust.type = store.var_source.type;
					adjust.variable = store.var_source.variable;
					adjust.shift_num = store.var_source.shift_num;
					adjust.parameter = store.var_source.parameter;
					adjust.and_mask = store.var_source.and_mask;
					adjust.add_val = store.var_source.add_val;
					adjust.divmod_val = store.var_source.divmod_val;
					continue;
				} else if (store.var_source.type == DSGA_TYPE_NONE && (adjust.shift_num + store.var_source.shift_num) < 32) {
					/* Compose the shifts/masks of this read and the source load */
					adjust.variable = store.var_source.variable;
					adjust.parameter = store.var_source.parameter;
					adjust.and_mask &= store.var_source.and_mask >> adjust.shift_num;
					adjust.shift_num += store.var_source.shift_num;
					continue;
				}
				adjust.parameter |= (store.version << 8);
			} else {
				if (adjust.type == DSGA_TYPE_NONE) {
					non_const_var_inference = store.inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO | VA2AIF_MUL_BOOL);
				}
				if (store.inference & VA2AIF_SINGLE_LOAD) {
					/* Not possible to substitute this here, but it may be possible in the DSE pass */
					state.enable_dse = true;
				}
				adjust.parameter |= (store.version << 8);
			}
		}
		break;
	}

	if (adjust.operation == DSGA_OP_STOP) {
		for (auto &it : state.temp_stores) {
			/* Check if some other variable is marked as a copy of permanent storage */
			if ((it.second.inference & VA2AIF_SINGLE_LOAD) && it.second.var_source.variable == 0x7C) {
				it.second.inference &= ~VA2AIF_SINGLE_LOAD;
			}
		}
	}

	if (IsExpensiveVariable(adjust.variable, feature, group->var_scope)) state.check_expensive_vars = true;

	auto get_prev_single_load = [&](bool *invert) -> const DeterministicSpriteGroupAdjust* {
		return GetVarAction2PreviousSingleLoadAdjust(group->adjusts, (int)group->adjusts.size() - 2, invert);
	};

	auto get_prev_single_store = [&](bool *invert) -> const DeterministicSpriteGroupAdjust* {
		return GetVarAction2PreviousSingleStoreAdjust(group->adjusts, (int)group->adjusts.size() - 2, invert);
	};

	if ((prev_inference & VA2AIF_SINGLE_LOAD) && adjust.operation == DSGA_OP_RST && adjust.variable != 0x1A && adjust.variable != 0x7D && adjust.variable != 0x7E) {
		/* See if this is a repeated load of a variable (not constant, temp store load or procedure call) */
		const DeterministicSpriteGroupAdjust *prev_load = get_prev_single_load(nullptr);
		if (prev_load != nullptr && MemCmpT(prev_load, &adjust) == 0) {
			group->adjusts.pop_back();
			state.inference = prev_inference;
			return;
		}
	}

	/* bool * x combined with a second bool * y via ADD/OR/XOR may be mergeable into one expression */
	if ((prev_inference & VA2AIF_MUL_BOOL) && (non_const_var_inference & VA2AIF_MUL_BOOL) &&
			(adjust.operation == DSGA_OP_ADD || adjust.operation == DSGA_OP_OR || adjust.operation == DSGA_OP_XOR) &&
			adjust.variable == 0x7D && adjust.type == DSGA_TYPE_NONE && adjust.shift_num == 0 && adjust.and_mask == 0xFFFFFFFF) {
		if (TryMergeBoolMulCombineVarAction2Adjust(state, group->adjusts, (int)(group->adjusts.size() - 1))) {
			/* Re-optimise the new last adjust */
			OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back());
			return;
		}
	}

	if (group->adjusts.size() >= 2 && adjust.operation == DSGA_OP_RST && adjust.variable != 0x7B) {
		/* See if any previous adjusts can be removed */
		bool removed = false;
		while (group->adjusts.size() >= 2) {
			const DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
			if (prev.variable != 0x7E && !IsEvalAdjustWithSideEffects(prev.operation)) {
				/* Delete useless operation */
				group->adjusts.erase(group->adjusts.end() - 2);
				removed = true;
			} else {
				break;
			}
		}
		if (removed) {
			state.inference = prev_inference;
			OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back());
			return;
		}
	}

	if (adjust.variable != 0x7E && IsEvalAdjustWithZeroLastValueAlwaysZero(adjust.operation)) {
		adjust.adjust_flags |= DSGAF_SKIP_ON_ZERO;
	}

	if ((prev_inference & VA2AIF_PREV_TERNARY) && adjust.variable == 0x1A && IsEvalAdjustUsableForConstantPropagation(adjust.operation)) {
		/* Propagate constant operation back into previous ternary */
		DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
		prev.and_mask = EvaluateDeterministicSpriteGroupAdjust(group->size, adjust, nullptr, prev.and_mask, UINT_MAX);
		prev.add_val = EvaluateDeterministicSpriteGroupAdjust(group->size, adjust, nullptr, prev.add_val, UINT_MAX);
		group->adjusts.pop_back();
		state.inference = prev_inference;
	} else if ((prev_inference & VA2AIF_HAVE_CONSTANT) && adjust.variable == 0x1A && IsEvalAdjustUsableForConstantPropagation(adjust.operation)) {
		/* Reduce constant operation on previous constant */
		replace_with_constant_load(EvaluateDeterministicSpriteGroupAdjust(group->size, adjust, nullptr, state.current_constant, UINT_MAX));
	} else if ((prev_inference & VA2AIF_HAVE_CONSTANT) && state.current_constant == 0 && (adjust.adjust_flags & DSGAF_SKIP_ON_ZERO)) {
		/* Remove operation which does nothing when applied to 0 */
		group->adjusts.pop_back();
		state.inference = prev_inference;
	} else if ((prev_inference & VA2AIF_HAVE_CONSTANT) && IsEvalAdjustOperationOnConstantEffectiveLoad(adjust.operation, state.current_constant)) {
		/* Convert operation to a load */
		DeterministicSpriteGroupAdjust current = group->adjusts.back();
		group->adjusts.pop_back();
		while (!group->adjusts.empty()) {
			const DeterministicSpriteGroupAdjust &prev = group->adjusts.back();
			if (prev.variable != 0x7E && !IsEvalAdjustWithSideEffects(prev.operation)) {
				/* Delete useless operation */
				group->adjusts.pop_back();
			} else {
				break;
			}
		}
		try_restore_inference_backup();
		current.operation = DSGA_OP_RST;
		current.adjust_flags = DSGAF_NONE;
		group->adjusts.push_back(current);
		OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back());
		return;
	} else if (adjust.variable == 0x7E || adjust.type != DSGA_TYPE_NONE) {
		/* Procedure call or complex adjustment */
		if (adjust.operation == DSGA_OP_STO) handle_unpredictable_temp_store();
		if (adjust.variable == 0x7E) {
			/* Recursively collect which temp-storage registers the called procedure may write */
			std::bitset<256> seen_stores;
			bool seen_unpredictable_store = false;
			bool seen_special_store = false;
			bool seen_perm_store = false;
			auto handle_proc_stores = y_combinator([&](auto handle_proc_stores, const SpriteGroup *sg) -> void {
				if (sg == nullptr) return;
				if (sg->type == SGT_RANDOMIZED) {
					const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg;
					for (const auto &group : rsg->groups) {
						handle_proc_stores(group);
					}
				} else if (sg->type == SGT_DETERMINISTIC) {
					const DeterministicSpriteGroup *dsg = (const DeterministicSpriteGroup*)sg;
					for (const DeterministicSpriteGroupAdjust &adjust : dsg->adjusts) {
						if (adjust.variable == 0x7E) {
							handle_proc_stores(adjust.subroutine);
						}
						if (adjust.operation == DSGA_OP_STO) {
							if (adjust.type == DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0) {
								/* Temp store */
								if (adjust.and_mask < 0x100) {
									seen_stores.set(adjust.and_mask, true);
								} else {
									seen_special_store = true;
								}
							} else {
								/* Unpredictable store */
								seen_unpredictable_store = true;
							}
						}
						if (adjust.operation == DSGA_OP_STO_NC) {
							if (adjust.divmod_val < 0x100) {
								seen_stores.set(adjust.divmod_val, true);
							} else {
								seen_special_store = true;
							}
						}
						if (adjust.operation == DSGA_OP_STOP) {
							seen_perm_store = true;
						}
					}
				}
			});

			/* Propagate the procedure's variable-tracking inputs and var 1C requirement into this group */
			auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void {
				if (sg == nullptr) return;
				if (sg->type == SGT_RANDOMIZED) {
					const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg;
					for (const auto &group : rsg->groups) {
						handle_group(group);
					}
				} else if (sg->type == SGT_DETERMINISTIC) {
					VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sg, false);
					if (var_tracking != nullptr) {
						std::bitset<256> bits = var_tracking->in;
						for (auto &it : state.temp_stores) {
							bits.set(it.first, false);
						}
						state.GetVarTracking(group)->in |= bits;
					}
					if (!state.seen_procedure_call && ((const DeterministicSpriteGroup*)sg)->dsg_flags & DSGF_REQUIRES_VAR1C) {
						group->dsg_flags |= DSGF_REQUIRES_VAR1C;
					}
					handle_proc_stores(sg);
				}
			});
			handle_group(adjust.subroutine);

			if (seen_unpredictable_store) {
				reset_store_values();
			} else {
				for (auto &it : state.temp_stores) {
					if (seen_stores[it.first]) {
						it.second.inference = VA2AIF_NONE;
						it.second.version++;
					} else {
						/* See DSGA_OP_STO handler */
						if ((it.second.inference & VA2AIF_SINGLE_LOAD) && it.second.var_source.variable == 0x7D && seen_stores[it.second.var_source.parameter & 0xFF]) {
							it.second.inference &= ~VA2AIF_SINGLE_LOAD;
						}
						if (seen_special_store && (it.second.inference & VA2AIF_SINGLE_LOAD) && it.second.var_source.variable != 0x7D) {
							it.second.inference &= ~VA2AIF_SINGLE_LOAD;
						}

						/* See DSGA_OP_STOP handler */
						if (seen_perm_store && (it.second.inference & VA2AIF_SINGLE_LOAD) && it.second.var_source.variable == 0x7C) {
							it.second.inference &= ~VA2AIF_SINGLE_LOAD;
						}
					}
				}
			}

			state.seen_procedure_call = true;
		} else if (adjust.operation == DSGA_OP_RST) {
			state.inference = VA2AIF_SINGLE_LOAD;
		}
		if (IsConstantComparisonAdjustType(adjust.type)) {
			/* The adjust type yields a 0/1 comparison result */
			if (adjust.operation == DSGA_OP_RST) {
				state.inference |= VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
			} else if (adjust.operation == DSGA_OP_OR || adjust.operation == DSGA_OP_XOR || adjust.operation == DSGA_OP_AND) {
				state.inference |= (prev_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO));
			}
			if (adjust.operation == DSGA_OP_OR && (prev_inference & VA2AIF_ONE_OR_ZERO) && adjust.variable != 0x7E) {
				adjust.adjust_flags |= DSGAF_SKIP_ON_LSB_SET;
			}
			if (adjust.operation == DSGA_OP_MUL && adjust.variable != 0x7E) {
				state.inference |= VA2AIF_MUL_BOOL;
				adjust.adjust_flags |= DSGAF_JUMP_INS_HINT;
				group->dsg_flags |= DSGF_CHECK_INSERT_JUMP;
			}
		}
	} else {
		if (adjust.and_mask == 0 && IsEvalAdjustWithZeroRemovable(adjust.operation)) {
			/* Delete useless zero operations */
			group->adjusts.pop_back();
			state.inference = prev_inference;
		} else if (adjust.and_mask == 0 && IsEvalAdjustWithZeroAlwaysZero(adjust.operation)) {
			/* Operation always returns 0, replace it and any useless prior operations */
			replace_with_constant_load(0);
		} else {
			if (adjust.variable == 0x7D && adjust.shift_num == 0 && adjust.and_mask == get_full_mask() && IsEvalAdjustOperationCommutative(adjust.operation) && group->adjusts.size() >= 2) {
				DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
				if (group->adjusts.size() >= 3 && prev.operation == DSGA_OP_RST) {
					const DeterministicSpriteGroupAdjust &prev2 = group->adjusts[group->adjusts.size() - 3];
					if (prev2.operation == DSGA_OP_STO && prev2.type == DSGA_TYPE_NONE && prev2.variable == 0x1A &&
							prev2.shift_num == 0 && prev2.and_mask == (adjust.parameter & 0xFF)) {
						/* Convert: store, load var, commutative op on stored --> (dead) store, commutative op var */
						prev.operation = adjust.operation;
						group->adjusts.pop_back();
						state.inference = non_const_var_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO | VA2AIF_MUL_BOOL);
						OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back());
						return;
					}
				}
			}
			switch (adjust.operation) {
				case DSGA_OP_ADD:
					try_merge_with_previous();
					break;

				case DSGA_OP_SUB:
					if (adjust.variable == 0x7D && adjust.shift_num == 0 && adjust.and_mask == 0xFFFFFFFF && group->adjusts.size() >= 2) {
						DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
						if (group->adjusts.size() >= 3 && prev.operation == DSGA_OP_RST) {
							const DeterministicSpriteGroupAdjust &prev2 = group->adjusts[group->adjusts.size() - 3];
							if (prev2.operation == DSGA_OP_STO && prev2.type == DSGA_TYPE_NONE && prev2.variable == 0x1A &&
									prev2.shift_num == 0 && prev2.and_mask == (adjust.parameter & 0xFF)) {
								/* Convert: store, load var, subtract stored --> (dead) store, reverse subtract var */
								prev.operation = DSGA_OP_RSUB;
								group->adjusts.pop_back();
								state.inference = non_const_var_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO);
								OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back());
								return;
							}
						}
					}
					if (adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask == 1 && group->adjusts.size() >= 2) {
						DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
						if (prev.operation == DSGA_OP_SCMP) {
							/* SCMP followed by subtract 1: remember for the ABS pattern in DSGA_OP_MUL */
							state.inference |= VA2AIF_PREV_SCMP_DEC;
						}
					}
					try_merge_with_previous();
					break;

				case DSGA_OP_SMIN:
					if (adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask == 1 && group->adjusts.size() >= 2) {
						DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
						if (prev.operation == DSGA_OP_SCMP) {
							/* SCMP then min with 1 --> signed >= */
							prev.operation = DSGA_OP_SGE;
							group->adjusts.pop_back();
							state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
							break;
						}
						if (group->adjusts.size() >= 3 && prev.operation == DSGA_OP_XOR && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A &&
								prev.shift_num == 0 && prev.and_mask == 2) {
							DeterministicSpriteGroupAdjust &prev2 = group->adjusts[group->adjusts.size() - 3];
							if (prev2.operation == DSGA_OP_SCMP) {
								/* SCMP, xor 2, min with 1 --> signed <= */
								prev2.operation = DSGA_OP_SLE;
								group->adjusts.pop_back();
								group->adjusts.pop_back();
								state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
								break;
							}
						}
					}
					if (adjust.and_mask <= 1 && (prev_inference & VA2AIF_SIGNED_NON_NEGATIVE)) state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
					break;

				case DSGA_OP_SMAX:
					if (adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask == 0 && group->adjusts.size() >= 2) {
						DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
						if (group->adjusts.size() >= 3 && prev.operation == DSGA_OP_SUB && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A &&
								prev.shift_num == 0 && prev.and_mask == 1) {
							DeterministicSpriteGroupAdjust &prev2 = group->adjusts[group->adjusts.size() - 3];
							if (prev2.operation == DSGA_OP_SCMP) {
								/* SCMP, subtract 1, max with 0 --> signed > */
								prev2.operation = DSGA_OP_SGT;
								group->adjusts.pop_back();
								group->adjusts.pop_back();
								state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
								break;
							}
						}
					}
					break;

				case DSGA_OP_UMIN:
					if (adjust.and_mask == 1) {
						if (prev_inference & VA2AIF_ONE_OR_ZERO) {
							/* Delete useless bool -> bool conversion */
							group->adjusts.pop_back();
							state.inference = prev_inference;
							break;
						} else {
							state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
							if (group->adjusts.size() >= 2) {
								DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
								if (prev.operation == DSGA_OP_RST && prev.type == DSGA_TYPE_NONE) {
									/* Fold the min-with-1 bool conversion into the preceding load as a != 0 comparison */
									prev.type = DSGA_TYPE_NEQ;
									prev.add_val = 0;
									group->adjusts.pop_back();
									state.inference |= VA2AIF_SINGLE_LOAD;
								}
							}
						}
					}
					break;

				case DSGA_OP_AND:
					if ((prev_inference & VA2AIF_PREV_MASK_ADJUST) && adjust.variable == 0x1A && adjust.shift_num == 0 && group->adjusts.size() >= 2) {
						/* Propagate and into immediately prior variable read */
						DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
						prev.and_mask &= adjust.and_mask;
						add_inferences_from_mask(prev.and_mask);
						state.inference |= VA2AIF_PREV_MASK_ADJUST;
						group->adjusts.pop_back();
						break;
					}
					if (adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask == 1 && group->adjusts.size() >= 2) {
						DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
						if (prev.operation == DSGA_OP_SCMP || prev.operation == DSGA_OP_UCMP) {
							/* Compare then mask LSB --> equality test */
							prev.operation = DSGA_OP_EQ;
							group->adjusts.pop_back();
							state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
							if (group->adjusts.size() >= 2) {
								DeterministicSpriteGroupAdjust &eq_adjust = group->adjusts[group->adjusts.size() - 1];
								DeterministicSpriteGroupAdjust &prev_op = group->adjusts[group->adjusts.size() - 2];
								if (eq_adjust.type == DSGA_TYPE_NONE && eq_adjust.variable == 0x1A &&
										prev_op.type == DSGA_TYPE_NONE && prev_op.operation == DSGA_OP_RST) {
									/* Fold the equality test into the preceding load */
									prev_op.type = DSGA_TYPE_EQ;
									prev_op.add_val = (0xFFFFFFFF >> eq_adjust.shift_num) & eq_adjust.and_mask;
									group->adjusts.pop_back();
									state.inference |= VA2AIF_SINGLE_LOAD;
								}
							}
							break;
						}
						if (prev_inference & VA2AIF_ONE_OR_ZERO) {
							/* Current value is already one or zero, remove this */
							group->adjusts.pop_back();
							state.inference = prev_inference;
							break;
						}
					}
					if (adjust.and_mask <= 1) {
						state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
					} else if ((adjust.and_mask & get_sign_bit()) == 0) {
						state.inference = VA2AIF_SIGNED_NON_NEGATIVE;
					}
					state.inference |= non_const_var_inference;
					if ((state.inference & VA2AIF_ONE_OR_ZERO) && (prev_inference & VA2AIF_ONE_OR_ZERO)) {
						adjust.adjust_flags |= DSGAF_JUMP_INS_HINT;
						group->dsg_flags |= DSGF_CHECK_INSERT_JUMP;
					}
					try_merge_with_previous();
					break;

				case DSGA_OP_OR:
					if (adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask == 1 && (prev_inference & VA2AIF_ONE_OR_ZERO)) {
						/* bool | 1 is always 1 */
						replace_with_constant_load(1);
						break;
					}
					if (adjust.and_mask <= 1) state.inference = prev_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO);
					state.inference |= prev_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO) & non_const_var_inference;
					if ((non_const_var_inference & VA2AIF_ONE_OR_ZERO) || (adjust.and_mask <= 1)) {
						adjust.adjust_flags |= DSGAF_SKIP_ON_LSB_SET;
						if (prev_inference & VA2AIF_ONE_OR_ZERO) {
							adjust.adjust_flags |= DSGAF_JUMP_INS_HINT;
							group->dsg_flags |= DSGF_CHECK_INSERT_JUMP;
						}
					}
					try_merge_with_previous();
					break;

				case DSGA_OP_XOR:
					if (adjust.variable == 0x1A && adjust.shift_num == 0 && group->adjusts.size() >= 2) {
						DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
						if (adjust.and_mask == 1) {
							/* XOR with 1 is a bool invert: push the inversion into the previous adjust */
							if (IsEvalAdjustOperationRelationalComparison(prev.operation)) {
								prev.operation = InvertEvalAdjustRelationalComparisonOperation(prev.operation);
								group->adjusts.pop_back();
								state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
								break;
							}
							if (prev.operation == DSGA_OP_UMIN && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == 1) {
								/* Inverted bool conversion --> ternary returning 0/1 */
								prev.operation = DSGA_OP_TERNARY;
								prev.adjust_flags = DSGAF_NONE;
								prev.and_mask = 0;
								prev.add_val = 1;
								group->adjusts.pop_back();
								state.inference = VA2AIF_PREV_TERNARY;
								break;
							}
							if (prev.operation == DSGA_OP_RST && IsConstantComparisonAdjustType(prev.type)) {
								prev.type = InvertConstantComparisonAdjustType(prev.type);
								group->adjusts.pop_back();
								state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO | VA2AIF_SINGLE_LOAD;
								break;
							}
							if (prev.operation == DSGA_OP_OR && (IsConstantComparisonAdjustType(prev.type) || (prev.type == DSGA_TYPE_NONE && (prev.adjust_flags & DSGAF_SKIP_ON_LSB_SET))) && group->adjusts.size() >= 3) {
								/* De Morgan: !(a | b) --> !a & !b, by inverting both operands */
								DeterministicSpriteGroupAdjust &prev2 = group->adjusts[group->adjusts.size() - 3];
								bool found = false;
								if (IsEvalAdjustOperationRelationalComparison(prev2.operation)) {
									prev2.operation = InvertEvalAdjustRelationalComparisonOperation(prev2.operation);
									found = true;
								} else if (prev2.operation == DSGA_OP_RST && IsConstantComparisonAdjustType(prev2.type)) {
									prev2.type = InvertConstantComparisonAdjustType(prev2.type);
									found = true;
								}
								if (found) {
									if (prev.type == DSGA_TYPE_NONE) {
										prev.type = DSGA_TYPE_EQ;
										prev.add_val = 0;
									} else {
										prev.type = InvertConstantComparisonAdjustType(prev.type);
									}
									prev.operation = DSGA_OP_AND;
									prev.adjust_flags = DSGAF_SKIP_ON_ZERO;
									group->adjusts.pop_back();
									state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
									break;
								}
							}
						}
						if (prev.operation == DSGA_OP_OR && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == adjust.and_mask) {
							/* (x | c) ^ c --> x & ~c */
							prev.operation = DSGA_OP_AND;
							prev.and_mask = ~prev.and_mask;
							prev.adjust_flags = DSGAF_NONE;
							group->adjusts.pop_back();
							break;
						}
					}
					if (adjust.and_mask <= 1) state.inference = prev_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO);
					state.inference |= prev_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO) & non_const_var_inference;
					if (adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask == 1) {
						/* Single load tracking can handle bool inverts */
						state.inference |= (prev_inference & VA2AIF_SINGLE_LOAD);
					}
					try_merge_with_previous();
					break;

				case DSGA_OP_MUL: {
					if ((prev_inference & VA2AIF_ONE_OR_ZERO) && adjust.variable == 0x1A && adjust.shift_num == 0 && group->adjusts.size() >= 2) {
						/* Found a ternary operator */
						adjust.operation = DSGA_OP_TERNARY;
						adjust.adjust_flags = DSGAF_NONE;
						while (group->adjusts.size() > 1) {
							/* Merge with previous if applicable */
							const DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
							if (prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == 1) {
								if (prev.operation == DSGA_OP_XOR) {
									/* Inverted condition: swap the two ternary result values */
									DeterministicSpriteGroupAdjust current = group->adjusts.back();
									group->adjusts.pop_back();
									group->adjusts.pop_back();
									std::swap(current.and_mask, current.add_val);
									group->adjusts.push_back(current);
									continue;
								} else if (prev.operation == DSGA_OP_SMIN || prev.operation == DSGA_OP_UMIN) {
									/* Redundant bool conversion before ternary */
									DeterministicSpriteGroupAdjust current = group->adjusts.back();
									group->adjusts.pop_back();
									group->adjusts.pop_back();
									group->adjusts.push_back(current);
								}
							}
							break;
						}
						if (group->adjusts.size() > 1) {
							/* Remove redundant comparison with 0 if applicable */
							const DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
							if (prev.type == DSGA_TYPE_NONE && prev.operation == DSGA_OP_EQ && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == 0) {
								DeterministicSpriteGroupAdjust current = group->adjusts.back();
								group->adjusts.pop_back();
								group->adjusts.pop_back();
								std::swap(current.and_mask, current.add_val);
								group->adjusts.push_back(current);
							}
						}
						state.inference = VA2AIF_PREV_TERNARY;
						break;
					}
					if ((prev_inference & VA2AIF_PREV_SCMP_DEC) && group->adjusts.size() >= 4 && adjust.variable == 0x7D && adjust.shift_num == 0 && adjust.and_mask == 0xFFFFFFFF) {
						/* sto N, scmp 0, sub 1, mul var N --> abs (sign of x times x) */
						const DeterministicSpriteGroupAdjust &adj1 = group->adjusts[group->adjusts.size() - 4];
						const DeterministicSpriteGroupAdjust &adj2 = group->adjusts[group->adjusts.size() - 3];
						const DeterministicSpriteGroupAdjust &adj3 = group->adjusts[group->adjusts.size() - 2];
						auto is_expected_op = [](const DeterministicSpriteGroupAdjust &adj, DeterministicSpriteGroupAdjustOperation op, uint32 value) -> bool {
							return adj.operation == op && adj.type == DSGA_TYPE_NONE && adj.variable == 0x1A && adj.shift_num == 0 && adj.and_mask == value;
						};
						if (is_expected_op(adj1, DSGA_OP_STO, (adjust.parameter & 0xFF)) &&
								is_expected_op(adj2, DSGA_OP_SCMP, 0) &&
								is_expected_op(adj3, DSGA_OP_SUB, 1)) {
							group->adjusts.pop_back();
							group->adjusts.pop_back();
							group->adjusts.back().operation = DSGA_OP_ABS;
							state.inference |= VA2AIF_SIGNED_NON_NEGATIVE;
							break;
						}
					}
					uint32 sign_bit = (1 << ((varsize * 8) - 1));
					if ((prev_inference & VA2AIF_PREV_MASK_ADJUST) && (prev_inference & VA2AIF_SIGNED_NON_NEGATIVE) && adjust.variable == 0x1A && adjust.shift_num == 0 && (adjust.and_mask & sign_bit) == 0) {
						/* Determine whether the result will be always non-negative */
						if (((uint64)group->adjusts[group->adjusts.size() - 2].and_mask) * ((uint64)adjust.and_mask) < ((uint64)sign_bit)) {
							state.inference |= VA2AIF_SIGNED_NON_NEGATIVE;
						}
					}
					if ((prev_inference & VA2AIF_ONE_OR_ZERO) || (non_const_var_inference & VA2AIF_ONE_OR_ZERO)) {
						state.inference |= VA2AIF_MUL_BOOL;
					}
					if (non_const_var_inference & VA2AIF_ONE_OR_ZERO) {
						adjust.adjust_flags |= DSGAF_JUMP_INS_HINT;
						group->dsg_flags |= DSGF_CHECK_INSERT_JUMP;
					}
					break;
				}

				case DSGA_OP_SCMP:
				case DSGA_OP_UCMP:
					/* Comparison results are 0, 1 or 2 */
					state.inference = VA2AIF_SIGNED_NON_NEGATIVE;
					break;

				case DSGA_OP_STOP:
					state.inference = prev_inference & (~VA2AIF_PREV_MASK);
					break;

				case DSGA_OP_STO:
					state.inference = prev_inference & (~VA2AIF_PREV_MASK);
					if (adjust.variable == 0x1A && adjust.shift_num == 0) {
						state.inference |= VA2AIF_PREV_STORE_TMP;
						if (adjust.and_mask < 0x100) {
							for (auto &it : state.temp_stores) {
								/* Check if some other variable is marked as a copy of the one we are overwriting */
								if ((it.second.inference & VA2AIF_SINGLE_LOAD) && it.second.var_source.variable == 0x7D && (it.second.var_source.parameter & 0xFF) == adjust.and_mask) {
									it.second.inference &= ~VA2AIF_SINGLE_LOAD;
								}
							}
							VarAction2TempStoreInference &store = state.temp_stores[adjust.and_mask];
							if (store.version == 0) {
								/* New store */
								store.version = state.default_variable_version + 1;
							} else {
								/* Updating previous store */
								store.version++;
							}
							store.inference = prev_inference & (~VA2AIF_PREV_MASK);
							store.store_constant = state.current_constant;

							bool invert_store = false;
							const DeterministicSpriteGroupAdjust *prev_store = get_prev_single_store((prev_inference & VA2AIF_ONE_OR_ZERO) ? &invert_store : nullptr);
							if (prev_store != nullptr) {
								/* This store is a clone of the previous store, or inverted clone of the previous store (bool) */
								store.inference |= VA2AIF_SINGLE_LOAD;
								store.var_source.type = (invert_store ? DSGA_TYPE_EQ : DSGA_TYPE_NONE);
								store.var_source.variable = 0x7D;
								store.var_source.shift_num = 0;
								store.var_source.parameter = prev_store->and_mask | (state.temp_stores[prev_store->and_mask].version << 8);
								store.var_source.and_mask = 0xFFFFFFFF;
								store.var_source.add_val = 0;
								store.var_source.divmod_val = 0;
								break;
							}

							if (prev_inference & VA2AIF_SINGLE_LOAD) {
								bool invert = false;
								const DeterministicSpriteGroupAdjust *prev_load = get_prev_single_load(&invert);
								if (prev_load != nullptr && (!invert || IsConstantComparisonAdjustType(prev_load->type))) {
									/* Record the single-load source so later 0x7D reads can substitute it */
									store.inference |= VA2AIF_SINGLE_LOAD;
									store.var_source.type = prev_load->type;
									if (invert) store.var_source.type = InvertConstantComparisonAdjustType(store.var_source.type);
									store.var_source.variable = prev_load->variable;
									store.var_source.shift_num = prev_load->shift_num;
									store.var_source.parameter = prev_load->parameter;
									store.var_source.and_mask = prev_load->and_mask;
									store.var_source.add_val = prev_load->add_val;
									store.var_source.divmod_val = prev_load->divmod_val;
									break;
								}
							}
						} else {
							/* Store to special register, this can change the result of future variable loads for some variables.
							 * Assume all variables except temp storage for now.
							 */
							for (auto &it : state.temp_stores) {
								if (it.second.inference & VA2AIF_SINGLE_LOAD && it.second.var_source.variable != 0x7D) {
									it.second.inference &= ~VA2AIF_SINGLE_LOAD;
								}
							}
						}
					} else {
						handle_unpredictable_temp_store();
					}
					break;

				case DSGA_OP_RST:
					if ((prev_inference & VA2AIF_PREV_STORE_TMP) && adjust.variable == 0x7D && adjust.shift_num == 0 && adjust.and_mask == get_full_mask() && group->adjusts.size() >= 2) {
						const DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
						if (prev.type == DSGA_TYPE_NONE && prev.operation == DSGA_OP_STO && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == (adjust.parameter & 0xFF)) {
							/* Redundant load from temp store after store to temp store */
							group->adjusts.pop_back();
							state.inference = prev_inference;
							break;
						}
					}
					add_inferences_from_mask(adjust.and_mask);
					state.inference |= VA2AIF_PREV_MASK_ADJUST | VA2AIF_SINGLE_LOAD;
					if (adjust.variable == 0x1A || adjust.and_mask == 0) {
						/* Constant load (or masked to 0): evaluate now */
						replace_with_constant_load(EvaluateDeterministicSpriteGroupAdjust(group->size, adjust, nullptr, 0, UINT_MAX));
					}
					break;

				case DSGA_OP_SHR:
				case DSGA_OP_SAR:
					/* SAR only behaves like SHR when the value is known non-negative */
					if ((adjust.operation == DSGA_OP_SHR || (prev_inference & VA2AIF_SIGNED_NON_NEGATIVE)) &&
							((prev_inference & VA2AIF_PREV_MASK_ADJUST) && adjust.variable == 0x1A && adjust.shift_num == 0 && group->adjusts.size() >= 2)) {
						/* Propagate shift right into immediately prior variable read */
						DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
						if (prev.shift_num + adjust.and_mask < 32) {
							prev.shift_num += adjust.and_mask;
							prev.and_mask >>= adjust.and_mask;
							add_inferences_from_mask(prev.and_mask);
							state.inference |= VA2AIF_PREV_MASK_ADJUST;
							group->adjusts.pop_back();
							break;
						}
					}
					break;

				case DSGA_OP_SDIV:
					if ((prev_inference & VA2AIF_SIGNED_NON_NEGATIVE) && adjust.variable == 0x1A && adjust.shift_num == 0 && HasExactlyOneBit(adjust.and_mask)) {
						uint shift_count = FindFirstBit(adjust.and_mask);
						if (group->adjusts.size() >= 3 && shift_count == 16 && varsize == 4 && (feature == GSF_TRAINS || feature == GSF_ROADVEHICLES || feature == GSF_SHIPS)) {
							/* (current speed * factor) / 65536 --> scaled current speed pseudo-variable */
							const DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
							DeterministicSpriteGroupAdjust &prev2 = group->adjusts[group->adjusts.size() - 3];
							if (prev.operation == DSGA_OP_MUL && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask <= 0xFFFF &&
									(prev2.operation == DSGA_OP_RST || group->adjusts.size() == 3) && prev2.type == DSGA_TYPE_NONE && prev2.variable == 0xB4 && prev2.shift_num == 0 && prev2.and_mask == 0xFFFF) {
								/* Replace with scaled current speed */
								prev2.variable = A2VRI_VEHICLE_CURRENT_SPEED_SCALED;
								prev2.parameter = prev.and_mask;
								group->adjusts.pop_back();
								group->adjusts.pop_back();
								state.inference = VA2AIF_SIGNED_NON_NEGATIVE;
								break;
							}
						}
						/* Convert to a shift */
						adjust.operation = DSGA_OP_SHR;
						adjust.and_mask = shift_count;
						state.inference = VA2AIF_SIGNED_NON_NEGATIVE;
					}
					break;

				default:
					break;
			}
		}
	}
}

static bool CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSpriteGroup *group, std::bitset<256> bits, bool quick_exit);

/**
 * Recursively mark @p group and every sprite group reachable from it (procedure calls, default
 * group and ranges) with DSGF_NO_DSE so that dead store elimination is not applied to them.
 * DSGF_DSE_RECURSIVE_DISABLE doubles as the already-visited marker, terminating recursion on cycles.
 */
static void RecursiveDisallowDSEForProcedure(const SpriteGroup *group)
{
	if (group == nullptr) return;

	if (group->type == SGT_RANDOMIZED) {
		const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)group;
		for (const auto &g : rsg->groups) {
			RecursiveDisallowDSEForProcedure(g);
		}
		return;
	}

	if (group->type != SGT_DETERMINISTIC) return;

	const DeterministicSpriteGroup *sub = static_cast<const DeterministicSpriteGroup *>(group);
	if (sub->dsg_flags & DSGF_DSE_RECURSIVE_DISABLE) return;
	const_cast<DeterministicSpriteGroup *>(sub)->dsg_flags |= (DSGF_NO_DSE | DSGF_DSE_RECURSIVE_DISABLE);
	for (const DeterministicSpriteGroupAdjust &adjust : sub->adjusts) {
		if (adjust.variable == 0x7E) RecursiveDisallowDSEForProcedure(adjust.subroutine);
	}
	if (!sub->calculated_result) {
		RecursiveDisallowDSEForProcedure(sub->default_group);
		for (const auto &range : sub->ranges) {
			RecursiveDisallowDSEForProcedure(range.group);
		}
	}
}

/**
 * Scan @p group's adjusts backwards, tracking which temp-storage registers (@p bits) are live,
 * to find stores that no later adjust reads (dead store elimination candidates).
 * @param group      group to scan
 * @param bits       registers live after the group (read by value; updated during the scan)
 * @param quick_exit stop at the first candidate found
 * @return presumably whether any possibly-redundant store was found — definition continues past
 *         this chunk, so the return path is not visible here; confirm against the full file.
 */
static bool CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSpriteGroup *group, std::bitset<256> bits, bool quick_exit)
{
	bool dse = false;
	/* Walk backwards so liveness of each register is known when its store is reached */
	for (int i = (int)group->adjusts.size() - 1; i >= 0; i--) {
		const DeterministicSpriteGroupAdjust &adjust = group->adjusts[i];
		if (adjust.operation == DSGA_OP_STO) {
			if (adjust.type == DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask < 0x100) {
				/* Predictable store */
				if (!bits[adjust.and_mask]) {
					/* Possibly redundant store */
					dse = true;
					if (quick_exit) break;
				}
				bits.set(adjust.and_mask, false);
			}
		}
		if (adjust.operation == DSGA_OP_STO_NC && adjust.divmod_val < 0x100) {
			if (!bits[adjust.divmod_val]) {
				/* Possibly redundant store */
				dse = true;
				if (quick_exit) break;
			}
			bits.set(adjust.divmod_val, false);
		}
		if (adjust.variable == 0x7B && adjust.parameter == 0x7D) {
			/* Unpredictable load */
			bits.set();
		}
		if (adjust.variable == 0x7D && adjust.parameter) {
			bits.set(adjust.parameter & 0xFF, true);
		}
		if (adjust.variable == 0x7E) {
			/* procedure call */
			auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void {
				if (sg == nullptr) return;
				if (sg->type == SGT_RANDOMIZED) {
					const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg;
					for (const auto &group : rsg->groups) {
						handle_group(group);
					}
				} else if (sg->type == SGT_DETERMINISTIC) {
					const DeterministicSpriteGroup *sub = static_cast<const DeterministicSpriteGroup *>(sg);
					VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sub, true);
					auto procedure_dse_ok = [&]() -> bool {
						if (sub->calculated_result) return true;

						if (sub->default_group != nullptr &&
sub->default_group->type != SGT_CALLBACK) return false; - for (const auto &range : sub->ranges) { - if (range.group != nullptr && range.group->type != SGT_CALLBACK) return false; - } - return true; - }; - if (procedure_dse_ok()) { - std::bitset<256> new_proc_call_out = bits | var_tracking->proc_call_out; - if (new_proc_call_out != var_tracking->proc_call_out) { - var_tracking->proc_call_out = new_proc_call_out; - std::bitset<256> old_total = var_tracking->out | var_tracking->proc_call_out; - std::bitset<256> new_total = var_tracking->out | new_proc_call_out; - if (old_total != new_total) { - CheckDeterministicSpriteGroupOutputVarBits(sub, new_total, false); - } - } - } else { - RecursiveDisallowDSEForProcedure(sub); - } - bits |= var_tracking->in; - } - }); - handle_group(adjust.subroutine); - } - } - return dse; -} - -static bool OptimiseVarAction2DeterministicSpriteGroupExpensiveVarsInner(DeterministicSpriteGroup *group, VarAction2GroupVariableTracking *var_tracking) -{ - btree::btree_map seen_expensive_variables; - std::bitset<256> usable_vars; - if (var_tracking != nullptr) { - usable_vars = ~(var_tracking->out | var_tracking->proc_call_out); - } else { - usable_vars.set(); - } - uint16 target_var = 0; - uint32 target_param = 0; - auto found_target = [&]() -> bool { - for (auto &iter : seen_expensive_variables) { - if (iter.second >= 2) { - target_var = iter.first >> 32; - target_param = iter.first & 0xFFFFFFFF; - return true; - } - } - return false; - }; - auto do_replacements = [&](int start, int end) { - std::bitset<256> mask(UINT64_MAX); - std::bitset<256> cur = usable_vars; - uint8 bit = 0; - while (true) { - uint64 t = (cur & mask).to_ullong(); - if (t != 0) { - bit += FindFirstBit(t); - break; - } - cur >>= 64; - bit += 64; - } - int insert_pos = start; - uint32 and_mask = 0; - uint condition_depth = 0; - bool seen_first = false; - int last_unused_jump = -1; - for (int j = end; j >= start; j--) { - DeterministicSpriteGroupAdjust &adjust = 
group->adjusts[j]; - if (seen_first && IsEvalAdjustJumpOperation(adjust.operation)) { - if (condition_depth > 0) { - /* Do not insert the STO_NC inside a conditional block when it is also needed outside the block */ - condition_depth--; - insert_pos = j; - } else { - last_unused_jump = j; - } - } - if (seen_first && adjust.adjust_flags & DSGAF_END_BLOCK) condition_depth += adjust.jump; - if (adjust.variable == target_var && adjust.parameter == target_param) { - and_mask |= adjust.and_mask << adjust.shift_num; - adjust.variable = 0x7D; - adjust.parameter = bit; - insert_pos = j; - seen_first = true; - } - } - DeterministicSpriteGroupAdjust load = {}; - load.operation = DSGA_OP_STO_NC; - load.type = DSGA_TYPE_NONE; - load.variable = target_var; - load.shift_num = 0; - load.parameter = target_param; - load.and_mask = and_mask; - load.divmod_val = bit; - if (group->adjusts[insert_pos].adjust_flags & DSGAF_SKIP_ON_ZERO) { - for (int j = insert_pos + 1; j <= end; j++) { - if (group->adjusts[j].adjust_flags & DSGAF_SKIP_ON_ZERO) continue; - if (group->adjusts[j].operation == DSGA_OP_JZ_LV && last_unused_jump == j) { - /* The variable is never actually read if last_value is 0 at this point */ - load.adjust_flags |= DSGAF_SKIP_ON_ZERO; - } - break; - } - } - group->adjusts.insert(group->adjusts.begin() + insert_pos, load); - }; - - int i = (int)group->adjusts.size() - 1; - int end = i; - while (i >= 0) { - const DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; - if (adjust.operation == DSGA_OP_STO && (adjust.type != DSGA_TYPE_NONE || adjust.variable != 0x1A || adjust.shift_num != 0)) return false; - if (adjust.variable == 0x7B && adjust.parameter == 0x7D) return false; - if (adjust.operation == DSGA_OP_STO_NC && adjust.divmod_val < 0x100) { - usable_vars.set(adjust.divmod_val, false); - } - if (adjust.operation == DSGA_OP_STO && adjust.and_mask < 0x100) { - usable_vars.set(adjust.and_mask, false); - } else if (adjust.variable == 0x7D) { - if (adjust.parameter < 
0x100) usable_vars.set(adjust.parameter, false); - } else if (IsExpensiveVariable(adjust.variable, group->feature, group->var_scope)) { - seen_expensive_variables[(((uint64)adjust.variable) << 32) | adjust.parameter]++; - } - if (adjust.variable == 0x7E || (adjust.operation == DSGA_OP_STO && adjust.and_mask >= 0x100) || (adjust.operation == DSGA_OP_STO_NC && adjust.divmod_val >= 0x100)) { - /* Can't cross this barrier, stop here */ - if (usable_vars.none()) return false; - if (found_target()) { - do_replacements(i + 1, end); - return true; - } - seen_expensive_variables.clear(); - end = i - 1; - if (adjust.variable == 0x7E) { - auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void { - if (sg != nullptr && sg->type == SGT_DETERMINISTIC) { - VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sg, false); - if (var_tracking != nullptr) usable_vars &= ~var_tracking->in; - } - if (sg != nullptr && sg->type == SGT_RANDOMIZED) { - const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; - for (const auto &group : rsg->groups) { - handle_group(group); - } - } - }); - handle_group(adjust.subroutine); - } - } - i--; - } - if (usable_vars.none()) return false; - if (found_target()) { - do_replacements(0, end); - return true; - } - - return false; -} - -static void OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(DeterministicSpriteGroup *group) -{ - VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(group, false); - while (OptimiseVarAction2DeterministicSpriteGroupExpensiveVarsInner(group, var_tracking)) {} -} - -static void OptimiseVarAction2DeterministicSpriteGroupSimplifyStores(DeterministicSpriteGroup *group) -{ - if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_SIMPLIFY_STORES)) return; - - int src_adjust = -1; - bool is_constant = false; - for (size_t i = 0; i < group->adjusts.size(); i++) { - auto acceptable_store = [](const 
DeterministicSpriteGroupAdjust &adjust) -> bool { - return adjust.type == DSGA_TYPE_NONE && adjust.operation == DSGA_OP_STO && adjust.variable == 0x1A && adjust.shift_num == 0; - }; - - DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; - - if ((adjust.type == DSGA_TYPE_NONE || IsConstantComparisonAdjustType(adjust.type)) && adjust.operation == DSGA_OP_RST && adjust.variable != 0x7E) { - src_adjust = (int)i; - is_constant = (adjust.variable == 0x1A); - continue; - } - - if (src_adjust >= 0 && acceptable_store(adjust)) { - bool ok = false; - bool more_stores = false; - size_t j = i; - while (true) { - j++; - if (j == group->adjusts.size()) { - ok = !group->calculated_result && group->ranges.empty(); - break; - } - const DeterministicSpriteGroupAdjust &next = group->adjusts[j]; - if (next.operation == DSGA_OP_RST) { - ok = (next.variable != 0x7B); - break; - } - if (is_constant && next.operation == DSGA_OP_STO_NC) { - continue; - } - if (is_constant && acceptable_store(next)) { - more_stores = true; - continue; - } - break; - } - if (ok) { - const DeterministicSpriteGroupAdjust &src = group->adjusts[src_adjust]; - adjust.operation = DSGA_OP_STO_NC; - adjust.type = src.type; - adjust.adjust_flags = DSGAF_NONE; - adjust.divmod_val = adjust.and_mask; - adjust.add_val = src.add_val; - adjust.variable = src.variable; - adjust.parameter = src.parameter; - adjust.shift_num = src.shift_num; - adjust.and_mask = src.and_mask; - if (more_stores) { - continue; - } - group->adjusts.erase(group->adjusts.begin() + src_adjust); - i--; - } - } - - src_adjust = -1; - } -} - -static void OptimiseVarAction2DeterministicSpriteGroupAdjustOrdering(DeterministicSpriteGroup *group) -{ - if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_ADJUST_ORDERING)) return; - - auto acceptable_variable = [](uint16 variable) -> bool { - return variable != 0x7E && variable != 0x7B; - }; - - auto get_variable_expense = [&](uint16 variable) -> int { - if (variable == 0x1A) return -15; - if 
(IsVariableVeryCheap(variable, group->feature)) return -10; - if (variable == 0x7D || variable == 0x7C) return -5; - if (IsExpensiveVariable(variable, group->feature, group->var_scope)) return 10; - return 0; - }; - - for (size_t i = 0; i + 1 < group->adjusts.size(); i++) { - DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; - - if (adjust.operation == DSGA_OP_RST && acceptable_variable(adjust.variable)) { - DeterministicSpriteGroupAdjustOperation operation = group->adjusts[i + 1].operation; - const size_t start = i; - size_t end = i; - if (IsEvalAdjustWithZeroLastValueAlwaysZero(operation) && IsEvalAdjustOperationCommutative(operation)) { - for (size_t j = start + 1; j < group->adjusts.size(); j++) { - DeterministicSpriteGroupAdjust &next = group->adjusts[j]; - if (next.operation == operation && acceptable_variable(next.variable) && (next.adjust_flags & DSGAF_SKIP_ON_ZERO)) { - end = j; - } else { - break; - } - } - } - if (end != start) { - adjust.operation = operation; - adjust.adjust_flags |= DSGAF_SKIP_ON_ZERO; - - /* Sort so that the least expensive comes first */ - std::stable_sort(group->adjusts.begin() + start, group->adjusts.begin() + end + 1, [&](const DeterministicSpriteGroupAdjust &a, const DeterministicSpriteGroupAdjust &b) -> bool { - return get_variable_expense(a.variable) < get_variable_expense(b.variable); - }); - - adjust.operation = DSGA_OP_RST; - adjust.adjust_flags &= ~(DSGAF_SKIP_ON_ZERO | DSGAF_JUMP_INS_HINT); - } - } - } -} - -static bool TryCombineTempStoreLoadWithStoreSourceAdjust(DeterministicSpriteGroupAdjust &target, const DeterministicSpriteGroupAdjust *var_src, bool inverted) -{ - DeterministicSpriteGroupAdjustType var_src_type = var_src->type; - if (inverted) { - switch (var_src_type) { - case DSGA_TYPE_EQ: - var_src_type = DSGA_TYPE_NEQ; - break; - case DSGA_TYPE_NEQ: - var_src_type = DSGA_TYPE_EQ; - break; - default: - /* Don't try to handle this case */ - return false; - } - } - if (target.type == DSGA_TYPE_NONE && 
target.shift_num == 0 && (target.and_mask == 0xFFFFFFFF || (IsConstantComparisonAdjustType(var_src_type) && (target.and_mask & 1)))) { - target.type = var_src_type; - target.variable = var_src->variable; - target.shift_num = var_src->shift_num; - target.parameter = var_src->parameter; - target.and_mask = var_src->and_mask; - target.add_val = var_src->add_val; - target.divmod_val = var_src->divmod_val; - return true; - } else if (IsConstantComparisonAdjustType(target.type) && target.shift_num == 0 && (target.and_mask & 1) && target.add_val == 0 && - IsConstantComparisonAdjustType(var_src_type)) { - /* DSGA_TYPE_EQ/NEQ on target are OK if add_val is 0 because this is a boolean invert/convert of the incoming DSGA_TYPE_EQ/NEQ */ - if (target.type == DSGA_TYPE_EQ) { - target.type = InvertConstantComparisonAdjustType(var_src_type); - } else { - target.type = var_src_type; - } - target.variable = var_src->variable; - target.shift_num = var_src->shift_num; - target.parameter = var_src->parameter; - target.and_mask = var_src->and_mask; - target.add_val = var_src->add_val; - target.divmod_val = var_src->divmod_val; - return true; - } else if (var_src_type == DSGA_TYPE_NONE && (target.shift_num + var_src->shift_num) < 32) { - target.variable = var_src->variable; - target.parameter = var_src->parameter; - target.and_mask &= var_src->and_mask >> target.shift_num; - target.shift_num += var_src->shift_num; - return true; - } - return false; -} - -static VarAction2ProcedureAnnotation *OptimiseVarAction2GetFilledProcedureAnnotation(const SpriteGroup *group) -{ - VarAction2ProcedureAnnotation *anno; - bool is_new; - std::tie(anno, is_new) = _cur.GetVarAction2ProcedureAnnotation(group); - if (is_new) { - auto handle_group_contents = y_combinator([&](auto handle_group_contents, const SpriteGroup *sg) -> void { - if (sg == nullptr || anno->unskippable) return; - if (sg->type == SGT_RANDOMIZED) { - const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; - for (const auto 
&group : rsg->groups) { - handle_group_contents(group); - } - - /* Don't try to skip over procedure calls to randomised groups */ - anno->unskippable = true; - } else if (sg->type == SGT_DETERMINISTIC) { - const DeterministicSpriteGroup *dsg = static_cast(sg); - if (dsg->dsg_flags & DSGF_DSE_RECURSIVE_DISABLE) { - anno->unskippable = true; - return; - } - - for (const DeterministicSpriteGroupAdjust &adjust : dsg->adjusts) { - /* Don't try to skip over: unpredictable or special stores, procedure calls, permanent stores, or another jump */ - if (adjust.operation == DSGA_OP_STO && (adjust.type != DSGA_TYPE_NONE || adjust.variable != 0x1A || adjust.shift_num != 0 || adjust.and_mask >= 0x100)) { - anno->unskippable = true; - return; - } - if (adjust.operation == DSGA_OP_STO_NC && adjust.divmod_val >= 0x100) { - if (adjust.divmod_val < 0x110 && adjust.type == DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0) { - /* Storing a constant */ - anno->special_register_values[adjust.divmod_val - 0x100] = adjust.and_mask; - SetBit(anno->special_register_mask, adjust.divmod_val - 0x100); - } else { - anno->unskippable = true; - } - return; - } - if (adjust.operation == DSGA_OP_STOP) { - anno->unskippable = true; - return; - } - if (adjust.variable == 0x7E) { - handle_group_contents(adjust.subroutine); - } - - if (adjust.operation == DSGA_OP_STO) anno->stores.set(adjust.and_mask, true); - if (adjust.operation == DSGA_OP_STO_NC) anno->stores.set(adjust.divmod_val, true); - } - } - }); - handle_group_contents(group); - } - return anno; -} - -static uint OptimiseVarAction2InsertSpecialStoreOps(DeterministicSpriteGroup *group, uint offset, uint32 values[16], uint16 mask) -{ - uint added = 0; - for (uint8 bit : SetBitIterator(mask)) { - bool skip = false; - for (size_t i = offset; i < group->adjusts.size(); i++) { - const DeterministicSpriteGroupAdjust &next = group->adjusts[i]; - if (next.operation == DSGA_OP_STO_NC && next.divmod_val == 0x100u + bit) { - skip = true; 
- break; - } - if (next.operation == DSGA_OP_STO && next.variable == 0x1A && next.type == DSGA_TYPE_NONE && next.shift_num == 0 && next.and_mask == 0x100u + bit) { - skip = true; - break; - } - if (next.variable == 0x7D && next.parameter == 0x100u + bit) break; - if (next.variable >= 0x40 && next.variable != 0x7D && next.variable != 0x7C) break; // crude whitelist of variables which will never read special registers - } - if (skip) continue; - DeterministicSpriteGroupAdjust store = {}; - store.operation = DSGA_OP_STO_NC; - store.variable = 0x1A; - store.type = DSGA_TYPE_NONE; - store.shift_num = 0; - store.and_mask = values[bit]; - store.divmod_val = 0x100 + bit; - group->adjusts.insert(group->adjusts.begin() + offset + added, store); - added++; - } - return added; -} - -struct VarAction2ProcedureCallVarReadAnnotation { - const SpriteGroup *subroutine; - VarAction2ProcedureAnnotation *anno; - std::bitset<256> relevant_stores; - std::bitset<256> last_reads; - bool unskippable; -}; -static std::vector _varaction2_proc_call_var_read_annotations; - -static void OptimiseVarAction2DeterministicSpriteGroupPopulateLastVarReadAnnotations(DeterministicSpriteGroup *group, VarAction2GroupVariableTracking *var_tracking) -{ - std::bitset<256> bits; - if (var_tracking != nullptr) bits = (var_tracking->out | var_tracking->proc_call_out); - bool need_var1C = false; - - for (int i = (int)group->adjusts.size() - 1; i >= 0; i--) { - DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; - - if (adjust.operation == DSGA_OP_STO) { - if (adjust.type == DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask < 0x100) { - /* Predictable store */ - bits.set(adjust.and_mask, false); - } - } - if (adjust.variable == 0x7B && adjust.parameter == 0x7D) { - /* Unpredictable load */ - bits.set(); - } - if (adjust.variable == 0x7D && adjust.parameter < 0x100) { - if (!bits[adjust.parameter]) { - bits.set(adjust.parameter, true); - adjust.adjust_flags |= 
DSGAF_LAST_VAR_READ; - } - } - if (adjust.variable == 0x1C) { - need_var1C = true; - } - - if (adjust.variable == 0x7E) { - /* procedure call */ - - VarAction2ProcedureCallVarReadAnnotation &anno = _varaction2_proc_call_var_read_annotations.emplace_back(); - anno.subroutine = adjust.subroutine; - anno.anno = OptimiseVarAction2GetFilledProcedureAnnotation(adjust.subroutine); - anno.relevant_stores = anno.anno->stores & bits; - anno.unskippable = anno.anno->unskippable; - adjust.jump = (uint)_varaction2_proc_call_var_read_annotations.size() - 1; // index into _varaction2_proc_call_var_read_annotations - - if (need_var1C) { - anno.unskippable = true; - need_var1C = false; - } - - std::bitset<256> orig_bits = bits; - - auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void { - if (sg == nullptr) return; - if (sg->type == SGT_RANDOMIZED) { - const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; - for (const auto &group : rsg->groups) { - handle_group(group); - } - - /* Don't try to skip over procedure calls to randomised groups */ - anno.unskippable = true; - } else if (sg->type == SGT_DETERMINISTIC) { - const DeterministicSpriteGroup *sub = static_cast(sg); - VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sub, false); - if (var_tracking != nullptr) { - bits |= var_tracking->in; - anno.last_reads |= (var_tracking->in & ~orig_bits); - } - - if (sub->dsg_flags & DSGF_REQUIRES_VAR1C) need_var1C = true; - - if (sub->dsg_flags & DSGF_DSE_RECURSIVE_DISABLE) anno.unskippable = true; - /* No need to check default_group and ranges here as if those contain deterministic groups then DSGF_DSE_RECURSIVE_DISABLE would be set */ - } - }); - handle_group(anno.subroutine); - } - } -} - -static void OptimiseVarAction2DeterministicSpriteGroupInsertJumps(DeterministicSpriteGroup *group, VarAction2GroupVariableTracking *var_tracking) -{ - if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_INSERT_JUMPS)) return; 
- - group->dsg_flags &= ~DSGF_CHECK_INSERT_JUMP; - - OptimiseVarAction2DeterministicSpriteGroupPopulateLastVarReadAnnotations(group, var_tracking); - - for (int i = (int)group->adjusts.size() - 1; i >= 1; i--) { - DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; - - if (adjust.adjust_flags & DSGAF_JUMP_INS_HINT) { - std::bitset<256> ok_stores; - uint32 special_stores[16]; - uint16 special_stores_mask = 0; - int j = i - 1; - while (j >= 0) { - DeterministicSpriteGroupAdjust &prev = group->adjusts[j]; - - /* Don't try to skip over: unpredictable or unusable special stores, unskippable procedure calls, permanent stores, or another jump */ - if (prev.operation == DSGA_OP_STO && (prev.type != DSGA_TYPE_NONE || prev.variable != 0x1A || prev.shift_num != 0 || prev.and_mask >= 0x100)) break; - if (prev.operation == DSGA_OP_STO_NC && prev.divmod_val >= 0x100) { - if (prev.divmod_val < 0x110 && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0) { - /* Storing a constant in a special register */ - if (!HasBit(special_stores_mask, prev.divmod_val - 0x100)) { - special_stores[prev.divmod_val - 0x100] = prev.and_mask; - SetBit(special_stores_mask, prev.divmod_val - 0x100); - } - } else { - break; - } - } - if (prev.operation == DSGA_OP_STOP) break; - if (IsEvalAdjustJumpOperation(prev.operation)) break; - if (prev.variable == 0x7E) { - const VarAction2ProcedureCallVarReadAnnotation &anno = _varaction2_proc_call_var_read_annotations[prev.jump]; - if (anno.unskippable) break; - if ((anno.relevant_stores & ~ok_stores).any()) break; - ok_stores |= anno.last_reads; - - uint16 new_stores = anno.anno->special_register_mask & ~special_stores_mask; - for (uint8 bit : SetBitIterator(new_stores)) { - special_stores[bit] = anno.anno->special_register_values[bit]; - } - special_stores_mask |= new_stores; - } - - /* Reached a store which can't be skipped over because the value is needed later */ - if (prev.operation == DSGA_OP_STO && 
!ok_stores[prev.and_mask]) break; - if (prev.operation == DSGA_OP_STO_NC && prev.divmod_val < 0x100 && !ok_stores[prev.divmod_val]) break; - - if (prev.variable == 0x7D && (prev.adjust_flags & DSGAF_LAST_VAR_READ)) { - /* The stored value is no longer needed after this, we can skip the corresponding store */ - ok_stores.set(prev.parameter & 0xFF, true); - } - - j--; - } - if (j < i - 1) { - auto mark_end_block = [&](uint index, uint inc) { - if (group->adjusts[index].variable == 0x7E) { - /* Procedure call, can't mark this as an end block directly, so insert a NOOP and use that */ - DeterministicSpriteGroupAdjust noop = {}; - noop.operation = DSGA_OP_NOOP; - noop.variable = 0x1A; - group->adjusts.insert(group->adjusts.begin() + index + 1, noop); - - /* Fixup offsets */ - if (i > (int)index) i++; - if (j > (int)index) j++; - index++; - } - - DeterministicSpriteGroupAdjust &adj = group->adjusts[index]; - if (adj.adjust_flags & DSGAF_END_BLOCK) { - adj.jump += inc; - } else { - adj.adjust_flags |= DSGAF_END_BLOCK; - adj.jump = inc; - if (special_stores_mask) { - uint added = OptimiseVarAction2InsertSpecialStoreOps(group, index + 1, special_stores, special_stores_mask); - - /* Fixup offsets */ - if (i > (int)index) i += added; - if (j > (int)index) j += added; - } - } - }; - - DeterministicSpriteGroupAdjust current = adjust; - /* Do not use adjust reference after this point */ - - if (current.adjust_flags & DSGAF_END_BLOCK) { - /* Move the existing end block 1 place back, to avoid it being moved with the jump adjust */ - mark_end_block(i - 1, current.jump); - current.adjust_flags &= ~DSGAF_END_BLOCK; - current.jump = 0; - } - current.operation = (current.adjust_flags & DSGAF_SKIP_ON_LSB_SET) ? 
DSGA_OP_JNZ : DSGA_OP_JZ; - current.adjust_flags &= ~(DSGAF_JUMP_INS_HINT | DSGAF_SKIP_ON_ZERO | DSGAF_SKIP_ON_LSB_SET); - mark_end_block(i - 1, 1); - group->adjusts.erase(group->adjusts.begin() + i); - if (j >= 0 && current.variable == 0x7D && (current.adjust_flags & DSGAF_LAST_VAR_READ)) { - DeterministicSpriteGroupAdjust &prev = group->adjusts[j]; - if (prev.operation == DSGA_OP_STO_NC && prev.divmod_val == (current.parameter & 0xFF) && - TryCombineTempStoreLoadWithStoreSourceAdjust(current, &prev, false)) { - /* Managed to extract source from immediately prior STO_NC, which can now be removed */ - group->adjusts.erase(group->adjusts.begin() + j); - j--; - i--; - } else if (current.type == DSGA_TYPE_NONE && current.shift_num == 0 && current.and_mask == 0xFFFFFFFF && - prev.operation == DSGA_OP_STO && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == (current.parameter & 0xFF)) { - /* Reading from immediately prior store, which can now be removed */ - current.operation = (current.operation == DSGA_OP_JNZ) ? 
DSGA_OP_JNZ_LV : DSGA_OP_JZ_LV; - current.adjust_flags &= ~DSGAF_LAST_VAR_READ; - current.and_mask = 0; - current.variable = 0x1A; - group->adjusts.erase(group->adjusts.begin() + j); - j--; - i--; - } - } - group->adjusts.insert(group->adjusts.begin() + j + 1, current); - group->dsg_flags |= DSGF_CHECK_INSERT_JUMP; - i++; - } - } - } - - if (!_varaction2_proc_call_var_read_annotations.empty()) { - for (DeterministicSpriteGroupAdjust &adjust : group->adjusts) { - if (adjust.variable == 0x7E) adjust.subroutine = _varaction2_proc_call_var_read_annotations[adjust.jump].subroutine; - } - _varaction2_proc_call_var_read_annotations.clear(); - } -} - -struct ResolveJumpInnerResult { - uint end_index; - uint end_block_remaining; -}; - -static ResolveJumpInnerResult OptimiseVarAction2DeterministicSpriteResolveJumpsInner(DeterministicSpriteGroup *group, const uint start) -{ - for (uint i = start + 1; i < (uint)group->adjusts.size(); i++) { - if (IsEvalAdjustJumpOperation(group->adjusts[i].operation)) { - ResolveJumpInnerResult result = OptimiseVarAction2DeterministicSpriteResolveJumpsInner(group, i); - i = result.end_index; - if (result.end_block_remaining > 0) { - group->adjusts[start].jump = i - start; - return { i, result.end_block_remaining - 1 }; - } - } else if (group->adjusts[i].adjust_flags & DSGAF_END_BLOCK) { - group->adjusts[start].jump = i - start; - return { i, group->adjusts[i].jump - 1 }; - } - } - - NOT_REACHED(); -} - -static void OptimiseVarAction2DeterministicSpriteResolveJumps(DeterministicSpriteGroup *group) -{ - if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_INSERT_JUMPS)) return; - - for (uint i = 0; i < (uint)group->adjusts.size(); i++) { - if (IsEvalAdjustJumpOperation(group->adjusts[i].operation)) { - ResolveJumpInnerResult result = OptimiseVarAction2DeterministicSpriteResolveJumpsInner(group, i); - i = result.end_index; - assert(result.end_block_remaining == 0); - } - } -} - -static void 
OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group) -{ - if (unlikely(HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2))) return; - - for (DeterministicSpriteGroupAdjust &adjust : group->adjusts) { - if (adjust.variable == 0x7D) adjust.parameter &= 0xFF; // Clear temporary version tags - } - - if (!HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_GROUP_PRUNE) && (state.inference & VA2AIF_HAVE_CONSTANT) && !group->calculated_result) { - /* Result of this sprite group is always the same, discard the unused branches */ - const SpriteGroup *target = group->default_group; - for (const auto &range : group->ranges) { - if (range.low <= state.current_constant && state.current_constant <= range.high) { - target = range.group; - } - } - group->default_group = target; - group->error_group = target; - group->ranges.clear(); - } - - std::bitset<256> bits; - std::bitset<256> pending_bits; - bool seen_pending = false; - bool seen_req_var1C = false; - if (!group->calculated_result) { - auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void { - if (sg != nullptr && sg->type == SGT_DETERMINISTIC) { - VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sg, false); - const DeterministicSpriteGroup *dsg = (const DeterministicSpriteGroup*)sg; - if (dsg->dsg_flags & DSGF_VAR_TRACKING_PENDING) { - seen_pending = true; - if (var_tracking != nullptr) pending_bits |= var_tracking->in; - } else { - if (var_tracking != nullptr) bits |= var_tracking->in; - } - if (dsg->dsg_flags & DSGF_REQUIRES_VAR1C) seen_req_var1C = true; - } - if (sg != nullptr && sg->type == SGT_RANDOMIZED) { - const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; - for (const auto &group : rsg->groups) { - handle_group(group); - } - } - if (sg != nullptr && sg->type == SGT_TILELAYOUT) { - const TileLayoutSpriteGroup *tlsg = (const 
TileLayoutSpriteGroup*)sg; - if (tlsg->dts.registers != nullptr) { - const TileLayoutRegisters *registers = tlsg->dts.registers; - size_t count = 1; // 1 for the ground sprite - const DrawTileSeqStruct *element; - foreach_draw_tile_seq(element, tlsg->dts.seq) count++; - for (size_t i = 0; i < count; i ++) { - const TileLayoutRegisters *reg = registers + i; - if (reg->flags & TLF_DODRAW) bits.set(reg->dodraw, true); - if (reg->flags & TLF_SPRITE) bits.set(reg->sprite, true); - if (reg->flags & TLF_PALETTE) bits.set(reg->palette, true); - if (reg->flags & TLF_BB_XY_OFFSET) { - bits.set(reg->delta.parent[0], true); - bits.set(reg->delta.parent[1], true); - } - if (reg->flags & TLF_BB_Z_OFFSET) bits.set(reg->delta.parent[2], true); - if (reg->flags & TLF_CHILD_X_OFFSET) bits.set(reg->delta.child[0], true); - if (reg->flags & TLF_CHILD_Y_OFFSET) bits.set(reg->delta.child[1], true); - } - } - } - if (sg != nullptr && sg->type == SGT_INDUSTRY_PRODUCTION) { - const IndustryProductionSpriteGroup *ipsg = (const IndustryProductionSpriteGroup*)sg; - if (ipsg->version >= 1) { - for (int i = 0; i < ipsg->num_input; i++) { - if (ipsg->subtract_input[i] < 0x100) bits.set(ipsg->subtract_input[i], true); - } - for (int i = 0; i < ipsg->num_output; i++) { - if (ipsg->add_output[i] < 0x100) bits.set(ipsg->add_output[i], true); - } - bits.set(ipsg->again, true); - } - } - }); - handle_group(group->default_group); - for (const auto &range : group->ranges) { - handle_group(range.group); - } - if (bits.any()) { - state.GetVarTracking(group)->out = bits; - std::bitset<256> in_bits = bits | pending_bits; - for (auto &it : state.temp_stores) { - in_bits.set(it.first, false); - } - state.GetVarTracking(group)->in |= in_bits; - } - } - - if (!HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_GROUP_PRUNE) && group->ranges.empty() && !group->calculated_result && !seen_req_var1C) { - /* There is only one option, remove any redundant adjustments when the result will be ignored anyway */ - while 
(!group->adjusts.empty()) { - const DeterministicSpriteGroupAdjust &prev = group->adjusts.back(); - if (prev.variable != 0x7E && !IsEvalAdjustWithSideEffects(prev.operation)) { - /* Delete useless operation */ - group->adjusts.pop_back(); - } else { - break; - } - } - } - - bool dse_allowed = IsFeatureUsableForDSE(feature) && !HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_DSE); - bool dse_eligible = state.enable_dse; - if (dse_allowed && !dse_eligible) { - dse_eligible |= CheckDeterministicSpriteGroupOutputVarBits(group, bits, true); - } - if (state.seen_procedure_call) { - /* Be more pessimistic with procedures as the ordering is different. - * Later groups can require variables set in earlier procedures instead of the usual - * where earlier groups can require variables set in later groups. - * DSE on the procedure runs before the groups which use it, so set the procedure - * output bits not using values from call site groups before DSE. */ - CheckDeterministicSpriteGroupOutputVarBits(group, bits | pending_bits, false); - } - bool dse_candidate = (dse_allowed && dse_eligible); - if (!dse_candidate && (seen_pending || (group->dsg_flags & DSGF_CHECK_INSERT_JUMP))) { - group->dsg_flags |= DSGF_NO_DSE; - dse_candidate = true; - } - if (dse_candidate) { - _cur.dead_store_elimination_candidates.push_back(group); - group->dsg_flags |= DSGF_VAR_TRACKING_PENDING; - } else { - OptimiseVarAction2DeterministicSpriteGroupSimplifyStores(group); - OptimiseVarAction2DeterministicSpriteGroupAdjustOrdering(group); - } - - if (state.check_expensive_vars && !HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_EXPENSIVE_VARS)) { - if (dse_candidate) { - group->dsg_flags |= DSGF_CHECK_EXPENSIVE_VARS; - } else { - OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(group); - } - } -} - -static std::bitset<256> HandleVarAction2DeadStoreElimination(DeterministicSpriteGroup *group, VarAction2GroupVariableTracking *var_tracking, bool no_changes) -{ - std::bitset<256> all_bits; - std::bitset<256> 
propagate_bits; - std::vector substitution_candidates; - if (var_tracking != nullptr) { - propagate_bits = var_tracking->out; - all_bits = propagate_bits | var_tracking->proc_call_out; - } - bool need_var1C = false; - - auto abandon_substitution_candidates = [&]() { - for (uint value : substitution_candidates) { - all_bits.set(value & 0xFF, true); - propagate_bits.set(value & 0xFF, true); - } - substitution_candidates.clear(); - }; - auto erase_adjust = [&](int index) { - group->adjusts.erase(group->adjusts.begin() + index); - for (size_t i = 0; i < substitution_candidates.size();) { - uint &value = substitution_candidates[i]; - if (value >> 8 == (uint)index) { - /* Removed the substitution candidate target */ - value = substitution_candidates.back(); - substitution_candidates.pop_back(); - continue; - } - - if (value >> 8 > (uint)index) { - /* Adjust the substitution candidate target offset */ - value -= 0x100; - } - - i++; - } - }; - auto try_variable_substitution = [&](DeterministicSpriteGroupAdjust &target, int prev_load_index, uint8 idx) -> bool { - assert(target.variable == 0x7D && target.parameter == idx); - - bool inverted = false; - const DeterministicSpriteGroupAdjust *var_src = GetVarAction2PreviousSingleLoadAdjust(group->adjusts, prev_load_index, &inverted); - if (var_src != nullptr) { - if (TryCombineTempStoreLoadWithStoreSourceAdjust(target, var_src, inverted)) return true; - } - return false; - }; - - for (int i = (int)group->adjusts.size() - 1; i >= 0;) { - bool pending_restart = false; - auto restart = [&]() { - pending_restart = false; - i = (int)group->adjusts.size() - 1; - if (var_tracking != nullptr) { - propagate_bits = var_tracking->out; - all_bits = propagate_bits | var_tracking->proc_call_out; - } else { - all_bits.reset(); - propagate_bits.reset(); - } - substitution_candidates.clear(); - need_var1C = false; - }; - const DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; - if (adjust.operation == DSGA_OP_STO) { - if (adjust.type 
== DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask < 0x100) { - uint8 idx = adjust.and_mask; - /* Predictable store */ - - for (size_t j = 0; j < substitution_candidates.size(); j++) { - if ((substitution_candidates[j] & 0xFF) == idx) { - /* Found candidate */ - - DeterministicSpriteGroupAdjust &target = group->adjusts[substitution_candidates[j] >> 8]; - bool substituted = try_variable_substitution(target, i - 1, idx); - if (!substituted) { - /* Not usable, mark as required so it's not eliminated */ - all_bits.set(idx, true); - propagate_bits.set(idx, true); - } - substitution_candidates[j] = substitution_candidates.back(); - substitution_candidates.pop_back(); - break; - } - } - - if (!all_bits[idx] && !no_changes) { - /* Redundant store */ - erase_adjust(i); - i--; - if ((i + 1 < (int)group->adjusts.size() && group->adjusts[i + 1].operation == DSGA_OP_RST && group->adjusts[i + 1].variable != 0x7B) || - (i + 1 == (int)group->adjusts.size() && group->ranges.empty() && !group->calculated_result)) { - /* Now the store is eliminated, the current value has no users */ - while (i >= 0) { - const DeterministicSpriteGroupAdjust &prev = group->adjusts[i]; - if (prev.variable != 0x7E && !IsEvalAdjustWithSideEffects(prev.operation)) { - /* Delete useless operation */ - erase_adjust(i); - i--; - } else { - if (i + 1 < (int)group->adjusts.size()) { - DeterministicSpriteGroupAdjust &next = group->adjusts[i + 1]; - if (prev.operation == DSGA_OP_STO && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && - prev.shift_num == 0 && prev.and_mask < 0x100 && - next.operation == DSGA_OP_RST && next.type == DSGA_TYPE_NONE && next.variable == 0x7D && - next.parameter == prev.and_mask && next.shift_num == 0 && next.and_mask == 0xFFFFFFFF) { - /* Removing the dead store results in a store/load sequence, remove the load and re-check */ - erase_adjust(i + 1); - restart(); - break; - } - if (next.operation == DSGA_OP_RST) { - /* See if this is a 
repeated load of a variable (not procedure call) */ - const DeterministicSpriteGroupAdjust *prev_load = GetVarAction2PreviousSingleLoadAdjust(group->adjusts, i, nullptr); - if (prev_load != nullptr && MemCmpT(prev_load, &next) == 0) { - if (next.variable == 0x7D) pending_restart = true; - erase_adjust(i + 1); - break; - } - } - if (i + 2 < (int)group->adjusts.size() && next.operation == DSGA_OP_RST && next.variable != 0x7E && - prev.operation == DSGA_OP_STO && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && - prev.shift_num == 0 && prev.and_mask < 0x100) { - const DeterministicSpriteGroupAdjust &next2 = group->adjusts[i + 2]; - if (next2.type == DSGA_TYPE_NONE && next2.variable == 0x7D && next2.shift_num == 0 && - next2.and_mask == 0xFFFFFFFF && next2.parameter == prev.and_mask) { - if (IsEvalAdjustOperationReversable(next2.operation)) { - /* Convert: store, load var, (anti-)commutative op on stored --> (dead) store, (reversed) (anti-)commutative op var */ - next.operation = ReverseEvalAdjustOperation(next2.operation); - if (IsEvalAdjustWithZeroLastValueAlwaysZero(next.operation)) { - next.adjust_flags |= DSGAF_SKIP_ON_ZERO; - } - erase_adjust(i + 2); - restart(); - break; - } - } - } - } - break; - } - } - } else { - while (i >= 0 && i + 1 < (int)group->adjusts.size()) { - /* See if having removed the store, there is now a useful pair of operations which can be combined */ - DeterministicSpriteGroupAdjust &prev = group->adjusts[i]; - DeterministicSpriteGroupAdjust &next = group->adjusts[i + 1]; - if (next.type == DSGA_TYPE_NONE && next.operation == DSGA_OP_XOR && next.variable == 0x1A && next.shift_num == 0 && next.and_mask == 1) { - /* XOR: boolean invert */ - if (IsEvalAdjustOperationRelationalComparison(prev.operation)) { - prev.operation = InvertEvalAdjustRelationalComparisonOperation(prev.operation); - erase_adjust(i + 1); - continue; - } else if (prev.operation == DSGA_OP_RST && IsConstantComparisonAdjustType(prev.type)) { - prev.type = 
InvertConstantComparisonAdjustType(prev.type); - erase_adjust(i + 1); - continue; - } - } - if (i >= 1 && prev.type == DSGA_TYPE_NONE && IsEvalAdjustOperationRelationalComparison(prev.operation) && - prev.variable == 0x1A && prev.shift_num == 0 && next.operation == DSGA_OP_MUL) { - if (((prev.operation == DSGA_OP_SGT && (prev.and_mask == 0 || prev.and_mask == (uint)-1)) || (prev.operation == DSGA_OP_SGE && (prev.and_mask == 0 || prev.and_mask == 1))) && - IsIdenticalValueLoad(GetVarAction2PreviousSingleLoadAdjust(group->adjusts, i - 1, nullptr), &next)) { - prev.operation = DSGA_OP_SMAX; - prev.and_mask = 0; - erase_adjust(i + 1); - continue; - } - if (((prev.operation == DSGA_OP_SLE && (prev.and_mask == 0 || prev.and_mask == (uint)-1)) || (prev.operation == DSGA_OP_SLT && (prev.and_mask == 0 || prev.and_mask == 1))) && - IsIdenticalValueLoad(GetVarAction2PreviousSingleLoadAdjust(group->adjusts, i - 1, nullptr), &next)) { - prev.operation = DSGA_OP_SMIN; - prev.and_mask = 0; - erase_adjust(i + 1); - continue; - } - } - break; - } - } - if (pending_restart) restart(); - continue; - } else { - /* Non-redundant store */ - all_bits.set(idx, false); - propagate_bits.set(idx, false); - } - } else { - /* Unpredictable store */ - abandon_substitution_candidates(); - } - } - if (adjust.variable == 0x7B && adjust.parameter == 0x7D) { - /* Unpredictable load */ - all_bits.set(); - propagate_bits.set(); - abandon_substitution_candidates(); - } - if (adjust.variable == 0x7D && adjust.parameter < 0x100) { - if (i > 0 && !all_bits[adjust.parameter] && !no_changes) { - /* See if this can be made a substitution candidate */ - bool add = true; - for (size_t j = 0; j < substitution_candidates.size(); j++) { - if ((substitution_candidates[j] & 0xFF) == adjust.parameter) { - /* There already is a candidate */ - substitution_candidates[j] = substitution_candidates.back(); - substitution_candidates.pop_back(); - all_bits.set(adjust.parameter, true); - propagate_bits.set(adjust.parameter, 
true); - add = false; - break; - } - } - if (add) { - substitution_candidates.push_back(adjust.parameter | (i << 8)); - } - } else { - all_bits.set(adjust.parameter, true); - propagate_bits.set(adjust.parameter, true); - } - } - if (adjust.variable == 0x1C) { - need_var1C = true; - } - if (adjust.variable == 0x7E) { - /* procedure call */ - - VarAction2ProcedureAnnotation *anno = OptimiseVarAction2GetFilledProcedureAnnotation(adjust.subroutine); - - bool may_remove = !need_var1C; - if (may_remove && anno->unskippable) may_remove = false; - if (may_remove && (anno->stores & all_bits).any()) may_remove = false; - - if (may_remove) { - if ((i + 1 < (int)group->adjusts.size() && group->adjusts[i + 1].operation == DSGA_OP_RST && group->adjusts[i + 1].variable != 0x7B) || - (i + 1 == (int)group->adjusts.size() && group->ranges.empty() && !group->calculated_result)) { - /* Procedure is skippable, makes no stores we need, and the return value is also not needed */ - erase_adjust(i); - if (anno->special_register_mask) { - OptimiseVarAction2InsertSpecialStoreOps(group, i, anno->special_register_values, anno->special_register_mask); - restart(); - } else { - i--; - } - continue; - } - } - - need_var1C = false; - - auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void { - if (sg == nullptr) return; - if (sg->type == SGT_RANDOMIZED) { - const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; - for (const auto &group : rsg->groups) { - handle_group(group); - } - } else if (sg->type == SGT_DETERMINISTIC) { - const DeterministicSpriteGroup *sub = static_cast(sg); - VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sub, false); - if (var_tracking != nullptr) { - all_bits |= var_tracking->in; - propagate_bits |= var_tracking->in; - } - if (sub->dsg_flags & DSGF_REQUIRES_VAR1C) need_var1C = true; - } - }); - handle_group(adjust.subroutine); - if (anno->unskippable || anno->special_register_mask) { - 
abandon_substitution_candidates(); - } else { - /* Flush any substitution candidates which reference stores made in the procedure */ - for (size_t j = 0; j < substitution_candidates.size();) { - uint8 idx = substitution_candidates[j] & 0xFF; - if (anno->stores[idx]) { - all_bits.set(idx, true); - propagate_bits.set(idx, true); - substitution_candidates[j] = substitution_candidates.back(); - substitution_candidates.pop_back(); - } else { - j++; - } - } - } - } - i--; - } - abandon_substitution_candidates(); - return propagate_bits; -} - -static void HandleVarAction2OptimisationPasses() -{ - if (unlikely(HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2))) return; - - for (DeterministicSpriteGroup *group : _cur.dead_store_elimination_candidates) { - VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(group, false); - if (!group->calculated_result) { - /* Add bits from any groups previously marked with DSGF_VAR_TRACKING_PENDING which should now be correctly updated after DSE */ - auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void { - if (sg != nullptr && sg->type == SGT_DETERMINISTIC) { - VarAction2GroupVariableTracking *targ_var_tracking = _cur.GetVarAction2GroupVariableTracking(sg, false); - if (targ_var_tracking != nullptr) { - if (var_tracking == nullptr) var_tracking = _cur.GetVarAction2GroupVariableTracking(group, true); - var_tracking->out |= targ_var_tracking->in; - } - } - if (sg != nullptr && sg->type == SGT_RANDOMIZED) { - const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; - for (const auto &group : rsg->groups) { - handle_group(group); - } - } - }); - handle_group(group->default_group); - group->default_group = PruneTargetSpriteGroup(group->default_group); - for (auto &range : group->ranges) { - handle_group(range.group); - range.group = PruneTargetSpriteGroup(range.group); - } - } - - /* Always run this even DSGF_NO_DSE is set because the load/store tracking is needed to 
re-calculate the input bits, - * even if no stores are actually eliminated */ - std::bitset<256> in_bits = HandleVarAction2DeadStoreElimination(group, var_tracking, group->dsg_flags & DSGF_NO_DSE); - if (var_tracking == nullptr && in_bits.any()) { - var_tracking = _cur.GetVarAction2GroupVariableTracking(group, true); - var_tracking->in = in_bits; - } else if (var_tracking != nullptr) { - var_tracking->in = in_bits; - } - - OptimiseVarAction2DeterministicSpriteGroupSimplifyStores(group); - OptimiseVarAction2DeterministicSpriteGroupAdjustOrdering(group); - if (group->dsg_flags & DSGF_CHECK_INSERT_JUMP) { - OptimiseVarAction2DeterministicSpriteGroupInsertJumps(group, var_tracking); - } - if (group->dsg_flags & DSGF_CHECK_EXPENSIVE_VARS) { - OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(group); - } - if (group->dsg_flags & DSGF_CHECK_INSERT_JUMP) { - OptimiseVarAction2DeterministicSpriteResolveJumps(group); - } - } -} - -static void ProcessDeterministicSpriteGroupRanges(const std::vector &ranges, std::vector &ranges_out, const SpriteGroup *default_group) -{ - /* Sort ranges ascending. 
When ranges overlap, this may required clamping or splitting them */ - std::vector bounds; - for (uint i = 0; i < ranges.size(); i++) { - bounds.push_back(ranges[i].low); - if (ranges[i].high != UINT32_MAX) bounds.push_back(ranges[i].high + 1); - } - std::sort(bounds.begin(), bounds.end()); - bounds.erase(std::unique(bounds.begin(), bounds.end()), bounds.end()); - - std::vector target; - for (uint j = 0; j < bounds.size(); ++j) { - uint32 v = bounds[j]; - const SpriteGroup *t = default_group; - for (uint i = 0; i < ranges.size(); i++) { - if (ranges[i].low <= v && v <= ranges[i].high) { - t = ranges[i].group; - break; - } - } - target.push_back(t); - } - assert(target.size() == bounds.size()); - - for (uint j = 0; j < bounds.size(); ) { - if (target[j] != default_group) { - DeterministicSpriteGroupRange &r = ranges_out.emplace_back(); - r.group = target[j]; - r.low = bounds[j]; - while (j < bounds.size() && target[j] == r.group) { - j++; - } - r.high = j < bounds.size() ? bounds[j] - 1 : UINT32_MAX; - } else { - j++; - } - } -} - /* Action 0x02 */ static void NewSpriteGroup(ByteReader *buf) { diff --git a/src/newgrf_internal.h b/src/newgrf_internal.h new file mode 100644 index 0000000000..242c171e43 --- /dev/null +++ b/src/newgrf_internal.h @@ -0,0 +1,256 @@ +/* + * This file is part of OpenTTD. + * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2. + * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see . + */ + +/** + * @file newgrf_internal.h Internal NewGRF processing definitions. 
+ */ + +#ifndef NEWGRF_INTERNAL_H +#define NEWGRF_INTERNAL_H + +#include "newgrf.h" +#include "newgrf_spritegroup.h" +#include "spriteloader/spriteloader.hpp" +#include "core/arena_alloc.hpp" + +#include "3rdparty/cpp-btree/btree_map.h" +#include + +static const uint MAX_SPRITEGROUP = UINT8_MAX; ///< Maximum GRF-local ID for a spritegroup. + +/** Base GRF ID for OpenTTD's base graphics GRFs. */ +static const uint32 OPENTTD_GRAPHICS_BASE_GRF_ID = BSWAP32(0xFF4F5400); + +struct VarAction2GroupVariableTracking { + std::bitset<256> in; + std::bitset<256> out; + std::bitset<256> proc_call_out; +}; + +struct VarAction2ProcedureAnnotation { + std::bitset<256> stores; + uint32 special_register_values[16]; + uint16 special_register_mask = 0; + bool unskippable = false; +}; + +/** Temporary data during loading of GRFs */ +struct GrfProcessingState { +private: + /** Definition of a single Action1 spriteset */ + struct SpriteSet { + SpriteID sprite; ///< SpriteID of the first sprite of the set. + uint num_sprites; ///< Number of sprites in the set. + }; + + /** Currently referenceable spritesets */ + btree::btree_map spritesets[GSF_END]; + +public: + /* Global state */ + GrfLoadingStage stage; ///< Current loading stage + SpriteID spriteid; ///< First available SpriteID for loading realsprites. + + /* Local state in the file */ + SpriteFile *file; ///< File of currently processed GRF file. + GRFFile *grffile; ///< Currently processed GRF file. + GRFConfig *grfconfig; ///< Config of the currently processed GRF file. + uint32 nfo_line; ///< Currently processed pseudo sprite number in the GRF. + + /* Kind of return values when processing certain actions */ + int skip_sprites; ///< Number of pseudo sprites to skip before processing the next one. 
(-1 to skip to end of file) + + /* Currently referenceable spritegroups */ + const SpriteGroup *spritegroups[MAX_SPRITEGROUP + 1]; + + /* VarAction2 temporary storage variable tracking */ + btree::btree_map group_temp_store_variable_tracking; + UniformArenaAllocator group_temp_store_variable_tracking_storage; + btree::btree_map procedure_annotations; + UniformArenaAllocator procedure_annotations_storage; + std::vector dead_store_elimination_candidates; + + VarAction2GroupVariableTracking *GetVarAction2GroupVariableTracking(const SpriteGroup *group, bool make_new) + { + if (make_new) { + VarAction2GroupVariableTracking *&ptr = this->group_temp_store_variable_tracking[group]; + if (!ptr) ptr = new (this->group_temp_store_variable_tracking_storage.Allocate()) VarAction2GroupVariableTracking(); + return ptr; + } else { + auto iter = this->group_temp_store_variable_tracking.find(group); + if (iter != this->group_temp_store_variable_tracking.end()) return iter->second; + return nullptr; + } + } + + std::pair GetVarAction2ProcedureAnnotation(const SpriteGroup *group) + { + VarAction2ProcedureAnnotation *&ptr = this->procedure_annotations[group]; + if (!ptr) { + ptr = new (this->procedure_annotations_storage.Allocate()) VarAction2ProcedureAnnotation(); + return std::make_pair(ptr, true); + } else { + return std::make_pair(ptr, false); + } + } + + /** Clear temporary data before processing the next file in the current loading stage */ + void ClearDataForNextFile() + { + this->nfo_line = 0; + this->skip_sprites = 0; + + for (uint i = 0; i < GSF_END; i++) { + this->spritesets[i].clear(); + } + + memset(this->spritegroups, 0, sizeof(this->spritegroups)); + + this->group_temp_store_variable_tracking.clear(); + this->group_temp_store_variable_tracking_storage.EmptyArena(); + this->procedure_annotations.clear(); + this->procedure_annotations_storage.EmptyArena(); + this->dead_store_elimination_candidates.clear(); + } + + /** + * Records new spritesets. 
+ * @param feature GrfSpecFeature the set is defined for. + * @param first_sprite SpriteID of the first sprite in the set. + * @param first_set First spriteset to define. + * @param numsets Number of sets to define. + * @param numents Number of sprites per set to define. + */ + void AddSpriteSets(byte feature, SpriteID first_sprite, uint first_set, uint numsets, uint numents) + { + assert(feature < GSF_END); + for (uint i = 0; i < numsets; i++) { + SpriteSet &set = this->spritesets[feature][first_set + i]; + set.sprite = first_sprite + i * numents; + set.num_sprites = numents; + } + } + + /** + * Check whether there are any valid spritesets for a feature. + * @param feature GrfSpecFeature to check. + * @return true if there are any valid sets. + * @note Spritesets with zero sprites are valid to allow callback-failures. + */ + bool HasValidSpriteSets(byte feature) const + { + assert(feature < GSF_END); + return !this->spritesets[feature].empty(); + } + + /** + * Check whether a specific set is defined. + * @param feature GrfSpecFeature to check. + * @param set Set to check. + * @return true if the set is valid. + * @note Spritesets with zero sprites are valid to allow callback-failures. + */ + bool IsValidSpriteSet(byte feature, uint set) const + { + assert(feature < GSF_END); + return this->spritesets[feature].find(set) != this->spritesets[feature].end(); + } + + /** + * Returns the first sprite of a spriteset. + * @param feature GrfSpecFeature to query. + * @param set Set to query. + * @return First sprite of the set. + */ + SpriteID GetSprite(byte feature, uint set) const + { + assert(IsValidSpriteSet(feature, set)); + return this->spritesets[feature].find(set)->second.sprite; + } + + /** + * Returns the number of sprites in a spriteset + * @param feature GrfSpecFeature to query. + * @param set Set to query. + * @return Number of sprites in the set. 
+ */ + uint GetNumEnts(byte feature, uint set) const + { + assert(IsValidSpriteSet(feature, set)); + return this->spritesets[feature].find(set)->second.num_sprites; + } +}; + +extern GrfProcessingState _cur; + +enum VarAction2AdjustInferenceFlags { + VA2AIF_NONE = 0x00, + + VA2AIF_SIGNED_NON_NEGATIVE = 0x01, + VA2AIF_ONE_OR_ZERO = 0x02, + VA2AIF_PREV_TERNARY = 0x04, + VA2AIF_PREV_MASK_ADJUST = 0x08, + VA2AIF_PREV_STORE_TMP = 0x10, + VA2AIF_HAVE_CONSTANT = 0x20, + VA2AIF_SINGLE_LOAD = 0x40, + VA2AIF_MUL_BOOL = 0x80, + VA2AIF_PREV_SCMP_DEC = 0x100, + + VA2AIF_PREV_MASK = VA2AIF_PREV_TERNARY | VA2AIF_PREV_MASK_ADJUST | VA2AIF_PREV_STORE_TMP | VA2AIF_PREV_SCMP_DEC, +}; +DECLARE_ENUM_AS_BIT_SET(VarAction2AdjustInferenceFlags) + +struct VarAction2TempStoreInferenceVarSource { + DeterministicSpriteGroupAdjustType type; + uint16 variable; + byte shift_num; + uint32 parameter; + uint32 and_mask; + uint32 add_val; + uint32 divmod_val; +}; + +struct VarAction2TempStoreInference { + VarAction2AdjustInferenceFlags inference = VA2AIF_NONE; + uint32 store_constant = 0; + VarAction2TempStoreInferenceVarSource var_source; + uint version = 0; +}; + +struct VarAction2InferenceBackup { + VarAction2AdjustInferenceFlags inference = VA2AIF_NONE; + uint32 current_constant = 0; + uint adjust_size = 0; +}; + +struct VarAction2OptimiseState { + VarAction2AdjustInferenceFlags inference = VA2AIF_NONE; + uint32 current_constant = 0; + btree::btree_map temp_stores; + VarAction2InferenceBackup inference_backup; + VarAction2GroupVariableTracking *var_tracking = nullptr; + bool seen_procedure_call = false; + bool check_expensive_vars = false; + bool enable_dse = false; + uint default_variable_version = 0; + + inline VarAction2GroupVariableTracking *GetVarTracking(DeterministicSpriteGroup *group) + { + if (this->var_tracking == nullptr) { + this->var_tracking = _cur.GetVarAction2GroupVariableTracking(group, true); + } + return this->var_tracking; + } +}; + +const SpriteGroup 
*PruneTargetSpriteGroup(const SpriteGroup *result); +void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group, DeterministicSpriteGroupAdjust &adjust); +void ProcessDeterministicSpriteGroupRanges(const std::vector &ranges, std::vector &ranges_out, const SpriteGroup *default_group); +void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group); +void HandleVarAction2OptimisationPasses(); + +#endif /* NEWGRF_INTERNAL_H */ \ No newline at end of file diff --git a/src/newgrf_optimiser.cpp b/src/newgrf_optimiser.cpp new file mode 100644 index 0000000000..c3ff6eef1b --- /dev/null +++ b/src/newgrf_optimiser.cpp @@ -0,0 +1,2763 @@ +/* + * This file is part of OpenTTD. + * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2. + * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see . + */ + +/** @file newgrf_optimiser.cpp NewGRF optimiser. 
*/ + +#include "stdafx.h" + +#include "newgrf_internal.h" +#include "newgrf_extension.h" +#include "debug_settings.h" +#include "core/y_combinator.hpp" +#include "scope.h" + +#include + +#include "safeguards.h" + +static bool IsExpensiveVehicleVariable(uint16 variable) +{ + switch (variable) { + case 0x45: + case 0x4A: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0xFE: + case 0xFF: + return true; + + default: + return false; + } +} + +static bool IsExpensiveIndustryTileVariable(uint16 variable) +{ + switch (variable) { + case 0x60: + case 0x61: + case 0x62: + return true; + + default: + return false; + } +} + +static bool IsExpensiveObjectVariable(uint16 variable) +{ + switch (variable) { + case 0x41: + case 0x45: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case A2VRI_OBJECT_FOUNDATION_SLOPE: + case A2VRI_OBJECT_FOUNDATION_SLOPE_CHANGE: + return true; + + default: + return false; + } +} + +static bool IsExpensiveVariable(uint16 variable, GrfSpecFeature feature, VarSpriteGroupScope var_scope) +{ + if ((feature >= GSF_TRAINS && feature <= GSF_AIRCRAFT) && IsExpensiveVehicleVariable(variable)) return true; + if (feature == GSF_INDUSTRYTILES && var_scope == VSG_SCOPE_SELF && IsExpensiveIndustryTileVariable(variable)) return true; + if (feature == GSF_OBJECTS && var_scope == VSG_SCOPE_SELF && IsExpensiveObjectVariable(variable)) return true; + return false; +} + +static bool IsVariableVeryCheap(uint16 variable, GrfSpecFeature feature) +{ + switch (variable) { + case 0x0C: + case 0x10: + case 0x18: + case 0x1C: + return true; + } + return false; +} + +static bool IsFeatureUsableForDSE(GrfSpecFeature feature) +{ + return (feature != GSF_STATIONS); +} + +static bool IsIdenticalValueLoad(const DeterministicSpriteGroupAdjust *a, const DeterministicSpriteGroupAdjust *b) +{ + if (a == nullptr && b == nullptr) return true; + if (a == nullptr || b == nullptr) return false; + + if (a->variable == 0x7B || a->variable == 0x7E) return false; + + 
return std::tie(a->type, a->variable, a->shift_num, a->parameter, a->and_mask, a->add_val, a->divmod_val) == + std::tie(b->type, b->variable, b->shift_num, b->parameter, b->and_mask, b->add_val, b->divmod_val); +} + +static const DeterministicSpriteGroupAdjust *GetVarAction2PreviousSingleLoadAdjust(const std::vector &adjusts, int start_index, bool *is_inverted) +{ + bool passed_store_perm = false; + if (is_inverted != nullptr) *is_inverted = false; + std::bitset<256> seen_stores; + for (int i = start_index; i >= 0; i--) { + const DeterministicSpriteGroupAdjust &prev = adjusts[i]; + if (prev.variable == 0x7E) { + /* Procedure call, don't use or go past this */ + break; + } + if (prev.operation == DSGA_OP_RST) { + if (prev.variable == 0x7B) { + /* Can't use this previous load as it depends on the last value */ + return nullptr; + } + if (prev.variable == 0x7C && passed_store_perm) { + /* If we passed a store perm then a load from permanent storage is not a valid previous load as we may have clobbered it */ + return nullptr; + } + if (prev.variable == 0x7D && seen_stores[prev.parameter & 0xFF]) { + /* If we passed a store then a load from that same store is not valid */ + return nullptr; + } + return &prev; + } else if (prev.operation == DSGA_OP_STO) { + if (prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask < 0x100) { + /* Temp store */ + seen_stores.set(prev.and_mask, true); + continue; + } else { + /* Special register store or unpredictable store, don't try to optimise following load */ + break; + } + } else if (prev.operation == DSGA_OP_STOP) { + /* Permanent storage store */ + passed_store_perm = true; + continue; + } else if (prev.operation == DSGA_OP_XOR && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == 1 && is_inverted != nullptr) { + /* XOR invert */ + *is_inverted = !(*is_inverted); + continue; + } else { + break; + } + } + return nullptr; +} + +static const 
DeterministicSpriteGroupAdjust *GetVarAction2PreviousSingleStoreAdjust(const std::vector &adjusts, int start_index, bool *is_inverted) +{ + if (is_inverted != nullptr) *is_inverted = false; + for (int i = start_index; i >= 0; i--) { + const DeterministicSpriteGroupAdjust &prev = adjusts[i]; + if (prev.variable == 0x7E) { + /* Procedure call, don't use or go past this */ + break; + } + if (prev.operation == DSGA_OP_STO) { + if (prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask < 0x100) { + /* Temp store */ + return &prev; + } else { + /* Special register store or unpredictable store, don't try to optimise following load */ + break; + } + } else if (prev.operation == DSGA_OP_XOR && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == 1 && is_inverted != nullptr) { + /* XOR invert */ + *is_inverted = !(*is_inverted); + continue; + } else { + break; + } + } + return nullptr; +} + +static int GetVarAction2AdjustOfPreviousTempStoreSource(const DeterministicSpriteGroupAdjust *adjusts, int start_index, uint8 store_var) +{ + for (int i = start_index - 1; i >= 0; i--) { + const DeterministicSpriteGroupAdjust &prev = adjusts[i]; + if (prev.variable == 0x7E) { + /* Procedure call, don't use or go past this */ + return -1; + } + if (prev.operation == DSGA_OP_STO) { + if (prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask < 0x100) { + /* Temp store */ + if (prev.and_mask == (store_var & 0xFF)) { + return i; + } + } else { + /* Special register store or unpredictable store, don't use or go past this */ + return -1; + } + } + } + return -1; +} + +struct VarAction2AdjustDescriptor { + DeterministicSpriteGroupAdjust *adjust_array = nullptr; + DeterministicSpriteGroupAdjust *override_first = nullptr; + int index = 0; + + inline bool IsValid() const { return this->adjust_array != nullptr; } + inline const DeterministicSpriteGroupAdjust &GetCurrent() const { 
		return this->override_first != nullptr ? *(this->override_first) : this->adjust_array[this->index]; };
};

/**
 * Step a descriptor backwards over the current adjust if it is one which can be skipped
 * (temp store) or which must terminate the walk (procedure call, variable 0x7B load,
 * special-register store, permanent storage store — these invalidate the cursor by setting
 * index to -1).
 *
 * @return true if the descriptor was advanced or invalidated (caller should loop and re-check),
 *         false if the current adjust is an ordinary value operation to be compared.
 */
static bool AdvanceVarAction2AdjustDescriptor(VarAction2AdjustDescriptor &desc)
{
	const DeterministicSpriteGroupAdjust &adj = desc.GetCurrent();
	if (adj.variable == 0x7E || adj.variable == 0x7B || adj.operation == DSGA_OP_STOP) {
		/* Procedure call or load depends on the last value, or a permanent store, don't use or go past this */
		desc.index = -1;
		desc.override_first = nullptr;
		return true;
	}
	if (adj.operation == DSGA_OP_STO) {
		if (adj.type == DSGA_TYPE_NONE && adj.variable == 0x1A && adj.shift_num == 0 && adj.and_mask < 0x100) {
			/* Temp store, skip */
			desc.index--;
		} else {
			/* Special register store or unpredictable store, don't use or go past this */
			desc.index = -1;
		}
		desc.override_first = nullptr;
		return true;
	}
	return false;
}

/**
 * Structurally compare two adjust sequences, walking both backwards in lockstep, to determine
 * whether they compute the same value.
 * Temp stores are skipped via AdvanceVarAction2AdjustDescriptor; variable 0x7D loads with
 * differing parameters are recursively compared through their originating temp stores.
 * The walk succeeds when both sequences reach a common position in the same array, or when a
 * matching DSGA_OP_RST (sequence start) is reached on both sides.
 *
 * @return true iff the two sequences are provably equivalent (false means "not proven", not "different").
 */
static bool AreVarAction2AdjustsEquivalent(VarAction2AdjustDescriptor a, VarAction2AdjustDescriptor b)
{
	if (!a.IsValid() || !b.IsValid()) return false;

	while (a.index >= 0 && b.index >= 0) {
		if (a.adjust_array == b.adjust_array && a.index == b.index) return true;

		if (AdvanceVarAction2AdjustDescriptor(a)) continue;
		if (AdvanceVarAction2AdjustDescriptor(b)) continue;

		const DeterministicSpriteGroupAdjust &adj_a = a.GetCurrent();
		const DeterministicSpriteGroupAdjust &adj_b = b.GetCurrent();

		if (std::tie(adj_a.operation, adj_a.type, adj_a.variable, adj_a.shift_num, adj_a.and_mask, adj_a.add_val, adj_a.divmod_val) !=
				std::tie(adj_b.operation, adj_b.type, adj_b.variable, adj_b.shift_num, adj_b.and_mask, adj_b.add_val, adj_b.divmod_val)) return false;

		if (adj_a.parameter != adj_b.parameter) {
			if (adj_a.variable == 0x7D) {
				/* Both are temp storage loads with different parameters: compare the stored values instead */
				int store_index_a = GetVarAction2AdjustOfPreviousTempStoreSource(a.adjust_array, a.index - 1, (adj_a.parameter & 0xFF));
				if (store_index_a < 1) {
					return false;
				}
				int store_index_b = GetVarAction2AdjustOfPreviousTempStoreSource(b.adjust_array, b.index - 1, (adj_b.parameter & 0xFF));
				if (store_index_b < 1) {
					return false;
				}
				if (!AreVarAction2AdjustsEquivalent({ a.adjust_array, nullptr, store_index_a - 1 }, { b.adjust_array, nullptr, store_index_b - 1 })) return false;
			} else {
				return false;
			}
		}

		/* An RST is the start of the computed value: everything matched back to here */
		if (adj_b.operation == DSGA_OP_RST) return true;

		a.index--;
		b.index--;
		a.override_first = nullptr;
		b.override_first = nullptr;
	}

	return false;
}

/** Result of testing whether two adjust sequences compute boolean-inverse values. */
enum VarAction2AdjustsBooleanInverseResult {
	VA2ABIR_NO,        ///< Adjusts are not inverse
	VA2ABIR_CCAT,      ///< Adjusts are inverse (constant comparison adjust type)
	VA2ABIR_XOR_A,     ///< Adjusts are inverse (a has an additional XOR 1 or EQ 0 compared to b)
	VA2ABIR_XOR_B,     ///< Adjusts are inverse (b has an additional XOR 1 or EQ 0 compared to a)
};

/**
 * Test whether two adjust sequences compute boolean-inverse values (flag and !flag).
 * Two forms are recognised:
 *  - both end in a DSGA_OP_RST with inverse constant-comparison adjust types on the same
 *    variable/mask (VA2ABIR_CCAT);
 *  - one side has an extra boolean inversion (XOR with constant 1, or an EQ-0 reload of a temp
 *    store) on top of a value equivalent to the other side, where the pre-inversion value is
 *    known boolean (relational comparison or constant-comparison type) — VA2ABIR_XOR_A/_XOR_B
 *    identify which side carries the inversion.
 */
static VarAction2AdjustsBooleanInverseResult AreVarAction2AdjustsBooleanInverse(VarAction2AdjustDescriptor a, VarAction2AdjustDescriptor b)
{
	if (!a.IsValid() || !b.IsValid()) return VA2ABIR_NO;

	if (a.index < 0 || b.index < 0) return VA2ABIR_NO;

	AdvanceVarAction2AdjustDescriptor(a);
	AdvanceVarAction2AdjustDescriptor(b);

	if (a.index < 0 || b.index < 0) return VA2ABIR_NO;

	const DeterministicSpriteGroupAdjust &adj_a = a.GetCurrent();
	const DeterministicSpriteGroupAdjust &adj_b = b.GetCurrent();

	if (adj_a.operation == DSGA_OP_RST && adj_b.operation == DSGA_OP_RST &&
			IsConstantComparisonAdjustType(adj_a.type) && InvertConstantComparisonAdjustType(adj_a.type) == adj_b.type &&
			(std::tie(adj_a.variable, adj_a.shift_num, adj_a.parameter, adj_a.and_mask, adj_a.add_val, adj_a.divmod_val) ==
			std::tie(adj_b.variable, adj_b.shift_num, adj_b.parameter, adj_b.and_mask, adj_b.add_val, adj_b.divmod_val))) {
		return VA2ABIR_CCAT;
	}

	auto check_inverse = [&]() -> bool {
		auto check_inner = [](VarAction2AdjustDescriptor &a, VarAction2AdjustDescriptor &b) -> bool {
			if (a.index >= 0) AdvanceVarAction2AdjustDescriptor(a);
			if (a.index >= 0) {
				const DeterministicSpriteGroupAdjust &a_adj = a.GetCurrent();
				/* Check that the value was bool prior to the XOR */
				if (IsEvalAdjustOperationRelationalComparison(a_adj.operation) || IsConstantComparisonAdjustType(a_adj.type)) {
					if (AreVarAction2AdjustsEquivalent(a, b)) return true;
				}
			}
			return false;
		};
		const DeterministicSpriteGroupAdjust &adj = a.GetCurrent();
		if (adj.operation == DSGA_OP_XOR && adj.type == DSGA_TYPE_NONE && adj.variable == 0x1A && adj.shift_num == 0 && adj.and_mask == 1) {
			/* XOR with constant 1: compare the value underneath against b */
			VarAction2AdjustDescriptor tmp = { a.adjust_array, nullptr, a.index - 1 };
			if (check_inner(tmp, b)) return true;
		}
		if (adj.operation == DSGA_OP_RST && adj.type == DSGA_TYPE_EQ && adj.variable == 0x7D && adj.shift_num == 0 && adj.and_mask == 0xFFFFFFFF && adj.add_val == 0) {
			/* EQ-0 reload of a temp storage register: follow it to the stored value */
			int store_index = GetVarAction2AdjustOfPreviousTempStoreSource(a.adjust_array, a.index - 1, (adj.parameter & 0xFF));
			if (store_index >= 1) {
				/* Found the referenced temp store, use that */
				VarAction2AdjustDescriptor tmp = { a.adjust_array, nullptr, store_index - 1 };
				if (check_inner(tmp, b)) return true;
			}
		}
		return false;
	};

	if (check_inverse()) return VA2ABIR_XOR_A;

	std::swap(a, b);

	if (check_inverse()) return VA2ABIR_XOR_B;

	return VA2ABIR_NO;
}

/*
 * Find and replace the result of:
 * (var * flag) + (var * !flag) with var
 * (-var * (var < 0)) + (var * !(var < 0)) with abs(var)
 * "+" may be ADD, OR or XOR.
 */
static bool TryMergeBoolMulCombineVarAction2Adjust(VarAction2OptimiseState &state, std::vector<DeterministicSpriteGroupAdjust> &adjusts, const int adjust_index)
{
	/* adjusts[adjust_index] is the combining ADD/OR/XOR of a temp storage load; its register holds the other half */
	uint store_var = adjusts[adjust_index].parameter;

	DeterministicSpriteGroupAdjust synth_adjusts[2];       ///< Backing storage for synthesised RST adjusts (see find_adjusts)
	VarAction2AdjustDescriptor found_adjusts[4] = {};      ///< [0]/[1]: mul operand and prior value of half A, [2]/[3]: same for half B
	uint mul_indices[2] = {};                              ///< Adjust indices of the DSGA_OP_MUL of halves A and B

	/* Locate the MUL of one (value, condition) half: found_adjusts[save_index * 2] is the mul
	 * operand (followed through a temp store where possible, otherwise synthesised as an RST),
	 * found_adjusts[save_index * 2 + 1] is the value accumulated before the MUL. */
	auto find_adjusts = [&](int start_index, uint save_index) {
		bool have_mul = false;
		for (int i = start_index; i >= 0; i--) {
			const DeterministicSpriteGroupAdjust &prev = adjusts[i];
			if (prev.variable == 0x7E || prev.variable == 0x7B) {
				/* Procedure call or load depends on the last value, don't use or go past this */
				return;
			}
			if (prev.operation == DSGA_OP_STO) {
				if (prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask < 0x100) {
					/* Temp store */
					if (prev.and_mask == (store_var & 0xFF)) return;
				} else {
					/* Special register store or unpredictable store, don't use or go past this */
					return;
				}
			} else if (prev.operation == DSGA_OP_MUL && !have_mul) {
				/* First source is the variable of mul, if it's a temporary storage load, try to follow it */
				mul_indices[save_index] = i;
				if (prev.variable == 0x7D && prev.type == DSGA_TYPE_NONE && prev.shift_num == 0 && prev.and_mask == 0xFFFFFFFF) {
					int store_index = GetVarAction2AdjustOfPreviousTempStoreSource(adjusts.data(), i - 1, (prev.parameter & 0xFF));
					if (store_index >= 1) {
						/* Found the referenced temp store, use that */
						found_adjusts[save_index * 2] = { adjusts.data(), nullptr, store_index - 1 };
						have_mul = true;
					}
				}
				if (!have_mul) {
					/* It's not a temporary storage load which can be followed, synthesise an RST */
					synth_adjusts[save_index] = prev;
					synth_adjusts[save_index].operation = DSGA_OP_RST;
					synth_adjusts[save_index].adjust_flags = DSGAF_NONE;
					found_adjusts[save_index * 2] = { adjusts.data(), synth_adjusts + save_index, i };
					have_mul = true;
				}
			} else if (prev.operation == DSGA_OP_STOP) {
				/* Don't try to handle writes to permanent storage */
				return;
			} else if (have_mul) {
				/* Found second source */
				found_adjusts[(save_index * 2) + 1] = { adjusts.data(), nullptr, i };
				return;
			} else {
				return;
			}
		}
	};

	find_adjusts(adjust_index - 1, 0); // A (first, closest)
	if (!found_adjusts[0].IsValid() || !found_adjusts[1].IsValid()) return false;

	/* Find offset of referenced store */
	int store_index = GetVarAction2AdjustOfPreviousTempStoreSource(adjusts.data(), adjust_index - 1, (store_var & 0xFF));
	if (store_index < 0) return false;

	find_adjusts(store_index - 1, 1); // B (second, further)
	if (!found_adjusts[2].IsValid() || !found_adjusts[3].IsValid()) return false;

	/* Try all four assignments of (cond, value) to the two sources of each half */
	bool is_cond_first[2];
	VarAction2AdjustsBooleanInverseResult found = VA2ABIR_NO;
	auto try_find = [&](bool a_first, bool b_first) {
		if (found == VA2ABIR_NO) {
			found = AreVarAction2AdjustsBooleanInverse(found_adjusts[a_first ? 0 : 1], found_adjusts[b_first ? 2 : 3]);
			if (found != VA2ABIR_NO) {
				is_cond_first[0] = a_first;
				is_cond_first[1] = b_first;
			}
		}
	};
	try_find(true, true);
	try_find(true, false);
	try_find(false, true);
	try_find(false, false);

	if (found == VA2ABIR_NO) return false;

	/* Truncate adjusts from index start, if nothing in between has side effects */
	auto try_erase_from = [&](uint start) -> bool {
		for (uint i = start; i < (uint)adjusts.size(); i++) {
			const DeterministicSpriteGroupAdjust &adjust = adjusts[i];
			if (adjust.variable == 0x7E || IsEvalAdjustWithSideEffects(adjust.operation)) return false;
		}
		adjusts.erase(adjusts.begin() + start, adjusts.end());
		return true;
	};
	/* Append a copy of adjusts[idx] as an RST, if its source register has not been clobbered since */
	auto try_to_make_rst_from = [&](uint idx) -> bool {
		const DeterministicSpriteGroupAdjust &src = adjusts[idx];
		if (src.variable == 0x7D) {
			/* Check that variable is still valid */
			for (uint i = idx; i < (uint)adjusts.size(); i++) {
				const DeterministicSpriteGroupAdjust &adjust = adjusts[i];
				if (adjust.variable == 0x7E) return false;
				if (adjust.operation == DSGA_OP_STO) {
					if (adjust.type == DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask < 0x100) {
						/* Temp store */
						if (adjust.and_mask == (src.parameter & 0xFF)) return false;
					} else {
						/* Special register store or unpredictable store, don't use or go past this */
						return false;
					}
				}
			}
		}
		adjusts.push_back(src);
		adjusts.back().operation = DSGA_OP_RST;
		adjusts.back().adjust_flags = DSGAF_NONE;
		return true;
	};

	if (AreVarAction2AdjustsEquivalent(found_adjusts[is_cond_first[0] ? 1 : 0], found_adjusts[is_cond_first[1] ? 3 : 2])) {
		/* replace (var * flag) + (var * !flag) with var */

		if (is_cond_first[0]) {
			/* The cond is the mul variable of the first (closest) mul, the actual value is the prior adjust */
			if (try_erase_from(mul_indices[0] + 1)) return true;
		} else {
			/* The value is the mul variable of the first (closest) mul, the cond is the prior adjust */
			if (try_to_make_rst_from(mul_indices[0])) return true;
		}

		if (!is_cond_first[1]) {
			/* The value is the mul variable of the second (further) mul, the cond is the prior adjust */
			if (try_to_make_rst_from(mul_indices[1])) return true;
		}

		return false;
	}

	/* Step desc over a reverse-subtract from constant 0 (i.e. a negation), if present */
	auto check_rsub = [&](VarAction2AdjustDescriptor &desc) -> bool {
		int rsub_offset = desc.index;
		if (rsub_offset < 1) return false;
		const DeterministicSpriteGroupAdjust &adj = adjusts[rsub_offset];
		if (adj.operation == DSGA_OP_RSUB && adj.type == DSGA_TYPE_NONE && adj.variable == 0x1A && adj.shift_num == 0 && adj.and_mask == 0) {
			desc.index--;
			return true;
		}
		return false;
	};

	/* Check that cond is (value < 0): a signed-less-than against constant 0 of a value equivalent to value */
	auto check_abs_cond = [&](VarAction2AdjustDescriptor cond, VarAction2AdjustDescriptor &value) -> bool {
		int lt_offset = cond.index;
		if (lt_offset < 1) return false;
		const DeterministicSpriteGroupAdjust &adj = adjusts[lt_offset];
		if (adj.operation == DSGA_OP_SLT && adj.type == DSGA_TYPE_NONE && adj.variable == 0x1A && adj.shift_num == 0 && adj.and_mask == 0) {
			cond.index--;
			return AreVarAction2AdjustsEquivalent(cond, value);
		}
		return false;
	};

	auto append_abs = [&]() {
		adjusts.emplace_back();
		adjusts.back().operation = DSGA_OP_ABS;
		adjusts.back().variable = 0x1A;
		state.inference |= VA2AIF_SIGNED_NON_NEGATIVE;
	};

	if (found == VA2ABIR_XOR_A) {
		/* Try to find an ABS:
		 * A has the extra invert, check cond of B
		 * B is the negative path with the RSUB
		 */
		VarAction2AdjustDescriptor value_b = found_adjusts[is_cond_first[1] ? 3 : 2];
		const VarAction2AdjustDescriptor &cond_b = found_adjusts[is_cond_first[1] ? 2 : 3];

		if (check_rsub(value_b) && check_abs_cond(cond_b, value_b) && AreVarAction2AdjustsEquivalent(found_adjusts[is_cond_first[0] ? 1 : 0], value_b)) {
			/* Found an ABS, use one of the two value parts */

			if (is_cond_first[0]) {
				/* The cond is the mul variable of the A (first, closest) mul, the actual value is the prior adjust */
				if (try_erase_from(mul_indices[0])) {
					append_abs();
					return true;
				}
			} else {
				/* The value is the mul variable of the A (first, closest) mul, the cond is the prior adjust */
				if (try_to_make_rst_from(mul_indices[0])) {
					append_abs();
					return true;
				}
			}
		}
	}
	if (found == VA2ABIR_XOR_B) {
		/* Try to find an ABS:
		 * B has the extra invert, check cond of A
		 * A is the negative path with the RSUB
		 */
		VarAction2AdjustDescriptor value_a = found_adjusts[is_cond_first[0] ? 1 : 0];
		const VarAction2AdjustDescriptor &cond_a = found_adjusts[is_cond_first[0] ? 0 : 1];

		if (check_rsub(value_a) && check_abs_cond(cond_a, value_a) && AreVarAction2AdjustsEquivalent(found_adjusts[is_cond_first[1] ? 3 : 2], value_a)) {
			/* Found an ABS, use one of the two value parts */

			if (is_cond_first[0]) {
				/* The cond is the mul variable of the A (first, closest) mul, the actual value is the prior adjust, -1 to also remove the RSUB */
				if (try_erase_from(mul_indices[0] - 1)) {
					append_abs();
					return true;
				}
			}

			if (!is_cond_first[1]) {
				/* The value is the mul variable of the B (second, further) mul, the cond is the prior adjust */
				if (try_to_make_rst_from(mul_indices[1])) {
					append_abs();
					return true;
				}
			}
		}
	}

	return false;
}

/* Returns the number of adjusts to remove: 0: neither, 1: current, 2: prev and current */
static uint TryMergeVarAction2AdjustConstantOperations(DeterministicSpriteGroupAdjust &prev, DeterministicSpriteGroupAdjust &current)
{
	/* Both adjusts must be plain constant (variable 0x1A) operations with no type/shift */
	if (prev.type != DSGA_TYPE_NONE || prev.variable != 0x1A || prev.shift_num != 0) return 0;
	if (current.type != DSGA_TYPE_NONE || current.variable != 0x1A || current.shift_num != 0) return 0;

	switch (current.operation) {
		case DSGA_OP_ADD:
		case DSGA_OP_SUB:
			if (prev.operation == current.operation) {
				prev.and_mask += current.and_mask;
				break;
			}
			if (prev.operation == ((current.operation == DSGA_OP_SUB) ?
					DSGA_OP_ADD : DSGA_OP_SUB)) {
				prev.and_mask -= current.and_mask;
				break;
			}
			return 0;

		case DSGA_OP_OR:
			if (prev.operation == DSGA_OP_OR) {
				prev.and_mask |= current.and_mask;
				break;
			}
			return 0;

		case DSGA_OP_AND:
			if (prev.operation == DSGA_OP_AND) {
				prev.and_mask &= current.and_mask;
				break;
			}
			return 0;

		case DSGA_OP_XOR:
			if (prev.operation == DSGA_OP_XOR) {
				prev.and_mask ^= current.and_mask;
				break;
			}
			return 0;

		default:
			return 0;
	}

	if (prev.and_mask == 0 && IsEvalAdjustWithZeroRemovable(prev.operation)) {
		/* prev now does nothing, remove it as well */
		return 2;
	}
	return 1;
}

/**
 * Optimise the most recently appended adjust of a deterministic (VarAction2) sprite group
 * (adjust refers into group->adjusts).
 * Performs peephole optimisations: constant folding/propagation, removal of useless or
 * always-zero operations, deduplication of repeated loads, conversion of store/load/op
 * sequences into direct operations, and maintains the inference flags and temp storage
 * tracking in state for later passes. No-op when the NGOF_NO_OPT_VARACT2 optimiser flag is set.
 *
 * @param state   per-group optimiser state (inference flags, temp store tracking)
 * @param feature GRF feature of the group (used for variable cost classification)
 * @param varsize size in bytes of the group's variable evaluation
 * @param group   group being built; adjust is its last element
 * @param adjust  the adjust to optimise (may be modified, removed or replaced)
 */
void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group, DeterministicSpriteGroupAdjust &adjust)
{
	if (unlikely(HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2))) return;

	/* On scope exit: snapshot the inference state if the last adjust is a barrier
	 * (procedure call or side-effecting op), so it can be restored later if ops after it are undone */
	auto guard = scope_guard([&]() {
		if (!group->adjusts.empty()) {
			const DeterministicSpriteGroupAdjust &adjust = group->adjusts.back();
			if (adjust.variable == 0x7E || IsEvalAdjustWithSideEffects(adjust.operation)) {
				/* save inference state */
				state.inference_backup.adjust_size = (uint)group->adjusts.size();
				state.inference_backup.inference = state.inference;
				state.inference_backup.current_constant = state.current_constant;
			}
		}
	});

	auto try_restore_inference_backup = [&]() {
		if (state.inference_backup.adjust_size != 0 && state.inference_backup.adjust_size == (uint)group->adjusts.size()) {
			state.inference = state.inference_backup.inference;
			state.current_constant = state.current_constant;
		}
	};

	VarAction2AdjustInferenceFlags prev_inference = state.inference;
	state.inference = VA2AIF_NONE;

	auto get_sign_bit = [&]() -> uint32 {
		return (1 << ((varsize * 8) - 1));
	};

	auto get_full_mask = [&]() -> uint32 {
		return UINT_MAX >> ((4 - varsize) * 8);
	};

	auto add_inferences_from_mask = [&](uint32 mask) {
		if (mask == 1) {
			state.inference |= VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
		} else if ((mask & get_sign_bit()) == 0) {
			state.inference |= VA2AIF_SIGNED_NON_NEGATIVE;
		}
	};

	/* Replace the current adjust (and any useless prior ops) with a single constant load */
	auto replace_with_constant_load = [&](uint32 constant) {
		group->adjusts.pop_back();
		if ((prev_inference & VA2AIF_HAVE_CONSTANT) && constant == state.current_constant) {
			/* Don't create a new constant load for the same constant as was there previously */
			state.inference = prev_inference;
			return;
		}
		while (!group->adjusts.empty()) {
			const DeterministicSpriteGroupAdjust &prev = group->adjusts.back();
			if (prev.variable != 0x7E && !IsEvalAdjustWithSideEffects(prev.operation)) {
				/* Delete useless operation */
				group->adjusts.pop_back();
			} else {
				break;
			}
		}
		state.inference = VA2AIF_HAVE_CONSTANT;
		add_inferences_from_mask(constant);
		state.current_constant = constant;
		if (constant != 0 || !group->adjusts.empty()) {
			DeterministicSpriteGroupAdjust &replacement = group->adjusts.emplace_back();
			replacement.operation = DSGA_OP_RST;
			replacement.variable = 0x1A;
			replacement.shift_num = 0;
			replacement.type = DSGA_TYPE_NONE;
			replacement.and_mask = constant;
			replacement.add_val = 0;
			replacement.divmod_val = 0;
			state.inference |= VA2AIF_PREV_MASK_ADJUST;
		}
	};

	/* An unpredictable temp load may read any register not currently tracked: mark them all as inputs */
	auto handle_unpredictable_temp_load = [&]() {
		std::bitset<256> bits;
		bits.set();
		for (auto &it : state.temp_stores) {
			bits.set(it.first, false);
		}
		state.GetVarTracking(group)->in |= bits;
	};
	/* Invalidate all tracked temp store inference (values may have changed) */
	auto reset_store_values = [&]() {
		for (auto &it : state.temp_stores) {
			it.second.inference = VA2AIF_NONE;
			it.second.version++;
		}
		state.default_variable_version++;
	};
	auto handle_unpredictable_temp_store = [&]() {
		reset_store_values();
	};

	auto try_merge_with_previous = [&]() {
		if (adjust.variable == 0x1A && group->adjusts.size() >= 2) {
			/* Merged this adjust into the previous one */
			uint to_remove = TryMergeVarAction2AdjustConstantOperations(group->adjusts[group->adjusts.size() - 2], adjust);
			if (to_remove > 0) group->adjusts.erase(group->adjusts.end() - to_remove, group->adjusts.end());

			if (to_remove == 1 && group->adjusts.back().and_mask == 0 && IsEvalAdjustWithZeroAlwaysZero(group->adjusts.back().operation)) {
				/* Operation always returns 0, replace it and any useless prior operations */
				replace_with_constant_load(0);
			}
		}
	};

	/* Special handling of variable 7B, this uses the parameter as the variable number, and the last value as the variable's parameter.
	 * If the last value is a known constant, it can be substituted immediately. */
	if (adjust.variable == 0x7B) {
		if (prev_inference & VA2AIF_HAVE_CONSTANT) {
			adjust.variable = adjust.parameter;
			adjust.parameter = state.current_constant;
		} else if (adjust.parameter == 0x7D) {
			handle_unpredictable_temp_load();
		} else if (adjust.parameter == 0x1C) {
			/* This is to simplify tracking of variable 1C, the parameter is never used for anything */
			adjust.variable = adjust.parameter;
			adjust.parameter = 0;
		}
	}
	if (adjust.variable == 0x1C && !state.seen_procedure_call) {
		group->dsg_flags |= DSGF_REQUIRES_VAR1C;
	}

	/* Temp storage load (variable 0x7D): substitute a tracked constant or cheap single-load
	 * source where possible; otherwise tag the parameter with the store's version for DSE.
	 * Loop so a substitution to another 0x7D load is followed again. */
	VarAction2AdjustInferenceFlags non_const_var_inference = VA2AIF_NONE;
	while (adjust.variable == 0x7D) {
		non_const_var_inference = VA2AIF_NONE;
		auto iter = state.temp_stores.find(adjust.parameter & 0xFF);
		if (iter == state.temp_stores.end()) {
			/* Read without any previous store */
			state.GetVarTracking(group)->in.set(adjust.parameter & 0xFF, true);
			adjust.parameter |= (state.default_variable_version << 8);
		} else {
			const VarAction2TempStoreInference &store = iter->second;
			if (store.inference & VA2AIF_HAVE_CONSTANT) {
				adjust.variable = 0x1A;
				adjust.and_mask &= (store.store_constant >> adjust.shift_num);
			} else if ((store.inference & VA2AIF_SINGLE_LOAD) && (store.var_source.variable == 0x7D || IsVariableVeryCheap(store.var_source.variable, feature))) {
				if (adjust.type == DSGA_TYPE_NONE && adjust.shift_num == 0 && (adjust.and_mask == 0xFFFFFFFF || ((store.inference & VA2AIF_ONE_OR_ZERO) && (adjust.and_mask & 1)))) {
					adjust.type = store.var_source.type;
					adjust.variable = store.var_source.variable;
					adjust.shift_num = store.var_source.shift_num;
					adjust.parameter = store.var_source.parameter;
					adjust.and_mask = store.var_source.and_mask;
					adjust.add_val = store.var_source.add_val;
					adjust.divmod_val = store.var_source.divmod_val;
					continue;
				} else if (store.var_source.type == DSGA_TYPE_NONE && (adjust.shift_num + store.var_source.shift_num) < 32) {
					adjust.variable = store.var_source.variable;
					adjust.parameter = store.var_source.parameter;
					adjust.and_mask &= store.var_source.and_mask >> adjust.shift_num;
					adjust.shift_num += store.var_source.shift_num;
					continue;
				}
				adjust.parameter |= (store.version << 8);
			} else {
				if (adjust.type == DSGA_TYPE_NONE) {
					non_const_var_inference = store.inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO | VA2AIF_MUL_BOOL);
				}
				if (store.inference & VA2AIF_SINGLE_LOAD) {
					/* Not possible to substitute this here, but it may be possible in the DSE pass */
					state.enable_dse = true;
				}
				adjust.parameter |= (store.version << 8);
			}
		}
		break;
	}

	if (adjust.operation == DSGA_OP_STOP) {
		for (auto &it : state.temp_stores) {
			/* Check if some other variable is marked as a copy of permanent storage */
			if ((it.second.inference & VA2AIF_SINGLE_LOAD) && it.second.var_source.variable == 0x7C) {
				it.second.inference &= ~VA2AIF_SINGLE_LOAD;
			}
		}
	}

	if (IsExpensiveVariable(adjust.variable, feature, group->var_scope)) state.check_expensive_vars = true;

	auto get_prev_single_load = [&](bool *invert) -> const DeterministicSpriteGroupAdjust* {
		return GetVarAction2PreviousSingleLoadAdjust(group->adjusts, (int)group->adjusts.size() - 2, invert);
	};

	auto get_prev_single_store = [&](bool *invert) -> const DeterministicSpriteGroupAdjust* {
		return GetVarAction2PreviousSingleStoreAdjust(group->adjusts, (int)group->adjusts.size() - 2, invert);
	};

	if ((prev_inference & VA2AIF_SINGLE_LOAD) && adjust.operation == DSGA_OP_RST && adjust.variable != 0x1A && adjust.variable != 0x7D && adjust.variable != 0x7E) {
		/* See if this is a repeated load of a variable (not constant, temp store load or procedure call) */
		const DeterministicSpriteGroupAdjust *prev_load = get_prev_single_load(nullptr);
		if (prev_load != nullptr && MemCmpT(prev_load, &adjust) == 0) {
			group->adjusts.pop_back();
			state.inference = prev_inference;
			return;
		}
	}

	if ((prev_inference & VA2AIF_MUL_BOOL) && (non_const_var_inference & VA2AIF_MUL_BOOL) &&
			(adjust.operation == DSGA_OP_ADD || adjust.operation == DSGA_OP_OR || adjust.operation == DSGA_OP_XOR) &&
			adjust.variable == 0x7D && adjust.type == DSGA_TYPE_NONE && adjust.shift_num == 0 && adjust.and_mask == 0xFFFFFFFF) {
		/* Both halves are bool multiplies: try the (var * flag) + (var * !flag) --> var rewrite */
		if (TryMergeBoolMulCombineVarAction2Adjust(state, group->adjusts, (int)(group->adjusts.size() - 1))) {
			OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back());
			return;
		}
	}

	if (group->adjusts.size() >= 2 && adjust.operation == DSGA_OP_RST && adjust.variable != 0x7B) {
		/* See if any previous adjusts can be removed */
		bool removed = false;
		while (group->adjusts.size() >= 2) {
			const DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
			if (prev.variable != 0x7E && !IsEvalAdjustWithSideEffects(prev.operation)) {
				/* Delete useless operation */
				group->adjusts.erase(group->adjusts.end() - 2);
				removed = true;
			} else {
				break;
			}
		}
		if (removed) {
			state.inference = prev_inference;
			OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back());
			return;
		}
	}

	if (adjust.variable != 0x7E && IsEvalAdjustWithZeroLastValueAlwaysZero(adjust.operation)) {
		adjust.adjust_flags |= DSGAF_SKIP_ON_ZERO;
	}

	if ((prev_inference & VA2AIF_PREV_TERNARY) && adjust.variable == 0x1A && IsEvalAdjustUsableForConstantPropagation(adjust.operation)) {
		/* Propagate constant operation back into previous ternary */
		DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
		prev.and_mask = EvaluateDeterministicSpriteGroupAdjust(group->size, adjust, nullptr, prev.and_mask, UINT_MAX);
		prev.add_val = EvaluateDeterministicSpriteGroupAdjust(group->size, adjust, nullptr, prev.add_val, UINT_MAX);
		group->adjusts.pop_back();
		state.inference = prev_inference;
	} else if ((prev_inference & VA2AIF_HAVE_CONSTANT) && adjust.variable == 0x1A && IsEvalAdjustUsableForConstantPropagation(adjust.operation)) {
		/* Reduce constant operation on previous constant */
		replace_with_constant_load(EvaluateDeterministicSpriteGroupAdjust(group->size, adjust, nullptr, state.current_constant, UINT_MAX));
	} else if ((prev_inference & VA2AIF_HAVE_CONSTANT) && state.current_constant == 0 && (adjust.adjust_flags & DSGAF_SKIP_ON_ZERO)) {
		/* Remove operation which does nothing when applied to 0 */
		group->adjusts.pop_back();
		state.inference = prev_inference;
	} else if ((prev_inference & VA2AIF_HAVE_CONSTANT) && IsEvalAdjustOperationOnConstantEffectiveLoad(adjust.operation, state.current_constant)) {
		/* Convert operation to a load */
		DeterministicSpriteGroupAdjust current = group->adjusts.back();
		group->adjusts.pop_back();
		while (!group->adjusts.empty()) {
			const DeterministicSpriteGroupAdjust &prev = group->adjusts.back();
			if (prev.variable != 0x7E && !IsEvalAdjustWithSideEffects(prev.operation)) {
				/* Delete useless operation */
				group->adjusts.pop_back();
			} else {
				break;
			}
		}
		try_restore_inference_backup();
		current.operation = DSGA_OP_RST;
		current.adjust_flags = DSGAF_NONE;
		group->adjusts.push_back(current);
		OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back());
		return;
	} else if (adjust.variable == 0x7E || adjust.type != DSGA_TYPE_NONE) {
		/* Procedure call or complex adjustment */
		if (adjust.operation == DSGA_OP_STO) handle_unpredictable_temp_store();
		if (adjust.variable == 0x7E) {
			/* Procedure call: analyse the callee (recursively) to see which temp stores it clobbers */
			std::bitset<256> seen_stores;
			bool seen_unpredictable_store = false;
			bool seen_special_store = false;
			bool seen_perm_store = false;
			auto handle_proc_stores = y_combinator([&](auto handle_proc_stores, const SpriteGroup *sg) -> void {
				if (sg == nullptr) return;
				if (sg->type == SGT_RANDOMIZED) {
					const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg;
					for (const auto &group : rsg->groups) {
						handle_proc_stores(group);
					}
				} else if (sg->type == SGT_DETERMINISTIC) {
					const DeterministicSpriteGroup *dsg = (const DeterministicSpriteGroup*)sg;
					for (const DeterministicSpriteGroupAdjust &adjust : dsg->adjusts) {
						if (adjust.variable == 0x7E) {
							handle_proc_stores(adjust.subroutine);
						}
						if (adjust.operation == DSGA_OP_STO) {
							if (adjust.type == DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0) {
								/* Temp store */
								if (adjust.and_mask < 0x100) {
									seen_stores.set(adjust.and_mask, true);
								} else {
									seen_special_store = true;
								}
							} else {
								/* Unpredictable store */
								seen_unpredictable_store = true;
							}
						}
						if (adjust.operation == DSGA_OP_STO_NC) {
							if (adjust.divmod_val < 0x100) {
								seen_stores.set(adjust.divmod_val, true);
							} else {
								seen_special_store = true;
							}
						}
						if (adjust.operation == DSGA_OP_STOP) {
							seen_perm_store = true;
						}
					}
				}
			});

			/* Propagate the callee's variable-tracking inputs and var 1C requirement into this group */
			auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void {
				if (sg == nullptr) return;
				if (sg->type == SGT_RANDOMIZED) {
					const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg;
					for (const auto &group : rsg->groups) {
						handle_group(group);
					}
				} else if (sg->type == SGT_DETERMINISTIC) {
					VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sg, false);
					if (var_tracking != nullptr) {
						std::bitset<256> bits = var_tracking->in;
						for (auto &it : state.temp_stores) {
							bits.set(it.first, false);
						}
						state.GetVarTracking(group)->in |= bits;
					}
					if (!state.seen_procedure_call && ((const DeterministicSpriteGroup*)sg)->dsg_flags & DSGF_REQUIRES_VAR1C) {
						group->dsg_flags |= DSGF_REQUIRES_VAR1C;
					}
					handle_proc_stores(sg);
				}
			});
			handle_group(adjust.subroutine);

			if (seen_unpredictable_store) {
				reset_store_values();
			} else {
				for (auto &it : state.temp_stores) {
					if (seen_stores[it.first]) {
						it.second.inference = VA2AIF_NONE;
						it.second.version++;
					} else {
						/* See DSGA_OP_STO handler */
						if ((it.second.inference & VA2AIF_SINGLE_LOAD) && it.second.var_source.variable == 0x7D && seen_stores[it.second.var_source.parameter & 0xFF]) {
							it.second.inference &= ~VA2AIF_SINGLE_LOAD;
						}
						if (seen_special_store && (it.second.inference & VA2AIF_SINGLE_LOAD) && it.second.var_source.variable != 0x7D) {
							it.second.inference &= ~VA2AIF_SINGLE_LOAD;
						}

						/* See DSGA_OP_STOP handler */
						if (seen_perm_store && (it.second.inference & VA2AIF_SINGLE_LOAD) && it.second.var_source.variable == 0x7C) {
							it.second.inference &= ~VA2AIF_SINGLE_LOAD;
						}
					}
				}
			}

			state.seen_procedure_call = true;
		} else if (adjust.operation == DSGA_OP_RST) {
			state.inference = VA2AIF_SINGLE_LOAD;
		}
		if (IsConstantComparisonAdjustType(adjust.type)) {
			if (adjust.operation == DSGA_OP_RST) {
				state.inference |= VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
			} else if (adjust.operation == DSGA_OP_OR || adjust.operation == DSGA_OP_XOR || adjust.operation == DSGA_OP_AND) {
				state.inference |= (prev_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO));
			}
			if (adjust.operation == DSGA_OP_OR && (prev_inference & VA2AIF_ONE_OR_ZERO) && adjust.variable != 0x7E) {
				adjust.adjust_flags |= DSGAF_SKIP_ON_LSB_SET;
			}
			if (adjust.operation == DSGA_OP_MUL && adjust.variable != 0x7E) {
				state.inference |= VA2AIF_MUL_BOOL;
				adjust.adjust_flags |= DSGAF_JUMP_INS_HINT;
				group->dsg_flags |= DSGF_CHECK_INSERT_JUMP;
			}
		}
	} else {
		if (adjust.and_mask == 0 && IsEvalAdjustWithZeroRemovable(adjust.operation)) {
			/* Delete useless zero operations */
			group->adjusts.pop_back();
			state.inference = prev_inference;
		} else if (adjust.and_mask == 0 && IsEvalAdjustWithZeroAlwaysZero(adjust.operation)) {
			/* Operation always returns 0, replace it and any useless prior operations */
			replace_with_constant_load(0);
		} else {
			if (adjust.variable == 0x7D && adjust.shift_num == 0 && adjust.and_mask == get_full_mask() && IsEvalAdjustOperationCommutative(adjust.operation) && group->adjusts.size() >= 2) {
				DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
				if (group->adjusts.size() >= 3 && prev.operation == DSGA_OP_RST) {
					const DeterministicSpriteGroupAdjust &prev2 = group->adjusts[group->adjusts.size() - 3];
					if (prev2.operation == DSGA_OP_STO && prev2.type == DSGA_TYPE_NONE && prev2.variable == 0x1A &&
							prev2.shift_num == 0 && prev2.and_mask == (adjust.parameter & 0xFF)) {
						/* Convert: store, load var, commutative op on stored --> (dead) store, commutative op var */
						prev.operation = adjust.operation;
						group->adjusts.pop_back();
						state.inference = non_const_var_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO | VA2AIF_MUL_BOOL);
						OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back());
						return;
					}
				}
			}
			switch (adjust.operation) {
				case DSGA_OP_ADD:
					try_merge_with_previous();
					break;
				case DSGA_OP_SUB:
					if (adjust.variable == 0x7D && adjust.shift_num == 0 && adjust.and_mask == 0xFFFFFFFF && group->adjusts.size() >= 2) {
						DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
						if (group->adjusts.size() >= 3 && prev.operation == DSGA_OP_RST) {
							const DeterministicSpriteGroupAdjust &prev2 = group->adjusts[group->adjusts.size() - 3];
							if (prev2.operation == DSGA_OP_STO && prev2.type == DSGA_TYPE_NONE && prev2.variable == 0x1A &&
									prev2.shift_num == 0 && prev2.and_mask == (adjust.parameter & 0xFF)) {
								/* Convert: store, load var, subtract stored --> (dead) store, reverse subtract var */
								prev.operation = DSGA_OP_RSUB;
								group->adjusts.pop_back();
								state.inference = non_const_var_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO);
								OptimiseVarAction2Adjust(state, feature, varsize, group, group->adjusts.back());
								return;
							}
						}
					}
					if (adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask == 1 && group->adjusts.size() >= 2) {
						DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
						if (prev.operation == DSGA_OP_SCMP) {
							state.inference |= VA2AIF_PREV_SCMP_DEC;
						}
					}
					try_merge_with_previous();
					break;
				case DSGA_OP_SMIN:
					if (adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask == 1 && group->adjusts.size() >= 2) {
						DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2];
						if (prev.operation == DSGA_OP_SCMP) {
							/* SCMP result clamped to [0, 1]: equivalent to a signed >= comparison */
							prev.operation = DSGA_OP_SGE;
							group->adjusts.pop_back();
							state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
							break;
						}
						if (group->adjusts.size() >= 3 && prev.operation == DSGA_OP_XOR && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A &&
								prev.shift_num == 0 && prev.and_mask == 2) {
							DeterministicSpriteGroupAdjust &prev2 = group->adjusts[group->adjusts.size() - 3];
							if (prev2.operation == DSGA_OP_SCMP) {
								/* SCMP XOR 2, clamped to [0, 1]: equivalent to a signed <= comparison */
								prev2.operation = DSGA_OP_SLE;
								group->adjusts.pop_back();
								group->adjusts.pop_back();
								state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
								break;
							}
						}
					}
					if (adjust.and_mask <= 1 && (prev_inference & VA2AIF_SIGNED_NON_NEGATIVE)) state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO;
					break;
				case DSGA_OP_SMAX:
					if (adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask == 0 && group->adjusts.size() >= 2) {
						DeterministicSpriteGroupAdjust &prev
= group->adjusts[group->adjusts.size() - 2]; + if (group->adjusts.size() >= 3 && prev.operation == DSGA_OP_SUB && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && + prev.shift_num == 0 && prev.and_mask == 1) { + DeterministicSpriteGroupAdjust &prev2 = group->adjusts[group->adjusts.size() - 3]; + if (prev2.operation == DSGA_OP_SCMP) { + prev2.operation = DSGA_OP_SGT; + group->adjusts.pop_back(); + group->adjusts.pop_back(); + state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO; + break; + } + } + } + break; + case DSGA_OP_UMIN: + if (adjust.and_mask == 1) { + if (prev_inference & VA2AIF_ONE_OR_ZERO) { + /* Delete useless bool -> bool conversion */ + group->adjusts.pop_back(); + state.inference = prev_inference; + break; + } else { + state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO; + if (group->adjusts.size() >= 2) { + DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2]; + if (prev.operation == DSGA_OP_RST && prev.type == DSGA_TYPE_NONE) { + prev.type = DSGA_TYPE_NEQ; + prev.add_val = 0; + group->adjusts.pop_back(); + state.inference |= VA2AIF_SINGLE_LOAD; + } + } + } + } + break; + case DSGA_OP_AND: + if ((prev_inference & VA2AIF_PREV_MASK_ADJUST) && adjust.variable == 0x1A && adjust.shift_num == 0 && group->adjusts.size() >= 2) { + /* Propagate and into immediately prior variable read */ + DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2]; + prev.and_mask &= adjust.and_mask; + add_inferences_from_mask(prev.and_mask); + state.inference |= VA2AIF_PREV_MASK_ADJUST; + group->adjusts.pop_back(); + break; + } + if (adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask == 1 && group->adjusts.size() >= 2) { + DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2]; + if (prev.operation == DSGA_OP_SCMP || prev.operation == DSGA_OP_UCMP) { + prev.operation = DSGA_OP_EQ; + group->adjusts.pop_back(); + state.inference = 
VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO; + if (group->adjusts.size() >= 2) { + DeterministicSpriteGroupAdjust &eq_adjust = group->adjusts[group->adjusts.size() - 1]; + DeterministicSpriteGroupAdjust &prev_op = group->adjusts[group->adjusts.size() - 2]; + if (eq_adjust.type == DSGA_TYPE_NONE && eq_adjust.variable == 0x1A && + prev_op.type == DSGA_TYPE_NONE && prev_op.operation == DSGA_OP_RST) { + prev_op.type = DSGA_TYPE_EQ; + prev_op.add_val = (0xFFFFFFFF >> eq_adjust.shift_num) & eq_adjust.and_mask; + group->adjusts.pop_back(); + state.inference |= VA2AIF_SINGLE_LOAD; + } + } + break; + } + if (prev_inference & VA2AIF_ONE_OR_ZERO) { + /* Current value is already one or zero, remove this */ + group->adjusts.pop_back(); + state.inference = prev_inference; + break; + } + } + if (adjust.and_mask <= 1) { + state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO; + } else if ((adjust.and_mask & get_sign_bit()) == 0) { + state.inference = VA2AIF_SIGNED_NON_NEGATIVE; + } + state.inference |= non_const_var_inference; + if ((state.inference & VA2AIF_ONE_OR_ZERO) && (prev_inference & VA2AIF_ONE_OR_ZERO)) { + adjust.adjust_flags |= DSGAF_JUMP_INS_HINT; + group->dsg_flags |= DSGF_CHECK_INSERT_JUMP; + } + try_merge_with_previous(); + break; + case DSGA_OP_OR: + if (adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask == 1 && (prev_inference & VA2AIF_ONE_OR_ZERO)) { + replace_with_constant_load(1); + break; + } + if (adjust.and_mask <= 1) state.inference = prev_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO); + state.inference |= prev_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO) & non_const_var_inference; + if ((non_const_var_inference & VA2AIF_ONE_OR_ZERO) || (adjust.and_mask <= 1)) { + adjust.adjust_flags |= DSGAF_SKIP_ON_LSB_SET; + if (prev_inference & VA2AIF_ONE_OR_ZERO) { + adjust.adjust_flags |= DSGAF_JUMP_INS_HINT; + group->dsg_flags |= DSGF_CHECK_INSERT_JUMP; + } + } + try_merge_with_previous(); + break; + 
case DSGA_OP_XOR: + if (adjust.variable == 0x1A && adjust.shift_num == 0 && group->adjusts.size() >= 2) { + DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2]; + if (adjust.and_mask == 1) { + if (IsEvalAdjustOperationRelationalComparison(prev.operation)) { + prev.operation = InvertEvalAdjustRelationalComparisonOperation(prev.operation); + group->adjusts.pop_back(); + state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO; + break; + } + if (prev.operation == DSGA_OP_UMIN && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == 1) { + prev.operation = DSGA_OP_TERNARY; + prev.adjust_flags = DSGAF_NONE; + prev.and_mask = 0; + prev.add_val = 1; + group->adjusts.pop_back(); + state.inference = VA2AIF_PREV_TERNARY; + break; + } + if (prev.operation == DSGA_OP_RST && IsConstantComparisonAdjustType(prev.type)) { + prev.type = InvertConstantComparisonAdjustType(prev.type); + group->adjusts.pop_back(); + state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO | VA2AIF_SINGLE_LOAD; + break; + } + if (prev.operation == DSGA_OP_OR && (IsConstantComparisonAdjustType(prev.type) || (prev.type == DSGA_TYPE_NONE && (prev.adjust_flags & DSGAF_SKIP_ON_LSB_SET))) && group->adjusts.size() >= 3) { + DeterministicSpriteGroupAdjust &prev2 = group->adjusts[group->adjusts.size() - 3]; + bool found = false; + if (IsEvalAdjustOperationRelationalComparison(prev2.operation)) { + prev2.operation = InvertEvalAdjustRelationalComparisonOperation(prev2.operation); + found = true; + } else if (prev2.operation == DSGA_OP_RST && IsConstantComparisonAdjustType(prev2.type)) { + prev2.type = InvertConstantComparisonAdjustType(prev2.type); + found = true; + } + if (found) { + if (prev.type == DSGA_TYPE_NONE) { + prev.type = DSGA_TYPE_EQ; + prev.add_val = 0; + } else { + prev.type = InvertConstantComparisonAdjustType(prev.type); + } + prev.operation = DSGA_OP_AND; + prev.adjust_flags = DSGAF_SKIP_ON_ZERO; + 
group->adjusts.pop_back(); + state.inference = VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO; + break; + } + } + } + if (prev.operation == DSGA_OP_OR && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == adjust.and_mask) { + prev.operation = DSGA_OP_AND; + prev.and_mask = ~prev.and_mask; + prev.adjust_flags = DSGAF_NONE; + group->adjusts.pop_back(); + break; + } + } + if (adjust.and_mask <= 1) state.inference = prev_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO); + state.inference |= prev_inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO) & non_const_var_inference; + if (adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask == 1) { + /* Single load tracking can handle bool inverts */ + state.inference |= (prev_inference & VA2AIF_SINGLE_LOAD); + } + try_merge_with_previous(); + break; + case DSGA_OP_MUL: { + if ((prev_inference & VA2AIF_ONE_OR_ZERO) && adjust.variable == 0x1A && adjust.shift_num == 0 && group->adjusts.size() >= 2) { + /* Found a ternary operator */ + adjust.operation = DSGA_OP_TERNARY; + adjust.adjust_flags = DSGAF_NONE; + while (group->adjusts.size() > 1) { + /* Merge with previous if applicable */ + const DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2]; + if (prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == 1) { + if (prev.operation == DSGA_OP_XOR) { + DeterministicSpriteGroupAdjust current = group->adjusts.back(); + group->adjusts.pop_back(); + group->adjusts.pop_back(); + std::swap(current.and_mask, current.add_val); + group->adjusts.push_back(current); + continue; + } else if (prev.operation == DSGA_OP_SMIN || prev.operation == DSGA_OP_UMIN) { + DeterministicSpriteGroupAdjust current = group->adjusts.back(); + group->adjusts.pop_back(); + group->adjusts.pop_back(); + group->adjusts.push_back(current); + } + } + break; + } + if (group->adjusts.size() > 1) { + /* Remove redundant 
comparison with 0 if applicable */ + const DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2]; + if (prev.type == DSGA_TYPE_NONE && prev.operation == DSGA_OP_EQ && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == 0) { + DeterministicSpriteGroupAdjust current = group->adjusts.back(); + group->adjusts.pop_back(); + group->adjusts.pop_back(); + std::swap(current.and_mask, current.add_val); + group->adjusts.push_back(current); + } + } + state.inference = VA2AIF_PREV_TERNARY; + break; + } + if ((prev_inference & VA2AIF_PREV_SCMP_DEC) && group->adjusts.size() >= 4 && adjust.variable == 0x7D && adjust.shift_num == 0 && adjust.and_mask == 0xFFFFFFFF) { + const DeterministicSpriteGroupAdjust &adj1 = group->adjusts[group->adjusts.size() - 4]; + const DeterministicSpriteGroupAdjust &adj2 = group->adjusts[group->adjusts.size() - 3]; + const DeterministicSpriteGroupAdjust &adj3 = group->adjusts[group->adjusts.size() - 2]; + auto is_expected_op = [](const DeterministicSpriteGroupAdjust &adj, DeterministicSpriteGroupAdjustOperation op, uint32 value) -> bool { + return adj.operation == op && adj.type == DSGA_TYPE_NONE && adj.variable == 0x1A && adj.shift_num == 0 && adj.and_mask == value; + }; + if (is_expected_op(adj1, DSGA_OP_STO, (adjust.parameter & 0xFF)) && + is_expected_op(adj2, DSGA_OP_SCMP, 0) && + is_expected_op(adj3, DSGA_OP_SUB, 1)) { + group->adjusts.pop_back(); + group->adjusts.pop_back(); + group->adjusts.back().operation = DSGA_OP_ABS; + state.inference |= VA2AIF_SIGNED_NON_NEGATIVE; + break; + } + } + uint32 sign_bit = (1 << ((varsize * 8) - 1)); + if ((prev_inference & VA2AIF_PREV_MASK_ADJUST) && (prev_inference & VA2AIF_SIGNED_NON_NEGATIVE) && adjust.variable == 0x1A && adjust.shift_num == 0 && (adjust.and_mask & sign_bit) == 0) { + /* Determine whether the result will be always non-negative */ + if (((uint64)group->adjusts[group->adjusts.size() - 2].and_mask) * ((uint64)adjust.and_mask) < ((uint64)sign_bit)) { + 
state.inference |= VA2AIF_SIGNED_NON_NEGATIVE; + } + } + if ((prev_inference & VA2AIF_ONE_OR_ZERO) || (non_const_var_inference & VA2AIF_ONE_OR_ZERO)) { + state.inference |= VA2AIF_MUL_BOOL; + } + if (non_const_var_inference & VA2AIF_ONE_OR_ZERO) { + adjust.adjust_flags |= DSGAF_JUMP_INS_HINT; + group->dsg_flags |= DSGF_CHECK_INSERT_JUMP; + } + break; + } + case DSGA_OP_SCMP: + case DSGA_OP_UCMP: + state.inference = VA2AIF_SIGNED_NON_NEGATIVE; + break; + case DSGA_OP_STOP: + state.inference = prev_inference & (~VA2AIF_PREV_MASK); + break; + case DSGA_OP_STO: + state.inference = prev_inference & (~VA2AIF_PREV_MASK); + if (adjust.variable == 0x1A && adjust.shift_num == 0) { + state.inference |= VA2AIF_PREV_STORE_TMP; + if (adjust.and_mask < 0x100) { + for (auto &it : state.temp_stores) { + /* Check if some other variable is marked as a copy of the one we are overwriting */ + if ((it.second.inference & VA2AIF_SINGLE_LOAD) && it.second.var_source.variable == 0x7D && (it.second.var_source.parameter & 0xFF) == adjust.and_mask) { + it.second.inference &= ~VA2AIF_SINGLE_LOAD; + } + } + VarAction2TempStoreInference &store = state.temp_stores[adjust.and_mask]; + if (store.version == 0) { + /* New store */ + store.version = state.default_variable_version + 1; + } else { + /* Updating previous store */ + store.version++; + } + store.inference = prev_inference & (~VA2AIF_PREV_MASK); + store.store_constant = state.current_constant; + + bool invert_store = false; + const DeterministicSpriteGroupAdjust *prev_store = get_prev_single_store((prev_inference & VA2AIF_ONE_OR_ZERO) ? &invert_store : nullptr); + if (prev_store != nullptr) { + /* This store is a clone of the previous store, or inverted clone of the previous store (bool) */ + store.inference |= VA2AIF_SINGLE_LOAD; + store.var_source.type = (invert_store ? 
DSGA_TYPE_EQ : DSGA_TYPE_NONE); + store.var_source.variable = 0x7D; + store.var_source.shift_num = 0; + store.var_source.parameter = prev_store->and_mask | (state.temp_stores[prev_store->and_mask].version << 8); + store.var_source.and_mask = 0xFFFFFFFF; + store.var_source.add_val = 0; + store.var_source.divmod_val = 0; + break; + } + + if (prev_inference & VA2AIF_SINGLE_LOAD) { + bool invert = false; + const DeterministicSpriteGroupAdjust *prev_load = get_prev_single_load(&invert); + if (prev_load != nullptr && (!invert || IsConstantComparisonAdjustType(prev_load->type))) { + store.inference |= VA2AIF_SINGLE_LOAD; + store.var_source.type = prev_load->type; + if (invert) store.var_source.type = InvertConstantComparisonAdjustType(store.var_source.type); + store.var_source.variable = prev_load->variable; + store.var_source.shift_num = prev_load->shift_num; + store.var_source.parameter = prev_load->parameter; + store.var_source.and_mask = prev_load->and_mask; + store.var_source.add_val = prev_load->add_val; + store.var_source.divmod_val = prev_load->divmod_val; + break; + } + } + } else { + /* Store to special register, this can change the result of future variable loads for some variables. + * Assume all variables except temp storage for now. 
+ */ + for (auto &it : state.temp_stores) { + if (it.second.inference & VA2AIF_SINGLE_LOAD && it.second.var_source.variable != 0x7D) { + it.second.inference &= ~VA2AIF_SINGLE_LOAD; + } + } + } + } else { + handle_unpredictable_temp_store(); + } + break; + case DSGA_OP_RST: + if ((prev_inference & VA2AIF_PREV_STORE_TMP) && adjust.variable == 0x7D && adjust.shift_num == 0 && adjust.and_mask == get_full_mask() && group->adjusts.size() >= 2) { + const DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2]; + if (prev.type == DSGA_TYPE_NONE && prev.operation == DSGA_OP_STO && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == (adjust.parameter & 0xFF)) { + /* Redundant load from temp store after store to temp store */ + group->adjusts.pop_back(); + state.inference = prev_inference; + break; + } + } + add_inferences_from_mask(adjust.and_mask); + state.inference |= VA2AIF_PREV_MASK_ADJUST | VA2AIF_SINGLE_LOAD; + if (adjust.variable == 0x1A || adjust.and_mask == 0) { + replace_with_constant_load(EvaluateDeterministicSpriteGroupAdjust(group->size, adjust, nullptr, 0, UINT_MAX)); + } + break; + case DSGA_OP_SHR: + case DSGA_OP_SAR: + if ((adjust.operation == DSGA_OP_SHR || (prev_inference & VA2AIF_SIGNED_NON_NEGATIVE)) && + ((prev_inference & VA2AIF_PREV_MASK_ADJUST) && adjust.variable == 0x1A && adjust.shift_num == 0 && group->adjusts.size() >= 2)) { + /* Propagate shift right into immediately prior variable read */ + DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2]; + if (prev.shift_num + adjust.and_mask < 32) { + prev.shift_num += adjust.and_mask; + prev.and_mask >>= adjust.and_mask; + add_inferences_from_mask(prev.and_mask); + state.inference |= VA2AIF_PREV_MASK_ADJUST; + group->adjusts.pop_back(); + break; + } + } + break; + case DSGA_OP_SDIV: + if ((prev_inference & VA2AIF_SIGNED_NON_NEGATIVE) && adjust.variable == 0x1A && adjust.shift_num == 0 && HasExactlyOneBit(adjust.and_mask)) { + uint 
shift_count = FindFirstBit(adjust.and_mask); + if (group->adjusts.size() >= 3 && shift_count == 16 && varsize == 4 && (feature == GSF_TRAINS || feature == GSF_ROADVEHICLES || feature == GSF_SHIPS)) { + const DeterministicSpriteGroupAdjust &prev = group->adjusts[group->adjusts.size() - 2]; + DeterministicSpriteGroupAdjust &prev2 = group->adjusts[group->adjusts.size() - 3]; + if (prev.operation == DSGA_OP_MUL && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask <= 0xFFFF && + (prev2.operation == DSGA_OP_RST || group->adjusts.size() == 3) && prev2.type == DSGA_TYPE_NONE && prev2.variable == 0xB4 && prev2.shift_num == 0 && prev2.and_mask == 0xFFFF) { + /* Replace with scaled current speed */ + prev2.variable = A2VRI_VEHICLE_CURRENT_SPEED_SCALED; + prev2.parameter = prev.and_mask; + group->adjusts.pop_back(); + group->adjusts.pop_back(); + state.inference = VA2AIF_SIGNED_NON_NEGATIVE; + break; + } + } + /* Convert to a shift */ + adjust.operation = DSGA_OP_SHR; + adjust.and_mask = shift_count; + state.inference = VA2AIF_SIGNED_NON_NEGATIVE; + } + break; + default: + break; + } + } + } +} + +static bool CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSpriteGroup *group, std::bitset<256> bits, bool quick_exit); + +static void RecursiveDisallowDSEForProcedure(const SpriteGroup *group) +{ + if (group == nullptr) return; + + if (group->type == SGT_RANDOMIZED) { + const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)group; + for (const auto &g : rsg->groups) { + RecursiveDisallowDSEForProcedure(g); + } + return; + } + + if (group->type != SGT_DETERMINISTIC) return; + + const DeterministicSpriteGroup *sub = static_cast(group); + if (sub->dsg_flags & DSGF_DSE_RECURSIVE_DISABLE) return; + const_cast(sub)->dsg_flags |= (DSGF_NO_DSE | DSGF_DSE_RECURSIVE_DISABLE); + for (const DeterministicSpriteGroupAdjust &adjust : sub->adjusts) { + if (adjust.variable == 0x7E) 
RecursiveDisallowDSEForProcedure(adjust.subroutine); + } + if (!sub->calculated_result) { + RecursiveDisallowDSEForProcedure(sub->default_group); + for (const auto &range : sub->ranges) { + RecursiveDisallowDSEForProcedure(range.group); + } + } +} + +static bool CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSpriteGroup *group, std::bitset<256> bits, bool quick_exit) +{ + bool dse = false; + for (int i = (int)group->adjusts.size() - 1; i >= 0; i--) { + const DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; + if (adjust.operation == DSGA_OP_STO) { + if (adjust.type == DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask < 0x100) { + /* Predictable store */ + if (!bits[adjust.and_mask]) { + /* Possibly redundant store */ + dse = true; + if (quick_exit) break; + } + bits.set(adjust.and_mask, false); + } + } + if (adjust.operation == DSGA_OP_STO_NC && adjust.divmod_val < 0x100) { + if (!bits[adjust.divmod_val]) { + /* Possibly redundant store */ + dse = true; + if (quick_exit) break; + } + bits.set(adjust.divmod_val, false); + } + if (adjust.variable == 0x7B && adjust.parameter == 0x7D) { + /* Unpredictable load */ + bits.set(); + } + if (adjust.variable == 0x7D && adjust.parameter) { + bits.set(adjust.parameter & 0xFF, true); + } + if (adjust.variable == 0x7E) { + /* procedure call */ + auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void { + if (sg == nullptr) return; + if (sg->type == SGT_RANDOMIZED) { + const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; + for (const auto &group : rsg->groups) { + handle_group(group); + } + } else if (sg->type == SGT_DETERMINISTIC) { + const DeterministicSpriteGroup *sub = static_cast(sg); + VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sub, true); + auto procedure_dse_ok = [&]() -> bool { + if (sub->calculated_result) return true; + + if (sub->default_group != nullptr && 
sub->default_group->type != SGT_CALLBACK) return false; + for (const auto &range : sub->ranges) { + if (range.group != nullptr && range.group->type != SGT_CALLBACK) return false; + } + return true; + }; + if (procedure_dse_ok()) { + std::bitset<256> new_proc_call_out = bits | var_tracking->proc_call_out; + if (new_proc_call_out != var_tracking->proc_call_out) { + var_tracking->proc_call_out = new_proc_call_out; + std::bitset<256> old_total = var_tracking->out | var_tracking->proc_call_out; + std::bitset<256> new_total = var_tracking->out | new_proc_call_out; + if (old_total != new_total) { + CheckDeterministicSpriteGroupOutputVarBits(sub, new_total, false); + } + } + } else { + RecursiveDisallowDSEForProcedure(sub); + } + bits |= var_tracking->in; + } + }); + handle_group(adjust.subroutine); + } + } + return dse; +} + +static bool OptimiseVarAction2DeterministicSpriteGroupExpensiveVarsInner(DeterministicSpriteGroup *group, VarAction2GroupVariableTracking *var_tracking) +{ + btree::btree_map seen_expensive_variables; + std::bitset<256> usable_vars; + if (var_tracking != nullptr) { + usable_vars = ~(var_tracking->out | var_tracking->proc_call_out); + } else { + usable_vars.set(); + } + uint16 target_var = 0; + uint32 target_param = 0; + auto found_target = [&]() -> bool { + for (auto &iter : seen_expensive_variables) { + if (iter.second >= 2) { + target_var = iter.first >> 32; + target_param = iter.first & 0xFFFFFFFF; + return true; + } + } + return false; + }; + auto do_replacements = [&](int start, int end) { + std::bitset<256> mask(UINT64_MAX); + std::bitset<256> cur = usable_vars; + uint8 bit = 0; + while (true) { + uint64 t = (cur & mask).to_ullong(); + if (t != 0) { + bit += FindFirstBit(t); + break; + } + cur >>= 64; + bit += 64; + } + int insert_pos = start; + uint32 and_mask = 0; + uint condition_depth = 0; + bool seen_first = false; + int last_unused_jump = -1; + for (int j = end; j >= start; j--) { + DeterministicSpriteGroupAdjust &adjust = 
group->adjusts[j]; + if (seen_first && IsEvalAdjustJumpOperation(adjust.operation)) { + if (condition_depth > 0) { + /* Do not insert the STO_NC inside a conditional block when it is also needed outside the block */ + condition_depth--; + insert_pos = j; + } else { + last_unused_jump = j; + } + } + if (seen_first && adjust.adjust_flags & DSGAF_END_BLOCK) condition_depth += adjust.jump; + if (adjust.variable == target_var && adjust.parameter == target_param) { + and_mask |= adjust.and_mask << adjust.shift_num; + adjust.variable = 0x7D; + adjust.parameter = bit; + insert_pos = j; + seen_first = true; + } + } + DeterministicSpriteGroupAdjust load = {}; + load.operation = DSGA_OP_STO_NC; + load.type = DSGA_TYPE_NONE; + load.variable = target_var; + load.shift_num = 0; + load.parameter = target_param; + load.and_mask = and_mask; + load.divmod_val = bit; + if (group->adjusts[insert_pos].adjust_flags & DSGAF_SKIP_ON_ZERO) { + for (int j = insert_pos + 1; j <= end; j++) { + if (group->adjusts[j].adjust_flags & DSGAF_SKIP_ON_ZERO) continue; + if (group->adjusts[j].operation == DSGA_OP_JZ_LV && last_unused_jump == j) { + /* The variable is never actually read if last_value is 0 at this point */ + load.adjust_flags |= DSGAF_SKIP_ON_ZERO; + } + break; + } + } + group->adjusts.insert(group->adjusts.begin() + insert_pos, load); + }; + + int i = (int)group->adjusts.size() - 1; + int end = i; + while (i >= 0) { + const DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; + if (adjust.operation == DSGA_OP_STO && (adjust.type != DSGA_TYPE_NONE || adjust.variable != 0x1A || adjust.shift_num != 0)) return false; + if (adjust.variable == 0x7B && adjust.parameter == 0x7D) return false; + if (adjust.operation == DSGA_OP_STO_NC && adjust.divmod_val < 0x100) { + usable_vars.set(adjust.divmod_val, false); + } + if (adjust.operation == DSGA_OP_STO && adjust.and_mask < 0x100) { + usable_vars.set(adjust.and_mask, false); + } else if (adjust.variable == 0x7D) { + if (adjust.parameter < 
0x100) usable_vars.set(adjust.parameter, false); + } else if (IsExpensiveVariable(adjust.variable, group->feature, group->var_scope)) { + seen_expensive_variables[(((uint64)adjust.variable) << 32) | adjust.parameter]++; + } + if (adjust.variable == 0x7E || (adjust.operation == DSGA_OP_STO && adjust.and_mask >= 0x100) || (adjust.operation == DSGA_OP_STO_NC && adjust.divmod_val >= 0x100)) { + /* Can't cross this barrier, stop here */ + if (usable_vars.none()) return false; + if (found_target()) { + do_replacements(i + 1, end); + return true; + } + seen_expensive_variables.clear(); + end = i - 1; + if (adjust.variable == 0x7E) { + auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void { + if (sg != nullptr && sg->type == SGT_DETERMINISTIC) { + VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sg, false); + if (var_tracking != nullptr) usable_vars &= ~var_tracking->in; + } + if (sg != nullptr && sg->type == SGT_RANDOMIZED) { + const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; + for (const auto &group : rsg->groups) { + handle_group(group); + } + } + }); + handle_group(adjust.subroutine); + } + } + i--; + } + if (usable_vars.none()) return false; + if (found_target()) { + do_replacements(0, end); + return true; + } + + return false; +} + +static void OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(DeterministicSpriteGroup *group) +{ + VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(group, false); + while (OptimiseVarAction2DeterministicSpriteGroupExpensiveVarsInner(group, var_tracking)) {} +} + +static void OptimiseVarAction2DeterministicSpriteGroupSimplifyStores(DeterministicSpriteGroup *group) +{ + if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_SIMPLIFY_STORES)) return; + + int src_adjust = -1; + bool is_constant = false; + for (size_t i = 0; i < group->adjusts.size(); i++) { + auto acceptable_store = [](const 
DeterministicSpriteGroupAdjust &adjust) -> bool { + return adjust.type == DSGA_TYPE_NONE && adjust.operation == DSGA_OP_STO && adjust.variable == 0x1A && adjust.shift_num == 0; + }; + + DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; + + if ((adjust.type == DSGA_TYPE_NONE || IsConstantComparisonAdjustType(adjust.type)) && adjust.operation == DSGA_OP_RST && adjust.variable != 0x7E) { + src_adjust = (int)i; + is_constant = (adjust.variable == 0x1A); + continue; + } + + if (src_adjust >= 0 && acceptable_store(adjust)) { + bool ok = false; + bool more_stores = false; + size_t j = i; + while (true) { + j++; + if (j == group->adjusts.size()) { + ok = !group->calculated_result && group->ranges.empty(); + break; + } + const DeterministicSpriteGroupAdjust &next = group->adjusts[j]; + if (next.operation == DSGA_OP_RST) { + ok = (next.variable != 0x7B); + break; + } + if (is_constant && next.operation == DSGA_OP_STO_NC) { + continue; + } + if (is_constant && acceptable_store(next)) { + more_stores = true; + continue; + } + break; + } + if (ok) { + const DeterministicSpriteGroupAdjust &src = group->adjusts[src_adjust]; + adjust.operation = DSGA_OP_STO_NC; + adjust.type = src.type; + adjust.adjust_flags = DSGAF_NONE; + adjust.divmod_val = adjust.and_mask; + adjust.add_val = src.add_val; + adjust.variable = src.variable; + adjust.parameter = src.parameter; + adjust.shift_num = src.shift_num; + adjust.and_mask = src.and_mask; + if (more_stores) { + continue; + } + group->adjusts.erase(group->adjusts.begin() + src_adjust); + i--; + } + } + + src_adjust = -1; + } +} + +static void OptimiseVarAction2DeterministicSpriteGroupAdjustOrdering(DeterministicSpriteGroup *group) +{ + if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_ADJUST_ORDERING)) return; + + auto acceptable_variable = [](uint16 variable) -> bool { + return variable != 0x7E && variable != 0x7B; + }; + + auto get_variable_expense = [&](uint16 variable) -> int { + if (variable == 0x1A) return -15; + if 
(IsVariableVeryCheap(variable, group->feature)) return -10; + if (variable == 0x7D || variable == 0x7C) return -5; + if (IsExpensiveVariable(variable, group->feature, group->var_scope)) return 10; + return 0; + }; + + for (size_t i = 0; i + 1 < group->adjusts.size(); i++) { + DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; + + if (adjust.operation == DSGA_OP_RST && acceptable_variable(adjust.variable)) { + DeterministicSpriteGroupAdjustOperation operation = group->adjusts[i + 1].operation; + const size_t start = i; + size_t end = i; + if (IsEvalAdjustWithZeroLastValueAlwaysZero(operation) && IsEvalAdjustOperationCommutative(operation)) { + for (size_t j = start + 1; j < group->adjusts.size(); j++) { + DeterministicSpriteGroupAdjust &next = group->adjusts[j]; + if (next.operation == operation && acceptable_variable(next.variable) && (next.adjust_flags & DSGAF_SKIP_ON_ZERO)) { + end = j; + } else { + break; + } + } + } + if (end != start) { + adjust.operation = operation; + adjust.adjust_flags |= DSGAF_SKIP_ON_ZERO; + + /* Sort so that the least expensive comes first */ + std::stable_sort(group->adjusts.begin() + start, group->adjusts.begin() + end + 1, [&](const DeterministicSpriteGroupAdjust &a, const DeterministicSpriteGroupAdjust &b) -> bool { + return get_variable_expense(a.variable) < get_variable_expense(b.variable); + }); + + adjust.operation = DSGA_OP_RST; + adjust.adjust_flags &= ~(DSGAF_SKIP_ON_ZERO | DSGAF_JUMP_INS_HINT); + } + } + } +} + +static bool TryCombineTempStoreLoadWithStoreSourceAdjust(DeterministicSpriteGroupAdjust &target, const DeterministicSpriteGroupAdjust *var_src, bool inverted) +{ + DeterministicSpriteGroupAdjustType var_src_type = var_src->type; + if (inverted) { + switch (var_src_type) { + case DSGA_TYPE_EQ: + var_src_type = DSGA_TYPE_NEQ; + break; + case DSGA_TYPE_NEQ: + var_src_type = DSGA_TYPE_EQ; + break; + default: + /* Don't try to handle this case */ + return false; + } + } + if (target.type == DSGA_TYPE_NONE && 
target.shift_num == 0 && (target.and_mask == 0xFFFFFFFF || (IsConstantComparisonAdjustType(var_src_type) && (target.and_mask & 1)))) { + target.type = var_src_type; + target.variable = var_src->variable; + target.shift_num = var_src->shift_num; + target.parameter = var_src->parameter; + target.and_mask = var_src->and_mask; + target.add_val = var_src->add_val; + target.divmod_val = var_src->divmod_val; + return true; + } else if (IsConstantComparisonAdjustType(target.type) && target.shift_num == 0 && (target.and_mask & 1) && target.add_val == 0 && + IsConstantComparisonAdjustType(var_src_type)) { + /* DSGA_TYPE_EQ/NEQ on target are OK if add_val is 0 because this is a boolean invert/convert of the incoming DSGA_TYPE_EQ/NEQ */ + if (target.type == DSGA_TYPE_EQ) { + target.type = InvertConstantComparisonAdjustType(var_src_type); + } else { + target.type = var_src_type; + } + target.variable = var_src->variable; + target.shift_num = var_src->shift_num; + target.parameter = var_src->parameter; + target.and_mask = var_src->and_mask; + target.add_val = var_src->add_val; + target.divmod_val = var_src->divmod_val; + return true; + } else if (var_src_type == DSGA_TYPE_NONE && (target.shift_num + var_src->shift_num) < 32) { + target.variable = var_src->variable; + target.parameter = var_src->parameter; + target.and_mask &= var_src->and_mask >> target.shift_num; + target.shift_num += var_src->shift_num; + return true; + } + return false; +} + +static VarAction2ProcedureAnnotation *OptimiseVarAction2GetFilledProcedureAnnotation(const SpriteGroup *group) +{ + VarAction2ProcedureAnnotation *anno; + bool is_new; + std::tie(anno, is_new) = _cur.GetVarAction2ProcedureAnnotation(group); + if (is_new) { + auto handle_group_contents = y_combinator([&](auto handle_group_contents, const SpriteGroup *sg) -> void { + if (sg == nullptr || anno->unskippable) return; + if (sg->type == SGT_RANDOMIZED) { + const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; + for (const auto 
&group : rsg->groups) { + handle_group_contents(group); + } + + /* Don't try to skip over procedure calls to randomised groups */ + anno->unskippable = true; + } else if (sg->type == SGT_DETERMINISTIC) { + const DeterministicSpriteGroup *dsg = static_cast(sg); + if (dsg->dsg_flags & DSGF_DSE_RECURSIVE_DISABLE) { + anno->unskippable = true; + return; + } + + for (const DeterministicSpriteGroupAdjust &adjust : dsg->adjusts) { + /* Don't try to skip over: unpredictable or special stores, procedure calls, permanent stores, or another jump */ + if (adjust.operation == DSGA_OP_STO && (adjust.type != DSGA_TYPE_NONE || adjust.variable != 0x1A || adjust.shift_num != 0 || adjust.and_mask >= 0x100)) { + anno->unskippable = true; + return; + } + if (adjust.operation == DSGA_OP_STO_NC && adjust.divmod_val >= 0x100) { + if (adjust.divmod_val < 0x110 && adjust.type == DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0) { + /* Storing a constant */ + anno->special_register_values[adjust.divmod_val - 0x100] = adjust.and_mask; + SetBit(anno->special_register_mask, adjust.divmod_val - 0x100); + } else { + anno->unskippable = true; + } + return; + } + if (adjust.operation == DSGA_OP_STOP) { + anno->unskippable = true; + return; + } + if (adjust.variable == 0x7E) { + handle_group_contents(adjust.subroutine); + } + + if (adjust.operation == DSGA_OP_STO) anno->stores.set(adjust.and_mask, true); + if (adjust.operation == DSGA_OP_STO_NC) anno->stores.set(adjust.divmod_val, true); + } + } + }); + handle_group_contents(group); + } + return anno; +} + +static uint OptimiseVarAction2InsertSpecialStoreOps(DeterministicSpriteGroup *group, uint offset, uint32 values[16], uint16 mask) +{ + uint added = 0; + for (uint8 bit : SetBitIterator(mask)) { + bool skip = false; + for (size_t i = offset; i < group->adjusts.size(); i++) { + const DeterministicSpriteGroupAdjust &next = group->adjusts[i]; + if (next.operation == DSGA_OP_STO_NC && next.divmod_val == 0x100u + bit) { + skip = true; 
+ break; + } + if (next.operation == DSGA_OP_STO && next.variable == 0x1A && next.type == DSGA_TYPE_NONE && next.shift_num == 0 && next.and_mask == 0x100u + bit) { + skip = true; + break; + } + if (next.variable == 0x7D && next.parameter == 0x100u + bit) break; + if (next.variable >= 0x40 && next.variable != 0x7D && next.variable != 0x7C) break; // crude whitelist of variables which will never read special registers + } + if (skip) continue; + DeterministicSpriteGroupAdjust store = {}; + store.operation = DSGA_OP_STO_NC; + store.variable = 0x1A; + store.type = DSGA_TYPE_NONE; + store.shift_num = 0; + store.and_mask = values[bit]; + store.divmod_val = 0x100 + bit; + group->adjusts.insert(group->adjusts.begin() + offset + added, store); + added++; + } + return added; +} + +struct VarAction2ProcedureCallVarReadAnnotation { + const SpriteGroup *subroutine; + VarAction2ProcedureAnnotation *anno; + std::bitset<256> relevant_stores; + std::bitset<256> last_reads; + bool unskippable; +}; +static std::vector _varaction2_proc_call_var_read_annotations; + +static void OptimiseVarAction2DeterministicSpriteGroupPopulateLastVarReadAnnotations(DeterministicSpriteGroup *group, VarAction2GroupVariableTracking *var_tracking) +{ + std::bitset<256> bits; + if (var_tracking != nullptr) bits = (var_tracking->out | var_tracking->proc_call_out); + bool need_var1C = false; + + for (int i = (int)group->adjusts.size() - 1; i >= 0; i--) { + DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; + + if (adjust.operation == DSGA_OP_STO) { + if (adjust.type == DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask < 0x100) { + /* Predictable store */ + bits.set(adjust.and_mask, false); + } + } + if (adjust.variable == 0x7B && adjust.parameter == 0x7D) { + /* Unpredictable load */ + bits.set(); + } + if (adjust.variable == 0x7D && adjust.parameter < 0x100) { + if (!bits[adjust.parameter]) { + bits.set(adjust.parameter, true); + adjust.adjust_flags |= 
DSGAF_LAST_VAR_READ; + } + } + if (adjust.variable == 0x1C) { + need_var1C = true; + } + + if (adjust.variable == 0x7E) { + /* procedure call */ + + VarAction2ProcedureCallVarReadAnnotation &anno = _varaction2_proc_call_var_read_annotations.emplace_back(); + anno.subroutine = adjust.subroutine; + anno.anno = OptimiseVarAction2GetFilledProcedureAnnotation(adjust.subroutine); + anno.relevant_stores = anno.anno->stores & bits; + anno.unskippable = anno.anno->unskippable; + adjust.jump = (uint)_varaction2_proc_call_var_read_annotations.size() - 1; // index into _varaction2_proc_call_var_read_annotations + + if (need_var1C) { + anno.unskippable = true; + need_var1C = false; + } + + std::bitset<256> orig_bits = bits; + + auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void { + if (sg == nullptr) return; + if (sg->type == SGT_RANDOMIZED) { + const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; + for (const auto &group : rsg->groups) { + handle_group(group); + } + + /* Don't try to skip over procedure calls to randomised groups */ + anno.unskippable = true; + } else if (sg->type == SGT_DETERMINISTIC) { + const DeterministicSpriteGroup *sub = static_cast(sg); + VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sub, false); + if (var_tracking != nullptr) { + bits |= var_tracking->in; + anno.last_reads |= (var_tracking->in & ~orig_bits); + } + + if (sub->dsg_flags & DSGF_REQUIRES_VAR1C) need_var1C = true; + + if (sub->dsg_flags & DSGF_DSE_RECURSIVE_DISABLE) anno.unskippable = true; + /* No need to check default_group and ranges here as if those contain deterministic groups then DSGF_DSE_RECURSIVE_DISABLE would be set */ + } + }); + handle_group(anno.subroutine); + } + } +} + +static void OptimiseVarAction2DeterministicSpriteGroupInsertJumps(DeterministicSpriteGroup *group, VarAction2GroupVariableTracking *var_tracking) +{ + if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_INSERT_JUMPS)) return; 
+ + group->dsg_flags &= ~DSGF_CHECK_INSERT_JUMP; + + OptimiseVarAction2DeterministicSpriteGroupPopulateLastVarReadAnnotations(group, var_tracking); + + for (int i = (int)group->adjusts.size() - 1; i >= 1; i--) { + DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; + + if (adjust.adjust_flags & DSGAF_JUMP_INS_HINT) { + std::bitset<256> ok_stores; + uint32 special_stores[16]; + uint16 special_stores_mask = 0; + int j = i - 1; + while (j >= 0) { + DeterministicSpriteGroupAdjust &prev = group->adjusts[j]; + + /* Don't try to skip over: unpredictable or unusable special stores, unskippable procedure calls, permanent stores, or another jump */ + if (prev.operation == DSGA_OP_STO && (prev.type != DSGA_TYPE_NONE || prev.variable != 0x1A || prev.shift_num != 0 || prev.and_mask >= 0x100)) break; + if (prev.operation == DSGA_OP_STO_NC && prev.divmod_val >= 0x100) { + if (prev.divmod_val < 0x110 && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && prev.shift_num == 0) { + /* Storing a constant in a special register */ + if (!HasBit(special_stores_mask, prev.divmod_val - 0x100)) { + special_stores[prev.divmod_val - 0x100] = prev.and_mask; + SetBit(special_stores_mask, prev.divmod_val - 0x100); + } + } else { + break; + } + } + if (prev.operation == DSGA_OP_STOP) break; + if (IsEvalAdjustJumpOperation(prev.operation)) break; + if (prev.variable == 0x7E) { + const VarAction2ProcedureCallVarReadAnnotation &anno = _varaction2_proc_call_var_read_annotations[prev.jump]; + if (anno.unskippable) break; + if ((anno.relevant_stores & ~ok_stores).any()) break; + ok_stores |= anno.last_reads; + + uint16 new_stores = anno.anno->special_register_mask & ~special_stores_mask; + for (uint8 bit : SetBitIterator(new_stores)) { + special_stores[bit] = anno.anno->special_register_values[bit]; + } + special_stores_mask |= new_stores; + } + + /* Reached a store which can't be skipped over because the value is needed later */ + if (prev.operation == DSGA_OP_STO && 
!ok_stores[prev.and_mask]) break; + if (prev.operation == DSGA_OP_STO_NC && prev.divmod_val < 0x100 && !ok_stores[prev.divmod_val]) break; + + if (prev.variable == 0x7D && (prev.adjust_flags & DSGAF_LAST_VAR_READ)) { + /* The stored value is no longer needed after this, we can skip the corresponding store */ + ok_stores.set(prev.parameter & 0xFF, true); + } + + j--; + } + if (j < i - 1) { + auto mark_end_block = [&](uint index, uint inc) { + if (group->adjusts[index].variable == 0x7E) { + /* Procedure call, can't mark this as an end block directly, so insert a NOOP and use that */ + DeterministicSpriteGroupAdjust noop = {}; + noop.operation = DSGA_OP_NOOP; + noop.variable = 0x1A; + group->adjusts.insert(group->adjusts.begin() + index + 1, noop); + + /* Fixup offsets */ + if (i > (int)index) i++; + if (j > (int)index) j++; + index++; + } + + DeterministicSpriteGroupAdjust &adj = group->adjusts[index]; + if (adj.adjust_flags & DSGAF_END_BLOCK) { + adj.jump += inc; + } else { + adj.adjust_flags |= DSGAF_END_BLOCK; + adj.jump = inc; + if (special_stores_mask) { + uint added = OptimiseVarAction2InsertSpecialStoreOps(group, index + 1, special_stores, special_stores_mask); + + /* Fixup offsets */ + if (i > (int)index) i += added; + if (j > (int)index) j += added; + } + } + }; + + DeterministicSpriteGroupAdjust current = adjust; + /* Do not use adjust reference after this point */ + + if (current.adjust_flags & DSGAF_END_BLOCK) { + /* Move the existing end block 1 place back, to avoid it being moved with the jump adjust */ + mark_end_block(i - 1, current.jump); + current.adjust_flags &= ~DSGAF_END_BLOCK; + current.jump = 0; + } + current.operation = (current.adjust_flags & DSGAF_SKIP_ON_LSB_SET) ? 
DSGA_OP_JNZ : DSGA_OP_JZ; + current.adjust_flags &= ~(DSGAF_JUMP_INS_HINT | DSGAF_SKIP_ON_ZERO | DSGAF_SKIP_ON_LSB_SET); + mark_end_block(i - 1, 1); + group->adjusts.erase(group->adjusts.begin() + i); + if (j >= 0 && current.variable == 0x7D && (current.adjust_flags & DSGAF_LAST_VAR_READ)) { + DeterministicSpriteGroupAdjust &prev = group->adjusts[j]; + if (prev.operation == DSGA_OP_STO_NC && prev.divmod_val == (current.parameter & 0xFF) && + TryCombineTempStoreLoadWithStoreSourceAdjust(current, &prev, false)) { + /* Managed to extract source from immediately prior STO_NC, which can now be removed */ + group->adjusts.erase(group->adjusts.begin() + j); + j--; + i--; + } else if (current.type == DSGA_TYPE_NONE && current.shift_num == 0 && current.and_mask == 0xFFFFFFFF && + prev.operation == DSGA_OP_STO && prev.variable == 0x1A && prev.shift_num == 0 && prev.and_mask == (current.parameter & 0xFF)) { + /* Reading from immediately prior store, which can now be removed */ + current.operation = (current.operation == DSGA_OP_JNZ) ? 
DSGA_OP_JNZ_LV : DSGA_OP_JZ_LV; + current.adjust_flags &= ~DSGAF_LAST_VAR_READ; + current.and_mask = 0; + current.variable = 0x1A; + group->adjusts.erase(group->adjusts.begin() + j); + j--; + i--; + } + } + group->adjusts.insert(group->adjusts.begin() + j + 1, current); + group->dsg_flags |= DSGF_CHECK_INSERT_JUMP; + i++; + } + } + } + + if (!_varaction2_proc_call_var_read_annotations.empty()) { + for (DeterministicSpriteGroupAdjust &adjust : group->adjusts) { + if (adjust.variable == 0x7E) adjust.subroutine = _varaction2_proc_call_var_read_annotations[adjust.jump].subroutine; + } + _varaction2_proc_call_var_read_annotations.clear(); + } +} + +struct ResolveJumpInnerResult { + uint end_index; + uint end_block_remaining; +}; + +static ResolveJumpInnerResult OptimiseVarAction2DeterministicSpriteResolveJumpsInner(DeterministicSpriteGroup *group, const uint start) +{ + for (uint i = start + 1; i < (uint)group->adjusts.size(); i++) { + if (IsEvalAdjustJumpOperation(group->adjusts[i].operation)) { + ResolveJumpInnerResult result = OptimiseVarAction2DeterministicSpriteResolveJumpsInner(group, i); + i = result.end_index; + if (result.end_block_remaining > 0) { + group->adjusts[start].jump = i - start; + return { i, result.end_block_remaining - 1 }; + } + } else if (group->adjusts[i].adjust_flags & DSGAF_END_BLOCK) { + group->adjusts[start].jump = i - start; + return { i, group->adjusts[i].jump - 1 }; + } + } + + NOT_REACHED(); +} + +static void OptimiseVarAction2DeterministicSpriteResolveJumps(DeterministicSpriteGroup *group) +{ + if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_INSERT_JUMPS)) return; + + for (uint i = 0; i < (uint)group->adjusts.size(); i++) { + if (IsEvalAdjustJumpOperation(group->adjusts[i].operation)) { + ResolveJumpInnerResult result = OptimiseVarAction2DeterministicSpriteResolveJumpsInner(group, i); + i = result.end_index; + assert(result.end_block_remaining == 0); + } + } +} + +void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState 
&state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group) +{ + if (unlikely(HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2))) return; + + for (DeterministicSpriteGroupAdjust &adjust : group->adjusts) { + if (adjust.variable == 0x7D) adjust.parameter &= 0xFF; // Clear temporary version tags + } + + if (!HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_GROUP_PRUNE) && (state.inference & VA2AIF_HAVE_CONSTANT) && !group->calculated_result) { + /* Result of this sprite group is always the same, discard the unused branches */ + const SpriteGroup *target = group->default_group; + for (const auto &range : group->ranges) { + if (range.low <= state.current_constant && state.current_constant <= range.high) { + target = range.group; + } + } + group->default_group = target; + group->error_group = target; + group->ranges.clear(); + } + + std::bitset<256> bits; + std::bitset<256> pending_bits; + bool seen_pending = false; + bool seen_req_var1C = false; + if (!group->calculated_result) { + auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void { + if (sg != nullptr && sg->type == SGT_DETERMINISTIC) { + VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sg, false); + const DeterministicSpriteGroup *dsg = (const DeterministicSpriteGroup*)sg; + if (dsg->dsg_flags & DSGF_VAR_TRACKING_PENDING) { + seen_pending = true; + if (var_tracking != nullptr) pending_bits |= var_tracking->in; + } else { + if (var_tracking != nullptr) bits |= var_tracking->in; + } + if (dsg->dsg_flags & DSGF_REQUIRES_VAR1C) seen_req_var1C = true; + } + if (sg != nullptr && sg->type == SGT_RANDOMIZED) { + const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; + for (const auto &group : rsg->groups) { + handle_group(group); + } + } + if (sg != nullptr && sg->type == SGT_TILELAYOUT) { + const TileLayoutSpriteGroup *tlsg = (const TileLayoutSpriteGroup*)sg; + if (tlsg->dts.registers != nullptr) { + const 
TileLayoutRegisters *registers = tlsg->dts.registers; + size_t count = 1; // 1 for the ground sprite + const DrawTileSeqStruct *element; + foreach_draw_tile_seq(element, tlsg->dts.seq) count++; + for (size_t i = 0; i < count; i ++) { + const TileLayoutRegisters *reg = registers + i; + if (reg->flags & TLF_DODRAW) bits.set(reg->dodraw, true); + if (reg->flags & TLF_SPRITE) bits.set(reg->sprite, true); + if (reg->flags & TLF_PALETTE) bits.set(reg->palette, true); + if (reg->flags & TLF_BB_XY_OFFSET) { + bits.set(reg->delta.parent[0], true); + bits.set(reg->delta.parent[1], true); + } + if (reg->flags & TLF_BB_Z_OFFSET) bits.set(reg->delta.parent[2], true); + if (reg->flags & TLF_CHILD_X_OFFSET) bits.set(reg->delta.child[0], true); + if (reg->flags & TLF_CHILD_Y_OFFSET) bits.set(reg->delta.child[1], true); + } + } + } + if (sg != nullptr && sg->type == SGT_INDUSTRY_PRODUCTION) { + const IndustryProductionSpriteGroup *ipsg = (const IndustryProductionSpriteGroup*)sg; + if (ipsg->version >= 1) { + for (int i = 0; i < ipsg->num_input; i++) { + if (ipsg->subtract_input[i] < 0x100) bits.set(ipsg->subtract_input[i], true); + } + for (int i = 0; i < ipsg->num_output; i++) { + if (ipsg->add_output[i] < 0x100) bits.set(ipsg->add_output[i], true); + } + bits.set(ipsg->again, true); + } + } + }); + handle_group(group->default_group); + for (const auto &range : group->ranges) { + handle_group(range.group); + } + if (bits.any()) { + state.GetVarTracking(group)->out = bits; + std::bitset<256> in_bits = bits | pending_bits; + for (auto &it : state.temp_stores) { + in_bits.set(it.first, false); + } + state.GetVarTracking(group)->in |= in_bits; + } + } + + if (!HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_GROUP_PRUNE) && group->ranges.empty() && !group->calculated_result && !seen_req_var1C) { + /* There is only one option, remove any redundant adjustments when the result will be ignored anyway */ + while (!group->adjusts.empty()) { + const DeterministicSpriteGroupAdjust &prev = 
group->adjusts.back(); + if (prev.variable != 0x7E && !IsEvalAdjustWithSideEffects(prev.operation)) { + /* Delete useless operation */ + group->adjusts.pop_back(); + } else { + break; + } + } + } + + bool dse_allowed = IsFeatureUsableForDSE(feature) && !HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_DSE); + bool dse_eligible = state.enable_dse; + if (dse_allowed && !dse_eligible) { + dse_eligible |= CheckDeterministicSpriteGroupOutputVarBits(group, bits, true); + } + if (state.seen_procedure_call) { + /* Be more pessimistic with procedures as the ordering is different. + * Later groups can require variables set in earlier procedures instead of the usual + * where earlier groups can require variables set in later groups. + * DSE on the procedure runs before the groups which use it, so set the procedure + * output bits not using values from call site groups before DSE. */ + CheckDeterministicSpriteGroupOutputVarBits(group, bits | pending_bits, false); + } + bool dse_candidate = (dse_allowed && dse_eligible); + if (!dse_candidate && (seen_pending || (group->dsg_flags & DSGF_CHECK_INSERT_JUMP))) { + group->dsg_flags |= DSGF_NO_DSE; + dse_candidate = true; + } + if (dse_candidate) { + _cur.dead_store_elimination_candidates.push_back(group); + group->dsg_flags |= DSGF_VAR_TRACKING_PENDING; + } else { + OptimiseVarAction2DeterministicSpriteGroupSimplifyStores(group); + OptimiseVarAction2DeterministicSpriteGroupAdjustOrdering(group); + } + + if (state.check_expensive_vars && !HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_EXPENSIVE_VARS)) { + if (dse_candidate) { + group->dsg_flags |= DSGF_CHECK_EXPENSIVE_VARS; + } else { + OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(group); + } + } +} + +static std::bitset<256> HandleVarAction2DeadStoreElimination(DeterministicSpriteGroup *group, VarAction2GroupVariableTracking *var_tracking, bool no_changes) +{ + std::bitset<256> all_bits; + std::bitset<256> propagate_bits; + std::vector substitution_candidates; + if (var_tracking != 
nullptr) { + propagate_bits = var_tracking->out; + all_bits = propagate_bits | var_tracking->proc_call_out; + } + bool need_var1C = false; + + auto abandon_substitution_candidates = [&]() { + for (uint value : substitution_candidates) { + all_bits.set(value & 0xFF, true); + propagate_bits.set(value & 0xFF, true); + } + substitution_candidates.clear(); + }; + auto erase_adjust = [&](int index) { + group->adjusts.erase(group->adjusts.begin() + index); + for (size_t i = 0; i < substitution_candidates.size();) { + uint &value = substitution_candidates[i]; + if (value >> 8 == (uint)index) { + /* Removed the substitution candidate target */ + value = substitution_candidates.back(); + substitution_candidates.pop_back(); + continue; + } + + if (value >> 8 > (uint)index) { + /* Adjust the substitution candidate target offset */ + value -= 0x100; + } + + i++; + } + }; + auto try_variable_substitution = [&](DeterministicSpriteGroupAdjust &target, int prev_load_index, uint8 idx) -> bool { + assert(target.variable == 0x7D && target.parameter == idx); + + bool inverted = false; + const DeterministicSpriteGroupAdjust *var_src = GetVarAction2PreviousSingleLoadAdjust(group->adjusts, prev_load_index, &inverted); + if (var_src != nullptr) { + if (TryCombineTempStoreLoadWithStoreSourceAdjust(target, var_src, inverted)) return true; + } + return false; + }; + + for (int i = (int)group->adjusts.size() - 1; i >= 0;) { + bool pending_restart = false; + auto restart = [&]() { + pending_restart = false; + i = (int)group->adjusts.size() - 1; + if (var_tracking != nullptr) { + propagate_bits = var_tracking->out; + all_bits = propagate_bits | var_tracking->proc_call_out; + } else { + all_bits.reset(); + propagate_bits.reset(); + } + substitution_candidates.clear(); + need_var1C = false; + }; + const DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; + if (adjust.operation == DSGA_OP_STO) { + if (adjust.type == DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0 && 
adjust.and_mask < 0x100) { + uint8 idx = adjust.and_mask; + /* Predictable store */ + + for (size_t j = 0; j < substitution_candidates.size(); j++) { + if ((substitution_candidates[j] & 0xFF) == idx) { + /* Found candidate */ + + DeterministicSpriteGroupAdjust &target = group->adjusts[substitution_candidates[j] >> 8]; + bool substituted = try_variable_substitution(target, i - 1, idx); + if (!substituted) { + /* Not usable, mark as required so it's not eliminated */ + all_bits.set(idx, true); + propagate_bits.set(idx, true); + } + substitution_candidates[j] = substitution_candidates.back(); + substitution_candidates.pop_back(); + break; + } + } + + if (!all_bits[idx] && !no_changes) { + /* Redundant store */ + erase_adjust(i); + i--; + if ((i + 1 < (int)group->adjusts.size() && group->adjusts[i + 1].operation == DSGA_OP_RST && group->adjusts[i + 1].variable != 0x7B) || + (i + 1 == (int)group->adjusts.size() && group->ranges.empty() && !group->calculated_result)) { + /* Now the store is eliminated, the current value has no users */ + while (i >= 0) { + const DeterministicSpriteGroupAdjust &prev = group->adjusts[i]; + if (prev.variable != 0x7E && !IsEvalAdjustWithSideEffects(prev.operation)) { + /* Delete useless operation */ + erase_adjust(i); + i--; + } else { + if (i + 1 < (int)group->adjusts.size()) { + DeterministicSpriteGroupAdjust &next = group->adjusts[i + 1]; + if (prev.operation == DSGA_OP_STO && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && + prev.shift_num == 0 && prev.and_mask < 0x100 && + next.operation == DSGA_OP_RST && next.type == DSGA_TYPE_NONE && next.variable == 0x7D && + next.parameter == prev.and_mask && next.shift_num == 0 && next.and_mask == 0xFFFFFFFF) { + /* Removing the dead store results in a store/load sequence, remove the load and re-check */ + erase_adjust(i + 1); + restart(); + break; + } + if (next.operation == DSGA_OP_RST) { + /* See if this is a repeated load of a variable (not procedure call) */ + const 
DeterministicSpriteGroupAdjust *prev_load = GetVarAction2PreviousSingleLoadAdjust(group->adjusts, i, nullptr); + if (prev_load != nullptr && MemCmpT(prev_load, &next) == 0) { + if (next.variable == 0x7D) pending_restart = true; + erase_adjust(i + 1); + break; + } + } + if (i + 2 < (int)group->adjusts.size() && next.operation == DSGA_OP_RST && next.variable != 0x7E && + prev.operation == DSGA_OP_STO && prev.type == DSGA_TYPE_NONE && prev.variable == 0x1A && + prev.shift_num == 0 && prev.and_mask < 0x100) { + const DeterministicSpriteGroupAdjust &next2 = group->adjusts[i + 2]; + if (next2.type == DSGA_TYPE_NONE && next2.variable == 0x7D && next2.shift_num == 0 && + next2.and_mask == 0xFFFFFFFF && next2.parameter == prev.and_mask) { + if (IsEvalAdjustOperationReversable(next2.operation)) { + /* Convert: store, load var, (anti-)commutative op on stored --> (dead) store, (reversed) (anti-)commutative op var */ + next.operation = ReverseEvalAdjustOperation(next2.operation); + if (IsEvalAdjustWithZeroLastValueAlwaysZero(next.operation)) { + next.adjust_flags |= DSGAF_SKIP_ON_ZERO; + } + erase_adjust(i + 2); + restart(); + break; + } + } + } + } + break; + } + } + } else { + while (i >= 0 && i + 1 < (int)group->adjusts.size()) { + /* See if having removed the store, there is now a useful pair of operations which can be combined */ + DeterministicSpriteGroupAdjust &prev = group->adjusts[i]; + DeterministicSpriteGroupAdjust &next = group->adjusts[i + 1]; + if (next.type == DSGA_TYPE_NONE && next.operation == DSGA_OP_XOR && next.variable == 0x1A && next.shift_num == 0 && next.and_mask == 1) { + /* XOR: boolean invert */ + if (IsEvalAdjustOperationRelationalComparison(prev.operation)) { + prev.operation = InvertEvalAdjustRelationalComparisonOperation(prev.operation); + erase_adjust(i + 1); + continue; + } else if (prev.operation == DSGA_OP_RST && IsConstantComparisonAdjustType(prev.type)) { + prev.type = InvertConstantComparisonAdjustType(prev.type); + erase_adjust(i + 1); + 
continue; + } + } + if (i >= 1 && prev.type == DSGA_TYPE_NONE && IsEvalAdjustOperationRelationalComparison(prev.operation) && + prev.variable == 0x1A && prev.shift_num == 0 && next.operation == DSGA_OP_MUL) { + if (((prev.operation == DSGA_OP_SGT && (prev.and_mask == 0 || prev.and_mask == (uint)-1)) || (prev.operation == DSGA_OP_SGE && (prev.and_mask == 0 || prev.and_mask == 1))) && + IsIdenticalValueLoad(GetVarAction2PreviousSingleLoadAdjust(group->adjusts, i - 1, nullptr), &next)) { + prev.operation = DSGA_OP_SMAX; + prev.and_mask = 0; + erase_adjust(i + 1); + continue; + } + if (((prev.operation == DSGA_OP_SLE && (prev.and_mask == 0 || prev.and_mask == (uint)-1)) || (prev.operation == DSGA_OP_SLT && (prev.and_mask == 0 || prev.and_mask == 1))) && + IsIdenticalValueLoad(GetVarAction2PreviousSingleLoadAdjust(group->adjusts, i - 1, nullptr), &next)) { + prev.operation = DSGA_OP_SMIN; + prev.and_mask = 0; + erase_adjust(i + 1); + continue; + } + } + break; + } + } + if (pending_restart) restart(); + continue; + } else { + /* Non-redundant store */ + all_bits.set(idx, false); + propagate_bits.set(idx, false); + } + } else { + /* Unpredictable store */ + abandon_substitution_candidates(); + } + } + if (adjust.variable == 0x7B && adjust.parameter == 0x7D) { + /* Unpredictable load */ + all_bits.set(); + propagate_bits.set(); + abandon_substitution_candidates(); + } + if (adjust.variable == 0x7D && adjust.parameter < 0x100) { + if (i > 0 && !all_bits[adjust.parameter] && !no_changes) { + /* See if this can be made a substitution candidate */ + bool add = true; + for (size_t j = 0; j < substitution_candidates.size(); j++) { + if ((substitution_candidates[j] & 0xFF) == adjust.parameter) { + /* There already is a candidate */ + substitution_candidates[j] = substitution_candidates.back(); + substitution_candidates.pop_back(); + all_bits.set(adjust.parameter, true); + propagate_bits.set(adjust.parameter, true); + add = false; + break; + } + } + if (add) { + 
substitution_candidates.push_back(adjust.parameter | (i << 8)); + } + } else { + all_bits.set(adjust.parameter, true); + propagate_bits.set(adjust.parameter, true); + } + } + if (adjust.variable == 0x1C) { + need_var1C = true; + } + if (adjust.variable == 0x7E) { + /* procedure call */ + + VarAction2ProcedureAnnotation *anno = OptimiseVarAction2GetFilledProcedureAnnotation(adjust.subroutine); + + bool may_remove = !need_var1C; + if (may_remove && anno->unskippable) may_remove = false; + if (may_remove && (anno->stores & all_bits).any()) may_remove = false; + + if (may_remove) { + if ((i + 1 < (int)group->adjusts.size() && group->adjusts[i + 1].operation == DSGA_OP_RST && group->adjusts[i + 1].variable != 0x7B) || + (i + 1 == (int)group->adjusts.size() && group->ranges.empty() && !group->calculated_result)) { + /* Procedure is skippable, makes no stores we need, and the return value is also not needed */ + erase_adjust(i); + if (anno->special_register_mask) { + OptimiseVarAction2InsertSpecialStoreOps(group, i, anno->special_register_values, anno->special_register_mask); + restart(); + } else { + i--; + } + continue; + } + } + + need_var1C = false; + + auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void { + if (sg == nullptr) return; + if (sg->type == SGT_RANDOMIZED) { + const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; + for (const auto &group : rsg->groups) { + handle_group(group); + } + } else if (sg->type == SGT_DETERMINISTIC) { + const DeterministicSpriteGroup *sub = static_cast(sg); + VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sub, false); + if (var_tracking != nullptr) { + all_bits |= var_tracking->in; + propagate_bits |= var_tracking->in; + } + if (sub->dsg_flags & DSGF_REQUIRES_VAR1C) need_var1C = true; + } + }); + handle_group(adjust.subroutine); + if (anno->unskippable || anno->special_register_mask) { + abandon_substitution_candidates(); + } else { + /* 
Flush any substitution candidates which reference stores made in the procedure */ + for (size_t j = 0; j < substitution_candidates.size();) { + uint8 idx = substitution_candidates[j] & 0xFF; + if (anno->stores[idx]) { + all_bits.set(idx, true); + propagate_bits.set(idx, true); + substitution_candidates[j] = substitution_candidates.back(); + substitution_candidates.pop_back(); + } else { + j++; + } + } + } + } + i--; + } + abandon_substitution_candidates(); + return propagate_bits; +} + +void HandleVarAction2OptimisationPasses() +{ + if (unlikely(HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2))) return; + + for (DeterministicSpriteGroup *group : _cur.dead_store_elimination_candidates) { + VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(group, false); + if (!group->calculated_result) { + /* Add bits from any groups previously marked with DSGF_VAR_TRACKING_PENDING which should now be correctly updated after DSE */ + auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void { + if (sg != nullptr && sg->type == SGT_DETERMINISTIC) { + VarAction2GroupVariableTracking *targ_var_tracking = _cur.GetVarAction2GroupVariableTracking(sg, false); + if (targ_var_tracking != nullptr) { + if (var_tracking == nullptr) var_tracking = _cur.GetVarAction2GroupVariableTracking(group, true); + var_tracking->out |= targ_var_tracking->in; + } + } + if (sg != nullptr && sg->type == SGT_RANDOMIZED) { + const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg; + for (const auto &group : rsg->groups) { + handle_group(group); + } + } + }); + handle_group(group->default_group); + group->default_group = PruneTargetSpriteGroup(group->default_group); + for (auto &range : group->ranges) { + handle_group(range.group); + range.group = PruneTargetSpriteGroup(range.group); + } + } + + /* Always run this even DSGF_NO_DSE is set because the load/store tracking is needed to re-calculate the input bits, + * even if no stores are actually 
eliminated */ + std::bitset<256> in_bits = HandleVarAction2DeadStoreElimination(group, var_tracking, group->dsg_flags & DSGF_NO_DSE); + if (var_tracking == nullptr && in_bits.any()) { + var_tracking = _cur.GetVarAction2GroupVariableTracking(group, true); + var_tracking->in = in_bits; + } else if (var_tracking != nullptr) { + var_tracking->in = in_bits; + } + + OptimiseVarAction2DeterministicSpriteGroupSimplifyStores(group); + OptimiseVarAction2DeterministicSpriteGroupAdjustOrdering(group); + if (group->dsg_flags & DSGF_CHECK_INSERT_JUMP) { + OptimiseVarAction2DeterministicSpriteGroupInsertJumps(group, var_tracking); + } + if (group->dsg_flags & DSGF_CHECK_EXPENSIVE_VARS) { + OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(group); + } + if (group->dsg_flags & DSGF_CHECK_INSERT_JUMP) { + OptimiseVarAction2DeterministicSpriteResolveJumps(group); + } + } +} + +void ProcessDeterministicSpriteGroupRanges(const std::vector &ranges, std::vector &ranges_out, const SpriteGroup *default_group) +{ + /* Sort ranges ascending. 
When ranges overlap, this may required clamping or splitting them */ + std::vector bounds; + for (uint i = 0; i < ranges.size(); i++) { + bounds.push_back(ranges[i].low); + if (ranges[i].high != UINT32_MAX) bounds.push_back(ranges[i].high + 1); + } + std::sort(bounds.begin(), bounds.end()); + bounds.erase(std::unique(bounds.begin(), bounds.end()), bounds.end()); + + std::vector target; + for (uint j = 0; j < bounds.size(); ++j) { + uint32 v = bounds[j]; + const SpriteGroup *t = default_group; + for (uint i = 0; i < ranges.size(); i++) { + if (ranges[i].low <= v && v <= ranges[i].high) { + t = ranges[i].group; + break; + } + } + target.push_back(t); + } + assert(target.size() == bounds.size()); + + for (uint j = 0; j < bounds.size(); ) { + if (target[j] != default_group) { + DeterministicSpriteGroupRange &r = ranges_out.emplace_back(); + r.group = target[j]; + r.low = bounds[j]; + while (j < bounds.size() && target[j] == r.group) { + j++; + } + r.high = j < bounds.size() ? bounds[j] - 1 : UINT32_MAX; + } else { + j++; + } + } +} + +const SpriteGroup *PruneTargetSpriteGroup(const SpriteGroup *result) +{ + if (HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2) || HasGrfOptimiserFlag(NGOF_NO_OPT_VARACT2_GROUP_PRUNE)) return result; + while (result != nullptr) { + if (result->type == SGT_DETERMINISTIC) { + const DeterministicSpriteGroup *sg = static_cast(result); + if (sg->GroupMayBeBypassed()) { + /* Deterministic sprite group can be trivially resolved, skip it */ + uint32 value = (sg->adjusts.size() == 1) ? 
EvaluateDeterministicSpriteGroupAdjust(sg->size, sg->adjusts[0], nullptr, 0, UINT_MAX) : 0; + const SpriteGroup *candidate = sg->default_group; + for (const auto &range : sg->ranges) { + if (range.low <= value && value <= range.high) { + candidate = range.group; + break; + } + } + if (candidate != nullptr && candidate->type == SGT_DETERMINISTIC && static_cast(candidate)->dsg_flags & DSGF_REQUIRES_VAR1C) { + /* Can't skip this group as the child group requires the result of this group for variable 1C */ + return result; + } + result = candidate; + continue; + } + } + break; + } + return result; +}