VarAction2: Deduplicate loads from expensive uncached variables

Currently only some vehicle variables
This commit is contained in:
Jonathan G Rennison
2022-06-03 20:14:30 +01:00
parent 6a17f00a27
commit db7f29b07c
4 changed files with 171 additions and 4 deletions

View File

@@ -23,6 +23,7 @@ enum ChickenBitFlags {
DCBF_NO_OPTIMISE_VARACT2 = 6, DCBF_NO_OPTIMISE_VARACT2 = 6,
DCBF_NO_OPTIMISE_VARACT2_DSE = 7, DCBF_NO_OPTIMISE_VARACT2_DSE = 7,
DCBF_NO_OPTIMISE_VARACT2_PRUNE = 8, DCBF_NO_OPTIMISE_VARACT2_PRUNE = 8,
DCBF_NO_OPTIMISE_VARACT2_EXP_VAR = 9,
}; };
inline bool HasChickenBit(ChickenBitFlags flag) inline bool HasChickenBit(ChickenBitFlags flag)

View File

@@ -130,6 +130,7 @@ public:
btree::btree_map<const SpriteGroup *, VarAction2GroupVariableTracking *> group_temp_store_variable_tracking; btree::btree_map<const SpriteGroup *, VarAction2GroupVariableTracking *> group_temp_store_variable_tracking;
UniformArenaAllocator<sizeof(VarAction2GroupVariableTracking), 1024> group_temp_store_variable_tracking_storage; UniformArenaAllocator<sizeof(VarAction2GroupVariableTracking), 1024> group_temp_store_variable_tracking_storage;
std::vector<DeterministicSpriteGroup *> dead_store_elimination_candidates; std::vector<DeterministicSpriteGroup *> dead_store_elimination_candidates;
std::vector<std::pair<GrfSpecFeature, DeterministicSpriteGroup *>> pending_expensive_var_checks;
VarAction2GroupVariableTracking *GetVarAction2GroupVariableTracking(const SpriteGroup *group, bool make_new) VarAction2GroupVariableTracking *GetVarAction2GroupVariableTracking(const SpriteGroup *group, bool make_new)
{ {
@@ -159,6 +160,7 @@ public:
this->group_temp_store_variable_tracking.clear(); this->group_temp_store_variable_tracking.clear();
this->group_temp_store_variable_tracking_storage.EmptyArena(); this->group_temp_store_variable_tracking_storage.EmptyArena();
this->dead_store_elimination_candidates.clear(); this->dead_store_elimination_candidates.clear();
this->pending_expensive_var_checks.clear();
} }
/** /**
@@ -5629,6 +5631,7 @@ struct VarAction2OptimiseState {
btree::btree_map<uint8, VarAction2TempStoreInference> temp_stores; btree::btree_map<uint8, VarAction2TempStoreInference> temp_stores;
VarAction2GroupVariableTracking *var_tracking = nullptr; VarAction2GroupVariableTracking *var_tracking = nullptr;
bool seen_procedure_call = false; bool seen_procedure_call = false;
bool check_expensive_vars = false;
inline VarAction2GroupVariableTracking *GetVarTracking(DeterministicSpriteGroup *group) inline VarAction2GroupVariableTracking *GetVarTracking(DeterministicSpriteGroup *group)
{ {
@@ -5639,6 +5642,24 @@ struct VarAction2OptimiseState {
} }
}; };
static bool IsExpensiveVehicleVariable(uint16 variable)
{
switch (variable) {
case 0x45:
case 0x4A:
case 0x60:
case 0x61:
case 0x62:
case 0x63:
case 0xFE:
case 0xFF:
return true;
default:
return false;
}
}
static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group, DeterministicSpriteGroupAdjust &adjust) static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group, DeterministicSpriteGroupAdjust &adjust)
{ {
if (unlikely(HasChickenBit(DCBF_NO_OPTIMISE_VARACT2))) return; if (unlikely(HasChickenBit(DCBF_NO_OPTIMISE_VARACT2))) return;
@@ -5747,6 +5768,8 @@ static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSp
break; break;
} }
if ((feature >= GSF_TRAINS && feature <= GSF_AIRCRAFT) && IsExpensiveVehicleVariable(adjust.variable)) state.check_expensive_vars = true;
auto get_prev_single_load = [&]() -> const DeterministicSpriteGroupAdjust* { auto get_prev_single_load = [&]() -> const DeterministicSpriteGroupAdjust* {
for (int i = (int)group->adjusts.size() - 2; i >= 0; i--) { for (int i = (int)group->adjusts.size() - 2; i >= 0; i--) {
const DeterministicSpriteGroupAdjust &prev = group->adjusts[i]; const DeterministicSpriteGroupAdjust &prev = group->adjusts[i];
@@ -6177,7 +6200,7 @@ static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSp
} }
} }
static void CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSpriteGroup *group, std::bitset<256> bits, bool add_to_dse, bool quick_exit); static bool CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSpriteGroup *group, std::bitset<256> bits, bool add_to_dse, bool quick_exit);
static void RecursiveDisallowDSEForProcedure(const SpriteGroup *group) static void RecursiveDisallowDSEForProcedure(const SpriteGroup *group)
{ {
@@ -6207,7 +6230,7 @@ static void RecursiveDisallowDSEForProcedure(const SpriteGroup *group)
} }
} }
static void CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSpriteGroup *group, std::bitset<256> bits, bool add_to_dse, bool quick_exit) static bool CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSpriteGroup *group, std::bitset<256> bits, bool add_to_dse, bool quick_exit)
{ {
bool dse = false; bool dse = false;
for (int i = (int)group->adjusts.size() - 1; i >= 0; i--) { for (int i = (int)group->adjusts.size() - 1; i >= 0; i--) {
@@ -6223,6 +6246,14 @@ static void CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSprite
bits.set(adjust.and_mask, false); bits.set(adjust.and_mask, false);
} }
} }
if (adjust.operation == DSGA_OP_STO_NC) {
if (!bits[adjust.add_val]) {
/* Possibly redundant store */
dse = true;
if (quick_exit) break;
}
bits.set(adjust.add_val, false);
}
if (adjust.variable == 0x7B && adjust.parameter == 0x7D) { if (adjust.variable == 0x7B && adjust.parameter == 0x7D) {
/* Unpredictable load */ /* Unpredictable load */
bits.set(); bits.set();
@@ -6257,7 +6288,122 @@ static void CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSprite
handle_group(adjust.subroutine); handle_group(adjust.subroutine);
} }
} }
if (dse && add_to_dse) _cur.dead_store_elimination_candidates.push_back(const_cast<DeterministicSpriteGroup *>(group)); bool dse_candidate = (dse && add_to_dse);
if (dse_candidate) _cur.dead_store_elimination_candidates.push_back(const_cast<DeterministicSpriteGroup *>(group));
return dse_candidate;
}
static bool OptimiseVarAction2DeterministicSpriteGroupExpensiveVarsInner(const GrfSpecFeature feature, DeterministicSpriteGroup *group, VarAction2GroupVariableTracking *var_tracking)
{
btree::btree_map<uint64, uint32> seen_expensive_variables;
std::bitset<256> usable_vars;
if (var_tracking != nullptr) {
usable_vars = ~var_tracking->out;
} else {
usable_vars.set();
}
uint16 target_var = 0;
uint32 target_param = 0;
auto found_target = [&]() -> bool {
for (auto &iter : seen_expensive_variables) {
if (iter.second >= 2) {
target_var = iter.first >> 32;
target_param = iter.first & 0xFFFFFFFF;
return true;
}
}
return false;
};
auto do_replacements = [&](int start, int end) {
std::bitset<256> mask(UINT64_MAX);
std::bitset<256> cur = usable_vars;
uint8 bit = 0;
while (true) {
uint64 t = (cur & mask).to_ullong();
if (t != 0) {
bit += FindFirstBit(t);
break;
}
cur >>= 64;
bit += 64;
}
int insert_pos = start;
uint32 and_mask = 0;
for (int j = end; j >= start; j--) {
DeterministicSpriteGroupAdjust &adjust = group->adjusts[j];
if (adjust.variable == target_var && adjust.parameter == target_param) {
and_mask |= adjust.and_mask << adjust.shift_num;
adjust.variable = 0x7D;
adjust.parameter = bit;
insert_pos = j;
}
}
DeterministicSpriteGroupAdjust load = {};
load.operation = DSGA_OP_STO_NC;
load.type = DSGA_TYPE_NONE;
load.variable = target_var;
load.shift_num = 0;
load.parameter = target_param;
load.and_mask = and_mask;
load.add_val = bit;
group->adjusts.insert(group->adjusts.begin() + insert_pos, load);
};
int i = (int)group->adjusts.size() - 1;
int end = i;
while (i >= 0) {
const DeterministicSpriteGroupAdjust &adjust = group->adjusts[i];
if (adjust.operation == DSGA_OP_STO && (adjust.type != DSGA_TYPE_NONE || adjust.variable != 0x1A || adjust.shift_num != 0)) return false;
if (adjust.variable == 0x7B && adjust.parameter == 0x7D) return false;
if (adjust.operation == DSGA_OP_STO_NC) {
usable_vars.set(adjust.add_val, false);
} else if (adjust.operation == DSGA_OP_STO && adjust.and_mask < 0x100) {
usable_vars.set(adjust.and_mask, false);
} else if (adjust.variable == 0x7D) {
if (adjust.parameter < 0x100) usable_vars.set(adjust.parameter, false);
} else if ((feature >= GSF_TRAINS && feature <= GSF_AIRCRAFT) && IsExpensiveVehicleVariable(adjust.variable)) {
seen_expensive_variables[(((uint64)adjust.variable) << 32) | adjust.parameter]++;
}
if (adjust.variable == 0x7E || (adjust.operation == DSGA_OP_STO && adjust.and_mask >= 0x100)) {
/* Can't cross this barrier, stop here */
if (usable_vars.none()) return false;
if (found_target()) {
do_replacements(i + 1, end);
return true;
}
seen_expensive_variables.clear();
end = i - 1;
if (adjust.variable == 0x7E) {
auto handle_group = y_combinator([&](auto handle_group, const SpriteGroup *sg) -> void {
if (sg != nullptr && sg->type == SGT_DETERMINISTIC) {
VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(sg, false);
if (var_tracking != nullptr) usable_vars &= ~var_tracking->in;
}
if (sg != nullptr && sg->type == SGT_RANDOMIZED) {
const RandomizedSpriteGroup *rsg = (const RandomizedSpriteGroup*)sg;
for (const auto &group : rsg->groups) {
handle_group(group);
}
}
});
handle_group(adjust.subroutine);
}
}
i--;
}
if (usable_vars.none()) return false;
if (found_target()) {
do_replacements(0, end);
return true;
}
return false;
}
static void OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(const GrfSpecFeature feature, DeterministicSpriteGroup *group)
{
VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(group, false);
while (OptimiseVarAction2DeterministicSpriteGroupExpensiveVarsInner(feature, group, var_tracking)) {}
} }
static void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group) static void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &state, const GrfSpecFeature feature, const byte varsize, DeterministicSpriteGroup *group)
@@ -6352,7 +6498,16 @@ static void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &
} }
} }
bool check_dse = (feature != GSF_STATIONS); bool check_dse = (feature != GSF_STATIONS);
if (check_dse || state.seen_procedure_call) CheckDeterministicSpriteGroupOutputVarBits(group, bits, check_dse, !state.seen_procedure_call); bool dse_candidate = false;
if (check_dse || state.seen_procedure_call) dse_candidate = CheckDeterministicSpriteGroupOutputVarBits(group, bits, check_dse, !state.seen_procedure_call);
if (state.check_expensive_vars && !HasChickenBit(DCBF_NO_OPTIMISE_VARACT2_EXP_VAR)) {
if (dse_candidate && !HasChickenBit(DCBF_NO_OPTIMISE_VARACT2_DSE)) {
_cur.pending_expensive_var_checks.push_back({ feature, group });
} else {
OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(feature, group);
}
}
} }
static void HandleVarAction2DeadStoreElimination() static void HandleVarAction2DeadStoreElimination()
@@ -6442,6 +6597,10 @@ static void HandleVarAction2DeadStoreElimination()
i--; i--;
} }
} }
for (auto iter : _cur.pending_expensive_var_checks) {
OptimiseVarAction2DeterministicSpriteGroupExpensiveVars(iter.first, iter.second);
}
} }
/* Action 0x02 */ /* Action 0x02 */

View File

@@ -193,6 +193,7 @@ static U EvalAdjustT(const DeterministicSpriteGroupAdjust &adjust, ScopeResolver
case DSGA_OP_SLE: return ((S)last_value <= (S)value) ? 1 : 0; case DSGA_OP_SLE: return ((S)last_value <= (S)value) ? 1 : 0;
case DSGA_OP_SGT: return ((S)last_value > (S)value) ? 1 : 0; case DSGA_OP_SGT: return ((S)last_value > (S)value) ? 1 : 0;
case DSGA_OP_RSUB: return value - last_value; case DSGA_OP_RSUB: return value - last_value;
case DSGA_OP_STO_NC: _temp_store.StoreValue(adjust.add_val, (S)value); return last_value;
default: return value; default: return value;
} }
} }
@@ -659,6 +660,7 @@ static const char *_dsg_op_special_names[] {
"SLE", "SLE",
"SGT", "SGT",
"RSUB", "RSUB",
"STO_NC",
}; };
static_assert(lengthof(_dsg_op_special_names) == DSGA_OP_SPECIAL_END - DSGA_OP_TERNARY); static_assert(lengthof(_dsg_op_special_names) == DSGA_OP_SPECIAL_END - DSGA_OP_TERNARY);
@@ -772,6 +774,10 @@ void SpriteGroupDumper::DumpSpriteGroup(const SpriteGroup *sg, int padding, uint
case DSGA_TYPE_NEQ: p += seprintf(p, lastof(this->buffer), ", neq: %X", adjust.add_val); break; case DSGA_TYPE_NEQ: p += seprintf(p, lastof(this->buffer), ", neq: %X", adjust.add_val); break;
case DSGA_TYPE_NONE: break; case DSGA_TYPE_NONE: break;
} }
if (adjust.operation == DSGA_OP_STO_NC) {
p += seprintf(p, lastof(this->buffer), ", store to: %X", adjust.add_val);
highlight_tag = (1 << 16) | adjust.add_val;
}
p += seprintf(p, lastof(this->buffer), ", op: %X (%s)", adjust.operation, GetAdjustOperationName(adjust.operation)); p += seprintf(p, lastof(this->buffer), ", op: %X (%s)", adjust.operation, GetAdjustOperationName(adjust.operation));
print(); print();
if (adjust.variable == 0x7E && adjust.subroutine != nullptr) { if (adjust.variable == 0x7E && adjust.subroutine != nullptr) {

View File

@@ -185,6 +185,7 @@ enum DeterministicSpriteGroupAdjustOperation {
DSGA_OP_SLE, ///< (signed) a <= b ? 1 : 0, DSGA_OP_SLE, ///< (signed) a <= b ? 1 : 0,
DSGA_OP_SGT, ///< (signed) a > b ? 1 : 0, DSGA_OP_SGT, ///< (signed) a > b ? 1 : 0,
DSGA_OP_RSUB, ///< b - a DSGA_OP_RSUB, ///< b - a
DSGA_OP_STO_NC, ///< store b into temporary storage, indexed by c. return a
DSGA_OP_SPECIAL_END, DSGA_OP_SPECIAL_END,
}; };