VarAction2: Substitute single-use vars through temp storage in DSE pass

This commit is contained in:
Jonathan G Rennison
2022-06-07 22:03:02 +01:00
parent 0310692acf
commit d3dce79d73

View File

@@ -5659,6 +5659,7 @@ struct VarAction2OptimiseState {
VarAction2GroupVariableTracking *var_tracking = nullptr; VarAction2GroupVariableTracking *var_tracking = nullptr;
bool seen_procedure_call = false; bool seen_procedure_call = false;
bool check_expensive_vars = false; bool check_expensive_vars = false;
bool enable_dse = false;
inline VarAction2GroupVariableTracking *GetVarTracking(DeterministicSpriteGroup *group) inline VarAction2GroupVariableTracking *GetVarTracking(DeterministicSpriteGroup *group)
{ {
@@ -5838,8 +5839,7 @@ static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSp
adjust.variable = 0x1A; adjust.variable = 0x1A;
adjust.and_mask &= (store.store_constant >> adjust.shift_num); adjust.and_mask &= (store.store_constant >> adjust.shift_num);
} else if ((store.inference & VA2AIF_SINGLE_LOAD) && (store.var_source.variable == 0x7D || IsVariableVeryCheap(store.var_source.variable, feature))) { } else if ((store.inference & VA2AIF_SINGLE_LOAD) && (store.var_source.variable == 0x7D || IsVariableVeryCheap(store.var_source.variable, feature))) {
if (adjust.type == DSGA_TYPE_NONE) { if (adjust.type == DSGA_TYPE_NONE && adjust.shift_num == 0 && (adjust.and_mask == 0xFFFFFFFF || ((store.inference & VA2AIF_ONE_OR_ZERO) && (adjust.and_mask & 1)))) {
if (adjust.and_mask == 0xFFFFFFFF || ((store.inference & VA2AIF_ONE_OR_ZERO) && (adjust.and_mask & 1))) {
adjust.type = store.var_source.type; adjust.type = store.var_source.type;
adjust.variable = store.var_source.variable; adjust.variable = store.var_source.variable;
adjust.shift_num = store.var_source.shift_num; adjust.shift_num = store.var_source.shift_num;
@@ -5848,7 +5848,6 @@ static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSp
adjust.add_val = store.var_source.add_val; adjust.add_val = store.var_source.add_val;
adjust.divmod_val = store.var_source.divmod_val; adjust.divmod_val = store.var_source.divmod_val;
continue; continue;
}
} else if (store.var_source.type == DSGA_TYPE_NONE && (adjust.shift_num + store.var_source.shift_num) < 32) { } else if (store.var_source.type == DSGA_TYPE_NONE && (adjust.shift_num + store.var_source.shift_num) < 32) {
adjust.variable = store.var_source.variable; adjust.variable = store.var_source.variable;
adjust.parameter = store.var_source.parameter; adjust.parameter = store.var_source.parameter;
@@ -5856,9 +5855,15 @@ static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSp
adjust.shift_num += store.var_source.shift_num; adjust.shift_num += store.var_source.shift_num;
continue; continue;
} }
} else if (adjust.type == DSGA_TYPE_NONE) { } else {
if (adjust.type == DSGA_TYPE_NONE) {
non_const_var_inference = store.inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO); non_const_var_inference = store.inference & (VA2AIF_SIGNED_NON_NEGATIVE | VA2AIF_ONE_OR_ZERO);
} }
if (store.inference & VA2AIF_SINGLE_LOAD) {
/* Not possible to substitute this here, but it may be possible in the DSE pass */
state.enable_dse = true;
}
}
} }
break; break;
} }
@@ -6343,7 +6348,7 @@ static void OptimiseVarAction2Adjust(VarAction2OptimiseState &state, const GrfSp
} }
} }
static bool CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSpriteGroup *group, std::bitset<256> bits, bool add_to_dse, bool quick_exit); static bool CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSpriteGroup *group, std::bitset<256> bits, bool quick_exit);
static void RecursiveDisallowDSEForProcedure(const SpriteGroup *group) static void RecursiveDisallowDSEForProcedure(const SpriteGroup *group)
{ {
@@ -6373,7 +6378,7 @@ static void RecursiveDisallowDSEForProcedure(const SpriteGroup *group)
} }
} }
static bool CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSpriteGroup *group, std::bitset<256> bits, bool add_to_dse, bool quick_exit) static bool CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSpriteGroup *group, std::bitset<256> bits, bool quick_exit)
{ {
bool dse = false; bool dse = false;
for (int i = (int)group->adjusts.size() - 1; i >= 0; i--) { for (int i = (int)group->adjusts.size() - 1; i >= 0; i--) {
@@ -6429,7 +6434,7 @@ static bool CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSprite
std::bitset<256> new_out = bits | var_tracking->out; std::bitset<256> new_out = bits | var_tracking->out;
if (new_out != var_tracking->out) { if (new_out != var_tracking->out) {
var_tracking->out = new_out; var_tracking->out = new_out;
CheckDeterministicSpriteGroupOutputVarBits(sub, new_out, false, false); CheckDeterministicSpriteGroupOutputVarBits(sub, new_out, false);
} }
} else { } else {
RecursiveDisallowDSEForProcedure(sub); RecursiveDisallowDSEForProcedure(sub);
@@ -6440,9 +6445,7 @@ static bool CheckDeterministicSpriteGroupOutputVarBits(const DeterministicSprite
handle_group(adjust.subroutine); handle_group(adjust.subroutine);
} }
} }
bool dse_candidate = (dse && add_to_dse); return dse;
if (dse_candidate) _cur.dead_store_elimination_candidates.push_back(const_cast<DeterministicSpriteGroup *>(group));
return dse_candidate;
} }
static bool OptimiseVarAction2DeterministicSpriteGroupExpensiveVarsInner(const GrfSpecFeature feature, DeterministicSpriteGroup *group, VarAction2GroupVariableTracking *var_tracking) static bool OptimiseVarAction2DeterministicSpriteGroupExpensiveVarsInner(const GrfSpecFeature feature, DeterministicSpriteGroup *group, VarAction2GroupVariableTracking *var_tracking)
@@ -6716,9 +6719,13 @@ static void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &
state.GetVarTracking(group)->in |= in_bits; state.GetVarTracking(group)->in |= in_bits;
} }
} }
bool check_dse = IsFeatureUsableForDSE(feature); bool dse_allowed = IsFeatureUsableForDSE(feature);
bool dse_candidate = false; bool dse_eligible = state.enable_dse;
if (check_dse || state.seen_procedure_call) dse_candidate = CheckDeterministicSpriteGroupOutputVarBits(group, bits, check_dse, !state.seen_procedure_call); if ((dse_allowed && !dse_eligible) || state.seen_procedure_call) {
dse_eligible |= CheckDeterministicSpriteGroupOutputVarBits(group, bits, !state.seen_procedure_call);
}
bool dse_candidate = (dse_allowed && dse_eligible);
if (dse_candidate) _cur.dead_store_elimination_candidates.push_back(group);
if (!dse_candidate) OptimiseVarAction2DeterministicSpriteGroupSimplifyStores(group); if (!dse_candidate) OptimiseVarAction2DeterministicSpriteGroupSimplifyStores(group);
@@ -6734,26 +6741,96 @@ static void OptimiseVarAction2DeterministicSpriteGroup(VarAction2OptimiseState &
static void HandleVarAction2DeadStoreElimination(DeterministicSpriteGroup *group) static void HandleVarAction2DeadStoreElimination(DeterministicSpriteGroup *group)
{ {
std::bitset<256> bits; std::bitset<256> bits;
std::vector<uint> substitution_candidates;
VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(group, false); VarAction2GroupVariableTracking *var_tracking = _cur.GetVarAction2GroupVariableTracking(group, false);
if (var_tracking != nullptr) bits = var_tracking->out; if (var_tracking != nullptr) bits = var_tracking->out;
auto abandon_substitution_candidates = [&]() {
for (uint value : substitution_candidates) {
bits.set(value & 0xFF, true);
}
substitution_candidates.clear();
};
auto erase_adjust = [&](int index) {
group->adjusts.erase(group->adjusts.begin() + index);
for (size_t i = 0; i < substitution_candidates.size();) {
uint &value = substitution_candidates[i];
if (value >> 8 == (uint)index) {
/* Removed the substitution candidate target */
value = substitution_candidates.back();
substitution_candidates.pop_back();
continue;
}
if (value >> 8 > (uint)index) {
/* Adjust the substitution candidate target offset */
value -= 0x100;
}
i++;
}
};
for (int i = (int)group->adjusts.size() - 1; i >= 0;) { for (int i = (int)group->adjusts.size() - 1; i >= 0;) {
bool pending_restart = false;
auto restart = [&]() { auto restart = [&]() {
pending_restart = false;
i = (int)group->adjusts.size() - 1; i = (int)group->adjusts.size() - 1;
if (var_tracking != nullptr) { if (var_tracking != nullptr) {
bits = var_tracking->out; bits = var_tracking->out;
} else { } else {
bits.reset(); bits.reset();
} }
substitution_candidates.clear();
}; };
const DeterministicSpriteGroupAdjust &adjust = group->adjusts[i]; const DeterministicSpriteGroupAdjust &adjust = group->adjusts[i];
if (adjust.operation == DSGA_OP_STO) { if (adjust.operation == DSGA_OP_STO) {
if (adjust.type == DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask < 0x100) { if (adjust.type == DSGA_TYPE_NONE && adjust.variable == 0x1A && adjust.shift_num == 0 && adjust.and_mask < 0x100) {
uint8 idx = adjust.and_mask; uint8 idx = adjust.and_mask;
/* Predictable store */ /* Predictable store */
for (size_t j = 0; j < substitution_candidates.size(); j++) {
if ((substitution_candidates[j] & 0xFF) == idx) {
/* Found candidate */
DeterministicSpriteGroupAdjust &target = group->adjusts[substitution_candidates[j] >> 8];
assert(target.variable == 0x7D && target.parameter == idx);
bool substituted = false;
const DeterministicSpriteGroupAdjust *var_src = GetVarAction2PreviousSingleLoadAdjust(group->adjusts, i - 1);
if (var_src != nullptr && var_src->variable != 0x7C) {
/* Don't use variable 7C as we're not checking for store perms which may clobber the value here */
if (target.type == DSGA_TYPE_NONE && target.shift_num == 0 && (target.and_mask == 0xFFFFFFFF || ((var_src->type == DSGA_TYPE_EQ || var_src->type == DSGA_TYPE_NEQ) && (target.and_mask & 1)))) {
target.type = var_src->type;
target.variable = var_src->variable;
target.shift_num = var_src->shift_num;
target.parameter = var_src->parameter;
target.and_mask = var_src->and_mask;
target.add_val = var_src->add_val;
target.divmod_val = var_src->divmod_val;
substituted = true;
} else if (var_src->type == DSGA_TYPE_NONE && (adjust.shift_num + var_src->shift_num) < 32) {
target.variable = var_src->variable;
target.parameter = var_src->parameter;
target.and_mask &= var_src->and_mask >> adjust.shift_num;
target.shift_num += var_src->shift_num;
substituted = true;
}
}
if (!substituted) {
/* Not usable, mark as required so it's not eliminated */
bits.set(idx, true);
}
substitution_candidates[j] = substitution_candidates.back();
substitution_candidates.pop_back();
break;
}
}
if (!bits[idx]) { if (!bits[idx]) {
/* Redundant store */ /* Redundant store */
group->adjusts.erase(group->adjusts.begin() + i); erase_adjust(i);
i--; i--;
if ((i + 1 < (int)group->adjusts.size() && group->adjusts[i + 1].operation == DSGA_OP_RST && group->adjusts[i + 1].variable != 0x7B) || if ((i + 1 < (int)group->adjusts.size() && group->adjusts[i + 1].operation == DSGA_OP_RST && group->adjusts[i + 1].variable != 0x7B) ||
(i + 1 == (int)group->adjusts.size() && group->ranges.empty() && !group->calculated_result)) { (i + 1 == (int)group->adjusts.size() && group->ranges.empty() && !group->calculated_result)) {
@@ -6762,7 +6839,7 @@ static void HandleVarAction2DeadStoreElimination(DeterministicSpriteGroup *group
const DeterministicSpriteGroupAdjust &prev = group->adjusts[i]; const DeterministicSpriteGroupAdjust &prev = group->adjusts[i];
if (prev.variable != 0x7E && !IsEvalAdjustWithSideEffects(prev.operation)) { if (prev.variable != 0x7E && !IsEvalAdjustWithSideEffects(prev.operation)) {
/* Delete useless operation */ /* Delete useless operation */
group->adjusts.erase(group->adjusts.begin() + i); erase_adjust(i);
i--; i--;
} else { } else {
if (i + 1 < (int)group->adjusts.size()) { if (i + 1 < (int)group->adjusts.size()) {
@@ -6772,7 +6849,7 @@ static void HandleVarAction2DeadStoreElimination(DeterministicSpriteGroup *group
next.operation == DSGA_OP_RST && next.type == DSGA_TYPE_NONE && next.variable == 0x7D && next.operation == DSGA_OP_RST && next.type == DSGA_TYPE_NONE && next.variable == 0x7D &&
next.parameter == prev.and_mask && next.shift_num == 0 && next.and_mask == 0xFFFFFFFF) { next.parameter == prev.and_mask && next.shift_num == 0 && next.and_mask == 0xFFFFFFFF) {
/* Removing the dead store results in a store/load sequence, remove the load and re-check */ /* Removing the dead store results in a store/load sequence, remove the load and re-check */
group->adjusts.erase(group->adjusts.begin() + i + 1); erase_adjust(i + 1);
restart(); restart();
break; break;
} }
@@ -6780,7 +6857,8 @@ static void HandleVarAction2DeadStoreElimination(DeterministicSpriteGroup *group
/* See if this is a repeated load of a variable (not procedure call) */ /* See if this is a repeated load of a variable (not procedure call) */
const DeterministicSpriteGroupAdjust *prev_load = GetVarAction2PreviousSingleLoadAdjust(group->adjusts, i); const DeterministicSpriteGroupAdjust *prev_load = GetVarAction2PreviousSingleLoadAdjust(group->adjusts, i);
if (prev_load != nullptr && MemCmpT<DeterministicSpriteGroupAdjust>(prev_load, &next) == 0) { if (prev_load != nullptr && MemCmpT<DeterministicSpriteGroupAdjust>(prev_load, &next) == 0) {
group->adjusts.erase(group->adjusts.begin() + i + 1); if (next.variable == 0x7D) pending_restart = true;
erase_adjust(i + 1);
break; break;
} }
} }
@@ -6789,19 +6867,40 @@ static void HandleVarAction2DeadStoreElimination(DeterministicSpriteGroup *group
} }
} }
} }
if (pending_restart) restart();
continue; continue;
} else { } else {
/* Non-redundant store */ /* Non-redundant store */
bits.set(idx, false); bits.set(idx, false);
} }
} else {
/* Unpredictable store */
abandon_substitution_candidates();
} }
} }
if (adjust.variable == 0x7B && adjust.parameter == 0x7D) { if (adjust.variable == 0x7B && adjust.parameter == 0x7D) {
/* Unpredictable load */ /* Unpredictable load */
bits.set(); bits.set();
abandon_substitution_candidates();
} }
if (adjust.variable == 0x7D && adjust.parameter < 0x100) { if (adjust.variable == 0x7D && adjust.parameter < 0x100) {
if (i > 0 && !bits[adjust.parameter]) {
/* See if this can be made a substitution candidate */
bool add = true;
for (size_t j = 0; j < substitution_candidates.size(); j++) {
if ((substitution_candidates[j] & 0xFF) == adjust.parameter) {
/* There already is a candidate */
substitution_candidates[j] = substitution_candidates.back();
substitution_candidates.pop_back();
bits.set(adjust.parameter, true); bits.set(adjust.parameter, true);
add = false;
break;
}
}
if (add) substitution_candidates.push_back(adjust.parameter | (i << 8));
} else {
bits.set(adjust.parameter, true);
}
} }
if (adjust.variable == 0x7E) { if (adjust.variable == 0x7E) {
/* procedure call */ /* procedure call */
@@ -6819,6 +6918,7 @@ static void HandleVarAction2DeadStoreElimination(DeterministicSpriteGroup *group
} }
}); });
handle_group(adjust.subroutine); handle_group(adjust.subroutine);
abandon_substitution_candidates();
} }
i--; i--;
} }