84 lines
1.9 KiB
SQL
84 lines
1.9 KiB
SQL
with
|
|
pair_cutoff_dates as (
|
|
select
|
|
case
|
|
when a1.name < a2.name then a1.name
|
|
else a2.name
|
|
end as name1,
|
|
case
|
|
when a1.name < a2.name then a2.name
|
|
else a1.name
|
|
end as name2,
|
|
MAX(a1.date) as cutoff_date
|
|
from
|
|
achievements a1
|
|
join achievements a2 on a1.id = a2.id
|
|
and a1.date = a2.date
|
|
and a1.completed = a2.completed
|
|
and a1.name != a2.name
|
|
group by
|
|
case
|
|
when a1.name < a2.name then a1.name
|
|
else a2.name
|
|
end,
|
|
case
|
|
when a1.name < a2.name then a2.name
|
|
else a1.name
|
|
end
|
|
),
|
|
matching_achievements as (
|
|
select
|
|
pcd.name1,
|
|
pcd.name2,
|
|
pcd.cutoff_date,
|
|
count(*) as matching_count
|
|
from
|
|
achievements a1
|
|
join achievements a2 on a1.id = a2.id
|
|
and a1.date = a2.date
|
|
and a1.completed = a2.completed
|
|
join pair_cutoff_dates pcd on (
|
|
a1.name = pcd.name1
|
|
and a2.name = pcd.name2
|
|
)
|
|
where
|
|
a1.date <= pcd.cutoff_date
|
|
group by
|
|
pcd.name1,
|
|
pcd.name2,
|
|
pcd.cutoff_date
|
|
),
|
|
achievement_counts as (
|
|
select
|
|
name,
|
|
count(*) as total_achievements
|
|
from
|
|
achievements
|
|
group by
|
|
name
|
|
)
|
|
insert or replace into
|
|
similar_pairs (
|
|
name1,
|
|
name2,
|
|
matching_count,
|
|
total_achievements1,
|
|
total_achievements2,
|
|
similarity_percentage,
|
|
cutoff_date
|
|
)
|
|
select
|
|
m.name1,
|
|
m.name2,
|
|
m.matching_count,
|
|
ac1.total_achievements,
|
|
ac2.total_achievements,
|
|
CAST(m.matching_count as REAL) * 100.0 / MIN(ac1.total_achievements, ac2.total_achievements) as similarity_percentage,
|
|
m.cutoff_date
|
|
from
|
|
matching_achievements m
|
|
join achievement_counts ac1 on m.name1 = ac1.name
|
|
join achievement_counts ac2 on m.name2 = ac2.name
|
|
where
|
|
m.matching_count >= 10
|
|
and CAST(m.matching_count as REAL) * 100.0 / MIN(ac1.total_achievements, ac2.total_achievements) >= 70.0; |