feat(search): implement Aho-Corasick for efficient substring searching and improve result ordering

This commit is contained in:
2025-08-10 22:29:15 +02:00
parent 7af7d9ecd0
commit 11fda4e11f
2 changed files with 35 additions and 10 deletions

View File

@@ -59,15 +59,19 @@ export const SearchDialog: React.FC = () => {
}, [open, all.length]); }, [open, all.length]);
useEffect(() => { useEffect(() => {
if (query.trim().length === 0) { setResults([]); return; } const q = query.trim().toLowerCase();
const out: Array<SearchResult> = []; if (q.length === 0) { setResults([]); return; }
const scored: Array<{ s: SearchResult; idx: number }> = [];
for (const r of all) { for (const r of all) {
if (automaton.searchHas(r.system.toLowerCase())) { const idx = automaton.searchFirstIndex(r.system.toLowerCase());
out.push(r); if (idx >= 0) scored.push({ s: r, idx });
if (out.length >= 10) break;
} }
} scored.sort((a, b) => {
setResults(out); if (a.idx !== b.idx) return a.idx - b.idx; // earlier index first
if (a.s.system.length !== b.s.system.length) return a.s.system.length - b.s.system.length; // shorter name next
return a.s.system.localeCompare(b.s.system);
});
setResults(scored.slice(0, 10).map(x => x.s));
}, [query, all, automaton]); }, [query, all, automaton]);
const onSelect = (r: SearchResult) => { const onSelect = (r: SearchResult) => {

View File

@@ -1,6 +1,7 @@
export class AhoCorasick { export class AhoCorasick {
private goto: Array<Map<string, number>> = [new Map()]; private goto: Array<Map<string, number>> = [new Map()];
private out: Array<boolean> = [false]; private out: Array<boolean> = [false];
private outLen: Array<number> = [0];
private fail: Array<number> = [0]; private fail: Array<number> = [0];
add(pattern: string) { add(pattern: string) {
@@ -12,6 +13,7 @@ export class AhoCorasick {
this.goto[state].set(ch, newState); this.goto[state].set(ch, newState);
this.goto.push(new Map()); this.goto.push(new Map());
this.out.push(false); this.out.push(false);
this.outLen.push(0);
this.fail.push(0); this.fail.push(0);
state = newState; state = newState;
} else { } else {
@@ -19,16 +21,15 @@ export class AhoCorasick {
} }
} }
this.out[state] = true; this.out[state] = true;
this.outLen[state] = Math.max(this.outLen[state], pattern.length);
} }
build() { build() {
const queue: number[] = []; const queue: number[] = [];
// Initialize depth 1 states for (const [, s] of this.goto[0]) {
for (const [ch, s] of this.goto[0]) {
this.fail[s] = 0; this.fail[s] = 0;
queue.push(s); queue.push(s);
} }
// BFS
while (queue.length > 0) { while (queue.length > 0) {
const r = queue.shift()!; const r = queue.shift()!;
for (const [a, s] of this.goto[r]) { for (const [a, s] of this.goto[r]) {
@@ -40,6 +41,7 @@ export class AhoCorasick {
const f = this.goto[state].get(a) ?? 0; const f = this.goto[state].get(a) ?? 0;
this.fail[s] = f; this.fail[s] = f;
this.out[s] = this.out[s] || this.out[f]; this.out[s] = this.out[s] || this.out[f];
if (this.outLen[f] > this.outLen[s]) this.outLen[s] = this.outLen[f];
} }
} }
} }
@@ -56,4 +58,23 @@ export class AhoCorasick {
} }
return false; return false;
} }
/**
 * Runs `text` through the automaton and reports where the earliest-ending
 * match begins.
 *
 * The scan stops at the first position whose state is flagged in `out`. The
 * start is derived from `outLen`, which holds the longest pattern length
 * recorded for that state (including lengths inherited through failure links),
 * so among the patterns ending at that position the longest one determines the
 * returned index. A pattern that starts earlier but ends later is not
 * considered.
 *
 * @param text - The string to scan. Case is not normalised here; callers are
 *   expected to pass text in the same case as the patterns.
 * @returns The UTF-16 index (comparable to `String.prototype.indexOf`) at which
 *   the match starts, or `-1` if no pattern occurs in `text`.
 */
searchFirstIndex(text: string): number {
  let state = 0;
  // Position of the current character measured in UTF-16 code units.
  // `for...of` walks the string by code point, so a plain per-iteration counter
  // would disagree with `outLen` (built from `pattern.length`, i.e. code units)
  // whenever the text contains characters outside the BMP.
  let offset = 0;
  for (const ch of text) {
    // Follow failure links until a transition on `ch` exists or we reach the root.
    while (state !== 0 && !this.goto[state].has(ch)) {
      state = this.fail[state];
    }
    state = this.goto[state].get(ch) ?? 0;
    // Exclusive end of the current character in code units.
    const end = offset + ch.length;
    if (this.out[state]) {
      const len = this.outLen[state] ?? 0;
      // Clamp at 0 so a found match can never be mistaken for the -1 sentinel.
      // NOTE(review): assumes the trie is keyed by the same code-point iteration
      // used here — confirm against `add`.
      if (len > 0) return Math.max(0, end - len);
      return offset; // fallback: no length recorded, report the current character
    }
    offset = end;
  }
  return -1;
}
} }