feat(search): implement Aho-Corasick for efficient substring searching and improve result ordering

This commit is contained in:
2025-08-10 22:29:15 +02:00
parent 7af7d9ecd0
commit 11fda4e11f
2 changed files with 35 additions and 10 deletions

View File

@@ -59,15 +59,19 @@ export const SearchDialog: React.FC = () => {
}, [open, all.length]);
useEffect(() => {
if (query.trim().length === 0) { setResults([]); return; }
const out: Array<SearchResult> = [];
const q = query.trim().toLowerCase();
if (q.length === 0) { setResults([]); return; }
const scored: Array<{ s: SearchResult; idx: number }> = [];
for (const r of all) {
if (automaton.searchHas(r.system.toLowerCase())) {
out.push(r);
if (out.length >= 10) break;
const idx = automaton.searchFirstIndex(r.system.toLowerCase());
if (idx >= 0) scored.push({ s: r, idx });
}
}
setResults(out);
scored.sort((a, b) => {
if (a.idx !== b.idx) return a.idx - b.idx; // earlier index first
if (a.s.system.length !== b.s.system.length) return a.s.system.length - b.s.system.length; // shorter name next
return a.s.system.localeCompare(b.s.system);
});
setResults(scored.slice(0, 10).map(x => x.s));
}, [query, all, automaton]);
const onSelect = (r: SearchResult) => {

View File

@@ -1,6 +1,7 @@
export class AhoCorasick {
private goto: Array<Map<string, number>> = [new Map()];
private out: Array<boolean> = [false];
private outLen: Array<number> = [0];
private fail: Array<number> = [0];
add(pattern: string) {
@@ -12,6 +13,7 @@ export class AhoCorasick {
this.goto[state].set(ch, newState);
this.goto.push(new Map());
this.out.push(false);
this.outLen.push(0);
this.fail.push(0);
state = newState;
} else {
@@ -19,16 +21,15 @@ export class AhoCorasick {
}
}
this.out[state] = true;
this.outLen[state] = Math.max(this.outLen[state], pattern.length);
}
build() {
const queue: number[] = [];
// Initialize depth 1 states
for (const [ch, s] of this.goto[0]) {
for (const [, s] of this.goto[0]) {
this.fail[s] = 0;
queue.push(s);
}
// BFS
while (queue.length > 0) {
const r = queue.shift()!;
for (const [a, s] of this.goto[r]) {
@@ -40,6 +41,7 @@ export class AhoCorasick {
const f = this.goto[state].get(a) ?? 0;
this.fail[s] = f;
this.out[s] = this.out[s] || this.out[f];
if (this.outLen[f] > this.outLen[s]) this.outLen[s] = this.outLen[f];
}
}
}
@@ -56,4 +58,23 @@ export class AhoCorasick {
}
return false;
}
// Returns the starting index of the first pattern occurrence found while
// scanning `text` left to right, or -1 if no pattern occurs.
//
// The index is a UTF-16 code-unit offset, i.e. the same unit used by
// `String.prototype.indexOf`, `slice`, and by `pattern.length` (which is what
// `outLen` records). The text is still consumed one code point at a time so
// the transition keys stay the same as before; only the position bookkeeping
// is done in code units, so astral characters (e.g. emoji) no longer skew the
// result.
//
// NOTE(review): the scan stops at the first position where any pattern ends
// and uses the longest length recorded for that state. A longer pattern that
// starts earlier but ends later is therefore not reported — confirm this is
// acceptable for callers that rank by match position.
searchFirstIndex(text: string): number {
  let state = 0;
  // UTF-16 offset of the character currently being consumed.
  let offset = 0;
  for (const ch of text) {
    // Follow failure links until a transition on `ch` exists or we hit the root.
    while (state !== 0 && !this.goto[state].has(ch)) {
      state = this.fail[state];
    }
    state = this.goto[state].get(ch) ?? 0;
    if (this.out[state]) {
      const len = this.outLen[state] ?? 0;
      // `offset + ch.length` is the exclusive end of the match in code units;
      // subtracting the pattern length yields its start.
      if (len > 0) return offset + ch.length - len;
      // No length recorded for this accepting state: report the position of
      // the current character.
      return offset;
    }
    // A code point outside the BMP occupies two UTF-16 code units.
    offset += ch.length;
  }
  return -1;
}
}