feat(search): implement Aho-Corasick for efficient substring searching and improve result ordering
This commit is contained in:
@@ -59,15 +59,19 @@ export const SearchDialog: React.FC = () => {
|
|||||||
}, [open, all.length]);
|
}, [open, all.length]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (query.trim().length === 0) { setResults([]); return; }
|
const q = query.trim().toLowerCase();
|
||||||
const out: Array<SearchResult> = [];
|
if (q.length === 0) { setResults([]); return; }
|
||||||
|
const scored: Array<{ s: SearchResult; idx: number }> = [];
|
||||||
for (const r of all) {
|
for (const r of all) {
|
||||||
if (automaton.searchHas(r.system.toLowerCase())) {
|
const idx = automaton.searchFirstIndex(r.system.toLowerCase());
|
||||||
out.push(r);
|
if (idx >= 0) scored.push({ s: r, idx });
|
||||||
if (out.length >= 10) break;
|
|
||||||
}
|
}
|
||||||
}
|
scored.sort((a, b) => {
|
||||||
setResults(out);
|
if (a.idx !== b.idx) return a.idx - b.idx; // earlier index first
|
||||||
|
if (a.s.system.length !== b.s.system.length) return a.s.system.length - b.s.system.length; // shorter name next
|
||||||
|
return a.s.system.localeCompare(b.s.system);
|
||||||
|
});
|
||||||
|
setResults(scored.slice(0, 10).map(x => x.s));
|
||||||
}, [query, all, automaton]);
|
}, [query, all, automaton]);
|
||||||
|
|
||||||
const onSelect = (r: SearchResult) => {
|
const onSelect = (r: SearchResult) => {
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
export class AhoCorasick {
|
export class AhoCorasick {
|
||||||
private goto: Array<Map<string, number>> = [new Map()];
|
private goto: Array<Map<string, number>> = [new Map()];
|
||||||
private out: Array<boolean> = [false];
|
private out: Array<boolean> = [false];
|
||||||
|
private outLen: Array<number> = [0];
|
||||||
private fail: Array<number> = [0];
|
private fail: Array<number> = [0];
|
||||||
|
|
||||||
add(pattern: string) {
|
add(pattern: string) {
|
||||||
@@ -12,6 +13,7 @@ export class AhoCorasick {
|
|||||||
this.goto[state].set(ch, newState);
|
this.goto[state].set(ch, newState);
|
||||||
this.goto.push(new Map());
|
this.goto.push(new Map());
|
||||||
this.out.push(false);
|
this.out.push(false);
|
||||||
|
this.outLen.push(0);
|
||||||
this.fail.push(0);
|
this.fail.push(0);
|
||||||
state = newState;
|
state = newState;
|
||||||
} else {
|
} else {
|
||||||
@@ -19,16 +21,15 @@ export class AhoCorasick {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.out[state] = true;
|
this.out[state] = true;
|
||||||
|
this.outLen[state] = Math.max(this.outLen[state], pattern.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
build() {
|
build() {
|
||||||
const queue: number[] = [];
|
const queue: number[] = [];
|
||||||
// Initialize depth 1 states
|
for (const [, s] of this.goto[0]) {
|
||||||
for (const [ch, s] of this.goto[0]) {
|
|
||||||
this.fail[s] = 0;
|
this.fail[s] = 0;
|
||||||
queue.push(s);
|
queue.push(s);
|
||||||
}
|
}
|
||||||
// BFS
|
|
||||||
while (queue.length > 0) {
|
while (queue.length > 0) {
|
||||||
const r = queue.shift()!;
|
const r = queue.shift()!;
|
||||||
for (const [a, s] of this.goto[r]) {
|
for (const [a, s] of this.goto[r]) {
|
||||||
@@ -40,6 +41,7 @@ export class AhoCorasick {
|
|||||||
const f = this.goto[state].get(a) ?? 0;
|
const f = this.goto[state].get(a) ?? 0;
|
||||||
this.fail[s] = f;
|
this.fail[s] = f;
|
||||||
this.out[s] = this.out[s] || this.out[f];
|
this.out[s] = this.out[s] || this.out[f];
|
||||||
|
if (this.outLen[f] > this.outLen[s]) this.outLen[s] = this.outLen[f];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -56,4 +58,23 @@ export class AhoCorasick {
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Scans `text` and, at the first position where any added pattern ends,
 * returns the offset at which that match starts.
 *
 * Offsets are UTF-16 code-unit offsets, i.e. the same unit as
 * `pattern.length` (which `add` records in `outLen`) and as
 * `String.prototype.indexOf`.
 *
 * When several patterns end at the same position, the start of the longest
 * one is reported, because `outLen` holds the maximum pattern length per
 * state (including lengths inherited through failure links in `build`).
 * Matching compares characters exactly; any case folding is the caller's job.
 *
 * @param text - The text to scan.
 * @returns Start offset of the first match found, or -1 if no pattern occurs.
 */
searchFirstIndex(text: string): number {
  let state = 0;
  // UTF-16 offset of the character currently being consumed.
  let offset = 0;
  for (const ch of text) {
    // Follow failure links until a transition on `ch` exists or we are back at the root.
    while (state !== 0 && !this.goto[state].has(ch)) {
      state = this.fail[state];
    }
    state = this.goto[state].get(ch) ?? 0;
    if (this.out[state]) {
      const len = this.outLen[state] ?? 0;
      // `for...of` yields whole code points, so `ch.length` is 2 for astral
      // characters. Counting in code units keeps the position in the same
      // unit as `len`; counting iterations instead could yield a wrong or
      // even negative start (which callers would read as "no match").
      if (len > 0) return offset + ch.length - len;
      return offset; // fallback: no length recorded for this accepting state
    }
    offset += ch.length;
  }
  return -1;
}
|
||||||
}
|
}
|
Reference in New Issue
Block a user