Files
2025-05-18 23:48:22 +02:00

148 lines
3.1 KiB
Go

package main
import (
"fmt"
"io"
"log"
"os"
"strings"
_ "embed"
)
// Package-wide loggers for problems; both are configured by init.
var (
	// Error writes to the log file, stderr and stdout with a highlighted
	// "ERROR:" prefix.
	Error *log.Logger
	// Warning writes to the log file and stdout with a highlighted
	// "Warning:" prefix.
	Warning *log.Logger
)

// init configures logging for the whole program: the standard logger is
// mirrored to stdout and to a freshly created main.log, and the Error and
// Warning loggers are built on top of the same file. The process exits with
// status 1 if the log file cannot be created.
func init() {
	const (
		logFlags    = log.Lmicroseconds | log.Lshortfile
		ansiErr     = "\033[0;101m"
		ansiWarning = "\033[0;93m"
		ansiReset   = "\033[0m"
	)

	log.SetFlags(logFlags)

	logFile, err := os.Create("main.log")
	if err != nil {
		// Fatalf is Printf followed by os.Exit(1).
		log.Fatalf("Error creating log file: %v", err)
	}
	log.SetOutput(io.MultiWriter(os.Stdout, logFile))

	errorSink := io.MultiWriter(logFile, os.Stderr, os.Stdout)
	errorPrefix := fmt.Sprintf("%sERROR:%s ", ansiErr, ansiReset)
	Error = log.New(errorSink, errorPrefix, logFlags)

	warningSink := io.MultiWriter(logFile, os.Stdout)
	warningPrefix := fmt.Sprintf("%sWarning:%s ", ansiWarning, ansiReset)
	Warning = log.New(warningSink, warningPrefix, logFlags)
}
// db is the package-wide database handle. main assigns and opens it; the
// query helpers in this file read through db.readConn and write through
// db.writeConn. The DB type itself is declared outside this section.
var db DB

// cacheSql holds the contents of cache.sql, embedded at build time. It is not
// referenced in the visible part of this file.
//
//go:embed cache.sql
var cacheSql string

// compareSql holds the contents of compare.sql, embedded at build time.
// ComputeSimilarity runs it with the two members of a pair as parameters.
//
//go:embed compare.sql
var compareSql string

// uniqueNonCachedPairsSql holds the contents of unique-noncached-pairs.sql,
// embedded at build time. GetUncachedPairs runs it with a single integer
// parameter.
//
//go:embed unique-noncached-pairs.sql
var uniqueNonCachedPairsSql string
// main opens the database at ../data/db.db and then calls Process in an
// endless loop, one fixed-size batch at a time. The loop ends (and main
// returns) the first time Process reports an error, which is logged through
// the Error logger.
func main() {
	const batchSize = 50000

	db = DB{path: "../data/db.db"}
	if err := db.Open(); err != nil {
		Error.Printf("Error opening database: %v", err)
		return
	}
	defer db.Close()

	for {
		if err := Process(batchSize); err != nil {
			Error.Printf("Error processing: %v", err)
			return
		}
	}
}
// ComputeSimilarity runs the embedded compare.sql query with pair.A and pair.B
// as its two parameters and returns the first column of the resulting row as
// a float64.
//
// On failure it returns 0 and an error that wraps the underlying scan error
// with %w, so callers can inspect the cause with errors.Is / errors.As
// (e.g. a "no rows" error, assuming readConn is a database/sql handle).
func ComputeSimilarity(pair CharPair) (float64, error) {
	var similarity float64
	row := db.readConn.QueryRow(compareSql, pair.A, pair.B)
	if err := row.Scan(&similarity); err != nil {
		return 0, fmt.Errorf("error scanning result: %w", err)
	}
	return similarity, nil
}
// CharPair is a pair of string identifiers whose similarity is computed and
// cached. GetUncachedPairs fills A and B from the two columns of each result
// row, and Process stores them in the character1 and character2 columns of
// the cached table.
type CharPair struct {
	A, B string
}
// GetUncachedPairs runs the embedded unique-noncached-pairs.sql query with n
// as its only parameter and returns one CharPair per result row, taking A and
// B from the row's two columns.
//
// On success the returned slice is non-nil, and empty when the query yields
// no rows. On any error (running the query, scanning a row, or an error
// reported by the row iterator) it returns a nil slice and an error that
// wraps the cause with %w.
func GetUncachedPairs(n int) ([]CharPair, error) {
	rows, err := db.readConn.Query(uniqueNonCachedPairsSql, n)
	if err != nil {
		return nil, fmt.Errorf("error running query: %w", err)
	}
	defer rows.Close()

	pairs := []CharPair{}
	for rows.Next() {
		var pair CharPair
		if err := rows.Scan(&pair.A, &pair.B); err != nil {
			return nil, fmt.Errorf("error scanning row: %w", err)
		}
		pairs = append(pairs, pair)
	}
	// Next returns false both at end of data and on failure; without this
	// check a failed iteration would be reported as a short, successful result.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("error iterating rows: %w", err)
	}
	return pairs, nil
}
// CacheSimilarity logs the pair and its similarity, then inserts a single row
// (pair.A, pair.B, similarity) into the cached table through db.writeConn,
// binding all three values as statement parameters.
//
// It returns nil on success, or an error wrapping the underlying Exec error
// with %w so callers can inspect the cause with errors.Is / errors.As.
func CacheSimilarity(pair CharPair, similarity float64) error {
	const insertSQL = "INSERT INTO cached (character1, character2, similarity) VALUES (?, ?, ?);"

	log.Printf("Caching similarity for %s and %s (%.1f)", pair.A, pair.B, similarity)
	if _, err := db.writeConn.Exec(insertSQL, pair.A, pair.B, similarity); err != nil {
		return fmt.Errorf("error running query: %w", err)
	}
	return nil
}
func Process(n int) error {
log.Printf("Processing %d pairs", n)
pairs, err := GetUncachedPairs(n)
if err != nil {
return fmt.Errorf("error getting pairs: %v", err)
}
if len(pairs) == 0 {
return fmt.Errorf("no pairs found, done?")
}
sql := []string{"INSERT INTO cached (character1, character2, similarity) VALUES "}
for i, pair := range pairs {
similarity, err := ComputeSimilarity(pair)
if err != nil {
return fmt.Errorf("error computing similarity: %v", err)
}
// err = CacheSimilarity(pair, similarity)
// if err != nil {
// return fmt.Errorf("error caching similarity: %v", err)
// }
sql = append(sql, fmt.Sprintf("('%s', '%s', %f)", pair.A, pair.B, similarity))
if i != len(pairs)-1 {
sql = append(sql, ",")
}
}
sql = append(sql, ";")
log.Printf("Saving %d pairs", n)
_, err = db.writeConn.Exec(strings.Join(sql, ""))
if err != nil {
return fmt.Errorf("error running query: %v", err)
}
return nil
}