148 lines
3.1 KiB
Go
148 lines
3.1 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"os"
|
|
"strings"
|
|
|
|
_ "embed"
|
|
)
|
|
|
|
var Error *log.Logger
|
|
var Warning *log.Logger
|
|
|
|
func init() {
|
|
log.SetFlags(log.Lmicroseconds | log.Lshortfile)
|
|
logFile, err := os.Create("main.log")
|
|
if err != nil {
|
|
log.Printf("Error creating log file: %v", err)
|
|
os.Exit(1)
|
|
}
|
|
logger := io.MultiWriter(os.Stdout, logFile)
|
|
log.SetOutput(logger)
|
|
|
|
Error = log.New(io.MultiWriter(logFile, os.Stderr, os.Stdout),
|
|
fmt.Sprintf("%sERROR:%s ", "\033[0;101m", "\033[0m"),
|
|
log.Lmicroseconds|log.Lshortfile)
|
|
Warning = log.New(io.MultiWriter(logFile, os.Stdout),
|
|
fmt.Sprintf("%sWarning:%s ", "\033[0;93m", "\033[0m"),
|
|
log.Lmicroseconds|log.Lshortfile)
|
|
}
|
|
|
|
var db DB
|
|
|
|
//go:embed cache.sql
|
|
var cacheSql string
|
|
|
|
//go:embed compare.sql
|
|
var compareSql string
|
|
|
|
//go:embed unique-noncached-pairs.sql
|
|
var uniqueNonCachedPairsSql string
|
|
|
|
func main() {
|
|
db = DB{
|
|
path: "../data/db.db",
|
|
}
|
|
err := db.Open()
|
|
if err != nil {
|
|
Error.Printf("Error opening database: %v", err)
|
|
return
|
|
}
|
|
defer db.Close()
|
|
|
|
for {
|
|
err := Process(50000)
|
|
if err != nil {
|
|
Error.Printf("Error processing: %v", err)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func ComputeSimilarity(pair CharPair) (float64, error) {
|
|
res := db.readConn.QueryRow(compareSql, pair.A, pair.B)
|
|
|
|
var similarity float64
|
|
err := res.Scan(&similarity)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("error scanning result: %v", err)
|
|
}
|
|
|
|
return similarity, nil
|
|
}
|
|
|
|
// CharPair holds the two string values that identify one pair to compare.
// A and B are passed as the first and second query arguments respectively
// when computing or caching a similarity.
type CharPair struct {
	A, B string
}
|
|
|
|
func GetUncachedPairs(n int) ([]CharPair, error) {
|
|
res := []CharPair{}
|
|
|
|
rows, err := db.readConn.Query(uniqueNonCachedPairsSql, n)
|
|
if err != nil {
|
|
return res, fmt.Errorf("error running query: %v", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
for rows.Next() {
|
|
var a, b string
|
|
err := rows.Scan(&a, &b)
|
|
if err != nil {
|
|
return res, fmt.Errorf("error scanning row: %v", err)
|
|
}
|
|
res = append(res, CharPair{A: a, B: b})
|
|
}
|
|
|
|
return res, nil
|
|
}
|
|
|
|
func CacheSimilarity(pair CharPair, similarity float64) error {
|
|
log.Printf("Caching similarity for %s and %s (%.1f)", pair.A, pair.B, similarity)
|
|
_, err := db.writeConn.Exec("INSERT INTO cached (character1, character2, similarity) VALUES (?, ?, ?);", pair.A, pair.B, similarity)
|
|
if err != nil {
|
|
return fmt.Errorf("error running query: %v", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func Process(n int) error {
|
|
log.Printf("Processing %d pairs", n)
|
|
|
|
pairs, err := GetUncachedPairs(n)
|
|
if err != nil {
|
|
return fmt.Errorf("error getting pairs: %v", err)
|
|
}
|
|
if len(pairs) == 0 {
|
|
return fmt.Errorf("no pairs found, done?")
|
|
}
|
|
|
|
sql := []string{"INSERT INTO cached (character1, character2, similarity) VALUES "}
|
|
for i, pair := range pairs {
|
|
similarity, err := ComputeSimilarity(pair)
|
|
if err != nil {
|
|
return fmt.Errorf("error computing similarity: %v", err)
|
|
}
|
|
// err = CacheSimilarity(pair, similarity)
|
|
// if err != nil {
|
|
// return fmt.Errorf("error caching similarity: %v", err)
|
|
// }
|
|
sql = append(sql, fmt.Sprintf("('%s', '%s', %f)", pair.A, pair.B, similarity))
|
|
if i != len(pairs)-1 {
|
|
sql = append(sql, ",")
|
|
}
|
|
}
|
|
sql = append(sql, ";")
|
|
|
|
log.Printf("Saving %d pairs", n)
|
|
_, err = db.writeConn.Exec(strings.Join(sql, ""))
|
|
if err != nil {
|
|
return fmt.Errorf("error running query: %v", err)
|
|
}
|
|
|
|
return nil
|
|
} |