From 7a04d16a2e124b8317563606a29d4f4211f8b07c Mon Sep 17 00:00:00 2001 From: PhatPhuckDave Date: Wed, 6 Nov 2024 00:40:28 +0100 Subject: [PATCH] Wire everything together, now it's amen --- .gitignore | 1 + function.go | 5 +++-- html-getter.go | 12 ++++++++--- main.go | 55 +++++++++++++++++++++++++++++++++----------------- 4 files changed, 49 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index 2ffe895..373272b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ main.log test.html doc.html +out diff --git a/function.go b/function.go index a69d467..a4a14a4 100644 --- a/function.go +++ b/function.go @@ -4,6 +4,7 @@ import ( _ "embed" "fmt" "os" + "path/filepath" "text/template" ) @@ -44,12 +45,12 @@ type ( func (f *Function) ResolveFileName() string { return f.Name + ".lua" } -func (f *Function) WriteFile() error { +func (f *Function) WriteFile(root string) error { if f.Name == "" { return fmt.Errorf("function name is empty of %+v", f) } - file, err := os.Create(f.ResolveFileName()) + file, err := os.Create(filepath.Join(root, f.ResolveFileName())) if err != nil { return err } diff --git a/html-getter.go b/html-getter.go index 9e381b8..0477213 100644 --- a/html-getter.go +++ b/html-getter.go @@ -1,6 +1,7 @@ package main import ( + "context" "fmt" "io" "net/http" @@ -8,9 +9,14 @@ import ( "golang.org/x/time/rate" ) -var LIMITER = rate.NewLimiter(rate.Limit(1), 2) +var LIMITER = rate.NewLimiter(rate.Limit(8), 16) + +func Fetch(url string) (string, error) { + err := LIMITER.Wait(context.Background()) + if err != nil { + return "", fmt.Errorf("Error waiting for rate limiter: %v", err) + } -func FetchFull(url string) (string, error) { res, err := http.Get(url) if err != nil { return "", fmt.Errorf("Error fetching %s: %v", url, err) @@ -31,4 +37,4 @@ func FetchFull(url string) (string, error) { func FetchDocs(url string) (string, error) { return "", nil -} \ No newline at end of file +} diff --git a/main.go b/main.go index 128c49a..69a7dd1 100644 --- a/main.go +++ b/main.go @@ -6,9 +6,11 @@ import ( "io" "log" "os" + "regexp" ) -var url = `https://wowprogramming.com/docs/api.html` +var rootUrl = `https://wowprogramming.com/` +var apiUrl = fmt.Sprintf("%s/docs/api.html", rootUrl) var Error *log.Logger var Warning *log.Logger @@ -37,30 +39,45 @@ var html string //go:embed doc.html var doc string +var pageNameExtractor = regexp.MustCompile(`\/([^/]+).html`) +var outDir = "out" + func main() { - //res, err := FetchFull(url) - //if err != nil { - // Error.Printf("Error fetching %s: %v", url, err) - // return - //} + res, err := Fetch(apiUrl) + if err != nil { + Error.Printf("Error fetching %s: %v", apiUrl, err) + return + } //os.WriteFile("test.html", []byte(res), 0644) - // foo, err := ParseHTML(html) - // if err != nil { - // Error.Printf("Error parsing HTML: %v", err) - // return - // } - // log.Printf("%#v", foo) - foo, err := ParseDoc(doc) + pages, err := ParseHTML(res) if err != nil { Error.Printf("Error parsing HTML: %v", err) return } - foo.Name = "JoinPermanentChannel" - log.Printf("%#v", foo) - err = foo.WriteFile() - if err != nil { - Error.Printf("Error writing file: %v", err) - return + for _, page := range pages { + log.Printf("Processing page %s", page) + pname := pageNameExtractor.FindStringSubmatch(page) + if len(pname) != 2 { + Error.Printf("Failed to extract page name from %s", page) + continue + } + + res, err := Fetch(rootUrl + page) + if err != nil { + Error.Printf("Error fetching %s: %v", rootUrl+page, err) + } + + function, err := ParseDoc(res) + if err != nil { + Error.Printf("Error parsing HTML: %v", err) + continue + } + function.Name = pname[1] + err = function.WriteFile(outDir) + if err != nil { + Error.Printf("Error writing file: %v", err) + continue + } } }