package main import ( "fmt" "log" "strings" "github.com/PuerkitoBio/goquery" "github.com/davecgh/go-spew/spew" ) func ParseHTML(html string) ([]string, error) { res := []string{} doc, err := goquery.NewDocumentFromReader(strings.NewReader(html)) if err != nil { return res, fmt.Errorf("failed parsing html: %v", err) } log.Printf("Looking for links in %s", html) doc.Find("tr > td > a").Each(func(i int, s *goquery.Selection) { href, exists := s.Attr("href") if !exists { Warning.Printf("href not found for element %v", s) return } res = append(res, href) }) log.Printf("Found %d links", len(res)) return res, nil } func ParseDoc(html string) (Function, error) { res := Function{} log.Printf("Parsing doc %s", html) doc, err := goquery.NewDocumentFromReader(strings.NewReader(html)) if err != nil { return res, fmt.Errorf("failed parsing html: %v", err) } doc.Find("div.api-desc").Each(func(i int, s *goquery.Selection) { log.Printf("Parsing description %s", s.Text()) desc := s.Text() desc = strings.ReplaceAll(desc, "\n", " ") desc = strings.ReplaceAll(desc, "\t", " ") desc = strings.ReplaceAll(desc, " ", " ") desc = strings.TrimSpace(desc) res.Description = desc }) isArgs := false isReturn := false doc.Find("div.api-listing > p, div.api-listing > ul").Each(func(i int, s *goquery.Selection) { if s.Is("p") { switch s.Text() { case "Arguments:": isArgs = true isReturn = false return case "Returns:": isReturn = true isArgs = false return default: Warning.Printf("Unknown p tag: %s", s.Text()) return } } if s.Is("ul") { params, err := parseUl(s) if err != nil { Error.Printf("Error parsing ul %s: %v", s.Text(), err) return } if isArgs { res.Arguments = params } else if isReturn { res.Returns = params } } }) spew.Dump(res) return res, nil } func parseUl(ul *goquery.Selection) ([]Parameter, error) { res := []Parameter{} ul.Find("li").Each(func(i int, s *goquery.Selection) { log.Printf("Parsing li %s", s.Text()) param := Parameter{} codes := s.Find("code") if codes.Length() == 0 { Warning.Printf("No code found for %s", s.Text()) return } code := codes.First() name := code.Text() if name == "" { Warning.Printf("No name found for %s", s.Text()) return } param.Name = name if codes.Length() > 1 { code := codes.Last() typ := code.Text() if typ == "" { Warning.Printf("No type found for %s", s.Text()) return } param.Type = typ } log.Printf("Found param %+v", param) res = append(res, param) }) return res, nil }