Initial commit
This commit is contained in:
49
html-parser.go
Normal file
49
html-parser.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
func ParseHTML(html string) ([]string, error) {
|
||||
res := []string{}
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("failed parsing html: %v", err)
|
||||
}
|
||||
|
||||
doc.Find("tr > td > a").Each(func(i int, s *goquery.Selection) {
|
||||
href, exists := s.Attr("href")
|
||||
if !exists {
|
||||
Warning.Printf("href not found for element %v", s)
|
||||
return
|
||||
}
|
||||
res = append(res, href)
|
||||
})
|
||||
return res, nil
|
||||
}
|
||||
|
||||
//doc, err := goquery.NewDocumentFromReader(strings.NewReader(*task.Html))
|
||||
//if err != nil {
|
||||
// Error.Printf("failed to parse html: %v", err)
|
||||
// return
|
||||
//}
|
||||
|
||||
//doc.Find("img").Each(func(i int, s *goquery.Selection) {
|
||||
// parent := s.Parent()
|
||||
// if parent.Is("a") {
|
||||
// href, _ := parent.Attr("href")
|
||||
// href, ok := fixLink(href)
|
||||
// if ok {
|
||||
// hw.output <- &ImageDownloadTask{Url: &href, Topic: task.Topic}
|
||||
// }
|
||||
// } else {
|
||||
// src, _ := s.Attr("src")
|
||||
// src, ok := fixLink(src)
|
||||
// if ok {
|
||||
// hw.output <- &ImageDownloadTask{Url: &src, Topic: task.Topic}
|
||||
// }
|
||||
// }
|
||||
//})
|
Reference in New Issue
Block a user