50 lines
1.1 KiB
Go
50 lines
1.1 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
func ParseHTML(html string) ([]string, error) {
|
|
res := []string{}
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
|
if err != nil {
|
|
return res, fmt.Errorf("failed parsing html: %v", err)
|
|
}
|
|
|
|
doc.Find("tr > td > a").Each(func(i int, s *goquery.Selection) {
|
|
href, exists := s.Attr("href")
|
|
if !exists {
|
|
Warning.Printf("href not found for element %v", s)
|
|
return
|
|
}
|
|
res = append(res, href)
|
|
})
|
|
return res, nil
|
|
}
|
|
|
|
//doc, err := goquery.NewDocumentFromReader(strings.NewReader(*task.Html))
|
|
//if err != nil {
|
|
// Error.Printf("failed to parse html: %v", err)
|
|
// return
|
|
//}
|
|
|
|
//doc.Find("img").Each(func(i int, s *goquery.Selection) {
|
|
// parent := s.Parent()
|
|
// if parent.Is("a") {
|
|
// href, _ := parent.Attr("href")
|
|
// href, ok := fixLink(href)
|
|
// if ok {
|
|
// hw.output <- &ImageDownloadTask{Url: &href, Topic: task.Topic}
|
|
// }
|
|
// } else {
|
|
// src, _ := s.Attr("src")
|
|
// src, ok := fixLink(src)
|
|
// if ok {
|
|
// hw.output <- &ImageDownloadTask{Url: &src, Topic: task.Topic}
|
|
// }
|
|
// }
|
|
//})
|