ignore script tags and such when extracting text

master
magical 2021-12-31 22:26:28 +00:00
parent ab1da46096
commit 2182e4e739
1 changed file with 2 additions and 0 deletions

View File

@@ -193,6 +193,8 @@ func text(s *goquery.Selection) string {
 } else if n.Type == html.ElementNode && n.DataAtom == atom.P && n.PrevSibling != nil {
 //buf.WriteString("\n\n")
 buf.WriteString(" ")
+} else if n.Type == html.ElementNode && (n.DataAtom == atom.Script || n.DataAtom == atom.Style || n.DataAtom == atom.Template) {
+// nothing
 }
 if n.FirstChild != nil {
 for c := n.FirstChild; c != nil; c = c.NextSibling {