From 9221cf8ec8f721eaafb1d7f3b590bdd4a6fa6130 Mon Sep 17 00:00:00 2001 From: magical Date: Fri, 31 Dec 2021 08:48:18 +0000 Subject: [PATCH] better mastodon text formatting --- go.mod | 1 + main.go | 9 +++++++++ mastodon.go | 43 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index f392835..ff594b0 100644 --- a/go.mod +++ b/go.mod @@ -5,4 +5,5 @@ go 1.14 require ( github.com/PuerkitoBio/goquery v1.5.1 github.com/mmcdole/gofeed v1.1.3 + golang.org/x/net v0.0.0-20200301022130-244492dfa37a ) diff --git a/main.go b/main.go index 1bc6741..5d267a4 100644 --- a/main.go +++ b/main.go @@ -20,6 +20,7 @@ func main() { var sources = []*FeedSource{ // TODO: interface Source NewFeed("https://tilde.team/~dozens/dreams/rss.xml"), NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml", + NewFeed("https://tilde.town/~magical/404.xml"), } var wg sync.WaitGroup @@ -36,6 +37,14 @@ func main() { for _, src := range sources { fmt.Println(src.Title, src.Error, src.LastStatus) } + + src := NewMastoSource("https://tilde.town/~magical/masto_test.html") + src.update(context.Background()) + fmt.Println(src.Title, src.Error, src.LastStatus) + for _, x := range src.Items { + d, _ := time.Parse(time.RFC3339, x.PublishedString) + fmt.Println("\t", d.Format(time.Stamp), x.Content) + } } type Source interface { diff --git a/mastodon.go b/mastodon.go index 84953d4..6d5befe 100644 --- a/mastodon.go +++ b/mastodon.go @@ -1,16 +1,20 @@ package main import ( + "bytes" "context" "fmt" "io" "log" "net/http" "sort" + "strings" "sync" "time" "github.com/PuerkitoBio/goquery" + "golang.org/x/net/html" + "golang.org/x/net/html/atom" ) // https://docs.joinmastodon.org/spec/microformats/ @@ -153,13 +157,48 @@ func parseMicroformats(r io.Reader) (*MastoFeed, error) { } feed.Title = doc.Find(".h-feed > .p-name").First().AttrOr("value", "") doc.Find(".h-feed .h-entry").Each(func(i int, elem *goquery.Selection) { + cw := strings.TrimSpace(text(elem.Find(".p-summary"))) + if cw != "" { + cw = "[" + cw + "] " + } feed.Items = append(feed.Items, &MastoItem{ Title: "", - Content: elem.Find(".e-content").Text(), + Content: cw + text(elem.Find(".e-content")), Link: elem.Find("a.u-url.u-uid").AttrOr("href", ""), - Author: elem.Find(".p-author .p-name").Text(), + Author: text(elem.Find(".p-author .p-name")), PublishedString: elem.Find("data.dt-published").AttrOr("value", ""), }) }) return feed, nil } + +// Text gets the combined text contents of each element in the set of matched +// elements, including their descendants. +func text(s *goquery.Selection) string { + var buf bytes.Buffer + + // Slightly optimized vs calling Each: no single selection object created + var f func(*html.Node) + f = func(n *html.Node) { + if n.Type == html.TextNode { + // Keep newlines and spaces, like jQuery + buf.WriteString(n.Data) + } else if n.Type == html.ElementNode && n.DataAtom == atom.Br { + //buf.WriteString("\n") + buf.WriteString(" ") + } else if n.Type == html.ElementNode && n.DataAtom == atom.P && n.PrevSibling != nil { + //buf.WriteString("\n\n") + buf.WriteString(" ") + } + if n.FirstChild != nil { + for c := n.FirstChild; c != nil; c = c.NextSibling { + f(c) + } + } + } + for _, n := range s.Nodes { + f(n) + } + + return buf.String() +}