better mastodon text formatting

master
magical 2021-12-31 08:48:18 +00:00
parent 2e195e36a6
commit 9221cf8ec8
3 changed files with 51 additions and 2 deletions

1
go.mod
View File

@ -5,4 +5,5 @@ go 1.14
require (
github.com/PuerkitoBio/goquery v1.5.1
github.com/mmcdole/gofeed v1.1.3
golang.org/x/net v0.0.0-20200301022130-244492dfa37a
)

View File

@ -20,6 +20,7 @@ func main() {
var sources = []*FeedSource{ // TODO: interface Source
NewFeed("https://tilde.team/~dozens/dreams/rss.xml"),
NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml",
NewFeed("https://tilde.town/~magical/404.xml"),
}
var wg sync.WaitGroup
@ -36,6 +37,14 @@ func main() {
for _, src := range sources {
fmt.Println(src.Title, src.Error, src.LastStatus)
}
src := NewMastoSource("https://tilde.town/~magical/masto_test.html")
src.update(context.Background())
fmt.Println(src.Title, src.Error, src.LastStatus)
for _, x := range src.Items {
d, _ := time.Parse(time.RFC3339, x.PublishedString)
fmt.Println("\t", d.Format(time.Stamp), x.Content)
}
}
type Source interface {

View File

@ -1,16 +1,20 @@
package main
import (
"bytes"
"context"
"fmt"
"io"
"log"
"net/http"
"sort"
"strings"
"sync"
"time"
"github.com/PuerkitoBio/goquery"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// https://docs.joinmastodon.org/spec/microformats/
@ -153,13 +157,48 @@ func parseMicroformats(r io.Reader) (*MastoFeed, error) {
}
feed.Title = doc.Find(".h-feed > .p-name").First().AttrOr("value", "")
doc.Find(".h-feed .h-entry").Each(func(i int, elem *goquery.Selection) {
cw := strings.TrimSpace(text(elem.Find(".p-summary")))
if cw != "" {
cw = "[" + cw + "] "
}
feed.Items = append(feed.Items, &MastoItem{
Title: "",
Content: elem.Find(".e-content").Text(),
Content: cw + text(elem.Find(".e-content")),
Link: elem.Find("a.u-url.u-uid").AttrOr("href", ""),
Author: elem.Find(".p-author .p-name").Text(),
Author: text(elem.Find(".p-author .p-name")),
PublishedString: elem.Find("data.dt-published").AttrOr("value", ""),
})
})
return feed, nil
}
// text flattens every node in the selection, together with all of its
// descendants, into one string. Text nodes are copied through unchanged
// (whitespace and newlines included). A <br> element, and any <p> element
// that has a preceding sibling, each contribute a single space so that
// lines and paragraphs do not run together in the output.
func text(s *goquery.Selection) string {
	var out bytes.Buffer

	// walk appends the text of node and then visits its children in
	// document order.
	var walk func(node *html.Node)
	walk = func(node *html.Node) {
		switch node.Type {
		case html.TextNode:
			out.WriteString(node.Data)
		case html.ElementNode:
			isBreak := node.DataAtom == atom.Br
			// Only paragraphs that follow some sibling get a separator.
			isLaterParagraph := node.DataAtom == atom.P && node.PrevSibling != nil
			if isBreak || isLaterParagraph {
				out.WriteString(" ")
			}
		}
		for child := node.FirstChild; child != nil; child = child.NextSibling {
			walk(child)
		}
	}

	for _, root := range s.Nodes {
		walk(root)
	}
	return out.String()
}