better mastodon text formatting

master
magical 2021-12-31 08:48:18 +00:00
parent 2e195e36a6
commit 9221cf8ec8
3 changed files with 51 additions and 2 deletions

1
go.mod
View File

@@ -5,4 +5,5 @@ go 1.14
require ( require (
github.com/PuerkitoBio/goquery v1.5.1 github.com/PuerkitoBio/goquery v1.5.1
github.com/mmcdole/gofeed v1.1.3 github.com/mmcdole/gofeed v1.1.3
golang.org/x/net v0.0.0-20200301022130-244492dfa37a
) )

View File

@@ -20,6 +20,7 @@ func main() {
var sources = []*FeedSource{ // TODO: interface Source var sources = []*FeedSource{ // TODO: interface Source
NewFeed("https://tilde.team/~dozens/dreams/rss.xml"), NewFeed("https://tilde.team/~dozens/dreams/rss.xml"),
NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml", NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml",
NewFeed("https://tilde.town/~magical/404.xml"),
} }
var wg sync.WaitGroup var wg sync.WaitGroup
@@ -36,6 +37,14 @@ func main() {
for _, src := range sources { for _, src := range sources {
fmt.Println(src.Title, src.Error, src.LastStatus) fmt.Println(src.Title, src.Error, src.LastStatus)
} }
src := NewMastoSource("https://tilde.town/~magical/masto_test.html")
src.update(context.Background())
fmt.Println(src.Title, src.Error, src.LastStatus)
for _, x := range src.Items {
d, _ := time.Parse(time.RFC3339, x.PublishedString)
fmt.Println("\t", d.Format(time.Stamp), x.Content)
}
} }
type Source interface { type Source interface {

View File

@@ -1,16 +1,20 @@
package main package main
import ( import (
"bytes"
"context" "context"
"fmt" "fmt"
"io" "io"
"log" "log"
"net/http" "net/http"
"sort" "sort"
"strings"
"sync" "sync"
"time" "time"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
) )
// https://docs.joinmastodon.org/spec/microformats/ // https://docs.joinmastodon.org/spec/microformats/
@@ -153,13 +157,48 @@ func parseMicroformats(r io.Reader) (*MastoFeed, error) {
} }
feed.Title = doc.Find(".h-feed > .p-name").First().AttrOr("value", "") feed.Title = doc.Find(".h-feed > .p-name").First().AttrOr("value", "")
doc.Find(".h-feed .h-entry").Each(func(i int, elem *goquery.Selection) { doc.Find(".h-feed .h-entry").Each(func(i int, elem *goquery.Selection) {
cw := strings.TrimSpace(text(elem.Find(".p-summary")))
if cw != "" {
cw = "[" + cw + "] "
}
feed.Items = append(feed.Items, &MastoItem{ feed.Items = append(feed.Items, &MastoItem{
Title: "", Title: "",
Content: elem.Find(".e-content").Text(), Content: cw + text(elem.Find(".e-content")),
Link: elem.Find("a.u-url.u-uid").AttrOr("href", ""), Link: elem.Find("a.u-url.u-uid").AttrOr("href", ""),
Author: elem.Find(".p-author .p-name").Text(), Author: text(elem.Find(".p-author .p-name")),
PublishedString: elem.Find("data.dt-published").AttrOr("value", ""), PublishedString: elem.Find("data.dt-published").AttrOr("value", ""),
}) })
}) })
return feed, nil return feed, nil
} }
// text returns the concatenated text of every node in s together with all of
// their descendants, visited in document order.
//
// Text nodes are copied verbatim, whitespace included. A <br> element, and a
// <p> element that has a previous sibling node, each contribute a single
// space so that adjacent lines and paragraphs do not run together.
func text(s *goquery.Selection) string {
	var out bytes.Buffer

	// walk appends the text for node and then recurses into its children.
	var walk func(node *html.Node)
	walk = func(node *html.Node) {
		switch node.Type {
		case html.TextNode:
			out.WriteString(node.Data)
		case html.ElementNode:
			switch node.DataAtom {
			case atom.Br:
				// Line break: flattened to a space rather than "\n".
				out.WriteByte(' ')
			case atom.P:
				// Paragraph boundary: flattened to a space rather than
				// "\n\n"; skipped when the <p> is the first node of its parent.
				if node.PrevSibling != nil {
					out.WriteByte(' ')
				}
			}
		}
		for child := node.FirstChild; child != nil; child = child.NextSibling {
			walk(child)
		}
	}

	for _, root := range s.Nodes {
		walk(root)
	}
	return out.String()
}