better mastodon text formatting
parent
2e195e36a6
commit
9221cf8ec8
1
go.mod
1
go.mod
|
@ -5,4 +5,5 @@ go 1.14
|
|||
require (
|
||||
github.com/PuerkitoBio/goquery v1.5.1
|
||||
github.com/mmcdole/gofeed v1.1.3
|
||||
golang.org/x/net v0.0.0-20200301022130-244492dfa37a
|
||||
)
|
||||
|
|
9
main.go
9
main.go
|
@ -20,6 +20,7 @@ func main() {
|
|||
var sources = []*FeedSource{ // TODO: interface Source
|
||||
NewFeed("https://tilde.team/~dozens/dreams/rss.xml"),
|
||||
NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml",
|
||||
NewFeed("https://tilde.town/~magical/404.xml"),
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
@ -36,6 +37,14 @@ func main() {
|
|||
for _, src := range sources {
|
||||
fmt.Println(src.Title, src.Error, src.LastStatus)
|
||||
}
|
||||
|
||||
src := NewMastoSource("https://tilde.town/~magical/masto_test.html")
|
||||
src.update(context.Background())
|
||||
fmt.Println(src.Title, src.Error, src.LastStatus)
|
||||
for _, x := range src.Items {
|
||||
d, _ := time.Parse(time.RFC3339, x.PublishedString)
|
||||
fmt.Println("\t", d.Format(time.Stamp), x.Content)
|
||||
}
|
||||
}
|
||||
|
||||
type Source interface {
|
||||
|
|
43
mastodon.go
43
mastodon.go
|
@ -1,16 +1,20 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// https://docs.joinmastodon.org/spec/microformats/
|
||||
|
@ -153,13 +157,48 @@ func parseMicroformats(r io.Reader) (*MastoFeed, error) {
|
|||
}
|
||||
feed.Title = doc.Find(".h-feed > .p-name").First().AttrOr("value", "")
|
||||
doc.Find(".h-feed .h-entry").Each(func(i int, elem *goquery.Selection) {
|
||||
cw := strings.TrimSpace(text(elem.Find(".p-summary")))
|
||||
if cw != "" {
|
||||
cw = "[" + cw + "] "
|
||||
}
|
||||
feed.Items = append(feed.Items, &MastoItem{
|
||||
Title: "",
|
||||
Content: elem.Find(".e-content").Text(),
|
||||
Content: cw + text(elem.Find(".e-content")),
|
||||
Link: elem.Find("a.u-url.u-uid").AttrOr("href", ""),
|
||||
Author: elem.Find(".p-author .p-name").Text(),
|
||||
Author: text(elem.Find(".p-author .p-name")),
|
||||
PublishedString: elem.Find("data.dt-published").AttrOr("value", ""),
|
||||
})
|
||||
})
|
||||
return feed, nil
|
||||
}
|
||||
|
||||
// Text gets the combined text contents of each element in the set of matched
|
||||
// elements, including their descendants.
|
||||
func text(s *goquery.Selection) string {
|
||||
var buf bytes.Buffer
|
||||
|
||||
// Slightly optimized vs calling Each: no single selection object created
|
||||
var f func(*html.Node)
|
||||
f = func(n *html.Node) {
|
||||
if n.Type == html.TextNode {
|
||||
// Keep newlines and spaces, like jQuery
|
||||
buf.WriteString(n.Data)
|
||||
} else if n.Type == html.ElementNode && n.DataAtom == atom.Br {
|
||||
//buf.WriteString("\n")
|
||||
buf.WriteString(" ")
|
||||
} else if n.Type == html.ElementNode && n.DataAtom == atom.P && n.PrevSibling != nil {
|
||||
//buf.WriteString("\n\n")
|
||||
buf.WriteString(" ")
|
||||
}
|
||||
if n.FirstChild != nil {
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
f(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, n := range s.Nodes {
|
||||
f(n)
|
||||
}
|
||||
|
||||
return buf.String()
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue