better mastodon text formatting
parent
2e195e36a6
commit
9221cf8ec8
1
go.mod
1
go.mod
|
@ -5,4 +5,5 @@ go 1.14
|
||||||
require (
|
require (
|
||||||
github.com/PuerkitoBio/goquery v1.5.1
|
github.com/PuerkitoBio/goquery v1.5.1
|
||||||
github.com/mmcdole/gofeed v1.1.3
|
github.com/mmcdole/gofeed v1.1.3
|
||||||
|
golang.org/x/net v0.0.0-20200301022130-244492dfa37a
|
||||||
)
|
)
|
||||||
|
|
9
main.go
9
main.go
|
@ -20,6 +20,7 @@ func main() {
|
||||||
var sources = []*FeedSource{ // TODO: interface Source
|
var sources = []*FeedSource{ // TODO: interface Source
|
||||||
NewFeed("https://tilde.team/~dozens/dreams/rss.xml"),
|
NewFeed("https://tilde.team/~dozens/dreams/rss.xml"),
|
||||||
NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml",
|
NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml",
|
||||||
|
NewFeed("https://tilde.town/~magical/404.xml"),
|
||||||
}
|
}
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
@ -36,6 +37,14 @@ func main() {
|
||||||
for _, src := range sources {
|
for _, src := range sources {
|
||||||
fmt.Println(src.Title, src.Error, src.LastStatus)
|
fmt.Println(src.Title, src.Error, src.LastStatus)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
src := NewMastoSource("https://tilde.town/~magical/masto_test.html")
|
||||||
|
src.update(context.Background())
|
||||||
|
fmt.Println(src.Title, src.Error, src.LastStatus)
|
||||||
|
for _, x := range src.Items {
|
||||||
|
d, _ := time.Parse(time.RFC3339, x.PublishedString)
|
||||||
|
fmt.Println("\t", d.Format(time.Stamp), x.Content)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type Source interface {
|
type Source interface {
|
||||||
|
|
43
mastodon.go
43
mastodon.go
|
@ -1,16 +1,20 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
"golang.org/x/net/html"
|
||||||
|
"golang.org/x/net/html/atom"
|
||||||
)
|
)
|
||||||
|
|
||||||
// https://docs.joinmastodon.org/spec/microformats/
|
// https://docs.joinmastodon.org/spec/microformats/
|
||||||
|
@ -153,13 +157,48 @@ func parseMicroformats(r io.Reader) (*MastoFeed, error) {
|
||||||
}
|
}
|
||||||
feed.Title = doc.Find(".h-feed > .p-name").First().AttrOr("value", "")
|
feed.Title = doc.Find(".h-feed > .p-name").First().AttrOr("value", "")
|
||||||
doc.Find(".h-feed .h-entry").Each(func(i int, elem *goquery.Selection) {
|
doc.Find(".h-feed .h-entry").Each(func(i int, elem *goquery.Selection) {
|
||||||
|
cw := strings.TrimSpace(text(elem.Find(".p-summary")))
|
||||||
|
if cw != "" {
|
||||||
|
cw = "[" + cw + "] "
|
||||||
|
}
|
||||||
feed.Items = append(feed.Items, &MastoItem{
|
feed.Items = append(feed.Items, &MastoItem{
|
||||||
Title: "",
|
Title: "",
|
||||||
Content: elem.Find(".e-content").Text(),
|
Content: cw + text(elem.Find(".e-content")),
|
||||||
Link: elem.Find("a.u-url.u-uid").AttrOr("href", ""),
|
Link: elem.Find("a.u-url.u-uid").AttrOr("href", ""),
|
||||||
Author: elem.Find(".p-author .p-name").Text(),
|
Author: text(elem.Find(".p-author .p-name")),
|
||||||
PublishedString: elem.Find("data.dt-published").AttrOr("value", ""),
|
PublishedString: elem.Find("data.dt-published").AttrOr("value", ""),
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
return feed, nil
|
return feed, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Text gets the combined text contents of each element in the set of matched
|
||||||
|
// elements, including their descendants.
|
||||||
|
func text(s *goquery.Selection) string {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
|
||||||
|
// Slightly optimized vs calling Each: no single selection object created
|
||||||
|
var f func(*html.Node)
|
||||||
|
f = func(n *html.Node) {
|
||||||
|
if n.Type == html.TextNode {
|
||||||
|
// Keep newlines and spaces, like jQuery
|
||||||
|
buf.WriteString(n.Data)
|
||||||
|
} else if n.Type == html.ElementNode && n.DataAtom == atom.Br {
|
||||||
|
//buf.WriteString("\n")
|
||||||
|
buf.WriteString(" ")
|
||||||
|
} else if n.Type == html.ElementNode && n.DataAtom == atom.P && n.PrevSibling != nil {
|
||||||
|
//buf.WriteString("\n\n")
|
||||||
|
buf.WriteString(" ")
|
||||||
|
}
|
||||||
|
if n.FirstChild != nil {
|
||||||
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||||
|
f(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, n := range s.Nodes {
|
||||||
|
f(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf.String()
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue