Compare commits
17 Commits
ce9960c06a
...
ae160942bb
Author | SHA1 | Date |
---|---|---|
magical | ae160942bb | |
magical | 5b76339e40 | |
magical | ddb93ee852 | |
magical | ad7a1edaf1 | |
magical | 2f3ae0c09b | |
magical | 4017f94b56 | |
magical | a606a15903 | |
magical | df2ebd5b6a | |
magical | 2182e4e739 | |
magical | ab1da46096 | |
magical | ac5bfa7ad7 | |
magical | 808cc164f1 | |
magical | c305e96334 | |
magical | 9221cf8ec8 | |
magical | 2e195e36a6 | |
magical | 6a8856c170 | |
magical | d260db550d |
|
@ -0,0 +1,64 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
)
|
||||||
|
|
||||||
|
// HTTPCache currently declares no fields and has no methods in this file.
// NOTE(review): presumably a placeholder for a future response cache — confirm.
type HTTPCache struct{}
|
||||||
|
|
||||||
|
// Based on http.MaxBytesReader

// ErrResponseTooLarge is returned by a MaxBytesReader's Read once the
// underlying reader has produced more bytes than the configured limit.
// It is a single package-level value so callers can detect it with
// errors.Is even after it has been wrapped.
var ErrResponseTooLarge = errors.New("http: response body too large")

// MaxBytesReader is similar to io.LimitReader but is intended for
// limiting the size of response bodies. In contrast to io.LimitReader,
// MaxBytesReader's result is a ReadCloser, returns a non-EOF error
// (ErrResponseTooLarge) for a Read beyond the limit, and closes the
// underlying reader when its Close method is called.
//
// MaxBytesReader prevents a server from accidentally or maliciously
// sending a large response and wasting local resources.
//
// A negative n is treated as 0.
func MaxBytesReader(r io.ReadCloser, n int64) io.ReadCloser {
	if n < 0 { // Treat negative limits as equivalent to 0.
		n = 0
	}
	return &maxBytesReader{r: r, n: n}
}

// maxBytesReader is the io.ReadCloser returned by MaxBytesReader.
type maxBytesReader struct {
	r   io.ReadCloser // underlying reader
	n   int64         // max bytes remaining
	err error         // sticky error
}

// Read reads from the underlying reader, handing out at most the remaining
// allowance. Any error it returns, including io.EOF from the underlying
// reader and ErrResponseTooLarge, is sticky: every later call returns
// (0, that error) without touching the underlying reader again.
func (l *maxBytesReader) Read(p []byte) (n int, err error) {
	if l.err != nil {
		return 0, l.err
	}
	if len(p) == 0 {
		return 0, nil
	}
	// If they asked for a 32KB read but only 5 bytes are remaining, there
	// is no need to read 32KB. 6 bytes will answer the question of whether
	// we hit the limit or went past it.
	if int64(len(p)) > l.n+1 {
		p = p[:l.n+1]
	}
	n, err = l.r.Read(p)

	if int64(n) <= l.n {
		// Still within the limit: account for the bytes and remember
		// whatever error the underlying reader reported.
		l.n -= int64(n)
		l.err = err
		return n, err
	}

	// The underlying reader produced one byte more than allowed: hand out
	// only the permitted bytes and fail this and every later call.
	n = int(l.n)
	l.n = 0

	l.err = ErrResponseTooLarge
	return n, l.err
}

// Close closes the underlying reader and returns its error.
func (l *maxBytesReader) Close() error {
	return l.r.Close()
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// feedSourceTests lists the feed fixtures exercised by TestFeedSource: the
// path requested from the test file server, the feed title expected after
// parsing, and the expected text of the first item.
var feedSourceTests = []struct {
	filename, title, itemText string
}{
	{"/xkcd.xml", "xkcd.com", "Occam"},
}
|
||||||
|
|
||||||
|
func TestFeedSource(t *testing.T) {
|
||||||
|
server := httptest.NewServer(http.FileServer(http.Dir("testdata")))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
for _, tt := range feedSourceTests {
|
||||||
|
src := NewFeed(server.URL + tt.filename)
|
||||||
|
src.update(ctx)
|
||||||
|
if err := src.Error; err != nil {
|
||||||
|
t.Errorf("%v: Error = %v, expected nil", tt.filename, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if src.Title != tt.title {
|
||||||
|
t.Errorf("%v: Title = %v expected %v", tt.filename, src.Title, tt.title)
|
||||||
|
}
|
||||||
|
items := src.GetItems()
|
||||||
|
if len(items) == 0 {
|
||||||
|
t.Errorf("%v: GetItems() = %v expected len > 0", tt.filename, items)
|
||||||
|
} else {
|
||||||
|
x := items[0]
|
||||||
|
if x.Text != tt.itemText {
|
||||||
|
t.Errorf("%v: item 0 Text = %q expected %q", tt.filename, x.Text, tt.itemText)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
6
go.mod
6
go.mod
|
@ -2,4 +2,8 @@ module git.tilde.town/magical/feedget
|
||||||
|
|
||||||
go 1.14
|
go 1.14
|
||||||
|
|
||||||
require github.com/mmcdole/gofeed v1.1.3
|
require (
|
||||||
|
github.com/PuerkitoBio/goquery v1.5.1
|
||||||
|
github.com/mmcdole/gofeed v1.1.3
|
||||||
|
golang.org/x/net v0.0.0-20200301022130-244492dfa37a
|
||||||
|
)
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||||
|
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
|
||||||
|
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
|
||||||
|
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
||||||
|
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||||
|
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
|
||||||
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||||
|
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
|
||||||
|
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||||
|
github.com/mmcdole/gofeed v1.1.3 h1:pdrvMb18jMSLidGp8j0pLvc9IGziX4vbmvVqmLH6z8o=
|
||||||
|
github.com/mmcdole/gofeed v1.1.3/go.mod h1:QQO3maftbOu+hiVOGOZDRLymqGQCos4zxbA4j89gMrE=
|
||||||
|
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI=
|
||||||
|
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
|
||||||
|
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
|
||||||
|
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
|
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=
|
||||||
|
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||||
|
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
|
||||||
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
|
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||||
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||||
|
github.com/urfave/cli v1.22.3/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
|
||||||
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
|
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
|
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
|
golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0=
|
||||||
|
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
|
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
|
||||||
|
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||||
|
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
134
main.go
134
main.go
|
@ -6,6 +6,7 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
|
"net/http"
|
||||||
"sort"
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
@ -13,44 +14,54 @@ import (
|
||||||
"github.com/mmcdole/gofeed"
|
"github.com/mmcdole/gofeed"
|
||||||
)
|
)
|
||||||
|
|
||||||
var feedurls = []string{
|
const UserAgent = "feedget/0.1"
|
||||||
"https://tilde.team/~dozens/dreams/rss.xml",
|
|
||||||
//"https://xkcd.com/atom.xml",
|
|
||||||
"https://tilde.town/~magical/xkcd.xml",
|
|
||||||
}
|
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
var sources = make([]*FeedSource, len(feedurls)) // TODO: interface Source
|
var sources = []Source{
|
||||||
for i, u := range feedurls {
|
NewFeed("https://tilde.team/~dozens/dreams/rss.xml"),
|
||||||
sources[i] = NewFeed(u)
|
NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml",
|
||||||
|
//NewFeed("https://tilde.town/~magical/404.xml"),
|
||||||
|
NewMastoSource("https://tilde.town/~magical/masto_test.html"),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
wg.Add(len(sources))
|
wg.Add(len(sources))
|
||||||
for i := range sources {
|
for i := range sources {
|
||||||
src := sources[i]
|
src := sources[i]
|
||||||
go func() {
|
go func() {
|
||||||
src.update()
|
src.update(ctx)
|
||||||
wg.Done()
|
wg.Done()
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
||||||
for _, src := range sources {
|
for _, src := range sources {
|
||||||
fmt.Println(src.Title, src.Error)
|
fmt.Println(src.GetTitle(), src.GetError())
|
||||||
|
for i, x := range src.GetItems() {
|
||||||
|
if i > 5 {
|
||||||
|
//break
|
||||||
|
}
|
||||||
|
fmt.Println("\t", x.Date.Format("2006 Jan _2 15:04"), x.Text)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
//var feeds []*gofeed.Feed
|
|
||||||
//var errors []error
|
|
||||||
|
|
||||||
//for _, url := range feedUrls {
|
|
||||||
//}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type Source interface {
|
type Source interface {
|
||||||
Title() string
|
GetTitle() string
|
||||||
Link() string
|
//GetLink() string
|
||||||
Error() error
|
GetError() error
|
||||||
Update(context.Context)
|
GetItems() []Item
|
||||||
|
update(context.Context)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Item is the source-independent form of one entry, as returned by a
// Source's GetItems method.
type Item struct {
	Date       time.Time // timestamp of the entry; the zero time when none is known
	Link, Text string    // entry URL and the text shown for it
}
|
||||||
|
|
||||||
// want to keep track of:
|
// want to keep track of:
|
||||||
|
@ -71,31 +82,108 @@ type FeedSource struct {
|
||||||
LastFetch time.Time
|
LastFetch time.Time
|
||||||
Error error
|
Error error
|
||||||
|
|
||||||
|
LastStatusCode int
|
||||||
|
LastStatus string
|
||||||
|
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var _ Source = &FeedSource{}
|
||||||
|
|
||||||
func NewFeed(url string) *FeedSource {
|
func NewFeed(url string) *FeedSource {
|
||||||
return &FeedSource{
|
return &FeedSource{
|
||||||
URL: url,
|
URL: url,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (src *FeedSource) update() {
|
func (src *FeedSource) update(ctx context.Context) {
|
||||||
src.mu.Lock()
|
src.mu.Lock()
|
||||||
defer src.mu.Unlock()
|
defer src.mu.Unlock()
|
||||||
fp := gofeed.NewParser()
|
fp := gofeed.NewParser()
|
||||||
feed, err := fp.ParseURL(src.URL)
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, "GET", src.URL, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err := fmt.Errorf("error parsing %q: %v", src.URL, err)
|
src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err)
|
||||||
|
log.Println(src.Error)
|
||||||
|
return // return err?
|
||||||
|
}
|
||||||
|
req.Header.Set("User-Agent", UserAgent)
|
||||||
|
// TODO: If-Modified-Since, Etag
|
||||||
|
|
||||||
|
resp, err := http.DefaultClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
err := fmt.Errorf("error fetching %q: %w", src.URL, err)
|
||||||
log.Println(err)
|
log.Println(err)
|
||||||
src.Error = err
|
src.Error = err
|
||||||
return // return err?
|
return // return err?
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if resp != nil && resp.Body != nil {
|
||||||
|
defer func() {
|
||||||
|
err := resp.Body.Close()
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("error closing response body for %q: %v", src.URL, err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
src.LastStatusCode = resp.StatusCode
|
||||||
|
src.LastStatus = resp.Status
|
||||||
|
if resp.StatusCode != 200 {
|
||||||
|
src.Error = fmt.Errorf("error fetching %q: status %s", src.URL, resp.Status)
|
||||||
|
log.Println(src.Error)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: cache body
|
||||||
|
|
||||||
|
feed, err := fp.Parse(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
err := fmt.Errorf("error parsing %q: %w", src.URL, err)
|
||||||
|
log.Println(err)
|
||||||
|
src.Error = err
|
||||||
|
return // return err?
|
||||||
|
}
|
||||||
|
|
||||||
items := feed.Items
|
items := feed.Items
|
||||||
sort.Slice(items, func(i, j int) bool {
|
sort.SliceStable(items, func(i, j int) bool {
|
||||||
return items[i].Updated >= items[j].Updated
|
var d1, d2 time.Time
|
||||||
|
if items[i].PublishedParsed != nil {
|
||||||
|
d1 = *items[i].PublishedParsed
|
||||||
|
} else if items[i].UpdatedParsed != nil {
|
||||||
|
d1 = *items[i].UpdatedParsed
|
||||||
|
}
|
||||||
|
|
||||||
|
if items[j].PublishedParsed != nil {
|
||||||
|
d2 = *items[j].PublishedParsed
|
||||||
|
} else if items[j].UpdatedParsed != nil {
|
||||||
|
d2 = *items[j].UpdatedParsed
|
||||||
|
}
|
||||||
|
return !d1.Before(d2)
|
||||||
})
|
})
|
||||||
src.Title = feed.Title
|
src.Title = feed.Title
|
||||||
src.Items = items
|
src.Items = items
|
||||||
src.LastFetch = time.Now()
|
src.LastFetch = time.Now()
|
||||||
|
src.Error = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (src *FeedSource) GetTitle() string { return src.Title }
|
||||||
|
func (src *FeedSource) GetError() error { return src.Error }
|
||||||
|
|
||||||
|
func (src *FeedSource) GetItems() (items []Item) {
|
||||||
|
for _, x := range src.Items {
|
||||||
|
d := time.Time{}
|
||||||
|
if x.PublishedParsed != nil {
|
||||||
|
d = *x.PublishedParsed
|
||||||
|
}
|
||||||
|
if x.UpdatedParsed != nil {
|
||||||
|
d = *x.UpdatedParsed
|
||||||
|
}
|
||||||
|
items = append(items, Item{
|
||||||
|
Date: d,
|
||||||
|
Link: x.Link,
|
||||||
|
Text: x.Title,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,234 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
"golang.org/x/net/html"
|
||||||
|
"golang.org/x/net/html/atom"
|
||||||
|
)
|
||||||
|
|
||||||
|
// maxResponseSize is the byte limit passed to MaxBytesReader before a
// response body is handed to parseMicroformats.
const maxResponseSize = 1e6 // 1MB
|
||||||
|
|
||||||
|
// https://docs.joinmastodon.org/spec/microformats/
|
||||||
|
|
||||||
|
// Root elements (h-*)
|
||||||
|
//
|
||||||
|
// h-feed
|
||||||
|
// Represents a stream of entries. Attached to a profile's toots. Also
|
||||||
|
// attached to the parent thread within detailed status views.
|
||||||
|
//
|
||||||
|
// h-entry
|
||||||
|
// Represents episodic or date stamped online content. Attached to a status.
|
||||||
|
//
|
||||||
|
// URL properties (u-*)
|
||||||
|
//
|
||||||
|
// u-photo
|
||||||
|
// Within h-card, represents the profile picture. Attached to the avatar image.
|
||||||
|
//
|
||||||
|
// u-uid
|
||||||
|
// Within h-entry or h-cite, represents a universally unique identifier.
|
||||||
|
// Attached to timestamp link.
|
||||||
|
//
|
||||||
|
// u-url
|
||||||
|
// Within h-entry or h-cite, represents the status permalink. Attached to
|
||||||
|
// timestamp link. Within h-card, represents the profile permalink.
|
||||||
|
// Attached to display name link.
|
||||||
|
//
|
||||||
|
// Datetime properties (dt-*)
|
||||||
|
//
|
||||||
|
// dt-published
|
||||||
|
// Within h-entry or h-cite, represents the date and time at which the
|
||||||
|
// status was published. Attached to data element with value attribute.
|
||||||
|
//
|
||||||
|
// Element tree (e-*)
|
||||||
|
//
|
||||||
|
// e-content
|
||||||
|
// Within h-entry or h-cite, represents the content of the status. Attached to status content.
|
||||||
|
//
|
||||||
|
|
||||||
|
// I learned after writing this that mastodon also has RSS feeds
|
||||||
|
// e.g. https://tiny.tilde.website/@magical.rss
|
||||||
|
|
||||||
|
type MastoSource struct {
|
||||||
|
Items []*MastoItem
|
||||||
|
Title string
|
||||||
|
URL string
|
||||||
|
LastFetch time.Time
|
||||||
|
Error error
|
||||||
|
|
||||||
|
LastStatusCode int
|
||||||
|
LastStatus string
|
||||||
|
|
||||||
|
mu sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ Source = &MastoSource{}
|
||||||
|
|
||||||
|
type (
	// MastoFeed is the value produced by parseMicroformats: the feed title
	// and the entries found in the page.
	MastoFeed struct {
		Title string
		Items []*MastoItem
	}

	// MastoItem is a single entry extracted from the page. Every field is
	// dropped from the JSON encoding when it holds its zero value.
	MastoItem struct {
		Title           string `json:"title,omitempty"`
		Content         string `json:"content,omitempty"`
		Link            string `json:"link,omitempty"`
		PublishedString string `json:"published,omitempty"` // raw timestamp attribute; GetItems parses it as RFC 3339
		Author          string `json:"author,omitempty"`
		IsBoost         bool   `json:"is_boost,omitempty"` // set for entries carrying the h-cite class
	}
)
|
||||||
|
|
||||||
|
func NewMastoSource(url string) *MastoSource {
|
||||||
|
return &MastoSource{
|
||||||
|
URL: url,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (src *MastoSource) update(ctx context.Context) {
|
||||||
|
src.mu.Lock()
|
||||||
|
defer src.mu.Unlock()
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, "GET", src.URL, nil)
|
||||||
|
if err != nil {
|
||||||
|
src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err)
|
||||||
|
log.Println(src.Error)
|
||||||
|
return // return err?
|
||||||
|
}
|
||||||
|
req.Header.Set("User-Agent", UserAgent)
|
||||||
|
// TODO: If-Modified-Since, Etag
|
||||||
|
|
||||||
|
resp, err := http.DefaultClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
err := fmt.Errorf("error fetching %q: %w", src.URL, err)
|
||||||
|
log.Println(err)
|
||||||
|
src.Error = err
|
||||||
|
return // return err?
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp != nil && resp.Body != nil {
|
||||||
|
defer func() {
|
||||||
|
err := resp.Body.Close()
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("error closing response body for %q: %v", src.URL, err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
src.LastStatusCode = resp.StatusCode
|
||||||
|
src.LastStatus = resp.Status
|
||||||
|
if resp.StatusCode != 200 {
|
||||||
|
src.Error = fmt.Errorf("error fetching %q: status %s", src.URL, resp.Status)
|
||||||
|
log.Println(src.Error)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: cache body
|
||||||
|
body := MaxBytesReader(resp.Body, maxResponseSize)
|
||||||
|
|
||||||
|
feed, err := parseMicroformats(body)
|
||||||
|
if err != nil {
|
||||||
|
err := fmt.Errorf("error parsing %q: %w", src.URL, err)
|
||||||
|
log.Println(err)
|
||||||
|
src.Error = err
|
||||||
|
return // return err?
|
||||||
|
}
|
||||||
|
|
||||||
|
items := feed.Items
|
||||||
|
sort.Slice(items, func(i, j int) bool {
|
||||||
|
return items[i].PublishedString >= items[j].PublishedString
|
||||||
|
})
|
||||||
|
src.Title = feed.Title
|
||||||
|
src.Items = items
|
||||||
|
src.LastFetch = time.Now()
|
||||||
|
src.Error = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseMicroformats(r io.Reader) (*MastoFeed, error) {
|
||||||
|
doc, err := goquery.NewDocumentFromReader(r)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
feed := new(MastoFeed)
|
||||||
|
if doc.Find(".h-feed").Length() == 0 {
|
||||||
|
return nil, fmt.Errorf("no feed content found")
|
||||||
|
}
|
||||||
|
feed.Title = doc.Find(".h-feed > .p-name").First().AttrOr("value", "")
|
||||||
|
doc.Find(".h-feed").Find(".h-entry, .h-cite").Each(func(i int, elem *goquery.Selection) {
|
||||||
|
cw := strings.TrimSpace(text(elem.Find(".p-summary").First()))
|
||||||
|
// TODO: move this logic to GetItems
|
||||||
|
if cw != "" {
|
||||||
|
cw = "[" + cw + "] "
|
||||||
|
}
|
||||||
|
feed.Items = append(feed.Items, &MastoItem{
|
||||||
|
Title: "",
|
||||||
|
Content: cw + text(elem.Find(".e-content").First()),
|
||||||
|
Link: elem.Find("a.u-url.u-uid").AttrOr("href", ""),
|
||||||
|
Author: text(elem.Find(".p-author .p-name").First()),
|
||||||
|
PublishedString: elem.Find("data.dt-published").AttrOr("value", ""),
|
||||||
|
IsBoost: elem.HasClass("h-cite"),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
return feed, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Text gets the combined text contents of each element in the set of matched
|
||||||
|
// elements, including their descendants.
|
||||||
|
func text(s *goquery.Selection) string {
|
||||||
|
var buf bytes.Buffer
|
||||||
|
|
||||||
|
// Slightly optimized vs calling Each: no single selection object created
|
||||||
|
var f func(*html.Node)
|
||||||
|
f = func(n *html.Node) {
|
||||||
|
if n.Type == html.TextNode {
|
||||||
|
// Keep newlines and spaces, like jQuery
|
||||||
|
buf.WriteString(n.Data)
|
||||||
|
} else if n.Type == html.ElementNode && n.DataAtom == atom.Br {
|
||||||
|
//buf.WriteString("\n")
|
||||||
|
buf.WriteString(" ")
|
||||||
|
} else if n.Type == html.ElementNode && n.DataAtom == atom.P && n.PrevSibling != nil {
|
||||||
|
//buf.WriteString("\n\n")
|
||||||
|
buf.WriteString(" ")
|
||||||
|
} else if n.Type == html.ElementNode && (n.DataAtom == atom.Script || n.DataAtom == atom.Style || n.DataAtom == atom.Template) {
|
||||||
|
// nothing
|
||||||
|
}
|
||||||
|
if n.FirstChild != nil {
|
||||||
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||||
|
f(c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, n := range s.Nodes {
|
||||||
|
f(n)
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (src *MastoSource) GetTitle() string { return src.Title }
|
||||||
|
func (src *MastoSource) GetError() error { return src.Error }
|
||||||
|
|
||||||
|
func (src *MastoSource) GetItems() (items []Item) {
|
||||||
|
for _, x := range src.Items {
|
||||||
|
text := x.Content
|
||||||
|
if x.IsBoost {
|
||||||
|
text = "RT @" + x.Author + ": " + text
|
||||||
|
}
|
||||||
|
d, _ := time.Parse(time.RFC3339, x.PublishedString)
|
||||||
|
items = append(items, Item{
|
||||||
|
Date: d,
|
||||||
|
Link: x.Link,
|
||||||
|
Text: text,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// mastoSourceTests lists the page fixtures exercised by TestMastoSource: the
// path requested from the test file server, the feed title expected after
// parsing, and the expected text of the first item.
var mastoSourceTests = []struct {
	filename, title, itemText string
}{
	{"/masto.html", "magical on tiny.tilde.website", "here's hoping it never melts"},
}
|
||||||
|
|
||||||
|
func TestMastoSource(t *testing.T) {
|
||||||
|
server := httptest.NewServer(http.FileServer(http.Dir("testdata")))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
for _, tt := range mastoSourceTests {
|
||||||
|
src := NewMastoSource(server.URL + tt.filename)
|
||||||
|
src.update(ctx)
|
||||||
|
if err := src.Error; err != nil {
|
||||||
|
t.Errorf("%v: Error = %v, expected nil", tt.filename, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if src.Title != tt.title {
|
||||||
|
t.Errorf("%v: Title = %v expected %v", tt.filename, src.Title, tt.title)
|
||||||
|
}
|
||||||
|
items := src.GetItems()
|
||||||
|
if len(items) == 0 {
|
||||||
|
t.Errorf("%v: GetItems() = %v expected len > 0", tt.filename, items)
|
||||||
|
} else {
|
||||||
|
x := items[0]
|
||||||
|
if x.Text != tt.itemText {
|
||||||
|
t.Errorf("%v: item 0 Text = %q expected %q", tt.filename, x.Text, tt.itemText)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,2 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en"><title>xkcd.com</title><link href="https://xkcd.com/" rel="alternate"></link><id>https://xkcd.com/</id><updated>2021-11-12T00:00:00Z</updated><entry><title>Occam</title><link href="https://xkcd.com/2541/" rel="alternate"></link><updated>2021-11-12T00:00:00Z</updated><id>https://xkcd.com/2541/</id><summary type="html"><img src="https://imgs.xkcd.com/comics/occam.png" title="Oh no, Murphy just picked up the razor." alt="Oh no, Murphy just picked up the razor." /></summary></entry><entry><title>TTSLTSWBD</title><link href="https://xkcd.com/2540/" rel="alternate"></link><updated>2021-11-10T00:00:00Z</updated><id>https://xkcd.com/2540/</id><summary type="html"><img src="https://imgs.xkcd.com/comics/ttsltswbd.png" title="Tomorrow's sessions will be entirely devoted to sewing machine rotary hooks." alt="Tomorrow's sessions will be entirely devoted to sewing machine rotary hooks." /></summary></entry><entry><title>Flinch</title><link href="https://xkcd.com/2539/" rel="alternate"></link><updated>2021-11-08T00:00:00Z</updated><id>https://xkcd.com/2539/</id><summary type="html"><img src="https://imgs.xkcd.com/comics/flinch.png" title="Premed: &quot;Does this count for a physics credit? Can we shorten the string so I can get it done faster? And can we do one where it hits me in the face? I gotta do a thing for first aid training right after.&quot;" alt="Premed: &quot;Does this count for a physics credit? Can we shorten the string so I can get it done faster? And can we do one where it hits me in the face? 
I gotta do a thing for first aid training right after.&quot;" /></summary></entry><entry><title>Snack</title><link href="https://xkcd.com/2538/" rel="alternate"></link><updated>2021-11-05T00:00:00Z</updated><id>https://xkcd.com/2538/</id><summary type="html"><img src="https://imgs.xkcd.com/comics/snack.png" title="Although grad students, suddenly reminded that food exists, tend to just grab and devour both without further discussion." alt="Although grad students, suddenly reminded that food exists, tend to just grab and devour both without further discussion." /></summary></entry></feed>
|
Loading…
Reference in New Issue