Compare commits

..

17 Commits

Author SHA1 Message Date
magical ae160942bb welp 2022-05-01 02:44:54 +00:00
magical 5b76339e40 add mastodon test 2022-01-01 05:16:08 +00:00
magical ddb93ee852 add a test for FeedSource 2022-01-01 01:00:34 +00:00
magical ad7a1edaf1 fix FeedSource item sorting
we were sorting by strings instead of parsed dates and it was getting
confused by RFC1123 dates that looked like

    Thu, 02 Dec 2021 00:00:00 -0700
    Mon, 20 Sep 2021 00:00:00 -0600
    Mon, 06 Sep 2021 00:00:00 -0600
    Thu, 02 Sep 2021 00:00:00 -0600
    Sun, 22 Aug 2021 00:00:00 -0600

which of course sort like

    Thu, 02 Sep 2021 00:00:00 -0600
    Thu, 02 Dec 2021 00:00:00 -0700
    Sun, 22 Aug 2021 00:00:00 -0600
    Mon, 20 Sep 2021 00:00:00 -0600
    Mon, 06 Sep 2021 00:00:00 -0600
2022-01-01 00:55:54 +00:00
magical 2f3ae0c09b use the Source, interface 2022-01-01 00:52:03 +00:00
magical 4017f94b56 move MaxBytesReader to another file 2022-01-01 00:51:10 +00:00
magical a606a15903 sprinkle some First()s 2021-12-31 22:29:06 +00:00
magical df2ebd5b6a show boosts 2021-12-31 22:28:06 +00:00
magical 2182e4e739 ignore script tags and such when extracting text 2021-12-31 22:26:28 +00:00
magical ab1da46096 use NewRequestWithContext 2021-12-31 22:25:30 +00:00
magical ac5bfa7ad7 limit response bodies 2021-12-31 22:24:58 +00:00
magical 808cc164f1 use a real context w/ deadline 2021-12-31 09:03:45 +00:00
magical c305e96334 huh not sure why go.sum hadn't been added 2021-12-31 08:48:36 +00:00
magical 9221cf8ec8 better mastodon text formatting 2021-12-31 08:48:18 +00:00
magical 2e195e36a6 stub out a mastodon feed source 2021-12-31 08:46:49 +00:00
magical 6a8856c170 fetch the url ourself
makes way for caching the response

also clear src.Error on success
2021-12-31 06:22:08 +00:00
magical d260db550d remove feedurls variable 2021-12-31 06:20:09 +00:00
9 changed files with 1643 additions and 24 deletions

64
cache.go 100644
View File

@ -0,0 +1,64 @@
package main
import (
"errors"
"io"
)
// HTTPCache currently has no fields and no methods defined here.
// NOTE(review): presumably a placeholder for caching fetched responses
// (see the "TODO: cache body" notes in the update methods) — confirm.
type HTTPCache struct {
}
// MaxBytesReader wraps r so that at most n bytes can be read from it.
// It is based on http.MaxBytesReader, adapted for limiting the size of
// response bodies.
//
// In contrast to io.LimitReader, the result is a ReadCloser, a Read that
// goes beyond the limit returns a non-EOF error, and Close closes the
// underlying reader. Negative limits are treated as 0.
func MaxBytesReader(r io.ReadCloser, n int64) io.ReadCloser {
	if n < 0 { // Treat negative limits as equivalent to 0.
		n = 0
	}
	return &maxBytesReader{r: r, n: n}
}

// maxBytesReader is the io.ReadCloser returned by MaxBytesReader.
type maxBytesReader struct {
	r   io.ReadCloser // underlying reader
	n   int64         // max bytes remaining
	err error         // sticky error
}

// Read reads from the underlying reader into p, returning at most the
// number of bytes still allowed by the limit. Once the underlying reader
// yields more data than the limit allows, Read returns the bytes that fit
// together with a "response body too large" error. Any error returned is
// sticky: every later call returns 0 and the same error.
func (l *maxBytesReader) Read(p []byte) (n int, err error) {
	if l.err != nil {
		return 0, l.err
	}
	if len(p) == 0 {
		return 0, nil
	}
	// If they asked for a 32KB read but only 5 bytes are remaining,
	// there is no need to read 32KB. 6 bytes will answer the question
	// of whether we hit the limit or went past it.
	//
	// The comparison is written as len(p)-1 > l.n rather than
	// len(p) > l.n+1 so that it cannot overflow when l.n is the
	// largest int64; inside the branch l.n+1 is at most len(p)-1.
	if int64(len(p))-1 > l.n {
		p = p[:l.n+1]
	}
	n, err = l.r.Read(p)
	if int64(n) <= l.n {
		l.n -= int64(n)
		l.err = err
		return n, err
	}
	// The underlying reader produced the one extra byte: report only the
	// bytes within the limit and fail this and all subsequent reads.
	n = int(l.n)
	l.n = 0
	l.err = errors.New("http: response body too large")
	return n, l.err
}

// Close closes the underlying reader.
func (l *maxBytesReader) Close() error {
	return l.r.Close()
}

48
feed_test.go 100644
View File

@ -0,0 +1,48 @@
package main
import (
"context"
"net/http"
"net/http/httptest"
"testing"
)
// feedSourceTests is the table driving TestFeedSource. For each entry,
// filename is the path appended to the test server URL, title is the
// expected feed title, and itemText is the expected Text of the first item.
var feedSourceTests = []struct {
	filename, title, itemText string
}{
	{filename: "/xkcd.xml", title: "xkcd.com", itemText: "Occam"},
}
// TestFeedSource serves the testdata directory from a local HTTP test
// server, updates a FeedSource for every entry in feedSourceTests, and
// checks the stored error, the feed title and the text of the first item.
func TestFeedSource(t *testing.T) {
	srv := httptest.NewServer(http.FileServer(http.Dir("testdata")))
	defer srv.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	for _, tc := range feedSourceTests {
		feed := NewFeed(srv.URL + tc.filename)
		feed.update(ctx)

		// A failed update leaves nothing meaningful to compare.
		if feed.Error != nil {
			t.Errorf("%v: Error = %v, expected nil", tc.filename, feed.Error)
			continue
		}

		if got := feed.Title; got != tc.title {
			t.Errorf("%v: Title = %v expected %v", tc.filename, got, tc.title)
		}

		got := feed.GetItems()
		if len(got) == 0 {
			t.Errorf("%v: GetItems() = %v expected len > 0", tc.filename, got)
			continue
		}
		if first := got[0]; first.Text != tc.itemText {
			t.Errorf("%v: item 0 Text = %q expected %q", tc.filename, first.Text, tc.itemText)
		}
	}
}

6
go.mod
View File

@ -2,4 +2,8 @@ module git.tilde.town/magical/feedget
go 1.14
require github.com/mmcdole/gofeed v1.1.3
require (
github.com/PuerkitoBio/goquery v1.5.1
github.com/mmcdole/gofeed v1.1.3
golang.org/x/net v0.0.0-20200301022130-244492dfa37a
)

40
go.sum 100644
View File

@ -0,0 +1,40 @@
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/mmcdole/gofeed v1.1.3 h1:pdrvMb18jMSLidGp8j0pLvc9IGziX4vbmvVqmLH6z8o=
github.com/mmcdole/gofeed v1.1.3/go.mod h1:QQO3maftbOu+hiVOGOZDRLymqGQCos4zxbA4j89gMrE=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf h1:sWGE2v+hO0Nd4yFU/S/mDBM5plIU8v/Qhfz41hkDIAI=
github.com/mmcdole/goxpp v0.0.0-20181012175147-0068e33feabf/go.mod h1:pasqhqstspkosTneA62Nc+2p9SOBBYAPbnmRRWPQ0V8=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/urfave/cli v1.22.3/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a h1:GuSPYbZzB5/dcLNCwLQLsg3obCJtX9IJhpXkvY7kzk0=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

134
main.go
View File

@ -6,6 +6,7 @@ import (
"context"
"fmt"
"log"
"net/http"
"sort"
"sync"
"time"
@ -13,44 +14,54 @@ import (
"github.com/mmcdole/gofeed"
)
var feedurls = []string{
"https://tilde.team/~dozens/dreams/rss.xml",
//"https://xkcd.com/atom.xml",
"https://tilde.town/~magical/xkcd.xml",
}
// UserAgent is the value the update methods send in the User-Agent header
// of their HTTP requests.
const UserAgent = "feedget/0.1"
func main() {
var sources = make([]*FeedSource, len(feedurls)) // TODO: interface Source
for i, u := range feedurls {
sources[i] = NewFeed(u)
var sources = []Source{
NewFeed("https://tilde.team/~dozens/dreams/rss.xml"),
NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml",
//NewFeed("https://tilde.town/~magical/404.xml"),
NewMastoSource("https://tilde.town/~magical/masto_test.html"),
}
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
defer cancel()
var wg sync.WaitGroup
wg.Add(len(sources))
for i := range sources {
src := sources[i]
go func() {
src.update()
src.update(ctx)
wg.Done()
}()
}
wg.Wait()
for _, src := range sources {
fmt.Println(src.Title, src.Error)
fmt.Println(src.GetTitle(), src.GetError())
for i, x := range src.GetItems() {
if i > 5 {
//break
}
fmt.Println("\t", x.Date.Format("2006 Jan _2 15:04"), x.Text)
}
}
//var feeds []*gofeed.Feed
//var errors []error
//for _, url := range feedUrls {
//}
}
type Source interface {
Title() string
Link() string
Error() error
Update(context.Context)
GetTitle() string
//GetLink() string
GetError() error
GetItems() []Item
update(context.Context)
}
// Item is the source-independent form of a single entry, as returned by a
// Source's GetItems method.
type Item struct {
// Date is the timestamp associated with the entry; it is the zero time
// when the source had no usable date.
Date time.Time
// Link is the entry's URL as reported by the source.
Link string
// Text is the one-line text shown for the entry.
Text string
}
// want to keep track of:
@ -71,31 +82,108 @@ type FeedSource struct {
LastFetch time.Time
Error error
LastStatusCode int
LastStatus string
mu sync.Mutex
}
// Compile-time check that *FeedSource implements Source.
var _ Source = &FeedSource{}
// NewFeed returns a FeedSource for the feed at url. Only the URL field is
// set; nothing is fetched here.
func NewFeed(url string) *FeedSource {
	src := new(FeedSource)
	src.URL = url
	return src
}
func (src *FeedSource) update() {
func (src *FeedSource) update(ctx context.Context) {
src.mu.Lock()
defer src.mu.Unlock()
fp := gofeed.NewParser()
feed, err := fp.ParseURL(src.URL)
req, err := http.NewRequestWithContext(ctx, "GET", src.URL, nil)
if err != nil {
err := fmt.Errorf("error parsing %q: %v", src.URL, err)
src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err)
log.Println(src.Error)
return // return err?
}
req.Header.Set("User-Agent", UserAgent)
// TODO: If-Modified-Since, Etag
resp, err := http.DefaultClient.Do(req)
if err != nil {
err := fmt.Errorf("error fetching %q: %w", src.URL, err)
log.Println(err)
src.Error = err
return // return err?
}
if resp != nil && resp.Body != nil {
defer func() {
err := resp.Body.Close()
if err != nil {
log.Printf("error closing response body for %q: %v", src.URL, err)
}
}()
}
src.LastStatusCode = resp.StatusCode
src.LastStatus = resp.Status
if resp.StatusCode != 200 {
src.Error = fmt.Errorf("error fetching %q: status %s", src.URL, resp.Status)
log.Println(src.Error)
return
}
// TODO: cache body
feed, err := fp.Parse(resp.Body)
if err != nil {
err := fmt.Errorf("error parsing %q: %w", src.URL, err)
log.Println(err)
src.Error = err
return // return err?
}
items := feed.Items
sort.Slice(items, func(i, j int) bool {
return items[i].Updated >= items[j].Updated
sort.SliceStable(items, func(i, j int) bool {
var d1, d2 time.Time
if items[i].PublishedParsed != nil {
d1 = *items[i].PublishedParsed
} else if items[i].UpdatedParsed != nil {
d1 = *items[i].UpdatedParsed
}
if items[j].PublishedParsed != nil {
d2 = *items[j].PublishedParsed
} else if items[j].UpdatedParsed != nil {
d2 = *items[j].UpdatedParsed
}
return !d1.Before(d2)
})
src.Title = feed.Title
src.Items = items
src.LastFetch = time.Now()
src.Error = nil
}
// GetTitle returns the source's Title field.
func (src *FeedSource) GetTitle() string {
	return src.Title
}
// GetError returns the source's Error field, which is nil after a
// successful update.
func (src *FeedSource) GetError() error {
	return src.Error
}
// GetItems converts the stored feed entries into Items, preserving their
// order. Each Item takes its Link from the entry's link and its Text from
// the entry's title. It returns nil when there are no entries.
//
// Date is the entry's published time, falling back to its updated time, and
// is the zero time when neither was parsed. This is the same key update
// sorts the entries by, so the dates shown always agree with the ordering.
func (src *FeedSource) GetItems() (items []Item) {
	if len(src.Items) == 0 {
		return nil
	}
	items = make([]Item, 0, len(src.Items))
	for _, x := range src.Items {
		var d time.Time
		switch {
		case x.PublishedParsed != nil:
			d = *x.PublishedParsed
		case x.UpdatedParsed != nil:
			d = *x.UpdatedParsed
		}
		items = append(items, Item{
			Date: d,
			Link: x.Link,
			Text: x.Title,
		})
	}
	return items
}

234
mastodon.go 100644
View File

@ -0,0 +1,234 @@
package main
import (
"bytes"
"context"
"fmt"
"io"
"log"
"net/http"
"sort"
"strings"
"sync"
"time"
"github.com/PuerkitoBio/goquery"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// maxResponseSize is the limit, in bytes, that MastoSource.update passes to
// MaxBytesReader when reading a response body.
const maxResponseSize = 1e6 // 1MB
// https://docs.joinmastodon.org/spec/microformats/
// Root elements (h-*)
//
// h-feed
// Represents a stream of entries. Attached to a profile's toots. Also
// attached to the parent thread within detailed status views.
//
// h-entry
// Represents episodic or date stamped online content. Attached to a status.
//
// URL properties (u-*)
//
// u-photo
// Within h-card, represents the profile picture. Attached to the avatar image.
//
// u-uid
// Within h-entry or h-cite, represents a universally unique identifier.
// Attached to timestamp link.
//
// u-url
// Within h-entry or h-cite, represents the status permalink. Attached to
// timestamp link. Within h-card, represents the profile permalink.
// Attached to display name link.
//
// Datetime properties (dt-*)
//
// dt-published
// Within h-entry or h-cite, represents the date and time at which the
// status was published. Attached to data element with value attribute.
//
// Element tree (e-*)
//
// e-content
// Within h-entry or h-cite, represents the content of the status. Attached to status content.
//
// I learned after writing this that mastodon also has RSS feeds
// e.g. https://tiny.tilde.website/@magical.rss
// MastoSource is a Source backed by an HTML page carrying the microformats
// markup described above. Its fields are filled in by update.
type MastoSource struct {
// Items holds the entries stored by the last successful update.
Items []*MastoItem
// Title is the feed title stored by the last successful update.
Title string
// URL is the page fetched by update.
URL string
// LastFetch is the time the last successful update completed.
LastFetch time.Time
// Error is the error from the most recent update, or nil if it succeeded.
Error error
// LastStatusCode and LastStatus record the HTTP status of the most
// recent response received by update.
LastStatusCode int
LastStatus string
// mu is held by update for the duration of a fetch.
mu sync.Mutex
}
// Compile-time check that *MastoSource implements Source.
var _ Source = &MastoSource{}
// MastoFeed is the result of parseMicroformats: the feed title plus the
// entries found on the page, in document order.
type MastoFeed struct {
Title string
Items []*MastoItem
}
// MastoItem is a single entry extracted from the page by parseMicroformats.
type MastoItem struct {
// Title is always left empty by parseMicroformats.
Title string `json:"title,omitempty"`
// Content is the text of the entry's .e-content element, prefixed with
// the bracketed .p-summary text when that is non-empty.
Content string `json:"content,omitempty"`
// Link is the href of the entry's a.u-url.u-uid element.
Link string `json:"link,omitempty"`
// PublishedString is the raw value attribute of the entry's
// data.dt-published element; GetItems parses it as RFC 3339.
PublishedString string `json:"published,omitempty"`
// Author is the text of the entry's ".p-author .p-name" element.
Author string `json:"author,omitempty"`
// IsBoost reports whether the entry element has the h-cite class.
IsBoost bool `json:"is_boost,omitempty"`
}
// NewMastoSource returns a MastoSource for the page at url. Only the URL
// field is set; nothing is fetched here.
func NewMastoSource(url string) *MastoSource {
	src := new(MastoSource)
	src.URL = url
	return src
}
// update fetches src.URL, parses the page's microformats markup and stores
// the result on src. It holds src.mu for the whole call.
//
// On success Title, Items and LastFetch are replaced and Error is cleared.
// On any failure (building the request, performing it, a non-200 status, or
// a parse error) the error is logged and stored in src.Error, and the
// previously stored Title and Items are left untouched. LastStatusCode and
// LastStatus are recorded whenever a response was received.
func (src *MastoSource) update(ctx context.Context) {
	src.mu.Lock()
	defer src.mu.Unlock()

	req, err := http.NewRequestWithContext(ctx, "GET", src.URL, nil)
	if err != nil {
		src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err)
		log.Println(src.Error)
		return // return err?
	}
	req.Header.Set("User-Agent", UserAgent)
	// TODO: If-Modified-Since, Etag

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		err := fmt.Errorf("error fetching %q: %w", src.URL, err)
		log.Println(err)
		src.Error = err
		return // return err?
	}
	if resp != nil && resp.Body != nil {
		defer func() {
			err := resp.Body.Close()
			if err != nil {
				log.Printf("error closing response body for %q: %v", src.URL, err)
			}
		}()
	}

	src.LastStatusCode = resp.StatusCode
	src.LastStatus = resp.Status
	if resp.StatusCode != 200 {
		src.Error = fmt.Errorf("error fetching %q: status %s", src.URL, resp.Status)
		log.Println(src.Error)
		return
	}

	// TODO: cache body

	// Cap how much of the body the parser is allowed to consume.
	body := MaxBytesReader(resp.Body, maxResponseSize)
	feed, err := parseMicroformats(body)
	if err != nil {
		err := fmt.Errorf("error parsing %q: %w", src.URL, err)
		log.Println(err)
		src.Error = err
		return // return err?
	}

	// published parses an item's timestamp the same way GetItems does.
	// The parse error is deliberately ignored: a missing or malformed
	// timestamp yields the zero time, which sorts the item last.
	published := func(it *MastoItem) time.Time {
		t, _ := time.Parse(time.RFC3339, it.PublishedString)
		return t
	}

	// Order newest first. Compare parsed instants rather than raw strings
	// (strings with different UTC offsets do not sort chronologically),
	// and use a strict comparison so the function is a valid "less";
	// SliceStable keeps document order for items with equal timestamps.
	items := feed.Items
	sort.SliceStable(items, func(i, j int) bool {
		return published(items[i]).After(published(items[j]))
	})

	src.Title = feed.Title
	src.Items = items
	src.LastFetch = time.Now()
	src.Error = nil
}
// parseMicroformats reads an HTML document from r and extracts a MastoFeed
// from its microformats markup.
//
// It returns an error when the document cannot be parsed or contains no
// .h-feed element. The feed title is the value attribute of the first
// ".h-feed > .p-name" element. Every .h-entry or .h-cite element inside
// .h-feed becomes one MastoItem, in document order.
func parseMicroformats(r io.Reader) (*MastoFeed, error) {
	doc, err := goquery.NewDocumentFromReader(r)
	if err != nil {
		return nil, err
	}

	root := doc.Find(".h-feed")
	if root.Length() == 0 {
		return nil, fmt.Errorf("no feed content found")
	}

	result := &MastoFeed{
		Title: doc.Find(".h-feed > .p-name").First().AttrOr("value", ""),
	}

	root.Find(".h-entry, .h-cite").Each(func(_ int, entry *goquery.Selection) {
		// A non-empty .p-summary (presumably the content warning) is
		// shown in brackets in front of the content.
		// TODO: move this logic to GetItems
		prefix := ""
		if cw := strings.TrimSpace(text(entry.Find(".p-summary").First())); cw != "" {
			prefix = "[" + cw + "] "
		}

		item := &MastoItem{
			Content:         prefix + text(entry.Find(".e-content").First()),
			Link:            entry.Find("a.u-url.u-uid").AttrOr("href", ""),
			Author:          text(entry.Find(".p-author .p-name").First()),
			PublishedString: entry.Find("data.dt-published").AttrOr("value", ""),
			IsBoost:         entry.HasClass("h-cite"),
		}
		result.Items = append(result.Items, item)
	})

	return result, nil
}
// text returns the combined text contents of each element in the set of
// matched elements, including their descendants.
//
// Text nodes are copied verbatim. A <br> element, and a <p> element that
// has a previous sibling, each contribute a single space. <script>,
// <style> and <template> elements are skipped together with everything
// inside them.
func text(s *goquery.Selection) string {
	var buf bytes.Buffer

	// Slightly optimized vs calling Each: no single selection object created
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		switch n.Type {
		case html.TextNode:
			// Keep newlines and spaces, like jQuery
			buf.WriteString(n.Data)
		case html.ElementNode:
			switch n.DataAtom {
			case atom.Script, atom.Style, atom.Template:
				// Return before the child loop below; otherwise the
				// element's text children would still be written out.
				return
			case atom.Br:
				// A space rather than "\n".
				buf.WriteString(" ")
			case atom.P:
				if n.PrevSibling != nil {
					// A space rather than "\n\n".
					buf.WriteString(" ")
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	for _, n := range s.Nodes {
		walk(n)
	}

	return buf.String()
}
// GetTitle returns the source's Title field.
func (src *MastoSource) GetTitle() string {
	return src.Title
}
// GetError returns the source's Error field, which is nil after a
// successful update.
func (src *MastoSource) GetError() error {
	return src.Error
}
// GetItems converts the stored entries into Items, preserving their order.
// Boosted entries get an "RT @author: " prefix in front of their content.
// Date is PublishedString parsed as RFC 3339; when parsing fails the error
// is ignored and Date is left as the zero time. The result is nil when
// there are no entries.
func (src *MastoSource) GetItems() (items []Item) {
	for _, entry := range src.Items {
		body := entry.Content
		if entry.IsBoost {
			body = "RT @" + entry.Author + ": " + body
		}

		when, _ := time.Parse(time.RFC3339, entry.PublishedString)

		items = append(items, Item{Date: when, Link: entry.Link, Text: body})
	}
	return items
}

48
mastodon_test.go 100644
View File

@ -0,0 +1,48 @@
package main
import (
"context"
"net/http"
"net/http/httptest"
"testing"
)
// mastoSourceTests is the table driving TestMastoSource. For each entry,
// filename is the path appended to the test server URL, title is the
// expected feed title, and itemText is the expected Text of the first item.
var mastoSourceTests = []struct {
	filename, title, itemText string
}{
	{
		filename: "/masto.html",
		title:    "magical on tiny.tilde.website",
		itemText: "here's hoping it never melts",
	},
}
// TestMastoSource serves the testdata directory from a local HTTP test
// server, updates a MastoSource for every entry in mastoSourceTests, and
// checks the stored error, the feed title and the text of the first item.
func TestMastoSource(t *testing.T) {
	srv := httptest.NewServer(http.FileServer(http.Dir("testdata")))
	defer srv.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	for _, tc := range mastoSourceTests {
		masto := NewMastoSource(srv.URL + tc.filename)
		masto.update(ctx)

		// A failed update leaves nothing meaningful to compare.
		if masto.Error != nil {
			t.Errorf("%v: Error = %v, expected nil", tc.filename, masto.Error)
			continue
		}

		if got := masto.Title; got != tc.title {
			t.Errorf("%v: Title = %v expected %v", tc.filename, got, tc.title)
		}

		got := masto.GetItems()
		if len(got) == 0 {
			t.Errorf("%v: GetItems() = %v expected len > 0", tc.filename, got)
			continue
		}
		if first := got[0]; first.Text != tc.itemText {
			t.Errorf("%v: item 0 Text = %q expected %q", tc.filename, first.Text, tc.itemText)
		}
	}
}

1091
testdata/masto.html vendored 100644

File diff suppressed because one or more lines are too long

2
testdata/xkcd.xml vendored 100644
View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en"><title>xkcd.com</title><link href="https://xkcd.com/" rel="alternate"></link><id>https://xkcd.com/</id><updated>2021-11-12T00:00:00Z</updated><entry><title>Occam</title><link href="https://xkcd.com/2541/" rel="alternate"></link><updated>2021-11-12T00:00:00Z</updated><id>https://xkcd.com/2541/</id><summary type="html">&lt;img src="https://imgs.xkcd.com/comics/occam.png" title="Oh no, Murphy just picked up the razor." alt="Oh no, Murphy just picked up the razor." /&gt;</summary></entry><entry><title>TTSLTSWBD</title><link href="https://xkcd.com/2540/" rel="alternate"></link><updated>2021-11-10T00:00:00Z</updated><id>https://xkcd.com/2540/</id><summary type="html">&lt;img src="https://imgs.xkcd.com/comics/ttsltswbd.png" title="Tomorrow's sessions will be entirely devoted to sewing machine rotary hooks." alt="Tomorrow's sessions will be entirely devoted to sewing machine rotary hooks." /&gt;</summary></entry><entry><title>Flinch</title><link href="https://xkcd.com/2539/" rel="alternate"></link><updated>2021-11-08T00:00:00Z</updated><id>https://xkcd.com/2539/</id><summary type="html">&lt;img src="https://imgs.xkcd.com/comics/flinch.png" title="Premed: &amp;quot;Does this count for a physics credit? Can we shorten the string so I can get it done faster? And can we do one where it hits me in the face? I gotta do a thing for first aid training right after.&amp;quot;" alt="Premed: &amp;quot;Does this count for a physics credit? Can we shorten the string so I can get it done faster? And can we do one where it hits me in the face? 
I gotta do a thing for first aid training right after.&amp;quot;" /&gt;</summary></entry><entry><title>Snack</title><link href="https://xkcd.com/2538/" rel="alternate"></link><updated>2021-11-05T00:00:00Z</updated><id>https://xkcd.com/2538/</id><summary type="html">&lt;img src="https://imgs.xkcd.com/comics/snack.png" title="Although grad students, suddenly reminded that food exists, tend to just grab and devour both without further discussion." alt="Although grad students, suddenly reminded that food exists, tend to just grab and devour both without further discussion." /&gt;</summary></entry></feed>