// feedget/main.go (190 lines, 3.9 KiB, Go)

// Feedget scrapes RSS feeds (and other sources)
// and spits the latest headline from each onto a static web page.
package main
import (
"context"
"fmt"
"log"
"net/http"
"sort"
"sync"
"time"
"github.com/mmcdole/gofeed"
)
// UserAgent is the User-Agent header value sent with every feed request.
const UserAgent = "feedget/0.1"
// main updates every configured source concurrently, waits for all of
// them to finish (or hit the shared timeout), then prints each source's
// title, error status, and items to stdout.
func main() {
	sources := []Source{
		NewFeed("https://tilde.team/~dozens/dreams/rss.xml"),
		NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml",
		//NewFeed("https://tilde.town/~magical/404.xml"),
		NewMastoSource("https://tilde.town/~magical/masto_test.html"),
	}

	// Bound the total time spent fetching; slow sources record a ctx error.
	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	// Fetch all sources in parallel and wait for every goroutine.
	var wg sync.WaitGroup
	wg.Add(len(sources))
	for i := range sources {
		src := sources[i] // shadow for the closure (pre-Go 1.22 loop semantics)
		go func() {
			defer wg.Done()
			src.update(ctx)
		}()
	}
	wg.Wait()

	for _, src := range sources {
		fmt.Println(src.GetTitle(), src.GetError())
		for _, x := range src.GetItems() {
			fmt.Println("\t", x.Date.Format("2006 Jan _2 15:04"), x.Text)
		}
	}
}
// Source is anything that can produce a titled list of dated items —
// an RSS/Atom feed, a scraped Mastodon page, etc.
// update fetches fresh data; the Get* methods report the results of
// the most recent update.
type Source interface {
	GetTitle() string
	//GetLink() string
	GetError() error
	GetItems() []Item
	update(context.Context)
}
// Item is one entry from a source: a headline with its timestamp and link.
type Item struct {
	Date time.Time // publication (or update) time; zero if unknown
	Link string    // URL of the full entry
	Text string    // headline text
}
// want to keep track of:
// - whether the most recent update succeeded
// - when the last successful update was
// - how many of the last N updates succeeded
// - status codes for the last N updates
// - response time for the last N updates
// - how frequently items are posted
//
// Cache is a placeholder for the per-source fetch statistics listed
// above; nothing is stored here yet.
type Cache struct {
}
// FeedSource fetches and parses a single RSS/Atom feed via gofeed.
// mu guards all fields while update is running; because it embeds a
// sync.Mutex, a FeedSource must not be copied after first use.
type FeedSource struct {
	Items          []*gofeed.Item // entries from the last successful fetch, newest first
	Title          string         // feed title from the last successful fetch
	URL            string         // feed URL to fetch
	LastFetch      time.Time      // time of the last successful fetch
	Error          error          // error from the most recent update; nil on success
	LastStatusCode int            // HTTP status code of the most recent fetch
	LastStatus     string         // HTTP status line of the most recent fetch
	mu             sync.Mutex     // guards the fields above during update
}
// Compile-time check that *FeedSource satisfies Source.
var _ Source = (*FeedSource)(nil)

// NewFeed returns a FeedSource that will fetch the feed at url.
// Call update before reading the other fields.
func NewFeed(url string) *FeedSource {
	return &FeedSource{URL: url}
}
// update fetches and parses the feed, replacing src.Items and src.Title
// on success. On failure it records the error in src.Error and leaves
// the previously fetched items intact. The mutex is held for the whole
// fetch so concurrent readers never observe a partially-applied update.
func (src *FeedSource) update(ctx context.Context) {
	src.mu.Lock()
	defer src.mu.Unlock()
	fp := gofeed.NewParser()
	req, err := http.NewRequestWithContext(ctx, "GET", src.URL, nil)
	if err != nil {
		src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err)
		log.Println(src.Error)
		return // return err?
	}
	req.Header.Set("User-Agent", UserAgent)
	// TODO: If-Modified-Since, Etag
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		err := fmt.Errorf("error fetching %q: %w", src.URL, err)
		log.Println(err)
		src.Error = err
		return // return err?
	}
	// On a nil error, Do always returns a response with a non-nil Body
	// which the caller must close.
	defer func() {
		if err := resp.Body.Close(); err != nil {
			log.Printf("error closing response body for %q: %v", src.URL, err)
		}
	}()
	src.LastStatusCode = resp.StatusCode
	src.LastStatus = resp.Status
	if resp.StatusCode != http.StatusOK {
		src.Error = fmt.Errorf("error fetching %q: status %s", src.URL, resp.Status)
		log.Println(src.Error)
		return
	}
	// TODO: cache body
	feed, err := fp.Parse(resp.Body)
	if err != nil {
		err := fmt.Errorf("error parsing %q: %w", src.URL, err)
		log.Println(err)
		src.Error = err
		return // return err?
	}
	// Sort newest-first. The Less function must be a strict ordering:
	// After is false for equal dates, unlike the previous !Before, which
	// reported true for equal elements and violated the sort contract.
	items := feed.Items
	sort.SliceStable(items, func(i, j int) bool {
		return itemDate(items[i]).After(itemDate(items[j]))
	})
	src.Title = feed.Title
	src.Items = items
	src.LastFetch = time.Now()
	src.Error = nil
}

// itemDate returns the best-known timestamp for a feed item: the
// published date if set, else the updated date, else the zero time.
func itemDate(x *gofeed.Item) time.Time {
	if x.PublishedParsed != nil {
		return *x.PublishedParsed
	}
	if x.UpdatedParsed != nil {
		return *x.UpdatedParsed
	}
	return time.Time{}
}
// GetTitle returns the feed title from the last successful update.
func (src *FeedSource) GetTitle() string { return src.Title }
// GetError returns the error from the most recent update, or nil if it succeeded.
func (src *FeedSource) GetError() error { return src.Error }
// GetItems converts the cached feed entries into the generic Item form.
// Each item's date is the published date if known, falling back to the
// updated date, else the zero time — the same preference order used to
// sort the items in update. (Previously UpdatedParsed overrode
// PublishedParsed here, so printed dates could disagree with the sort.)
func (src *FeedSource) GetItems() []Item {
	items := make([]Item, 0, len(src.Items))
	for _, x := range src.Items {
		var d time.Time
		if x.PublishedParsed != nil {
			d = *x.PublishedParsed
		} else if x.UpdatedParsed != nil {
			d = *x.UpdatedParsed
		}
		items = append(items, Item{
			Date: d,
			Link: x.Link,
			Text: x.Title,
		})
	}
	return items
}