feedget/main.go


// Feedget scrapes RSS feeds (and other sources)
// and spits the latest headline from each onto a static web page.
package main

import (
	"context"
	"fmt"
	"log"
	"net/http"
	"sort"
	"sync"
	"time"

	"github.com/mmcdole/gofeed"
)
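
// UserAgent is the User-Agent header value sent with every feed request.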
const UserAgent = "feedget/0.1"

func main() {
	var sources = []*FeedSource{ // TODO: interface Source
		NewFeed("https://tilde.team/~dozens/dreams/rss.xml"),
		NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml",
		NewFeed("https://tilde.town/~magical/404.xml"),
	}

	// Update every feed concurrently and wait for all of them to finish.
	var wg sync.WaitGroup
	wg.Add(len(sources))
	for i := range sources {
		src := sources[i]
		go func() {
			src.update()
			wg.Done()
		}()
	}
	wg.Wait()
	for _, src := range sources {
		fmt.Println(src.Title, src.Error, src.LastStatus)
	}

	src := NewMastoSource("https://tilde.town/~magical/masto_test.html")
	src.update(context.Background())
	fmt.Println(src.Title, src.Error, src.LastStatus)
	for _, x := range src.Items {
		d, _ := time.Parse(time.RFC3339, x.PublishedString)
		fmt.Println("\t", d.Format(time.Stamp), x.Content)
	}
}
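
// Source is the interface feed sources are intended to satisfy eventually
// (see the TODO in main; FeedSource doesn't implement it yet).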
type Source interface {
	Title() string
	Link() string
	Error() error
	Update(context.Context)
}
// want to keep track of:
// - whether the most recent update succeeded
// - when the last successful update was
// - how many of the last N updates succeeded
// - status codes for the last N updates
// - response time for the last N updates
// - how frequently items are posted
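//
// Cache is a placeholder for that per-source history; it holds nothing yet.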
type Cache struct {
}
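
// FeedSource fetches one RSS/Atom feed and remembers the outcome of its
// most recent fetch. mu guards the other fields while an update is running.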
type FeedSource struct {
	Items          []*gofeed.Item
	Title          string
	URL            string
	LastFetch      time.Time
	Error          error
	LastStatusCode int
	LastStatus     string
	mu             sync.Mutex
}
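
// NewFeed returns a FeedSource that will fetch the feed at url.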
func NewFeed(url string) *FeedSource {
	return &FeedSource{
		URL: url,
	}
}
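
// update fetches and parses the feed, then replaces src.Items with the
// parsed items, newest first. On any error it records src.Error and leaves
// the previously fetched items in place.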
func (src *FeedSource) update() {
	src.mu.Lock()
	defer src.mu.Unlock()
	fp := gofeed.NewParser()
	ctx := context.TODO()
	req, err := http.NewRequest("GET", src.URL, nil)
	if err != nil {
		src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err)
		log.Println(src.Error)
		return // return err?
	}
	req = req.WithContext(ctx)
	req.Header.Set("User-Agent", UserAgent)
	// TODO: If-Modified-Since, Etag
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		err := fmt.Errorf("error fetching %q: %w", src.URL, err)
		log.Println(err)
		src.Error = err
		return // return err?
	}
	if resp != nil && resp.Body != nil {
		defer func() {
			err := resp.Body.Close()
			if err != nil {
				log.Printf("error closing response body for %q: %v", src.URL, err)
			}
		}()
	}
	src.LastStatusCode = resp.StatusCode
	src.LastStatus = resp.Status
	if resp.StatusCode != 200 {
		src.Error = fmt.Errorf("error fetching %q: status %s", src.URL, resp.Status)
		log.Println(src.Error)
		return
	}
	// TODO: cache body
	feed, err := fp.Parse(resp.Body)
	if err != nil {
		err := fmt.Errorf("error parsing %q: %w", src.URL, err)
		log.Println(err)
		src.Error = err
		return // return err?
	}
	items := feed.Items
	// Sort newest first. sort.Slice expects a strict less function, so
	// compare with > rather than >= (Updated is compared as a string).
	sort.Slice(items, func(i, j int) bool {
		return items[i].Updated > items[j].Updated
	})
	src.Title = feed.Title
	src.Items = items
	src.LastFetch = time.Now()
	src.Error = nil
}