// Feedget scrapes RSS feeds (and other sources)
// and spits the latest headline from each onto a static web page.
package main

import (
	"context"
	"fmt"
	"log"
	"net/http"
	"sort"
	"sync"
	"time"

	"github.com/mmcdole/gofeed"
)

const UserAgent = "feedget/0.1"

func main() {
	var sources = []Source{
		NewFeed("https://tilde.team/~dozens/dreams/rss.xml"),
		NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml",
		//NewFeed("https://tilde.town/~magical/404.xml"),
		NewMastoSource("https://tilde.town/~magical/masto_test.html"),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
	defer cancel()

	// update every source concurrently and wait for all of them to finish
	var wg sync.WaitGroup
	wg.Add(len(sources))
	for i := range sources {
		src := sources[i]
		go func() {
			src.update(ctx)
			wg.Done()
		}()
	}
	wg.Wait()

	for _, src := range sources {
		fmt.Println(src.GetTitle(), src.GetError())
		for i, x := range src.GetItems() {
			if i > 5 {
				//break
			}
			fmt.Println("\t", x.Date.Format("2006 Jan _2 15:04"), x.Text)
		}
	}
}

type Source interface {
	GetTitle() string
	//GetLink() string
	GetError() error
	GetItems() []Item
	update(context.Context)
}

type Item struct {
	Date time.Time
	Link string
	Text string
}

// want to keep track of:
// - whether the most recent update succeeded
// - when the last successful update was
// - how many of the last N updates succeeded
// - status codes for the last N updates
// - response time for the last N updates
// - how frequently items are posted
type Cache struct {
}
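
// A minimal sketch of the fields Cache could grow, following the wish
// list above. The type name and every field are illustrative guesses,
// not part of feedget; fixed-size slices (or a ring buffer) would cover
// the "last N" bookkeeping.
type cacheSketch struct {
	LastOK        bool            // did the most recent update succeed?
	LastSuccess   time.Time       // when the last successful update was
	StatusCodes   []int           // status codes for the last N updates
	ResponseTimes []time.Duration // response times for the last N updates
	PostInterval  time.Duration   // rough estimate of how often items are posted
}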

type FeedSource struct {
	Items []*gofeed.Item
	Title string
	URL   string

	LastFetch time.Time
	Error     error

	LastStatusCode int
	LastStatus     string

	mu sync.Mutex // guards the fields above during update
}

// compile-time check that *FeedSource implements Source
var _ Source = &FeedSource{}

func NewFeed(url string) *FeedSource {
	return &FeedSource{
		URL: url,
	}
}

// update fetches the feed, sorts its items newest-first,
// and records the outcome on src.
func (src *FeedSource) update(ctx context.Context) {
	src.mu.Lock()
	defer src.mu.Unlock()
	fp := gofeed.NewParser()
	req, err := http.NewRequestWithContext(ctx, "GET", src.URL, nil)
	if err != nil {
		src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err)
		log.Println(src.Error)
		return // return err?
	}
	req.Header.Set("User-Agent", UserAgent)
	// TODO: If-Modified-Since, Etag
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		err := fmt.Errorf("error fetching %q: %w", src.URL, err)
		log.Println(err)
		src.Error = err
		return // return err?
	}
	if resp != nil && resp.Body != nil {
		defer func() {
			err := resp.Body.Close()
			if err != nil {
				log.Printf("error closing response body for %q: %v", src.URL, err)
			}
		}()
	}
	src.LastStatusCode = resp.StatusCode
	src.LastStatus = resp.Status
	if resp.StatusCode != 200 {
		src.Error = fmt.Errorf("error fetching %q: status %s", src.URL, resp.Status)
		log.Println(src.Error)
		return
	}
	// TODO: cache body
	feed, err := fp.Parse(resp.Body)
	if err != nil {
		err := fmt.Errorf("error parsing %q: %w", src.URL, err)
		log.Println(err)
		src.Error = err
		return // return err?
	}
	// sort newest first, preferring the published date
	// and falling back to the updated date
	items := feed.Items
	sort.SliceStable(items, func(i, j int) bool {
		var d1, d2 time.Time
		if items[i].PublishedParsed != nil {
			d1 = *items[i].PublishedParsed
		} else if items[i].UpdatedParsed != nil {
			d1 = *items[i].UpdatedParsed
		}
		if items[j].PublishedParsed != nil {
			d2 = *items[j].PublishedParsed
		} else if items[j].UpdatedParsed != nil {
			d2 = *items[j].UpdatedParsed
		}
		return d1.After(d2)
	})
	src.Title = feed.Title
	src.Items = items
	src.LastFetch = time.Now()
	src.Error = nil
}

func (src *FeedSource) GetTitle() string { return src.Title }

func (src *FeedSource) GetError() error { return src.Error }

// GetItems converts the parsed feed entries into Items,
// using the same date preference as the sort in update.
func (src *FeedSource) GetItems() (items []Item) {
	for _, x := range src.Items {
		d := time.Time{}
		if x.PublishedParsed != nil {
			d = *x.PublishedParsed
		} else if x.UpdatedParsed != nil {
			d = *x.UpdatedParsed
		}
		items = append(items, Item{
			Date: d,
			Link: x.Link,
			Text: x.Title,
		})
	}
	return
}
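
// A sketch of the "If-Modified-Since, Etag" TODO in update: resend
// validators captured from a previous response so the server can reply
// 304 Not Modified instead of the full body. The function and its
// parameters are hypothetical; nothing stores these values yet.
func setConditionalHeaders(req *http.Request, lastModified, etag string) {
	if lastModified != "" {
		req.Header.Set("If-Modified-Since", lastModified)
	}
	if etag != "" {
		req.Header.Set("If-None-Match", etag)
	}
}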