// Feedget scrapes RSS feeds (and other sources) // and spits the latest headline from each onto a static web page. package main import ( "context" "fmt" "log" "net/http" "sort" "sync" "time" "github.com/mmcdole/gofeed" ) const UserAgent = "feedget/0.1" func main() { var sources = []*FeedSource{ // TODO: interface Source NewFeed("https://tilde.team/~dozens/dreams/rss.xml"), NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml", NewFeed("https://tilde.town/~magical/404.xml"), } ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) defer cancel() var wg sync.WaitGroup wg.Add(len(sources)) for i := range sources { src := sources[i] go func() { src.update(ctx) wg.Done() }() } wg.Wait() for _, src := range sources { fmt.Println(src.Title, src.Error, src.LastStatus) for i, x := range src.Items { if i > 5 { break } fmt.Println("\t", x.PublishedParsed.Format(time.Stamp), x.Title) } } src := NewMastoSource("https://tilde.town/~magical/masto_test.html") src.update(ctx) fmt.Println(src.Title, src.Error, src.LastStatus) for i, x := range src.Items { if i > 5 { break } d, _ := time.Parse(time.RFC3339, x.PublishedString) fmt.Println("\t", d.Format(time.Stamp), x.Content) } } type Source interface { Title() string Link() string Error() error Update(context.Context) } // want to keep track of: // - whether the most recent update suceeded // - when the last successful update was // - how many of the last N updates succeeded // - status codes for the last N updates // - response time for the last N updates // - how frequently items are posted type Cache struct { } type FeedSource struct { Items []*gofeed.Item Title string URL string LastFetch time.Time Error error LastStatusCode int LastStatus string mu sync.Mutex } func NewFeed(url string) *FeedSource { return &FeedSource{ URL: url, } } func (src *FeedSource) update(ctx context.Context) { src.mu.Lock() defer src.mu.Unlock() fp := gofeed.NewParser() req, err := http.NewRequestWithContext(ctx, "GET", src.URL, nil) if err != nil { src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err) log.Println(src.Error) return // return err? } req.Header.Set("User-Agent", UserAgent) // TODO: If-Modified-Since, Etag resp, err := http.DefaultClient.Do(req) if err != nil { err := fmt.Errorf("error fetching %q: %w", src.URL, err) log.Println(err) src.Error = err return // return err? } if resp != nil && resp.Body != nil { defer func() { err := resp.Body.Close() if err != nil { log.Printf("error closing response body for %q: %v", src.URL, err) } }() } src.LastStatusCode = resp.StatusCode src.LastStatus = resp.Status if resp.StatusCode != 200 { src.Error = fmt.Errorf("error fetching %q: status %s", src.URL, resp.Status) log.Println(src.Error) return } // TODO: cache body feed, err := fp.Parse(resp.Body) if err != nil { err := fmt.Errorf("error parsing %q: %w", src.URL, err) log.Println(err) src.Error = err return // return err? } items := feed.Items sort.Slice(items, func(i, j int) bool { return items[i].Updated >= items[j].Updated }) src.Title = feed.Title src.Items = items src.LastFetch = time.Now() src.Error = nil }