190 lines
3.9 KiB
Go
190 lines
3.9 KiB
Go
// Feedget scrapes RSS feeds (and other sources)
|
|
// and spits the latest headline from each onto a static web page.
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log"
|
|
"net/http"
|
|
"sort"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/mmcdole/gofeed"
|
|
)
|
|
|
|
// UserAgent is the value FeedSource.update sends in the User-Agent header
// of its HTTP requests.
const UserAgent = "feedget/0.1"
|
|
|
|
func main() {
|
|
var sources = []Source{
|
|
NewFeed("https://tilde.team/~dozens/dreams/rss.xml"),
|
|
NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml",
|
|
//NewFeed("https://tilde.town/~magical/404.xml"),
|
|
NewMastoSource("https://tilde.town/~magical/masto_test.html"),
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
|
defer cancel()
|
|
|
|
var wg sync.WaitGroup
|
|
wg.Add(len(sources))
|
|
for i := range sources {
|
|
src := sources[i]
|
|
go func() {
|
|
src.update(ctx)
|
|
wg.Done()
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
|
|
for _, src := range sources {
|
|
fmt.Println(src.GetTitle(), src.GetError())
|
|
for i, x := range src.GetItems() {
|
|
if i > 5 {
|
|
//break
|
|
}
|
|
fmt.Println("\t", x.Date.Format("2006 Jan _2 15:04"), x.Text)
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
type Source interface {
|
|
GetTitle() string
|
|
//GetLink() string
|
|
GetError() error
|
|
GetItems() []Item
|
|
update(context.Context)
|
|
}
|
|
|
|
// Item is a single entry reported by a Source: a timestamp, a link and a
// line of text.
type Item struct {
	Date       time.Time
	Link, Text string
}
|
|
|
|
// want to keep track of:
|
|
// - whether the most recent update succeeded
|
|
// - when the last successful update was
|
|
// - how many of the last N updates succeeded
|
|
// - status codes for the last N updates
|
|
// - response time for the last N updates
|
|
// - how frequently items are posted
|
|
|
|
// Cache is a placeholder type; it has no fields yet.
type Cache struct{}
|
|
|
|
type FeedSource struct {
|
|
Items []*gofeed.Item
|
|
Title string
|
|
URL string
|
|
LastFetch time.Time
|
|
Error error
|
|
|
|
LastStatusCode int
|
|
LastStatus string
|
|
|
|
mu sync.Mutex
|
|
}
|
|
|
|
// Compile-time check that *FeedSource implements Source.
var _ Source = (*FeedSource)(nil)
|
|
|
|
func NewFeed(url string) *FeedSource {
|
|
return &FeedSource{
|
|
URL: url,
|
|
}
|
|
}
|
|
|
|
func (src *FeedSource) update(ctx context.Context) {
|
|
src.mu.Lock()
|
|
defer src.mu.Unlock()
|
|
fp := gofeed.NewParser()
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", src.URL, nil)
|
|
if err != nil {
|
|
src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err)
|
|
log.Println(src.Error)
|
|
return // return err?
|
|
}
|
|
req.Header.Set("User-Agent", UserAgent)
|
|
// TODO: If-Modified-Since, Etag
|
|
|
|
resp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
err := fmt.Errorf("error fetching %q: %w", src.URL, err)
|
|
log.Println(err)
|
|
src.Error = err
|
|
return // return err?
|
|
}
|
|
|
|
if resp != nil && resp.Body != nil {
|
|
defer func() {
|
|
err := resp.Body.Close()
|
|
if err != nil {
|
|
log.Printf("error closing response body for %q: %v", src.URL, err)
|
|
}
|
|
}()
|
|
}
|
|
|
|
src.LastStatusCode = resp.StatusCode
|
|
src.LastStatus = resp.Status
|
|
if resp.StatusCode != 200 {
|
|
src.Error = fmt.Errorf("error fetching %q: status %s", src.URL, resp.Status)
|
|
log.Println(src.Error)
|
|
return
|
|
}
|
|
|
|
// TODO: cache body
|
|
|
|
feed, err := fp.Parse(resp.Body)
|
|
if err != nil {
|
|
err := fmt.Errorf("error parsing %q: %w", src.URL, err)
|
|
log.Println(err)
|
|
src.Error = err
|
|
return // return err?
|
|
}
|
|
|
|
items := feed.Items
|
|
sort.SliceStable(items, func(i, j int) bool {
|
|
var d1, d2 time.Time
|
|
if items[i].PublishedParsed != nil {
|
|
d1 = *items[i].PublishedParsed
|
|
} else if items[i].UpdatedParsed != nil {
|
|
d1 = *items[i].UpdatedParsed
|
|
}
|
|
|
|
if items[j].PublishedParsed != nil {
|
|
d2 = *items[j].PublishedParsed
|
|
} else if items[j].UpdatedParsed != nil {
|
|
d2 = *items[j].UpdatedParsed
|
|
}
|
|
return !d1.Before(d2)
|
|
})
|
|
src.Title = feed.Title
|
|
src.Items = items
|
|
src.LastFetch = time.Now()
|
|
src.Error = nil
|
|
}
|
|
|
|
// GetTitle returns the Title field. It does not take src.mu.
func (src *FeedSource) GetTitle() string {
	return src.Title
}
|
|
// GetError returns the Error field. It does not take src.mu.
func (src *FeedSource) GetError() error {
	return src.Error
}
|
|
|
|
func (src *FeedSource) GetItems() (items []Item) {
|
|
for _, x := range src.Items {
|
|
d := time.Time{}
|
|
if x.PublishedParsed != nil {
|
|
d = *x.PublishedParsed
|
|
}
|
|
if x.UpdatedParsed != nil {
|
|
d = *x.UpdatedParsed
|
|
}
|
|
items = append(items, Item{
|
|
Date: d,
|
|
Link: x.Link,
|
|
Text: x.Title,
|
|
})
|
|
}
|
|
return
|
|
}
|