161 lines
3.3 KiB
Go
161 lines
3.3 KiB
Go
// Feedget scrapes RSS feeds (and other sources)
|
|
// and spits the latest headline from each onto a static web page.
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log"
|
|
"net/http"
|
|
"sort"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/mmcdole/gofeed"
|
|
)
|
|
|
|
const UserAgent = "feedget/0.1"
|
|
|
|
func main() {
|
|
var sources = []*FeedSource{ // TODO: interface Source
|
|
NewFeed("https://tilde.team/~dozens/dreams/rss.xml"),
|
|
NewFeed("https://tilde.town/~magical/xkcd.xml"), // "https://xkcd.com/atom.xml",
|
|
//NewFeed("https://tilde.town/~magical/404.xml"),
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
|
|
defer cancel()
|
|
|
|
var wg sync.WaitGroup
|
|
wg.Add(len(sources))
|
|
for i := range sources {
|
|
src := sources[i]
|
|
go func() {
|
|
src.update(ctx)
|
|
wg.Done()
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
|
|
for _, src := range sources {
|
|
fmt.Println(src.Title, src.Error, src.LastStatus)
|
|
for i, x := range src.Items {
|
|
if i > 5 {
|
|
break
|
|
}
|
|
fmt.Println("\t", x.PublishedParsed.Format(time.Stamp), x.Title)
|
|
}
|
|
}
|
|
|
|
src := NewMastoSource("https://tilde.town/~magical/masto_test.html")
|
|
src.update(ctx)
|
|
fmt.Println(src.Title, src.Error, src.LastStatus)
|
|
for i, x := range src.Items {
|
|
if i > 5 {
|
|
//break
|
|
}
|
|
auth := ""
|
|
if x.IsBoost {
|
|
auth = "RT @" + x.Author + ": "
|
|
}
|
|
d, _ := time.Parse(time.RFC3339, x.PublishedString)
|
|
fmt.Println("\t", d.Format(time.Stamp), auth+x.Content)
|
|
}
|
|
}
|
|
|
|
// Source is the intended common interface for anything that can be polled
// for headlines. Nothing in this file implements it yet; main still works
// with concrete source types directly.
type Source interface {
	// Title returns the source's title.
	Title() string

	// Link returns a link associated with the source.
	Link() string

	// Error returns the error associated with the source, if any.
	Error() error

	// Update refreshes the source, bounded by ctx.
	Update(ctx context.Context)
}
|
|
|
|
// want to keep track of:
|
|
// - whether the most recent update succeeded
|
|
// - when the last successful update was
|
|
// - how many of the last N updates succeeded
|
|
// - status codes for the last N updates
|
|
// - response time for the last N updates
|
|
// - how frequently items are posted
|
|
|
|
// Cache is an empty placeholder type; it has no fields or methods yet.
type Cache struct{}
|
|
|
|
type FeedSource struct {
|
|
Items []*gofeed.Item
|
|
Title string
|
|
URL string
|
|
LastFetch time.Time
|
|
Error error
|
|
|
|
LastStatusCode int
|
|
LastStatus string
|
|
|
|
mu sync.Mutex
|
|
}
|
|
|
|
func NewFeed(url string) *FeedSource {
|
|
return &FeedSource{
|
|
URL: url,
|
|
}
|
|
}
|
|
|
|
func (src *FeedSource) update(ctx context.Context) {
|
|
src.mu.Lock()
|
|
defer src.mu.Unlock()
|
|
fp := gofeed.NewParser()
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", src.URL, nil)
|
|
if err != nil {
|
|
src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err)
|
|
log.Println(src.Error)
|
|
return // return err?
|
|
}
|
|
req.Header.Set("User-Agent", UserAgent)
|
|
// TODO: If-Modified-Since, Etag
|
|
|
|
resp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
err := fmt.Errorf("error fetching %q: %w", src.URL, err)
|
|
log.Println(err)
|
|
src.Error = err
|
|
return // return err?
|
|
}
|
|
|
|
if resp != nil && resp.Body != nil {
|
|
defer func() {
|
|
err := resp.Body.Close()
|
|
if err != nil {
|
|
log.Printf("error closing response body for %q: %v", src.URL, err)
|
|
}
|
|
}()
|
|
}
|
|
|
|
src.LastStatusCode = resp.StatusCode
|
|
src.LastStatus = resp.Status
|
|
if resp.StatusCode != 200 {
|
|
src.Error = fmt.Errorf("error fetching %q: status %s", src.URL, resp.Status)
|
|
log.Println(src.Error)
|
|
return
|
|
}
|
|
|
|
// TODO: cache body
|
|
|
|
feed, err := fp.Parse(resp.Body)
|
|
if err != nil {
|
|
err := fmt.Errorf("error parsing %q: %w", src.URL, err)
|
|
log.Println(err)
|
|
src.Error = err
|
|
return // return err?
|
|
}
|
|
|
|
items := feed.Items
|
|
sort.Slice(items, func(i, j int) bool {
|
|
return items[i].Updated >= items[j].Updated
|
|
})
|
|
src.Title = feed.Title
|
|
src.Items = items
|
|
src.LastFetch = time.Now()
|
|
src.Error = nil
|
|
}
|