diff --git a/go.mod b/go.mod index 524f41f..f392835 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,7 @@ module git.tilde.town/magical/feedget go 1.14 -require github.com/mmcdole/gofeed v1.1.3 +require ( + github.com/PuerkitoBio/goquery v1.5.1 + github.com/mmcdole/gofeed v1.1.3 +) diff --git a/mastodon.go b/mastodon.go new file mode 100644 index 0000000..84953d4 --- /dev/null +++ b/mastodon.go @@ -0,0 +1,165 @@ +package main + +import ( + "context" + "fmt" + "io" + "log" + "net/http" + "sort" + "sync" + "time" + + "github.com/PuerkitoBio/goquery" +) + +// https://docs.joinmastodon.org/spec/microformats/ + +// Root elements (h-*) +// h-feed +// Represents a stream of entries. Attached to a profile's toots. Also +// attached to the parent thread within detailed status views. +// +// h-entry +// Represents episodic or date stamped online content. Attached to a status. +// +// URL properties (u-*) +// +// u-photo +// Within h-card, represents the profile picture. Attached to the avatar image. +// +// u-uid +// Within h-entry or h-cite, represents a universally unique identifier. +// Attached to timestamp link. +// +// u-url +// Within h-entry or h-cite, represents the status permalink. Attached to +// timestamp link. Within h-card, represents the profile permalink. +// Attached to display name link. +// +// +// +// Datetime properties (dt-*) +// +// dt-published +// Within h-entry or h-cite, represents the date and time at which the +// status was published. Attached to data element with value attribute. +// +// Element tree (e-*) +// +// e-content +// Within h-entry or h-cite, represents the content of the status. Attached to status content. +// + +type MastoSource struct { + Items []*MastoItem + Title string + URL string + LastFetch time.Time + Error error + + LastStatusCode int + LastStatus string + + mu sync.Mutex +} + +type MastoFeed struct { + Title string + Items []*MastoItem +} + +type MastoItem struct { + Title string `json:"title,omitempty"` + Content string `json:"content,omitempty"` + Link string `json:"link,omitempty"` + PublishedString string `json:"published,omitempty"` + Author string `json:"author,omitempty"` +} + +func NewMastoSource(url string) *MastoSource { + return &MastoSource{ + URL: url, + } +} + +func (src *MastoSource) update(ctx context.Context) { + src.mu.Lock() + defer src.mu.Unlock() + + req, err := http.NewRequest("GET", src.URL, nil) + if err != nil { + src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err) + log.Println(src.Error) + return // return err? + } + req = req.WithContext(ctx) + req.Header.Set("User-Agent", UserAgent) + // TODO: If-Modified-Since, Etag + + resp, err := http.DefaultClient.Do(req) + if err != nil { + err := fmt.Errorf("error fetching %q: %w", src.URL, err) + log.Println(err) + src.Error = err + return // return err? + } + + if resp != nil && resp.Body != nil { + defer func() { + err := resp.Body.Close() + if err != nil { + log.Printf("error closing response body for %q: %v", src.URL, err) + } + }() + } + + src.LastStatusCode = resp.StatusCode + src.LastStatus = resp.Status + if resp.StatusCode != 200 { + src.Error = fmt.Errorf("error fetching %q: status %s", src.URL, resp.Status) + log.Println(src.Error) + return + } + + // TODO: cache body + + feed, err := parseMicroformats(resp.Body) + if err != nil { + err := fmt.Errorf("error parsing %q: %w", src.URL, err) + log.Println(err) + src.Error = err + return // return err? + } + + items := feed.Items + sort.Slice(items, func(i, j int) bool { + return items[i].PublishedString >= items[j].PublishedString + }) + src.Title = feed.Title + src.Items = items + src.LastFetch = time.Now() + src.Error = nil +} + +func parseMicroformats(r io.Reader) (*MastoFeed, error) { + doc, err := goquery.NewDocumentFromReader(r) + if err != nil { + return nil, err + } + feed := new(MastoFeed) + if doc.Find(".h-feed").Length() == 0 { + return nil, fmt.Errorf("no feed content found") + } + feed.Title = doc.Find(".h-feed > .p-name").First().AttrOr("value", "") + doc.Find(".h-feed .h-entry").Each(func(i int, elem *goquery.Selection) { + feed.Items = append(feed.Items, &MastoItem{ + Title: "", + Content: elem.Find(".e-content").Text(), + Link: elem.Find("a.u-url.u-uid").AttrOr("href", ""), + Author: elem.Find(".p-author .p-name").Text(), + PublishedString: elem.Find("data.dt-published").AttrOr("value", ""), + }) + }) + return feed, nil +}