stub out a mastodon feed source

master
magical 2021-12-31 08:46:49 +00:00
parent 6a8856c170
commit 2e195e36a6
2 changed files with 169 additions and 1 deletions

5
go.mod
View File

@ -2,4 +2,7 @@ module git.tilde.town/magical/feedget
go 1.14
require github.com/mmcdole/gofeed v1.1.3
require (
github.com/PuerkitoBio/goquery v1.5.1
github.com/mmcdole/gofeed v1.1.3
)

165
mastodon.go 100644
View File

@ -0,0 +1,165 @@
package main
import (
"context"
"fmt"
"io"
"log"
"net/http"
"sort"
"sync"
"time"
"github.com/PuerkitoBio/goquery"
)
// https://docs.joinmastodon.org/spec/microformats/
// Root elements (h-*)
// h-feed
// Represents a stream of entries. Attached to a profile's toots. Also
// attached to the parent thread within detailed status views.
//
// h-entry
// Represents episodic or date stamped online content. Attached to a status.
//
// URL properties (u-*)
//
// u-photo
// Within h-card, represents the profile picture. Attached to the avatar image.
//
// u-uid
// Within h-entry or h-cite, represents a universally unique identifier.
// Attached to timestamp link.
//
// u-url
// Within h-entry or h-cite, represents the status permalink. Attached to
// timestamp link. Within h-card, represents the profile permalink.
// Attached to display name link.
//
//
//
// Datetime properties (dt-*)
//
// dt-published
// Within h-entry or h-cite, represents the date and time at which the
// status was published. Attached to data element with value attribute.
//
// Element tree (e-*)
//
// e-content
// Within h-entry or h-cite, represents the content of the status. Attached to status content.
//
// MastoSource is a feed source read from a page marked up with the
// h-feed / h-entry microformats described above. It keeps the result of the
// most recent successful fetch alongside the outcome of the latest attempt.
type MastoSource struct {
	// Feed contents, replaced as a unit by each successful update.
	Items []*MastoItem
	Title string

	// URL is the address requested by update.
	URL string

	// LastFetch is the time of the last successful update.
	LastFetch time.Time
	// Error is the failure recorded by the most recent update, or nil if
	// that update succeeded.
	Error error

	// Status code and status line of the last HTTP response received.
	LastStatusCode int
	LastStatus     string

	// mu is held by update for the whole fetch-and-parse cycle.
	mu sync.Mutex
}
// MastoFeed is the result of parsing one page: the feed title and the
// entries found inside its h-feed element, in document order.
type MastoFeed struct {
	// Title is the feed's p-name value; empty when the page has none.
	Title string
	// Items holds one element per h-entry found in the feed.
	Items []*MastoItem
}
// MastoItem is a single status (h-entry) extracted from a feed page.
// Every field is omitted from the JSON encoding when empty.
type MastoItem struct {
	// Title is left empty by parseMicroformats.
	Title string `json:"title,omitempty"`
	// Content is the text of the entry's e-content element.
	Content string `json:"content,omitempty"`
	// Link is the href of the entry's u-url/u-uid permalink.
	Link string `json:"link,omitempty"`
	// PublishedString is the raw value attribute of the entry's
	// dt-published data element; it is stored unparsed.
	PublishedString string `json:"published,omitempty"`
	// Author is the text of the p-name inside the entry's p-author.
	Author string `json:"author,omitempty"`
}
// NewMastoSource returns a source for the page at url. Nothing is fetched
// here; every field other than URL starts at its zero value.
func NewMastoSource(url string) *MastoSource {
	src := new(MastoSource)
	src.URL = url
	return src
}
// update fetches src.URL, parses the microformats in the response body and
// replaces the source's Title and Items with the result.
//
// The whole operation runs with src.mu held. The outcome is recorded on the
// source instead of being returned:
//
//   - On failure, src.Error is set and logged, and the previously stored
//     Title, Items and LastFetch are left untouched.
//   - On success, src.Error is cleared and LastFetch is set to the current
//     time.
//
// LastStatusCode and LastStatus are overwritten whenever a response is
// received, whether or not its status is 200.
//
// ctx is attached to the outgoing request, so cancelling it aborts the fetch.
func (src *MastoSource) update(ctx context.Context) {
	src.mu.Lock()
	defer src.mu.Unlock()

	// NewRequestWithContext reports a nil ctx as an error rather than
	// panicking the way Request.WithContext does.
	req, err := http.NewRequestWithContext(ctx, "GET", src.URL, nil)
	if err != nil {
		src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err)
		log.Println(src.Error)
		return // return err?
	}
	req.Header.Set("User-Agent", UserAgent)
	// TODO: If-Modified-Since, Etag

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		src.Error = fmt.Errorf("error fetching %q: %w", src.URL, err)
		log.Println(src.Error)
		return // return err?
	}
	// A nil error from Do means resp and resp.Body are both non-nil, so the
	// body can be closed unconditionally.
	defer func() {
		if err := resp.Body.Close(); err != nil {
			log.Printf("error closing response body for %q: %v", src.URL, err)
		}
	}()

	src.LastStatusCode = resp.StatusCode
	src.LastStatus = resp.Status
	if resp.StatusCode != 200 {
		src.Error = fmt.Errorf("error fetching %q: status %s", src.URL, resp.Status)
		log.Println(src.Error)
		return
	}

	// TODO: cache body
	feed, err := parseMicroformats(resp.Body)
	if err != nil {
		src.Error = fmt.Errorf("error parsing %q: %w", src.URL, err)
		log.Println(src.Error)
		return // return err?
	}

	// Order entries by descending PublishedString. The comparison must be
	// strict (">", not ">="): sort requires less(i, i) to be false, and a
	// non-strict comparator reorders equal entries. The stable sort keeps
	// entries with identical timestamps in document order.
	// NOTE(review): this is a plain string comparison; it yields
	// newest-first only if all timestamps share one lexically sortable
	// format — confirm against real pages.
	items := feed.Items
	sort.SliceStable(items, func(i, j int) bool {
		return items[i].PublishedString > items[j].PublishedString
	})

	src.Title = feed.Title
	src.Items = items
	src.LastFetch = time.Now()
	src.Error = nil
}
// parseMicroformats reads an HTML document from r and extracts the feed
// described by its microformats markup.
//
// It returns an error if the HTML cannot be parsed or if the document has no
// element with class h-feed. Otherwise the feed title is the value attribute
// of the first ".h-feed > .p-name" element (empty if there is none), and one
// item is produced, in document order, for each ".h-entry" nested inside an
// ".h-feed". Missing properties yield empty strings; an entry's Title is
// always empty.
func parseMicroformats(r io.Reader) (*MastoFeed, error) {
	doc, err := goquery.NewDocumentFromReader(r)
	if err != nil {
		return nil, err
	}
	if doc.Find(".h-feed").Length() == 0 {
		return nil, fmt.Errorf("no feed content found")
	}

	feed := &MastoFeed{
		Title: doc.Find(".h-feed > .p-name").First().AttrOr("value", ""),
	}
	doc.Find(".h-feed .h-entry").Each(func(_ int, entry *goquery.Selection) {
		item := new(MastoItem)
		item.Content = entry.Find(".e-content").Text()
		item.Link = entry.Find("a.u-url.u-uid").AttrOr("href", "")
		item.Author = entry.Find(".p-author .p-name").Text()
		item.PublishedString = entry.Find("data.dt-published").AttrOr("value", "")
		feed.Items = append(feed.Items, item)
	})
	return feed, nil
}