8 changed files with 247 additions and 37123 deletions
--- a/2
+++ b/2
@ -1,2 +0,0 @@
 default: books.csv genfeed.py
 	./genfeed.py < books.csv > feed.xml
--- a/README.md
+++ b/README.md
@ -4,17 +4,9 @@ This is a catalog of all the books in my calibre library!
 If you see a title here you would like to borrow, let me know! I'd be happy to share :)
-## How To (For me)
+## How To
- go to calibre and "convert books" -> "create a catalog...."
+If you're going to explore this dataset, I recommend using the awesome csvkit.
 - save it to the dir
 - `j all`
 ## How To (For you)
 If you want to browse the collection, I would look at `books.rec`.
 If you're going to really explore this dataset, I recommend using the awesome csvkit.
 => <https://csvkit.readthedocs.io/en/latest/index.html>
@ -25,20 +17,4 @@ It will allow you to do stuff like:
 - look at some stats: `csvcut -c languages,size,formats books.csv | csvstat`
 - find the largest pdfs in the collection: `csvcut -c title_sort,formats,size books.csv | csvgrep -c formats -m pdf | csvsort -c size -r | head`
 - `csvjson books.csv | jq | whatever`
 - show the most recently added books: `csvcut -c 13,1,3 books.csv | csvsort -c timestamp -r | head -n 20`
 - You can also perform actual SQL queries on it, and convert the data between csv and sqlite database: <https://csvkit.readthedocs.io/en/latest/tutorial/3_power_tools.html>
 ## RSS feed
 An RSS feed has been kindly provided by [the Rsszard of Syndication](https://tilde.town/~lucidiot)
 and is available at https://git.tilde.town/dozens/books/raw/branch/main/feed.xml
 Generating the feed requires you to have Python 3.7 or later installed, as well
 as the [xmltodict](https://pypi.org/project/xmltodict) package:
 `pip3 install xmltodict`.
 To generate the feed, run `./genfeed.py <books.csv >feed.xml`.
 ## TODO
 - type definitions for Book
--- a/books.csv
+++ b/books.csv
--- a/books.rec
+++ b/books.rec
--- a/feed.xml
+++ b/feed.xml
--- a/genfeed.py
+++ b/genfeed.py
@ -1,88 +0,0 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 from datetime import datetime, timezone
 from typing import Mapping, MutableMapping
 import csv
 import sys
 import xmltodict
 # strftime pattern used for every pubDate written to the feed.
 # The time is spelled out as %H:%M:%S rather than %T: %T is not one of the
 # format codes Python documents as available on every platform.
 RSS_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S %z'
 # strptime pattern used to parse the "timestamp" column of the CSV.
 ISO_DATE_FORMAT = '%Y-%m-%dT%H:%M:%S%z'
 def parse_book(book: MutableMapping[str, str]) -> Mapping:
    """Build the RSS item mapping for one row of the books CSV.

    The ``timestamp``, ``uuid`` and ``comments`` columns are popped from
    *book*, so the mapping is mutated in place. Every remaining non-empty
    column is rendered as a ``<dl>`` metadata list that is placed in front of
    the comments in the item description.

    Args:
        book: One CSV row keyed by column name. It must contain the
            ``title_sort``, ``timestamp``, ``uuid`` and ``comments`` columns.

    Returns:
        A mapping with ``title``, ``pubDate``, ``guid``, ``description`` and
        ``author`` keys, plus ``category`` when the row has tags. Keys
        starting with ``@`` and the ``#text`` key follow the xmltodict
        convention for attributes and element text.

    Raises:
        KeyError: If one of the required columns is missing.
        ValueError: If ``timestamp`` does not match ``ISO_DATE_FORMAT``.
    """
    # Imported locally so this function does not rely on a file-level import.
    import html

    published = datetime.strptime(book.pop("timestamp"), ISO_DATE_FORMAT)
    item = {
        "title": book["title_sort"],
        "pubDate": published.strftime(RSS_DATE_FORMAT),
        "guid": {
            "@isPermaLink": "false",
            "#text": book.pop("uuid"),
        },
        "description": book.pop("comments"),
        # When the CSV starts with a byte order mark (U+FEFF), the mark ends
        # up glued to the name of the first column, so look the author up
        # under both spellings. Fall back to an empty author instead of
        # raising KeyError when the file has no BOM and the value is empty.
        "author": book.get("author_sort") or book.get("\ufeffauthor_sort", ""),
    }

    # Prepend metadata to the item description
    metadata = "".join(
        "<dt>{}</dt><dd>{}</dd>".format(
            key.replace('_sort', '').replace('_', ' ').replace('\ufeff', '').capitalize(),
            # Plain-text CSV values are escaped so that '&', '<' and '>' do
            # not corrupt the generated HTML.
            html.escape(value, quote=False),
        )
        for key, value in book.items()
        # Ignore empty columns
        if value
    )
    item["description"] = "<dl>{}</dl>{}".format(metadata, item["description"])

    if book.get("tags"):
        # One <category> element per tag; tags are separated by ", ".
        item["category"] = [
            {
                "@domain": "https://git.tilde.town/dozens/books",
                "#text": tag
            }
            for tag in book["tags"].split(", ")
        ]

    return item
 def main():
    """Read the books CSV from stdin and write the RSS 2.0 feed to stdout.

    Each CSV row is converted with ``parse_book``; the resulting document is
    serialized with ``xmltodict.unparse`` and written in a single call.
    """
    channel = {
        "title": "dozens books",
        "description": "the cool calibre library of dozens",
        "link": "https://git.tilde.town/dozens/books",
        "atom:link": {
            "@rel": "self",
            "@type": "application/rss+xml",
            "@href": "https://git.tilde.town/dozens/books/raw/branch/main/feed.xml",
        },
        "language": "en-US",
        "pubDate": datetime.now(timezone.utc).strftime(RSS_DATE_FORMAT),
        "docs": "https://www.rssboard.org/rss-specification",
        "webMaster": "dozens@tilde.town (~dozens)",
        "generator": "Python {}.{}.{}".format(*sys.version_info[:3]),
        # Update on the first of every month, at midnight UTC
        "sy:updatePeriod": "monthly",
        "sy:updateFrequency": "1",
        "sy:updateBase": "1971-01-01T00:00+00:00",
        # One month, roughly, for clients that do not support mod_syndication
        "ttl": 60 * 24 * 30,
    }
    # Items go last so they follow the channel metadata in the output.
    channel["item"] = [parse_book(row) for row in csv.DictReader(sys.stdin)]

    feed = {
        "rss": {
            "@version": "2.0",
            "@xmlns:atom": "http://www.w3.org/2005/Atom",
            "@xmlns:sy": "http://purl.org/rss/1.0/modules/syndication/",
            "channel": channel,
        }
    }
    document = xmltodict.unparse(feed, pretty=True, short_empty_elements=True)
    sys.stdout.write(document)
 # Generate the feed only when this file is executed as a script.
 if __name__ == '__main__':
    main()
--- a/14
+++ b/14
@ -1,14 +0,0 @@
 # show all commands
 default:
  just --list
 # generate rss
 rss:
  ./genfeed.py < books.csv > feed.xml
 # make rec
 rec:
  csvformat books.csv | csv2rec > books.rec
 # do the damn thing
 all: rec rss
--- a/requirements.txt
+++ b/requirements.txt
@ -1 +0,0 @@
 xmltodict>=0.12
		`@ -1,2 +0,0 @@`
			`default: books.csv genfeed.py`
			`./genfeed.py < books.csv > feed.xml`