forked from dozens/books
Compare commits
14 Commits
Author | SHA1 | Date |
---|---|---|
Dozens B. McCuzzins | c3e42563c6 | |
Dozens B. McCuzzins | 8a59c47485 | |
Dozens B. McCuzzins | 12fa2770d5 | |
Dozens B. McCuzzins | 2dc0d374cd | |
Dozens B. McCuzzins | 23d6d84d43 | |
Dozens B. McCuzzins | 44bf8fabbd | |
Dozens B. McCuzzins | 70fb11b3a1 | |
Dozens B. McCuzzins | a10152ff67 | |
Dozens B. McCuzzins | 9238b1816d | |
Dozens B. McCuzzins | a04db2f312 | |
Dozens B. McCuzzins | dbceefe37c | |
Dozens B. McCuzzins | 53d8718ee9 | |
Lucidiot | 2dcb679567 | |
Lucidiot | b00b383411 |
|
@ -0,0 +1,2 @@
|
||||||
|
# `default` is only a convenient alias, not a file on disk, so declare it
# phony; the real build product is feed.xml.
.PHONY: default
default: feed.xml

# Regenerate the feed only when the catalog or the generator script changed.
feed.xml: books.csv genfeed.py
	./genfeed.py < books.csv > feed.xml
|
28
README.md
28
README.md
|
@ -4,9 +4,17 @@ This is a catalog of all the books in my calibre library!
|
||||||
|
|
||||||
If you see a title here you would like to borrow, let me know! I'd be happy to share :)
|
If you see a title here you would like to borrow, let me know! I'd be happy to share :)
|
||||||
|
|
||||||
## How To
|
## How To (For me)
|
||||||
|
|
||||||
If you're going to explore this dataset, I recommend using the awesome csvkit.
|
- go to calibre and "convert books" -> "create a catalog...."
|
||||||
|
- save it to the dir
|
||||||
|
- `j all`
|
||||||
|
|
||||||
|
## How To (For you)
|
||||||
|
|
||||||
|
If you want to browse the collection, I would look at `books.rec`.
|
||||||
|
|
||||||
|
If you're going to really explore this dataset, I recommend using the awesome csvkit.
|
||||||
|
|
||||||
=> <https://csvkit.readthedocs.io/en/latest/index.html>
|
=> <https://csvkit.readthedocs.io/en/latest/index.html>
|
||||||
|
|
||||||
|
@ -17,4 +25,20 @@ It will allow you to do stuff like:
|
||||||
- look at some stats: `csvcut -c languages,size,formats books.csv | csvstat`
|
- look at some stats: `csvcut -c languages,size,formats books.csv | csvstat`
|
||||||
- find the largest pdfs in the collection: `csvcut -c title_sort,formats,size books.csv | csvgrep -c formats -m pdf | csvsort -c size -r | head`
|
- find the largest pdfs in the collection: `csvcut -c title_sort,formats,size books.csv | csvgrep -c formats -m pdf | csvsort -c size -r | head`
|
||||||
- `csvjson books.csv | jq | whatever`
|
- `csvjson books.csv | jq | whatever`
|
||||||
|
- show the most recently added books: `csvcut -c 13,1,3 books.csv | csvsort -c timestamp -r | head -n 20`
|
||||||
- You can also perform actual SQL queries on it, and convert the data between csv and sqlite database: <https://csvkit.readthedocs.io/en/latest/tutorial/3_power_tools.html>
|
- You can also perform actual SQL queries on it, and convert the data between csv and sqlite database: <https://csvkit.readthedocs.io/en/latest/tutorial/3_power_tools.html>
|
||||||
|
|
||||||
|
## RSS feed
|
||||||
|
|
||||||
|
An RSS feed has been kindly provided by [the Rsszard of Syndication](https://tilde.town/~lucidiot)
|
||||||
|
and is available at https://git.tilde.town/dozens/books/raw/branch/main/feed.xml
|
||||||
|
|
||||||
|
Generating the feed requires you to have Python 3.7 or later installed, as well
|
||||||
|
as the [xmltodict](https://pypi.org/project/xmltodict) package:
|
||||||
|
`pip3 install xmltodict`.
|
||||||
|
|
||||||
|
To generate the feed, run `./genfeed.py <books.csv >feed.xml`.
|
||||||
|
|
||||||
|
## TODO
|
||||||
|
|
||||||
|
- type definitions for Book
|
||||||
|
|
|
@ -0,0 +1,88 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import csv
import html
import sys
from datetime import datetime, timezone
from typing import Mapping, MutableMapping

import xmltodict
|
||||||
|
|
||||||
|
# strftime pattern for the RFC 822 style dates used in RSS <pubDate>.
# The time is spelled out as %H:%M:%S because the %T shorthand is a C-library
# extension that Python does not document and that some platforms reject.
RSS_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S %z'
# strptime pattern for the "timestamp" column of the CSV catalog.
ISO_DATE_FORMAT = '%Y-%m-%dT%H:%M:%S%z'


def parse_book(book: MutableMapping[str, str]) -> Mapping:
    """Convert one row of the book catalog CSV into an RSS ``item`` mapping.

    The returned mapping is shaped for ``xmltodict.unparse``: ``@``-prefixed
    keys become XML attributes and ``#text`` becomes the element text.

    :param book: One CSV row, keyed by column name. It must contain the
        ``title_sort``, ``timestamp``, ``uuid`` and ``comments`` columns.
        The row is read only; it is not modified.
    :returns: A mapping with ``title``, ``pubDate``, ``guid``, ``description``
        and ``author`` entries, plus ``category`` when the row has tags.
    :raises KeyError: If a required column is missing.
    :raises ValueError: If ``timestamp`` does not match ``ISO_DATE_FORMAT``.
    """
    # Work on a normalized copy so the caller's row is left untouched.
    # The first column name can carry a leading U+FEFF byte order mark when
    # the CSV was saved with a BOM; strip it so lookups by name just work.
    fields = {
        key.replace('\ufeff', ''): value
        for key, value in book.items()
        # csv.DictReader stores surplus cells under a None key; skip those.
        if isinstance(key, str)
    }

    added = datetime.strptime(fields.pop("timestamp"), ISO_DATE_FORMAT)
    guid = fields.pop("uuid")
    # Comments are emitted verbatim after the metadata list.
    comments = fields.pop("comments") or ""

    # Every remaining non-empty column becomes a <dt>/<dd> pair. These values
    # are plain text, so escape them before embedding them in HTML.
    metadata = "".join(
        "<dt>{}</dt><dd>{}</dd>".format(
            html.escape(
                key.replace('_sort', '').replace('_', ' ').capitalize()
            ),
            html.escape(value),
        )
        for key, value in fields.items()
        # Ignore empty columns
        if value
    )

    item = {
        "title": fields["title_sort"],
        "pubDate": added.strftime(RSS_DATE_FORMAT),
        "guid": {
            "@isPermaLink": "false",
            "#text": guid,
        },
        # Prepend the metadata to the item description
        "description": "<dl>{}</dl>{}".format(metadata, comments),
        # An empty or missing author yields an empty element, not a KeyError.
        "author": fields.get("author_sort") or "",
    }

    tags = fields.get("tags")
    if tags:
        item["category"] = [
            {
                "@domain": "https://git.tilde.town/dozens/books",
                "#text": tag,
            }
            for tag in tags.split(", ")
        ]

    return item
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Read the book catalog as CSV on stdin and write the RSS feed to stdout."""
    channel = {
        "title": "dozens books",
        "description": "the cool calibre library of dozens",
        "link": "https://git.tilde.town/dozens/books",
        "atom:link": {
            "@rel": "self",
            "@type": "application/rss+xml",
            "@href": "https://git.tilde.town/dozens/books/raw/branch/main/feed.xml",
        },
        "language": "en-US",
        "pubDate": datetime.now(timezone.utc).strftime(RSS_DATE_FORMAT),
        "docs": "https://www.rssboard.org/rss-specification",
        "webMaster": "dozens@tilde.town (~dozens)",
        "generator": "Python {}.{}.{}".format(*sys.version_info[:3]),
        # Update on the first of every month, at midnight UTC
        "sy:updatePeriod": "monthly",
        "sy:updateFrequency": "1",
        "sy:updateBase": "1971-01-01T00:00+00:00",
        # Roughly one month, in minutes, for clients that do not support
        # mod_syndication
        "ttl": 60 * 24 * 30,
        # One feed item per CSV row, in file order
        "item": [parse_book(row) for row in csv.DictReader(sys.stdin)],
    }

    feed = {
        "rss": {
            "@version": "2.0",
            "@xmlns:atom": "http://www.w3.org/2005/Atom",
            "@xmlns:sy": "http://purl.org/rss/1.0/modules/syndication/",
            "channel": channel,
        }
    }

    xml = xmltodict.unparse(feed, pretty=True, short_empty_elements=True)
    sys.stdout.write(xml)


if __name__ == '__main__':
    main()
|
|
@ -0,0 +1,14 @@
|
||||||
|
# Recipes for regenerating the files derived from books.csv.
# The comment directly above each recipe is left as written, since it is
# the recipe's own description.

# show all commands
default:
    just --list

# generate rss
rss:
    ./genfeed.py < books.csv > feed.xml

# make rec
rec:
    csvformat books.csv | csv2rec > books.rec

# do the damn thing
all: rec rss
|
|
@ -0,0 +1 @@
|
||||||
|
xmltodict>=0.12
|
Loading…
Reference in New Issue