diff --git a/README.md b/README.md index 33d0ff0..e85b875 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ # autoradio -Auto download of mp3s from podcast feeds using a little Python script. \ No newline at end of file +Auto download of mp3s from podcast feeds using a little Python script. + +## Installation + +I haven't worked out how to install pycurl with poetry, as it looks like it +requires a command-line flag to specify the SSL backend, and poetry doesn't +seem to support that. So for my local install I pip installed it by hand. \ No newline at end of file diff --git a/autoradio/autoradio.py b/autoradio/autoradio.py index a187692..c7d8997 100644 --- a/autoradio/autoradio.py +++ b/autoradio/autoradio.py @@ -2,8 +2,11 @@ import feedparser import argparse import json +import pycurl import subprocess +import sys from pathlib import Path +import traceback def download_audio(link, destdir): @@ -11,11 +14,23 @@ def download_audio(link, destdir): ptarget = Path(destdir) / Path(parts[-1]) target = ptarget.resolve() if target.exists(): - print("File already exists, skipping") + print(f"File {target} already downloaded, skipping") else: - args = [ "wget", link, "-o", str(target) ] - print(f"downloading {link}") - subprocess.run(args) + with open(target, "wb") as fp: + curl = pycurl.Curl() + curl.setopt(pycurl.URL, link) + curl.setopt(pycurl.FOLLOWLOCATION, 1) + curl.setopt(pycurl.MAXREDIRS, 5) + curl.setopt(pycurl.CONNECTTIMEOUT, 30) + curl.setopt(pycurl.TIMEOUT, 3000) + curl.setopt(pycurl.NOSIGNAL, 1) + curl.setopt(pycurl.WRITEDATA, fp) + try: + curl.perform() + except: + traceback.print_exc(file=sys.stderr) + sys.stderr.flush() + curl.close() @@ -25,14 +40,14 @@ def looks_audio(link): return False -def get_latest(url, dir): +def get_latest(url, dir, max): d = feedparser.parse(url) title = d.feed.get('title', "[no title]") entries = d.get('entries', []) - if entries: - latest = entries[0] - if 'links' in latest: - sounds = [ l for l in latest['links'] if 
looks_audio(l) ] + urls = [] + for entry in entries[:max]: + if 'links' in entry: + sounds = [ l for l in entry['links'] if looks_audio(l) ] if len(sounds) < 0: print("No audio links") if len(sounds) > 1: @@ -40,8 +55,8 @@ def get_latest(url, dir): for s in sounds: audio_url = s.get('href', '') if audio_url: - return audio_url - return None + urls.append(audio_url) + return urls def main(): ap = argparse.ArgumentParser("autoradio - download audio from RSS feeds") @@ -54,10 +69,11 @@ def main(): args = ap.parse_args() with open(args.config, 'r') as cfh: cf = json.load(cfh) + m = int(cf.get("max", "5")) for name, config in cf['feeds'].items(): print(f"Checking {name}") - url = get_latest(config['url'], config['dir']) - if url: + urls = get_latest(config['url'], config['dir'], m) + for url in urls: print(f"content = {url}") download_audio(url, config['dir']) diff --git a/config.json b/config.json index 4584929..c8d31cd 100644 --- a/config.json +++ b/config.json @@ -2,11 +2,12 @@ "feeds": { "Utility Fog": { "url": "https://www.frogworth.com/utilityfog/feed/", - "dir": "./output/UFog" + "dir": "/media/pi/Storage/Music/Utility Fog" }, "RA Podcast": { "url": "https://ra.co/xml/podcast.xml", - "dir": "./output/RA" + "dir": "/media/pi/Storage/Music/RA_Podcast" } - } -} \ No newline at end of file + }, + "max": 10 +} diff --git a/poetry.lock b/poetry.lock index 0425be6..7daa77e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,30 +1,43 @@ -# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand. 
- [[package]] name = "feedparser" version = "6.0.10" description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" +category = "main" optional = false python-versions = ">=3.6" -files = [ - {file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"}, - {file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"}, -] [package.dependencies] sgmllib3k = "*" +[[package]] +name = "pycurl" +version = "7.45.2" +description = "PycURL -- A Python Interface To The cURL library" +category = "main" +optional = false +python-versions = ">=3.5" + [[package]] name = "sgmllib3k" version = "1.0.0" description = "Py3k port of sgmllib." +category = "main" optional = false python-versions = "*" -files = [ - {file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"}, -] [metadata] -lock-version = "2.0" -python-versions = "^3.9" -content-hash = "b174a7eabcd92b902e7d7381cd453724319fb6563151d344f5011cc51efb3689" +lock-version = "1.1" +python-versions = "^3.7" +content-hash = "049581a37f61f8307815a597cf5decff839acd1d72a6dd0570ee65b006075794" + +[metadata.files] +feedparser = [ + {file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"}, + {file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"}, +] +pycurl = [ + {file = "pycurl-7.45.2.tar.gz", hash = "sha256:5730590be0271364a5bddd9e245c9cc0fb710c4cbacbdd95264a3122d23224ca"}, +] +sgmllib3k = [ + {file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"}, +] diff --git a/pyproject.toml b/pyproject.toml index 41ca383..8e0cc2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,8 +6,9 @@ authors = ["Mike Lynch "] readme = 
"README.md" [tool.poetry.dependencies] -python = "^3.9" +python = "^3.7" feedparser = "^6.0.10" +pycurl = "^7.45.2" [tool.poetry.scripts] autoradio = "autoradio.autoradio:main"