From d5a570295d3297b4a4cb877c13d774419730f09b Mon Sep 17 00:00:00 2001 From: Mike Lynch Date: Tue, 24 Oct 2023 09:32:03 +1100 Subject: [PATCH 1/4] Added pycurl but I had to do it with pip install so I'm not sure if poetry knows about it in the right way --- autoradio/autoradio.py | 20 +++++++++++++++++--- poetry.lock | 35 ++++++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/autoradio/autoradio.py b/autoradio/autoradio.py index a187692..b27d234 100644 --- a/autoradio/autoradio.py +++ b/autoradio/autoradio.py @@ -2,8 +2,10 @@ import feedparser import argparse import json +import pycurl import subprocess from pathlib import Path +import traceback def download_audio(link, destdir): @@ -13,9 +15,21 @@ def download_audio(link, destdir): if target.exists(): print("File already exists, skipping") else: - args = [ "wget", link, "-o", str(target) ] - print(f"downloading {link}") - subprocess.run(args) + with open(target, "wb") as fp: + curl = pycurl.Curl() + curl.setopt(pycurl.URL, link) + curl.setopt(pycurl.FOLLOWLOCATION, 1) + curl.setopt(pycurl.MAXREDIRS, 5) + curl.setopt(pycurl.CONNECTTIMEOUT, 30) + curl.setopt(pycurl.TIMEOUT, 300) + curl.setopt(pycurl.NOSIGNAL, 1) + curl.setopt(pycurl.WRITEDATA, fp) + try: + curl.perform() + except: + traceback.print_exc(file=sys.stderr) + sys.stderr.flush() + curl.close() diff --git a/poetry.lock b/poetry.lock index 0425be6..0296bbf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,30 +1,43 @@ -# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand. 
- [[package]] name = "feedparser" version = "6.0.10" description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" +category = "main" optional = false python-versions = ">=3.6" -files = [ - {file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"}, - {file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"}, -] [package.dependencies] sgmllib3k = "*" +[[package]] +name = "pycurl" +version = "7.45.2" +description = "PycURL -- A Python Interface To The cURL library" +category = "main" +optional = false +python-versions = ">=3.5" + [[package]] name = "sgmllib3k" version = "1.0.0" description = "Py3k port of sgmllib." +category = "main" optional = false python-versions = "*" -files = [ - {file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"}, -] [metadata] -lock-version = "2.0" +lock-version = "1.1" python-versions = "^3.9" -content-hash = "b174a7eabcd92b902e7d7381cd453724319fb6563151d344f5011cc51efb3689" +content-hash = "2048ee9508f876635bc68e651540b23101da7469317226a7e86353c432292db6" + +[metadata.files] +feedparser = [ + {file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"}, + {file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"}, +] +pycurl = [ + {file = "pycurl-7.45.2.tar.gz", hash = "sha256:5730590be0271364a5bddd9e245c9cc0fb710c4cbacbdd95264a3122d23224ca"}, +] +sgmllib3k = [ + {file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"}, +] From 606a278d2889f7729ef85ae1a0a2942100352d46 Mon Sep 17 00:00:00 2001 From: Mike Lynch Date: Sat, 9 Dec 2023 12:47:51 +1100 Subject: [PATCH 2/4] Downgraded python to 3.7 so that it works on the Pi --- 
pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 41ca383..8a87d58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = ["Mike Lynch "] readme = "README.md" [tool.poetry.dependencies] -python = "^3.9" +python = "^3.7" feedparser = "^6.0.10" [tool.poetry.scripts] From cd37766dc2ea3b83e8e588c4112121221209ac77 Mon Sep 17 00:00:00 2001 From: Mike Lynch Date: Sat, 9 Dec 2023 12:48:54 +1100 Subject: [PATCH 3/4] Fetch most recent N files for each feed if they aren't already downloaded --- autoradio/autoradio.py | 24 +++++++++++++----------- config.json | 9 +++++---- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/autoradio/autoradio.py b/autoradio/autoradio.py index b27d234..c7d8997 100644 --- a/autoradio/autoradio.py +++ b/autoradio/autoradio.py @@ -4,6 +4,7 @@ import argparse import json import pycurl import subprocess +import sys from pathlib import Path import traceback @@ -13,7 +14,7 @@ def download_audio(link, destdir): ptarget = Path(destdir) / Path(parts[-1]) target = ptarget.resolve() if target.exists(): - print("File already exists, skipping") + print(f"File {target} already downloaded, skipping") else: with open(target, "wb") as fp: curl = pycurl.Curl() @@ -21,7 +22,7 @@ def download_audio(link, destdir): curl.setopt(pycurl.FOLLOWLOCATION, 1) curl.setopt(pycurl.MAXREDIRS, 5) curl.setopt(pycurl.CONNECTTIMEOUT, 30) - curl.setopt(pycurl.TIMEOUT, 300) + curl.setopt(pycurl.TIMEOUT, 3000) curl.setopt(pycurl.NOSIGNAL, 1) curl.setopt(pycurl.WRITEDATA, fp) try: @@ -39,14 +40,14 @@ def looks_audio(link): return False -def get_latest(url, dir): +def get_latest(url, dir, max): d = feedparser.parse(url) title = d.feed.get('title', "[no title]") entries = d.get('entries', []) - if entries: - latest = entries[0] - if 'links' in latest: - sounds = [ + urls = [] + for entry in entries[:max]: + if 'links' in entry: + sounds = [
l for l in entry['links'] if looks_audio(l) ] if len(sounds) < 0: print("No audio links") if len(sounds) > 1: @@ -54,8 +55,8 @@ def get_latest(url, dir): for s in sounds: audio_url = s.get('href', '') if audio_url: - return audio_url - return None + urls.append(audio_url) + return urls def main(): ap = argparse.ArgumentParser("autoradio - download audio from RSS feeds") @@ -68,10 +69,11 @@ def main(): args = ap.parse_args() with open(args.config, 'r') as cfh: cf = json.load(cfh) + m = int(cf.get("max", "5")) for name, config in cf['feeds'].items(): print(f"Checking {name}") - url = get_latest(config['url'], config['dir']) - if url: + urls = get_latest(config['url'], config['dir'], m) + for url in urls: print(f"content = {url}") download_audio(url, config['dir']) diff --git a/config.json b/config.json index 4584929..c8d31cd 100644 --- a/config.json +++ b/config.json @@ -2,11 +2,12 @@ "feeds": { "Utility Fog": { "url": "https://www.frogworth.com/utilityfog/feed/", - "dir": "./output/UFog" + "dir": "/media/pi/Storage/Music/Utility Fog" }, "RA Podcast": { "url": "https://ra.co/xml/podcast.xml", - "dir": "./output/RA" + "dir": "/media/pi/Storage/Music/RA_Podcast" } - } -} \ No newline at end of file + }, + "max": 10 +} From 51b760cac335a67ed8dcc2ec7a048a4140129edc Mon Sep 17 00:00:00 2001 From: Mike Lynch Date: Sat, 9 Dec 2023 15:22:44 +1100 Subject: [PATCH 4/4] Added pycurl as a poetry dependency and a note in the README to explain what the problem is with that. --- README.md | 8 +++++++- poetry.lock | 4 ++-- pyproject.toml | 1 + 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 33d0ff0..e85b875 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ # autoradio -Auto download of mp3s from podcast feeds using a little Python script. \ No newline at end of file +Auto download of mp3s from podcast feeds using a little Python script. 
+ +## Installation + +I haven't worked out how to install pycurl with poetry, as it looks like it +requires a command-line flag to specify the SSL backend, and poetry doesn't +seem to support that. So for my local install I pip installed it by hand. \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index 0296bbf..7daa77e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -27,8 +27,8 @@ python-versions = "*" [metadata] lock-version = "1.1" -python-versions = "^3.9" -content-hash = "2048ee9508f876635bc68e651540b23101da7469317226a7e86353c432292db6" +python-versions = "^3.7" +content-hash = "049581a37f61f8307815a597cf5decff839acd1d72a6dd0570ee65b006075794" [metadata.files] feedparser = [ diff --git a/pyproject.toml b/pyproject.toml index 8a87d58..8e0cc2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.7" feedparser = "^6.0.10" +pycurl = "^7.45.2" [tool.poetry.scripts] autoradio = "autoradio.autoradio:main"