Compare commits

...

5 Commits

Author SHA1 Message Date
bombinans 9989e5f4c8 Merge pull request 'feature-multi-downloads' (#1) from feature-multi-downloads into main
Reviewed-on: #1
2023-12-09 04:24:16 +00:00
Mike Lynch 51b760cac3 Added pycurl as a poetry dependency and a note in the README to explain
what the problem is with that.
2023-12-09 15:22:44 +11:00
Mike Lynch cd37766dc2 Fetch most recent N files for each feed if they aren'y already downloaded 2023-12-09 12:48:54 +11:00
Mike Lynch 606a278d28 Downgraded python to 3.7 so that it works on the Pi 2023-12-09 12:47:51 +11:00
Mike Lynch d5a570295d Added pycurl but I had to do it with pip install so I'm not sure
if poetry knows about it in the right way
2023-10-24 09:32:03 +11:00
5 changed files with 68 additions and 31 deletions

View File

@ -1,3 +1,9 @@
# autoradio # autoradio
Auto download of mp3s from podcast feeds using a little Python script. Auto download of mp3s from podcast feeds using a little Python script.
## Installation
I haven't worked out how to intall pycurl with poetry, as it looks like it
required a command-line flag to specify the SSL backend, and poetry doesn't
seem to support that. So for my local install I pip installed it by hand

View File

@ -2,8 +2,11 @@
import feedparser import feedparser
import argparse import argparse
import json import json
import pycurl
import subprocess import subprocess
import sys
from pathlib import Path from pathlib import Path
import traceback
def download_audio(link, destdir): def download_audio(link, destdir):
@ -11,11 +14,23 @@ def download_audio(link, destdir):
ptarget = Path(destdir) / Path(parts[-1]) ptarget = Path(destdir) / Path(parts[-1])
target = ptarget.resolve() target = ptarget.resolve()
if target.exists(): if target.exists():
print("File already exists, skipping") print(f"File {target} already downloaded, skipping")
else: else:
args = [ "wget", link, "-o", str(target) ] with open(target, "wb") as fp:
print(f"downloading {link}") curl = pycurl.Curl()
subprocess.run(args) curl.setopt(pycurl.URL, link)
curl.setopt(pycurl.FOLLOWLOCATION, 1)
curl.setopt(pycurl.MAXREDIRS, 5)
curl.setopt(pycurl.CONNECTTIMEOUT, 30)
curl.setopt(pycurl.TIMEOUT, 3000)
curl.setopt(pycurl.NOSIGNAL, 1)
curl.setopt(pycurl.WRITEDATA, fp)
try:
curl.perform()
except:
traceback.print_exc(file=sys.stderr)
sys.stderr.flush()
curl.close()
@ -25,14 +40,14 @@ def looks_audio(link):
return False return False
def get_latest(url, dir): def get_latest(url, dir, max):
d = feedparser.parse(url) d = feedparser.parse(url)
title = d.feed.get('title', "[no title]") title = d.feed.get('title', "[no title]")
entries = d.get('entries', []) entries = d.get('entries', [])
if entries: urls = []
latest = entries[0] for entry in entries[:max]:
if 'links' in latest: if 'links' in entry:
sounds = [ l for l in latest['links'] if looks_audio(l) ] sounds = [ l for l in entry['links'] if looks_audio(l) ]
if len(sounds) < 0: if len(sounds) < 0:
print("No audio links") print("No audio links")
if len(sounds) > 1: if len(sounds) > 1:
@ -40,8 +55,8 @@ def get_latest(url, dir):
for s in sounds: for s in sounds:
audio_url = s.get('href', '') audio_url = s.get('href', '')
if audio_url: if audio_url:
return audio_url urls.append(audio_url)
return None return urls
def main(): def main():
ap = argparse.ArgumentParser("autoradio - download audio from RSS feeds") ap = argparse.ArgumentParser("autoradio - download audio from RSS feeds")
@ -54,10 +69,11 @@ def main():
args = ap.parse_args() args = ap.parse_args()
with open(args.config, 'r') as cfh: with open(args.config, 'r') as cfh:
cf = json.load(cfh) cf = json.load(cfh)
m = int(cf.get("max", "5"))
for name, config in cf['feeds'].items(): for name, config in cf['feeds'].items():
print(f"Checking {name}") print(f"Checking {name}")
url = get_latest(config['url'], config['dir']) urls = get_latest(config['url'], config['dir'], m)
if url: for url in urls:
print(f"content = {url}") print(f"content = {url}")
download_audio(url, config['dir']) download_audio(url, config['dir'])

View File

@ -2,11 +2,12 @@
"feeds": { "feeds": {
"Utility Fog": { "Utility Fog": {
"url": "https://www.frogworth.com/utilityfog/feed/", "url": "https://www.frogworth.com/utilityfog/feed/",
"dir": "./output/UFog" "dir": "/media/pi/Storage/Music/Utility Fog"
}, },
"RA Podcast": { "RA Podcast": {
"url": "https://ra.co/xml/podcast.xml", "url": "https://ra.co/xml/podcast.xml",
"dir": "./output/RA" "dir": "/media/pi/Storage/Music/RA_Podcast"
} }
} },
} "max": 10
}

37
poetry.lock generated
View File

@ -1,30 +1,43 @@
# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand.
[[package]] [[package]]
name = "feedparser" name = "feedparser"
version = "6.0.10" version = "6.0.10"
description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
category = "main"
optional = false optional = false
python-versions = ">=3.6" python-versions = ">=3.6"
files = [
{file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"},
{file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"},
]
[package.dependencies] [package.dependencies]
sgmllib3k = "*" sgmllib3k = "*"
[[package]]
name = "pycurl"
version = "7.45.2"
description = "PycURL -- A Python Interface To The cURL library"
category = "main"
optional = false
python-versions = ">=3.5"
[[package]] [[package]]
name = "sgmllib3k" name = "sgmllib3k"
version = "1.0.0" version = "1.0.0"
description = "Py3k port of sgmllib." description = "Py3k port of sgmllib."
category = "main"
optional = false optional = false
python-versions = "*" python-versions = "*"
files = [
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
]
[metadata] [metadata]
lock-version = "2.0" lock-version = "1.1"
python-versions = "^3.9" python-versions = "^3.7"
content-hash = "b174a7eabcd92b902e7d7381cd453724319fb6563151d344f5011cc51efb3689" content-hash = "049581a37f61f8307815a597cf5decff839acd1d72a6dd0570ee65b006075794"
[metadata.files]
feedparser = [
{file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"},
{file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"},
]
pycurl = [
{file = "pycurl-7.45.2.tar.gz", hash = "sha256:5730590be0271364a5bddd9e245c9cc0fb710c4cbacbdd95264a3122d23224ca"},
]
sgmllib3k = [
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
]

View File

@ -6,8 +6,9 @@ authors = ["Mike Lynch <m.lynch@sydney.edu.au>"]
readme = "README.md" readme = "README.md"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.9" python = "^3.7"
feedparser = "^6.0.10" feedparser = "^6.0.10"
pycurl = "^7.45.2"
[tool.poetry.scripts] [tool.poetry.scripts]
autoradio = "autoradio.autoradio:main" autoradio = "autoradio.autoradio:main"