Merge pull request 'feature-multi-downloads' (#1) from feature-multi-downloads into main

Reviewed-on: #1
main
bombinans 2023-12-09 04:24:16 +00:00
commit 9989e5f4c8
5 changed files with 68 additions and 31 deletions

View File

@ -1,3 +1,9 @@
# autoradio # autoradio
Auto download of mp3s from podcast feeds using a little Python script. Auto download of mp3s from podcast feeds using a little Python script.
## Installation
I haven't worked out how to install pycurl with poetry, as it looks like it
requires a command-line flag to specify the SSL backend, and poetry doesn't
seem to support that. So for my local install I pip-installed it by hand.

View File

@ -2,8 +2,11 @@
import feedparser import feedparser
import argparse import argparse
import json import json
import pycurl
import subprocess import subprocess
import sys
from pathlib import Path from pathlib import Path
import traceback
def download_audio(link, destdir): def download_audio(link, destdir):
@ -11,11 +14,23 @@ def download_audio(link, destdir):
ptarget = Path(destdir) / Path(parts[-1]) ptarget = Path(destdir) / Path(parts[-1])
target = ptarget.resolve() target = ptarget.resolve()
if target.exists(): if target.exists():
print("File already exists, skipping") print(f"File {target} already downloaded, skipping")
else: else:
args = [ "wget", link, "-o", str(target) ] with open(target, "wb") as fp:
print(f"downloading {link}") curl = pycurl.Curl()
subprocess.run(args) curl.setopt(pycurl.URL, link)
curl.setopt(pycurl.FOLLOWLOCATION, 1)
curl.setopt(pycurl.MAXREDIRS, 5)
curl.setopt(pycurl.CONNECTTIMEOUT, 30)
curl.setopt(pycurl.TIMEOUT, 3000)
curl.setopt(pycurl.NOSIGNAL, 1)
curl.setopt(pycurl.WRITEDATA, fp)
try:
curl.perform()
except:
traceback.print_exc(file=sys.stderr)
sys.stderr.flush()
curl.close()
@ -25,14 +40,14 @@ def looks_audio(link):
return False return False
def get_latest(url, dir): def get_latest(url, dir, max):
d = feedparser.parse(url) d = feedparser.parse(url)
title = d.feed.get('title', "[no title]") title = d.feed.get('title', "[no title]")
entries = d.get('entries', []) entries = d.get('entries', [])
if entries: urls = []
latest = entries[0] for entry in entries[:max]:
if 'links' in latest: if 'links' in entry:
sounds = [ l for l in latest['links'] if looks_audio(l) ] sounds = [ l for l in entry['links'] if looks_audio(l) ]
if len(sounds) < 0: if len(sounds) < 0:
print("No audio links") print("No audio links")
if len(sounds) > 1: if len(sounds) > 1:
@ -40,8 +55,8 @@ def get_latest(url, dir):
for s in sounds: for s in sounds:
audio_url = s.get('href', '') audio_url = s.get('href', '')
if audio_url: if audio_url:
return audio_url urls.append(audio_url)
return None return urls
def main(): def main():
ap = argparse.ArgumentParser("autoradio - download audio from RSS feeds") ap = argparse.ArgumentParser("autoradio - download audio from RSS feeds")
@ -54,10 +69,11 @@ def main():
args = ap.parse_args() args = ap.parse_args()
with open(args.config, 'r') as cfh: with open(args.config, 'r') as cfh:
cf = json.load(cfh) cf = json.load(cfh)
m = int(cf.get("max", "5"))
for name, config in cf['feeds'].items(): for name, config in cf['feeds'].items():
print(f"Checking {name}") print(f"Checking {name}")
url = get_latest(config['url'], config['dir']) urls = get_latest(config['url'], config['dir'], m)
if url: for url in urls:
print(f"content = {url}") print(f"content = {url}")
download_audio(url, config['dir']) download_audio(url, config['dir'])

View File

@ -2,11 +2,12 @@
"feeds": { "feeds": {
"Utility Fog": { "Utility Fog": {
"url": "https://www.frogworth.com/utilityfog/feed/", "url": "https://www.frogworth.com/utilityfog/feed/",
"dir": "./output/UFog" "dir": "/media/pi/Storage/Music/Utility Fog"
}, },
"RA Podcast": { "RA Podcast": {
"url": "https://ra.co/xml/podcast.xml", "url": "https://ra.co/xml/podcast.xml",
"dir": "./output/RA" "dir": "/media/pi/Storage/Music/RA_Podcast"
} }
} },
"max": 10
} }

37
poetry.lock generated
View File

@ -1,30 +1,43 @@
# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand.
[[package]] [[package]]
name = "feedparser" name = "feedparser"
version = "6.0.10" version = "6.0.10"
description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
category = "main"
optional = false optional = false
python-versions = ">=3.6" python-versions = ">=3.6"
files = [
{file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"},
{file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"},
]
[package.dependencies] [package.dependencies]
sgmllib3k = "*" sgmllib3k = "*"
[[package]]
name = "pycurl"
version = "7.45.2"
description = "PycURL -- A Python Interface To The cURL library"
category = "main"
optional = false
python-versions = ">=3.5"
[[package]] [[package]]
name = "sgmllib3k" name = "sgmllib3k"
version = "1.0.0" version = "1.0.0"
description = "Py3k port of sgmllib." description = "Py3k port of sgmllib."
category = "main"
optional = false optional = false
python-versions = "*" python-versions = "*"
files = [
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
]
[metadata] [metadata]
lock-version = "2.0" lock-version = "1.1"
python-versions = "^3.9" python-versions = "^3.7"
content-hash = "b174a7eabcd92b902e7d7381cd453724319fb6563151d344f5011cc51efb3689" content-hash = "049581a37f61f8307815a597cf5decff839acd1d72a6dd0570ee65b006075794"
[metadata.files]
feedparser = [
{file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"},
{file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"},
]
pycurl = [
{file = "pycurl-7.45.2.tar.gz", hash = "sha256:5730590be0271364a5bddd9e245c9cc0fb710c4cbacbdd95264a3122d23224ca"},
]
sgmllib3k = [
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
]

View File

@ -6,8 +6,9 @@ authors = ["Mike Lynch <m.lynch@sydney.edu.au>"]
readme = "README.md" readme = "README.md"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.9" python = "^3.7"
feedparser = "^6.0.10" feedparser = "^6.0.10"
pycurl = "^7.45.2"
[tool.poetry.scripts] [tool.poetry.scripts]
autoradio = "autoradio.autoradio:main" autoradio = "autoradio.autoradio:main"