Compare commits

...

5 Commits

Author SHA1 Message Date
bombinans 9989e5f4c8 Merge pull request 'feature-multi-downloads' (#1) from feature-multi-downloads into main
Reviewed-on: #1
2023-12-09 04:24:16 +00:00
Mike Lynch 51b760cac3 Added pycurl as a poetry dependency and a note in the README to explain
what the problem is with that.
2023-12-09 15:22:44 +11:00
Mike Lynch cd37766dc2 Fetch most recent N files for each feed if they aren't already downloaded 2023-12-09 12:48:54 +11:00
Mike Lynch 606a278d28 Downgraded python to 3.7 so that it works on the Pi 2023-12-09 12:47:51 +11:00
Mike Lynch d5a570295d Added pycurl but I had to do it with pip install so I'm not sure
if poetry knows about it in the right way
2023-10-24 09:32:03 +11:00
5 changed files with 68 additions and 31 deletions

View File

@ -1,3 +1,9 @@
# autoradio
Auto download of mp3s from podcast feeds using a little Python script.
## Installation
I haven't worked out how to install pycurl with poetry, as it looks like it
requires a command-line flag to specify the SSL backend, and poetry doesn't
seem to support that. So for my local install I pip installed it by hand.

View File

@ -2,8 +2,11 @@
import feedparser
import argparse
import json
import pycurl
import subprocess
import sys
from pathlib import Path
import traceback
def download_audio(link, destdir):
@ -11,11 +14,23 @@ def download_audio(link, destdir):
ptarget = Path(destdir) / Path(parts[-1])
target = ptarget.resolve()
if target.exists():
print("File already exists, skipping")
print(f"File {target} already downloaded, skipping")
else:
args = [ "wget", link, "-o", str(target) ]
print(f"downloading {link}")
subprocess.run(args)
with open(target, "wb") as fp:
curl = pycurl.Curl()
curl.setopt(pycurl.URL, link)
curl.setopt(pycurl.FOLLOWLOCATION, 1)
curl.setopt(pycurl.MAXREDIRS, 5)
curl.setopt(pycurl.CONNECTTIMEOUT, 30)
curl.setopt(pycurl.TIMEOUT, 3000)
curl.setopt(pycurl.NOSIGNAL, 1)
curl.setopt(pycurl.WRITEDATA, fp)
try:
curl.perform()
except:
traceback.print_exc(file=sys.stderr)
sys.stderr.flush()
curl.close()
@ -25,14 +40,14 @@ def looks_audio(link):
return False
def get_latest(url, dir):
def get_latest(url, dir, max):
d = feedparser.parse(url)
title = d.feed.get('title', "[no title]")
entries = d.get('entries', [])
if entries:
latest = entries[0]
if 'links' in latest:
sounds = [ l for l in latest['links'] if looks_audio(l) ]
urls = []
for entry in entries[:max]:
if 'links' in entry:
sounds = [ l for l in entry['links'] if looks_audio(l) ]
if len(sounds) < 0:
print("No audio links")
if len(sounds) > 1:
@ -40,8 +55,8 @@ def get_latest(url, dir):
for s in sounds:
audio_url = s.get('href', '')
if audio_url:
return audio_url
return None
urls.append(audio_url)
return urls
def main():
ap = argparse.ArgumentParser("autoradio - download audio from RSS feeds")
@ -54,10 +69,11 @@ def main():
args = ap.parse_args()
with open(args.config, 'r') as cfh:
cf = json.load(cfh)
m = int(cf.get("max", "5"))
for name, config in cf['feeds'].items():
print(f"Checking {name}")
url = get_latest(config['url'], config['dir'])
if url:
urls = get_latest(config['url'], config['dir'], m)
for url in urls:
print(f"content = {url}")
download_audio(url, config['dir'])

View File

@ -2,11 +2,12 @@
"feeds": {
"Utility Fog": {
"url": "https://www.frogworth.com/utilityfog/feed/",
"dir": "./output/UFog"
"dir": "/media/pi/Storage/Music/Utility Fog"
},
"RA Podcast": {
"url": "https://ra.co/xml/podcast.xml",
"dir": "./output/RA"
"dir": "/media/pi/Storage/Music/RA_Podcast"
}
}
},
"max": 10
}

37
poetry.lock generated
View File

@ -1,30 +1,43 @@
# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand.
[[package]]
name = "feedparser"
version = "6.0.10"
description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
category = "main"
optional = false
python-versions = ">=3.6"
files = [
{file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"},
{file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"},
]
[package.dependencies]
sgmllib3k = "*"
[[package]]
name = "pycurl"
version = "7.45.2"
description = "PycURL -- A Python Interface To The cURL library"
category = "main"
optional = false
python-versions = ">=3.5"
[[package]]
name = "sgmllib3k"
version = "1.0.0"
description = "Py3k port of sgmllib."
category = "main"
optional = false
python-versions = "*"
files = [
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
]
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
content-hash = "b174a7eabcd92b902e7d7381cd453724319fb6563151d344f5011cc51efb3689"
lock-version = "1.1"
python-versions = "^3.7"
content-hash = "049581a37f61f8307815a597cf5decff839acd1d72a6dd0570ee65b006075794"
[metadata.files]
feedparser = [
{file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"},
{file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"},
]
pycurl = [
{file = "pycurl-7.45.2.tar.gz", hash = "sha256:5730590be0271364a5bddd9e245c9cc0fb710c4cbacbdd95264a3122d23224ca"},
]
sgmllib3k = [
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
]

View File

@ -6,8 +6,9 @@ authors = ["Mike Lynch <m.lynch@sydney.edu.au>"]
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.9"
python = "^3.7"
feedparser = "^6.0.10"
pycurl = "^7.45.2"
[tool.poetry.scripts]
autoradio = "autoradio.autoradio:main"