feature-multi-downloads #1

已合併
bombinans 2023-12-09 04:24:17 +00:00 將 4 次代碼提交從 feature-multi-downloads合併至 main
共有 5 個文件被更改，包括 68 次插入和 31 次删除

查看文件

@ -1,3 +1,9 @@
# autoradio # autoradio
Auto download of mp3s from podcast feeds using a little Python script. Auto download of mp3s from podcast feeds using a little Python script.
## Installation
I haven't worked out how to install pycurl with poetry, as it looks like it
requires a command-line flag to specify the SSL backend, and poetry doesn't
seem to support that. So for my local install I pip installed it by hand.

查看文件

@ -2,8 +2,11 @@
import feedparser import feedparser
import argparse import argparse
import json import json
import pycurl
import subprocess import subprocess
import sys
from pathlib import Path from pathlib import Path
import traceback
def download_audio(link, destdir): def download_audio(link, destdir):
@ -11,11 +14,23 @@ def download_audio(link, destdir):
ptarget = Path(destdir) / Path(parts[-1]) ptarget = Path(destdir) / Path(parts[-1])
target = ptarget.resolve() target = ptarget.resolve()
if target.exists(): if target.exists():
print("File already exists, skipping") print(f"File {target} already downloaded, skipping")
else: else:
args = [ "wget", link, "-o", str(target) ] with open(target, "wb") as fp:
print(f"downloading {link}") curl = pycurl.Curl()
subprocess.run(args) curl.setopt(pycurl.URL, link)
curl.setopt(pycurl.FOLLOWLOCATION, 1)
curl.setopt(pycurl.MAXREDIRS, 5)
curl.setopt(pycurl.CONNECTTIMEOUT, 30)
curl.setopt(pycurl.TIMEOUT, 3000)
curl.setopt(pycurl.NOSIGNAL, 1)
curl.setopt(pycurl.WRITEDATA, fp)
try:
curl.perform()
except:
traceback.print_exc(file=sys.stderr)
sys.stderr.flush()
curl.close()
@ -25,14 +40,14 @@ def looks_audio(link):
return False return False
def get_latest(url, dir): def get_latest(url, dir, max):
d = feedparser.parse(url) d = feedparser.parse(url)
title = d.feed.get('title', "[no title]") title = d.feed.get('title', "[no title]")
entries = d.get('entries', []) entries = d.get('entries', [])
if entries: urls = []
latest = entries[0] for entry in entries[:max]:
if 'links' in latest: if 'links' in entry:
sounds = [ l for l in latest['links'] if looks_audio(l) ] sounds = [ l for l in entry['links'] if looks_audio(l) ]
if len(sounds) < 0: if len(sounds) < 0:
print("No audio links") print("No audio links")
if len(sounds) > 1: if len(sounds) > 1:
@ -40,8 +55,8 @@ def get_latest(url, dir):
for s in sounds: for s in sounds:
audio_url = s.get('href', '') audio_url = s.get('href', '')
if audio_url: if audio_url:
return audio_url urls.append(audio_url)
return None return urls
def main(): def main():
ap = argparse.ArgumentParser("autoradio - download audio from RSS feeds") ap = argparse.ArgumentParser("autoradio - download audio from RSS feeds")
@ -54,10 +69,11 @@ def main():
args = ap.parse_args() args = ap.parse_args()
with open(args.config, 'r') as cfh: with open(args.config, 'r') as cfh:
cf = json.load(cfh) cf = json.load(cfh)
m = int(cf.get("max", "5"))
for name, config in cf['feeds'].items(): for name, config in cf['feeds'].items():
print(f"Checking {name}") print(f"Checking {name}")
url = get_latest(config['url'], config['dir']) urls = get_latest(config['url'], config['dir'], m)
if url: for url in urls:
print(f"content = {url}") print(f"content = {url}")
download_audio(url, config['dir']) download_audio(url, config['dir'])

查看文件

@ -2,11 +2,12 @@
"feeds": { "feeds": {
"Utility Fog": { "Utility Fog": {
"url": "https://www.frogworth.com/utilityfog/feed/", "url": "https://www.frogworth.com/utilityfog/feed/",
"dir": "./output/UFog" "dir": "/media/pi/Storage/Music/Utility Fog"
}, },
"RA Podcast": { "RA Podcast": {
"url": "https://ra.co/xml/podcast.xml", "url": "https://ra.co/xml/podcast.xml",
"dir": "./output/RA" "dir": "/media/pi/Storage/Music/RA_Podcast"
}
} }
},
"max": 10
} }

37
poetry.lock generated
查看文件

@ -1,30 +1,43 @@
# This file is automatically @generated by Poetry 1.5.0 and should not be changed by hand.
[[package]] [[package]]
name = "feedparser" name = "feedparser"
version = "6.0.10" version = "6.0.10"
description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds" description = "Universal feed parser, handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds"
category = "main"
optional = false optional = false
python-versions = ">=3.6" python-versions = ">=3.6"
files = [
{file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"},
{file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"},
]
[package.dependencies] [package.dependencies]
sgmllib3k = "*" sgmllib3k = "*"
[[package]]
name = "pycurl"
version = "7.45.2"
description = "PycURL -- A Python Interface To The cURL library"
category = "main"
optional = false
python-versions = ">=3.5"
[[package]] [[package]]
name = "sgmllib3k" name = "sgmllib3k"
version = "1.0.0" version = "1.0.0"
description = "Py3k port of sgmllib." description = "Py3k port of sgmllib."
category = "main"
optional = false optional = false
python-versions = "*" python-versions = "*"
files = [
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
]
[metadata] [metadata]
lock-version = "2.0" lock-version = "1.1"
python-versions = "^3.9" python-versions = "^3.7"
content-hash = "b174a7eabcd92b902e7d7381cd453724319fb6563151d344f5011cc51efb3689" content-hash = "049581a37f61f8307815a597cf5decff839acd1d72a6dd0570ee65b006075794"
[metadata.files]
feedparser = [
{file = "feedparser-6.0.10-py3-none-any.whl", hash = "sha256:79c257d526d13b944e965f6095700587f27388e50ea16fd245babe4dfae7024f"},
{file = "feedparser-6.0.10.tar.gz", hash = "sha256:27da485f4637ce7163cdeab13a80312b93b7d0c1b775bef4a47629a3110bca51"},
]
pycurl = [
{file = "pycurl-7.45.2.tar.gz", hash = "sha256:5730590be0271364a5bddd9e245c9cc0fb710c4cbacbdd95264a3122d23224ca"},
]
sgmllib3k = [
{file = "sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9"},
]

查看文件

@ -6,8 +6,9 @@ authors = ["Mike Lynch <m.lynch@sydney.edu.au>"]
readme = "README.md" readme = "README.md"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.9" python = "^3.7"
feedparser = "^6.0.10" feedparser = "^6.0.10"
pycurl = "^7.45.2"
[tool.poetry.scripts] [tool.poetry.scripts]
autoradio = "autoradio.autoradio:main" autoradio = "autoradio.autoradio:main"