From cd37766dc2ea3b83e8e588c4112121221209ac77 Mon Sep 17 00:00:00 2001 From: Mike Lynch Date: Sat, 9 Dec 2023 12:48:54 +1100 Subject: [PATCH] Fetch most recent N files for each feed if they aren't already downloaded --- autoradio/autoradio.py | 24 +++++++++++++----------- config.json | 9 +++++---- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/autoradio/autoradio.py b/autoradio/autoradio.py index b27d234..c7d8997 100644 --- a/autoradio/autoradio.py +++ b/autoradio/autoradio.py @@ -4,6 +4,7 @@ import argparse import json import pycurl import subprocess +import sys from pathlib import Path import traceback @@ -13,7 +14,7 @@ def download_audio(link, destdir): ptarget = Path(destdir) / Path(parts[-1]) target = ptarget.resolve() if target.exists(): - print("File already exists, skipping") + print(f"File {target} already downloaded, skipping") else: with open(target, "wb") as fp: curl = pycurl.Curl() @@ -21,7 +22,7 @@ def download_audio(link, destdir): curl.setopt(pycurl.FOLLOWLOCATION, 1) curl.setopt(pycurl.MAXREDIRS, 5) curl.setopt(pycurl.CONNECTTIMEOUT, 30) - curl.setopt(pycurl.TIMEOUT, 300) + curl.setopt(pycurl.TIMEOUT, 3000) curl.setopt(pycurl.NOSIGNAL, 1) curl.setopt(pycurl.WRITEDATA, fp) try: @@ -39,14 +40,14 @@ def looks_audio(link): return False -def get_latest(url, dir): +def get_latest(url, dir, max): d = feedparser.parse(url) title = d.feed.get('title', "[no title]") entries = d.get('entries', []) - if entries: - latest = entries[0] - if 'links' in latest: - sounds = [ l for l in latest['links'] if looks_audio(l) ] + urls = [] + for entry in entries[:max]: + if 'links' in entry: + sounds = [ l for l in entry['links'] if looks_audio(l) ] if len(sounds) < 0: print("No audio links") if len(sounds) > 1: @@ -54,8 +55,8 @@ def get_latest(url, dir): for s in sounds: audio_url = s.get('href', '') if audio_url: - return audio_url - return None + urls.append(audio_url) + return urls def main(): ap = argparse.ArgumentParser("autoradio - download audio 
from RSS feeds") @@ -68,10 +69,11 @@ def main(): args = ap.parse_args() with open(args.config, 'r') as cfh: cf = json.load(cfh) + m = int(cf.get("max", "5")) for name, config in cf['feeds'].items(): print(f"Checking {name}") - url = get_latest(config['url'], config['dir']) - if url: + urls = get_latest(config['url'], config['dir'], m) + for url in urls: print(f"content = {url}") download_audio(url, config['dir']) diff --git a/config.json b/config.json index 4584929..c8d31cd 100644 --- a/config.json +++ b/config.json @@ -2,11 +2,12 @@ "feeds": { "Utility Fog": { "url": "https://www.frogworth.com/utilityfog/feed/", - "dir": "./output/UFog" + "dir": "/media/pi/Storage/Music/Utility Fog" }, "RA Podcast": { "url": "https://ra.co/xml/podcast.xml", - "dir": "./output/RA" + "dir": "/media/pi/Storage/Music/RA_Podcast" } - } -} \ No newline at end of file + }, + "max": 10 +}