"""autoradio - download the audio enclosures of the latest RSS feed entries.

Reads a JSON config file listing feeds (each with a ``url`` and a target
``dir``) and downloads the audio links of the newest entries of every feed.
"""

import argparse
import json
import subprocess
import sys
import traceback
from pathlib import Path

import feedparser
import pycurl


def download_audio(link, destdir):
    """Download *link* into *destdir* unless the file is already there.

    The file name is the last ``/``-separated component of *link*. Data is
    first written to a sibling ``<name>.part`` file and only renamed to the
    final name after a successful transfer, so an aborted or failed download
    is never mistaken for a finished one on the next run.

    Transfer errors (including HTTP status >= 400) are reported on stderr
    and otherwise ignored, so one bad link does not stop the whole run.

    Args:
        link: URL of the audio file.
        destdir: Directory to store the file in; it must already exist.

    Raises:
        OSError: If the destination file cannot be created.
    """
    filename = link.split('/')[-1]
    if not filename:
        # A link ending in "/" would otherwise resolve to destdir itself.
        print(f"Cannot derive a file name from {link}, skipping",
              file=sys.stderr)
        return

    target = (Path(destdir) / filename).resolve()
    if target.exists():
        print(f"File {target} already downloaded, skipping")
        return

    partial = target.with_name(target.name + ".part")
    completed = False
    try:
        with open(partial, "wb") as fp:
            curl = pycurl.Curl()
            try:
                curl.setopt(pycurl.URL, link)
                curl.setopt(pycurl.FOLLOWLOCATION, 1)
                curl.setopt(pycurl.MAXREDIRS, 5)
                curl.setopt(pycurl.CONNECTTIMEOUT, 30)
                curl.setopt(pycurl.TIMEOUT, 3000)
                curl.setopt(pycurl.NOSIGNAL, 1)
                # Treat HTTP errors as failures instead of saving the
                # server's error page as if it were the audio file.
                curl.setopt(pycurl.FAILONERROR, 1)
                curl.setopt(pycurl.WRITEDATA, fp)
                curl.perform()
                completed = True
            except pycurl.error:
                traceback.print_exc(file=sys.stderr)
                sys.stderr.flush()
            finally:
                curl.close()
    finally:
        # The file is closed at this point, so renaming/removing is safe.
        if completed:
            partial.replace(target)
        else:
            try:
                partial.unlink()
            except FileNotFoundError:
                pass


def looks_audio(link):
    """Return True if the feed link's ``type`` starts with ``audio``.

    Args:
        link: Mapping describing one link of a feed entry.

    Returns:
        False when the mapping has no ``type`` or the type is empty/None.
    """
    media_type = link.get('type') or ''
    return media_type.startswith('audio')


def get_latest(url, dir, max):
    """Collect the audio URLs of the first *max* entries of a feed.

    Args:
        url: Location of the RSS/Atom feed, passed to ``feedparser.parse``.
        dir: Unused; kept so existing callers keep working.
        max: Maximum number of entries (from the start of the feed) to scan.

    Returns:
        A list of ``href`` values of all audio links found. When an entry
        carries several audio links, a notice is printed and all of them
        are returned.
    """
    d = feedparser.parse(url)
    entries = d.get('entries', [])
    urls = []
    for entry in entries[:max]:
        if 'links' not in entry:
            continue
        sounds = [l for l in entry['links'] if looks_audio(l)]
        if not sounds:
            print("No audio links")
        if len(sounds) > 1:
            print("Multiple audio links")
        for s in sounds:
            audio_url = s.get('href', '')
            if audio_url:
                urls.append(audio_url)
    return urls


def main():
    """Parse the command line, load the config and download every feed.

    The config is a JSON object with a ``feeds`` mapping of
    ``name -> {"url": ..., "dir": ...}`` and an optional ``max`` giving the
    number of entries to check per feed (default 5).
    """
    ap = argparse.ArgumentParser("autoradio - download audio from RSS feeds")
    ap.add_argument(
        "--config",
        default="./config.json",
        type=Path,
        help="Config file",
    )
    args = ap.parse_args()

    with open(args.config, 'r', encoding='utf-8') as cfh:
        cf = json.load(cfh)

    m = int(cf.get("max", "5"))
    for name, config in cf['feeds'].items():
        print(f"Checking {name}")
        urls = get_latest(config['url'], config['dir'], m)
        for url in urls:
            print(f"content = {url}")
            download_audio(url, config['dir'])