59 lines
1.9 KiB
Python
59 lines
1.9 KiB
Python
import os
|
|
import sys
|
|
import json
|
|
import argparse
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from functools import partial
|
|
|
|
from utils import download_with_url, makedirs, parallel_run
|
|
|
|
# Directory containing this script (symlinks resolved); the "audio" folder is created beneath it.
base_path = os.path.dirname(os.path.realpath(__file__))
|
|
# Podcast channel feed (XML) that download_all fetches and parses for episode entries.
RSS_URL = "http://enabler.kbs.co.kr/api/podcast_channel/feed.xml?channel_id=R2010-0440"
|
|
|
|
def itunes_download(item):
    """Fetch a single episode into the local ``audio`` folder.

    Args:
        item: A ``(date, url)`` pair. ``date`` becomes the file name
            (``<date>.mp4``) and ``url`` is passed to ``download_with_url``.

    Nothing is downloaded when the target file already exists.
    """
    date, url = item
    target = os.path.join(base_path, "audio", "{}.mp4".format(date))

    # Guard clause: a file already on disk is left untouched.
    if os.path.exists(target):
        return

    download_with_url(url, target)
|
|
|
|
def _is_wanted(title, accept_list):
    """Return True if *title* contains an accepted name and no '-' character."""
    return '-' not in title and any(accept in title for accept in accept_list)


def _guid_date(guid):
    """Return the third '_'-separated field of the guid's basename."""
    return os.path.basename(guid).split('_')[2]


def download_all(config):
    """Download the accepted episodes listed in the podcast feed.

    Fetches ``RSS_URL``, keeps the ``<item>`` entries whose title contains
    one of the accepted segment names and no ``'-'``, prints every kept
    entry, and downloads the first ``config.max_num`` of them in parallel
    through ``itunes_download``.

    Args:
        config: Object exposing an integer ``max_num`` attribute, the
            maximum number of episodes to download.

    Raises:
        requests.RequestException: If the feed cannot be fetched (network
            failure, timeout, or an HTTP error status).
    """
    audio_dir = os.path.join(base_path, "audio")
    makedirs(audio_dir)

    # A timeout keeps the script from hanging on a stalled connection, and
    # the status check avoids parsing an HTTP error page as if it were a feed.
    response = requests.get(RSS_URL, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html5lib")

    accept_list = ['친절한 인나씨', '반납예정일', '귀욤열매 드세요']

    # Filter once and keep title/date/guid together, instead of running the
    # same predicate over two parallel lists.
    selected = []
    for item in soup.find_all('item'):
        # The slice drops a fixed 9-character prefix and 3-character suffix
        # from the raw title (presumably feed boilerplate -- confirm against
        # the live feed).
        title = item.find('title').text[9:-3]
        guid = item.find('guid').text
        if _is_wanted(title, accept_list):
            selected.append((title, _guid_date(guid), guid))

    for number, (title, date, _) in enumerate(selected, start=1):
        print(" [{:3d}] {}, {}".format(number, title, date))
        # Separator goes right after the last entry that will be downloaded
        # (the original compared the 0-based index and drew it one row late).
        if number == config.max_num:
            print("=" * 30)

    # Keyed by date, so entries sharing a date collapse to the later guid.
    urls = {date: guid for _, date, guid in selected[:config.max_num]}

    parallel_run(itunes_download, urls.items(),
                 desc=" [*] Itunes download", parallel=True)
|
|
|
|
if __name__ == '__main__':
    # Script entry point: --max_num caps how many episodes get downloaded.
    cli = argparse.ArgumentParser()
    cli.add_argument('--max_num', type=int, default=100)

    # parse_known_args returns unrecognised options instead of erroring out.
    config, unparsed = cli.parse_known_args()

    download_all(config)
|