"""Download royalty-free music collections from partnersinrhyme.com.

A category page links to collection pages; each collection page embeds a
JavaScript player in an iframe.  The player is rendered with headless
Firefox so that the generated ``<li data-mp3=...>`` entries can be read,
and every referenced MP3 is saved below ``base_out_url``.
"""
import json
import os
import re
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.webdriver.support.ui import WebDriverWait

# Run Firefox without a visible window.  The command-line switch is used
# rather than the ``options.headless`` property because that property was
# deprecated and later removed in Selenium 4, whereas the switch works on
# old and new releases alike.
options = Options()
options.add_argument("-headless")

base_url = "https://www.partnersinrhyme.com/royaltyfreemusic"
base_files_url = "https://www.partnersinrhyme.com/files/"
base_out_url = "files/PartnersInRhyme/"


def getCategory(category_url):
    """Download every collection linked from a category page.

    Args:
        category_url: URL of the category page.  Its last path segment is
            used as the category name, both to recognise collection links
            on the page and as the output sub-directory.
    """
    r = requests.get(category_url)
    soup = BeautifulSoup(r.text, 'html.parser')

    # Strip a trailing slash first; otherwise the name would be empty and
    # the pattern below ("/") would match every link on the page.
    category_name = category_url.rstrip("/").split("/")[-1]
    link_pattern = re.compile(re.escape(category_name) + "/")

    # A set removes duplicate links; sorting gives a reproducible order.
    links = {tag['href'] for tag in soup.find_all(href=link_pattern)}
    for lk in sorted(links):
        print(lk)
        getCollection(lk, category_name)


def getCollection(collection_url, category_name):
    """Download all songs offered by one collection page.

    Args:
        collection_url: URL of the collection page containing the player
            iframe.
        category_name: Category the collection belongs to; forwarded to
            ``downloadSong`` to build the output path.
    """
    collection_label = collection_url.split("/")[-1]

    r = requests.get(collection_url)
    soup = BeautifulSoup(r.text, 'html.parser')

    iframe = soup.iframe
    if iframe is None or not iframe.get("src"):
        # Without a player iframe there is nothing to render or download.
        print("Could not load", collection_label, "no player iframe found")
        return
    # The iframe src is prefixed with a scheme before it is requested.
    player_url = "http:" + iframe["src"]

    # The playlist is built by JavaScript, so the page has to be rendered in
    # a real browser.  The context manager quits the browser on exit, also
    # when the wait below times out.
    with webdriver.Firefox(options=options) as driver:
        driver.get(player_url)
        WebDriverWait(driver, 60).until(f)
        # Short grace period after the container appears before the page
        # source is captured.
        time.sleep(2)
        psoup = BeautifulSoup(driver.page_source, 'html.parser')

    for li in psoup.find_all("li"):
        mp3 = li.attrs.get('data-mp3')
        if mp3 is None:
            print("Could not load", collection_label, "possibly has hiearchy")
            continue
        print("downloading...", mp3)
        downloadSong(base_files_url + mp3, category_name)


def downloadSong(song_url, category_name):
    """Save one song to disk unless it has already been downloaded.

    The file is written to
    ``base_out_url/<category_name>/<collection>/<file name>``.

    Args:
        song_url: Absolute URL of the MP3 file.
        category_name: Name of the category directory to store it under.
    """
    collection_name, outFile = getSongFromURL(song_url)
    outDir = base_out_url + category_name + "/" + collection_name
    # exist_ok avoids the race between checking for and creating the folder.
    os.makedirs(outDir, exist_ok=True)

    outPath = os.path.join(outDir, outFile)
    if os.path.isfile(outPath):
        print("File", outFile, "already exists... skipping")
    else:
        response = requests.get(song_url)
        if response.ok:
            with open(outPath, 'wb') as tempFile:
                tempFile.write(response.content)
        else:
            # Do not store an error page as an .mp3: the file would then be
            # treated as "already exists" and skipped on every later run.
            print("Could not download", song_url, "status", response.status_code)
    print("")


def getSongFromURL(song_url):
    """Split a song URL into ``(collection_name, file_name)``.

    The collection name is the fifth ``/``-separated piece of the URL (the
    segment right after ``base_files_url``); the file name is the last one.

    Raises:
        IndexError: If the URL has fewer than five ``/``-separated pieces.
    """
    parts = song_url.split("/")
    return (parts[4], parts[-1])


def f(d):
    """Wait condition: return the player's ``listContainer`` element.

    Passed to ``WebDriverWait.until``, which keeps calling it with the
    driver until the element can be found.
    """
    return d.find_element(By.CLASS_NAME, "listContainer")