diff --git a/main.py b/main.py index e6e7e26..fa957cc 100644 --- a/main.py +++ b/main.py @@ -1,3 +1,6 @@ from pir_connector import * -getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/happymusic") +# getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/happymusic") +# getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/Corporate-Grooves-Vol-2") + +getCategory("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music") diff --git a/pir_connector.py b/pir_connector.py index 3039383..52a83d7 100644 --- a/pir_connector.py +++ b/pir_connector.py @@ -1,6 +1,7 @@ import requests import json import os +import re import time from bs4 import BeautifulSoup from selenium import webdriver @@ -17,7 +18,22 @@ base_url = "https://www.partnersinrhyme.com/royaltyfreemusic" base_files_url = "https://www.partnersinrhyme.com/files/" base_out_url = "files/PartnersInRhyme/" -def getCollection(collection_url): +def getCategory(category_url): + r = requests.get(category_url) + soup = BeautifulSoup(r.text, 'html.parser') + + category_name = category_url.split("/")[-1] + + links = set(); + + for li in soup.find_all(href=re.compile(category_name+ "/")): + links.add(li['href']) + + for lk in links: + print(lk) + getCollection(lk, category_name) + +def getCollection(collection_url, category_name): r = requests.get(collection_url) soup = BeautifulSoup(r.text, 'html.parser') player = soup.iframe["src"]; @@ -31,15 +47,19 @@ def getCollection(collection_url): time.sleep(2) psoup = BeautifulSoup(driver.page_source, 'html.parser') + # print(psoup) driver.quit() for li in psoup.find_all("li"): - print("downloading...", li.attrs['data-mp3']) - downloadSong(base_files_url + li.attrs['data-mp3']) + try: + print("downloading...", li.attrs['data-mp3']) + downloadSong(base_files_url + li.attrs['data-mp3'], category_name) + except KeyError: + print("Could not load", collection_url.split("/")[-1], "possibly has hiearchy") -def downloadSong(song_url): +def downloadSong(song_url, category_name): (collection_name, outFile) = getSongFromURL(song_url); - outDir = base_out_url + collection_name + outDir = base_out_url + category_name + "/" + collection_name if not os.path.exists(outDir): os.makedirs(outDir) @@ -51,6 +71,7 @@ def downloadSong(song_url): tempFile.write(i.content) else: print("File", outFile, "already exists... skipping") + print("") def getSongFromURL(song_url): list = song_url.split("/")