download all collections in category

This commit is contained in:
Alexander Matson 2019-01-14 13:31:14 -05:00
parent c2a9ae0ca2
commit 4bebd4b7b4
2 changed files with 30 additions and 6 deletions

View File

@ -1,3 +1,6 @@
from pir_connector import *
getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/happymusic")
# getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/happymusic")
# getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/Corporate-Grooves-Vol-2")
getCategory("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music")

View File

@ -1,6 +1,7 @@
import requests
import json
import os
import re
import time
from bs4 import BeautifulSoup
from selenium import webdriver
@ -17,7 +18,22 @@ base_url = "https://www.partnersinrhyme.com/royaltyfreemusic"
base_files_url = "https://www.partnersinrhyme.com/files/"
base_out_url = "files/PartnersInRhyme/"
def getCollection(collection_url):
def getCategory(category_url):
    """Download every collection linked from a category page.

    Fetches ``category_url``, gathers the distinct ``href`` values that
    contain ``<category name>/`` and hands each one to ``getCollection``
    together with the category name.

    Args:
        category_url: URL of the category page. Its last path segment is
            used as the category name; a trailing slash is tolerated.
    """
    # Strip trailing slashes first: ".../Category/" would otherwise give an
    # empty name, turning the pattern into "/" and matching every link.
    category_name = category_url.rstrip("/").split("/")[-1]
    r = requests.get(category_url)
    soup = BeautifulSoup(r.text, 'html.parser')
    # The name is literal URL text, not a regular expression, so escape it.
    pattern = re.compile(re.escape(category_name) + "/")
    # A set drops duplicate links; sorting makes the download order repeatable.
    links = {tag['href'] for tag in soup.find_all(href=pattern)}
    for lk in sorted(links):
        print(lk)
        getCollection(lk, category_name)
def getCollection(collection_url, category_name):
r = requests.get(collection_url)
soup = BeautifulSoup(r.text, 'html.parser')
player = soup.iframe["src"];
@ -31,15 +47,19 @@ def getCollection(collection_url):
time.sleep(2)
psoup = BeautifulSoup(driver.page_source, 'html.parser')
# print(psoup)
driver.quit()
for li in psoup.find_all("li"):
try:
print("downloading...", li.attrs['data-mp3'])
downloadSong(base_files_url + li.attrs['data-mp3'])
downloadSong(base_files_url + li.attrs['data-mp3'], category_name)
except KeyError:
print("Could not load", collection_url.split("/")[-1], "possibly has hiearchy")
def downloadSong(song_url):
def downloadSong(song_url, category_name):
(collection_name, outFile) = getSongFromURL(song_url);
outDir = base_out_url + collection_name
outDir = base_out_url + category_name + "/" + collection_name
if not os.path.exists(outDir):
os.makedirs(outDir)
@ -51,6 +71,7 @@ def downloadSong(song_url):
tempFile.write(i.content)
else:
print("File", outFile, "already exists... skipping")
print("")
def getSongFromURL(song_url):
list = song_url.split("/")