download all collections in category
This commit is contained in:
parent
c2a9ae0ca2
commit
4bebd4b7b4
5
main.py
5
main.py
|
@ -1,3 +1,6 @@
|
|||
from pir_connector import *
|
||||
|
||||
getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/happymusic")
|
||||
# getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/happymusic")
|
||||
# getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/Corporate-Grooves-Vol-2")
|
||||
|
||||
getCategory("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music")
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import requests
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from bs4 import BeautifulSoup
|
||||
from selenium import webdriver
|
||||
|
@ -17,7 +18,22 @@ base_url = "https://www.partnersinrhyme.com/royaltyfreemusic"
|
|||
base_files_url = "https://www.partnersinrhyme.com/files/"
|
||||
base_out_url = "files/PartnersInRhyme/"
|
||||
|
||||
def getCollection(collection_url):
|
||||
def getCategory(category_url):
    """Download every collection linked from a category page.

    Fetches ``category_url``, gathers the distinct links whose ``href``
    contains ``<category name>/`` and passes each one to ``getCollection``
    together with the category name.

    Args:
        category_url: URL of the category page. Its last path segment is
            used as the category name; a trailing slash is ignored.

    Returns:
        None. Each link is printed before its collection is processed.
    """
    from urllib.parse import urljoin

    # Strip a trailing "/" first: otherwise the last segment is empty and the
    # pattern below degrades to "/", which matches practically every link.
    category_name = category_url.rstrip("/").split("/")[-1]

    r = requests.get(category_url)
    if not r.ok:
        # Links scraped from an error page are not collections of this
        # category, so stop instead of following them.
        print("Could not load category", category_name, "- HTTP status", r.status_code)
        return

    soup = BeautifulSoup(r.text, 'html.parser')

    # Escape the name so characters such as "." or "+" in the URL segment are
    # matched literally rather than interpreted as regex syntax.
    link_pattern = re.compile(re.escape(category_name) + "/")

    # A set drops links that appear more than once on the page. urljoin
    # leaves absolute hrefs untouched and resolves relative ones against the
    # page's final URL, because getCollection fetches the link directly.
    links = {
        urljoin(r.url, tag['href'])
        for tag in soup.find_all(href=link_pattern)
    }

    # Sorted so repeated runs visit the collections in the same order.
    for lk in sorted(links):
        print(lk)
        getCollection(lk, category_name)
|
||||
|
||||
def getCollection(collection_url, category_name):
|
||||
r = requests.get(collection_url)
|
||||
soup = BeautifulSoup(r.text, 'html.parser')
|
||||
player = soup.iframe["src"];
|
||||
|
@ -31,15 +47,19 @@ def getCollection(collection_url):
|
|||
time.sleep(2)
|
||||
psoup = BeautifulSoup(driver.page_source, 'html.parser')
|
||||
|
||||
# print(psoup)
|
||||
driver.quit()
|
||||
|
||||
for li in psoup.find_all("li"):
|
||||
try:
|
||||
print("downloading...", li.attrs['data-mp3'])
|
||||
downloadSong(base_files_url + li.attrs['data-mp3'])
|
||||
downloadSong(base_files_url + li.attrs['data-mp3'], category_name)
|
||||
except KeyError:
|
||||
print("Could not load", collection_url.split("/")[-1], "possibly has hiearchy")
|
||||
|
||||
def downloadSong(song_url):
|
||||
def downloadSong(song_url, category_name):
|
||||
(collection_name, outFile) = getSongFromURL(song_url);
|
||||
outDir = base_out_url + collection_name
|
||||
outDir = base_out_url + category_name + "/" + collection_name
|
||||
|
||||
if not os.path.exists(outDir):
|
||||
os.makedirs(outDir)
|
||||
|
@ -51,6 +71,7 @@ def downloadSong(song_url):
|
|||
tempFile.write(i.content)
|
||||
else:
|
||||
print("File", outFile, "already exists... skipping")
|
||||
print("")
|
||||
|
||||
def getSongFromURL(song_url):
|
||||
list = song_url.split("/")
|
||||
|
|
Loading…
Reference in New Issue