download all collections in category
parent c2a9ae0ca2
commit 4bebd4b7b4

main.py

@@ -1,3 +1,6 @@
 from pir_connector import *
 
-getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/happymusic")
+# getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/happymusic")
+# getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/Corporate-Grooves-Vol-2")
+
+getCategory("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music")

pir_connector.py

@@ -1,6 +1,7 @@
 import requests
 import json
 import os
+import re
 import time
 from bs4 import BeautifulSoup
 from selenium import webdriver
@@ -17,7 +18,22 @@ base_url = "https://www.partnersinrhyme.com/royaltyfreemusic"
 base_files_url = "https://www.partnersinrhyme.com/files/"
 base_out_url = "files/PartnersInRhyme/"
 
-def getCollection(collection_url):
+def getCategory(category_url):
+    r = requests.get(category_url)
+    soup = BeautifulSoup(r.text, 'html.parser')
+
+    category_name = category_url.split("/")[-1]
+
+    links = set();
+
+    for li in soup.find_all(href=re.compile(category_name + "/")):
+        links.add(li['href'])
+
+    for lk in links:
+        print(lk)
+        getCollection(lk, category_name)
+
+def getCollection(collection_url, category_name):
     r = requests.get(collection_url)
     soup = BeautifulSoup(r.text, 'html.parser')
     player = soup.iframe["src"];
@@ -31,15 +47,19 @@ def getCollection(collection_url):
     time.sleep(2)
     psoup = BeautifulSoup(driver.page_source, 'html.parser')
 
+    # print(psoup)
     driver.quit()
 
     for li in psoup.find_all("li"):
-        print("downloading...", li.attrs['data-mp3'])
-        downloadSong(base_files_url + li.attrs['data-mp3'])
+        try:
+            print("downloading...", li.attrs['data-mp3'])
+            downloadSong(base_files_url + li.attrs['data-mp3'], category_name)
+        except KeyError:
+            print("Could not load", collection_url.split("/")[-1], "possibly has hiearchy")
 
-def downloadSong(song_url):
+def downloadSong(song_url, category_name):
     (collection_name, outFile) = getSongFromURL(song_url);
-    outDir = base_out_url + collection_name
+    outDir = base_out_url + category_name + "/" + collection_name
 
     if not os.path.exists(outDir):
         os.makedirs(outDir)
@@ -51,6 +71,7 @@ def downloadSong(song_url):
            tempFile.write(i.content)
    else:
        print("File", outFile, "already exists... skipping")
+    print("")
 
 def getSongFromURL(song_url):
     list = song_url.split("/")
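
A minimal standalone sketch of the link-collection step the new getCategory performs, assuming the category page markup matches what the diff expects (anchors whose href contains the category name followed by "/"). The URL is the one used in main.py above; the Selenium-driven download step is omitted, so this only needs requests, bs4 and re.

import re
import requests
from bs4 import BeautifulSoup

# Category URL taken from main.py above.
category_url = "https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music"
category_name = category_url.split("/")[-1]

r = requests.get(category_url)
soup = BeautifulSoup(r.text, "html.parser")

# Same filter as getCategory: keep every tag whose href contains
# "<category_name>/", i.e. the collection pages nested under this category,
# de-duplicated with a set.
links = {a["href"] for a in soup.find_all(href=re.compile(category_name + "/"))}

for link in sorted(links):
    print(link)  # getCategory would pass each of these to getCollection(link, category_name)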