royalty-ripper/pir_connector.py

82 lines
2.4 KiB
Python
Raw Normal View History

2019-01-14 16:03:40 +00:00
import requests
import json
import os
2019-01-14 18:31:14 +00:00
import re
2019-01-14 17:34:19 +00:00
import time
2019-01-14 16:03:40 +00:00
from bs4 import BeautifulSoup
2019-01-14 17:34:19 +00:00
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.webdriver.firefox.options import Options
# Firefox options shared by every browser session this module starts.
# The headless flag is passed as a command-line argument rather than through
# the `options.headless` setter: the setter was deprecated and later removed
# in Selenium 4, where assigning it would silently do nothing.
options = Options()
options.add_argument("-headless")

# Root of the royalty-free music section of the site.
base_url = "https://www.partnersinrhyme.com/royaltyfreemusic"
# Prefix joined with each track's `data-mp3` attribute to form its download URL.
base_files_url = "https://www.partnersinrhyme.com/files/"
# Local directory prefix under which downloaded songs are stored.
base_out_url = "files/PartnersInRhyme/"
2019-01-14 16:03:40 +00:00
2019-01-14 18:31:14 +00:00
def getCategory(category_url):
    """Download every collection linked from a category page.

    The category name is the last path segment of ``category_url``. Every
    element on the page whose ``href`` contains ``"<category name>/"`` is
    treated as a collection link and handed to ``getCollection``.

    Args:
        category_url: URL of the category page; a trailing slash is tolerated.
    """
    r = requests.get(category_url)
    soup = BeautifulSoup(r.text, 'html.parser')

    # Strip a trailing slash first: otherwise the last segment is empty and
    # the pattern below degenerates to "/", matching nearly every link.
    category_name = category_url.rstrip("/").split("/")[-1]

    # Escape the name so characters such as "." or "+" are matched literally.
    link_pattern = re.compile(re.escape(category_name) + "/")

    # A set removes links that appear several times on the page.
    links = {tag['href'] for tag in soup.find_all(href=link_pattern)}

    # Sorted so the crawl order is the same on every run.
    for lk in sorted(links):
        print(lk)
        getCollection(lk, category_name)
def getCollection(collection_url, category_name):
    """Download every song listed by the player embedded in a collection page.

    The collection page is fetched and its first ``<iframe>`` is taken to be
    the audio player. The player is rendered in headless Firefox, and each
    ``<li>`` carrying a ``data-mp3`` attribute is passed to ``downloadSong``.

    Args:
        collection_url: URL of the collection page.
        category_name: Category the collection belongs to; forwarded to
            ``downloadSong`` to build the output directory.

    Note:
        ``WebDriverWait(...).until`` gives up after 60 seconds if the song
        list container never appears; that exception is not handled here.
    """
    r = requests.get(collection_url)
    soup = BeautifulSoup(r.text, 'html.parser')
    collection_name = collection_url.split("/")[-1]

    # Without an embedded player there is nothing to scrape; skip the page
    # instead of crashing the whole category crawl.
    iframe = soup.iframe
    player = iframe.get("src") if iframe is not None else None
    if not player:
        print("Could not load", collection_name, "possibly has hiearchy")
        return

    # The iframe src is used as a scheme-less URL, so a scheme is prepended.
    player_url = "http:" + player

    # The context manager quits the browser on exit, so no explicit quit().
    with webdriver.Firefox(options=options) as driver:
        driver.get(player_url)
        WebDriverWait(driver, 60).until(f)
        # Presumably gives the player time to finish filling the list once
        # its container exists -- TODO confirm whether this is still needed.
        time.sleep(2)
        psoup = BeautifulSoup(driver.page_source, 'html.parser')

    # The browser is already closed here; only the parsed snapshot is used.
    for li in psoup.find_all("li"):
        mp3_path = li.attrs.get('data-mp3')
        if mp3_path is None:
            print("Could not load", collection_name, "possibly has hiearchy")
            continue
        print("downloading...", mp3_path)
        downloadSong(base_files_url + mp3_path, category_name)
2019-01-14 17:34:19 +00:00
2019-01-14 18:31:14 +00:00
def downloadSong(song_url, category_name):
    """Download one song into ``<base_out_url><category>/<collection>/``.

    The collection name and file name are derived from ``song_url`` by
    ``getSongFromURL``. A file that already exists is not downloaded again.

    Args:
        song_url: Full URL of the song file.
        category_name: Category name used as the first directory level.
    """
    (collection_name, outFile) = getSongFromURL(song_url)
    outDir = base_out_url + category_name + "/" + collection_name
    outPath = os.path.join(outDir, outFile)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(outDir, exist_ok=True)

    if os.path.isfile(outPath):
        print("File", outFile, "already exists... skipping")
    else:
        response = requests.get(song_url)
        if response.ok:
            with open(outPath, 'wb') as tempFile:
                tempFile.write(response.content)
        else:
            # Do not save an error page as the song: it would be treated as
            # "already exists" on every later run and never be retried.
            print("Could not download", song_url,
                  "- HTTP status", response.status_code)
    print("")
2019-01-14 16:03:40 +00:00
2019-01-14 17:34:19 +00:00
def getSongFromURL(song_url):
    """Split a song URL into its collection name and file name.

    The URL is split on ``"/"``; the element at index 4 is returned as the
    collection name and the last element as the file name. For
    ``https://host/files/<collection>/<file>`` index 4 is ``<collection>``.

    Args:
        song_url: Full URL of a song file.

    Returns:
        A ``(collection_name, file_name)`` tuple.

    Raises:
        IndexError: If the URL has fewer than five ``"/"``-separated parts.
    """
    # Named `parts` rather than `list` so the builtin is not shadowed.
    parts = song_url.split("/")
    return (parts[4], parts[-1])
def f(d):
    """Wait condition: locate the element with class ``listContainer``.

    Passed to ``WebDriverWait.until`` in ``getCollection`` to block until the
    player page contains that element.

    Args:
        d: The WebDriver instance supplied by ``WebDriverWait``.

    Returns:
        The located element.
    """
    # find_element(By...) works on Selenium 3 and 4; the older
    # find_element_by_class_name helper was removed in Selenium 4.3.
    return d.find_element(By.CLASS_NAME, "listContainer")