# royalty-ripper/pir_connector.py
#
# 82 lines
# 2.4 KiB
# Python

import requests
import json
import os
import re
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.webdriver.firefox.options import Options
# Firefox options shared by every Selenium session started in this module.
options = Options()
# Pass the command-line flag directly: the `headless` property setter was
# deprecated and later removed in Selenium 4, where assigning it is a plain
# attribute write that leaves the browser running with a visible window.
options.add_argument("-headless")

# Root of the site's royalty-free music section.
base_url = "https://www.partnersinrhyme.com/royaltyfreemusic"
# Prefix joined with a track's `data-mp3` value to form its download URL.
base_files_url = "https://www.partnersinrhyme.com/files/"
# Local directory prefix under which downloaded tracks are written.
base_out_url = "files/PartnersInRhyme/"
def getCategory(category_url):
    """Download every collection linked from a category page.

    Fetches ``category_url``, collects the distinct ``href`` values that
    contain ``"<category name>/"`` and hands each one to ``getCollection``.
    The category name is the last path segment of ``category_url``.

    Args:
        category_url: URL of the category page to crawl.
    """
    r = requests.get(category_url)
    soup = BeautifulSoup(r.text, 'html.parser')
    # Strip a trailing slash first; otherwise the last segment is empty and
    # the pattern below degenerates to "/" and matches nearly every link.
    category_name = category_url.rstrip("/").split("/")[-1]
    # Escape the name so regex metacharacters in it are matched literally.
    link_pattern = re.compile(re.escape(category_name) + "/")
    # A set removes duplicate links; sorting makes the crawl order repeatable.
    links = {tag['href'] for tag in soup.find_all(href=link_pattern)}
    for lk in sorted(links):
        print(lk)
        getCollection(lk, category_name)
def getCollection(collection_url, category_name):
    """Download every track listed by a collection page's embedded player.

    The collection page embeds its player in an ``<iframe>``. The player page
    is rendered in headless Firefox, and every ``<li>`` carrying a
    ``data-mp3`` attribute is passed to ``downloadSong``.

    Args:
        collection_url: URL of the collection page.
        category_name: Category the collection belongs to; forwarded to
            ``downloadSong`` to build the output directory.

    Raises:
        selenium.common.exceptions.TimeoutException: if the player's
            ``listContainer`` element does not appear within 60 seconds.
    """
    r = requests.get(collection_url)
    soup = BeautifulSoup(r.text, 'html.parser')
    collection_name = collection_url.split("/")[-1]

    # A page without a player iframe cannot be scraped; report it and move on
    # instead of raising TypeError and aborting the rest of the category.
    iframe = soup.iframe
    if iframe is None or not iframe.get("src"):
        print("Could not load", collection_name, "possibly has hiearchy")
        return
    # The iframe src is prefixed with a scheme before use.
    # NOTE(review): this assumes a protocol-relative ("//host/...") src - confirm.
    player_url = "http:" + iframe["src"]

    # The context manager quits the browser on exit, even when the wait
    # times out, so no explicit driver.quit() is needed.
    with webdriver.Firefox(options=options) as driver:
        driver.get(player_url)
        WebDriverWait(driver, 60).until(f)
        # Extra pause after the container appears, before taking the snapshot
        # (presumably to let scripts finish filling in the track list).
        time.sleep(2)
        psoup = BeautifulSoup(driver.page_source, 'html.parser')

    for li in psoup.find_all("li"):
        mp3_path = li.attrs.get('data-mp3')
        if mp3_path is None:
            # Not a track entry.
            print("Could not load", collection_name, "possibly has hiearchy")
            continue
        print("downloading...", mp3_path)
        downloadSong(base_files_url + mp3_path, category_name)
def downloadSong(song_url, category_name):
    """Download one track into ``<base_out_url><category>/<collection>/``.

    The collection name and file name are derived from ``song_url`` by
    ``getSongFromURL``. A file that already exists is left untouched, so
    repeated runs only fetch what is missing.

    Args:
        song_url: Full URL of the track to download.
        category_name: Category directory the track is filed under.
    """
    collection_name, outFile = getSongFromURL(song_url)
    outDir = base_out_url + category_name + "/" + collection_name
    outPath = os.path.join(outDir, outFile)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(outDir, exist_ok=True)

    if os.path.isfile(outPath):
        print("File", outFile, "already exists... skipping")
    else:
        response = requests.get(song_url)
        if response.ok:
            with open(outPath, 'wb') as tempFile:
                tempFile.write(response.content)
        else:
            # Do not save an error page as the track: it would be skipped as
            # "already exists" on every later run and never retried.
            print("Could not download", song_url,
                  "- HTTP status", response.status_code)
    print("")
def getSongFromURL(song_url):
    """Split a track URL into its collection name and file name.

    The URL is split on ``"/"``; the collection name is the component at
    index 4 (the first path segment after ``/files/`` for URLs built from
    ``base_files_url``) and the file name is the last component.

    Args:
        song_url: Full URL of a track.

    Returns:
        A ``(collection_name, file_name)`` tuple.

    Raises:
        IndexError: if the URL has fewer than five ``"/"``-separated parts.
    """
    # Named `parts` rather than `list` so the builtin is not shadowed.
    parts = song_url.split("/")
    return (parts[4], parts[-1])
def f(d):
    """Return the ``listContainer`` element of the page loaded in ``d``.

    Passed to ``WebDriverWait.until`` as the wait condition, which retries
    while the lookup raises ``NoSuchElementException``.

    Args:
        d: The Selenium WebDriver to search.

    Returns:
        The first element whose class is ``listContainer``.
    """
    # find_element_by_class_name was removed in Selenium 4.3; the By-based
    # locator form works on both older and current releases.
    return d.find_element(By.CLASS_NAME, "listContainer")