# royalty-ripper/pir_connector.py

import requests
import json
import os
import re
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.webdriver.firefox.options import Options

# Run Firefox without a visible window. The flag is passed as a command-line
# argument because the ``Options.headless`` setter was deprecated and later
# removed in Selenium 4; the argument form works on old and new releases.
options = Options()
options.add_argument("-headless")

# Landing page of the royalty-free music section (not referenced by the
# functions in the visible part of this file).
base_url = "https://www.partnersinrhyme.com/royaltyfreemusic"
# Prefix joined with a track's ``data-mp3`` attribute to form its download URL.
base_files_url = "https://www.partnersinrhyme.com/files/"
# Local directory prefix under which downloads are stored as
# <category>/<collection>/<file>.
base_out_url = "files/PartnersInRhyme/"

def getCategory(category_url):
    """Download every collection linked from a category page.

    Fetches ``category_url``, gathers the distinct ``href`` values that
    contain ``<category_name>/`` and hands each one to ``getCollection``.

    Args:
        category_url: URL of the category page. Its last path segment is
            used as the category name; a trailing slash is ignored.
    """
    # Strip a trailing "/" first: otherwise the last segment is empty and the
    # pattern below degenerates to "/", which matches nearly every link.
    category_name = category_url.rstrip("/").split("/")[-1]

    response = requests.get(category_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Escape the name so characters such as "." or "+" are matched literally.
    link_pattern = re.compile(re.escape(category_name) + "/")

    # A set drops duplicate hrefs so each collection is processed only once.
    links = {tag['href'] for tag in soup.find_all(href=link_pattern)}

    for link in links:
        print(link)
        getCollection(link, category_name)

def getCollection(collection_url, category_name):
    """Download every track of a single collection.

    The collection page embeds its player in an ``<iframe>``. The player page
    is rendered in Firefox (using the module-level ``options``), and every
    ``<li>`` that carries a ``data-mp3`` attribute is passed to
    ``downloadSong``.

    Args:
        collection_url: URL of the collection page.
        category_name: Name of the category the collection was found in;
            forwarded unchanged to ``downloadSong``.
    """
    collection_label = collection_url.split("/")[-1]

    r = requests.get(collection_url)
    soup = BeautifulSoup(r.text, 'html.parser')

    # A page without a player iframe cannot be scraped; skip it instead of
    # aborting the whole run with a TypeError.
    iframe = soup.iframe
    player_src = iframe.get("src") if iframe is not None else None
    if not player_src:
        print("Could not load", collection_label, "no player iframe found")
        return

    # The src is prefixed with a scheme unless it already carries one.
    if player_src.startswith(("http://", "https://")):
        player_url = player_src
    else:
        player_url = "http:" + player_src

    # The context manager quits the browser on exit, so no explicit quit()
    # is needed; only the rendered HTML is kept.
    with webdriver.Firefox(options=options) as driver:
        driver.get(player_url)
        WebDriverWait(driver, 60).until(f)

        # Extra pause after the list container appears, before the page
        # source is captured.
        time.sleep(2)
        page_source = driver.page_source

    psoup = BeautifulSoup(page_source, 'html.parser')

    for li in psoup.find_all("li"):
        mp3_path = li.attrs.get('data-mp3')
        if mp3_path is None:
            # Same report as before for list items without a track path,
            # but without masking KeyErrors raised inside downloadSong.
            print("Could not load", collection_label, "possibly has hiearchy")
            continue

        print("downloading...", mp3_path)
        downloadSong(base_files_url + mp3_path, category_name)

def downloadSong(song_url, category_name):
    """Download one track unless it is already present on disk.

    The file is stored under
    ``base_out_url/<category_name>/<collection_name>/<file name>``, where the
    collection and file names are derived from ``song_url`` by
    ``getSongFromURL``.

    Args:
        song_url: Full URL of the track to download.
        category_name: Category directory the track is filed under.
    """
    (collection_name, outFile) = getSongFromURL(song_url)
    outDir = os.path.join(base_out_url, category_name, collection_name)
    outPath = os.path.join(outDir, outFile)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(outDir, exist_ok=True)

    if os.path.isfile(outPath):
        print("File", outFile, "already exists... skipping")
    else:
        response = requests.get(song_url)
        if response.ok:
            with open(outPath, 'wb') as tempFile:
                tempFile.write(response.content)
        else:
            # Do not save an error page as the track: it would be treated as
            # "already exists" on every later run and never be retried.
            print("Could not download", song_url, "- HTTP status", response.status_code)
    print("")

def getSongFromURL(song_url):
    """Split a track URL into its collection name and file name.

    The URL is split on "/"; the fifth piece (index 4) is taken as the
    collection name and the final piece as the file name.

    Args:
        song_url: URL of the track.

    Returns:
        A ``(collection_name, file_name)`` tuple.

    Raises:
        IndexError: If the URL has fewer than five "/"-separated pieces.
    """
    segments = song_url.split("/")
    collection_name = segments[4]
    file_name = segments[-1]
    return (collection_name, file_name)

def f(d):
    """Wait condition: find the element with class ``listContainer``.

    Intended to be passed to ``WebDriverWait(...).until``, which calls it
    with the driver until it returns a truthy value or the wait times out.

    Args:
        d: The WebDriver instance to search in.

    Returns:
        The first element whose class name is ``listContainer``.
    """
    # ``find_element_by_class_name`` was removed in Selenium 4.3; the
    # ``find_element(By.CLASS_NAME, ...)`` form works on Selenium 3 and 4.
    return d.find_element(By.CLASS_NAME, "listContainer")
create song downloader 2019-01-14 16:03:40 +00:00			`import requests`
			`import json`
			`import os`
download all collections in category 2019-01-14 18:31:14 +00:00			`import re`
working album fetching 2019-01-14 17:34:19 +00:00			`import time`
create song downloader 2019-01-14 16:03:40 +00:00			`from bs4 import BeautifulSoup`
working album fetching 2019-01-14 17:34:19 +00:00			`from selenium import webdriver`
			`from selenium.webdriver.common.by import By`
			`from selenium.webdriver.common.keys import Keys`
			`from selenium.webdriver.support.ui import WebDriverWait`
			`from selenium.webdriver.support.expected_conditions import presence_of_element_located`
			`from selenium.webdriver.firefox.options import Options`

			`options = Options()`
			`options.headless = True`
create song downloader 2019-01-14 16:03:40 +00:00
			`base_url = "https://www.partnersinrhyme.com/royaltyfreemusic"`
working album fetching 2019-01-14 17:34:19 +00:00			`base_files_url = "https://www.partnersinrhyme.com/files/"`
don't download if file is already there 2019-01-14 17:47:03 +00:00			`base_out_url = "files/PartnersInRhyme/"`
create song downloader 2019-01-14 16:03:40 +00:00
download all collections in category 2019-01-14 18:31:14 +00:00			`def getCategory(category_url):`
			`r = requests.get(category_url)`
			`soup = BeautifulSoup(r.text, 'html.parser')`

			`category_name = category_url.split("/")[-1]`

			`links = set();`

			`for li in soup.find_all(href=re.compile(category_name+ "/")):`
			`links.add(li['href'])`

			`for lk in links:`
			`print(lk)`
			`getCollection(lk, category_name)`

			`def getCollection(collection_url, category_name):`
working album fetching 2019-01-14 17:34:19 +00:00			`r = requests.get(collection_url)`
			`soup = BeautifulSoup(r.text, 'html.parser')`
			`player = soup.iframe["src"];`

			`p = requests.get("http:" + player);`

			`with webdriver.Firefox(options=options) as driver:`
			`driver.get("http:" + player)`
			`el = WebDriverWait(driver, 60).until(f)`

don't download if file is already there 2019-01-14 17:47:03 +00:00			`time.sleep(2)`
working album fetching 2019-01-14 17:34:19 +00:00			`psoup = BeautifulSoup(driver.page_source, 'html.parser')`

download all collections in category 2019-01-14 18:31:14 +00:00			`# print(psoup)`
working album fetching 2019-01-14 17:34:19 +00:00			`driver.quit()`
don't download if file is already there 2019-01-14 17:47:03 +00:00
working album fetching 2019-01-14 17:34:19 +00:00			`for li in psoup.find_all("li"):`
download all collections in category 2019-01-14 18:31:14 +00:00			`try:`
			`print("downloading...", li.attrs['data-mp3'])`
			`downloadSong(base_files_url + li.attrs['data-mp3'], category_name)`
			`except KeyError:`
			`print("Could not load", collection_url.split("/")[-1], "possibly has hiearchy")`
working album fetching 2019-01-14 17:34:19 +00:00
download all collections in category 2019-01-14 18:31:14 +00:00			`def downloadSong(song_url, category_name):`
working album fetching 2019-01-14 17:34:19 +00:00			`(collection_name, outFile) = getSongFromURL(song_url);`
download all collections in category 2019-01-14 18:31:14 +00:00			`outDir = base_out_url + category_name + "/" + collection_name`
create song downloader 2019-01-14 16:03:40 +00:00
			`if not os.path.exists(outDir):`
			`os.makedirs(outDir)`

don't download if file is already there 2019-01-14 17:47:03 +00:00			`if not os.path.isfile(os.path.join(outDir, outFile)):`
			`i = requests.get(song_url)`
create song downloader 2019-01-14 16:03:40 +00:00
don't download if file is already there 2019-01-14 17:47:03 +00:00			`with open(os.path.join(outDir, outFile), 'wb') as tempFile:`
			`tempFile.write(i.content)`
			`else:`
			`print("File", outFile, "already exists... skipping")`
download all collections in category 2019-01-14 18:31:14 +00:00			`print("")`
create song downloader 2019-01-14 16:03:40 +00:00
working album fetching 2019-01-14 17:34:19 +00:00			`def getSongFromURL(song_url):`
			`list = song_url.split("/")`
			`return (list[4], list[-1])`

			`def f(d):`
			`return d.find_element_by_class_name("listContainer")`