Compare commits


No commits in common. "4bebd4b7b4207beb241fbfb10602b9167001507b" and "4002b3d2fcb29505fdd3ccb12fb58ab19435c59a" have entirely different histories.

6 changed files with 0 additions and 207 deletions

.gitignore

@@ -1,5 +1 @@
env/
files/
__pycache__/
*.tar.gz
*.log

Binary file not shown.


@@ -1,6 +0,0 @@
from pir_connector import *
# getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/happymusic")
# getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/Corporate-Grooves-Vol-2")
getCategory("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music")


@@ -1,108 +0,0 @@
import requests
import json
import os
from bs4 import BeautifulSoup
from PIL import Image
from fpdf import FPDF

baseurl = "https://bvtlab.com"
phpsessid = "lps4od32b4kgibtgd1440df7i5"
sid = "b88e922dd251e1f6"
time = "6535904988"
reqcookies = dict(PHPSESSID=phpsessid)

def getImageName(imageURL):
    # Return the final path component of the URL (everything after the last "/").
    result = ""
    for char in imageURL:
        result += char
        if char == "/":
            result = ""
    return result

def downloadImage(imageURL, outDir, outFile):
    i = requests.get(imageURL)
    if not os.path.exists(outDir):
        os.makedirs(outDir)
    with open(os.path.join(outDir, outFile), 'wb') as tempFile:
        tempFile.write(i.content)

def getSegments():
    # Fetch the book's table of contents and keep only the top-level entries
    # (rows whose "Section" field is null).
    payload = {
        "action": "getsegments",
        "sid": sid,
    }
    r = requests.get(baseurl + "/ebook.php", params=payload, cookies=reqcookies)
    items = r.json()
    result = []
    for i in range(0, len(items)):
        if items[i]["Section"] is None:
            # print(i+1, items[i]["Title"], items[i]["Short_Title"])
            result.append([i+1, items[i]["Level1"], items[i]["Segment_Depth"],
                           items[i]["Segment_Level"], items[i]["Line_Item"],
                           items[i]["Short_Title"], items[i]["Title"]])
    return result

def getPages(seg, lev, seg_dep, seg_lev, li):
    payload = {
        "action": "getpages",
        "sid": sid,
        "segment": seg,
        "level": lev,
        "segment_depth": seg_dep,
        "segment_level": seg_lev,
        "line_item": li,
        "width": 550
    }
    r = requests.get(baseurl + "/ebook.php", params=payload, cookies=reqcookies)
    soup = BeautifulSoup(r.text, 'html.parser')
    return soup.find_all("div")

def ripPages(divlist, dir, pdf):
    # Each page div holds several image strips; download them, stitch them
    # vertically into one page image, and append that image to the PDF.
    for d in divlist:
        pagenum = d["data-page"]
        imgs = d.find_all("img")
        print(pagenum, "----", d)
        width = 1100
        height = 1491
        stitchedIm = Image.new('RGB', (width, height))
        ih = 0
        for im in imgs:
            print(im)
            imurl = im["data-src"]
            downloadImage(imurl, dir + "/page_" + pagenum, im["id"] + ".jpg")
            im = Image.open(os.path.join(dir + "/page_" + pagenum, im["id"] + ".jpg"))
            print(im.size)
            # height += im.size[1]
            stitchedIm.paste(im, (0, im.size[1] * ih))
            ih += 1
        stitchedIm.save(dir + "/page_" + pagenum + ".jpg")
        pdf.add_page()
        pdf.image(dir + "/page_" + pagenum + ".jpg", 0, 0)
        print("the height:", width, height)

chapters = getSegments()
pdf = FPDF(unit="pt", format=[1100, 1491])
for x in range(0, len(chapters)):
    s = getPages(chapters[x][0], chapters[x][1], chapters[x][2], chapters[x][3], chapters[x][4])
    outdir = chapters[x][5] + "_" + chapters[x][6]
    ripPages(s, outdir, pdf)
pdf.output("ebook.pdf", "F")
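Aside (not part of the diff): ripPages stitches each page's image strips into a fixed 1100x1491 canvas with Pillow and appends it to an FPDF document, though neither Pillow nor fpdf is pinned in the requirements.txt below. A minimal sketch of the stitching step, with illustrative names, that advances by each strip's actual height instead of assuming the strips are uniformly tall as the original does:

# Illustrative sketch only, assuming Pillow is installed.
from PIL import Image

def stitch_strips(strip_paths, page_size=(1100, 1491)):
    # Paste image strips top-to-bottom into one fixed-size page canvas.
    page = Image.new('RGB', page_size)
    y = 0
    for path in strip_paths:
        strip = Image.open(path)
        page.paste(strip, (0, y))
        y += strip.size[1]  # advance by this strip's real height
    return page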


@@ -1,81 +0,0 @@
import requests
import json
import os
import re
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from selenium.webdriver.firefox.options import Options

options = Options()
options.headless = True

base_url = "https://www.partnersinrhyme.com/royaltyfreemusic"
base_files_url = "https://www.partnersinrhyme.com/files/"
base_out_url = "files/PartnersInRhyme/"

def getCategory(category_url):
    # Collect every collection link on the category page and rip each one.
    r = requests.get(category_url)
    soup = BeautifulSoup(r.text, 'html.parser')
    category_name = category_url.split("/")[-1]
    links = set()
    for li in soup.find_all(href=re.compile(category_name + "/")):
        links.add(li['href'])
    for lk in links:
        print(lk)
        getCollection(lk, category_name)

def getCollection(collection_url, category_name):
    # The track list lives in an iframe player whose <li> entries are built
    # by JavaScript, so render it with headless Firefox before parsing.
    r = requests.get(collection_url)
    soup = BeautifulSoup(r.text, 'html.parser')
    player = soup.iframe["src"]
    p = requests.get("http:" + player)  # fetched but unused
    with webdriver.Firefox(options=options) as driver:
        driver.get("http:" + player)
        el = WebDriverWait(driver, 60).until(f)
        time.sleep(2)
        psoup = BeautifulSoup(driver.page_source, 'html.parser')
        # print(psoup)
        driver.quit()
    for li in psoup.find_all("li"):
        try:
            print("downloading...", li.attrs['data-mp3'])
            downloadSong(base_files_url + li.attrs['data-mp3'], category_name)
        except KeyError:
            print("Could not load", collection_url.split("/")[-1], "possibly has hierarchy")

def downloadSong(song_url, category_name):
    (collection_name, outFile) = getSongFromURL(song_url)
    outDir = base_out_url + category_name + "/" + collection_name
    if not os.path.exists(outDir):
        os.makedirs(outDir)
    if not os.path.isfile(os.path.join(outDir, outFile)):
        i = requests.get(song_url)
        with open(os.path.join(outDir, outFile), 'wb') as tempFile:
            tempFile.write(i.content)
    else:
        print("File", outFile, "already exists... skipping")
        print("")

def getSongFromURL(song_url):
    parts = song_url.split("/")
    return (parts[4], parts[-1])

def f(d):
    # Custom wait condition: resolves once the player's track list is present.
    return d.find_element_by_class_name("listContainer")
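Aside (not part of the diff): the helper f works because WebDriverWait.until() accepts any callable that takes the driver and returns a truthy value. Under selenium 3.141.0 as pinned below, the same wait could be written with the built-in expected condition; names here are illustrative:

# Illustrative sketch only, equivalent to WebDriverWait(driver, 60).until(f).
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def wait_for_track_list(driver, timeout=60):
    # Block until the player's track list container appears in the DOM.
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CLASS_NAME, "listContainer"))
    )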


@@ -1,8 +0,0 @@
beautifulsoup4==4.7.1
certifi==2018.11.29
chardet==3.0.4
idna==2.8
requests==2.21.0
selenium==3.141.0
soupsieve==1.7.1
urllib3==1.24.1