Compare commits
No commits in common. "4bebd4b7b4207beb241fbfb10602b9167001507b" and "4002b3d2fcb29505fdd3ccb12fb58ab19435c59a" have entirely different histories.
4bebd4b7b4
...
4002b3d2fc
|
@ -1,5 +1 @@
|
|||
env/
|
||||
files/
|
||||
__pycache__/
|
||||
*.tar.gz
|
||||
*.log
|
||||
|
|
BIN
geckodriver
BIN
geckodriver
Binary file not shown.
6
main.py
6
main.py
|
@ -1,6 +0,0 @@
|
|||
from pir_connector import *
|
||||
|
||||
# Examples kept for reference: fetch a single collection instead of a whole category.
# NOTE(review): getCollection in pir_connector takes (collection_url, category_name);
# these one-argument examples appear to predate that signature.
# getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/happymusic")
# getCollection("https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music/Corporate-Grooves-Vol-2")

# Download every collection linked from this category page.
category_url = "https://www.partnersinrhyme.com/royaltyfreemusic/Corporate-Music-and-Motivational-Music"
getCategory(category_url)
|
108
main_example.py
108
main_example.py
|
@ -1,108 +0,0 @@
|
|||
import requests
|
||||
import json
|
||||
import os
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Site root; request paths such as "/ebook.php" are appended to it.
baseurl = "https://bvtlab.com"

# Session-specific values. Each can be overridden through the environment so a
# new session does not require editing the source; the literals are the
# previously hard-coded values and remain the defaults.
phpsessid = os.environ.get("BVTLAB_PHPSESSID", "lps4od32b4kgibtgd1440df7i5")
sid = os.environ.get("BVTLAB_SID", "b88e922dd251e1f6")
# NOTE(review): not referenced by any code visible in this file; kept as-is.
time = "6535904988"

# Cookies sent along with every ebook.php request.
reqcookies = dict(PHPSESSID=phpsessid)
|
||||
|
||||
def getImageName(imageURL):
    """Return the part of ``imageURL`` that follows its last "/".

    If the string contains no "/", the whole string is returned; if it ends
    with "/", the result is the empty string.
    """
    # rsplit with maxsplit=1 isolates the final component in a single pass.
    return imageURL.rsplit("/", 1)[-1]
|
||||
|
||||
def downloadImage(imageURL, outDir, outFile):
    """Download ``imageURL`` and write the response body to ``outDir/outFile``.

    ``outDir`` is created (including missing parents) when it does not exist.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            that an error page is never written to disk as an image.
    """
    response = requests.get(imageURL)
    response.raise_for_status()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(outDir, exist_ok=True)

    with open(os.path.join(outDir, outFile), 'wb') as tempFile:
        tempFile.write(response.content)
|
||||
|
||||
def getSegments():
    """Request the segment list from ebook.php and return the selected entries.

    Sends a "getsegments" request and keeps only the items whose "Section"
    value is null in the JSON response.

    Returns:
        A list of lists, one per kept item, in response order:
        ``[position, Level1, Segment_Depth, Segment_Level, Line_Item,
        Short_Title, Title]`` where ``position`` is the item's 1-based index
        in the full response.
    """
    payload = {
        "action": "getsegments",
        "sid": sid,
    }

    r = requests.get(baseurl + "/ebook.php", params=payload, cookies=reqcookies)
    items = r.json()

    return [
        [
            position,
            item["Level1"],
            item["Segment_Depth"],
            item["Segment_Level"],
            item["Line_Item"],
            item["Short_Title"],
            item["Title"],
        ]
        for position, item in enumerate(items, start=1)
        if item["Section"] is None
    ]
|
||||
|
||||
def getPages(seg, lev, seg_dep, seg_lev, li):
    """Fetch the pages of one segment and return all ``div`` tags of the reply.

    Sends a "getpages" request to ebook.php with the given segment
    coordinates and a fixed width of 550, then parses the response text as
    HTML.

    Returns:
        The result of ``find_all("div")`` on the parsed response.
    """
    query = dict(
        action="getpages",
        sid=sid,
        segment=seg,
        level=lev,
        segment_depth=seg_dep,
        segment_level=seg_lev,
        line_item=li,
        width=550,
    )

    response = requests.get(baseurl + "/ebook.php", params=query, cookies=reqcookies)
    return BeautifulSoup(response.text, 'html.parser').find_all("div")
|
||||
|
||||
def ripPages(divlist, dir, pdf):
    """Download each page's image tiles, stitch them and append the page to ``pdf``.

    For every element of ``divlist`` the ``img`` tags it contains are
    downloaded into ``<dir>/page_<n>/``, pasted top to bottom onto one canvas,
    saved as ``<dir>/page_<n>.jpg`` and added to ``pdf`` as a new page.

    Args:
        divlist: iterable of parsed tags carrying a "data-page" attribute and
            containing ``img`` tags with "data-src" and "id" attributes.
        dir: output directory prefix (the name shadows the builtin but is part
            of the existing signature).
        pdf: object providing ``add_page()`` and ``image(path, x, y)``.
    """
    # NOTE(review): ``Image`` is not imported in the visible part of this file
    # (presumably PIL.Image) -- confirm.

    # Fixed canvas size used for every stitched page.
    width = 1100
    height = 1491

    for d in divlist:
        pagenum = d["data-page"]
        imgs = d.find_all("img")

        print(pagenum, "----", d)

        page_dir = dir + "/page_" + pagenum
        page_file = dir + "/page_" + pagenum + ".jpg"

        stichedIm = Image.new('RGB', (width, height))

        # Running vertical offset: each tile goes directly below the previous
        # one, which stays correct when tiles differ in height (for example a
        # shorter last tile).
        y_offset = 0

        for tag in imgs:
            print(tag)
            tile_name = tag["id"] + ".jpg"
            downloadImage(tag["data-src"], page_dir, tile_name)

            # The context manager closes the tile's file handle after pasting.
            with Image.open(os.path.join(page_dir, tile_name)) as tile:
                print(tile.size)
                stichedIm.paste(tile, (0, y_offset))
                y_offset += tile.size[1]

        stichedIm.save(page_file)
        pdf.add_page()
        pdf.image(page_file, 0, 0)
        print("the height:", width, height)
|
||||
|
||||
|
||||
# Fetch the segment list, rip every segment into one shared PDF, then write it.
chapters = getSegments()

# NOTE(review): ``FPDF`` is not imported in the visible part of this file -- confirm.
# Page format in points matches the 1100x1491 canvas used when stitching pages.
pdf = FPDF(unit = "pt", format = [1100, 1491])

# Each entry is [position, Level1, Segment_Depth, Segment_Level, Line_Item,
# Short_Title, Title] as built by getSegments.
for seg, lev, seg_dep, seg_lev, li, short_title, title in chapters:
    s = getPages(seg, lev, seg_dep, seg_lev, li)
    outdir = short_title + "_" + title
    ripPages(s, outdir, pdf)

pdf.output("ebook.pdf", "F")
|
|
@ -1,81 +0,0 @@
|
|||
import requests
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from bs4 import BeautifulSoup
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support.expected_conditions import presence_of_element_located
|
||||
from selenium.webdriver.firefox.options import Options
|
||||
|
||||
# Firefox options shared by every browser session: run without a window.
options = Options()
options.headless = True

# NOTE(review): base_url is not referenced by the code visible in this file.
base_url = "https://www.partnersinrhyme.com/royaltyfreemusic"

# Prefix joined with an item's "data-mp3" value to form the download URL.
base_files_url = "https://www.partnersinrhyme.com/files/"

# Local directory prefix under which downloaded songs are stored.
base_out_url = "files/PartnersInRhyme/"
|
||||
|
||||
def getCategory(category_url):
    """Download every collection linked from a category page.

    The last path component of ``category_url`` is used as the category name.
    Every ``href`` on the page that contains "<category name>/" is treated as
    a collection link and passed to ``getCollection``.
    """
    # Ignore a trailing slash: otherwise the name would be empty and the
    # pattern below would match every link containing "/".
    category_name = category_url.rstrip("/").split("/")[-1]

    r = requests.get(category_url)
    soup = BeautifulSoup(r.text, 'html.parser')

    # Escape the name so characters such as "." or "+" are matched literally.
    pattern = re.compile(re.escape(category_name + "/"))

    # A set removes links that appear more than once on the page.
    links = {tag['href'] for tag in soup.find_all(href=pattern)}

    # Sorted so that repeated runs visit collections in the same order.
    for lk in sorted(links):
        print(lk)
        getCollection(lk, category_name)
|
||||
|
||||
def getCollection(collection_url, category_name):
    """Download every song listed by the player of one collection page.

    The collection page's first iframe is loaded in headless Firefox. Once
    the element located by ``f`` is present, the rendered page is parsed and
    each ``li`` carrying a "data-mp3" attribute is handed to ``downloadSong``.

    Args:
        collection_url: URL of the collection page.
        category_name: category folder name forwarded to ``downloadSong``.
    """
    r = requests.get(collection_url)
    soup = BeautifulSoup(r.text, 'html.parser')

    player = soup.iframe["src"]
    # The src is presumably protocol-relative ("//host/path"); only then does
    # it need a scheme prefix. A src that already has one is used unchanged.
    player_url = "http:" + player if player.startswith("//") else player

    # The context manager quits the browser on exit, so no explicit quit() is
    # needed inside the block.
    with webdriver.Firefox(options=options) as driver:
        driver.get(player_url)
        # Wait up to 60 seconds until the condition f finds its element.
        WebDriverWait(driver, 60).until(f)

        # Short extra pause before the page source is captured.
        time.sleep(2)
        psoup = BeautifulSoup(driver.page_source, 'html.parser')

    for li in psoup.find_all("li"):
        mp3 = li.attrs.get('data-mp3')
        if mp3 is None:
            # Only a missing attribute is reported here; errors raised while
            # downloading are no longer mistaken for it.
            print("Could not load", collection_url.split("/")[-1], "possibly has hiearchy")
            continue

        print("downloading...", mp3)
        downloadSong(base_files_url + mp3, category_name)
|
||||
|
||||
def downloadSong(song_url, category_name):
    """Download one song into ``base_out_url/<category>/<collection>/``.

    The collection and file names are derived from ``song_url`` by
    ``getSongFromURL``. A file that already exists is left untouched. A
    response with an error status is reported and not written, so a failed
    download is not later skipped as "already exists".
    """
    collection_name, outFile = getSongFromURL(song_url)
    outDir = base_out_url + category_name + "/" + collection_name
    target = os.path.join(outDir, outFile)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(outDir, exist_ok=True)

    if os.path.isfile(target):
        print("File", outFile, "already exists... skipping")
    else:
        response = requests.get(song_url)
        if response.ok:
            with open(target, 'wb') as tempFile:
                tempFile.write(response.content)
        else:
            print("Could not download", song_url, "- HTTP status", response.status_code)

    # Blank line separates the console output of consecutive songs.
    print("")
|
||||
|
||||
def getSongFromURL(song_url):
    """Split a song URL into ``(collection_name, file_name)``.

    The URL is split on "/": the component at index 4 is the collection name
    and the last component is the file name. For
    ``https://host/files/Collection/song.mp3`` this gives
    ``("Collection", "song.mp3")``.

    Raises:
        IndexError: if the URL has fewer than five "/"-separated components.
    """
    parts = song_url.split("/")
    return (parts[4], parts[-1])
|
||||
|
||||
def f(d):
    """Return the element with class "listContainer" found through driver ``d``.

    Passed to ``WebDriverWait.until`` as the wait condition. Uses the generic
    ``find_element(By.CLASS_NAME, ...)`` locator call rather than the
    deprecated ``find_element_by_class_name`` helper.
    """
    return d.find_element(By.CLASS_NAME, "listContainer")
|
|
@ -1,8 +0,0 @@
|
|||
beautifulsoup4==4.7.1
|
||||
certifi==2018.11.29
|
||||
chardet==3.0.4
|
||||
idna==2.8
|
||||
requests==2.21.0
|
||||
selenium==3.141.0
|
||||
soupsieve==1.7.1
|
||||
urllib3==1.24.1
|
Loading…
Reference in New Issue