add project

This commit is contained in:
Alexander Matson 2019-01-14 10:47:15 -05:00
parent 4002b3d2fc
commit 672e708d44
2 changed files with 112 additions and 0 deletions

110
main.py Normal file
View File

@ -0,0 +1,110 @@
import requests
import json
import os
from bs4 import BeautifulSoup
from PIL import Image
from fpdf import FPDF
# Root URL of the site; request paths such as "/ebook.php" are appended to it.
baseurl = "https://bvtlab.com"

# Hard-coded session values used by the requests in this script.
# NOTE(review): presumably tied to one login session and one book, and likely
# to expire -- confirm before relying on them.
phpsessid = "lps4od32b4kgibtgd1440df7i5"  # value of the PHPSESSID cookie
sid = "b88e922dd251e1f6"  # sent as the "sid" query parameter
time = "6535904988"  # NOTE(review): not referenced by the code visible here

# Cookies handed to requests.get() through its cookies= argument.
reqcookies = {"PHPSESSID": phpsessid}
def getImageName(imageURL):
    """Return the part of *imageURL* that follows its last "/".

    A string without any "/" is returned unchanged, and a string that ends
    with "/" yields the empty string.
    """
    # rpartition returns ("", "", imageURL) when no "/" is present, so the
    # last element is always the wanted tail. This replaces a per-character
    # loop that rebuilt the string with repeated "+=".
    return imageURL.rpartition("/")[2]
def downloadImage(imageURL, outDir, outFile):
    """Download *imageURL* and write the response body to outDir/outFile.

    Args:
        imageURL: URL fetched with a plain GET (no cookies are sent).
        outDir: Directory to write into; created, with parents, if missing.
        outFile: File name inside *outDir*.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = requests.get(imageURL)
    # Fail here rather than saving an error page under an image file name,
    # which would only surface later as an unreadable image.
    response.raise_for_status()
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(outDir, exist_ok=True)
    with open(os.path.join(outDir, outFile), 'wb') as outHandle:
        outHandle.write(response.content)
def getSegments():
    """Fetch the segment list and return the entries whose "Section" is null.

    Sends a GET to baseurl + "/ebook.php" with action=getsegments and the
    module-level ``sid``, using the module-level session cookies, and decodes
    the response as JSON.

    Returns:
        A list of 7-element lists, one per kept entry, in response order:
        [1-based position in the response, Level1, Segment_Depth,
        Segment_Level, Line_Item, Short_Title, Title].

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    payload = {
        "action": "getsegments",
        "sid": sid,
    }
    r = requests.get(baseurl + "/ebook.php", params=payload, cookies=reqcookies)
    # Surface HTTP failures directly instead of as a JSON decoding error.
    r.raise_for_status()
    # The position counts every entry in the response (starting at 1), not
    # just the ones that pass the filter.
    return [
        [
            position,
            item["Level1"],
            item["Segment_Depth"],
            item["Segment_Level"],
            item["Line_Item"],
            item["Short_Title"],
            item["Title"],
        ]
        for position, item in enumerate(r.json(), start=1)
        if item["Section"] is None
    ]
def getPages(seg, lev, seg_dep, seg_lev, li):
    """Fetch the page markup for one segment and return all of its <div> tags.

    Sends a GET to baseurl + "/ebook.php" with action=getpages, the
    module-level ``sid`` and session cookies, and a fixed width of 550.

    Args:
        seg: Sent as the "segment" query parameter.
        lev: Sent as the "level" query parameter.
        seg_dep: Sent as the "segment_depth" query parameter.
        seg_lev: Sent as the "segment_level" query parameter.
        li: Sent as the "line_item" query parameter.

    Returns:
        The result of ``find_all("div")`` on the parsed response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    payload = {
        "action": "getpages",
        "sid": sid,
        "segment": seg,
        "level": lev,
        "segment_depth": seg_dep,
        "segment_level": seg_lev,
        "line_item": li,
        "width": 550,
    }
    r = requests.get(baseurl + "/ebook.php", params=payload, cookies=reqcookies)
    # Without this check an HTTP error page would be parsed like any other
    # document and its <div>s returned as if they were pages.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    return soup.find_all("div")
def ripPages(divlist, dir, pdf):
    """Download, stitch and append to *pdf* every page described by *divlist*.

    For each element of *divlist*, every <img> inside it is downloaded to
    ``dir + "/page_" + <data-page>`` (named ``<img id>.jpg``). The strips are
    pasted top to bottom onto one 1100x1491 RGB canvas, which is saved as
    ``dir + "/page_" + <data-page> + ".jpg"`` and added to *pdf* as a new
    page at position (0, 0).

    Args:
        divlist: Iterable of parsed tags that support ``["data-page"]`` and
            ``find_all("img")``; each <img> must carry "data-src" and "id".
        dir: Output directory prefix for this batch of pages.
        pdf: Object providing ``add_page()`` and ``image(path, x, y)``.
    """
    # Fixed canvas size in pixels; the driver script in this file creates the
    # PDF with the same numbers as its page format.
    width = 1100
    height = 1491
    for d in divlist:
        pagenum = d["data-page"]
        imgs = d.find_all("img")
        print(pagenum, "----", d)
        pagedir = dir + "/page_" + pagenum
        pagefile = dir + "/page_" + pagenum + ".jpg"
        stichedIm = Image.new('RGB', (width, height))
        # Running vertical offset: each strip starts where the previous one
        # ended. Multiplying the current strip's height by its index is only
        # right when all strips are equally tall.
        yoffset = 0
        for tag in imgs:
            print(tag)
            stripname = tag["id"] + ".jpg"
            downloadImage(tag["data-src"], pagedir, stripname)
            # Context manager closes the strip's file handle after pasting.
            with Image.open(os.path.join(pagedir, stripname)) as strip:
                print(strip.size)
                stichedIm.paste(strip, (0, yoffset))
                yoffset += strip.size[1]
        stichedIm.save(pagefile)
        pdf.add_page()
        pdf.image(pagefile, 0, 0)
        print("the height:", width, height)
# Driver: fetch the segment list, rip each segment's pages into a shared PDF
# whose page format is 1100x1491 pt, then write the PDF to "ebook.pdf".
chapters = getSegments()
pdf = FPDF(unit="pt", format=[1100, 1491])
for chapter in chapters:
    # The first five fields are the getPages() arguments, in order; the last
    # two are the short title and title used to name the output directory.
    *page_query, short_title, title = chapter
    pages = getPages(*page_query)
    ripPages(pages, short_title + "_" + title, pdf)
pdf.output("ebook.pdf", "F")

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
beautifulsoup4==4.7.1
soupsieve==1.7.1