diff --git a/main.py b/main.py
new file mode 100644
index 0000000..582d215
--- /dev/null
+++ b/main.py
@@ -0,0 +1,116 @@
+# Rips a bvtlab.com ebook: downloads each page's image strips and stitches them into ebook.pdf.
+import os
+
+import requests
+from bs4 import BeautifulSoup
+from PIL import Image
+from fpdf import FPDF
+
+baseurl = "https://bvtlab.com"
+
+# Session identifiers for the target ebook; PHPSESSID is sent as a cookie, sid as a query parameter.
+phpsessid = "lps4od32b4kgibtgd1440df7i5"
+sid = "b88e922dd251e1f6"
+time = "6535904988"  # currently unused
+
+reqcookies = dict(PHPSESSID=phpsessid)
+
+
+def getImageName(imageURL):
+    """Return the file name portion of a URL (text after the last '/'); currently unused."""
+    return imageURL.rsplit("/", 1)[-1]
+
+
+def downloadImage(imageURL, outDir, outFile):
+    """Download imageURL and save it as outDir/outFile, creating outDir if needed."""
+    r = requests.get(imageURL)
+
+    if not os.path.exists(outDir):
+        os.makedirs(outDir)
+
+    with open(os.path.join(outDir, outFile), 'wb') as tempFile:
+        tempFile.write(r.content)
+
+
+def getSegments():
+    """Fetch the book's table of contents and return one entry per chapter-level segment."""
+    payload = {
+        "action": "getsegments",
+        "sid": sid,
+    }
+
+    r = requests.get(baseurl + "/ebook.php", params=payload, cookies=reqcookies)
+    items = r.json()
+
+    result = []
+
+    for i, item in enumerate(items):
+        # Entries without a Section are the top-level (chapter) segments.
+        if item["Section"] is None:
+            result.append([i + 1, item["Level1"], item["Segment_Depth"],
+                           item["Segment_Level"], item["Line_Item"],
+                           item["Short_Title"], item["Title"]])
+
+    return result
+
+
+def getPages(seg, lev, seg_dep, seg_lev, li):
+    """Fetch the rendered page markup for one segment and return its page <div> elements."""
+    payload = {
+        "action": "getpages",
+        "sid": sid,
+        "segment": seg,
+        "level": lev,
+        "segment_depth": seg_dep,
+        "segment_level": seg_lev,
+        "line_item": li,
+        "width": 550
+    }
+
+    r = requests.get(baseurl + "/ebook.php", params=payload, cookies=reqcookies)
+    soup = BeautifulSoup(r.text, 'html.parser')
+
+    return soup.find_all("div")
+
+
+def ripPages(divlist, outDir, pdf):
+    """Download each page's image strips, stitch them into one JPEG, and append it to the PDF."""
+    width = 1100
+    height = 1491
+
+    for d in divlist:
+        pagenum = d["data-page"]
+        imgs = d.find_all("img")
+        pageDir = outDir + "/page_" + pagenum
+
+        print("Ripping page", pagenum, "with", len(imgs), "image strips")
+
+        stitchedIm = Image.new('RGB', (width, height))
+        yOffset = 0
+
+        for tag in imgs:
+            imurl = tag["data-src"]
+            filename = tag["id"] + ".jpg"
+            downloadImage(imurl, pageDir, filename)
+
+            # Paste each strip directly below the previous one.
+            strip = Image.open(os.path.join(pageDir, filename))
+            stitchedIm.paste(strip, (0, yOffset))
+            yOffset += strip.size[1]
+
+        pagePath = outDir + "/page_" + pagenum + ".jpg"
+        stitchedIm.save(pagePath)
+        pdf.add_page()
+        pdf.image(pagePath, 0, 0)
+
+
+chapters = getSegments()
+
+pdf = FPDF(unit="pt", format=[1100, 1491])
+
+for index, level1, depth, level, line_item, short_title, title in chapters:
+    divs = getPages(index, level1, depth, level, line_item)
+    outdir = short_title + "_" + title
+    ripPages(divs, outdir, pdf)
+
+pdf.output("ebook.pdf", "F")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d481511
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+beautifulsoup4==4.7.1
+soupsieve==1.7.1
+requests
+Pillow
+fpdf