"""Download the page images of an e-book from bvtlab.com and build a PDF.

The script asks ``/ebook.php`` for the list of segments, fetches the page
markup of each selected segment, downloads every image tile referenced by a
page, stitches the tiles of one page vertically into a single JPEG and
appends that JPEG as a page of ``ebook.pdf``.
"""

import json
import os

import requests
from bs4 import BeautifulSoup
from fpdf import FPDF
from PIL import Image

baseurl = "https://bvtlab.com"

# Session identifiers sent with the API requests. The original hard-coded
# values remain the defaults; set BVT_PHPSESSID / BVT_SID in the environment
# to use a different session without editing this file.
phpsessid = os.environ.get("BVT_PHPSESSID", "lps4od32b4kgibtgd1440df7i5")
sid = os.environ.get("BVT_SID", "b88e922dd251e1f6")
time = "6535904988"  # not referenced anywhere else in this file
reqcookies = dict(PHPSESSID=phpsessid)

# Size (in pixels / PDF points) of one stitched page and of one PDF page.
PAGE_WIDTH = 1100
PAGE_HEIGHT = 1491

# Seconds to wait on the server before giving up on a request.
REQUEST_TIMEOUT = 30


def getImageName(imageURL):
    """Return the part of *imageURL* that follows its last ``/``.

    A string without ``/`` is returned unchanged; a string ending in ``/``
    yields ``""``.
    """
    return imageURL.rsplit("/", 1)[-1]


def downloadImage(imageURL, outDir, outFile):
    """Download *imageURL* and write the response body to ``outDir/outFile``.

    *outDir* is created if it does not exist yet.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            an error page is never saved under an image file name.
    """
    response = requests.get(imageURL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    os.makedirs(outDir, exist_ok=True)
    with open(os.path.join(outDir, outFile), "wb") as out:
        out.write(response.content)


def getSegments():
    """Fetch the segment list and return the entries whose ``Section`` is null.

    Returns:
        A list of rows ``[position, Level1, Segment_Depth, Segment_Level,
        Line_Item, Short_Title, Title]`` where ``position`` is the 1-based
        index of the entry in the server's JSON list.
    """
    payload = {
        "action": "getsegments",
        "sid": sid,
    }
    r = requests.get(
        baseurl + "/ebook.php",
        params=payload,
        cookies=reqcookies,
        timeout=REQUEST_TIMEOUT,
    )
    r.raise_for_status()
    return [
        [
            position,
            item["Level1"],
            item["Segment_Depth"],
            item["Segment_Level"],
            item["Line_Item"],
            item["Short_Title"],
            item["Title"],
        ]
        for position, item in enumerate(r.json(), start=1)
        if item["Section"] is None
    ]


def getPages(seg, lev, seg_dep, seg_lev, li):
    """Fetch the page markup of one segment and return all its ``<div>`` tags.

    The arguments are passed through unchanged as the ``segment``, ``level``,
    ``segment_depth``, ``segment_level`` and ``line_item`` query parameters.
    """
    payload = {
        "action": "getpages",
        "sid": sid,
        "segment": seg,
        "level": lev,
        "segment_depth": seg_dep,
        "segment_level": seg_lev,
        "line_item": li,
        "width": 550,
    }
    r = requests.get(
        baseurl + "/ebook.php",
        params=payload,
        cookies=reqcookies,
        timeout=REQUEST_TIMEOUT,
    )
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")
    return soup.find_all("div")


def ripPages(divlist, dir, pdf):
    """Download, stitch and append to *pdf* every page found in *divlist*.

    For each ``<div>`` carrying a ``data-page`` attribute, every ``<img>``
    inside it is downloaded from its ``data-src`` URL to
    ``dir/page_<n>/<img id>.jpg``. The tiles are pasted top to bottom onto
    one ``PAGE_WIDTH`` x ``PAGE_HEIGHT`` canvas, which is saved as
    ``dir/page_<n>.jpg`` and added to *pdf* as a new page.

    Args:
        divlist: iterable of BeautifulSoup tags, as returned by getPages().
        dir: output directory for this segment.
        pdf: FPDF document the pages are appended to.
    """
    for d in divlist:
        # getPages() returns every <div> of the response, so skip the ones
        # that do not describe a page instead of failing on the lookup.
        pagenum = d.get("data-page")
        if pagenum is None:
            continue
        imgs = d.find_all("img")
        print(pagenum, "----", d)

        width = PAGE_WIDTH
        height = PAGE_HEIGHT
        page_dir = os.path.join(dir, "page_" + pagenum)
        page_file = os.path.join(dir, "page_" + pagenum + ".jpg")
        # Create the directories up front so a page without any image can
        # still be saved below.
        os.makedirs(page_dir, exist_ok=True)

        stitched = Image.new("RGB", (width, height))
        y_offset = 0
        for tag in imgs:
            print(tag)
            tile_name = tag["id"] + ".jpg"
            downloadImage(tag["data-src"], page_dir, tile_name)
            with Image.open(os.path.join(page_dir, tile_name)) as tile:
                print(tile.size)
                stitched.paste(tile, (0, y_offset))
                # Advance by this tile's own height so tiles of different
                # heights still stack without gaps or overlap.
                y_offset += tile.size[1]

        stitched.save(page_file)
        pdf.add_page()
        pdf.image(page_file, 0, 0)
        print("the height:", width, height)


chapters = getSegments()
pdf = FPDF(unit="pt", format=[PAGE_WIDTH, PAGE_HEIGHT])
for segment, level, seg_depth, seg_level, line_item, short_title, title in chapters:
    s = getPages(segment, level, seg_depth, seg_level, line_item)
    outdir = short_title + "_" + title
    ripPages(s, outdir, pdf)
pdf.output("ebook.pdf", "F")