一、获取公众号图片
需要安装的包:1、pip install beautifulsoup4(提供 bs4 模块);2、pip install requests
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: KaiSun
"""Download every image referenced by a web page into a local folder."""

import os
import re

import requests
from bs4 import BeautifulSoup

# Default output folder; change it (or pass ``root=`` to download()) to suit
# your machine.
PICTURE_DIR = "/Users/sunkai/study_way/爬虫/picture/"

# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 30


def getHTMLText(url):
    """Fetch *url* and return the decoded page text.

    Returns an empty string when the request fails (connection error,
    timeout, or a non-2xx status), so callers can treat "" as "no page".
    """
    try:
        r = requests.get(url, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
        # Let requests guess the charset from the body rather than trusting
        # the (often missing) Content-Type header.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return ""


def getimgURL(html):
    """Parse *html* and collect the URL of every ``<img>`` tag.

    Returns a list of one-element lists (``[[url], [url], ...]``); that shape
    is what download() indexes with ``adlist[i][0]``.

    The previous implementation ran a regex over the serialized tag, which
    matched any attribute whose name ends in ``src`` (for example
    ``data-src``) and missed a ``src`` that was the last attribute of the tag.
    Reading the attributes from the parsed tag avoids both quirks.
    NOTE(review): ``data-src`` is preferred over ``src`` when both exist --
    confirm that ordering suits the pages being scraped.
    """
    soup = BeautifulSoup(html, "html.parser")
    adlist = []
    for img in soup.find_all("img"):
        src = img.get("data-src") or img.get("src")
        if src:
            adlist.append([src])
    return adlist


def download(adlist, root=PICTURE_DIR):
    """Download each image in *adlist* to ``<root>/<index>.png``.

    *adlist* is the structure returned by getimgURL(). Files that already
    exist are left untouched, and an image whose download fails is reported
    and skipped instead of aborting the whole run.
    """
    if not adlist:
        return
    # Create the folder (and any missing parents) once, not per image.
    if not os.path.isdir(root):
        os.makedirs(root)
    for i, entry in enumerate(adlist):
        img_url = entry[0]
        path = os.path.join(root, str(i) + ".png")
        if not img_url or os.path.exists(path):
            continue
        try:
            r = requests.get(img_url, timeout=REQUEST_TIMEOUT)
            # Without this check an HTTP error page would be saved as a .png.
            r.raise_for_status()
        except requests.RequestException as err:
            print("skip %s: %s" % (img_url, err))
            continue
        with open(path, "wb") as f:
            f.write(r.content)


def main():
    """Scrape the sample article and save its images."""
    url = "https://mp.weixin.qq.com/s/Jy5bUXb4aOmzEoPe6WODJA"
    html = getHTMLText(url)
    img_urls = getimgURL(html)
    download(img_urls)


if __name__ == "__main__":
    main()
二、生成PDF文件
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: KaiSun
"""Combine a folder of numbered images (0.png, 1.png, ...) into one PDF."""

import os
import shutil
import tempfile

from PIL import Image
# NOTE(review): PyPDF2 >= 3.0 is believed to have renamed these classes to
# PdfReader / PdfWriter -- confirm against the installed version.
from PyPDF2 import PdfFileWriter, PdfFileReader
from reportlab.lib.pagesizes import A4, portrait, landscape
from reportlab.pdfgen import canvas


def _numbered_images(img_path):
    """Return the file names in *img_path* whose stem is an integer, sorted
    numerically.

    Entries such as ``.DS_Store`` are ignored; the previous
    ``int(name[:-4])`` sort key raised ValueError on them and on any file
    whose extension was not exactly three characters long.
    """
    names = [
        name for name in os.listdir(img_path)
        if os.path.splitext(name)[0].isdigit()
        and os.path.isfile(os.path.join(img_path, name))
    ]
    names.sort(key=lambda name: int(os.path.splitext(name)[0]))
    return names


# Simple approach: every image is stretched onto its own portrait A4 page.
def convert_images_to_pdf_simple(img_path, pdf_path):
    """Write all numbered images in *img_path* to *pdf_path*, one per page.

    Every page is portrait A4 and the image is scaled to fill it, regardless
    of the image's own aspect ratio.
    """
    (w, h) = portrait(A4)
    c = canvas.Canvas(pdf_path, pagesize=portrait(A4))
    for name in _numbered_images(img_path):
        c.drawImage(os.path.join(img_path, name), 0, 0, w, h)
        c.showPage()
    c.save()


# Size-aware approach: page orientation follows each image's orientation.
def convert_image_to_pdf(img_path, pdf_path):
    """Render the single image *img_path* as a one-page PDF at *pdf_path*.

    Images wider than tall get a landscape A4 page, all others portrait A4;
    the image is scaled to fill the page. The image's pixel size is printed.
    """
    # Only the dimensions are needed here; close the file right away.
    with Image.open(img_path) as img:
        (w0, h0) = img.size
    print(w0, h0)
    page_size = landscape(A4) if w0 > h0 else portrait(A4)
    (w, h) = page_size
    c = canvas.Canvas(pdf_path, pagesize=page_size)
    c.drawImage(img_path, 0, 0, w, h)
    c.showPage()
    c.save()


def convert_images_to_pdf(img_path, pdf_path):
    """Merge all numbered images in *img_path* into the PDF *pdf_path*.

    Each image is first rendered to its own single-page PDF (orientation
    chosen by convert_image_to_pdf) inside a private temporary directory, and
    the pages are then concatenated in numeric file-name order.
    """
    output = PdfFileWriter()
    # A fresh private directory: the old "./temp" + rmtree combination would
    # have deleted a pre-existing ./temp folder belonging to the user.
    tmp_path = tempfile.mkdtemp()
    handles = []
    try:
        for index, name in enumerate(_numbered_images(img_path), start=1):
            pdf = os.path.join(tmp_path, str(index) + ".pdf")
            convert_image_to_pdf(os.path.join(img_path, name), pdf)
            # The writer reads page data lazily, so each per-image PDF must
            # stay open until output.write() has finished.
            handle = open(pdf, "rb")
            handles.append(handle)
            reader = PdfFileReader(handle)
            for page_no in range(reader.getNumPages()):
                output.addPage(reader.getPage(page_no))
        with open(pdf_path, "wb") as output_stream:
            output.write(output_stream)
    finally:
        for handle in handles:
            handle.close()
        shutil.rmtree(tmp_path)


if __name__ == "__main__":
    # Same order as the original listing: the simple variant runs first,
    # then the size-aware variant overwrites the same result file.
    convert_images_to_pdf_simple("/Users/sunkai/study_way/爬虫/picture/",
                                 "/Users/sunkai/study_way/爬虫/result.pdf")
    convert_images_to_pdf("/Users/sunkai/study_way/爬虫/picture/",
                          "/Users/sunkai/study_way/爬虫/result.pdf")
原文:https://www.cnblogs.com/sunkai1993/p/14371551.html