首页 > 编程语言 > 详细

python批量获取公众号图片生成PDF文件

时间:2021-02-04 12:25:47      阅读:45      评论:0      收藏:0      [点我收藏+]

一、获取公众号图片

需要安装的包
1、pip install bs4
2、pip install requests
 1 #!/usr/bin/env python
 2 # -*- coding: utf-8 -*-
 3 # Author: KaiSun
 4 
 5 import requests
 6 from bs4 import BeautifulSoup
 7 import re
 8 import os
 9 
# Fetch the page content
def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The response encoding is replaced by the encoding detected from the
    body (``apparent_encoding``) before the text is decoded.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, or a non-2xx status).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "nothing to parse".
        # Only request failures are swallowed; programming errors and
        # KeyboardInterrupt still propagate.
        return ""
    r.encoding = r.apparent_encoding
    return r.text
19 
# Parse the page and collect the URL of every image
def getimgURL(html):
    """Extract image source URLs from an HTML document.

    Every ``<img>`` tag is serialized back to a string and searched with a
    regular expression for a ``src="..."`` attribute.

    Args:
        html: HTML text of the page.

    Returns:
        A list with one entry per matching ``<img>`` tag. Each entry is the
        list returned by ``findall`` for that tag, so the URL itself is
        ``entry[0]``.

    NOTE: the pattern requires a space after the closing quote, so a
    ``src`` attribute that is immediately followed by the end of the tag is
    not matched. Because the leading ``.*`` is greedy, attributes whose name
    merely ends in ``src`` (such as ``data-src``) are matched as well.
    """
    # Compiled once: the pattern is the same for every tag.
    src_pattern = re.compile(r'.*src="(.*?)?" .*')
    soup = BeautifulSoup(html, "html.parser")
    adlist = []
    for tag in soup.find_all("img"):
        found = src_pattern.findall(str(tag))
        if found:
            adlist.append(found)
    return adlist
32 
# Create the picture folder, then download and save the scraped images
def download(adlist, root="/Users/sunkai/study_way/爬虫/picture/"):
    """Download every image in ``adlist`` into ``root`` as ``<index>.png``.

    Args:
        adlist: Sequence whose items are sequences with the image URL at
            position 0 (the shape produced by ``getimgURL``).
        root: Target directory. It is created (including parents) when at
            least one entry has to be processed. Change the default to
            suit your machine.

    Files that already exist are left untouched, so an interrupted run can
    be resumed. Entries without a URL and images whose request fails are
    skipped instead of aborting the whole batch.
    """
    if not adlist:
        return
    os.makedirs(root, exist_ok=True)
    for index, found in enumerate(adlist):
        path = os.path.join(root, str(index) + ".png")
        if os.path.exists(path):
            continue
        if not found or not found[0]:
            continue
        try:
            r = requests.get(found[0], timeout=30)
            # Without this check an HTTP error page would be saved as a .png.
            r.raise_for_status()
        except requests.RequestException:
            continue
        with open(path, "wb") as f:
            f.write(r.content)
47 
def main():
    """Fetch the article page, extract its image URLs and download them."""
    url = "https://mp.weixin.qq.com/s/Jy5bUXb4aOmzEoPe6WODJA"
    html = getHTMLText(url)
    image_urls = getimgURL(html)
    download(image_urls)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

二、生成PDF文件

 1 #!/usr/bin/env python
 2 # -*- coding: utf-8 -*-
 3 # Author: KaiSun
 4 
 5 
 6 #  简单生成方式
 7 import os
 8 
 9 from reportlab.lib.pagesizes import A4, portrait, landscape
10 from reportlab.pdfgen import canvas
11 
def convert_images_to_pdf(img_path, pdf_path):
    """Combine the numbered images in ``img_path`` into one portrait-A4 PDF.

    Each image is stretched to fill a full page; pages are ordered by the
    integer value of the file name without its extension (``0.png``,
    ``1.png``, ...).

    Args:
        img_path: Directory containing the images.
        pdf_path: Path of the PDF file to write.

    Directory entries that are not regular files, or whose name is not an
    integer, are ignored (for example hidden files created by the OS).
    """
    numbered = []
    for name in os.listdir(img_path):
        image_file = os.path.join(img_path, name)
        if not os.path.isfile(image_file):
            continue
        try:
            numbered.append((int(os.path.splitext(name)[0]), image_file))
        except ValueError:
            # Not a numbered image; skip instead of aborting the sort.
            continue
    numbered.sort()

    page_size = portrait(A4)
    (w, h) = page_size
    c = canvas.Canvas(pdf_path, pagesize=page_size)
    for _, image_file in numbered:
        c.drawImage(image_file, 0, 0, w, h)
        c.showPage()
    c.save()
24 
# Build the PDF from the downloaded pictures (adjust both paths as needed).
convert_images_to_pdf("/Users/sunkai/study_way/爬虫/picture/",
                      "/Users/sunkai/study_way/爬虫/result.pdf")
27 
28 # 根据不同尺寸生成
29 import os, shutil
30 from PIL import Image
31 from reportlab.lib.pagesizes import A4, portrait, landscape
32 from reportlab.pdfgen import canvas
33 from PyPDF2 import PdfFileWriter, PdfFileReader
def convert_image_to_pdf(img_path, pdf_path):
    """Write a one-page A4 PDF containing the image at ``img_path``.

    The page is landscape when the image is wider than it is tall and
    portrait otherwise. The image is stretched to fill the whole page.

    Args:
        img_path: Path of the image file.
        pdf_path: Path of the PDF file to write.
    """
    # Only the dimensions are needed; close the file handle right away.
    with Image.open(img_path) as img:
        (w0, h0) = img.size
    print(w0, h0)

    page_size = landscape(A4) if w0 > h0 else portrait(A4)
    (w, h) = page_size
    c = canvas.Canvas(pdf_path, pagesize=page_size)
    c.drawImage(img_path, 0, 0, w, h)
    c.showPage()
    c.save()
50 
def convert_images_to_pdf(img_path, pdf_path):
    """Combine the numbered images in ``img_path`` into one PDF.

    Every image is first converted to its own single-page PDF with
    ``convert_image_to_pdf`` (which picks the page orientation), then all
    pages are merged into ``pdf_path``. Pages are ordered by the integer
    value of the file name without its extension.

    Args:
        img_path: Directory containing the images.
        pdf_path: Path of the merged PDF file to write.

    Directory entries that are not regular files, or whose name is not an
    integer, are ignored.
    """
    import tempfile

    numbered = []
    for name in os.listdir(img_path):
        image_file = os.path.join(img_path, name)
        if not os.path.isfile(image_file):
            continue
        try:
            numbered.append((int(os.path.splitext(name)[0]), image_file))
        except ValueError:
            # Not a numbered image; skip instead of aborting the sort.
            continue
    numbered.sort()

    output = PdfFileWriter()
    handles = []
    # A private scratch directory: removing it can never delete a
    # pre-existing "./temp" folder that belongs to the user.
    tmp_path = tempfile.mkdtemp()
    try:
        for pages, (_, image_file) in enumerate(numbered, start=1):
            pdf = os.path.join(tmp_path, str(pages) + ".pdf")
            convert_image_to_pdf(image_file, pdf)
            # Keep the handle open until the merged file is written; the
            # reader may still need its stream when pages are serialized.
            handle = open(pdf, "rb")
            handles.append(handle)
            reader = PdfFileReader(handle)
            for page_index in range(reader.getNumPages()):
                output.addPage(reader.getPage(page_index))
        with open(pdf_path, "wb") as output_stream:
            output.write(output_stream)
    finally:
        for handle in handles:
            handle.close()
        shutil.rmtree(tmp_path, ignore_errors=True)
72 
73 
# Build the PDF from the downloaded pictures (adjust both paths as needed).
convert_images_to_pdf("/Users/sunkai/study_way/爬虫/picture/",
                      "/Users/sunkai/study_way/爬虫/result.pdf")

 

python批量获取公众号图片生成PDF文件

原文:https://www.cnblogs.com/sunkai1993/p/14371551.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!