首页 > 其他 > 详细

获取一篇新闻的全部信息

时间:2019-04-01 19:37:28      阅读:144      评论:0      收藏:0      [点我收藏+]
import requests
from datetime import datetime
from bs4 import BeautifulSoup


def newsdt(shareinfo):
    """Parse and print the publication time at the start of *shareinfo*.

    The first whitespace-separated token must look like ``<label>:YYYY-MM-DD``
    and the second like ``HH:MM:SS``; any further tokens are ignored.

    Args:
        shareinfo: Text of the article's info line.

    Returns:
        The publication time as a naive ``datetime``.

    Raises:
        IndexError: If there are fewer than two tokens, or the first token
            contains no colon.
        ValueError: If the date/time text does not match
            ``%Y-%m-%d %H:%M:%S``.
    """
    tokens = shareinfo.split()
    # The label may be followed by an ASCII or a full-width colon; normalise
    # to ASCII so a single split handles both.
    news_date = tokens[0].replace("\uff1a", ":").split(":", 1)[1]
    news_time = tokens[1]
    showtime = datetime.strptime(news_date + " " + news_time, "%Y-%m-%d %H:%M:%S")
    print("新闻发布时间:", end="")
    print(showtime)
    return showtime


# Hit-counter endpoint that anews() passes to click().
click_url = "http://oa.gzcc.cn/api.php?op=count&id=11086&modelid=80"
def click(click_url):
    """Fetch the hit-counter endpoint and print the click count.

    Args:
        click_url: URL of the counter API to query.

    Returns:
        The click count as an ``int``.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        IndexError: If the response has fewer ``.html`` segments or quoted
            values than expected.
        ValueError: If the extracted value is not an integer.
    """
    # A timeout keeps a stalled server from hanging the script forever.
    return_click_num = requests.get(click_url, timeout=10)
    click_info = BeautifulSoup(return_click_num.text, "html.parser")
    # Take the fourth ".html"-delimited segment and read its first
    # single-quoted value.
    # NOTE(review): the segment index (3) is kept from the original and the
    # quote separator is reconstructed (the original literal was empty, which
    # always raises ValueError) - confirm both against a live response.
    click_num = int(click_info.text.split(".html")[3].split("'")[1])
    print("点击次数:", end="")
    print(click_num)
    return click_num


# Article page passed to anews() by the module-level call.
html_url = "http://news.gzcc.cn/html/2017/xiaoyuanxinwen_0922/8232.html"
def _value_after_colon(field):
    """Return the text after the first ASCII or full-width colon in *field*."""
    return field.replace("\uff1a", ":").split(":", 1)[1]


def anews(html_url):
    """Download one news article and print all of its details.

    Prints the title, publishing unit, author and body text, then calls
    ``newsdt`` for the publication time and ``click`` (with the module-level
    ``click_url``) for the click count.

    Args:
        html_url: URL of the article page to scrape.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        IndexError: If an expected element or info-line field is missing.
    """
    # A timeout keeps a stalled server from hanging the script forever.
    resourses = requests.get(html_url, timeout=10)
    resourses.encoding = "UTF-8"
    soup = BeautifulSoup(resourses.text, "html.parser")

    # Select the info line once; every metadata field below is read from it.
    shareinfo = soup.select(".show-info")[0].text
    fields = shareinfo.split()

    print("\n新闻标题:" + soup.select(".show-title")[0].text)
    # Assumes each whitespace-separated token is "label:value", with the
    # publishing unit at index 4 and the author at index 2 (indices kept from
    # the original) - TODO confirm against the live page.
    publishing_unit = _value_after_colon(fields[4])
    print("新闻发布单位:", end="")
    print(publishing_unit)
    print("作者:", end="")
    writer = _value_after_colon(fields[2])
    print(writer)
    # Drop ideographic spaces (U+3000) from the body text.
    print("新闻内容:" + soup.select(".show-content")[0].text.replace("\u3000", ""))

    newsdt(shareinfo)
    click(click_url)

if __name__ == "__main__":
    # Scrape only when run as a script, so importing this module does not
    # trigger network requests.
    anews(html_url)

技术分享图片

获取一篇新闻的全部信息

原文:https://www.cnblogs.com/huangjianke123/p/10638290.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!