首页 > 其他 > 详细

numpy+matplotlib+bs4的使用

时间:2021-02-18 23:22:59      阅读:46      评论:0      收藏:0      [点我收藏+]

1、首先安装库(下面的代码还用到了 requests 和 pandas,也需要一并安装:pip install requests pandas)

安装pip install  beautifulsoup4(即代码中 import 的 bs4 模块)

安装pip install  numpy

安装pip install  matplotlib

2、爬取指定网页数据,然后使用numpy和pandas对数据进行处理,最后使用 matplotlib 进行显示

import requests
from bs4 import BeautifulSoup
import numpy
import pandas
from matplotlib import pyplot


def _tag_text(record, tag_name):
    """Return the first child of ``<tag_name>`` inside *record* as a string.

    Returns ``numpy.nan`` when the tag is absent or has no contents, so that
    missing values survive the later ``astype(float)`` / ``to_datetime`` calls.
    """
    node = record.find(tag_name)
    if node is None or not node.contents:
        return numpy.nan
    return str(node.contents[0])


def getMessage(code, startdate, enddate):
    """Fetch fund net-value records and return them as a DataFrame.

    Sends a GET request to the hexun open-fund net-value endpoint with the
    given fund code and date range, parses every ``<data>`` element of the
    response, and collects four child tags per record.

    Args:
        code: Fund code, sent as the ``fundcode`` query parameter.
        startdate: Start of the date range, sent as ``startdate``.
        enddate: End of the date range, sent as ``enddate``.

    Returns:
        A ``pandas.DataFrame`` with the columns "日期", "单位净值", "历史净值"
        and "总市值" (one row per ``<data>`` element). Cells hold the tag text
        as ``str``, or ``numpy.nan`` where the tag is missing or empty. The
        frame is empty (but keeps its columns) when no records are returned.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = "http://data.funds.hexun.com/outxml/detail/openfundnetvalue.aspx"
    params = dict(fundcode=code, startdate=startdate, enddate=enddate)
    headers = {
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN",
    }
    # A timeout keeps the script from hanging forever on an unresponsive host.
    response = requests.get(url, params=params, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, "html.parser")
    print(soup.prettify())  # dump the fetched document, pretty-printed

    columns = ["日期", "单位净值", "历史净值", "总市值"]
    # Source tag for each output column, in the same order as `columns`.
    tag_names = ["fld_enddate", "fld_unitnetvalue", "fld_netvalue", "fld_newprice"]

    # One row per <data> element; tags may be empty, hence the NaN fallback.
    rows = [
        [_tag_text(record, tag_name) for tag_name in tag_names]
        for record in soup.find_all("data")
    ]

    # Passing `columns` explicitly keeps the schema even when `rows` is empty.
    frame = pandas.DataFrame(rows, columns=columns)
    print(frame)
    return frame


def createMap(data):
    """Plot the unit net value over time and show the figure.

    Args:
        data: A DataFrame with a "日期" column holding dates formatted as
            ``%Y/%m/%d`` and a "单位净值" column convertible to float.
    """
    import os

    from matplotlib.font_manager import FontProperties

    # A CJK-capable font is needed so the Chinese title/labels are not garbled.
    font_path = r"c:\windows\fonts\simsun.ttc"
    if os.path.isfile(font_path):
        font_set = FontProperties(fname=font_path, size=12)
    else:
        # The font file is Windows-specific; elsewhere, fall back to looking
        # up commonly installed CJK fonts by family name.
        font_set = FontProperties(
            family=["SimSun", "SimHei", "Microsoft YaHei", "sans-serif"],
            size=12,
        )

    # Configure title and axis labels.
    pyplot.title("历史净值变化", fontproperties=font_set)
    pyplot.xlabel("日期", fontproperties=font_set)
    pyplot.ylabel("单位净值", fontproperties=font_set)

    dates = pandas.to_datetime(data["日期"], format="%Y/%m/%d")
    unit_values = data["单位净值"].astype(float)
    pyplot.plot(dates, unit_values)
    pyplot.show()


if __name__ == "__main__":
    # Dates must be passed as strings: a bare 2019-11-21 is integer
    # subtraction, and 2019-12-08 is not even valid Python 3 syntax.
    result = getMessage(code="159915", startdate="2019-11-21", enddate="2019-12-08")
    createMap(result)

 

最后效果是这样

技术分享图片

numpy+matplotlib+bs4的使用

原文:https://www.cnblogs.com/hufengTE/p/14413104.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!