首页 > 其他 > 详细

你是啥成份?

时间:2018-09-26 01:07:09      阅读:57      评论:0      收藏:0      [点我收藏+]

标签:ttr   repo   tab   tor   while   res   resp   info   analyze   

各种编程语言我都很喜欢,但平时用的最多的是什么呢?

一个 GitHub 小爬虫:获取某个用户的全部 repo 及其主要语言,并画出饼图。

技术分享图片

"""
你是什么成份?
"""

import requests
from pyquery import PyQuery as pq
import matplotlib.pyplot as plt
from collections import Counter
import numpy as np


def parse_page(url):
    """Download one repository-listing page and extract its repositories.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        A ``(repos, sons)`` tuple. ``repos`` is a list of dicts with the keys
        ``'name'`` (text of the entry's ``h3``) and ``'language'`` (text of
        its ``itemprop='programmingLanguage'`` element). ``sons`` is a list
        of absolute URLs taken from the page's ``.pagination`` anchors, in
        document order and without duplicates.

    Raises:
        requests.exceptions.RequestException: if the download fails or does
            not answer within the timeout.
    """
    from urllib.parse import urljoin

    print(url)
    # Without an explicit timeout requests waits indefinitely on a stalled
    # connection, which would hang the whole crawl.
    resp = requests.get(url, timeout=30)
    html = pq(resp.text)
    repos = [
        {
            'name': repo('h3').text(),
            'language': repo("[itemprop='programmingLanguage']").text(),
        }
        for repo in html("#user-repositories-list li").items()
    ]
    # Collect every pagination anchor rather than only the first one: when
    # the first anchor points backwards (e.g. a "Previous" link) following it
    # alone would never reach the later pages. The caller is expected to
    # skip URLs it has already seen.
    hrefs = (anchor.attr('href') for anchor in html(".pagination a").items())
    # urljoin turns site-relative hrefs into absolute URLs and leaves
    # already-absolute ones untouched; dict.fromkeys de-duplicates while
    # keeping document order.
    sons = list(dict.fromkeys(urljoin(url, href) for href in hrefs if href))
    return repos, sons


def analyze(repos):
    """Show a pie chart of how often each language occurs in ``repos``.

    Args:
        repos: Iterable of dicts carrying a ``'name'`` and a ``'language'``
            key. Entries sharing a name are counted once, and entries whose
            language is empty are left out of the chart.

    Returns:
        None. The chart is displayed through ``plt.show()``.
    """
    # De-duplicate by repository name; a later entry replaces an earlier one
    # that has the same name.
    by_name = {}
    for entry in repos:
        by_name[entry['name']] = entry

    languages = [entry['language'] for entry in by_name.values() if entry['language']]
    tally = Counter(languages)
    labels = tally.keys()
    sizes = np.array(list(tally.values()))

    # An explode value of 0.1 pushes a wedge outwards; everything else stays
    # at 0. Only the three most frequent languages are highlighted this way.
    explode = np.zeros_like(sizes, dtype=np.float32)
    top_three = np.argsort(sizes)[-3:]
    explode[top_three] = 0.1

    # startangle is the angle at which the first wedge of the pie begins.
    plt.pie(
        sizes,
        explode=explode,
        labels=labels,
        autopct='%1.1f%%',
        shadow=False,
        startangle=90,
    )
    plt.show()


def schedule(user="weiyinfu"):
    """Crawl the repository listing of a GitHub user page by page.

    Starts at the user's ``?tab=repositories`` page and keeps fetching the
    follow-up URLs reported by ``parse_page`` until no unvisited URL is left.

    Args:
        user: GitHub account name whose repositories are collected. Defaults
            to the account the script was originally written for.

    Returns:
        A list with the repository dicts of every fetched page, in fetch
        order. The same repository may appear more than once if two fetched
        URLs show the same page.
    """
    seed = "https://github.com/" + user + "?tab=repositories"
    pending = [seed]
    # The seed counts as visited from the start so that a page linking back
    # to exactly this URL does not trigger a second download of it.
    visited = {seed}
    repos = []
    while pending:
        now = pending.pop()
        repo_list, url_list = parse_page(now)
        repos += repo_list
        for link in url_list:
            if link not in visited:
                visited.add(link)
                pending.append(link)

    return repos


def main():
    """Collect the repositories, print them, then plot the language chart."""
    collected = schedule()
    print(collected)
    analyze(collected)


# Run the crawler only when the file is executed as a script.
if __name__ == "__main__":
    main()

你是啥成份?

标签:ttr   repo   tab   tor   while   res   resp   info   analyze   

原文:https://www.cnblogs.com/weiyinfu/p/9704368.html

(0)
(0)
   
举报
评论 一句话评论(0)
0条  
登录后才能评论!
© 2014 bubuko.com 版权所有 鲁ICP备09046678号-4
打开技术之扣,分享程序人生!
             

鲁公网安备 37021202000002号