首页 > 编程语言 > 详细

python 爬取媒体文件(使用chrome代理,启动客户端,有防火墙)

时间:2019-05-11 00:12:15      阅读:135      评论:0      收藏:0      [点我收藏+]
# -*- coding: utf-8 -*-
"""
Convert Chinese addresses to longitude/latitude coordinates.
"""
import json
import time
import urllib.parse
import urllib.request

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver

# Baidu Maps API key, sent as the ``ak`` query parameter of each request.
# NOTE(review): this is a credential committed to source -- consider loading
# it from configuration instead.
AK = "C2hKkyF9fHbmzESq6dmSArZIzw8wEiS1"
# Input table; its ADDRESS column holds the addresses to geocode.
table = pd.read_csv("./data/test.csv", encoding="utf-8")
# Output file that receives the collected results.
outfp = open("./data/result_test.csv", "w", encoding="utf-8")
class LoadData:
    """Geocode addresses with the Baidu geocoder through a Chrome browser.

    Requests are issued by a Selenium-driven Chrome instance, and the JSON
    response is read back out of the rendered page source.
    """

    def __init__(self):
        """Start the Chrome driver and create the empty result list."""
        print("start")
        # Raw string so the backslashes of the Windows path are taken
        # literally.
        # NOTE(review): passing the driver path positionally is the older
        # Selenium calling convention -- confirm against the installed version.
        self.m_driver = webdriver.Chrome(
            r"D:\Program Files (x86)\ChromeDriver\chromedriver.exe"
        )
        # Rows of [addr, lng, lat], one per processed address.
        self.loc_result = []

    @staticmethod
    def _parse_location(payload):
        """Extract ``(lng, lat)`` from the geocoder's JSON response text.

        Args:
            payload: Response body as text.

        Returns:
            ``(lng, lat)`` taken from ``result.location``, or ``(None, None)``
            when the text is not valid JSON or has no such object.
        """
        try:
            data = json.loads(payload)
        except ValueError:
            # Not JSON at all (e.g. an HTML error page).
            return None, None
        if not isinstance(data, dict):
            return None, None
        result = data.get("result")
        if not isinstance(result, dict):
            return None, None
        location = result.get("location")
        if not isinstance(location, dict):
            return None, None
        return location.get("lng"), location.get("lat")

    def get_uri(self, addr, city=""):
        """Query the geocoder for *addr* and return its coordinates.

        Args:
            addr: Address text sent as the ``address`` query parameter.
            city: Optional value for the ``city`` query parameter.

        Returns:
            ``(lng, lat)``; ``(None, None)`` when the response contains no
            usable location.
        """
        server = "http://api.map.baidu.com/geocoder/v2/?"
        params = urllib.parse.urlencode(
            {"address": addr, "city": city, "ak": AK, "output": "json"}
        )
        self.m_driver.get(server + params)
        bs = BeautifulSoup(self.m_driver.page_source, "lxml")
        # The JSON payload is read from the page's <pre> element; without
        # one there is nothing to parse.
        pre = bs.pre
        if pre is None:
            return None, None
        return self._parse_location(pre.get_text())

    def get_lng_lat(self, addr):
        """Geocode *addr* and append ``[addr, lng, lat]`` to ``loc_result``.

        Prints ``error`` when either coordinate is missing; the row is still
        recorded so the output keeps one entry per input address.
        """
        lng, lat = self.get_uri(addr)
        if lng is None or lat is None:
            print("error")
        self.loc_result.append([addr, lng, lat])

    def main(self):
        """Geocode every entry of the ``ADDRESS`` column and write the results.

        The collected rows are written to ``outfp`` as the ``str()`` of the
        whole result list.
        """
        addr_list = table["ADDRESS"].tolist()

        for addr in addr_list:
            self.get_lng_lat(addr)

        outfp.write(str(self.loc_result))
        # This method does not close the file, so push the data out now.
        outfp.flush()

# Script entry point: run the whole geocoding job and report the elapsed time.
if __name__ == "__main__":
    # time.clock() was removed in Python 3.8; perf_counter() is its
    # documented replacement for measuring elapsed time.
    tStart = time.perf_counter()

    LD = LoadData()
    try:
        LD.main()
    finally:
        # Release the browser and the output file even if the run fails.
        LD.m_driver.quit()
        outfp.close()

    tEnd = time.perf_counter()
    print("%s s" % (tEnd - tStart))

附录:

chromedriver.exe 与 Chrome 版本映射及下载链接

https://blog.csdn.net/mmayanshuo/article/details/78962398

 

python 爬取媒体文件(使用chrome代理,启动客户端,有防火墙)

原文:https://www.cnblogs.com/smuxiaolei/p/10847381.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!