首页 > 其他 > 详细

地址经纬度解析

时间:2021-03-13 11:49:10      阅读:17      评论:0      收藏:0      [点我收藏+]

python\小脚本\地址经纬度解析.py

# -*- coding: utf-8 -*-
"""
Created on Thu Nov 24 10:10:13 2016

@author: Acer
"""

# -*- coding: utf-8 -*-
#import sys, urllib, urllib2, json ,requests,os,uniout
import requests,os,json
import random
#import _uniout
import json
import pandas as pd
import pandas as pd
from pandas import DataFrame
import xlrd,openpyxl

##读取文件
import pandas as pd

##读取excel文档地址信息
def xlsx_read(path, sheet_name):
    """Load one worksheet of an Excel workbook into a DataFrame.

    Args:
        path: Location of the workbook; passed unchanged to ``pd.ExcelFile``.
        sheet_name: Sheet to load; passed unchanged to ``ExcelFile.parse``.

    Returns:
        The object ``ExcelFile.parse`` produces for ``sheet_name``.
    """
    # The context manager closes the workbook handle even when parsing
    # raises; the handle was previously left open.
    with pd.ExcelFile(path) as xlsx:
        return xlsx.parse(sheet_name)
#sheet = xlsx_read()
##选取excel文档的需要地址信息
def sheet_values(data,address):
    """Normalise the header row and the address column of ``data`` in place.

    Args:
        data: DataFrame to clean; it is modified directly.
        address: Name of the address column, looked up after the column
            names have been stripped.

    Returns:
        The same ``data`` object that was passed in.
    """
    # Strip surrounding whitespace from every column name first so that
    # ``address`` can be matched against the cleaned header.
    stripped_headers = data.columns.str.strip()
    data.columns = stripped_headers

    # Remove every space character from the address strings.
    without_spaces = data[address].str.replace(" ", "")
    data[address] = without_spaces
    return data
#location_data =  sheet_values(sheet)


##地址转换成url地址
def url_add(location):
    """Build the AMap geocoding request URL for one address string.

    One key is picked at random from the module-level ``api_key`` list for
    each call. The ``city`` query parameter is fixed to '上海市'.

    Args:
        location: Address text to geocode (a ``str``).

    Returns:
        The request URL as a string.
    """
    import random
    from urllib.parse import quote

    key = random.choice(api_key)
    # Percent-encode the address: characters such as '#' or '&' would
    # otherwise end the query string early or inject extra parameters.
    encoded_address = quote(location, safe='')
    return (
        'http://restapi.amap.com/v3/geocode/geo?key=' + key
        + '&address=' + encoded_address
        + '&city=' + quote('上海市')
    )
#location_data[‘url‘]=location_data[u‘地址‘].apply(url_add)

##调用API,返回信息,‘URLError: <urlopen error timed out> All times is failed ‘ 再次调用2次
def requests_get(url,n=2,timeout=10):
    """GET ``url``, retrying on failure, and return the response or ``None``.

    Args:
        url: Address to request.
        n: Maximum number of attempts.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``;
            without one a stalled connection would block forever.

    Returns:
        The ``requests`` response of the first attempt that does not raise,
        or ``None`` when every attempt fails (or ``n`` is not positive).
    """
    import requests
    # Kept for backward compatibility only: the attempt count has always
    # been published as a module-level global by this function.
    global Max_Num
    Max_Num = n
    for _ in range(n):
        try:
            return requests.get(url, timeout=timeout)
        except Exception:
            # Deliberately best effort: this runs inside a worker pool, and
            # one bad URL must not abort the whole batch. Unlike a bare
            # ``except`` this still lets KeyboardInterrupt/SystemExit through.
            continue
    return None

##results=pool.map(requests_get,sheet[‘url‘])
##多线程调用
def ThreadPool(list_url,n=4):
    """Fetch every URL in ``list_url`` concurrently via ``requests_get``.

    Args:
        list_url: Iterable of URLs.
        n: Number of workers handed to the pool.

    Returns:
        A list with one ``requests_get`` result per URL, in input order.
    """
    # Imported under its own name so it no longer shadows this function.
    from multiprocessing.dummy import Pool

    # ``map`` blocks until every result is available, so leaving the
    # ``with`` block only releases the workers; previously they were leaked.
    with Pool(n) as pool:
        return pool.map(requests_get, list_url)
#函数实例化
#results=ThreadPool(location_data[‘url‘])

def _parse_geocode(response, location_items):
    """Convert one geocoding response into a DataFrame, or None on failure."""
    if response is None:
        return None
    try:
        payload = json.loads(response.text)
        frame = pd.DataFrame(payload['geocodes'], columns=location_items)
        # 'location' is expected to be a "lng,lat" string; only the first
        # geocode's coordinates are used.
        lng, lat = frame['location'].iloc[0].split(',')[:2]
    except (AttributeError, TypeError, ValueError, KeyError, IndexError):
        # Missing body, invalid JSON, no 'geocodes', empty result list or a
        # malformed 'location' all count as "could not be parsed".
        return None
    # As before, every row of the frame receives the first geocode's
    # longitude and latitude.
    frame['lng'] = lng
    frame['lat'] = lat
    return frame


def url_data(results,df,results_fallback=None):
    """Parse geocoding responses and write longitude/latitude into ``df``.

    For each position ``i`` the response ``results[i]`` is parsed. If that
    fails, ``results_fallback[i]`` is tried instead. On success the
    coordinates are stored in ``df.loc[i, 'lng']`` and ``df.loc[i, 'lat']``.

    Args:
        results: Sequence of response objects exposing a ``text`` attribute
            (entries may be ``None``).
        df: DataFrame updated in place. Rows are addressed with
            ``df.loc[i, ...]`` where ``i`` is the position in ``results``.
            NOTE(review): this presumably relies on ``df`` having the
            default 0..n-1 index - confirm against the caller.
        results_fallback: Optional sequence of alternative responses. When
            omitted, the module-level ``results_station`` is used if it
            exists, which is what this function always read implicitly.

    Returns:
        A DataFrame holding the parsed geocode rows of every successful
        position. When nothing could be parsed it is empty but still has the
        expected columns (this case used to raise ``UnboundLocalError``).
    """
    if results_fallback is None:
        results_fallback = globals().get('results_station')

    n_ok=0
    n_false=0
    location_items = ['formatted_address','location','province','city','district','street','number','lng','lat']
    frames = []
    for index,resq in enumerate(results):
        location_data = _parse_geocode(resq, location_items)
        if location_data is None and results_fallback is not None:
            try:
                fallback = results_fallback[index]
            except (IndexError, KeyError, TypeError):
                fallback = None
            location_data = _parse_geocode(fallback, location_items)

        if location_data is None:
            n_false=n_false+1
            print (str(n_false)+'解析错误')
            continue

        # Counted once per successfully parsed position; the counter was
        # previously bumped before parsing and again on the fallback path.
        n_ok=n_ok+1
        print (str(n_ok)+'正在解析ing...................')
        frames.append(location_data)
        df.loc[index,'lng'] = location_data['lng'].iloc[0]  # longitude
        df.loc[index,'lat'] = location_data['lat'].iloc[0]  # latitude

    if not frames:
        return pd.DataFrame([],columns=location_items)
    # A single concat at the end avoids re-copying the accumulated frame on
    # every iteration.
    return pd.concat(frames)
##测试用例
#data = url_data(results)


# Keys for the AMap REST API; url_add() picks one at random per request.
# NOTE(review): these keys are committed in source - consider loading them
# from configuration or the environment, and rotating the values.
api_key=['8191d1b4718e17d8b5b2e2b9a9f31bb0','09b7d72a3dc2bd30e86b23dc11b382fc','efe64265959124ade43857e06322577b','00210b231b1895ddfc190142ccbfda59']


# Input workbook and sheet. Earlier assignments (a temp workbook and
# 'Sheet1') were overwritten immediately and have been dropped.
path = r'C:\Users\HP\Desktop\食行生鲜站点信息-20181017.xlsx'
sheet_name = '上海'

# Column names used from the sheet.
address='地址'
address_station='站点名称'
address_area = '区域'


data = xlsx_read(path,sheet_name)
location_data =  sheet_values(data,address)

# Two candidate queries per row. 'url' (area + station name) is the primary
# query and 'station_url' (area + address) is the fallback that url_data()
# reads through the global ``results_station``.
# NOTE(review): the original comment said the station name is the fallback
# for an unresolvable address, which is the opposite of what the code does -
# confirm which order is intended.
location_data['url']=(location_data[address_area]+location_data[address_station]).apply(url_add)

location_data['station_url']=(location_data[address_area]+location_data[address]).apply(url_add)
results=ThreadPool(location_data['url'])
results_station=ThreadPool(location_data['station_url'])


df=location_data
results_values = url_data(results,df)
# Has no visible effect outside an interactive session.
results_values.head()


out_path = r'C:\Users\HP\Desktop\new-ok食行生鲜站点信息-20181017.xlsx'
# ExcelWriter.save() was removed in pandas 2.0; the context manager writes
# and closes the file on exit. The sheet name is passed by keyword because
# newer pandas no longer accepts it positionally.
with pd.ExcelWriter(out_path, engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='new'+sheet_name)


地址经纬度解析

原文:https://www.cnblogs.com/ministep/p/14527699.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!