#!/usr/bin/python
import solr
import re
import urllib
import pycurl
import cStringIO
import collections
import operator
import string
import json
import pysolr
def solr_update(value):
    """Placeholder for a Solr update step; currently does nothing.

    Args:
        value: Accepted but ignored.

    Returns:
        None.
    """
def function(value):
    """Remove the entry with the largest value from ``value``.

    Args:
        value: A non-empty dict whose values can be compared with each
            other. It is modified in place: the selected entry is deleted.

    Returns:
        A ``(largest, value)`` tuple. ``largest`` is a one-entry dict holding
        the removed key and its value; ``value`` is the very dict object that
        was passed in, now without that entry. When several keys share the
        largest value, the first one in the dict's iteration order is taken.

    Raises:
        IndexError: If ``value`` is empty.
    """
    if not value:
        # IndexError (rather than max()'s ValueError) is the exception an
        # empty dict has always produced here, so callers may rely on it.
        raise IndexError("function() requires a non-empty dict")
    # max() returns the first maximal key it meets, so ties resolve to the
    # earliest key in iteration order.
    top_key = max(value, key=value.get)
    # pop() reads the value and deletes the entry in a single step.
    return ({top_key: value.pop(top_key)}, value)
def dict_sort(value):
    """Rank the entries of ``value`` from largest to smallest value.

    Args:
        value: A dict whose values can be compared with each other. It is
            not modified.

    Returns:
        A dict mapping a 1-based rank to a one-entry dict ``{key: val}``;
        rank 1 holds the largest value. Entries with equal values keep the
        order in which ``value`` iterates them. An empty input gives ``{}``.
    """
    # sorted() stays stable with reverse=True, so ties keep their input order.
    ordered = sorted(value.items(), key=operator.itemgetter(1), reverse=True)
    return {rank: {key: val} for rank, (key, val) in enumerate(ordered, 1)}
def solr_1(value):
    """Query Solr for the number of documents matching one user id.

    The count is recorded in the module-level ``ak_list_time`` dict, which
    must already exist when this function is called.

    Args:
        value: The user id as a string. Surrounding whitespace (for example
            the newline left on a line read from a file) is ignored.

    Returns:
        The shared ``ak_list_time`` dict. Keys are user ids; each value is
        the ``numFound`` count rendered as a double-quoted string such as
        ``'"3"'``. The quoted form is kept for compatibility with existing
        callers, which strip the quotes before converting to ``int``. An id
        that is already present keeps its earlier value.

    Raises:
        ValueError: If the reply body is not valid JSON.
        KeyError: If the reply has no ``response`` / ``numFound`` entry.
    """
    user_id = value.strip()
    url = ('http://10.67.15.63:8800/solr/collection1/select?q=userid:'
           + user_id + '&wt=json&indent=true')
    buf = cStringIO.StringIO()
    curl = pycurl.Curl()
    try:
        curl.setopt(curl.URL, url)
        curl.setopt(curl.WRITEFUNCTION, buf.write)
        curl.perform()
    finally:
        # Release the handle even when the transfer fails.
        curl.close()
    # The request asks for wt=json, so read the count from the parsed reply
    # instead of slicing the raw text.
    num_found = json.loads(buf.getvalue())['response']['numFound']
    if user_id not in ak_list_time:
        ak_list_time[user_id] = '"%d"' % num_found
    return ak_list_time
def solr_data(value):
    """Fetch the Solr documents stored for one user id.

    Args:
        value: The user id as a string. Surrounding whitespace is ignored.

    Returns:
        The value found under ``response`` -> ``docs`` in Solr's JSON reply.

    Raises:
        ValueError: If the reply body is not valid JSON.
        KeyError: If the reply has no ``response`` / ``docs`` entry.
    """
    url = ('http://10.67.15.63:8800/solr/collection1/select?q=userid:'
           + value.strip() + '&wt=json&indent=true')
    buf = cStringIO.StringIO()
    curl = pycurl.Curl()
    try:
        curl.setopt(curl.URL, url)
        curl.setopt(curl.WRITEFUNCTION, buf.write)
        curl.perform()
    finally:
        # Release the handle even when the transfer fails.
        curl.close()
    reply = json.loads(buf.getvalue())
    return reply['response']['docs']
if __name__ == '__main__':
    # Copy every user's documents from the source Solr into the target Solr,
    # handling users in ascending order of their document count.

    # Shared accumulator filled in by solr_1(): user id -> quoted hit count.
    ak_list_time = {}
    dict_1 = {}
    s = pysolr.Solr('http://10.13.144.225:8983/solr/')

    # One user id per line; the with-block guarantees the file is closed.
    k = ak_list_time
    with open('/home/mysql.txt', 'r') as a:
        for d in a:
            if d.strip():  # ignore blank lines
                k = solr_1(d)

    # solr_1() stores each count as a quoted string such as '"3"'.
    for kk, raw_count in k.items():
        dict_1[kk] = int(raw_count.strip('"'))

    dd = sorted(dict_1.items(), key=operator.itemgetter(1))
    print(dd)

    for user_id, hits in dd:
        if hits == 0:
            continue
        g = solr_data(user_id)
        for item_2 in g:
            # Drop the source index's _version_ field before re-adding;
            # presumably the target Solr should assign its own - confirm.
            item_2.pop('_version_', None)
        s.add(g)
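# This article comes from the "expect batch data synchronization" blog; please be sure to retain this attribution: http://4249964.blog.51cto.com/4239964/1536030
# Original article: http://4249964.blog.51cto.com/4239964/1536030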