#!/usr/local/bin/python
#coding=UTF-8

import os
import csv as csv
import re
import shutil
import os.path
import datetime
import time
import gzip


csv.field_size_limit(1000000)


def ztetd15data(timestr, granulityPeriods, oridir, outputdir):
    """Re-stamp every ``.xml`` file under *oridir* and store it gzipped.

    Each source file is expected to be named like
    ``PM201310271604+080024A20131027.1545+0800-20131027.1600+0800_101_Carrier_-_1.xml``.
    The first 22 characters and everything after character 59 (minus the
    ``.xml`` extension) are kept; the period in between is replaced by
    ``<start>+0800-<end>+0800`` where ``<start>`` is *timestr* and ``<end>``
    is *timestr* plus *granulityPeriods* minutes, both formatted as
    ``%Y%m%d.%H%M``.  The ``+0800`` offset is hard-coded.

    Args:
        timestr: Period start, formatted ``%Y-%m-%d %H:%M:%S``.
        granulityPeriods: Length of the period in minutes.
        oridir: Directory tree that is walked for source ``.xml`` files.
        outputdir: Directory receiving the ``.xml.gz`` results, e.g.
            ``D:/OMC_DATA/HW/GSM/OMC1/TIL-HBSC-020/PM/DECODE/``.  It is
            created when missing; surrounding whitespace is ignored.

    Raises:
        ValueError: If *timestr* does not match the expected format.
    """
    outpath = outputdir.strip()
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    period_start = datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S')
    period_end = period_start + datetime.timedelta(minutes=granulityPeriods)

    # Start/end stamps as they appear inside the file name.
    starttime = period_start.strftime('%Y%m%d.%H%M')
    endtime = period_end.strftime('%Y%m%d.%H%M')
    newfilenametime = starttime + "+0800-" + endtime + "+0800"

    for root, dirs, files in os.walk(oridir):
        for filename in files:
            # A literal suffix test; the old pattern ".xml$" also accepted
            # names such as "decoyxml" because the dot was not escaped.
            if not filename.endswith(".xml"):
                continue

            inputpath = root + '/' + filename
            print(inputpath)
            print(filename)

            # Fixed-width slicing: assumes the naming layout described in
            # the docstring.
            fileprefix = filename[0:22]
            filesuffix = filename[59:-4]
            print(fileprefix)
            print(filesuffix)

            outfile = fileprefix + newfilenametime + filesuffix
            print(outfile)
            # Use the stripped path so files land in the directory that was
            # actually created above.
            outputfile = outpath + "/" + outfile + ".xml"
            print(outputfile)

            # Compress straight from the source into "<outputfile>.gz".  The
            # end state matches the old copy / gzip / delete sequence, but no
            # uncompressed copy is left behind if something fails midway, and
            # both handles are always closed.
            targetname = outputfile + ".gz"
            with open(inputpath, 'rb') as f_in:
                with gzip.open(targetname, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)


def readpath(oripath):
    """Read the path-mapping file and return its entries.

    Every non-blank line of *oripath* is split on commas; the first field is
    the source directory and the second the target directory.  Target
    directories that do not exist yet are created.

    Args:
        oripath: Path of the comma-separated configuration file.

    Returns:
        A list of dicts with the keys ``"ori"`` and ``"target"``.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
    """
    pathlist = []

    with open(oripath, 'r') as fp:
        for line in fp:
            print(line)
            # Strip both "\n" and "\r" so files with Windows line endings do
            # not leave a stray "\r" on the target path.
            line = line.strip('\r\n')
            if not line.strip():
                continue  # tolerate blank / trailing empty lines
            paths = line.split(',')
            pathdic = {"ori": paths[0], "target": paths[1]}
            if not os.path.exists(paths[1]):
                os.makedirs(paths[1])
            pathlist.append(pathdic)

    return pathlist


tdoripath = "/tomcat/***/CREATE_DATA/omcdatapathTDPM.csv"


def runBySystemTime():
    """Run ztetd15data once for the current local time, 15-minute period.

    The source/target directory pairs are read from ``tdoripath``.  The
    elapsed time in seconds is printed at the end.
    """
    # time.clock() was removed in Python 3.8; time.time() exists everywhere.
    start = time.time()

    timestr = time.strftime("%Y-%m-%d %H:%M:%S")
    tdpathlist = readpath(tdoripath)

    granulityPeriods = 15

    for path in tdpathlist:
        ztetd15data(timestr, granulityPeriods, path.get("ori"), path.get("target"))

    finish = time.time()

    print("finished,couse:")
    print(finish - start)


def runByDuration(begintime, endtime, durantion):
    """Replay the conversion for every step between two timestamps.

    Args:
        begintime: First timestamp, formatted ``%Y-%m-%d %H:%M:%S``.
        endtime: Last timestamp (inclusive), same format.  The comparison is
            done on the strings, which orders correctly for this format.
        durantion: Step between two runs, in minutes; must be positive.

    Raises:
        ValueError: If *durantion* is not positive (the loop would never
            terminate) or *begintime* is malformed.
    """
    if durantion <= 0:
        raise ValueError("durantion must be a positive number of minutes")

    print(begintime)
    step = datetime.timedelta(minutes=durantion)

    while begintime <= endtime:
        # NOTE(review): this used to read an undefined name `oripath` (its
        # only assignment, a Windows desktop path to omcdatapathTDPM.csv, was
        # commented out).  `tdoripath` is the only mapping file defined in
        # this script -- confirm that it is the intended one.
        pathlist = readpath(tdoripath)

        for path in pathlist:
            # Pass the advancing loop timestamp; the old code passed the
            # module constant `begin_time`, so every iteration produced the
            # same output period.
            # NOTE(review): ztegsm60data is not defined in this script; it
            # must be defined before this function is called.
            ztegsm60data(begintime, granulityPeriods, path.get("ori"), path.get("target"))

        daytime = datetime.datetime.strptime(begintime, '%Y-%m-%d %H:%M:%S') + step
        begintime = daytime.strftime('%Y-%m-%d %H:%M:%S')

    print("end of while")


# Period length in minutes used by runByDuration.
granulityPeriods = 60
# Start of the processing window (used when model == 1).
begin_time = '2013-02-16 01:00:00'
# End of the processing window (used when model == 1).
end_time = '2013-02-17 01:00:00'

# 1: replay the fixed window above; 2: process the current system time.
model = 2

if __name__ == "__main__":
    if model == 1:
        runByDuration(begin_time, end_time, 15)
    elif model == 2:
        runBySystemTime()
    else:
        print("wrong args!")


# The code that rewrites the CSV files (below) is similar to the above.
def _open_csv(path, mode):
    """Open *path* for the csv module; *mode* is ``'r'`` or ``'w'``.

    Python 3's csv module wants text files opened with ``newline=''``,
    Python 2's wants binary files.
    """
    import sys
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='')
    return open(path, mode + 'b')


def ztegsm60data(timestr, granulityPeriods, oridir, outputdir):
    """Re-stamp every ``.csv`` file under *oridir* and write it to *outputdir*.

    The last 33 characters of each source file name (a
    ``<start>-<end>.csv`` tail with both stamps in ``%d%b%Y_%H%M`` form) are
    replaced by the new period, where ``<start>`` is *timestr* and ``<end>``
    is *timestr* plus *granulityPeriods* minutes.  The header row is copied
    unchanged; in every following row the first column is overwritten with
    the period end formatted as ``%Y%m%d%H%M``.

    Args:
        timestr: Period start, formatted ``%Y-%m-%d %H:%M:%S``.
        granulityPeriods: Length of the period in minutes.
        oridir: Directory tree that is walked for source ``.csv`` files.
        outputdir: Directory receiving the rewritten files; created when
            missing, surrounding whitespace is ignored.

    Raises:
        ValueError: If *timestr* does not match the expected format.
    """
    outpath = outputdir.strip()
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    period_start = datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S')
    period_end = period_start + datetime.timedelta(minutes=granulityPeriods)

    # Stamps used in the file name.  NOTE: %b is locale dependent.
    starttime = period_start.strftime('%d%b%Y_%H%M')
    endtime = period_end.strftime('%d%b%Y_%H%M')
    # Value written into the first column of every data row.
    modifycollecttime = period_end.strftime('%Y%m%d%H%M')

    newfilenametime = starttime + '-' + endtime

    for root, dirs, files in os.walk(oridir):
        for filename in files:
            # A literal suffix test; the old pattern ".csv$" also accepted
            # names such as "notescsv" because the dot was not escaped.
            if not filename.endswith(".csv"):
                continue

            inputpath = root + '/' + filename

            # Drop the trailing "<start>-<end>.csv" (14 + 1 + 14 + 4 = 33
            # characters).  The old code padded the name with a space and
            # sliced 34, which yields the same prefix.
            fileprefix = filename[0:-33]
            outputfile = outpath + '/' + fileprefix + newfilenametime + ".csv"

            with _open_csv(inputpath, 'r') as src:
                reader = csv.reader(src)
                header = next(reader, None)
                print(inputpath)
                if header is None:
                    # Empty source file: nothing to re-stamp.
                    print("skip empty file")
                    continue

                with _open_csv(outputfile, 'w') as dst:
                    writer = csv.writer(dst)
                    # First row carries the field names.
                    writer.writerow(header)
                    print(outputfile)

                    # Overwrite the time column of every data row.
                    for row in reader:
                        if row:  # blank lines parse to an empty list
                            row[0] = modifycollecttime
                        writer.writerow(row)


# Author's note: the hardest part was converting strings to datetime.  I have
# not fully understood it yet; I just kept trying until something worked.
原文:http://www.cnblogs.com/wangfantasy/p/3540184.html