基于python的三方库pandas的excel表二次开发
import numpy as np import pandas as pd import time from pandas import merge import openpyxl import datetime Date = input(‘请输入日期:‘) def html_to_excel(): with open(r‘C:\Users\g\Desktop\CUX:缺件统计表_160618.xls‘,‘rb‘) as f: df_o = pd.read_html(f.read()) print(‘数据表正在合并!‘) bb = pd.ExcelWriter(‘F:\缺件表.xlsx‘) df_o[0].to_excel(bb) bb.close() ‘‘‘ 1.删除掉订单类型是SO的产线且是13开头的数据,再定义SO为紧急要料 2.按物料、产线对实际缺件数求和,并输出所有行>>>df_inner1 ‘‘‘ # df=pd.DataFrame(pd.read_excel(‘YC_扬柴主机缺.xlsx‘)) # print(df) # Url = ‘C:\Users\g\Desktop\CUX:缺件统计表_140618.xlsx‘ # Url = input(‘请输入文件名:‘) def change_excel(): # df = pd.read_excel(r‘C:\Users\g\Desktop\CUX:缺件统计表_150618.xlsx‘,sheet_name=0,header=7) df = pd.read_excel(‘F:\缺件表.xlsx‘, sheet_name=0, header=8) # df[‘需求日期‘] = df[‘需求日期‘].astype(str) df[‘需求日期‘] = pd.to_datetime(df[‘需求日期‘], format=‘%Y-%m-%d %H:%M:%S‘,errors=‘coerce‘) # print(df.head()) # print(df.info()) # df[‘产线‘] = df[‘产线‘].astype(‘object‘) df1 = df[(~df[‘产线‘].isin([130000007.0,130000001.0,130000002.0,130000003.0,130000005.0,130000021.0,130000022.0,130000023.0,130000024.0]))&(df[‘订单类型‘]==‘SO‘)] df2 = df[df[‘订单类型‘]!=‘SO‘] df = df1.append(df2) print(‘数据表合并已完成!‘) #######################逻辑处理有问题,excel导出有问题######################## #######################以上BUG已经修复######################################### df[‘产线‘] = df[‘产线‘].replace(130000001.0, ‘485装配线‘) df[‘产线‘] = df[‘产线‘].replace(130000007.0, ‘随机附件‘) df[‘产线‘] = df[‘产线‘].replace(130000002.0, ‘4102装配线‘) df[‘产线‘] = df[‘产线‘].replace(130000003.0, ‘VM装配线‘) df[‘产线‘] = df[‘产线‘].replace(130000005.0, ‘整理线‘) df[‘产线‘] = df[‘产线‘].replace(130000021.0, ‘二号厂装配线‘) df[‘产线‘] = df[‘产线‘].replace(130000022.0, ‘二号厂试车线‘) df[‘产线‘] = df[‘产线‘].replace(130000023.0, ‘二号厂成套线‘) df[‘产线‘] = df[‘产线‘].replace(130000024.0, ‘二号厂随机附件‘) df[‘产线‘] = df[‘产线‘].replace(130000600.0, ‘外协件销售出库‘) df[‘产线‘] = df[‘产线‘].replace(930000001.0, ‘制造工程部‘) df[‘产线‘] = df[‘产线‘].replace(930000002.0, ‘铸造厂‘) df[‘产线‘] = df[‘产线‘].replace(930000003.0, ‘金加工厂‘) df[‘产线‘] = df[‘产线‘].replace(930000004.0, ‘金加工厂机体线‘) df[‘产线‘] = df[‘产线‘].replace(930000005.0, ‘装配厂‘) df[‘产线‘] = df[‘产线‘].replace(930000006.0, ‘装配厂102线‘) df[‘产线‘] = df[‘产线‘].replace(930000007.0, ‘装配厂85线‘) df[‘产线‘] = df[‘产线‘].replace(930000008.0, ‘装配厂VM线‘) df[‘产线‘] = df[‘产线‘].replace(930000009.0, ‘调试厂产线‘) df[‘产线‘] = df[‘产线‘].replace(930000010.0, ‘调试厂生产‘) df[‘产线‘] = df[‘产线‘].replace(930000011.0, ‘调试厂设修‘) df[‘产线‘] = df[‘产线‘].replace(930000012.0, ‘质量部设修‘) df[‘产线‘] = df[‘产线‘].replace(930000013.0, ‘质量部市场‘) df[‘产线‘] = df[‘产线‘].replace(930000014.0, ‘质量部检验‘) df[‘产线‘] = df[‘产线‘].replace(930000015.0, ‘技术中心‘) df[‘产线‘] = df[‘产线‘].replace(930000016.0, ‘应用工程部‘) df[‘产线‘] = df[‘产线‘].replace(930000017.0, ‘产品试验中心‘) df[‘产线‘] = df[‘产线‘].replace(930000018.0, ‘平台开发‘) df[‘产线‘] = df[‘产线‘].replace(930000019.0, ‘物流部‘) df[‘产线‘] = df[‘产线‘].replace(930000020.0, ‘二号工厂机体加工线‘) df[‘产线‘] = df[‘产线‘].replace(930000021.0, ‘二号工厂缸盖加工线‘) df[‘产线‘] = df[‘产线‘].replace(930000022.0, ‘二号工厂装配线‘) df[‘产线‘] = df[‘产线‘].replace(930000023.0, ‘二号工厂试车线‘) df[‘产线‘] = df[‘产线‘].replace(930000024.0, ‘二号工厂成套线‘) df[‘产线‘] = df[‘产线‘].replace(930000025.0, ‘二号工厂机体加工线设修‘) df[‘产线‘] = df[‘产线‘].replace(930000026.0, ‘二号工厂缸盖加工线设修‘) df[‘产线‘] = df[‘产线‘].replace(930000027.0, ‘二号工厂装配线设修‘) df[‘产线‘] = df[‘产线‘].replace(930000028.0, ‘二号工厂试车线设修‘) df[‘产线‘] = df[‘产线‘].replace(930000029.0, ‘二号工厂试车线生产工具‘) df[‘产线‘] = df[‘产线‘].replace(930000030.0, ‘二号工厂成套线设修‘) df[‘产线‘] = df[‘产线‘].replace(930000031.0, ‘调试厂校机维修‘) df[‘产线‘] = df[‘产线‘].replace(930000032.0, ‘配件加工生产‘) df[‘产线‘] = df[‘产线‘].replace(930000033.0, ‘材料、零部件让售‘) df[‘产线‘] = df[‘产线‘].replace(930000034.0, ‘装配厂102线改型‘) df[‘产线‘] = df[‘产线‘].replace(930000035.0, ‘装配厂85线改型‘) df[‘产线‘] = df[‘产线‘].replace(930000036.0, ‘装配厂VM线改型‘) df[‘产线‘] = df[‘产线‘].replace(930000037.0, ‘调试厂产线改型‘) df[‘产线‘] = df[‘产线‘].replace(930000038.0, ‘二号工厂装配线改型‘) df[‘产线‘] = df[‘产线‘].replace(930000039.0, ‘二号工厂试车线改型‘) df[‘产线‘] = df[‘产线‘].replace(930000040.0, ‘二号工厂成套线改型‘) df[‘产线‘] = df[‘产线‘].replace(930000041.0, ‘装配厂102线补废‘) df[‘产线‘] = df[‘产线‘].replace(930000042.0, ‘装配厂85线补废‘) df[‘产线‘] = df[‘产线‘].replace(930000043.0, ‘装配厂VM线补废‘) df[‘产线‘] = df[‘产线‘].replace(930000044.0, ‘调试厂产线补废‘) df[‘产线‘] = df[‘产线‘].replace(930000045.0, ‘二号工厂装配线补废‘) df[‘产线‘] = df[‘产线‘].replace(930000046.0, ‘二号工厂试车线补废‘) df[‘产线‘] = df[‘产线‘].replace(930000047.0, ‘二号工厂成套线补废‘) df[‘产线‘] = df[‘产线‘].replace(930000048.0, ‘随机附件补充领用‘) df[‘产线‘] = df[‘产线‘].replace(np.nan, ‘未定义‘) df[‘保管员‘] = df[‘保管员‘].replace(np.nan,‘未定义‘) df[‘订单类型‘] = df[‘订单类型‘].replace(‘MO‘,‘一次下架‘) ######################################################## df[‘订单类型‘] = df[‘订单类型‘].replace(‘SO‘,‘紧急要料‘) ######################################################## # df_inner = df.groupby(‘实际缺件数‘)[‘物料‘].count() # 对city字段进行汇总,并分别计算prince的合计和均值 # df_inner.groupby(‘city‘)[‘price‘].agg([len,np.sum, np.mean]) # df[df[‘creativeID‘]<=10000] ########################################################### # print(df.head()) # df.to_excel(‘F:\excel_to_python.xlsx‘, sheet_name=‘gg‘) ##############导出操作################################### ‘‘‘ 以上已经完成保管员的数据表 下一步需要处理 1.获取物料、产线、订单类型维护汇总的缺件总数(完成) 2.合并表的内容(完成) 3.获取指定日期的缺件数() ‘‘‘ # df_inner1.to_excel(‘F:\kk.xlsx‘, sheet_name=‘gg‘) # df[‘产线‘] = df[‘产线‘].replace(‘485装配线‘,‘485装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘随机附件‘,‘随机附件‘) # df[‘产线‘] = df[‘产线‘].replace(‘4102装配线‘,‘4102装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘VM装配线‘,‘VM装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘整理线‘,‘整理线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号厂装配线‘,‘二号厂装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号厂试车线‘,‘二号厂试车线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号厂成套线‘,‘二号厂成套线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号厂随机附件‘,‘二号厂随机附件‘) # df[‘产线‘] = df[‘产线‘].replace(‘外协件销售出库‘,‘外协件销售出库‘) # df[‘产线‘] = df[‘产线‘].replace(‘制造工程部‘,‘制造工程部‘) # df[‘产线‘] = df[‘产线‘].replace(‘铸造厂‘,‘铸造厂‘) # df[‘产线‘] = df[‘产线‘].replace(‘金加工厂‘,‘金加工厂‘) # df[‘产线‘] = df[‘产线‘].replace(‘金加工厂机体线‘,‘金加工厂‘) # df[‘产线‘] = df[‘产线‘].replace(‘装配厂‘,‘4102装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘装配厂102线‘,‘4102装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘装配厂85线‘,‘485装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘装配厂VM线‘,‘VM装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘调试厂产线‘,‘整理线‘) # df[‘产线‘] = df[‘产线‘].replace(‘调试厂生产‘,‘整理线‘) # df[‘产线‘] = df[‘产线‘].replace(‘调试厂设修‘,‘整理线‘) # df[‘产线‘] = df[‘产线‘].replace(‘质量部设修‘,‘质量部‘) # df[‘产线‘] = df[‘产线‘].replace(‘质量部市场‘,‘质量部‘) # df[‘产线‘] = df[‘产线‘].replace(‘质量部检验‘,‘质量部‘) # df[‘产线‘] = df[‘产线‘].replace(‘技术中心‘,‘技术中心‘) # df[‘产线‘] = df[‘产线‘].replace(‘应用工程部‘,‘技术中心‘) # df[‘产线‘] = df[‘产线‘].replace(‘产品试验中心‘,‘技术中心‘) # df[‘产线‘] = df[‘产线‘].replace(‘平台开发‘,‘技术中心‘) # df[‘产线‘] = df[‘产线‘].replace(‘物流部‘,‘物流部‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂机体加工线‘,‘二号工厂机体‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂缸盖加工线‘,‘二号工厂缸盖‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂装配线‘,‘二号厂装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂试车线‘,‘二号厂试车线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂成套线‘,‘二号工厂成套线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂机体加工线设修‘,‘二号工厂机体‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂缸盖加工线设修‘,‘二号工厂缸盖‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂装配线设修‘,‘二号厂装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂试车线设修‘,‘二号厂试车线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂试车线生产工具‘,‘二号厂试车线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂成套线设修‘,‘二号工厂成套线‘) # df[‘产线‘] = df[‘产线‘].replace(‘调试厂校机维修‘,‘校机维修‘) # df[‘产线‘] = df[‘产线‘].replace(‘配件加工生产‘,‘配件加工‘) # df[‘产线‘] = df[‘产线‘].replace(‘材料、零部件让售‘,‘材料、零部件让售‘) # df[‘产线‘] = df[‘产线‘].replace(‘装配厂102线改型‘,‘4102装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘装配厂85线改型‘,‘485装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘装配厂VM线改型‘,‘VM装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘调试厂产线改型‘,‘整理线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂装配线改型‘,‘二号厂装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂试车线改型‘,‘二号厂试车线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂成套线改型‘,‘二号工厂成套线‘) # df[‘产线‘] = df[‘产线‘].replace(‘装配厂102线补废‘,‘4102装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘装配厂85线补废‘,‘485装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘装配厂VM线补废‘,‘VM装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘调试厂产线补废‘,‘整理线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂装配线补废‘,‘二号厂装配线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂试车线补废‘,‘二号厂试车线‘) # df[‘产线‘] = df[‘产线‘].replace(‘二号工厂成套线补废‘,‘二号工厂成套线‘) # df[‘产线‘] = df[‘产线‘].replace(‘随机附件补充领用‘,‘随机附件‘) df_inner1 = df.groupby([‘产线‘,‘订单类型‘,‘物料‘,‘物料说明‘,‘保管员‘])[‘实际缺件数‘].agg([np.sum]).reset_index() df_inner1.rename(columns={‘sum‘:‘缺件总数‘,‘订单类型‘:‘业务类型‘}, inplace=True) # 物料 产线 订单类型 缺件总数 ----物料说明 ----保管员 # 01119248 随机附件 一次下架 190 # 1000045386 二号厂装配线 一次下架 1 # 1000047928 485装配线 一次下架 8 # 1000050574 调试厂产线改型 紧急要料 6 # 1000069366 485装配线 一次下架 68 # 1.获取物料、产线、订单类型维护汇总的缺件总数完成 # df_inner1.to_excel(‘F:\k1k.xlsx‘, sheet_name=‘gg‘,index=False) ############################################################################# print(‘数据表逻辑处理已完成!‘) Date_1 = datetime.datetime.strptime(Date, ‘%Y-%m-%d‘) df4 = df[df[‘需求日期‘]==Date_1] df_inner2 = df4.groupby([‘产线‘,‘订单类型‘,‘物料‘,‘物料说明‘,‘保管员‘])[‘实际缺件数‘].agg([np.sum]).reset_index() df_inner2.rename(columns={‘sum‘:‘当日缺件数‘,‘订单类型‘:‘业务类型‘}, inplace=True) # print(df_inner2.head()) result = pd.merge(df_inner1, df_inner2, how=‘left‘, on=[‘物料‘]) result = result.drop(columns = [‘产线_y‘,‘业务类型_y‘,‘物料说明_y‘,‘保管员_y‘]) result.rename(columns={‘产线_x‘:‘产线‘,‘业务类型_x‘:‘业务类型‘,‘物料说明_x‘:‘物料说明‘,‘保管员_x‘:‘保管员‘}, inplace=True) result = result.groupby([‘物料‘,‘物料说明‘,‘保管员‘,‘当日缺件数‘])[‘缺件总数‘].agg([np.sum]).reset_index() result.rename(columns={‘sum‘:‘缺件总数‘}, inplace=True) # result = result.sort(columns=‘保管员‘, ascending=False) print(result.head()) print(‘所有任务均已完成!‘) ‘‘‘ 1.增加导出表的日期功能(完成) 2.在缺件表上显示日期 3.按保管员自动打印 [加分项]:按产线维度汇总 [超级加分项]:检测存在缺件时才汇总(涉及到递归算法,难度大,要重写算法) ‘‘‘ result.to_excel(‘F:\缺件统计表{!s}.xlsx‘.format(Date), sheet_name=‘缺件总表‘,index=False) return ‘The Job is Finished‘ if __name__ == ‘__main__‘: html_to_excel() time.sleep(5) change_excel()
原文:https://www.cnblogs.com/pandaboy1123/p/9191976.html