功能介绍:
提取指定文件夹下的所有增值税发票(格式为jpg、png或pdf,其中pdf暂时只处理第1页),把识别到的信息从Excel表当前选中的单元格开始逐行写入,并按可指定的规则重命名原始发票后复制到新文件夹。
由于要用到百度的接口,所以需要注册百度智能云+实名认证+创建应用+领取资源
使用步骤:
; Setup (Baidu AI Cloud account + real-name verification + application + resources):
;   1. Register at https://console.bce.baidu.com/ai/#/ai/ocr/overview/index
;      (Products -> Artificial Intelligence -> Text Recognition)
;   2. Complete real-name verification
;   3. Create an application -> any name -> create -> view details -> note appid / apikey / secretkey
;   4. Claim the resources: Text Recognition -> Overview -> [Claim free resources] on the right
;      -> select [Financial bill OCR] -> All -> claim for 0 yuan
;   5. Wait for the resources to arrive: [Resource list] -> claimed resources
; Search this file for "hymodify" and adjust the marked values.
; What the script does:
;   Extracts the e-invoice fields of every invoice in folder dn0 (PDF: first page only),
;   writes one row per invoice into the already opened Excel sheet starting at the
;   currently selected cell, and copies each invoice, renamed, into folder dn1.
#SingleInstance force

if (!ProcessExist("Excel.exe")) {
    msgbox("请打开Excel并选中第一个要写入单元格",,0x40000)
    ExitApp
}
if (0) {
    dn0 := "c:\Users\Administrator\Desktop\11" ;hymodify source (old) invoice folder
    dn1 := "c:\Users\Administrator\Desktop\22" ;hymodify target (new) invoice folder (renamed copies go here)
    if !DirExist(dn1)
        DirCreate(dn1)
} else {
    dn0 := DirSelect(, 2, "选择【旧】发票文件夹")
    dn1 := DirSelect(, 2, "选择【新】发票文件夹")
}
; A cancelled dialog yields an empty path; "\*.*" would then scan the drive root recursively.
if (dn0 = "" || dn1 = "")
    ExitApp

; [Excel column title, key in the OCR result]
arrOcr := [
    ["发票代码","InvoiceCode"],
    ["发票号码","InvoiceNum"],
    ["开票日期","InvoiceDate"],
    ["校验码","CheckCode"],
    ["机器编号","MachineCode"],
    ["金额","AmountInFiguers"],
    ["服务名称1","CommodityName"],
    ["税率1","CommodityTaxRate"],
    ["税额1","CommodityTax"],
    ["大写金额","AmountInWords"],
    ["销售方名称","SellerName"],
    ["销售方纳税人识别号","SellerRegisterNum"],
    ["销售方地址","SellerAddress"],
    ["销售方开户行","SellerBank"],
    ["购买方名称","PurchaserName"],
    ["购买方纳税人识别号","PurchaserRegisterNum"],
    ["购买方地址","PurchaserAddress"],
    ["购买方开户行","PurchaserBank"],
]
; Extra columns appended after the OCR columns
arrOther := [
    "新文件名", ;depends on the OCR result
    "原文件名",
    "序号",
]
csOcr := arrOcr.length()
cs := csOcr+arrOther.length()
arrA := ComObjArray(12, 1, cs)
xl := ox()
st := xl.ActiveSheet
ac := xl.ActiveCell
r := 0
arrError := []      ;files whose copy failed
arrOcrError := []   ;files whose OCR request raised an error
if (ac.row == 1) { ;selection is on row 1: format the sheet as text and write the header row
    st.cells.NumberFormat := "@"
    ;header titles
    for _, arr in arrOcr
        arrA[0,A_Index-1] := arr[1]
    for _, v in arrOther
        arrA[0,csOcr+A_Index-1] := v
    ac.resize(1,cs).value := arrA
    ;first data row is the one below the header
    rng1 := ac.offset(1).resize(1,cs)
} else
    rng1 := ac.resize(1,cs)

loop files, dn0 . "\*.*", "RF" { ;hymodify "R" also processes sub-folders
    if (A_LoopFileAttrib ~= "[HS]")
        continue
    if !(A_LoopFileName ~= "i)\.(pdf|jpg|png)$") ;hymodify file-type filter (anchored to the end of the name)
        continue
    tooltip(A_Index . "`n" . A_LoopFileName)
    ; One failing request must not abort the whole batch: remember it and treat
    ; the file like any other invoice that could not be recognised.
    objOcr := ""
    try {
        objOcr := _Web.baiduOcr_vatInvoice(A_LoopFileFullPath)
    } catch e {
        arrOcrError.push(A_LoopFileName . A_Tab . (isobject(e) ? e.message : e))
    }
    ; hyf_objView(objOcr)
    arrA := ComObjArray(12, 1, cs) ;fresh array for every row
    ;OCR columns
    noExt := ""
    if (isobject(objOcr) && objOcr["TotalAmount"]) { ;result received
        for _, arr in arrOcr {
            res := objOcr[arr[2]]
            if (isobject(res)) {
                if (res.length())
                    arrA[0,A_Index-1] := res[1]["word"]
            } else
                arrA[0,A_Index-1] := res
        }
        noExt := format("{1}-{2}", objOcr["AmountInFiguers"],objOcr["InvoiceNum"]) ;hymodify new file name rule, default is (amount-invoice number)
        arrA[0,csOcr] := noExt
    }
    ;general columns
    arrA[0,csOcr+1] := A_LoopFileName
    arrA[0,csOcr+2] := r+1
    ;write the whole row
    rng1.offset(r).value := arrA
    r++
    ;copy the file
    if (strlen(noExt)) {
        SplitPath(A_LoopFileFullPath, fn,, ext)
        FileCopy(A_LoopFileFullPath, format("{1}\{2}.{3}", dn1,noExt,ext))
        if errorlevel
            arrError.push(A_LoopFileName)
    } else {
        ;no OCR result: keep the original name, prefixed with "__"
        SplitPath(A_LoopFileFullPath, fn,, ext, noExt)
        FileCopy(A_LoopFileFullPath, format("{1}\__{2}.{3}", dn1,noExt,ext))
        if errorlevel
            arrError.push(A_LoopFileName)
    }
}
; Application.Hwnd is the Excel main window handle (st.parent is a Workbook object, not a handle)
WinActivate("ahk_id " . xl.Hwnd)
tooltip()
if arrOcrError.length()
    hyf_objView(arrOcrError, "以下文件识别时出错了,请核实")
if arrError.length()
    hyf_objView(arrError, "以下文件复制时出错了,请核实")
else
    msgbox("已完成",,0x40000)
return

; Returns the Excel Application COM object of the window matching winTitle.
; Exits the script when the Excel grid control or its COM object cannot be obtained.
ox(winTitle:="ahk_class XLMAIN") {
    ctlID := ControlGetHwnd("EXCEL71", winTitle)
    if !ctlID
        ExitApp
    win := ""
    if dllcall("oleacc\AccessibleObjectFromWindow", "ptr",ctlID, "uint",4294967280, "ptr",-VarSetCapacity(IID,16)+NumPut(0x46000000000000C0,NumPut(0x0000000000020400,IID,"int64"),"int64"), "ptr*",pacc) = 0
        win := ComObject(9, pacc, 1)
    ; Without a window object the retry loop below could never succeed.
    if !isobject(win)
        ExitApp
    loop {
        ; Reading .application can fail; send Escape and retry until it works.
        try
            xl := win.application
        catch
            ControlSend("{escape}", "EXCEL71", winTitle)
    } until !!xl
    return xl
}
return

; Renders obj as text: one "key<TAB>value" entry per item, nested objects
; indented by one tab per level. char is appended after every entry.
; Only the outermost call (level = 0) returns the accumulated text.
hyf_obj2Str(obj, char:="`n", level:=0) {
    static t := "", s := ""
    if level
        t .= A_Tab ;leading tabs show the nesting level
    else
        t := "", s := "" ;reset so repeated runs do not accumulate
    if !isobject(obj)
        return "非对象,值为`n" . obj
    try { ;FIXME fails for no apparent reason
        for k, v in obj {
            if isobject(v) {
                s .= t . k . char
                hyf_obj2Str(v, char, level + 1)
                t := substr(t, 2) ;drop one tab
            } else
                s .= t . k . A_Tab . v . char
        }
    }
    if (char != "`n") ;force a line break
        s .= "`n"
    if (level = 0) ;return the result
        return s
}

; Shows obj (rendered by hyf_obj2Str) in a message box, optionally preceded by str.
; n is added to the message box options.
hyf_objView(obj, str:="", char:="`n", n:=0) {
    if strlen(str)
        return msgbox(str . "`n" . hyf_obj2Str(obj,char),,0x40000+n)
    else
        return msgbox(hyf_obj2Str(obj,char),,0x40000+n)
}

; Runs the images2pdf.py helper on fp; the target path is fp with its
; trailing word characters (the extension) replaced by "pdf".
_pic1ToPdf(fp) {
    fpPdf := RegExReplace(fp, "\w+$", "pdf")
    RunWait(format('python d:\AA\tool\python\pdf\images2pdf.py "{1}" "{2}"', fp,fpPdf),, "hide")
}

; HTTP helpers and the Baidu OCR calls.
class _Web {

    ;from the help file, SysGetIPAddresses
    ; GET url and return the response text (nothing when sending fails).
    get(url) {
        rst := ComObjCreate("WinHttp.WinHttpRequest.5.1")
        rst.open("GET", url)
        try {
            rst.send()
            return rst.ResponseText
        }
    }

    ; POST to url.
    ;   postData - object whose key/value pairs form the url-encoded body;
    ;              an optional "timeout" key is passed to WaitForResponse and not sent
    ;   Encoding - when set, the response body is decoded with this charset
    ;   headers  - optional object of extra request headers
    ;(NOTE may not work well)
    ;https://docs.microsoft.com/en-us/windows/win32/winhttp/iwinhttprequest-send
    post(url, postData:="", Encoding:="", headers:="") {
        rst := ComObjCreate("WinHttp.WinHttpRequest.5.1")
        rst.open("POST", url)
        if isobject(headers) {
            for k, v in headers {
                if v
                    rst.SetRequestHeader(k, v)
            }
        }
        rst.SetRequestHeader("Content-Type", "application/x-www-form-urlencoded")
        ; hyf_objView(postData)
        if isobject(postData) {
            ; Join every pair with "&" (previously only the last pair was sent).
            param := ""
            for k, v in postData {
                if (k = "timeout") ;control option, not form data
                    continue
                param .= (param = "" ? "" : "&") . format("{1}={2}", k,this.UrlEncode(v)) ;NOTE values must be url-encoded
            }
            rst.send(param)
            rst.WaitForResponse(postData.haskey("timeout") ? postData.timeout : -1)
        } else {
            rst.send()
        }
        ; rsy.option(2) := nPage ;Codepage:nPage
        if Encoding && rst.ResponseBody {
            oADO := ComObjCreate("adodb.stream")
            oADO.Type := 1
            oADO.Mode := 3
            oADO.Open()
            oADO.Write(rst.ResponseBody)
            oADO.Position := 0
            oADO.Type := 2
            oADO.Charset := Encoding
            res := oADO.ReadText()
            oADO.Close()
            return res
        }
        return rst.ResponseText
    }

    ; Requests an access token with the configured credentials.
    ; The token is cached so a batch does not request a new one for every file.
    baiduToken() {
        static token := ""
        if (token != "")
            return token
        appid := "修改" ;hymodify
        apikey := "修改" ;hymodify
        secretkey := "修改" ;hymodify
        host := format("https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={1}&client_secret={2}&", apikey,secretkey)
        res := this.get(host)
        obj := json.load(res)
        token := obj['access_token']
        return token
    }

    ; _Web.baiduOcr_vatInvoice("c:\Users\Administrator\Desktop\22\1.pdf")
    ; Sends an invoice to the vat_invoice endpoint and returns "words_result".
    ;   fp - a file path, or (when longer than 256 characters) data that is
    ;        already base64 encoded
    ; Throws "error_code`nerror_msg" when the response contains an error_code.
    baiduOcr_vatInvoice(fp, bArr:=false) {
        ; One flag decides both the encoding and the parameter name, so a
        ; string of exactly 256 characters is handled consistently.
        isPath := (strlen(fp) <= 256)
        b64 := isPath ? _toBase64(fp) : fp
        request_url := "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice"
        access_token := this.baiduToken()
        request_url := format("{1}?access_token={2}", request_url,access_token)
        if (isPath && (fp ~= "i)pdf$"))
            params := {"pdf_file":b64}
        else
            params := {"image":b64}
        response := this.post(request_url, params, "utf-8")
        obj := json.load(response)
        ; hyf_objView(obj)
        if (obj.haskey("error_code"))
            throw obj["error_code"] . "`n" . obj["error_msg"]
        else
            return obj["words_result"]

        ; Reads the file fp and returns its content base64 encoded, without line breaks.
        _toBase64(fp) {
            f := FileOpen(fp, "r")
            size := f.length
            f.RawRead(Bin, size)
            f.Close()
            ; first call only reports the required length
            DllCall("Crypt32.dll\CryptBinaryToString", "Ptr",&Bin, "UInt",size, "UInt",0x01, "Ptr",0, "UIntP",B64Len)
            VarSetCapacity(b64, B64Len << 1, 0)
            DllCall("Crypt32.dll\CryptBinaryToString", "Ptr",&Bin, "UInt",size, "UInt",0x01, "Ptr",&b64, "UIntP",B64Len)
            Bin := ""
            VarSetCapacity(Bin, 0)
            VarSetCapacity(b64, -1)
            res := RegExReplace(b64, "\r\n")
            return res
        }
    }

    ; Percent-encodes every byte of str (in encoding enc) as %XX.
    UrlEncode(str, enc:="UTF-8") { ;escape a string for use in a URL (from 万年书妖)
        hex := "00"
        fun := "msvcrt\swprintf"
        r := ""
        VarSetCapacity(buff, size:=strput(str, enc))
        strput(str, &buff, enc)
        while(code:=numget(buff, A_Index - 1, "UChar")) && dllcall(fun, "str",hex, "str","%%%02X", "uchar",code, "cdecl")
            r .= hex
        return r
        ;StringReplace, str, str, `%, , A ;% is the URL escape character, handle it first (removed because Google handles % poorly in searches, otherwise replace with %25)
        ;array := {"&":"%26"," ":"%20","(":"%28",")":"%29","'":"%27",":":"%3A","/":"%2F","+":"%2B",A_Tab:"%21","`r`n":"%0A"} ;`r`n must stay together, can be tested with Notepad
        ;for, key, value in array ;url-escape special characters
        ;StringReplace, str, str, %key%, %value%, A ;in this loop the two parameters must be the same
        ;return str
    }

}

class JSON {
    /**
     * Method: Load
     *     Parses a JSON string into an AHK value
     * Syntax:
     *     value := JSON.Load( Text [, reviver ] )
     * Parameter(s):
     *     value      [retval] - parsed value
     *     Text    [In, ByRef] - JSON formatted string
     *     reviver   [In, opt] - function Object, similar to JavaScript's
     *                           JSON.Parse() 'reviver' parameter
     */
    class Load extends JSON.Functor {

        call(self, ByRef Text, reviver:="") {
            this.rev := isobject(reviver) ? reviver : False
            ; Object keys(And Array indices) are temporarily stored In arrays so that
            ; we can enumerate them In the order they appear In the Document/Text instead
            ; of alphabetically. Skip if No reviver function Is specified.
            this.keys := this.rev ? {} : False
            static quot := chr(34), bashq := "\" . quot
                , json_value := quot . "{[01234567890-tfn"
                , json_value_or_array_closing := quot . "{[]01234567890-tfn"
                , object_key_or_object_closing := quot . "}"
            key := ""
            is_key := False
            root := {}
            stack := [root]
            next := json_value
            Pos := 0
            While((ch:=SubStr(Text, ++Pos, 1)) != "") {
                if InStr(" `t`r`n", ch)
                    Continue
                if !InStr(next, ch, 1)
                    this.ParseError(next, Text, Pos)
                holder := stack[1]
                is_array := holder.IsArray
                if InStr(",:", ch) {
                    next := (is_key := !is_array && ch == ",") ? quot : json_value
                } else if InStr("}]", ch) {
                    stack.RemoveAt(1)
                    next := stack[1]==root ? "" : stack[1].IsArray ? ",]" : ",}"
                } else {
                    if InStr("{[", ch) {
                        ; Check if Array() Is overridden And if its return value has
                        ; the 'IsArray' property. if so, Array() will be called normally,
                        ; otherwise, use a custom base Object For arrays
                        static json_array := Func("Array").IsBuiltIn || ![].IsArray ? {IsArray: True} : 0
                        ; sacrifice readability For minor(actually negligible) performance gain
                        (ch == "{")
                            ? ( is_key := True
                              , value := {}
                              , next := object_key_or_object_closing )
                        ; ch == "["
                            : ( value := json_array ? new json_array : []
                              , next := json_value_or_array_closing )
                        stack.insertat(1, value)
                        if (this.keys)
                            this.keys[value] := []
                    } else {
                        if (ch == quot) {
                            i := Pos
                            While(i:=InStr(Text, quot,, i+1)) {
                                value := StrReplace(SubStr(Text, Pos+1, i-Pos-1), "\\", "\u005c")
                                static tail := A_AhkVersion<"2" ? 0 : -1
                                if (SubStr(value, tail) != "\")
                                    Break
                            }
                            if (!i)
                                this.ParseError("'", Text, Pos)
                            value := StrReplace(value, "\/", "/")
                            , value := StrReplace(value, bashq, quot)
                            , value := StrReplace(value, "\b", "`b")
                            , value := StrReplace(value, "\f", "`f")
                            , value := StrReplace(value, "\n", "`n")
                            , value := StrReplace(value, "\r", "`r")
                            , value := StrReplace(value, "\t", "`t")
                            Pos := i ; update Pos
                            i := 0
                            While(i:=InStr(value, "\",, i+1)) {
                                if !(SubStr(value, i+1, 1) == "u")
                                    this.ParseError("\", Text, Pos - strlen(SubStr(value, i+1)))
                                uffff := Abs("0x" . SubStr(value, i+2, 4))
                                if (A_IsUnicode || uffff < 0x100)
                                    value := SubStr(value, 1, i-1) . chr(uffff) . SubStr(value, i+6)
                            }
                            if (is_key) {
                                key := value, next := ":"
                                Continue
                            }
                        } else {
                            value := SubStr(Text, Pos, i := RegExMatch(Text, "[\]\},\s]|$",, Pos)-Pos)
                            static Number := "Number", Integer :="Integer"
                            if value Is %Number% {
                                if value Is %Integer%
                                    value += 0
                            ; JSON literals are lower case and == is case-sensitive
                            } else if (value == "true" || value == "false")
                                value := %value% + 0
                            else if (value == "null")
                                value := ""
                            else
                                ; we can do more here to pinpoint the actual culprit
                                ; but that's just too much extra work.
                                this.ParseError(next, Text, Pos, i)
                            Pos += i-1
                        }
                        next := holder==root ? "" : is_array ? ",]" : ",}"
                    } ; if InStr("{[", ch) { ... } else
                    is_array? key := holder.push(value) : holder[key] := value
                    if (this.keys && this.keys.haskey(holder))
                        this.keys[holder].Push(key)
                }
            } ; While ( ... )
            return this.rev ? this.Walk(root, "") : root[""]
        }

        ParseError(expect, ByRef Text, Pos, len:=1) {
            static quot := chr(34), qurly := quot . "}"
            line := StrSplit(SubStr(Text, 1, Pos), "`n", "`r").Length()
            col := Pos - InStr(Text, "`n",, -(strlen(Text)-Pos+1))
            msg := Format("{1}`n`nLine:`t{2}`nCol:`t{3}`nChar:`t{4}"
            ,     (expect == "")      ? "Extra data"
                : (expect == "'")     ? "Unterminated string starting at"
                : (expect == "\")     ? "Invalid \Escape"
                : (expect == ":")     ? "Expecting ':' Delimiter"
                : (expect == quot)    ? "Expecting Object key enclosed In double quotes"
                : (expect == qurly)   ? "Expecting Object key enclosed In double quotes Or Object closing '}'"
                : (expect == ",}")    ? "Expecting ',' Delimiter Or Object closing '}'"
                : (expect == ",]")    ? "Expecting ',' Delimiter Or Array closing ']'"
                : InStr(expect, "]")  ? "Expecting JSON value Or Array closing ']'"
                :                       "Expecting JSON value(string, Number, True, False, null, Object Or Array)"
            , line, col, Pos)
            static offset := A_AhkVersion<"2" ? -3 : -4
            Throw Exception(msg, offset, SubStr(Text, Pos, len))
        }

        Walk(holder, key) {
            value := holder[key]
            if isobject(value) {
                For i, k In this.keys[value] {
                    ; Check if ObjHasKey(value, k) ??
                    v := this.Walk(value, k)
                    if (v != JSON.Undefined)
                        value[k] := v
                    else
                        value.delete(k)
                }
            }
            return this.rev.call(holder, key, value)
        }

    }

    /**
     * Method: Dump
     *     Converts an AHK value into a JSON string
     * Syntax:
     *     str := JSON.Dump( value [, replacer, Space ] )
     * Parameter(s):
     *     str        [retval] - JSON representation of an AHK value
     *     value          [In] - any value(Object, string, Number)
     *     replacer  [In, opt] - function Object, similar to JavaScript's
     *                           JSON.stringify() 'replacer' parameter
     *     Space     [In, opt] - similar to JavaScript's JSON.stringify()
     *                           'Space' parameter
     */
    class Dump extends JSON.Functor {

        call(self, value, replacer:="", Space:="") {
            this.rep := isobject(replacer) ? replacer : ""
            this.gap := ""
            if (Space) {
                static Integer := "Integer"
                if (Space ~= "^\d+$") {
                    Loop(((n:=Abs(Space))>10 ? 10 : n))
                        this.gap .= " "
                } else {
                    this.gap := SubStr(Space, 1, 10)
                }
                this.indent := "`n"
            }
            return this.Str({"": value}, "")
        }

        Str(holder, key) {
            value := holder[key]
            if (this.rep)
                value := this.rep.call(holder, key, holder.haskey(key) ? value : JSON.Undefined)
            if isobject(value) {
                ; Check Object Type, skip serialization For other Object types such as
                ; ComObject, Func, BoundFunc, FileObject, RegExMatchObject, Property, etc.
                static Type := A_AhkVersion<"2" ? "" : Func("Type")
                ;if (Type ? Type.call(value) == "Object" : ObjGetCapacity(value) != "")
                if (isobject(value)) { ;hy
                    if (this.gap) {
                        stepback := this.indent
                        this.indent .= this.gap
                    }
                    is_array := value.IsArray
                    ; Array() Is Not overridden, rollback to old method of
                    ; identifying Array-like objects. Due to the use of a For-Loop
                    ; sparse arrays such as '[1,,3]' are detected as objects({}).
                    if (!is_array) {
                        For i In value
                            is_array := i == A_Index
                        Until !is_array
                    }
                    str := ""
                    if (is_array) {
                        Loop(value.Length()) {
                            if (this.gap)
                                str .= this.indent
                            v := this.Str(value, A_Index)
                            str .= (v != "") ? v . "," : "null,"
                        }
                    } else {
                        colon := this.gap ? ": " : ":"
                        For k In value {
                            v := this.Str(value, k)
                            if (v != "") {
                                if (this.gap)
                                    str .= this.indent
                                str .= this.Quote(k) . colon . v . ","
                            }
                        }
                    }
                    if (str != "") {
                        str := RTrim(str, ",")
                        if (this.gap)
                            str .= stepback
                    }
                    if (this.gap)
                        this.indent := stepback
                    return is_array ? "[" . str . "]" : "{" . str . "}"
                }
            } else ; is_number ? value : "value"
                return type(value) != "String" ? value : this.Quote(value)
                ;return ObjGetCapacity([value])=="" ? value : this.Quote(value) ;hy
        }

        Quote(str) {
            static quot := chr(34), bashq := "\" . quot
            if (str != "") {
                str := StrReplace(str, "\", "\\")
                ; , str := StrReplace(str, "/", "\/") ; optional In ECMAScript
                , str := StrReplace(str, quot, bashq)
                , str := StrReplace(str, "`b", "\b")
                , str := StrReplace(str, "`f", "\f")
                , str := StrReplace(str, "`n", "\n")
                , str := StrReplace(str, "`r", "\r")
                , str := StrReplace(str, "`t", "\t")
                static rx_escapable := A_AhkVersion<"2" ? "O)[^\x20-\x7e]" : "[^\x20-\x7e]"
                ;While RegExMatch(str, rx_escapable, m) ;hy: changed so that Chinese is not converted to \u escapes
                    ;str := StrReplace(str, m.Value, Format("\u{1:04x}", Ord(m.Value)))
            }
            return quot . str . quot
        }

    }

    /**
     * Property: Undefined
     *     Proxy For 'undefined' Type
     * Syntax:
     *     undefined := JSON.Undefined
     * Remarks:
     *     For use with reviver And replacer functions since AutoHotkey does Not
     *     have an 'undefined' Type. Returning blank("") Or 0 won't work since these
     *     can't be distinguished from actual JSON values. this leaves us with objects.
     *     Replacer() - the caller may return a non-serializable AHK objects such as
     *     ComObject, Func, BoundFunc, FileObject, RegExMatchObject, And Property to
     *     mimic the behavior of returning 'undefined' In JavaScript but For the sake
     *     of code readability And convenience, it's better to do 'return JSON.Undefined'.
     *     Internally, the property returns a ComObject with the variant Type of VT_EMPTY.
     */
    Undefined[] {
        get {
            static empty := {}, vt_empty := ComObject(0, &empty, 1)
            return vt_empty
        }
    }

    class Functor {

        __call(method, ByRef arg, args*) {
            ; When casting to call(), use a new instance of the "function Object"
            ; so as to avoid directly storing the properties(used across sub-methods)
            ; into the "function Object" itself.
            if isobject(method)
                return (new this).call(method, arg, args*)
            else if (method == "")
                return (new this).call(arg, args*)
        }

    }

}
用AutoHotkey调用百度ocr接口提取增值税发票相关字段并写到Excel
原文:https://www.cnblogs.com/hyaray/p/15135219.html