import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.commons.lang.StringUtils; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * @author shiqil.liu * @date 2019-08-02 19:17 */ public class BB { public static void main(String[] args) { String str1 = "abasdf"; String str2 = "asdasd1(dfgdfg2)(asdsad3)(fdgfdg4)"; String str3 = "asdasd(sdfdsf(dsfdsfdsf(666)(dsfdsfds)))"; String str4 = "asdasd(dfgdfg(asdsad)fdgfdg)"; String str5 = "asdasd(sdfdsf(dsfdsfdsf(dsfdsfd(abc))))"; String str6 = "asdasd(dfgdfg)(sad)(aafdgfdg)"; String str7 = "asdasd(sdfdsf(dsfdsfdsf(dsfdsfds)))"; String str8 = "asdasd(dfgdfg(asdsad)fdgfdg)"; String str9 = "asdasd(sdfdsf(dsfdsfdsf(dsfdsfds())))"; String strr = "asdasd(dfgdfg)()"; /*List<String> dealed = dealUsedNames(str3); dealed.forEach(r -> { System.out.println(r); });*/ readByFile("D:\\usedNamesAll.txt","D:\\usedNamesLog.txt"); //readByFile("D:\\usedNamesBetaAll.txt","D:\\usedNamesBetaLog.txt"); //testOutPut("D:\\outputAll.txt"); } public static void testOutPut(String p1) { try (FileReader r1 = new FileReader(p1); BufferedReader br1 = new BufferedReader(r1)) { for(int i=0;i<10;i++) { String str = br1.readLine(); System.out.println(str); System.out.println("-------------------"); } } catch (Exception e) { e.printStackTrace(); } } public static Map<String,List<String>> readByFile(String p1,String p2) { HashMap<String, List<String>> map = Maps.newHashMap(); ArrayList<String> list = Lists.newArrayList(); File writeName = new File("D:\\outputAll.txt");// 相对路径,如果没有则要建立一个新的output.txt文件 File errorFile = new File("D:\\errorFile.txt"); try { writeName.createNewFile(); errorFile.createNewFile(); }catch (Exception e) { e.printStackTrace(); } try (FileReader r1 = new FileReader(p1); BufferedReader br1 = new BufferedReader(r1);//原来attrs里面的usedNmaes FileReader r2 = new FileReader(p2); BufferedReader br2 = new BufferedReader(r2);//log中记录的曾用名 FileWriter writer = new FileWriter(writeName); BufferedWriter out = new BufferedWriter(writer); FileWriter writer2 = new FileWriter(errorFile); BufferedWriter out2 = new BufferedWriter(writer2)){ String readLine; Map<String,String> oldMaps = Maps.newHashMap(); while ((readLine = br1.readLine()) != null) { // 一次读入一行数据 List<String> list1 = Splitter.on(‘\t‘).trimResults().omitEmptyStrings().splitToList(readLine);//这个list里装的是seq和a||b oldMaps.put(list1.get(0),list1.get(1)); String seq = list1.get(0); List<String> listTemp = Splitter.on("||").omitEmptyStrings().trimResults().splitToList(list1.get(1));//这个list里装的是a(b)(c) List<String> listAll = Lists.newArrayList();//这个list各个处理后的String(还没去重) listTemp.forEach( s1 -> { List<String> list2 = dealUsedNames(s1); listAll.addAll(list2); }); map.put(seq,listAll); } while ((readLine = br2.readLine()) != null) { // 一次读入一行数据 List<String> list1 = Splitter.on(‘\t‘).trimResults().omitEmptyStrings().splitToList(readLine);//这个list里装的是seq和曾用名(一个) String seq = list1.get(0); String usedStr = list1.get(1); if(StringUtils.isNotBlank(usedStr)) { List<String> list2 = map.get(seq); if(list2 == null || list2.isEmpty()) { map.put(seq,Lists.newArrayList(usedStr)); } else { list2.add(usedStr); } } } int errorNum1 = 0; int errorNum2 = 0; long count = 0; long count1 = 0; long count2 = 0; long count3 = 0; for(Map.Entry<String,List<String>> entry: map.entrySet()) { List<String> list3 = deleteRepeatCharMark(entry.getKey(),entry.getValue()); count += list3.size(); String join = Joiner.on("||").join(list3); if(join.contains("\\")) count2++; if(join.contains("‘‘")) count1++; join = join.replaceAll("‘","‘‘"); if(StringUtils.equals(join,oldMaps.get(entry.getKey()))) { count3++; continue; } out.write("update hotel_info set attrs = attrs || hstore(‘usedNames‘,‘"+join+"‘),online_status=2,last_mod=now() where hotel_seq=‘"+entry.getKey()+"‘;"+"\r\n"); } out.flush(); out2.flush(); System.out.println("error1:"+errorNum1); System.out.println("error2:"+errorNum2); System.out.println(count1); System.out.println(count2); System.out.println(count3); System.out.println(count); } catch (Exception e) { e.printStackTrace(); } return null; } public static List<String> dealOthers(String str) { String reg1 = "^([^(^)]+)(\\([^(^)]+\\))*?$"; String reg2 = "([^(^)]+)(\\([^(^)]+\\))*?"; Matcher matcher = Pattern.compile(reg1).matcher(str); if (matcher.find()) { ArrayList<String> list = Lists.newArrayList(); matcher = Pattern.compile(reg2).matcher(str); while (matcher.find()) { list.add(matcher.group()); } return list; } return Collections.emptyList(); } public static List<String> dealUsedNames(String str) { if (StringUtils.isBlank(str)) { return Collections.emptyList(); } ArrayList<String> list = Lists.newArrayList(); //将中文的括号转换成英文的,去掉反斜杠 str = str.replaceAll("(", "("); str = str.replaceAll(")", ")"); //判断左右括号数量是否相同 String strtemp1 = str.replaceAll("\\(", ""); String strtemp2 = str.replaceAll("\\)", ""); if (strtemp1.length() != strtemp2.length()) { return Collections.emptyList(); } //1.直接就是个字符串 if (str.indexOf(‘(‘, 0) == -1) { list.add(str); return list; } try { if (str.charAt(str.length() - 2) == ‘)‘) { //此时必须是嵌套结构 return dealNest(str); } else { //此时必须是A(b) 或者A(B)(C)这种 return dealOthers(str); } } catch (Exception e) { return Collections.emptyList(); } } public static List<String> dealNest(String str) { ArrayList<String> list = Lists.newArrayList(); int begin = 0; int endRight; int num = 0;//用来比较匹配之后左右括号是否相等 int endLeft = str.indexOf(‘(‘, begin); list.add(str.substring(begin, endLeft)); begin = endLeft + 1; while ((endLeft = str.indexOf(‘(‘, begin)) != -1) { list.add(str.substring(begin, endLeft)); begin = endLeft + 1; num++; } endRight = str.indexOf(‘)‘, begin); list.add(str.substring(begin, endRight)); begin = endRight + 1; if (begin + num != str.length()) return Collections.emptyList(); if (list.contains("")) { return Collections.emptyList(); } return list; } public static List<String> deleteRepeatCharMark(String sss,List<String> list) { LinkedHashMap<String, String> map = Maps.newLinkedHashMap(); for(String s:list) { s = s.trim(); if(s.length() <= 1) { continue; } if(StringUtils.isBlank(map.get(s.toLowerCase()))) { map.put(s.toLowerCase(),s); } } int temp1 = 10; int temp2 = 10; LinkedList<String> newList = Lists.newLinkedList(); for(Map.Entry<String,String> entry:map.entrySet()) { String str = entry.getValue(); str = str.replaceAll("\\\\",""); if(str.contains("‘‘")) { continue; } if(StringUtils.isNotBlank(str)) { if(str.charAt(0) == ‘\‘‘ && str.charAt(str.length()-1) == ‘\‘‘) { str = str.substring(1,str.length()-1); } } newList.add(str); } return newList; } }
import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Lists; import org.apache.commons.lang.StringUtils; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.util.List; /** * @Version 1.0 * @Author shiqil.liu * @Date 2019/5/11 22:44 */ public class CC { public static void main(String[] args) { String p1 = "D:\\NamesAll3.txt"; File writeName = new File("D:\\outputAll3.txt");// 相对路径,如果没有则要建立一个新的output.txt文件 try (FileReader r1 = new FileReader(p1); FileWriter writer = new FileWriter(writeName); BufferedWriter out = new BufferedWriter(writer); BufferedReader br1 = new BufferedReader(r1)) { String readLine; int count = 0; int count2 = 0; int count3 = 0; int count4 = 0; while ((readLine = br1.readLine()) != null) { // 一次读入一行数据 List<String> list1 = Splitter.on(‘\t‘).trimResults().omitEmptyStrings().splitToList(readLine); String s1 = list1.get(0);//seq String s2 = list1.get(1);//cym String s3 = list1.get(2);//zwm String s3low = s3.toLowerCase(); String s4 = ""; if(list1.size() != 3) { s4 = list1.get(3); } if(s4.equals("\\N")) { s4 = ""; } String s4low = s4.toLowerCase(); List<String> list = Lists.newArrayList(Splitter.on("||").omitEmptyStrings().trimResults().splitToList(s2)); list.removeIf(s -> StringUtils.equals(s.toLowerCase(),s3low) || StringUtils.equals(s.toLowerCase(),s4low) ); if(list.size() > 20) { //System.out.println(s1); list = list.subList(list.size()-20,list.size()); } /* if(list.size() > 20) { count2++; }*/ String usedNames = Joiner.on("||").join(list); if(StringUtils.equals(s2,usedNames)) { continue; } /*!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!注意注释的内容和实际内容变化*/ if(StringUtils.isBlank(usedNames)) { count2++; //out.write("update hotel_info set attrs = delete(attrs,‘usedNames‘),online_status=2,last_mod=now() where hotel_seq=‘"+s1+"‘;"+"\r\n"); out.write("update hotel_info set attrs = delete(attrs,‘usedNames‘) where hotel_seq=‘"+s1+"‘;"+"\r\n"); } else { count3++; usedNames = usedNames.replaceAll("‘","‘‘"); //out.write("update hotel_info set attrs = attrs || hstore(‘usedNames‘,‘"+usedNames+"‘),online_status=2,last_mod=now() where hotel_seq=‘"+s1+"‘;"+"\r\n"); out.write("update hotel_info set attrs = attrs || hstore(‘usedNames‘,‘"+usedNames+"‘) where hotel_seq=‘"+s1+"‘;"+"\r\n"); } } System.out.println(count); System.out.println(count2); System.out.println(count3); System.out.println(count4); } catch (Exception e) { e.printStackTrace(); } } }
原文:https://www.cnblogs.com/TheQi/p/11390717.html