java类:
/**
 * One keyword occurrence located inside a searched text.
 *
 * <p>Holds the matched words, the start offset ({@code index}), the end
 * offset ({@code lastIndex}) and the id of the keyword that matched.
 *
 * @date 2020-12-10 010 13:28
 */
public class SearchNode {

    /** Matched text. */
    private String words;

    /** Offset at which the match starts. */
    private int index;

    /** Offset at which the match ends. */
    private int lastIndex;

    /** Id of the matched keyword; stays {@code 0} when not supplied. */
    private long id;

    /** Creates an empty node with every field at its default value. */
    public SearchNode() {
    }

    /**
     * Creates a node without a keyword id.
     *
     * @param words     matched text
     * @param index     start offset of the match
     * @param lastIndex end offset of the match
     */
    public SearchNode(String words, int index, int lastIndex) {
        this(words, index, lastIndex, 0L);
    }

    /**
     * Creates a fully populated node.
     *
     * @param words     matched text
     * @param index     start offset of the match
     * @param lastIndex end offset of the match
     * @param id        id of the matched keyword
     */
    public SearchNode(String words, int index, int lastIndex, long id) {
        this.words = words;
        this.index = index;
        this.lastIndex = lastIndex;
        this.id = id;
    }

    /** @return the matched text */
    public String getWords() {
        return this.words;
    }

    /** @param words the matched text */
    public void setWords(String words) {
        this.words = words;
    }

    /** @return start offset of the match */
    public int getIndex() {
        return this.index;
    }

    /** @param index start offset of the match */
    public void setIndex(int index) {
        this.index = index;
    }

    /** @return end offset of the match */
    public int getLastIndex() {
        return this.lastIndex;
    }

    /** @param lastIndex end offset of the match */
    public void setLastIndex(int lastIndex) {
        this.lastIndex = lastIndex;
    }

    /** @return id of the matched keyword */
    public long getId() {
        return this.id;
    }

    /** @param id id of the matched keyword */
    public void setId(long id) {
        this.id = id;
    }
}
/**
 * A sensitive keyword together with the text that should replace it.
 *
 * @date 2020-12-10 010 13:53
 */
public class SensitiveWords {

    /** Identifier of this keyword entry. */
    private long id;

    /** The keyword to look for. */
    private String words;

    /** The text substituted for the keyword. */
    private String replace;

    /** Creates an empty entry with every field at its default value. */
    public SensitiveWords() {
    }

    /**
     * Creates a fully populated entry.
     *
     * @param id      identifier of the entry
     * @param words   keyword to look for
     * @param replace text substituted for the keyword
     */
    public SensitiveWords(final long id, final String words, final String replace) {
        setId(id);
        setWords(words);
        setReplace(replace);
    }

    /** @return identifier of the entry */
    public long getId() {
        return this.id;
    }

    /** @param id identifier of the entry */
    public final void setId(long id) {
        this.id = id;
    }

    /** @return keyword to look for */
    public String getWords() {
        return this.words;
    }

    /** @param words keyword to look for */
    public final void setWords(String words) {
        this.words = words;
    }

    /** @return text substituted for the keyword */
    public String getReplace() {
        return this.replace;
    }

    /** @param replace text substituted for the keyword */
    public final void setReplace(String replace) {
        this.replace = replace;
    }
}
/** * @date 2020-12-09 009 19:51 */ public class SensitiveWordsReplace { /** * 所有关键字 */ protected static List<SensitiveWords> sensitiveWordsList; protected static Map<Long, SensitiveWords> sensitiveWordsMap; public static void init(List<SensitiveWords> sensitiveWordsList) { SensitiveWordsReplace.sensitiveWordsList = sensitiveWordsList; SensitiveWordsReplace.sensitiveWordsMap = new HashMap<>(sensitiveWordsList.size()); for (SensitiveWords sensitiveWords : sensitiveWordsList) { SensitiveWordsReplace.sensitiveWordsMap.put(sensitiveWords.getId(), sensitiveWords); } } public static String findReplace(String text) { // 只能支持全文匹配 List<SearchNode> searchNodeList = SensitiveWordsSearch.getInstance().findWords(text, true); if (CollectionUtils.isEmpty(searchNodeList)) { return text; } Map<Integer, SearchNode> searchNodeMap = new HashMap<>(searchNodeList.size()); for (SearchNode searchNode : searchNodeList) { int index = searchNode.getIndex(); searchNodeMap.put(index, searchNode); } StringBuilder builder = new StringBuilder(); int length = text.length(); for (int i = 0; i < length; i++) { SearchNode searchNode = searchNodeMap.get(i); if (null != searchNode) { SensitiveWords sensitiveWords = SensitiveWordsReplace.sensitiveWordsMap.get(searchNode.getId()); if (null != sensitiveWords) { builder.append(sensitiveWords.getReplace()); } else { int i1 = searchNode.getLastIndex() - searchNode.getIndex(); for (int j = 0; j < i1; j++) { builder.append("*"); } } i = searchNode.getLastIndex() - 1; } else { builder.append(text.charAt(i)); } } return builder.toString(); } }
/** * @date 2020-12-09 009 19:36 */ public class SensitiveWordsSearch { /** * 关键字根节点 */ protected WordsNode rootNode; /** * 所有关键字 */ protected List<SensitiveWords> sensitiveWordsList; /** * 关键字加载中 */ protected boolean keywordsLoading; private SensitiveWordsSearch() { } /** * 获取实例 * * @return SensitiveWordsSearch */ public static SensitiveWordsSearch getInstance() { return SensitiveWordsSearchInstance.INSTANCE; } /** * 初始化关键字 */ private void initKeywords() { // 初始化 rootNode = new WordsNode(); for (SensitiveWords sensitiveWords : sensitiveWordsList) { WordsNode node = rootNode; String words = sensitiveWords.getWords(); int length = words.length(); for (int i = 0; i < length; i++) { node = node.add(words.charAt(i)); if (node.getLayer() == 0) { node.setLayer(i + 1); } } node.setEnd(true); node.setId(sensitiveWords.getId()); } System.out.println(JSON.toJSONString(rootNode)); } /** * 更新关键字 * * @param sensitiveWordsList 关键字集合 */ public void updateKeywords(List<SensitiveWords> sensitiveWordsList) { if (!this.keywordsLoading) { this.keywordsLoading = true; this.sensitiveWordsList = sensitiveWordsList; this.initKeywords(); this.keywordsLoading = false; } } /** * 获取关键字 * * @param text 检索文本 * @param maxMatch 最大匹配 * @return 查找到的关键字 */ public List<SearchNode> findWords(String text, boolean maxMatch) { if (null == rootNode) { throw new RuntimeException("SensitiveWordsSearch uninitialized."); } WordsNode top = null; List<SearchNode> list = new ArrayList<>(); WordsNode preNode = null; int length = text.length(); int lastLength = length - 1; for (int i = 0; i < length; i++) { final char t = text.charAt(i); WordsNode node; if (top == null) { node = rootNode.getNode(t); } else { if (top.hasKey(t)) { node = top.getNode(t); } else { if (maxMatch && top.isEnd()) { preNode = top; } node = rootNode.getNode(t); } } if (maxMatch) { // 下一个节点 if (preNode != null) { // 计算层级向前 list.add(new SearchNode(preNode.getWords(), i - preNode.getLayer(), i, preNode.getId())); preNode = null; } } else { // 
当前节点 if (node != null && node.isEnd()) { list.add(new SearchNode(node.getWords(), i + 1 - node.getLayer(), i + 1, node.getId())); } } // 最大匹配时修正最后一个文本无法匹配的问题 if (lastLength == i && maxMatch && node != null && node.isEnd()) { // 当前节点 // 最后匹配 list.add(new SearchNode(node.getWords(), i + 1 - node.getLayer(), i + 1, node.getId())); } top = node; } return list; } /** * 静态内部类 */ private static class SensitiveWordsSearchInstance { /** * 实例对象 */ private static final SensitiveWordsSearch INSTANCE = new SensitiveWordsSearch(); } }
/** * @date 2020-12-09 009 19:29 */ public class WordsNode { private int layer; private boolean end; private char c; private long id; private Map<Character, WordsNode> nodeMap; private WordsNode parent; public WordsNode() { nodeMap = new HashMap<>(16); } /** * 新增字符 * * @param c c * @return WordsNode */ public WordsNode add(final Character c) { if (nodeMap.containsKey(c)) { return nodeMap.get(c); } final WordsNode node = new WordsNode(); node.parent = this; node.c = c; nodeMap.put(c, node); return node; } public boolean hasKey(final char c) { return nodeMap.containsKey(c); } public WordsNode getNode(final char c) { return nodeMap.get(c); } /** * 获取当前节点的文本 * * @return String */ public String getWords() { if (‘\u0000‘ == this.c) { return ""; } List<String> words = new ArrayList<>(this.layer); words.add(String.valueOf(this.c)); if (null != this.parent) { words.add(this.parent.getWords()); } Collections.reverse(words); StringBuilder builder = new StringBuilder(); for (String word : words) { builder.append(word); } return builder.toString(); } public int getLayer() { return layer; } public void setLayer(int layer) { this.layer = layer; } public boolean isEnd() { return end; } public void setEnd(boolean end) { this.end = end; } public char getC() { return c; } public void setC(char c) { this.c = c; } public long getId() { return id; } public void setId(long id) { this.id = id; } public Map<Character, WordsNode> getNodeMap() { return nodeMap; } public void setNodeMap(Map<Character, WordsNode> nodeMap) { this.nodeMap = nodeMap; } public WordsNode getParent() { return parent; } public void setParent(WordsNode parent) { this.parent = parent; } }
/**
 * End-to-end check: loads a small keyword set, runs the replacement over a
 * sample paragraph and verifies the result.
 *
 * @date 2020-12-10 010 10:15
 */
public class TestSensitiveWordsSearch {

    /**
     * Loads eight keywords, replaces them in the sample text and fails when the
     * output differs from the expected paragraph.
     */
    @Test
    public void init() {
        SensitiveWordsSearch instance = SensitiveWordsSearch.getInstance();

        List<SensitiveWords> sensitiveWordsList = new ArrayList<>();
        sensitiveWordsList.add(new SensitiveWords(1L, "凌晨两点", "丑时三刻"));
        sensitiveWordsList.add(new SensitiveWords(2L, "国庆", "庆国"));
        sensitiveWordsList.add(new SensitiveWords(3L, "阅兵", "大阅"));
        sensitiveWordsList.add(new SensitiveWords(4L, "七点", "辰时"));
        sensitiveWordsList.add(new SensitiveWords(5L, "战地", "战场"));
        sensitiveWordsList.add(new SensitiveWords(6L, "维和军士", "和平使者"));
        sensitiveWordsList.add(new SensitiveWords(7L, "特警", "使者"));
        sensitiveWordsList.add(new SensitiveWords(8L, "小说", "软文"));
        instance.updateKeywords(sensitiveWordsList);

        System.out.println("已加载关键词:");
        System.out.println(JSON.toJSONString(sensitiveWordsList));

        String text = "凌晨两点毫无睡意,受国庆阅兵影响,七点爬起来看《白色橄榄树》这部小说。无论怎样评价玖月晞,战地记者与维和军士的配置简直招架不住。阿瓒又成为所看过小说里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的特警,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的小说。";
        System.out.println("查询文本:\n" + text);

        SensitiveWordsReplace.init(sensitiveWordsList);
        String replace = SensitiveWordsReplace.findReplace(text);
        System.out.println(replace);

        // Printing alone verifies nothing; pin the expected replacement result.
        String expected = "丑时三刻毫无睡意,受庆国大阅影响,辰时爬起来看《白色橄榄树》这部软文。无论怎样评价玖月晞,战场记者与和平使者的配置简直招架不住。阿瓒又成为所看过软文里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的使者,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的软文。";
        if (!expected.equals(replace)) {
            throw new AssertionError("Unexpected replacement result: " + replace);
        }
    }
}
结果对照:
查询文本:
凌晨两点毫无睡意,受国庆阅兵影响,七点爬起来看《白色橄榄树》这部小说。无论怎样评价玖月晞,战地记者与维和军士的配置简直招架不住。阿瓒又成为所看过小说里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的特警,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的小说。
丑时三刻毫无睡意,受庆国大阅影响,辰时爬起来看《白色橄榄树》这部软文。无论怎样评价玖月晞,战场记者与和平使者的配置简直招架不住。阿瓒又成为所看过软文里难以忘怀的一个名字,他是柔和淡定的排弹士兵,是机场突然出现的使者,也是在东国苏睿城郊5秒救下她的人。玖月晞轻描淡写,出来的故事却刻骨铭心,真的很喜欢这样力度的软文。
原文:https://www.cnblogs.com/se7end/p/14122924.html