package com.sf.chap.controller.core; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpEntity; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.sf.chap.util.AppConfigUtil; public class XEditorResourceLoader { private static String proxyUrl=""; //正向代理地址 private static String storePath=""; private static String path = ""; //保存的文件夹路径 private static List<String> igoreSrc = new ArrayList<String>(); //过滤的系统地址 private static Pattern p= Pattern.compile("http[s]?://([^/]*)/\\s*"); @SuppressWarnings("unchecked") public static String parse(String content){ proxyUrl = (String) AppConfigUtil.getDBValue("proxyUrl"); storePath = (String) AppConfigUtil.getDBValue("storePath"); igoreSrc = (List<String>) AppConfigUtil.getDBValue("igoreSrc"); path = (String) AppConfigUtil.getDBValue("nfscPath"); if(StringUtils.isNoneBlank(content)){ Document doc=Jsoup.parse(content); Elements els=doc.select("img"); for(int i=0;i<els.size();i++){ Element e=els.get(i); String src=e.attr("src"); if(!isIgore(src)){ String newSrc=downExtResource(src); if(newSrc!=null){ e.attr("src", newSrc); } } } return doc.body().html(); } return ""; } private static String downExtResource(String oldurl){ CloseableHttpClient httpclient = HttpClients.createDefault(); String url=proxyUrl; String host=parseHost(oldurl); url=url+oldurl.substring(oldurl.indexOf(host)+host.length(),oldurl.length()); HttpGet httpGet = new HttpGet(url); String fname=parseFileName(oldurl); File pathfile=new File(path); if(!pathfile.exists()){ pathfile.mkdirs(); } try { CloseableHttpResponse response1 = httpclient.execute(httpGet); HttpEntity httpEntity = response1.getEntity(); InputStream is = httpEntity.getContent(); ByteArrayOutputStream output = new ByteArrayOutputStream(); byte[] buffer = new byte[4096]; int r = 0; long totalRead = 0; while ((r = is.read(buffer)) > 0) { output.write(buffer, 0, r); totalRead += r; } FileOutputStream fos = new FileOutputStream(path+"/"+fname); output.writeTo(fos); output.flush(); output.close(); fos.close(); EntityUtils.consume(httpEntity); response1.close(); httpclient.close(); return storePath+"/"+fname; } catch (ClientProtocolException e) { e.printStackTrace(); } catch (UnsupportedOperationException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return null; } private static boolean isIgore(String src){ //没有指定SRC时,不用处理。 if(src==null||"".equals(src)){ return true; } //SRC为本地资源时,不用处理 if(src.indexOf("http://")<0&&src.indexOf("https://")<0){ return true; } //SRC为本系统相关资源时,不用处理 for(int i=0;i<igoreSrc.size();i++){ if(src.indexOf(igoreSrc.get(i))!=-1){ return true; } } return false; } private static String parseHost(String url){ Matcher m=p.matcher(url); if(m.find()){ return m.group(1); } return null; } private static String parseFileName(String url){ if(url!=null&&!"".equals(url)){ int index = url.indexOf("?"); if(index>=0){ url = url.substring(0,index); } String[] arr=url.split("/"); String last=arr[arr.length-1]; return last; } return null; } public static void main(String[] args){ String s="<section><section powered-by=\"xiumi.us\"><section><section> <section powered-by=\"xiumi.us\"><section><section><img src=\"http://chap-picture.sit.sf-express.com:45109/itcan/20180712/Tulips201807120946224622.jpg\"></section></section></section> </section><section> <section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/2017-6-25-11.png\" _width=\"100%\"></section></section></section><section powered-by=\"xiumi.us\"><section><section><section powered-by=\"xiumi.us\"><section><section> <section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/921a3fcf641a89a7164b53e14516a28c-sz_19043.png\" _width=\"100%\"></section></section></section> </section><section> <section powered-by=\"xiumi.us\"><section><section><p><strong>1921</strong></p><p><strong>2018</strong></p></section></section></section> </section></section></section></section></section></section><section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/2017-6-25-6.png\" _width=\"100%\"></section></section></section> </section></section></section><section powered-by=\"xiumi.us\"><section><section> <section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/df3c31db1c02bba14cbd9eb0ae2007b0-sz_22111.png\"></section></section></section> </section><section> <section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/2017-6-25-11.png\" _width=\"100%\"></section></section></section><section powered-by=\"xiumi.us\"><section><section><section powered-by=\"xiumi.us\"><section><section> <section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/921a3fcf641a89a7164b53e14516a28c-sz_19043.png\" _width=\"100%\"></section></section></section> </section><section> <section powered-by=\"xiumi.us\"><section><section><p><strong>1921</strong></p><p><strong>2018</strong></p></section></section></section> </section></section></section></section></section></section><section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/2017-6-25-6.png\" _width=\"100%\"></section></section></section> </section></section></section></section>"; String p=parse(s); System.out.println(p); } }
正向代理取外网富文本,下载图片到本地,把富文本图片的地址替换成本地地址
原文:https://www.cnblogs.com/ericlaifagen/p/10649421.html