首页 > 其他 > 详细

正向代理取外网富文本,下载图片到本地,把富文本图片的地址替换成本地地址

时间:2019-07-26 15:46:26      阅读:83      评论:0      收藏:0      [点我收藏+]
package com.sf.chap.controller.core;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.sf.chap.util.AppConfigUtil;

public class XEditorResourceLoader { 
	 
	private static String proxyUrl=""; //正向代理地址
	private static String storePath="";
    private static String path = ""; //保存的文件夹路径
	private static List<String> igoreSrc = new ArrayList<String>(); //过滤的系统地址
	private static Pattern p= Pattern.compile("http[s]?://([^/]*)/\\s*");
	
	@SuppressWarnings("unchecked")
	public static String parse(String content){ 
		proxyUrl = (String) AppConfigUtil.getDBValue("proxyUrl");
		storePath = (String) AppConfigUtil.getDBValue("storePath");
		igoreSrc = (List<String>) AppConfigUtil.getDBValue("igoreSrc");
	    path = (String) AppConfigUtil.getDBValue("nfscPath");
		if(StringUtils.isNoneBlank(content)){
			Document doc=Jsoup.parse(content);
			Elements els=doc.select("img");
			for(int i=0;i<els.size();i++){
				Element e=els.get(i);
				String src=e.attr("src");
				if(!isIgore(src)){
					String newSrc=downExtResource(src);
					if(newSrc!=null){
						e.attr("src", newSrc);
					}
				}
			}
			return doc.body().html();
		} 
	    return "";  
	}
	
	private static String downExtResource(String oldurl){
		CloseableHttpClient httpclient = HttpClients.createDefault();
		String url=proxyUrl;
		String host=parseHost(oldurl); 
		url=url+oldurl.substring(oldurl.indexOf(host)+host.length(),oldurl.length());
		HttpGet httpGet = new HttpGet(url);
		String fname=parseFileName(oldurl); 
		File pathfile=new File(path);
		if(!pathfile.exists()){ 
			pathfile.mkdirs();
		}
		try {
			CloseableHttpResponse response1 = httpclient.execute(httpGet);
			HttpEntity httpEntity = response1.getEntity();
			InputStream is = httpEntity.getContent();
			ByteArrayOutputStream output = new ByteArrayOutputStream();
			byte[] buffer = new byte[4096];
			int r = 0;
			long totalRead = 0;
			while ((r = is.read(buffer)) > 0) { 
			    output.write(buffer, 0, r);
			    totalRead += r;
			}
			
			FileOutputStream fos = new FileOutputStream(path+"/"+fname);
			output.writeTo(fos);
			output.flush();
			output.close();
			fos.close();
			EntityUtils.consume(httpEntity);
			response1.close();
			httpclient.close();
			return storePath+"/"+fname;
		} catch (ClientProtocolException e) {
			e.printStackTrace();
		} catch (UnsupportedOperationException e) {
			e.printStackTrace();
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return null;
	}
	
	private static boolean isIgore(String src){
		//没有指定SRC时,不用处理。
		if(src==null||"".equals(src)){
			return true;
		}
		//SRC为本地资源时,不用处理
		if(src.indexOf("http://")<0&&src.indexOf("https://")<0){
			return true;
		}
		//SRC为本系统相关资源时,不用处理
		for(int i=0;i<igoreSrc.size();i++){
			if(src.indexOf(igoreSrc.get(i))!=-1){
				return true;
			}
		}
		return false;
	}
	
	private static String parseHost(String url){
		Matcher m=p.matcher(url);
		if(m.find()){
			return m.group(1);
		}
		return null;
	}
	
	private static String parseFileName(String url){ 
		if(url!=null&&!"".equals(url)){
			int index = url.indexOf("?");
			if(index>=0){
				url = url.substring(0,index);
			}
			String[] arr=url.split("/");
			String last=arr[arr.length-1];
			return last;
		}
		return null;
	}
	
	public static void main(String[] args){
		String s="<section><section powered-by=\"xiumi.us\"><section><section> <section powered-by=\"xiumi.us\"><section><section><img src=\"http://chap-picture.sit.sf-express.com:45109/itcan/20180712/Tulips201807120946224622.jpg\"></section></section></section> </section><section> <section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/2017-6-25-11.png\" _width=\"100%\"></section></section></section><section powered-by=\"xiumi.us\"><section><section><section powered-by=\"xiumi.us\"><section><section> <section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/921a3fcf641a89a7164b53e14516a28c-sz_19043.png\" _width=\"100%\"></section></section></section> </section><section> <section powered-by=\"xiumi.us\"><section><section><p><strong>1921</strong></p><p><strong>2018</strong></p></section></section></section> </section></section></section></section></section></section><section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/2017-6-25-6.png\" _width=\"100%\"></section></section></section> </section></section></section><section powered-by=\"xiumi.us\"><section><section> <section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/df3c31db1c02bba14cbd9eb0ae2007b0-sz_22111.png\"></section></section></section> </section><section> <section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/2017-6-25-11.png\" _width=\"100%\"></section></section></section><section powered-by=\"xiumi.us\"><section><section><section powered-by=\"xiumi.us\"><section><section> <section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/921a3fcf641a89a7164b53e14516a28c-sz_19043.png\" 
_width=\"100%\"></section></section></section> </section><section> <section powered-by=\"xiumi.us\"><section><section><p><strong>1921</strong></p><p><strong>2018</strong></p></section></section></section> </section></section></section></section></section></section><section powered-by=\"xiumi.us\"><section><section><img src=\"http://statics.xiumi.us/stc/images/templates-assets/tpl-paper/image/2017-6-25-6.png\" _width=\"100%\"></section></section></section> </section></section></section></section>";
		String p=parse(s);
		System.out.println(p);
	}
}

  

正向代理取外网富文本,下载图片到本地,把富文本图片的地址替换成本地地址

原文:https://www.cnblogs.com/ericlaifagen/p/10649421.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!