首页 > Windows开发 > 详细

Delphi XE下获取网页源码记录

时间:2017-04-06 10:06:10      阅读:782      评论:0      收藏:0      [点我收藏+]

存放个自己写的获取网页源码,掌握了:

1.利用CreateOLEObject方式获取源码

2.自动判断网页格式编码

需要使用到的单元:Winapi.ActiveX,System.Win.ComObj,System.WideStrUtils

需要创建结构体:TResultWebHtml (用于存放返回的源码和Cookies)

 
Uses Winapi.ActiveX,System.Win.ComObj,System.WideStrUtils;

type
  TResultWebHtml = record
    Html : String;
    Cookie : String;
  end;

function GetWebHtml(Url,Method,Code,ReferText,ReferCookies:String;Overtime:Integer;Referer,Accept,Language,Charset,Agent,ContentType:String;Redirect,Encoding,XRequestedWith:BooLean):TResultWebHtml;stdcall;//访问网页
Var
  I:Integer;
  XMLHTTP:Olevariant;
  ResultWebHtml:TResultWebHtml;
  POvertime:Integer;
  PUrl,PMethod,PCode,PReferText,PReferCookies:String;
  PReferer,PAccept,PLanguage,PCharset,PAgent,PContentType:String;
  PHtml: RawByteString;
  Temp,PGetCookies:String;
  TempList:TStringList;
  HTML:TBytes;
Begin
  try
    CoInitialize(nil); //添加 CoInitialize 支持多线程调用
    XMLHTTP:= CreateOLEObject(WinHttp.WinHttpRequest.5.1);
    Try //避免超时报错、防止出错等
      //初始化默认值
      if Url = ‘‘ then Exit else PUrl:=Url; //网址初始化
      if Method = ‘‘ then PMethod:= get else PMethod:= LowerCase(Method); //初始化访问方式
      if Code <> ‘‘ then PCode:= LowerCase(Code);      //初始化网站编码,为空自动选择
      if ReferText <> ‘‘ then PReferText:=ReferText;//设置提交信息,用于Post操作
      if ReferCookies <> ‘‘ then PReferCookies:=ReferCookies;//设置网站Cookies
      if Overtime = 0 then POvertime:=15000 Else POvertime:=Overtime * 1000; //设置超时时间
      if Referer = ‘‘ then PReferer:= PUrl else PReferer:= Referer; //设置来路,如果为空则设置为访问网站地址
      if Accept = ‘‘ then PAccept:=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 else PAccept:=Accept;
      if Language = ‘‘ then PLanguage:=zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4 else PLanguage:=Language;
      if Charset = ‘‘ then PCharset:=GBK,utf-8;q=0.7,*;q=0.3 else PCharset:=Charset;
      if Agent = ‘‘ then PAgent:=Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2107.3 Safari/537.36 else PAgent:=Agent;
      if ContentType = ‘‘ then PContentType:=application/x-www-form-urlencoded else PContentType:=ContentType;

      XMLHTTP.open(PMethod, PUrl, False);
      //设置头信息
      XMLHTTP.setRequestHeader(Referer, PReferer);  // 仅服务器可知 来路
      XMLHTTP.setRequestHeader(Accept, PAccept);
      XMLHTTP.setRequestHeader(Accept-Language, PLanguage);
      //XMLHTTP.setRequestHeader(‘Accept-Charset‘, PCharset);
      XMLHTTP.setRequestHeader(User-Agent, PAgent);
      XMLHTTP.setRequestHeader(Content-Type, PContentType);
      XMLHTTP.setRequestHeader(Connection, keep-alive);
      if PReferCookies <> ‘‘ then XMLHTTP.setRequestHeader(Cookie, PReferCookies);
      if Encoding then XMLHTTP.setRequestHeader(Accept-Encoding, gzip); //设置压缩 ‘gzip‘
      if Redirect then XMLHTTP.Option(6) :=True else XMLHTTP.Option(6) :=False;//设置是否支持转跳
      if XRequestedWith then XMLHTTP.setRequestHeader(X-Requested-With, XMLHttpRequest);


      XMLHTTP.setTimeouts(POvertime, POvertime, POvertime, POvertime);
      XMLHTTP.send(PReferText);

      HTML:=XMLHTTP.responseBody;
      if PCode = ‘‘ then begin //开启自动选择
        //自动判断网页格式后
        SetString(PHtml, PAnsiChar(Pointer(WideString(XMLHTTP.ResponseBody))), SysStringByteLen(PWideChar(WideString(XMLHTTP.ResponseBody))));
        if IsUTF8String(PHtml) then Temp := TEncoding.Default.GetEncoding(65001).GetString(HTML) else Temp :=TEncoding.Default.GetString(HTML);
      end else begin
        if PCode = utf-8 then Temp := TEncoding.Default.GetEncoding(65001).GetString(HTML) else Temp :=TEncoding.Default.GetString(HTML);
      end;

      ResultWebHtml.Html:=Temp;

      //获取返回COOKIES
      Temp:=XMLHTTP.GetallResponseHeaders;
      TempList:=TStringList.Create;
      TempList.Text :=Temp;
      for I := 0 to TempList.Count -1 do begin
        if Pos(Set-Cookie:,TempList[I]) <> 0 then begin
          Temp:=Copy(TempList[I],12,Length(TempList[I]));
          Temp:=Copy(Temp,1,Pos(;,Temp));
          if Temp <> ‘‘ then PGetCookies:=PGetCookies+Temp;
        end;
      end;
      TempList.Free;
      ResultWebHtml.Cookie :=PGetCookies;
      Result:=ResultWebHtml;
    except
    End;
  finally
    XMLHTTP := Unassigned;
    CoUnInitialize;
  end;
End;

procedure TForm1.Button1Click(Sender: TObject);
begin
  Memo1.Text:=GetWebHtml(http://www.cnblogs.com/sishen,Get,‘‘,‘‘,‘‘,30,‘‘,‘‘,‘‘,‘‘,‘‘,‘‘,True,False,False).Html;
end;

 

Delphi XE下获取网页源码记录

原文:http://www.cnblogs.com/sishen/p/6671421.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!