Csharp/C#教程:C#远程HTML数据抓取实例分享

代码如下:
/// <summary>
        /// Fetches the text of a remote resource using the MSXML2 XMLHTTP COM object
        /// with a synchronous request.
        /// </summary>
        /// <param name="url">Target URL. For GET requests a non-empty <paramref name="param"/> is appended as the query string.</param>
        /// <param name="methed">HTTP verb. It is lower-cased before use; "get" and "post" receive special handling.</param>
        /// <param name="param">URL-encoded data: appended to the URL for GET, announced as a form body for POST. May be null or empty.</param>
        /// <param name="html">Receives the response text on success, or a failure message (prefixed with "远程连接失败:") on failure.</param>
        /// <returns><c>true</c> when the request completed and the response text was read; otherwise <c>false</c>.</returns>
        public static bool GetHttp(string url, string methed, string param, out string html)
        {
            try
            {
                // Normalised inside the try block so that a null verb is reported through
                // the return value like every other failure. Invariant casing keeps the
                // comparison below independent of the current culture.
                methed = methed.ToLowerInvariant();
                bool hasParam = !string.IsNullOrEmpty(param);

                if (hasParam && methed == "get")
                {
                    // Extend an existing query string with '&' rather than adding a second '?'.
                    url += (url.IndexOf('?') >= 0 ? "&" : "?") + param;
                }

                MSXML2.XMLHTTP mx = new MSXML2.XMLHTTPClass();
                // Third argument false = synchronous: send() returns only when the request is done.
                mx.open(methed, url, false, null, null);

                if (hasParam && methed == "post")
                {
                    // Content-Length is a byte count, not a UTF-16 character count.
                    // NOTE(review): MSXML is documented to transmit string bodies as UTF-8 - confirm.
                    int byteCount = System.Text.Encoding.UTF8.GetByteCount(param);
                    mx.setRequestHeader("Content-Length", byteCount.ToString());
                    mx.setRequestHeader("Content-Type", "application/x-www-form-urlencoded");
                }

                mx.send(param);

                // readyState 4 means the request has completed.
                if (mx.readyState != 4)
                {
                    html = "远程连接失败:-4";
                    return false;
                }

                html = mx.responseText;
                return true;
            }
            catch (Exception ex)
            {
                html = "远程连接失败:" + ex.Message;
                return false;
            }
        }
        publicstaticboolGetHttp1(stringurl,stringmethed,stringparam,stringreferer,stringencode,outstringhtml)
        {
            //returnGetHttp(url,methed,param,outhtml);
            //stringencode=”utf-8″;
            //stringmethed=sendType.ToString();
            if(param!=null&&methed==”get”&&param.Length>0)
            {
                if(url.IndexOf(“?”)>=0)
                {
                    url+=”&”+param;
                }
                else
                {
                    url+=”?”+param;
                }
            }
            try
            {
                HttpWebRequestwebreq=(HttpWebRequest)WebRequest.Create(url);
                webreq.Proxy=null;
                webreq.Timeout=1000*6;
                webreq.ContentType=”application/x-www-form-urlencoded”;
                webreq.UserAgent=”User-Agent:Mozilla/5.0(WindowsNT6.1;WOW64;rv:24.0)Gecko/20100101Firefox/24.0″;
                //webreq.UserAgent=”Mozilla/4.0(compatible;MSIE7.0;WindowsNT6.1;WOW64;Trident/6.0;SLCC2;.NETCLR2.0.50727;.NETCLR3.5.30729;.NETCLR3.0.30729;MediaCenterPC6.0;.NET4.0C;.NET4.0E)”;
                //谷歌的:User-Agent:Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/28.0.1500.95Safari/537.36
                //火狐的:User-Agent:Mozilla/5.0(WindowsNT6.1;WOW64;rv:24.0)Gecko/20100101Firefox/24.0
                //标准格式为:浏览器标识(操作系统标识;加密等级标识;浏览器语言)渲染引擎标识版本信息
                //webreq.AllowAutoRedirect=false;
                //频繁请求一个网址时,过段时间就会出现“基础连接已经关闭”
    &nbs p;           //webreq.KeepAlive=false;
                //webreq.ProtocolVersion=HttpVersion.Version10;
                if(referer.Length>0)
                {
                    webreq.Referer=referer;
                }
                CookieContainermycookies=newCookieContainer();
                webreq.CookieContainer=mycookies;
                //if(this.cookieList!=null)
                //{
                //   webreq.CookieContainer.Add(this.GetCookies(webreq.RequestUri,this.cookieList));
                //}
                webreq.Method=methed;
                //post开始
                if(param!=null&&methed==”post”)
                {
                    byte[]arrbyte=Encoding.GetEncoding(encode).GetBytes(param);
                    webreq.ContentLength=arrbyte.Length;
                    StreamnewStream=webreq.GetRequestStream();
                    newStream.Write(arrbyte,0,arrbyte.Length);
                    newStream.Close();
                }
                //post结束
 
                WebResponsew=webreq.GetResponse();
                //返回HTML
                using(HttpWebResponsewebres=(HttpWebResponse)webreq.GetResponse())
                {
                    using(StreamdataStream=webres.GetResponseStream())
                    {
                        using(StreamReaderreader=newStreamReader(dataStream,Encoding.GetEncoding(encode)))
  &n bsp;                     {
                            html=reader.ReadToEnd();
                            //this.cookieList=webreq.CookieContainer.GetCookies(webreq.RequestUri);
                            webreq.Abort();//可能会解决卡住或阻塞问题
                        }
                    }
                }
            }
            catch(Exceptionex)
            {
                html=”出现异常(HttpHelper.GetHTML),远程连接失败:”+ex.Message+”url:”+url;
                //System.Windows.Forms.MessageBox.Show(html);
                returnfalse;
            }
            returntrue;
        }

您可能感兴趣的文章:C#抓取网页数据 解析标题描述图片等信息 去除HTML标签;c#抓取Web网页数据分析;C#使用Selenium+PhantomJS抓取数据

标签: 数据 c# html tm

C#学习进阶Hello World的17种写法代码分享

8皇后问题的解法实例代码

上述就是C#学习教程:C#远程HTML数据抓取实例分享的全部内容。如果对大家有所用处,且需要了解更多关于C#的学习教程,希望大家多多关注计算机技术网(www.ctvol.com)!

本文来自网络收集,不代表计算机技术网立场,如涉及侵权请联系管理员删除。

ctvol管理联系方式QQ:251552304

本文章地址:https://www.ctvol.com/cdevelopment/905064.html

(0)
上一篇 2021年10月22日
下一篇 2021年10月22日

精彩推荐