c#根据网页编码自动获取内容

来源:赵克立博客 分类:网络安全 标签:-- 发布时间:2014-04-29 06:04:39 最后更新:2017-06-09 18:50:26 浏览:1243

本文为博主原创文章，转载请声明原文链接...谢谢。o_0。

原文链接:

http://www.zhaokeli.com/article/1684.html

更新时间：

2017-06-09 18:50:26

温馨提示：

学无止境,技术类文章有它的时效性,请留意文章更新时间,如发现内容有误请留言指出,防止别人"踩坑",我会及时更新文章

1、直接贴出一个函数，放入自己的项目就可以使用啦

/// <summary>
/// Downloads the page at <paramref name="url"/> and decodes it with the character set
/// the page declares.
/// </summary>
/// <remarks>
/// The charset is looked up first in the Content-Type response header and then in a
/// <c>&lt;meta ... charset=...&gt;</c> tag of the body. If neither names an encoding known
/// to this runtime, the body is decoded with <see cref="Encoding.Default"/>.
/// Failures are reported to the user through a message box.
/// </remarks>
/// <param name="url">Address of the page to fetch.</param>
/// <returns>The decoded page text, or an empty string when the request or the read fails.</returns>
public string GetPage(string url)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Method = "GET";

        // Dispose the response so the underlying connection is released on every path.
        using (WebResponse response = request.GetResponse())
        {
            try
            {
                // Keep the raw bytes: decoding with a guessed encoding and re-encoding
                // afterwards is lossy, so the body is decoded exactly once, at the end.
                byte[] body = ReadResponseBytes(response);

                // 1) Charset from the HTTP header; tolerates quotes and trailing parameters.
                Encoding encoding = DetectDeclaredEncoding(
                    response.Headers["Content-Type"],
                    "charset\\s*=\\s*[\"']?([^\\s\"';,]+)");

                // 2) Charset from a <meta> tag. ASCII is enough for sniffing because
                //    charset names are ASCII; other bytes are irrelevant here.
                if (encoding == null)
                {
                    encoding = DetectDeclaredEncoding(
                        Encoding.ASCII.GetString(body),
                        "<\\s*meta[^>]*?charset\\s*=\\s*[\"']?([^\\s\"'/>;]+)");
                }

                // 3) Nothing usable declared: fall back to the system default encoding
                //    instead of discarding a successfully downloaded page.
                if (encoding == null)
                {
                    encoding = Encoding.Default;
                }

                // StreamReader strips a leading byte-order mark (and honours it), which
                // Encoding.GetString would leave in the text.
                using (StreamReader reader = new StreamReader(new MemoryStream(body), encoding))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception)
            {
                MessageBox.Show("读取网页内容时出错");
                return "";
            }
        }
    }
    catch (Exception ex)
    {
        // Request-level failure (bad URL, DNS, HTTP error status, ...).
        MessageBox.Show(ex.Message);
        return "";
    }
}

// Reads the entire response body into memory as raw, undecoded bytes.
private static byte[] ReadResponseBytes(WebResponse response)
{
    using (Stream stream = response.GetResponseStream())
    using (MemoryStream buffer = new MemoryStream())
    {
        byte[] chunk = new byte[8192];
        int read;
        while ((read = stream.Read(chunk, 0, chunk.Length)) > 0)
        {
            buffer.Write(chunk, 0, read);
        }
        return buffer.ToArray();
    }
}

// Returns the encoding named by capture group 1 of pattern within text, or null when
// text is empty, the pattern does not match, or the name is not a supported encoding.
private static Encoding DetectDeclaredEncoding(string text, string pattern)
{
    if (string.IsNullOrEmpty(text))
    {
        return null;
    }

    Match match = Regex.Match(text, pattern, RegexOptions.IgnoreCase);
    if (!match.Success)
    {
        return null;
    }

    try
    {
        return Encoding.GetEncoding(match.Groups[1].Value);
    }
    catch (ArgumentException)
    {
        // Unknown or malformed charset name: let the caller try the next source.
        return null;
    }
    catch (NotSupportedException)
    {
        return null;
    }
}