c#根据网页编码自动获取内容
版权声明:
本文为博主原创文章,转载请声明原文链接...谢谢。o_0。
更新时间:
2017-06-09 18:50:26
温馨提示:
学无止境,技术类文章有它的时效性,请留意文章更新时间,如发现内容有误请留言指出,防止别人"踩坑",我会及时更新文章
1、直接贴出一个函数,放入自己的项目就可以使用啦
//取网页内容,自动识别编码,成功返回内容,失败返回空字符串 public string GetPage(string url){ string result = null; WebResponse response = null; try { HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url); request.Method = "GET"; response = request.GetResponse(); //判断网页编码 string contentType = response.Headers["Content-Type"]; Encoding encoding = null; Regex regex = new Regex("charset\\s*=\\s*(\\S+)", RegexOptions.IgnoreCase); Match match = null; if (contentType != null) { match = regex.Match(contentType); if (match.Success) { try { encoding = Encoding.GetEncoding(match.Groups[1].Value.Trim()); using (TextReader reader = new StreamReader(response.GetResponseStream(), encoding)) { string str = reader.ReadToEnd(); return str; } } catch (Exception exx) { MessageBox.Show("读取网页内容时出错"); return ""; // Console.WriteLine(exx); } } } if (contentType == null || (!match.Success)) { using (TextReader reader = new StreamReader(response.GetResponseStream(), Encoding.Default)) { string str = reader.ReadToEnd(); regex = new Regex("<\\s*meta.+charset\\s*=\\s*(\\S+)\\s*\"", RegexOptions.IgnoreCase); match = regex.Match(str); if (match.Success) { try { encoding = Encoding.GetEncoding(match.Groups[1].Value.Trim()); str = encoding.GetString(Encoding.Default.GetBytes(str)); return str; // Console.WriteLine(str); } catch (Exception exx) { MessageBox.Show("读取网页内容时出错"); return ""; // Console.WriteLine(exx); } } } } return ""; } catch (Exception ex) { // handle error MessageBox.Show(ex.Message); return ""; } }