/// <summary>
/// Downloads the resource at <paramref name="url"/> and decodes it to text, preferring
/// the charset declared inside the content (for example <c>&lt;meta charset="utf-8"&gt;</c>)
/// over the system default encoding.
/// </summary>
/// <param name="url">Address of the resource to download.</param>
/// <returns>
/// The downloaded bytes decoded with the declared charset when one is found and
/// recognised; otherwise decoded with <see cref="Encoding.Default"/>.
/// </returns>
/// <remarks>
/// Download failures are not handled here; whatever <see cref="WebClient.DownloadData(string)"/>
/// throws propagates to the caller.
/// </remarks>
public static string GetInnerHtml(string url)
{
    byte[] htmlbuffer;

    // WebClient is IDisposable; release it as soon as the bytes are in memory.
    using (WebClient wc = new WebClient())
    {
        wc.Credentials = CredentialCache.DefaultCredentials;
        htmlbuffer = wc.DownloadData(url);
    }

    // First pass: decode with the default encoding only to sniff the charset
    // declaration, which is plain ASCII and therefore readable either way.
    string content = Encoding.Default.GetString(htmlbuffer);

    Match match = Regex.Match(content, "charset\\s*=\\s*[\\W]?\\s*([\\w-]+)", RegexOptions.IgnoreCase);
    if (!match.Success)
    {
        return content;
    }

    Encoding encoding;
    try
    {
        encoding = Encoding.GetEncoding(match.Groups[1].Value.Trim());
    }
    catch (System.ArgumentException)
    {
        // The page declared a charset name this platform does not know.
        // Keep the default-encoding text instead of failing the whole call.
        return content;
    }

    // Second pass: decode the raw bytes again with the declared charset.
    return encoding.GetString(htmlbuffer);
}
2014年4月10日 星期四
[C#] 解析網頁 亂碼去去
[C#] 擷取網頁 HtmlAgilityPack
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using HtmlAgilityPack;
namespace agilitypack
{
    /// <summary>
    /// Console sample that loads a web page with HtmlAgilityPack and prints the
    /// trimmed inner text of every node matched by a fixed XPath expression.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the page, prints the matching nodes, then waits for Enter.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            HtmlWeb webClient = new HtmlWeb();
            HtmlDocument doc = webClient.Load("http://msdn.microsoft.com/zh-tw/ee787055.aspx");
            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("/html/body/div/div[2]/div/div/div[4]/div/div/h2[5]");

            // SelectNodes yields null (not an empty collection) when the XPath
            // matches nothing, so guard before enumerating.
            if (nodes != null)
            {
                foreach (HtmlNode node in nodes)
                {
                    Console.WriteLine(node.InnerText.Trim());
                }
            }
            else
            {
                Console.WriteLine("No matching nodes found.");
            }

            Console.WriteLine("Completed.");
            Console.ReadLine();
        }
    }
}
[C#] 測速用~~
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Web;
- using System.Web.UI;
- using System.Web.UI.WebControls;
- using HtmlAgilityPack;
- using System.Net;
- using System.IO;
- using System.Threading;
- using System.Diagnostics;
namespace NoSave.web
{
    /// <summary>
    /// Web Forms page that times three ways of fetching the same URL
    /// (HtmlWeb.Load, WebRequest.GetResponse, WebClient.DownloadData)
    /// and writes the elapsed times to the response as an HTML table.
    /// </summary>
    public partial class Static : System.Web.UI.Page
    {
        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Runs the three fetches one after another, timing each, and prints the results.
        /// </summary>
        /// <param name="sender">Control that raised the click event.</param>
        /// <param name="e">Event data (not used).</param>
        protected void btnAll_Click(object sender, EventArgs e)
        {
            // Page to fetch.
            string strURI = "http://tw.stock.yahoo.com/q/q?s=2002";

            // HtmlAgilityPack
            //----------------------------------
            HtmlWeb hw = new HtmlWeb();
            // Work around encoding problems: skip auto-detection and force the system default.
            hw.AutoDetectEncoding = false;
            hw.OverrideEncoding = System.Text.Encoding.Default;
            Stopwatch stop_Agility = new Stopwatch();
            // Start timing.
            stop_Agility.Start();
            HtmlDocument hdoc = hw.Load(strURI);
            stop_Agility.Stop();
            string str_Agility = FormatElapsed(stop_Agility.Elapsed);

            // WebRequest
            //----------------------------------
            // NOTE: GetResponse is timed without reading the response body, unlike the
            // other two measurements, so the figures are not strictly comparable.
            Stopwatch stop_WebRequest = new Stopwatch();
            // Start timing.
            stop_WebRequest.Start();
            System.Net.WebRequest request = System.Net.WebRequest.Create(strURI);
            // Dispose the response so the underlying connection is released.
            using (System.Net.WebResponse response = request.GetResponse())
            {
                // Stop timing.
                stop_WebRequest.Stop();
            }
            string str_WebRequest = FormatElapsed(stop_WebRequest.Elapsed);

            // WebClient
            //----------------------------------
            Stopwatch stop_Client = new Stopwatch();
            // Start timing.
            stop_Client.Start();
            using (WebClient client = new WebClient())
            {
                client.DownloadData(strURI);
                // Stop timing.
                stop_Client.Stop();
            }
            string str_Client = FormatElapsed(stop_Client.Elapsed);

            // Print the results.
            Response.Write(string.Format("<br>Total:<br><table><tr><th align='right'>HtmlWeb.Load():</th><td>{0}</td></tr><tr><th align='right'>WebRequest.GetResponse():</th><td>{1}</td></tr><tr><th align='right'>WebClient.DownloadData():</th><td>{2}</td></tr></table>", str_Agility, str_WebRequest, str_Client));
        }

        /// <summary>
        /// Formats an elapsed time as hours:minutes:seconds:hundredths, two digits each.
        /// </summary>
        /// <param name="ts">Elapsed time to format.</param>
        /// <returns>The formatted time string.</returns>
        private static string FormatElapsed(TimeSpan ts)
        {
            // Milliseconds / 10 turns 0-999 ms into 0-99 hundredths of a second.
            return string.Format("{0:00}:{1:00}:{2:00}:{3:00}", ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds / 10);
        }
    }
}
訂閱:
文章 (Atom)