// Tags: fetch (拉取) html catch write class highlight message stream nts
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Net;
using System.IO;
namespace xsharp
{
/// <summary>
/// Console program that downloads one chapter page, extracts the chapter text
/// from its "chaptercontent" div, prints it and saves it to a file.
/// </summary>
class Program
{
    // Page that is downloaded when the program runs.
    const string PageUrl = "http://wap.xxbiquge.com/59_59865/3184122.html";

    // File that receives the extracted chapter text.
    const string OutputPath = "G:\\content.html";

    // Number of characters skipped from the start of the first "</p>".
    // This is one more than the length of "</p>"; presumably it also drops the
    // single character (line break) that follows the tag - TODO confirm against the live page.
    const int HeaderSkip = 5;

    /// <summary>
    /// Extracts the chapter text from the HTML of a chapter page.
    /// The text is taken from inside the first <c>&lt;div id="chaptercontent…</c> element
    /// (up to the first <c>&lt;/div&gt;</c>), starting <see cref="HeaderSkip"/> characters
    /// into the first <c>&lt;/p&gt;</c> and ending at the next <c>&lt;p style</c>.
    /// Non-breaking spaces become plain spaces and <c>&lt;br /&gt;</c> becomes a line break.
    /// </summary>
    /// <param name="sHtml">Full HTML of the page; may be null or empty.</param>
    /// <returns>The extracted text, or <c>null</c> when any expected marker is missing.</returns>
    internal static string extractContent(string sHtml)
    {
        if (string.IsNullOrEmpty(sHtml))
        {
            return null;
        }

        // IndexOf reports "not found" as -1; index 0 is a valid match position.
        // Ordinal comparison is used because the markers are markup, not linguistic text.
        int divStart = sHtml.IndexOf("<div id=\"chaptercontent", StringComparison.Ordinal);
        if (divStart < 0)
        {
            return null;
        }

        int divEnd = sHtml.IndexOf("</div>", divStart + 1, StringComparison.Ordinal);
        if (divEnd < 0)
        {
            return null;
        }

        string divSection = sHtml.Substring(divStart, divEnd - divStart);

        int headerEnd = divSection.IndexOf("</p>", StringComparison.Ordinal);
        if (headerEnd < 0)
        {
            return null;
        }

        // A start index beyond the end of the string would make IndexOf throw.
        int contentStart = headerEnd + HeaderSkip;
        if (contentStart > divSection.Length)
        {
            return null;
        }

        int contentEnd = divSection.IndexOf("<p style", contentStart, StringComparison.Ordinal);
        if (contentEnd < 0)
        {
            return null;
        }

        string content = divSection.Substring(contentStart, contentEnd - contentStart);

        // Normalize both the HTML entity and the literal U+00A0 character to a plain space.
        content = content.Replace("&nbsp;", " ").Replace('\u00A0', ' ');
        return content.Replace("<br />", Environment.NewLine);
    }

    /// <summary>
    /// Extracts the chapter text from <paramref name="sHtml"/>, prints it to the console
    /// and writes it to <see cref="OutputPath"/>.
    /// </summary>
    /// <param name="sHtml">Full HTML of the downloaded page.</param>
    /// <returns>0 on success; -1 when the chapter text could not be located (nothing is written).</returns>
    static int writeContent(string sHtml)
    {
        string content = extractContent(sHtml);
        if (content == null)
        {
            return -1;
        }

        Console.WriteLine(content);

        // Save the extracted text to the output file.
        using (StreamWriter sw = new StreamWriter(OutputPath))
        {
            sw.Write(content);
        }

        return 0;
    }

    /// <summary>
    /// Downloads the chapter page, extracts and saves its text, then waits for Enter
    /// so the console window does not close immediately.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        try
        {
            string pageHtml;

            // WebClient is disposable; release it as soon as the download is done.
            using (WebClient webClient = new WebClient())
            {
                // Credentials used to authenticate the request to the Internet resource.
                webClient.Credentials = CredentialCache.DefaultCredentials;

                // Download the raw bytes of the page.
                byte[] pageData = webClient.DownloadData(PageUrl);

                // The page is decoded as UTF-8; use Encoding.Default instead if the site serves GB2312.
                pageHtml = Encoding.UTF8.GetString(pageData);
            }

            if (writeContent(pageHtml) != 0)
            {
                Console.WriteLine("Chapter content was not found in the downloaded page.");
            }
        }
        catch (WebException webEx)
        {
            Console.WriteLine(webEx.Message);
        }
        catch (IOException ioEx)
        {
            // E.g. the output drive or directory does not exist.
            Console.WriteLine(ioEx.Message);
        }
        catch (UnauthorizedAccessException accessEx)
        {
            Console.WriteLine(accessEx.Message);
        }
        finally
        {
            // Pause so the console window stays open, including after an error.
            Console.ReadLine();
        }
    }
}
}
// Tags: fetch (拉取) html catch write class highlight message stream nts
// Original article: http://www.cnblogs.com/yylingyao/p/7162059.html