标签:

采集数据源:http://www.sgcc.com.cn/xwzx/gsyw/
/// <summary>
        /// Requests the resource at <paramref name="Url"/> and returns the whole response body as text.
        /// </summary>
        /// <param name="Url">Address to request; it is handed to <c>System.Net.WebRequest.Create</c>.</param>
        /// <param name="type">Name of the character encoding used to decode the response (for example "utf-8" or "gb2312").</param>
        /// <returns>
        /// The complete response text, or an empty string when the URL is invalid, the encoding name is
        /// unknown, or the request or read fails. This method is best-effort and never throws.
        /// </returns>
        public static string GetUrltoHtml(string Url, string type)
        {
            try
            {
                // Resolve the encoding first so an unknown name fails before any connection is opened.
                System.Text.Encoding encoding = System.Text.Encoding.GetEncoding(type);

                System.Net.WebRequest wReq = System.Net.WebRequest.Create(Url);

                // Dispose the response, the stream and the reader on every path, including failures.
                using (System.Net.WebResponse wResp = wReq.GetResponse())
                using (System.IO.Stream respStream = wResp.GetResponseStream())
                using (System.IO.StreamReader reader = new System.IO.StreamReader(respStream, encoding))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (System.Exception ex)
            {
                // Callers rely on "" as the failure signal, so keep that contract but leave a trace
                // of what went wrong instead of discarding the exception silently.
                System.Diagnostics.Trace.WriteLine("GetUrltoHtml failed for '" + Url + "': " + ex.Message);
                return "";
            }
        }
 /// <summary>
        /// Extracts the text that lies between the first occurrence of a start marker and the
        /// next occurrence of an end marker after it.
        /// </summary>
        /// <param name="strSource">Text to search.</param>
        /// <param name="strIndexOf">Start marker; the result begins immediately after its first occurrence.</param>
        /// <param name="strLastOf">End marker; the result ends immediately before its first occurrence following the start marker.</param>
        /// <returns>
        /// The text between the two markers, or an empty string when any argument is null, the source
        /// is empty, or either marker cannot be found.
        /// </returns>
        public static string GetSubString(string strSource, string strIndexOf, string strLastOf)
        {
            if (string.IsNullOrEmpty(strSource) || strIndexOf == null || strLastOf == null)
            {
                return string.Empty;
            }

            // Ordinal comparison: markers are literal markup, and culture-sensitive searches can
            // miss sequences such as "\n" on ICU-based runtimes.
            int markerStart = strSource.IndexOf(strIndexOf, System.StringComparison.Ordinal);
            if (markerStart < 0)
            {
                return string.Empty;
            }

            int contentStart = markerStart + strIndexOf.Length;

            // Search for the end marker only after the start marker; a missing end marker yields
            // an empty result instead of a negative length being passed to Substring.
            int contentEnd = strSource.IndexOf(strLastOf, contentStart, System.StringComparison.Ordinal);
            if (contentEnd < 0)
            {
                return string.Empty;
            }

            return strSource.Substring(contentStart, contentEnd - contentStart);
        }
采集网页数据生成到静态模板newslist.html文件中(正则表达式)
标签:
原文地址:http://www.cnblogs.com/500k/p/4824086.html