码迷,mamicode.com
首页 > Web开发 > 详细

爬去网页离线数据

时间:2018-12-09 14:17:49      阅读:223      评论:0      收藏:0      [点我收藏+]

标签:bytes   sof   ble   until   row   public   参数   nts   body   

重命名文件

  On Error Resume Next
    Dim dd As String
    Dim k%

    ‘‘提取文件夹名称
    dd = Dir(Sheets("Sheet1").Cells(1, 2) & "*", vbDirectory)
    Do
        dd = Dir
        ‘‘判断是否为文件夹
        If dd <> "" And InStr(1, dd, ".") = 0 Then
            Dim aa
            Set aa = CreateObject("Scripting.FileSystemObject")
            k = k + 1
            ‘‘文件夹重命名
            aa.MoveFolder Sheets("Sheet1").Cells(1, 2) & dd, Sheets("Sheet1").Cells(1, 2) & "\改名" & k
        End If

    Loop Until Len(dd) = 0
    Set aa = Nothing

‘爬去数据

  Dim arr, brr, i%, s$, html, Ta, n%, j%, str$, Url$, Db, tr, td
    tempPath = Cells(1, 2)
    If Mid(tempPath, Len(tempPath), 1) <> "\" Then
        tempPath = tempPath & "\"
    End If
    If Dir(tempPath, vbDirectory) = "" Then
        MsgBox "错误!需要处理的文件目录不存在 " & tempPath
        Exit Sub
    End If
    Dim fn
    fn = Dir(tempPath & "*.htm")
    Do While fn <> ""
        Set html = CreateObject("htmlfile")
        dataTxt = GetCode("UTF-8", tempPath & fn) ‘tempPath & fn
        html.body.innerhtml = dataTxt
        If (InStr(dataTxt, "tooltip-title") = 0) Then
            oneclick dataTxt, onenum
            onenum = onenum + 1
        Else
           twoclick dataTxt, twonum
           twonum = twonum + 1
        End If
      fn = Dir()
    Loop

‘table数据

Set Db = html.all.tags("table")(3)
         i = 0: j = 0
         For Each tr In Db.Rows
            m = 0
            i = i + 1: j = 0
            If i > 1 Then
                For Each td In tr.Cells
                    m = m + 1
                    Sheets("Sheet3").Cells(m, pagenum) = Replace(td.innerText, Chr(10) & Chr(10), Chr(10))
                Next
            End If

    pagenum = pagenum + 1

Next

‘页面编码

Public Function GetCode(CodeBase, Url) ‘第一个参数是设置编码方式(GB2312或UTF-8)第二个参数是地址.
    Dim xmlHTTP1
    Set xmlHTTP1 = CreateObject("Microsoft.XMLHTTP")
    xmlHTTP1.Open "get", Url, True
    xmlHTTP1.send
    While xmlHTTP1.readyState <> 4
    DoEvents
    Wend
    GetCode = xmlHTTP1.responseBody
    If CStr(GetCode) <> "" Then GetCode = BytesToBstr(GetCode, CodeBase)
    Set xmlHTTP1 = Nothing
End Function


Public Function BytesToBstr(strBody, CodeBase)
    Dim ObjStream
    Set ObjStream = CreateObject("Adodb.Stream")
    With ObjStream
    .Type = 1
    .Mode = 3
    .Open
    .write strBody
    .Position = 0
    .Type = 2
    .Charset = CodeBase
    BytesToBstr = .ReadText
    .Close
    End With
    Set ObjStream = Nothing
End Function

 

爬去网页离线数据

标签:bytes   sof   ble   until   row   public   参数   nts   body   

原文地址:https://www.cnblogs.com/bignine/p/10090542.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!