' Entry point: fetches every link listed in column A of "Sheet1" and lets
' clawResult write the keyword matches found on each page back to the sheet.
'
' Slot n of the list returned by getCompanyList corresponds to sheet row n + 2;
' empty slots (blank cells) are skipped.
Sub testData()
    Dim links() As String
    Dim ff As Long

    links = getCompanyList()

    ' Iterate over the array's real bounds. The previous "0 To lastRow" loop
    ' indexed past the end of the array once the sheet had more rows than the
    ' array had slots, raising "Subscript out of range".
    For ff = LBound(links) To UBound(links)
        If links(ff) <> "" Then
            ' The return value of clawResult is always "", so it is not kept.
            clawResult links(ff), "aaa", ff
        End If
    Next ff

    ' NOTE(review): this force-kills EVERY Internet Explorer process on the
    ' machine, including windows the user opened themselves. Kept because
    ' callers may rely on it to clean up hidden IE instances.
    Shell "taskkill /f /im IEXPLORE.exe"
End Sub
' Returns the contents of column A of "Sheet1" (from row 2 down) as a
' zero-based String array.
'
' The value of sheet row r is stored at index r - 2; blank cells leave their
' slot as "". The array's upper bound is never smaller than 100 (the size of
' the former fixed buffer) and never smaller than the last used row number, so
' callers that loop "0 To lastRow" or "0 To 100" stay within bounds.
Function getCompanyList() As String()
    Const MIN_UPPER_BOUND As Long = 100

    Dim ws As Worksheet
    Dim lastRow As Long
    Dim upperBound As Long
    Dim rowIdx As Long
    Dim cellText As String
    Dim result() As String

    Set ws = Sheets("Sheet1")

    ' Use the sheet's real row count instead of the hard-coded 65536 so that
    ' data below that row in modern workbooks is not ignored.
    lastRow = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row

    ' Size the array from the data; a fixed 0..100 buffer overflowed as soon
    ' as column A had entries beyond row 102.
    upperBound = lastRow
    If upperBound < MIN_UPPER_BOUND Then upperBound = MIN_UPPER_BOUND
    ReDim result(0 To upperBound)

    For rowIdx = 2 To lastRow
        cellText = CStr(ws.Cells(rowIdx, 1).Value)
        If cellText <> "" Then
            result(rowIdx - 2) = cellText
        End If
    Next rowIdx

    getCompanyList = result
End Function
' Loads <link> in a hidden Internet Explorer instance, runs contentContains
' over the inner text of every <p>, <h3> and <a> element, and writes the
' outcome to "Sheet1":
'   column D, row companyLine + 2  <- the link itself
'   column C, row companyLine + 2  <- concatenated matches followed by "OVER"
' Nothing is written when the loaded document is an "AcroPDF" object.
'
' Parameters:
'   link        - URL to navigate to.
'   companyName - not used by this function; kept for interface compatibility.
'   companyLine - zero-based index of the company; sheet row is companyLine + 2.
'
' Returns: always "".
Function clawResult(link As String, companyName As String, companyLine As Long) As String
    Const READYSTATE_COMPLETE As Long = 4

    Dim browser As Object
    Dim doc As Object
    Dim matches As String

    Set browser = CreateObject("InternetExplorer.Application")
    browser.Visible = False
    browser.navigate link

    ' Wait until the browser is idle AND the document is complete. The former
    ' "Do Until ReadyState = 4 Or Busy = False" stopped as soon as either
    ' condition held, which could hand back a page that had not loaded yet.
    Do While browser.Busy Or browser.ReadyState <> READYSTATE_COMPLETE
        DoEvents
    Loop

    Set doc = browser.document
    If TypeName(doc) <> "AcroPDF" Then
        matches = collectTagMatches(doc, "p")
        matches = matches & collectTagMatches(doc, "h3")
        matches = matches & collectTagMatches(doc, "a")

        ' Write to the same sheet the links were read from instead of
        ' whichever sheet happens to be active.
        With Sheets("Sheet1")
            .Cells(companyLine + 2, 4) = link
            .Cells(companyLine + 2, 3) = matches & "OVER"
        End With
    End If

    ' Release the hidden browser; previously every call leaked one IE process.
    Set doc = Nothing
    browser.Quit
    Set browser = Nothing

    clawResult = ""
End Function

' Concatenates the contentContains result for the inner text of every element
' with the given tag name in <doc>.
Private Function collectTagMatches(doc As Object, tagName As String) As String
    Dim elements As Object
    Dim idx As Long
    Dim collected As String

    Set elements = doc.all.tags(tagName)
    For idx = 0 To elements.Length - 1
        collected = collected & contentContains(CStr(elements.Item(idx).innertext))
    Next idx

    collectTagMatches = collected
End Function
' Checks <content> against every keyword in column B of "Sheet1" (from row 2
' down) and returns one entry per keyword found in it, each formatted as
'   vbCrLf & "| <keyword> : <content> |"
' Returns "" when no keyword matches. Matching uses InStr with the module's
' default comparison mode.
Function contentContains(content As String) As String
    Dim ws As Worksheet
    Dim lastRow As Long
    Dim rowIdx As Long
    Dim keyword As String
    Dim found As String

    Set ws = Sheets("Sheet1")

    ' Use the sheet's real row count instead of the hard-coded 65536.
    lastRow = ws.Cells(ws.Rows.Count, 2).End(xlUp).Row

    For rowIdx = 2 To lastRow
        keyword = CStr(ws.Cells(rowIdx, 2).Value)

        ' Skip blank cells: InStr(text, "") returns 1 for any non-empty text,
        ' so an empty keyword used to "match" every piece of content.
        If keyword <> "" Then
            If InStr(content, keyword) > 0 Then
                found = found & vbCrLf & "| " & keyword & " : " & content & " |"
            End If
        End If
    Next rowIdx

    contentContains = found
End Function
分享到:
相关推荐
【标题】"clawer_for_douban:Java的douban的clawer" 提供的信息表明,这是一个使用Java编程语言编写的豆瓣(Douban)数据抓取工具,通常被称为网络爬虫(Web Crawler)。网络爬虫是自动化地从互联网上获取大量信息的...
网盘文件永久链接 九章算法;目录中文件数:10个 2,九章算法基础班;目录中文件数:20个 ...clawer2 crawler1 jiuzhang_mapreduce_1 jiuzhang_mapreduce_2 lbs1.wmv lbs2.wmv 九章系统设计_bigtable .....
Project Web App 参见 在本地运行Tracker Web应用 npm start 在开发模式下运行应用程序。 打开在浏览器中查看。 如果进行编辑,页面将... node ./src/clawer.js并将生成一个data.json5 提交对生成的data.json5请求
2.require_way:nodejs模块的加载方式 3.clawer_github_stars:nodejs爬取github项目star数 4.upload:nodejs文件上传服务器 5.quickrun: 用进程模块从命令行快速启动应用 6.javascript_OOP: javascript面向对象学习...
2. **网络爬虫技术**:网络爬虫是自动遍历互联网并抓取网页信息的程序。在这个案例中,Java爬虫通过模拟用户行为,搜索视觉中国网站上的图片,根据预设的关键词进行筛选和下载。 3. **HTTP/HTTPS协议**:Java爬虫...
8. 版本控制:文件名`JimSunJing-douban_clawer-50c4f38`暗示了使用了版本控制系统,可能是Git,`50c4f38`是一个Git提交哈希,意味着项目源码有版本历史,便于团队协作和代码追踪。 在实际项目中,可能还需要考虑...
PTT Watcher一个分析ptt web版的爬虫主要爬web 版ptt来找热门文章以讨论程度做文章排行(后来加上fb分享做判定) web clawer : python3 (主要为selenium)