1.使用 TFHpple
1.1.导入TFHpple
1.2.引入库文件 libxml2.2.tbd(注意不是 libxml2.dylib;从 iOS 9 / Xcode 7 开始不再使用 .dylib,而改用 .tbd)
2. 使用 xpath 获取数据 节点
//*[@id="index_main"]/div/div/h3/a
F12 输入 $x("//*[@id='index_main']/div/div/h3/a")
userName //*[@id='index_main']/div[2]/div/div[3]/a/text() userPicLink //*[@id='index_main']/div[2]/div/div[1]/div/a/img/ src userBlogLink //*[@id='index_main']/div[2]/div/div[3]/a href title //*[@id='index_main']/div[2]/div/h3/a/text() content //*[@id='index_main']/div[2]/div/div[1]/text()[2] articleLink //*[@id='index_main']/div[2]/div/h3/a href seeNum //*[@id='index_main']/div[2]/div/div[3]/span[2]/text() goodNum //*[@id='index_main']/div[2]/div/div[3]/div/div/a[1]/text() badNum //*[@id='index_main']/div[2]/div/div[3]/div/div/a[2]/text() insertTime //*[@id='index_main']/div[2]/div/div[3]/span[3]/text() type html
3. 代码部分
url = http://www.iteye.com/blogs
page = 0; category = 0; [self request]; }

/// Returns the element at `index`, or nil when `index` is past the end of `elements`.
/// Lets callers rely on nil-messaging instead of risking NSRangeException when the
/// per-field XPath result arrays have different lengths.
static id IteyeElementAtIndex(NSArray *elements, NSUInteger index) {
    return index < elements.count ? elements[index] : nil;
}

/// Downloads the blog list page for the current `category` / `page` values (both are
/// defined outside this block), parses it with TFHpple and logs one line per article.
///
/// XPath queries used (all relative to //*[@id='index_main']/div/div):
///   userName      div[3]/a/text()
///   userPicLink   div[1]/div/a/img            (attribute: src)
///   userBlogLink  div[3]/a                    (attribute: href)
///   title         h3/a/text()
///   content       div[1]/text()[2]            (queried per row, see below)
///   articleLink   h3/a                        (attribute: href)
///   seeNum        div[3]/span[2]/text()
///   goodNum       div[3]/div/div/span[1]/text()
///   badNum        div[3]/div/div/span[2]/text()
///   insertTime    div[3]/span[3]/text()
///
/// Missing fields are logged as (null) instead of crashing the loop.
- (void)request {
    NSString *url = [UrlUtil getIteyeBlog:category page:page];

    // Earlier AFNetworking-based attempt, kept for reference:
    // [IteyeBlogAFNetworkingUtil httpRequestWithUrl:url success:^(NSString *result) {
    //     NSLog(@"success =====");
    // } fail:^{
    //     NSLog(@"error =====");
    // }];

    NSLog(@"%@", url);

    // +URLWithString: must not be handed nil, and returns nil for malformed strings.
    NSURL *pageURL = (url.length > 0) ? [NSURL URLWithString:url] : nil;
    if (pageURL == nil) {
        NSLog(@"request: invalid url ===== %@", url);
        return;
    }

    // NOTE(review): this is a synchronous network read and blocks the calling thread;
    // move it off the main thread (or use NSURLSession) if this is called from UI code.
    NSData *htmlData = [[NSData alloc] initWithContentsOfURL:pageURL];
    if (htmlData.length == 0) {
        NSLog(@"request: no data received ===== %@", url);
        return;
    }

    TFHpple *doc = [[TFHpple alloc] initWithHTMLData:htmlData];

    NSArray *arrArticleLink = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/h3/a"];
    NSArray *arrBadNum = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[3]/div/div/span[2]/text()"];
    NSArray *arrGoodNum = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[3]/div/div/span[1]/text()"];
    NSArray *arrInsertTime = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[3]/span[3]/text()"];
    NSArray *arrSeeNum = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[3]/span[2]/text()"];
    NSArray *arrTitle = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/h3/a/text()"];
    NSArray *arrUserBlogLink = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[3]/a"];
    NSArray *arrUserName = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[3]/a/text()"];
    NSArray *arrUserPicLink = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[1]/div/a/img"];

    // Per-row excerpt containers look like:
    //   //*[@id="index_main"]/div[1]/div/div[1]
    //   //*[@id="index_main"]/div[29]/div/div[1]
    //   //*[@id="index_main"]/div[30]/div/div[1]

    NSUInteger rowCount = arrTitle.count;
    NSLog(@"arrTitle count======%lu", (unsigned long)rowCount);

    for (NSUInteger i = 0; i < rowCount; i++) {
        NSString *userName = [IteyeElementAtIndex(arrUserName, i) content];
        NSString *userPicLink = [IteyeElementAtIndex(arrUserPicLink, i) objectForKey:@"src"];
        NSString *userBlogLink = [IteyeElementAtIndex(arrUserBlogLink, i) objectForKey:@"href"];
        NSString *title = [IteyeElementAtIndex(arrTitle, i) content];

        // The excerpt is the second text node of the row's first inner div; XPath
        // positions are 1-based, hence i + 1. Rows without that node keep @"".
        NSString *excerptQuery =
            [[NSString alloc] initWithFormat:@"//*[@id='index_main']/div[%lu]/div/div[1]/text()",
                                             (unsigned long)(i + 1)];
        NSArray *excerptNodes = [doc searchWithXPathQuery:excerptQuery];
        NSString *content = @"";
        if (excerptNodes.count > 1) {
            content = [excerptNodes[1] content];
        }

        NSString *articleLink = [IteyeElementAtIndex(arrArticleLink, i) objectForKey:@"href"];
        NSString *seeNum = [IteyeElementAtIndex(arrSeeNum, i) content];
        NSString *goodNum = [IteyeElementAtIndex(arrGoodNum, i) content];
        NSString *badNum = [IteyeElementAtIndex(arrBadNum, i) content];
        NSString *insertTime = [IteyeElementAtIndex(arrInsertTime, i) content];

        NSLog(@"%lu == %@-%@-%@-%@-%@-%@-%@-%@-%@-%@-",
              (unsigned long)i, userName, userPicLink, userBlogLink, title, content,
              articleLink, seeNum, goodNum, badNum, insertTime);
    }
}
捐助开发者
在兴趣的驱动下,写一个免费的东西,有欣喜,也有汗水,希望你喜欢我的作品,同时也能支持一下。 当然,有钱捧个钱场(右上角的爱心标志,支持支付宝和PayPal捐助),没钱捧个人场,谢谢各位。
谢谢您的赞助,我会做的更好!
相关推荐
iteye博客抓取 网页解析 关键字提取 jsoup解析网页 包含数据库文件
WII用的资源IOS57.IOS57-64-v5918.wad
标题 "http://topkinghat.iteye.com/blog/840706" 提到的博客链接实际上指向了“姜铁”的个人博客文章,而描述中的 "NULL" 暂无具体信息。不过,标签“源码”和“工具”暗示了这篇博客可能涉及到软件开发的源代码...
总结一下,这个压缩包包含的脚本主要用于从iteye博客平台抓取和整理文章,然后将其转换为便于阅读和检索的CHM格式。这个过程利用了JavaScript编写,并且可能依赖于Firebug进行调试和执行。源代码的提供允许用户了解...
NULL 博文链接:https://leobluewing.iteye.com/blog/2020222
ITeye Java编程 Spring框架 AJAX技术 Agile敏捷软件开发 ruby on rails实践 - ITeye做最棒的软件开发交流社区.files\homepage.css
NULL 博文链接:https://zhousheng193.iteye.com/blog/1096241
NULL 博文链接:https://hj198703.iteye.com/blog/2029253
NULL 博文链接:https://zhangmingwei.iteye.com/blog/1990664
js中escape对应的C#解码函数 - Corrinejtt - ITeye
NULL 博文链接:https://374016526.iteye.com/blog/1480438
NULL 博文链接:https://free0007.iteye.com/blog/2185887
NULL 博文链接:https://lihao312.iteye.com/blog/1728307
NULL 博文链接:https://zhengkaifl.iteye.com/blog/1190201
NULL 博文链接:https://peterlij.iteye.com/blog/2361199
测试用的HelloWorld工程源文件 博文链接:https://anweixiao.iteye.com/blog/126252
NULL 博文链接:https://hobo86.iteye.com/blog/1042741
eclipse开发环境(支持tomcat、maven、jetty、JBoss) - - ITeye技术网站
NULL 博文链接:https://st4024589553.iteye.com/blog/2378674