`
knight_black_bob
  • 浏览: 853233 次
  • 性别: Icon_minigender_1
  • 来自: 北京
社区版块
存档分类
最新评论

ios tfhpple iteye 抓取

    博客分类:
  • ios
阅读更多




 
 

1.使用 tfhpple 

  1.1.导入TFHpple

  1.2.引入库文件 libxml2.2.tbd,而不是 libxml2.dylib(iOS 9 / Xcode 7 起不再使用 .dylib)

 

2. 使用 xpath 获取数据 节点



 
//*[@id="index_main"]/div/div/h3/a

 

F12    输入 $x("//*[@id='index_main']/div/div/h3/a") 

 

 

 

 

userName	//*[@id='index_main']/div[2]/div/div[3]/a/text()
 userPicLink    //*[@id='index_main']/div[2]/div/div[1]/div/a/img/    src
 userBlogLink   //*[@id='index_main']/div[2]/div/div[3]/a             href
 title          //*[@id='index_main']/div[2]/div/h3/a/text()
 content        //*[@id='index_main']/div[2]/div/div[1]/text()[2] 
 articleLink    //*[@id='index_main']/div[2]/div/h3/a                 href
 seeNum         //*[@id='index_main']/div[2]/div/div[3]/span[2]/text()
 goodNum        //*[@id='index_main']/div[2]/div/div[3]/div/div/a[1]/text()
 badNum         //*[@id='index_main']/div[2]/div/div[3]/div/div/a[2]/text()
 insertTime     //*[@id='index_main']/div[2]/div/div[3]/span[3]/text()
 type
 html

 

 

 

 

3. 代码部分

url = http://www.iteye.com/blogs 

 

 

 

page = 0;
    category = 0;
    [self request];
}

/// Downloads the iteye blog listing page for the current `category` / `page`
/// (both declared outside this method) and logs one line per article found
/// under the `#index_main` container.
///
/// XPath queries used (relative to each article entry):
///   userName      //*[@id='index_main']/div/div/div[3]/a/text()
///   userPicLink   //*[@id='index_main']/div/div/div[1]/div/a/img          (attribute src)
///   userBlogLink  //*[@id='index_main']/div/div/div[3]/a                  (attribute href)
///   title         //*[@id='index_main']/div/div/h3/a/text()
///   content       //*[@id='index_main']/div[N]/div/div[1]/text()          (second text node)
///   articleLink   //*[@id='index_main']/div/div/h3/a                      (attribute href)
///   seeNum        //*[@id='index_main']/div/div/div[3]/span[2]/text()
///   goodNum       //*[@id='index_main']/div/div/div[3]/div/div/span[1]/text()
///   badNum        //*[@id='index_main']/div/div/div[3]/div/div/span[2]/text()
///   insertTime    //*[@id='index_main']/div/div/div[3]/span[3]/text()
///
/// The title list drives the loop. Every other field is looked up by the same
/// index and is logged as (null) when the page has no matching node, instead
/// of raising NSRangeException.
-(void) request{
    NSString *url = [UrlUtil getIteyeBlog:category page:page];
    NSLog(@"%@",url);

    NSURL *pageURL = (url.length > 0) ? [NSURL URLWithString:url] : nil;
    if (pageURL == nil) {
        NSLog(@"request: invalid url");
        return;
    }

    // NOTE(review): -initWithContentsOfURL: is synchronous and blocks the calling
    // thread for the whole download. If this method can run on the main thread,
    // move the fetch to a background queue or NSURLSession -- confirm with callers.
    NSData *htmlData = [[NSData alloc] initWithContentsOfURL:pageURL];
    if (htmlData.length == 0) {
        NSLog(@"request: no data received");
        return;
    }
    TFHpple *doc = [[TFHpple alloc] initWithHTMLData:htmlData];

    NSArray *arrArticleLink  = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/h3/a"];
    NSArray *arrBadNum       = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[3]/div/div/span[2]/text()"];
    NSArray *arrGoodNum      = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[3]/div/div/span[1]/text()"];
    NSArray *arrInsertTime   = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[3]/span[3]/text()"];
    NSArray *arrSeeNum       = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[3]/span[2]/text()"];
    NSArray *arrTitle        = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/h3/a/text()"];
    NSArray *arrUserBlogLink = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[3]/a"];
    NSArray *arrUserName     = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[3]/a/text()"];
    NSArray *arrUserPicLink  = [doc searchWithXPathQuery:@"//*[@id='index_main']/div/div/div[1]/div/a/img"];

    // Bounds-checked lookup: the per-field result arrays are not guaranteed to
    // have the same length as arrTitle (an entry may lack an avatar, counter, ...).
    id (^elementAt)(NSArray *, NSUInteger) = ^id(NSArray *nodes, NSUInteger index) {
        return (index < nodes.count) ? nodes[index] : nil;
    };

    NSUInteger articleCount = [arrTitle count];
    NSLog(@"arrTitle count======%lu",(unsigned long)articleCount );
    for (NSUInteger i = 0; i < articleCount; i++) {

        NSString *userName     = [elementAt(arrUserName, i) content];
        NSString *userPicLink  = [elementAt(arrUserPicLink, i) objectForKey:@"src"];
        NSString *userBlogLink = [elementAt(arrUserBlogLink, i) objectForKey:@"href"];
        NSString *title        = [elementAt(arrTitle, i) content];

        // The summary is the second text node of the i-th entry (XPath positions
        // are 1-based, hence i + 1). It stays empty when that node is absent.
        NSString *contentQuery = [[NSString alloc] initWithFormat:@"//*[@id='index_main']/div[%lu]/div/div[1]/text()", (unsigned long)(i + 1)];
        NSArray *contentNodes = [doc searchWithXPathQuery:contentQuery];
        NSString *content = @"";
        if (contentNodes.count > 1) {
            content = [contentNodes[1] content];
        }

        NSString *articleLink = [elementAt(arrArticleLink, i) objectForKey:@"href"];
        NSString *seeNum      = [elementAt(arrSeeNum, i) content];
        NSString *goodNum     = [elementAt(arrGoodNum, i) content];
        NSString *badNum      = [elementAt(arrBadNum, i) content];
        NSString *insertTime  = [elementAt(arrInsertTime, i) content];

        NSLog(@"%lu == %@-%@-%@-%@-%@-%@-%@-%@-%@-%@-",(unsigned long)i,userName,userPicLink,userBlogLink,title,content,articleLink,seeNum,goodNum,badNum,insertTime );
    }

}

 

 

 

 

 

 

 

 

 

 

 

 

 

 

捐助开发者

在兴趣的驱动下,写一个免费的东西,有欣喜,也还有汗水,希望你喜欢我的作品,同时也能支持一下。 当然,有钱捧个钱场(右上角的爱心标志,支持支付宝和PayPal捐助),没钱捧个人场,谢谢各位。



 
 
 谢谢您的赞助,我会做得更好!

 

 

 

 

  • 大小: 610.9 KB
  • 大小: 322.7 KB
  • 大小: 768.1 KB
  • 大小: 165.8 KB
1
1
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics