HttpClient 和 HtmlParser 配合实现自动登录系统并抽取页面信息

逆风的香1314

浏览: 1453552 次
性别:
来自: 北京

最近访客更多访客>>

hhybq

sichunli_030

wangyy

斐斐宝贝

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

2008-12 ( 57)
2008-11 ( 117)
2008-10 ( 180)
更多存档...

Apache PHP

HtmlParser 的代码接口变化比较多，因此按最新接口重新写了一个示例。废话不多说，贴出代码供大家享用！

/*
* Main.java
*
* Created on 2007年1月19日, 上午9:14
*
* To change this template, choose Tools | Template Manager
* and open the template in the editor.
*/

package wapproxy;

import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.methods.*;
import org.apache.commons.httpclient.params.HttpMethodParams;

import java.io.*;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.*;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

/**
 * Small command-line example that logs in to a web application with Commons
 * HttpClient, downloads a page with the same client, and uses HTMLParser to
 * pull a single table cell (the "unit name") out of the returned HTML.
 *
 * <p>The target URLs, credentials, page charset and table coordinates are
 * fixed constants that match one specific page layout; adjust them for any
 * other site.
 *
 * @author xcz
 */
public class Main {

    /** URL the login form is posted to. */
    private static final String LOGIN_URL = "http://localhost/rcpq/";

    /** URL of the page that contains the unit list. */
    private static final String UNIT_PAGE_URL = "http://localhost/rcpq/unit.php";

    /** Charset used to decode the downloaded page. */
    private static final String PAGE_CHARSET = "GBK";

    /** Zero-based index, among all {@code <table>} tags, of the unit list table. */
    private static final int UNIT_TABLE_INDEX = 17;

    /** Zero-based index of the row to read inside the unit list table. */
    private static final int UNIT_ROW_INDEX = 3;

    /** Zero-based index of the column holding the unit name. */
    private static final int UNIT_NAME_COLUMN_INDEX = 2;

    /** Creates a new instance of Main */
    public Main() {
    }

    /**
     * Logs in, downloads the unit page and prints the unit name to standard output.
     *
     * @param args the command line arguments (unused)
     * @throws IOException if the unit page could not be downloaded
     * @throws IllegalStateException if the page does not have the expected table layout
     * @throws Exception if decoding or parsing the page fails
     */
    public static void main(String[] args) throws Exception {
        // One client is shared by both requests so that whatever state the
        // login request leaves on the client is also used for the page request.
        HttpClient client = new HttpClient();

        login(client);

        byte[] responseBody = fetchPage(client, UNIT_PAGE_URL);
        if (responseBody == null) {
            // Previously this case surfaced as a bare NullPointerException
            // when the bytes were decoded.
            throw new IOException("No response body received from " + UNIT_PAGE_URL);
        }

        String unitName = extractUnitName(new String(responseBody, PAGE_CHARSET));
        System.out.println("单位名称：" + unitName);
    }

    /**
     * Posts the login form. Failures are reported on standard error but are not
     * fatal: the caller still attempts the page request afterwards.
     *
     * @param client the client used to execute the request
     */
    private static void login(HttpClient client) {
        PostMethod postMethod = new PostMethod(LOGIN_URL);

        NameValuePair[] data = {
            new NameValuePair("username", "admin"),
            new NameValuePair("password", "admin"),
            new NameValuePair("dologin", "1"),
        };
        postMethod.setRequestBody(data);

        try {
            int statusCode = client.executeMethod(postMethod);
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("Method failed: " + postMethod.getStatusLine());
            }
        } catch (HttpException e) {
            System.err.println("Fatal protocol violation: " + e.getMessage());
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("Fatal transport error: " + e.getMessage());
            e.printStackTrace();
        } finally {
            // Release the connection.
            postMethod.releaseConnection();
        }
    }

    /**
     * Downloads {@code url} with a GET request and returns the raw response bytes.
     *
     * @param client the client used to execute the request
     * @param url the address to download
     * @return the response body, or {@code null} if the request failed or the
     *         response had no body
     */
    private static byte[] fetchPage(HttpClient client, String url) {
        GetMethod getMethod = new GetMethod(url);

        // Retry handler configured with (3, false).
        getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler(3, false));

        try {
            int statusCode = client.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("Method failed: " + getMethod.getStatusLine());
            }

            // Read the page through a stream rather than getResponseBody().
            InputStream in = getMethod.getResponseBodyAsStream();
            if (in == null) {
                return null;
            }
            byte[] chunk = new byte[4096];
            ByteArrayOutputStream buffer = new ByteArrayOutputStream(1024);
            int bytesRead;
            while ((bytesRead = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, bytesRead);
            }
            return buffer.toByteArray();
        } catch (HttpException e) {
            System.err.println("Fatal protocol violation: " + e.getMessage());
            e.printStackTrace();
            return null;
        } catch (IOException e) {
            System.err.println("Fatal transport error: " + e.getMessage());
            e.printStackTrace();
            return null;
        } finally {
            // Release the connection.
            getMethod.releaseConnection();
        }
    }

    /**
     * Finds the unit list table in {@code html} and returns the text of the
     * first child node of the unit name cell.
     *
     * @param html the decoded page content
     * @return the text of the first child of the unit name cell
     * @throws ParserException if HTMLParser fails to process the page
     * @throws IllegalStateException if the expected table, row, column or cell
     *         content is missing
     */
    private static String extractUnitName(String html) throws ParserException {
        Parser parser = Parser.createParser(html, PAGE_CHARSET);
        NodeFilter filter = new TagNameFilter("table");
        NodeList tables = parser.extractAllNodesThatMatch(filter);

        // Locate the table that holds the unit list.
        if (tables.size() <= UNIT_TABLE_INDEX) {
            throw new IllegalStateException("Expected at least " + (UNIT_TABLE_INDEX + 1)
                    + " tables in the page but found " + tables.size());
        }
        Node candidate = tables.elementAt(UNIT_TABLE_INDEX);
        if (!(candidate instanceof TableTag)) {
            throw new IllegalStateException(
                    "Node at table index " + UNIT_TABLE_INDEX + " is not a table tag");
        }
        TableTag tableTag = (TableTag) candidate;

        TableRow row = tableTag.getRow(UNIT_ROW_INDEX);
        if (row == null) {
            throw new IllegalStateException(
                    "Unit table has no row at index " + UNIT_ROW_INDEX);
        }

        TableColumn[] cols = row.getColumns();
        if (cols == null || cols.length <= UNIT_NAME_COLUMN_INDEX) {
            throw new IllegalStateException(
                    "Unit row has no column at index " + UNIT_NAME_COLUMN_INDEX);
        }

        Node firstChild = cols[UNIT_NAME_COLUMN_INDEX].childAt(0);
        if (firstChild == null) {
            throw new IllegalStateException("Unit name cell is empty");
        }
        return firstChild.getText();
    }

}

分享到：