`

CSDN自动回复灌水乐园帖子-httpClient篇

阅读更多

package com.ws;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
// 1. First, download Apache Commons HttpClient (the commons-httpclient jar) and add it to the classpath.
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;

/**
 * Posts a canned reply to a list of CSDN forum threads using Apache Commons
 * HttpClient 3.x.
 *
 * <p>Authentication is done by replaying a cookie copied from a logged-in
 * browser session (see {@link #COOKIE}); no login request is made. The reply
 * URLs are discovered by {@code TestUrl}.
 *
 * @author zk
 */
public class Csdn {
    /** Raw {@code Cookie} header value of a logged-in session; replace the placeholder before running. */
    public static String COOKIE = "你的登陆后cookie";

    /** Character set used for the posted form body and for decoding response bodies. */
    private static final String CONTENT_CHARSET = "UTF-8";

    /** Name of the host-level HttpClient parameter holding headers that are added to every request. */
    private static final String DEFAULT_HEADERS_PARAM = "http.default-headers";

    /**
     * Collects the reply URLs of the threads found by {@code TestUrl} and posts
     * the canned reply to each of them.
     *
     * @param args ignored
     * @throws Exception if fetching the thread list or posting a reply fails
     */
    // Suppression is needed only while TestUrl.getCsdn is declared to return a raw List.
    @SuppressWarnings("unchecked")
    public static void main(String[] args) throws Exception {
        List<String> urlList = TestUrl.getCardPostUrl(TestUrl.getCsdn(null));
        getCard(urlList);
    }

    /**
     * Smoke test: fetches {@code http://hi.csdn.net/my.html} with the session
     * cookie and prints the response body to standard output.
     *
     * @throws Exception if the request fails
     */
    public static void getMethodTest() throws Exception {
        HttpClient hc = getHc();
        GetMethod getMethod = new GetMethod("http://hi.csdn.net/my.html");

        List<Header> headers = new ArrayList<Header>();
        headers.add(new Header("Proxy-Connection", "keep-alive"));
        headers.add(new Header("Cookie", COOKIE));
        hc.getHostConfiguration().getParams().setParameter(DEFAULT_HEADERS_PARAM, headers);

        try {
            int statusCode = hc.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("Method failed: " + getMethod.getStatusLine());
            }
            byte[] responseBody = getMethod.getResponseBody();
            if (responseBody != null) {
                // Decode with an explicit charset rather than the platform default.
                System.out.println(new String(responseBody, CONTENT_CHARSET));
            }
        } finally {
            // HttpClient 3.x requires the connection to be released explicitly.
            getMethod.releaseConnection();
        }
    }

    /**
     * Creates an {@link HttpClient} that handles cookies the way browsers do
     * and whose default host is {@code www.csdn.net:80} over HTTP.
     *
     * @return a newly configured client
     */
    public static HttpClient getHc() {
        HttpClient httpClient = new HttpClient();
        httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
        // setHost expects a bare host name; the scheme is given by the third argument.
        httpClient.getHostConfiguration().setHost("www.csdn.net", 80, "http");
        return httpClient;
    }

    /**
     * Posts the canned reply form to every URL in {@code urlList}. A non-200
     * status is reported on standard error and processing continues with the
     * next URL.
     *
     * @param urlList reply-form URLs to post to; {@code null} is treated as empty
     * @throws Exception if a request cannot be executed
     */
    public static void getCard(List<String> urlList) throws Exception {
        if (urlList == null) {
            return;
        }
        // Headers and form fields do not depend on the URL, so build them once.
        List<Header> headers = replyHeaders();
        NameValuePair[] params = replyForm();

        for (String url : urlList) {
            // A fresh client per URL, as before, so no cookie state is carried between posts.
            HttpClient httpClient = getHc();
            httpClient.getHostConfiguration().getParams().setParameter(DEFAULT_HEADERS_PARAM, headers);

            PostMethod p = new PostMethod(url);
            p.setRequestBody(params);
            try {
                int statusCode = httpClient.executeMethod(p);
                if (statusCode != HttpStatus.SC_OK) {
                    System.err.println("Method failed: " + p.getStatusLine());
                }
                System.out.println("Hello,World");
            } finally {
                // Without this every iteration leaks its connection.
                p.releaseConnection();
            }
        }
    }

    /** Builds the browser-like request headers, including the session cookie, sent with each reply. */
    private static List<Header> replyHeaders() {
        List<Header> headers = new ArrayList<Header>();
        headers.add(new Header(
                "User-Agent",
                "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7 GTB6 (.NET CLR 1.1.4322)"));
        headers.add(new Header("Proxy-Connection", "keep-alive"));
        headers.add(new Header("Cookie", COOKIE));
        headers.add(new Header("Content-Type",
                "application/x-www-form-urlencoded;charset=" + CONTENT_CHARSET));
        return headers;
    }

    /** Builds the reply form fields in the order view state, event validation, reply body, submit button. */
    private static NameValuePair[] replyForm() {
        // NOTE(review): __VIEWSTATE and __EVENTVALIDATION are hard-coded values, presumably
        // captured from one rendering of the reply page; confirm the server accepts them
        // for every thread.
        NameValuePair viewState = new NameValuePair(
                "__VIEWSTATE",
                "/wEPDwUKMTA2MTA3Njg5NA9kFgICCQ9kFgJmD2QWAgIFD2QWAmYPZBYCZg8PFgIeBE1vZGULKiVTeXN0ZW0uV2ViLlVJLldlYkNvbnRyb2xzLlRleHRCb3hNb2RlARYCHgVzdHlsZQUYaGVpZ2h0OjE4MHB4O3dpZHRoOjEwMCU7ZGRpl2NuIb2XmIUODhEniCtEXExdOA==");
        NameValuePair eventValidation = new NameValuePair(
                "__EVENTVALIDATION",
                "/wEWAwLtl7ScBQK6873ZCgK3mOXeAjqcUaoqnb3Nj0uKUrGKImKcexCG");
        NameValuePair replyBody = new NameValuePair(
                "tb_ReplyBody$_$Editor",
                "[img=http://forum.csdn.net/PointForum/ui/scripts/csdn/Plugin/003/monkey/1.gif][/img]");
        NameValuePair submit = new NameValuePair("bt_submit", "提交回复");
        return new NameValuePair[] { viewState, eventValidation, replyBody, submit };
    }
}



package com.ws;

import java.awt.Image;
import java.awt.image.BufferedImage;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.lang.StringUtils;


public class TestUrl {

public static final String COOKIE = "登陆后cookie"                  
* 连接超时
*/
private static int connectTimeOut = 5000;

/**
* 读取数据超时
*/
private static int readTimeOut = 10000;

/**
* 请求编码
*/
private static String requestEncoding = "GBK";









/**
* 得到大分类的帖子 如:java
* @param java
* @return
* @throws Exception
*/
public static List getCsdn(String java) throws Exception {
List<String> urlList = new ArrayList<String>();
String url = "http://forum.csdn.net/SList/FreeZone";
String patternStrs = "<td class=\"caption\" style=\"word-break: break-all\">(.*?)</td>";
String p = "<a target=\"_blank\" title=\"(.*?)</a>";
String href = "<a target=\"_blank\" title=\"(.*?)\" href=\"(.*?)\" >(.*?)</a>";
String s = "";
String h = "";
URL u = new URL(url);
StringBuffer sTotalString = new StringBuffer("");
HttpURLConnection conn = (HttpURLConnection) u.openConnection();
conn.addRequestProperty("Cookie", COOKIE);

String sCurrentLine = "";
BufferedReader l_reader = new java.io.BufferedReader(
new java.io.InputStreamReader(u.openStream()));
while ((sCurrentLine = l_reader.readLine()) != null) {
sTotalString = sTotalString.append(new StringBuffer(sCurrentLine
+ "\n"));
s = RegexpCommon.getMatchString(sCurrentLine, p, 0);
if (StringUtils.isNotBlank(s)) {
h = RegexpCommon.getMatchString(s, href, 2);
System.out.println("得到的URL为:" + h);
urlList.add(h);
}

}

// byte[] b = (sTotalString.toString()).getBytes();
// BufferedOutputStream out = new BufferedOutputStream(
// new FileOutputStream("c:/test.html"));
// out.write(b);
return urlList;
}

/**
* 得到帖子的回复地址
* @param urlList
* @return
* @throws Exception
*/
public static List getCardPostUrl(List<String> urlList) throws Exception {
List<String> postList = new ArrayList<String>();
URL u = null;
HttpURLConnection conn = null;
BufferedReader l_reader = null;
String s = "";
for (String URL : urlList) {
u = new URL(URL);
StringBuffer sTotalString = new StringBuffer("");
conn = (HttpURLConnection) u.openConnection();
conn.addRequestProperty("Cookie", COOKIE);

String sCurrentLine = "";

String patternStrs = "iframe class=\"replyframe\" id=\"replyframe\" frameborder=\"0\" scrolling=\"no\" height=\"415px\" width=\"100%\" src=\"(.*?)\" csdnid=\"rframe\">";
l_reader = new java.io.BufferedReader(
new java.io.InputStreamReader(u.openStream()));
while ((sCurrentLine = l_reader.readLine()) != null) {
sTotalString = sTotalString.append(new StringBuffer(sCurrentLine
+ "\n"));
}
conn.disconnect();
s = RegexpCommon.getMatchString(sTotalString.toString(),
patternStrs, 0);
s = s.split("src=\"")[1].split("\" csdnid")[0];
System.out.println(s);
postList.add(s);
}
return postList;



}

public static void main(String[] args) throws Exception {
String s = "http://forum.csdn.net/PointForum/Forum/ReplyT.aspx?forumID=a3049f56-b572-48f5-89be-4797b70d71cd&topicID=b9fbc233-fadf-441b-aad8-2d6a77641f16&postDate=2010-02-01+08%3a40%3a49&v=13";
String d = "tb_ReplyBody___Editor=回复测试!!!";
// GetResponseDataByID(s, d);
//GetResponseDataByID(s, d);
//t();
List<String> urlList = getCsdn(null);
}

}

1. 登录验证码一直没能攻克,所以这里直接使用登录后从浏览器中取得的 cookie。

2.注释少了点。

 

 

分享到:
评论

相关推荐

    commons-httpclient-3.0.jar JAVA中使用HttpClient可以用到

    《JAVA中使用HttpClient:commons-httpclient-3.0.jar详解》 在JAVA开发中,进行HTTP请求时,Apache的HttpClient库是一个不可或缺的工具。本文将深入解析`commons-httpclient-3.0.jar`,它是HttpClient的一个重要...

    wechatpay-apache-httpclient-0.2.1.jar

    wechatpay-apache-httpclient-0.2.1.jar

    Common-httpClient各个版本jar及源码

    用快压解压 Common-httpClient各个版本jar及源码

    commons-httpclient.jar

    commons-httpclient

    ribbon-httpclient-2.2.5.jar

    ribbon-httpclient-2.2.5.jar

    commons-httpclient.rar

    9. **重试策略**:对于网络不稳定的情况,HttpClient可以配置重试策略,自动处理失败的请求。 10. **自定义化**:HttpClient的API设计灵活,允许开发者根据需求自定义请求行为,如设置超时、自定义编码解码等。 在...

    commons-httpclient依赖包

    包括了httpclient的所有包,commons-httpclient3.0.jar,httpclient4.0.jar,commons-logging1.1.1.jar,commons-codec-1.3.jar等

    commons-httpclient-3.1.jar,包内共5个资源

    5个jar包,commons-codec-1.9.jar,commons-httpclient-3.1.jar,commons-logging-1.2.jar,httpclient-4.5.jar,httpcore-4.4.1.jar

    commons-httpclient-3.1.jar

    HttpClient是Apache Jakarta Common下的子项目,用来提供高效的、最新的、功能丰富的支持...HttpClient已经应用在很多的项目中,比如Apache Jakarta上很著名的另外两个开源项目Cactus和HTMLUnit都使用了HttpClient。

    commons-httpclient-3.0.jar

    commons-httpclient-3.0.jar JAVA中使用HttpClient可以用到

    commons-httpclient-3.1jar包

    《Apache Commons HttpClient 3.1详解》 Apache Commons HttpClient 是一个功能强大的Java库,专为实现客户端HTTP通信而设计。这个3.1版本是HttpClient的一个重要里程碑,它提供了丰富的功能和改进,使得开发者能够...

    commons-httpclient-3.0-rc4

    httpclient常用的jar包,便于大家使用

    commons-httpclient-3.1jar

    HttpClient支持Cookie规范,能够自动处理服务器返回的Cookie,并在后续请求中自动添加。同时,也支持自定义Cookie策略以适应不同的服务器行为。 4. **认证机制**: 它提供多种身份验证机制,包括基本认证、摘要...

    commons-httpclient3.1.jar,commons-codec1.3.jar,commons-logging1.1.1.jar

    最后,`commons-httpclient-3.1.jar`作为主要的HTTP客户端库,负责建立连接、发送请求和接收响应。 在开发过程中,将这些库包含到项目的类路径中是必要的,这样就可以利用它们的功能来执行HTTP操作。然而,需要注意...

    commons-httpclient-3.1jar包下载

    http://jakarta.apache.org/commons/httpclient/ org.apache.commons.httpclient.URI org.apache.commons.httpclient.Wire org.apache.commons.httpclient.Cookie org.apache.commons.httpclient.Header org.apache.commons....

Global site tag (gtag.js) - Google Analytics