浏览 2161 次
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
|
|
---|---|
作者 | 正文 |
发表时间:2011-07-24
最后修改:2011-07-24
// Forum post lead-in ("先贴代码:"): the code comes first.
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Mirrors the sample projects published under
 * http://developer.android.com/resources to a local directory.
 *
 * <p>Every remote directory is expected to expose an {@code index.html} listing. Sub-directories
 * found in a listing are crawled on their own thread; source files are extracted from the
 * {@code <pre>} element of their HTML view page and images are stored as raw bytes.
 *
 * <p>The shared bookkeeping (failure map and worker queue) uses concurrent collections because it
 * is updated from the worker threads.
 */
public class GetFileFromWeb2 {

    /** Maximum number of attempts for one download before it is reported as failed. */
    public static final int times = 4;

    /** File extensions that are stored byte-for-byte instead of being parsed as HTML. */
    public static final String[] imageType = {"png", "gif", "jpg"};

    /** Name of the listing page that is fetched for every directory. */
    public static final String fileName = "index.html";

    /** Remote directory crawled when no command-line argument is given. */
    private static final String DEFAULT_BASE_URL =
            "http://developer.android.com/resources/samples/ApiDemos/";

    /** Local directory used when no second command-line argument is given (the desktop). */
    private static final String DEFAULT_SAVE_DIR =
            "C:/Documents and Settings/Administrator/桌面/test/";

    /** Connect and read timeout for every HTTP request. */
    private static final int TIMEOUT_MILLIS = 10 * 1000;

    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /**
     * Kept separate so the entity literals below are built by concatenation. NOTE(review): the
     * published listing contained no-op replacements of "<" by "<", presumably because the
     * entity text was decoded when the post was rendered; concatenation avoids a repeat.
     */
    private static final String AMP = "&";

    /**
     * One link of a listing page whose text is a directory ({@code name/}) or a supported file.
     * The character classes stop at tag boundaries so several links on one line are all found.
     */
    private static final Pattern LINK = Pattern.compile(
            "<a href=\"[^>]*\">\\s*([^<]+/|[^<]*\\.(xml|java|png|gif|jpg))\\s*</a>");

    /** The final extension of a file name, e.g. {@code .java}. */
    private static final Pattern EXTENSION = Pattern.compile("\\.\\w+$");

    /** The source listing of a view page; DOTALL lets it span LF and CRLF line breaks. */
    private static final Pattern PRE_BLOCK = Pattern.compile("<pre>(.*)</pre>", Pattern.DOTALL);

    /** Failed downloads: full URL of the requested file mapped to the listing page naming it. */
    private static final Map<String, String> map = new ConcurrentHashMap<String, String>();

    /** Worker threads that have been started and not yet joined by {@link #main}. */
    private static final Queue<MultiThread> threadList =
            new ConcurrentLinkedQueue<MultiThread>();

    /**
     * Crawls a remote directory tree, waits for all workers and prints timing and failures.
     *
     * @param args optional: {@code args[0]} is the remote base URL, {@code args[1]} the local
     *             target directory; the original hard-coded values are used when absent
     */
    public static void main(String[] args) {
        long startTime = System.currentTimeMillis();
        String basePath = withTrailingSlash(args.length > 0 ? args[0] : DEFAULT_BASE_URL);
        String baseSavePath = withTrailingSlash(args.length > 1 ? args[1] : DEFAULT_SAVE_DIR);
        try {
            oneKey2GetFile(basePath, baseSavePath);
        } catch (Exception e) {
            System.out.println("网络断开或不存在此页面:" + basePath + fileName);
        }
        // The elapsed time is only meaningful once every worker thread has finished.
        awaitWorkers();
        System.out.println("下载耗时:" + (System.currentTimeMillis() - startTime) + "ms");
        System.out.println("===========保存出错列表=============");
        for (Map.Entry<String, String> entry : map.entrySet()) {
            String failedUrl = entry.getKey();
            String shortName = failedUrl.substring(failedUrl.lastIndexOf('/') + 1);
            System.out.println("文件名:" + shortName + "\t所在页面:" + entry.getValue());
        }
    }

    /**
     * Downloads everything listed by one remote directory. Files are saved on the calling
     * thread; each sub-directory is handed to a new {@link MultiThread} that calls this method
     * again.
     *
     * @param basePath     remote directory URL, ending with {@code /}
     * @param baseSavePath local parent directory, ending with {@code /}
     * @throws Exception if the directory listing itself cannot be downloaded
     */
    public static void oneKey2GetFile(String basePath, String baseSavePath) throws Exception {
        String dirPath = createDir(basePath, baseSavePath);
        String listing = new String(getData(basePath, fileName), UTF_8);
        for (String name : dataFilter(listing)) {
            System.out.println(name);
            if (name.endsWith("/")) {
                MultiThread worker = new MultiThread(basePath + name, dirPath);
                // Start before registering: joining a thread that has not been started returns
                // immediately, which would let main() stop waiting too early.
                worker.start();
                threadList.add(worker);
            } else {
                try {
                    filterSave(getData(basePath, name), name, dirPath);
                } catch (Exception e) {
                    // Best effort: one broken file must not abort the rest of the directory.
                    System.out.println(name + "读取出错2:" + e.getMessage());
                }
            }
        }
    }

    /**
     * Tells whether a file name has one of the {@link #imageType} extensions.
     *
     * @param name file name to inspect; must not be {@code null}
     * @return {@code true} only if the text after the last dot equals a known image extension;
     *         names without a dot are never images
     */
    public static boolean containImage(String name) {
        int dot = name.lastIndexOf('.');
        if (dot < 0) {
            return false;
        }
        String extension = name.substring(dot + 1);
        for (String type : imageType) {
            if (type.equals(extension)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Creates the local directory that mirrors the last segment of a remote directory URL.
     *
     * @param path         remote directory URL; a trailing {@code /} is optional
     * @param baseSavePath local parent directory, ending with {@code /}
     * @return the local directory path, ending with {@code /}
     */
    public static String createDir(String path, String baseSavePath) {
        String trimmed = path.endsWith("/") ? path.substring(0, path.length() - 1) : path;
        String dir = trimmed.substring(trimmed.lastIndexOf('/') + 1);
        String dirPath = baseSavePath + dir;
        System.out.println("保存路径:" + dirPath);
        File file = new File(dirPath);
        // mkdirs() also returns false when another worker created the directory concurrently.
        if (!file.exists() && !file.mkdirs() && !file.isDirectory()) {
            System.out.println("无法创建目录:" + dirPath);
        }
        return dirPath + "/";
    }

    /**
     * Downloads one resource, retrying up to {@link #times} times.
     *
     * <p>Images are requested under their own name; any other file is requested through its
     * HTML view page (final extension replaced by {@code .html}).
     *
     * @param basePath remote directory URL, ending with {@code /}
     * @param fileName name of the file as shown in the directory listing
     * @return the response body of the first attempt answered with HTTP 200
     * @throws Exception (an {@link IOException} carrying the last failure as its cause) when no
     *                   attempt succeeded; the failure is also recorded for the final report
     */
    public static byte[] getData(String basePath, String fileName) throws Exception {
        String pageName = containImage(fileName) ? fileName : toPageName(fileName);
        String address = basePath + pageName;
        IOException lastFailure = null;
        for (int attempt = 0; attempt < times; attempt++) {
            try {
                return fetchOnce(address);
            } catch (IOException e) {
                lastFailure = e;
            }
        }
        // Keyed by full URL so equally named files of different directories do not collide.
        map.put(basePath + fileName, basePath + GetFileFromWeb2.fileName);
        throw new IOException(
                "Failed to download " + address + " after " + times + " attempts", lastFailure);
    }

    /**
     * Extracts the directory and file names offered by a listing page.
     *
     * @param data HTML of the listing page
     * @return link texts that are directories (ending with {@code /}) or xml/java/png/gif/jpg
     *         files, in page order; names that could escape the target directory are skipped
     */
    public static ArrayList<String> dataFilter(String data) {
        ArrayList<String> fileNames = new ArrayList<String>();
        Matcher matcher = LINK.matcher(data);
        while (matcher.find()) {
            String name = matcher.group(1);
            if (isSafeRelativeName(name)) {
                fileNames.add(name);
            }
        }
        return fileNames;
    }

    /**
     * Saves one downloaded resource below {@code dirPath}.
     *
     * <p>Images are written unchanged. For any other file the bytes are decoded as UTF-8 HTML,
     * the content of the {@code <pre>} element is unescaped and written as UTF-8 text; when the
     * page has no {@code <pre>} element nothing is written.
     *
     * @param fileBytes downloaded bytes
     * @param fileName  name of the file as shown in the directory listing
     * @param dirPath   local target directory, ending with {@code /}
     * @throws Exception if the file cannot be written
     */
    public static void filterSave(byte[] fileBytes, String fileName, String dirPath)
            throws Exception {
        String name = dirPath + fileName;
        if (containImage(fileName)) {
            FileOutputStream outStream = new FileOutputStream(name);
            try {
                outStream.write(fileBytes, 0, fileBytes.length);
            } finally {
                outStream.close();
            }
        } else {
            String source = extractPreContent(new String(fileBytes, UTF_8));
            if (source != null) {
                Writer writer = new OutputStreamWriter(new FileOutputStream(name), UTF_8);
                try {
                    writer.write(source);
                } finally {
                    writer.close();
                }
            }
        }
        System.out.println(name);
    }

    /**
     * Maps a listed file name to the name of its HTML view page by replacing only the final
     * extension. Package-private so it can be unit-tested.
     */
    static String toPageName(String listedName) {
        return EXTENSION.matcher(listedName).replaceAll(".html");
    }

    /**
     * Returns the unescaped text between the first {@code <pre>} and the last {@code </pre>} of
     * a page, or {@code null} when the page has none. Package-private so it can be unit-tested.
     */
    static String extractPreContent(String html) {
        Matcher matcher = PRE_BLOCK.matcher(html);
        if (!matcher.find()) {
            return null;
        }
        return unescapeHtml(matcher.group(1));
    }

    /** Decodes the basic HTML entities; the ampersand goes last so nothing is decoded twice. */
    private static String unescapeHtml(String text) {
        return text.replace(AMP + "lt;", "<")
                .replace(AMP + "gt;", ">")
                .replace(AMP + "quot;", "\"")
                .replace(AMP + "amp;", AMP);
    }

    /** Rejects link texts that are absolute or contain a parent-directory segment. */
    private static boolean isSafeRelativeName(String name) {
        if (name.startsWith("/") || name.indexOf('\\') >= 0 || name.indexOf(':') >= 0) {
            return false;
        }
        for (String segment : name.split("/")) {
            if (segment.equals("..")) {
                return false;
            }
        }
        return true;
    }

    /** Performs a single GET and returns the body, failing on any status other than 200. */
    private static byte[] fetchOnce(String address) throws IOException {
        URLConnection raw = new URL(address).openConnection();
        if (!(raw instanceof HttpURLConnection)) {
            throw new IOException("Not an HTTP URL: " + address);
        }
        HttpURLConnection conn = (HttpURLConnection) raw;
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        conn.setReadTimeout(TIMEOUT_MILLIS);
        conn.setRequestMethod("GET");
        int status = conn.getResponseCode();
        System.out.println(status);
        if (status != HttpURLConnection.HTTP_OK) {
            conn.disconnect();
            throw new IOException("HTTP " + status + " for " + address);
        }
        InputStream inStream = conn.getInputStream();
        try {
            ByteArrayOutputStream body = new ByteArrayOutputStream();
            byte[] buff = new byte[4096];
            int len;
            while ((len = inStream.read(buff)) != -1) {
                body.write(buff, 0, len);
            }
            return body.toByteArray();
        } finally {
            inStream.close();
        }
    }

    /**
     * Blocks until every registered worker has finished. A worker registers its children before
     * it terminates, so an empty queue after the last join means the whole crawl is done.
     */
    private static void awaitWorkers() {
        MultiThread worker;
        while ((worker = threadList.poll()) != null) {
            try {
                worker.join();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /** Appends {@code /} when it is missing. */
    private static String withTrailingSlash(String path) {
        return path.endsWith("/") ? path : path + "/";
    }

    /** Worker thread that crawls one remote sub-directory. */
    static class MultiThread extends Thread {
        private String basePath;
        private String baseSavePath;

        public MultiThread() {
        }

        /**
         * @param basePath     remote directory URL, ending with {@code /}
         * @param baseSavePath local parent directory, ending with {@code /}
         */
        public MultiThread(String basePath, String baseSavePath) {
            this.basePath = basePath;
            this.baseSavePath = baseSavePath;
        }

        @Override
        public void run() {
            try {
                oneKey2GetFile(basePath, baseSavePath);
            } catch (Exception e) {
                System.out.println("读取目录:" + basePath + " 出错:" + e.getMessage());
            }
        }
    }
}
// Forum post text ("再贴图:"): screenshots follow.
这是http://developer.android.com/resources/samples/ApiDemos/index.html页面下的路径格式 下面是保存下来的文件目录格式: 代码直接运行即可。 声明:ITeye文章版权属于作者,受法律保护。没有作者书面许可不得转载。
推荐链接
|
|
返回顶楼 | |
发表时间:2011-07-24
不错,支持
|
|
返回顶楼 | |
发表时间:2011-08-02
很好哦..楼主....不过这么多东西,6分钟就可以下载完?
|
|
返回顶楼 | |