Hadoop HDFS CRUD

fairy_xzc

浏览: 369592 次
性别:
来自: 北京

最近访客更多访客>>

huan0727

漂移361john

scj2cy

smallbug2010

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Hadoop

Maven Dependency

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<!-- Build descriptor for the HDFS CRUD sample: a plain jar that pulls in the Hadoop client libraries and JUnit. -->
	<modelVersion>4.0.0</modelVersion>

	<groupId>org.fool.hadoop</groupId>
	<artifactId>hadoop</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>hadoop</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<!-- Single version shared by the hadoop-common and hadoop-hdfs dependencies below. -->
		<hadoop.version>2.6.4</hadoop.version>
	</properties>

	<dependencies>
		<!-- Hadoop core classes used by the samples (Configuration, FileSystem, Path, ...). -->
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>${hadoop.version}</version>
		</dependency>

		<!-- HDFS client implementation behind the hdfs:// scheme. -->
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-hdfs</artifactId>
			<version>${hadoop.version}</version>
		</dependency>

		<!--
			tools.jar from the local JDK, referenced by file path through system scope
			(presumably to satisfy a jdk.tools reference made by the Hadoop artifacts; confirm).
			NOTE(review): the path uses ${JAVA_HOME} without the "env." prefix. Maven documents
			${env.JAVA_HOME} and ${java.home} for this purpose; verify that this value resolves
			in the build environment. tools.jar also only exists in JDK 8 and earlier.
		-->
		<dependency>
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.8</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>

		<!-- JUnit 4, test scope only: provides @Test and @Before for the samples. -->
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.12</version>
			<scope>test</scope>
		</dependency>
	</dependencies>
</project>

首先确认Hadoop HDFS已经启动——start-dfs.sh

先来看一个上传和下载比较底层的写法（不推荐）

下载

/**
 * Downloads {@code /jdk-8u77-linux-x64.tar.gz} from HDFS to {@code C:/jdk.tgz}
 * by copying raw streams (the low-level approach).
 *
 * <p>Both streams are opened in a try-with-resources block so that they are
 * always closed, even when the copy fails. Closing the
 * {@link BufferedOutputStream} also flushes whatever is still sitting in its
 * buffer; without that the tail of the local file could be missing.
 *
 * @throws Exception if the HDFS file cannot be opened, the local file cannot
 *                   be created, or the copy fails
 */
@Test
public void downloadTest() throws Exception {
	Configuration conf = new Configuration();

	conf.set("fs.defaultFS", "hdfs://hadoop-0000:9000/");

	FileSystem fs = FileSystem.get(conf);

	// Resources are closed in reverse order: the local output stream first
	// (flushing its buffer), then the HDFS input stream.
	try (FSDataInputStream is = fs.open(new Path("/jdk-8u77-linux-x64.tar.gz"));
			BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(new File("C:/jdk.tgz")))) {
		// download the file to disk C
		IOUtils.copy(is, os);
	}
}

这里尝试将HDFS上的JDK下载到本地C盘：首先获取一个输入流读取HDFS的文件，之后创建一个输出流用来写本地文件，最后调用IOUtils的copy方法完成文件的下载

上传

/**
 * Uploads the local {@code pom.xml} (resolved against the current working
 * directory) to {@code /myupload/pom.xml} on HDFS by copying raw streams
 * (the low-level approach).
 *
 * <p>Both streams are opened in a try-with-resources block so that they are
 * always closed, even when the copy fails. The HDFS output stream in
 * particular has to be closed for the write to be finished properly.
 *
 * @throws Exception if the HDFS file cannot be created (for example because
 *                   the current user has no write permission), the local file
 *                   cannot be read, or the copy fails
 */
@Test
public void uploadTest() throws Exception {
	Configuration conf = new Configuration();

	conf.set("fs.defaultFS", "hdfs://hadoop-0000:9000/");

	FileSystem fs = FileSystem.get(conf);

	// Same open order as before (HDFS output first, then local input);
	// try-with-resources closes them in reverse order.
	try (FSDataOutputStream os = fs.create(new Path("hdfs://hadoop-0000:9000/myupload/pom.xml"));
			BufferedInputStream is = new BufferedInputStream(new FileInputStream(new File("pom.xml")))) {
		// upload the file to HDFS
		IOUtils.copy(is, os);
	}
}

同理，首先创建一个输出流用来写HDFS文件，之后创建一个输入流来读取本地的文件，比如当前工作目录的pom.xml，最后调用IOUtils的copy方法完成文件的上传。

但是这里直接运行这段代码会报错，提示没有权限写

这里有两种解决方法（这里采用第2种方法）

1.将Hadoop HDFS的文件的权限放大，这个嘛直接chmod 777 就行了（hadoop fs -chmod 777 /myupload）

2.运行Test的时候，加一段VM的参数-DHADOOP_USER_NAME=hadoop，即可解决问题

重新run test，绿色心情，O(∩_∩)O~，查看HDFS

这个底层的写法是不是感觉相当烦琐？不要怕，Hadoop的fs已经封装了相关的方法，提供给我们直接调用，接下来看一个完整的例子

package org.fool.hadoop.hdfs;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import com.google.common.base.Joiner;

/**
 * Sample JUnit tests showing basic HDFS operations (mkdir, upload, download,
 * delete, list) through the high-level {@link FileSystem} API.
 *
 * <p>Every test gets its own {@link FileSystem} handle, opened in
 * {@link #setUp()} as user {@code hadoop} and closed again in
 * {@link #tearDown()}. The tests talk to a running HDFS instance at
 * {@link #HDFS_URI} and are not independent of each other's data (for example
 * {@link #testDownload()} reads the file written by {@link #testUpload()}).
 */
public class HdfsTest {

	/** URI of the HDFS instance every test connects to. */
	private static final String HDFS_URI = "hdfs://hadoop-0000:9000/";

	private FileSystem fs = null;

	/**
	 * Opens the file system handle used by the test, acting as user {@code hadoop}.
	 *
	 * @throws Exception if the URI is malformed or the file system cannot be obtained
	 */
	@Before
	public void setUp() throws Exception {
		Configuration conf = new Configuration();

		conf.set("fs.defaultFS", HDFS_URI);

		fs = FileSystem.get(new URI(HDFS_URI), conf, "hadoop");
	}

	/**
	 * Closes the file system handle opened in {@link #setUp()} so that tests do
	 * not leak client resources. Safe to call when {@code setUp} failed before
	 * assigning the handle.
	 *
	 * @throws Exception if closing the file system fails
	 */
	@After
	public void tearDown() throws Exception {
		if (fs != null) {
			fs.close();
			fs = null;
		}
	}

	/** Creates the nested directory {@code /aaa/bbb/ccc}. */
	@Test
	public void testMkDir() throws Exception {
		fs.mkdirs(new Path("/aaa/bbb/ccc"));
	}

	/** Copies the local {@code pom.xml} to {@code /pom.xml} and {@code /myupload/pom.xml} on HDFS. */
	@Test
	public void testUpload() throws Exception {
		Path localPom = new Path("pom.xml");

		fs.copyFromLocalFile(localPom, new Path(HDFS_URI + "pom.xml"));
		fs.copyFromLocalFile(localPom, new Path(HDFS_URI + "myupload/pom.xml"));
	}

	/**
	 * Copies {@code /myupload/pom.xml} from HDFS to {@code D:/pom.xml}.
	 * The first argument ({@code false}) keeps the source file on HDFS; the last
	 * argument ({@code true}) selects the raw local file system for the target.
	 */
	@Test
	public void testDownload() throws Exception {
		fs.copyToLocalFile(false, new Path(HDFS_URI + "myupload/pom.xml"), new Path("D:/pom.xml"), true);
	}

	/** Recursively deletes everything the other tests create: {@code /myupload}, {@code /pom.xml} and {@code /aaa}. */
	@Test
	public void testDelete() throws Exception {
		fs.delete(new Path("/myupload"), true);
		fs.delete(new Path("/pom.xml"), true);
		fs.delete(new Path("/aaa"), true);
	}

	/**
	 * Prints the contents of the HDFS root in two ways: first the name of every
	 * file found by the recursive {@code listFiles} call, then one line for each
	 * direct child of {@code /} stating whether it is a directory or a file,
	 * followed by its full path.
	 */
	@Test
	public void listFiles() throws Exception {
		Path root = new Path("/");

		// Recursive listing: the second argument (true) descends into sub-directories.
		RemoteIterator<LocatedFileStatus> files = fs.listFiles(root, true);
		while (files.hasNext()) {
			LocatedFileStatus file = files.next();

			System.out.println(file.getPath().getName());
		}

		System.out.println("\n");

		// Non-recursive listing: direct children of the root only.
		for (FileStatus fileStatus : fs.listStatus(root)) {
			String name = fileStatus.getPath().getName();
			String kind = fileStatus.isDirectory() ? "is dir" : "is file";

			System.out.println(Joiner.on(" ").join(name, kind, fileStatus.getPath()));
		}
	}

}

是不是so easy O(∩_∩)O哈哈~