hadoop统计单词数实例 -

u011820505

浏览: 76494 次
性别:
来自: 深圳

最近访客更多访客>>

流星空中

zkbucciarati

night_joe

dongguangming88

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

hadoop统计单词数实例

博客分类：

hadoop

etc/hadoop配置文件

1.vi core-site.xml

<?xml version="1.0" encoding="UTF-8"?>

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--

Licensed under the Apache License, Version 2.0 (the "License");

you may not use this file except in compliance with the License.

You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software

distributed under the License is distributed on an "AS IS" BASIS,

WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

See the License for the specific language governing permissions and

limitations under the License. See accompanying LICENSE file.

-->

<configuration>

<property>

<name>fs.default.name</name>

<value>hdfs://localhost:9000</value>

</property>

</configuration>

2. vi mapred-site.xml

<?xml version="1.0"?>

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--

Licensed under the Apache License, Version 2.0 (the "License");

you may not use this file except in compliance with the License.

You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software

distributed under the License is distributed on an "AS IS" BASIS,

WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

See the License for the specific language governing permissions and

limitations under the License. See accompanying LICENSE file.

-->

<configuration>

<property>

<name>mapred.job.tracker</name>

<value>localhost:9001</value>

</property>

</configuration>

3.vi hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>

<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!--

Licensed under the Apache License, Version 2.0 (the "License");

you may not use this file except in compliance with the License.

You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software

distributed under the License is distributed on an "AS IS" BASIS,

WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

See the License for the specific language governing permissions and

limitations under the License. See accompanying LICENSE file.

-->

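<configuration>

<property>

<name>dfs.replication</name>

<value>1</value>

</property>

<!-- 注：原文此处似乎还有两个 property（内容在转载时丢失，仅剩结束标签）；单机伪分布式环境下 dfs.replication 通常设为 1，请按实际环境确认 -->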

</configuration>

4.vi hadoop-env.sh

export JAVA_HOME=/opt/jdk1.8.0_65   # 指定 hadoop 调用的 jdk

5.启动

sbin下面是启动命令

./start-all.sh

5.1jps 查看启动的服务（五个）

15509 ResourceManager

14808 NameNode

15241 SecondaryNameNode

14974 DataNode

15679 NodeManager

6.关闭./stop-all.sh

hdfs的命令

hadoop fs -mkdir /user/trunk

hadoop fs -ls /user

hadoop fs -lsr /user (递归的)

hadoop fs -put test.txt /user/trunk

hadoop fs -put test.txt . (复制到hdfs当前目录下，首先要创建当前目录)

hadoop fs -get /user/trunk/test.txt . (复制到本地当前目录下)

hadoop fs -cat /user/trunk/test.txt

hadoop fs -tail /user/trunk/test.txt (查看最后1000字节)

hadoop fs -rm /user/trunk/test.txt

hadoop fs -help ls (查看ls命令的帮助文档)

java代码

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)

throws IOException, InterruptedException {

String[] arrayOfString1;

String val = value.toString();

String[] str = val.split(" ");

int j = (arrayOfString1 = str).length;

for (int i = 0; i < j; ++i) {

String s = arrayOfString1[i];

context.write(new Text(s), new IntWritable(1));

}

import java.io.IOException;

import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.Reducer.Context;

public class WordcountReducer extends Reducer<Text, IntWritable, Text, IntWritable>

{

protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context)

throws IOException, InterruptedException

{

int sum = 0;

for (Iterator localIterator = values.iterator(); localIterator.hasNext(); ) { IntWritable val = (IntWritable)localIterator.next();

sum += val.get();

}

context.write(key, new IntWritable(sum));

}

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount

{

public static void main(String[] args)

throws Exception

{

Configuration conf = new Configuration();

Job job = new Job(conf, "word count");

job.setJarByClass(WordCount.class);

job.setMapperClass(WordcountMapper.class);

job.setCombinerClass(WordcountReducer.class);

job.setReducerClass(WordcountReducer.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(IntWritable.class);

FileInputFormat.addInputPath(job, new Path(args[0]));

FileOutputFormat.setOutputPath(job, new Path(args[1]));

System.exit((job.waitForCompletion(true)) ? 0 : 1);

}

hadoop-core-0.20.2.jar

分享到：

nginx配置。一次性启动加载多个域名 | 百度地图位置

2016-07-06 14:43
浏览 696
评论(0)
分类:开源软件
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

hadoop统计单词数实例

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

hadoop统计单词数实例

评论

发表评论

相关推荐

Unable to load native-hadoop library for your platform

hadoop下载地址

最近访客更多访客>>