`
风过无声
  • 浏览: 92085 次
  • 性别: Icon_minigender_1
  • 来自: 深圳
社区版块
存档分类
最新评论

Hadoop 第一个Hadoop程序

 
阅读更多

1. 旧版API

-- 源代码

MaxTemperatureMapper.java

package com.hadoop.study.chap01;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class MaxTemperatureMapper extends MapReduceBase implements
		Mapper<LongWritable, Text, Text, IntWritable> {
	
	/** Sentinel value used in NCDC records for a missing temperature reading. */
	private static final int MISSING = 9999;
	
	// Output objects are allocated once and reused for every record to
	// avoid creating two short-lived objects per input line.
	private final Text outYear = new Text();
	private final IntWritable outTemperature = new IntWritable();
	
	/**
	 * Parses one fixed-width NCDC weather record and emits
	 * (year, airTemperature) when the reading is present and its
	 * quality code indicates a valid measurement.
	 *
	 * @param key    byte offset of the line in the input split (unused)
	 * @param value  one raw NCDC record line
	 * @param output collector receiving (year, temperature) pairs
	 * @param reporter progress reporter (unused)
	 * @throws IOException if emitting the output pair fails
	 */
	@Override
	public void map(LongWritable key, Text value,
			OutputCollector<Text, IntWritable> output, Reporter reporter)
			throws IOException {
		
		String line = value.toString();
		
		// Year occupies columns 15-18 of the fixed-width record.
		String year = line.substring(15, 19);
		
		// Temperature is a signed 5-character field at columns 87-91;
		// Integer.parseInt rejects a leading '+', so skip it explicitly.
		int airTemperature;
		if (line.charAt(87) == '+') {
			airTemperature = Integer.parseInt(line.substring(88, 92));
		} else {
			airTemperature = Integer.parseInt(line.substring(87, 92));
		}
		
		// Quality code at column 92: only codes 0,1,4,5,9 are trusted readings.
		String quality = line.substring(92, 93);
		if (airTemperature != MISSING && quality.matches("[01459]")) {
			outYear.set(year);
			outTemperature.set(airTemperature);
			output.collect(outYear, outTemperature);
		}
		
	}

}

 MaxTemperatureReducer.java

package com.hadoop.study.chap01;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class MaxTemperatureReducer extends MapReduceBase implements
		Reducer<Text, IntWritable, Text, IntWritable> {

	/**
	 * Reduces all temperatures observed for one year down to the
	 * single maximum and emits (year, maxTemperature).
	 *
	 * @param key    the year
	 * @param values all temperatures recorded for that year
	 * @param output collector receiving the single (year, max) pair
	 * @param report progress reporter (unused)
	 * @throws IOException if emitting the output pair fails
	 */
	@Override
	public void reduce(Text key, Iterator<IntWritable> values,
			OutputCollector<Text, IntWritable> output, Reporter report)
			throws IOException {
		
		// Start below any representable reading so the first value wins.
		int max = Integer.MIN_VALUE;
		for (; values.hasNext(); ) {
			int current = values.next().get();
			if (current > max) {
				max = current;
			}
		}
		
		output.collect(key, new IntWritable(max));
	}

}

 MaxTemperature.java

package com.hadoop.study.chap01;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;


public class MaxTemperature {
	
	/**
	 * Driver for the old-API (mapred) max-temperature job.
	 * Expects exactly two arguments: the input path and the output path.
	 *
	 * @param args {@code <input path> <output path>}
	 * @throws IOException if job submission fails
	 */
	public static void main(String[] args) throws IOException {
		
		// Bail out early with a usage hint when the argument count is wrong.
		if (args.length != 2) {
			System.err.println("Usage: MaxTemperature <input path> <output path>");
			System.exit(-1);
		}
		
		JobConf jobConf = new JobConf(MaxTemperature.class);
		jobConf.setJobName("Max Temperature");
		
		// Wire up the map and reduce implementations and the output types.
		jobConf.setMapperClass(MaxTemperatureMapper.class);
		jobConf.setReducerClass(MaxTemperatureReducer.class);
		jobConf.setOutputKeyClass(Text.class);
		jobConf.setOutputValueClass(IntWritable.class);
		
		FileInputFormat.addInputPath(jobConf, new Path(args[0]));
		FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));
		
		// Submit the job and block until it completes.
		JobClient.runJob(jobConf);
		
	}
	
}

-- 执行

1) 将程序打包成hadoop-study.jar

2) 将输入文件1901上传至hadoop的master节点的/home/hadoop/input目录下

3) 将输入文件导入HDFS中

hadoop fs -copyFromLocal /home/hadoop/input input

4) 将jar包上传至hadoop的master节点的/home/hadoop/task目录下

5) 运行代码

hadoop jar /home/hadoop/task/hadoop-study.jar com.hadoop.study.chap01.MaxTemperature input/1901 output

-- 控制台输出

14/02/24 23:03:20 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
14/02/24 23:03:20 INFO util.NativeCodeLoader: Loaded the native-hadoop library
14/02/24 23:03:20 WARN snappy.LoadSnappy: Snappy native library not loaded
14/02/24 23:03:20 INFO mapred.FileInputFormat: Total input paths to process : 1
14/02/24 23:03:21 INFO mapred.JobClient: Running job: job_201402241759_0005
14/02/24 23:03:22 INFO mapred.JobClient:  map 0% reduce 0%
14/02/24 23:03:29 INFO mapred.JobClient:  map 50% reduce 0%
14/02/24 23:03:31 INFO mapred.JobClient:  map 100% reduce 0%
14/02/24 23:03:37 INFO mapred.JobClient:  map 100% reduce 16%
14/02/24 23:04:15 INFO mapred.JobClient: Task Id : attempt_201402241759_0005_m_000001_0, Status : FAILED
Too many fetch-failures
14/02/24 23:04:16 WARN mapred.JobClient: Error reading task outputConnection refused
14/02/24 23:04:16 WARN mapred.JobClient: Error reading task outputConnection refused
14/02/24 23:04:17 INFO mapred.JobClient:  map 50% reduce 16%
14/02/24 23:04:20 INFO mapred.JobClient:  map 100% reduce 16%
14/02/24 23:04:38 INFO mapred.JobClient:  map 100% reduce 33%
14/02/24 23:04:40 INFO mapred.JobClient:  map 100% reduce 100%
14/02/24 23:04:41 INFO mapred.JobClient: Job complete: job_201402241759_0005
14/02/24 23:04:41 INFO mapred.JobClient: Counters: 30
14/02/24 23:04:41 INFO mapred.JobClient:   Job Counters 
14/02/24 23:04:41 INFO mapred.JobClient:     Launched reduce tasks=1
14/02/24 23:04:41 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=16037
14/02/24 23:04:41 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/02/24 23:04:41 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/02/24 23:04:41 INFO mapred.JobClient:     Launched map tasks=3
14/02/24 23:04:41 INFO mapred.JobClient:     Data-local map tasks=3
14/02/24 23:04:41 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=69940
14/02/24 23:04:41 INFO mapred.JobClient:   File Input Format Counters 
14/02/24 23:04:41 INFO mapred.JobClient:     Bytes Read=890559
14/02/24 23:04:41 INFO mapred.JobClient:   File Output Format Counters 
14/02/24 23:04:41 INFO mapred.JobClient:     Bytes Written=9
14/02/24 23:04:41 INFO mapred.JobClient:   FileSystemCounters
14/02/24 23:04:41 INFO mapred.JobClient:     FILE_BYTES_READ=72210
14/02/24 23:04:41 INFO mapred.JobClient:     HDFS_BYTES_READ=890763
14/02/24 23:04:41 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=305149
14/02/24 23:04:41 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=9
14/02/24 23:04:41 INFO mapred.JobClient:   Map-Reduce Framework
14/02/24 23:04:41 INFO mapred.JobClient:     Map output materialized bytes=72216
14/02/24 23:04:41 INFO mapred.JobClient:     Map input records=6565
14/02/24 23:04:41 INFO mapred.JobClient:     Reduce shuffle bytes=72216
14/02/24 23:04:41 INFO mapred.JobClient:     Spilled Records=13128
14/02/24 23:04:41 INFO mapred.JobClient:     Map output bytes=59076
14/02/24 23:04:41 INFO mapred.JobClient:     Total committed heap usage (bytes)=412942336
14/02/24 23:04:41 INFO mapred.JobClient:     CPU time spent (ms)=3780
14/02/24 23:04:41 INFO mapred.JobClient:     Map input bytes=888190
14/02/24 23:04:41 INFO mapred.JobClient:     SPLIT_RAW_BYTES=204
14/02/24 23:04:41 INFO mapred.JobClient:     Combine input records=0
14/02/24 23:04:41 INFO mapred.JobClient:     Reduce input records=6564
14/02/24 23:04:41 INFO mapred.JobClient:     Reduce input groups=1
14/02/24 23:04:41 INFO mapred.JobClient:     Combine output records=0
14/02/24 23:04:41 INFO mapred.JobClient:     Physical memory (bytes) snapshot=333090816
14/02/24 23:04:41 INFO mapred.JobClient:     Reduce output records=1
14/02/24 23:04:41 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=1122193408
14/02/24 23:04:41 INFO mapred.JobClient:     Map output records=6564

2.新版API

-- 源代码

MaxTemperatureMapper.java

package com.hadoop.study.chap01.news;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class MaxTemperatureMapper extends
		Mapper<LongWritable, Text, Text, IntWritable> {
	
	/** Sentinel value used in NCDC records for a missing temperature reading. */
	private static final int MISSING = 9999;
	
	// Output objects are allocated once and reused for every record to
	// avoid creating two short-lived objects per input line.
	private final Text outYear = new Text();
	private final IntWritable outTemperature = new IntWritable();
	
	/**
	 * Parses one fixed-width NCDC weather record and emits
	 * (year, airTemperature) when the reading is present and its
	 * quality code indicates a valid measurement.
	 *
	 * @param key     byte offset of the line in the input split (unused)
	 * @param value   one raw NCDC record line
	 * @param context output context receiving (year, temperature) pairs
	 * @throws IOException          if emitting the output pair fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		
		String line = value.toString();
		
		// Year occupies columns 15-18 of the fixed-width record.
		String year = line.substring(15, 19);
		
		// Temperature is a signed 5-character field at columns 87-91;
		// Integer.parseInt rejects a leading '+', so skip it explicitly.
		int airTemperature;
		if (line.charAt(87) == '+') {
			airTemperature = Integer.parseInt(line.substring(88, 92));
		} else {
			airTemperature = Integer.parseInt(line.substring(87, 92));
		}
		
		// Quality code at column 92: only codes 0,1,4,5,9 are trusted readings.
		String quality = line.substring(92, 93);
		if (airTemperature != MISSING && quality.matches("[01459]")) {
			outYear.set(year);
			outTemperature.set(airTemperature);
			context.write(outYear, outTemperature);
		}
		
	}
	
}

 MaxTemperatureReducer.java

package com.hadoop.study.chap01.news;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MaxTemperatureReducer extends
		Reducer<Text, IntWritable, Text, IntWritable> {

	// Reused output value object; avoids one allocation per key.
	private final IntWritable outMax = new IntWritable();

	/**
	 * Reduces all temperatures observed for one year down to the
	 * single maximum and emits (year, maxTemperature).
	 *
	 * @param key     the year
	 * @param values  all temperatures recorded for that year
	 * @param context output context receiving the single (year, max) pair
	 * @throws IOException          if emitting the output pair fails
	 * @throws InterruptedException if the task is interrupted while writing
	 */
	@Override
	protected void reduce(Text key, Iterable<IntWritable> values, Context context)
			throws IOException, InterruptedException {
		
		// Start below any representable reading so the first value wins.
		int maxAirTemperature = Integer.MIN_VALUE;
		
		for (IntWritable airTemperature : values) {
			maxAirTemperature = Math.max(maxAirTemperature, airTemperature.get());
		}
		
		// The incoming key is already the correct output key; writing it
		// directly avoids the needless defensive copy (new Text(key)).
		outMax.set(maxAirTemperature);
		context.write(key, outMax);
	}

}

 MaxTemperature.java

package com.hadoop.study.chap01.news;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxTemperature {
	
	/**
	 * Driver for the new-API (mapreduce) max-temperature job.
	 * Expects exactly two arguments: the input path and the output path.
	 *
	 * @param args {@code <input path> <output path>}
	 * @throws Exception if job configuration or execution fails
	 */
	public static void main(String[] args) throws Exception {
		
		// Bail out early with a usage hint when the argument count is wrong.
		if (args.length != 2) {
			System.err.println("Usage: MaxTemperature <input path> <output path>");
			System.exit(-1);
		}
		
		Job job = new Job();
		// Name the job so it is identifiable in the JobTracker UI,
		// consistent with the MaxTemperatureWithCombiner driver.
		job.setJobName("Max Temperature");
		
		job.setJarByClass(MaxTemperature.class);
		
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		
		job.setMapperClass(MaxTemperatureMapper.class);
		job.setReducerClass(MaxTemperatureReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		
		// Run to completion; exit 0 on success, 1 on failure.
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
	
}

-- 执行

参考旧版本执行步骤

-- 控制台输出

14/02/24 23:10:37 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
14/02/24 23:10:37 INFO input.FileInputFormat: Total input paths to process : 1
14/02/24 23:10:37 INFO util.NativeCodeLoader: Loaded the native-hadoop library
14/02/24 23:10:37 WARN snappy.LoadSnappy: Snappy native library not loaded
14/02/24 23:10:38 INFO mapred.JobClient: Running job: job_201402241759_0006
14/02/24 23:10:39 INFO mapred.JobClient:  map 0% reduce 0%
14/02/24 23:10:45 INFO mapred.JobClient:  map 100% reduce 0%
14/02/24 23:10:53 INFO mapred.JobClient:  map 100% reduce 33%
14/02/24 23:10:55 INFO mapred.JobClient:  map 100% reduce 100%
14/02/24 23:10:56 INFO mapred.JobClient: Job complete: job_201402241759_0006
14/02/24 23:10:56 INFO mapred.JobClient: Counters: 29
14/02/24 23:10:56 INFO mapred.JobClient:   Job Counters 
14/02/24 23:10:56 INFO mapred.JobClient:     Launched reduce tasks=1
14/02/24 23:10:56 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=6900
14/02/24 23:10:56 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/02/24 23:10:56 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/02/24 23:10:56 INFO mapred.JobClient:     Launched map tasks=1
14/02/24 23:10:56 INFO mapred.JobClient:     Data-local map tasks=1
14/02/24 23:10:56 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=9502
14/02/24 23:10:56 INFO mapred.JobClient:   File Output Format Counters 
14/02/24 23:10:56 INFO mapred.JobClient:     Bytes Written=9
14/02/24 23:10:56 INFO mapred.JobClient:   FileSystemCounters
14/02/24 23:10:56 INFO mapred.JobClient:     FILE_BYTES_READ=72210
14/02/24 23:10:56 INFO mapred.JobClient:     HDFS_BYTES_READ=888304
14/02/24 23:10:56 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=252199
14/02/24 23:10:56 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=9
14/02/24 23:10:56 INFO mapred.JobClient:   File Input Format Counters 
14/02/24 23:10:56 INFO mapred.JobClient:     Bytes Read=888190
14/02/24 23:10:56 INFO mapred.JobClient:   Map-Reduce Framework
14/02/24 23:10:56 INFO mapred.JobClient:     Map output materialized bytes=72210
14/02/24 23:10:56 INFO mapred.JobClient:     Map input records=6565
14/02/24 23:10:56 INFO mapred.JobClient:     Reduce shuffle bytes=72210
14/02/24 23:10:56 INFO mapred.JobClient:     Spilled Records=13128
14/02/24 23:10:56 INFO mapred.JobClient:     Map output bytes=59076
14/02/24 23:10:56 INFO mapred.JobClient:     CPU time spent (ms)=2050
14/02/24 23:10:56 INFO mapred.JobClient:     Total committed heap usage (bytes)=210173952
14/02/24 23:10:56 INFO mapred.JobClient:     Combine input records=0
14/02/24 23:10:56 INFO mapred.JobClient:     SPLIT_RAW_BYTES=114
14/02/24 23:10:56 INFO mapred.JobClient:     Reduce input records=6564
14/02/24 23:10:56 INFO mapred.JobClient:     Reduce input groups=1
14/02/24 23:10:56 INFO mapred.JobClient:     Combine output records=0
14/02/24 23:10:56 INFO mapred.JobClient:     Physical memory (bytes) snapshot=190836736
14/02/24 23:10:56 INFO mapred.JobClient:     Reduce output records=1
14/02/24 23:10:56 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=748298240
14/02/24 23:10:56 INFO mapred.JobClient:     Map output records=6564

 MaxTemperatureWithCombiner.java

package com.hadoop.study.chap01.news;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxTemperatureWithCombiner {
	
	/**
	 * Driver for the max-temperature job with a combiner: the reducer
	 * class doubles as a map-side combiner, shrinking shuffle traffic.
	 * Expects exactly two arguments: the input path and the output path.
	 *
	 * @param args {@code <input path> <output path>}
	 * @throws Exception if job configuration or execution fails
	 */
	public static void main(String[] args) throws Exception {
		
		// Bail out early with a usage hint when the argument count is wrong.
		if (args.length != 2) {
			System.err.println("Usage: MaxTemperature <input path> <output path>");
			System.exit(-1);
		}
		
		Job job = new Job();
		job.setJobName("Max Temperature");
		job.setJarByClass(MaxTemperatureWithCombiner.class);
		
		// Max is associative and commutative, so the reducer is safely
		// reusable as the combiner.
		job.setMapperClass(MaxTemperatureMapper.class);
		job.setCombinerClass(MaxTemperatureReducer.class);
		job.setReducerClass(MaxTemperatureReducer.class);
		
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		
		// Run to completion; exit 0 on success, 1 on failure.
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
	
}

 -- 控制台输出

14/02/24 23:12:16 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
14/02/24 23:12:17 INFO input.FileInputFormat: Total input paths to process : 1
14/02/24 23:12:17 INFO util.NativeCodeLoader: Loaded the native-hadoop library
14/02/24 23:12:17 WARN snappy.LoadSnappy: Snappy native library not loaded
14/02/24 23:12:17 INFO mapred.JobClient: Running job: job_201402241759_0007
14/02/24 23:12:18 INFO mapred.JobClient:  map 0% reduce 0%
14/02/24 23:12:30 INFO mapred.JobClient:  map 100% reduce 0%
14/02/24 23:12:41 INFO mapred.JobClient:  map 100% reduce 33%
14/02/24 23:12:43 INFO mapred.JobClient:  map 100% reduce 100%
14/02/24 23:12:44 INFO mapred.JobClient: Job complete: job_201402241759_0007
14/02/24 23:12:44 INFO mapred.JobClient: Counters: 29
14/02/24 23:12:44 INFO mapred.JobClient:   Job Counters 
14/02/24 23:12:44 INFO mapred.JobClient:     Launched reduce tasks=1
14/02/24 23:12:44 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=10591
14/02/24 23:12:44 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
14/02/24 23:12:44 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
14/02/24 23:12:44 INFO mapred.JobClient:     Launched map tasks=1
14/02/24 23:12:44 INFO mapred.JobClient:     Data-local map tasks=1
14/02/24 23:12:44 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=13038
14/02/24 23:12:44 INFO mapred.JobClient:   File Output Format Counters 
14/02/24 23:12:44 INFO mapred.JobClient:     Bytes Written=9
14/02/24 23:12:44 INFO mapred.JobClient:   FileSystemCounters
14/02/24 23:12:44 INFO mapred.JobClient:     FILE_BYTES_READ=17
14/02/24 23:12:44 INFO mapred.JobClient:     HDFS_BYTES_READ=888304
14/02/24 23:12:44 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=108261
14/02/24 23:12:44 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=9
14/02/24 23:12:44 INFO mapred.JobClient:   File Input Format Counters 
14/02/24 23:12:44 INFO mapred.JobClient:     Bytes Read=888190
14/02/24 23:12:44 INFO mapred.JobClient:   Map-Reduce Framework
14/02/24 23:12:44 INFO mapred.JobClient:     Map output materialized bytes=17
14/02/24 23:12:44 INFO mapred.JobClient:     Map input records=6565
14/02/24 23:12:44 INFO mapred.JobClient:     Reduce shuffle bytes=17
14/02/24 23:12:44 INFO mapred.JobClient:     Spilled Records=2
14/02/24 23:12:44 INFO mapred.JobClient:     Map output bytes=59076
14/02/24 23:12:44 INFO mapred.JobClient:     CPU time spent (ms)=4460
14/02/24 23:12:44 INFO mapred.JobClient:     Total committed heap usage (bytes)=210173952
14/02/24 23:12:44 INFO mapred.JobClient:     Combine input records=6564
14/02/24 23:12:44 INFO mapred.JobClient:     SPLIT_RAW_BYTES=114
14/02/24 23:12:44 INFO mapred.JobClient:     Reduce input records=1
14/02/24 23:12:44 INFO mapred.JobClient:     Reduce input groups=1
14/02/24 23:12:44 INFO mapred.JobClient:     Combine output records=1
14/02/24 23:12:44 INFO mapred.JobClient:     Physical memory (bytes) snapshot=191209472
14/02/24 23:12:44 INFO mapred.JobClient:     Reduce output records=1
14/02/24 23:12:44 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=748470272
14/02/24 23:12:44 INFO mapred.JobClient:     Map output records=6564

3. 对比

--Mapper,Reducer的实现由实现接口变成继承类

--使用Job类来控制作业,而不是JobClient

--控制台输出,旧版本Launched map tasks=3(除去FAILED,数量为2),新版本Launched map tasks=1,使用combiner之后Reduce input records=1

4. 参考资料

Hadoop权威指南

分享到:
评论

相关推荐

    Hadoop第一次培训材料

    【Hadoop第一次培训材料】是针对初学者设计的一份详细教程,旨在介绍分布式计算框架Hadoop的核心概念和技术。Hadoop是Apache软件基金会开发的一个开源项目,它为处理和存储大量数据提供了可扩展的、可靠的解决方案。...

    Hadoop的单机伪分布式搭建和运行第一个WordCount程序

    Hadoop单机伪分布式搭建和运行第一个WordCount程序 Hadoop是Apache基金会下的一个开源的大数据处理框架,它广泛应用于数据处理、数据分析和机器学习等领域。下面是关于Hadoop单机伪分布式搭建和运行第一个WordCount...

    hadoop权威指南第三版完整版

    - **ZooKeeper**:一种分布式的协调服务,用于分布式应用程序之间的同步,比如配置维护、域名服务、分布式同步等。 - **Sqoop**:用于在Hadoop和关系型数据库之间高效传输数据的工具。 - **Flume**:一个高可靠、高...

    【IT十八掌徐培成】Hadoop第01天-05.hadoop伪分布式2.zip

    【IT十八掌徐培成】Hadoop第01天-05.hadoop伪分布式2.zip 这个压缩包文件主要涵盖了Hadoop技术中的一个重要概念——伪分布式模式。在深入讲解这个主题之前,我们先来理解一下Hadoop是什么。Hadoop是一个开源的、基于...

    Hadoop权威指南中文版(第二版)+Hadoop in Action

    1. **Hadoop概述**:Hadoop是由Apache基金会开发的一个开源项目,基于Google的MapReduce编程模型和GFS分布式文件系统。它的设计目标是高容错性、可扩展性和高效的数据处理。 2. **Hadoop组件**: - **HDFS(Hadoop...

    Hadoop权威指南 第二版(中文版)

    Hadoop的I/O、MapReduce应用程序开发;MapReduce的工作机制;MapReduce的类型和格式;MapReduce的特性;如何构建Hadoop集群,如何管理Hadoop;Pig简介;Hbase简介;Hive简介;ZooKeeper简介;开源工具Sqoop,最后还...

    hadoop2.6,window7 64bit,hadoop.dll、winutils.exe文件下载

    1. **hadoop.dll**:这是一个动态链接库文件,它是Hadoop在Windows系统上的一个组件,用于实现某些特定功能,如JNI(Java Native Interface)调用,使得Java代码可以与本地系统交互。如果没有这个文件,Hadoop可能...

    细细品味Hadoop_Hadoop集群(第9期)_MapReduce初级案例

    这个“细细品味Hadoop_Hadoop集群(第9期)_MapReduce初级案例”主题聚焦于Hadoop集群的核心组件——MapReduce,这是一种分布式计算模型,对于处理海量数据具有重要作用。 MapReduce的工作原理可以分为两个主要阶段...

    Hadoop第四版英文版+第三版中文版

    在Hadoop第四版中,作者们对Hadoop生态系统的最新发展进行了全面更新。这一版本涵盖了Hadoop 2.x系列的诸多改进,包括YARN(Yet Another Resource Negotiator),这是一种新的资源管理器,将作业调度与集群资源管理...

    hadoop实战第二版及源码

    《Hadoop实战第二版》是由陆嘉恒编著的一本深度解析Hadoop技术的专业书籍,其源码的提供为读者提供了亲自动手实践Hadoop的机会,增强了学习效果。Hadoop作为大数据处理领域的基石,它的核心在于分布式存储系统HDFS...

    新版Hadoop视频教程 段海涛老师Hadoop八天完全攻克Hadoop视频教程 Hadoop开发

    第一天 hadoop的基本概念 伪分布式hadoop集群安装 hdfs mapreduce 演示 01-hadoop职位需求状况.avi 02-hadoop课程安排.avi 03-hadoop应用场景.avi 04-hadoop对海量数据处理的解决思路.avi 05-hadoop版本选择和...

    hadoop权威指南 第三版

    《Hadoop权威指南》第三版是Hadoop领域的经典著作,为读者全面解析了这个分布式计算框架的核心原理与实际应用。本书旨在帮助读者深入理解Hadoop的架构、设计思想以及如何在实际项目中有效地运用它。以下是对这本书...

    hadoop eclips 的插件 和实例程序

    1. 下载Hadoop Eclipse插件:通常,你可以从Apache Hadoop官方网站或第三方插件库找到适用于Eclipse的Hadoop插件。确保下载的插件版本与你的Eclipse和Hadoop版本兼容。 2. 安装插件:将下载的插件文件(通常是.zip...

    hadoop/bin/hadoop.dll

    1. **验证下载**:首先,检查Hadoop的下载源,确保是从官方或信誉良好的第三方网站获取的。有时,不完整的下载或损坏的文件会导致缺失关键组件。 2. **重新下载**:如果确认下载无误,尝试重新下载并重新安装Hadoop...

    hadoop2.7.x_winutils_exe&&hadoop_dll

    在IT行业中,Hadoop是一个广泛使用的开源框架,主要用于大数据处理和分布式存储。标题"hadop2.7.x_winutils_exe&&hadoop_dll"暗示我们关注的是Hadoop 2.7.x版本在Windows环境下的两个关键组件:`winutils.exe`和`...

    Hadoop权威指南(第2版).pdf

    - Hadoop MapReduce:一个Java软件框架,用于编写应用程序,以并行方式处理大数据集。 - Hadoop Ozone:为Hadoop提供一个通用的、可扩展的和高可用的存储平台。 Hadoop的使用场景非常广泛,包括但不限于: - 网络...

    【IT十八掌徐培成】Hadoop第02天-01.Hadoop基础回顾-进程初识-webui.zip

    本资源"【IT十八掌徐培成】Hadoop第02天-01.Hadoop基础回顾-进程初识-webui.zip"聚焦于Hadoop的基础知识,特别是对Hadoop进程的理解以及如何通过Web UI进行监控。这里我们将深入探讨Hadoop的核心组件、进程架构以及...

    hadoop的各种jar包

    在大数据处理领域,Hadoop是一个不可或缺的开源框架,主要用于...总之,正确配置和使用Hadoop的JAR包是开发Hadoop应用程序的关键。通过Eclipse这样的IDE,可以方便地管理这些依赖,加速大数据应用的开发和调试过程。

    Hadoop权威指南,hadoop权威指南pdf,Hadoop

    6. **最佳实践**:作者分享了他们在开发和应用Hadoop过程中的经验和技巧,帮助读者避免常见的陷阱,提高Hadoop应用的效率和性能。 通过阅读《Hadoop权威指南》,无论是初学者还是有经验的开发者,都能对Hadoop有...

Global site tag (gtag.js) - Google Analytics