`
flyfoxs
  • 浏览: 300482 次
  • 性别: Icon_minigender_1
  • 来自: 合肥
社区版块
存档分类
最新评论

【大数据笔记】-解读hadoop命令

 
阅读更多

下面是hadoop发布版本中bin目录下hadoop命令的源码。hadoop命令支持很多种参数,我一直记不住,想通过精读这部分代码,记住其中一部分参数。

 

#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script runs the hadoop core commands. 


# Resolve the absolute path of the directory that contains this script:
#   1. look the invoked name ($0) up with `which`,
#   2. strip the file name to get its directory,
#   3. cd into that directory and print the absolute path.
# Every expansion is quoted so that an install path containing spaces is
# not word-split.
bin=$(which "$0")
bin=$(dirname "${bin}")
bin=$(cd "$bin"; pwd)


# Locate and source hadoop-config.sh (per the original note, it holds the
# configuration shared by the Hadoop commands).
# HADOOP_LIBEXEC_DIR is used when it is set and non-empty; otherwise fall back
# to the libexec directory that sits next to this script's bin directory.
DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
# Quoted so a libexec path containing spaces is sourced as a single file name.
. "$HADOOP_LIBEXEC_DIR/hadoop-config.sh"

# Print the launcher's help text (one line per argument) to stdout.
print_usage() {
  printf '%s\n' \
    'Usage: hadoop [--config confdir] COMMAND' \
    '       where COMMAND is one of:' \
    '  fs                   run a generic filesystem user client' \
    '  version              print the version' \
    '  jar <jar>            run a jar file' \
    '  checknative [-a|-h]  check native hadoop and compression libraries availability' \
    '  distcp <srcurl> <desturl> copy file or directories recursively' \
    '  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive' \
    '  classpath            prints the class path needed to get the' \
    '                       Hadoop jar and the required libraries' \
    '  daemonlog            get/set the log level for each daemon' \
    ' or' \
    '  CLASSNAME            run the class named CLASSNAME' \
    '' \
    'Most commands print help when invoked w/o parameters.'
}

# Guard: when no arguments were given, show the usage text and exit.
[[ $# -gt 0 ]] || {
  print_usage
  exit
}

# Dispatch on the first argument ($0 is the script itself):
#   - help flags print the usage text,
#   - hdfs / mapred sub-commands are delegated to bin/hdfs or bin/mapred,
#   - "classpath" prints the effective CLASSPATH,
#   - everything else is mapped to a Java class and run with "$JAVA".
# Reads JAVA, JAVA_HEAP_MAX, CLASSPATH, TOOL_PATH, HADOOP_OPTS,
# HADOOP_CLIENT_OPTS and cygwin, none of which are assigned before this point
# in the visible script (presumably set by hadoop-config.sh -- confirm).
COMMAND=$1
case "$COMMAND" in
  # usage flags
  --help|-help|-h)
    print_usage
    exit
    ;;

  # hdfs commands
  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups)
    echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
    echo "Instead use the hdfs command for it." 1>&2
    echo "" 1>&2
    # Try to locate hdfs and, if present, delegate to it.
    # "dfsgroups" is passed to the hdfs script as "groups".
    shift
    if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
      exec "${HADOOP_HDFS_HOME}"/bin/hdfs "${COMMAND/dfsgroups/groups}" "$@"
    elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
      exec "${HADOOP_PREFIX}"/bin/hdfs "${COMMAND/dfsgroups/groups}" "$@"
    else
      echo "HADOOP_HDFS_HOME not found!"
      exit 1
    fi
    ;;

  # mapred commands for backwards compatibility
  pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker|mrhaadmin|mrzkfc|jobtrackerha)
    echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2
    echo "Instead use the mapred command for it." 1>&2
    echo "" 1>&2
    # Try to locate mapred and, if present, delegate to it.
    # "mrgroups" is passed to the mapred script as "groups".
    shift
    if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
      exec "${HADOOP_MAPRED_HOME}"/bin/mapred "${COMMAND/mrgroups/groups}" "$@"
    elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
      exec "${HADOOP_PREFIX}"/bin/mapred "${COMMAND/mrgroups/groups}" "$@"
    else
      echo "HADOOP_MAPRED_HOME not found!"
      exit 1
    fi
    ;;

  # Print the classpath Hadoop runs with; handy for debugging classpath errors.
  classpath)
    # Default to "false" so an unset $cygwin does not run cygpath by accident
    # (an empty command in `if` evaluates as success).
    if ${cygwin:-false}; then
      CLASSPATH=$(cygpath -p -w "$CLASSPATH")
    fi
    # Quoted: the classpath must not be word-split or glob-expanded.
    printf '%s\n' "$CLASSPATH"
    exit
    ;;

  # core commands
  *)
    # Map the command name to the Java class that implements it.
    case "$COMMAND" in
      fs)
        CLASS=org.apache.hadoop.fs.FsShell
        ;;
      version)
        CLASS=org.apache.hadoop.util.VersionInfo
        ;;
      jar)
        CLASS=org.apache.hadoop.util.RunJar
        ;;
      checknative)
        CLASS=org.apache.hadoop.util.NativeLibraryChecker
        ;;
      distcp)
        CLASS=org.apache.hadoop.tools.DistCp
        CLASSPATH=${CLASSPATH}:${TOOL_PATH}
        ;;
      daemonlog)
        CLASS=org.apache.hadoop.log.LogLevel
        ;;
      archive)
        CLASS=org.apache.hadoop.tools.HadoopArchives
        CLASSPATH=${CLASSPATH}:${TOOL_PATH}
        ;;
      -*)
        # class and package names cannot begin with a -
        echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
        exit 1
        ;;
      *)
        # Nothing above matched: treat the first argument as a class name, e.g.
        #   hadoop org.apache.hadoop.examples.WordCount /tmp/15 /tmp/46
        CLASS=$COMMAND
        ;;
    esac

    # Drop the first argument from "$@". For
    #   hadoop org.apache.hadoop.examples.WordCount /tmp/15 /tmp/46
    # "$@" is "org.apache.hadoop.examples.WordCount /tmp/15 /tmp/46" before the
    # shift and "/tmp/15 /tmp/46" after it.
    shift

    # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS.
    # Per the original note, their defaults live in hadoop-config.sh; edit that
    # file to change JVM start-up options (e.g. to enable remote debugging).
    HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"

    # make sure security appender is turned off
    HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"

    # Cygwin compatibility: convert the classpath to Windows form.
    if ${cygwin:-false}; then
      CLASSPATH=$(cygpath -p -w "$CLASSPATH")
    fi

    # Export CLASSPATH so the exec'd Java process inherits it.
    export CLASSPATH="$CLASSPATH"
    # JAVA_HEAP_MAX and HADOOP_OPTS are deliberately unquoted: each may hold
    # several space-separated JVM options that must become separate arguments.
    exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS "$CLASS" "$@"
    ;;

esac

 

0
1
分享到:
评论

相关推荐

    Time的hadoop学习笔记之--2017解读大数据

    根据2017大数据发展趋势,结合国内国外的大数据发展现状,以及政策纲要,浅层次的介绍了大数据发展的趋势,以及简单的大数据架构。文章所述仅代表个人观点,不足之处还请指正。文档仅限学习使用,禁止商业转播。...

    Hadoop 培训笔记 及源码

    Hadoop架构分析之集群结构分析,Hadoop架构分析之HDFS架构分析,Hadoop架构分析之NN和DN原生文档解读,Hadoop MapReduce原理之流程图.Hadoop MapReduce原理之核心类Job和ResourceManager解读.Hadoop MapReduce原理之...

    IT十八掌_Hadoop阶段学习笔记(课堂笔记与源码流程)

    Hadoop架构分析之集群结构分析,Hadoop架构分析之HDFS架构分析,Hadoop架构分析之NN和DN原生文档解读,Hadoop MapReduce原理之流程图.Hadoop MapReduce原理之核心类Job和ResourceManager解读.Hadoop MapReduce原理之...

    哈工大大数据分析复习笔记

    首先,笔记详细介绍了大数据分析的基本概念,包括大数据的定义,特点以及大数据分析的重要性。其次,笔记深入探讨了大数据分析的核心技术,如数据采集,数据存储,数据处理和数据挖掘等。此外,笔记还结合了大量的...

    数据库笔记.zip

    以下是对这些笔记的详细解读: 1. **数据库基本概念**: - 数据库(Database):是一个有组织、可搜索的数据集合,用于存储和检索信息。 - 关系型数据库(Relational Database):是最常见的数据库类型,基于关系...

    测试CPU是否支持虚拟化软件使用说明.docx

    本文将详细介绍如何测试笔记本电脑CPU是否支持虚拟化,这对于成功安装和运行Hadoop等大数据软件至关重要。 #### 二、实验环境要求 为确保Hadoop及其他大数据软件能够顺利安装与运行,实验环境需满足以下条件: 1. *...

    机器学习笔记

    7. Hadoop是一个开源框架,用于分布式存储和处理大数据。它通过HDFS(Hadoop Distributed File System)来存储数据,并通过MapReduce来处理数据。 8. Pig和Hive都是大数据处理的工具。Pig是一种高阶的数据流语言和...

    hbase第04天

    "笔记.txt"可能是学习过程中的个人笔记,汇总了重要概念、命令示例和实践心得,对于巩固理解大有裨益。 最后,"coprocessor"这个词暗示了课程可能涉及到了HBase的协处理器机制。协处理器是HBase提供的一种扩展框架...

    十年光阴,梦想带我去飞翔1

    同时,数据处理和分析技能,如SQL和大数据工具(Hadoop、Spark)的应用,也是现代IT从业者必备的。 总的来说,“十年光阴,梦想带我去飞翔”不仅描绘了一个IT从业者追求技术梦想的旅程,也反映了在快速发展的信息...

Global site tag (gtag.js) - Google Analytics