CREATE TABLE records (year STRING, temperature INT, quality INT)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';
LOAD DATA LOCAL INPATH '/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt' OVERWRITE INTO TABLE records;
hive> explain
    > LOAD DATA LOCAL INPATH '/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt' OVERWRITE INTO TABLE records;
OK
ABSTRACT SYNTAX TREE:
  (TOK_LOAD '/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt' (TOK_TAB records) LOCAL OVERWRITE)

STAGE DEPENDENCIES:
  Stage-0 is a root stage
  Stage-1 depends on stages: Stage-0

STAGE PLANS:
  Stage: Stage-0
    Copy
      source: file:/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt
      destination: hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_00-10-28_984_2803781885868135028/-ext-10000

  Stage: Stage-1
    Move Operator
      tables:
          replace: true
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: records

Time taken: 0.333 seconds
The same AST, written out one token per line for readability:

(TOK_LOAD
  '/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt'
  (TOK_TAB records)
  LOCAL
  OVERWRITE
)
hive> explain extended
    > LOAD DATA LOCAL INPATH '/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt' OVERWRITE INTO TABLE records;
OK
ABSTRACT SYNTAX TREE:
  (TOK_LOAD '/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt' (TOK_TAB records) LOCAL OVERWRITE)

STAGE DEPENDENCIES:
  Stage-0 is a root stage
  Stage-1 depends on stages: Stage-0

STAGE PLANS:
  Stage: Stage-0
    Copy
      source: file:/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt
      destination: hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-23-29_636_1841974897600272533/-ext-10000

  Stage: Stage-1
    Move Operator
      tables:
          replace: true
          source: hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-23-29_636_1841974897600272533/-ext-10000
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns year,temperature,quality
                columns.types string:int:int
                field.delim
                file.inputformat org.apache.hadoop.mapred.TextInputFormat
                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                location hdfs://localhost:54310/user/hive/warehouse/records
                name records
                serialization.ddl struct records { string year, i32 temperature, i32 quality}
                serialization.format
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                transient_lastDdlTime 1313975965
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: records
          tmp directory: hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-23-29_636_1841974897600272533/-ext-10001
CliDriver:
CliDriver.main() {
ret = cli.processLine(line);
}
CliDriver:
public int processLine(String line) {
ret = processCmd(command);
}
CliDriver:
public int processCmd(String cmd) {
CommandProcessor proc = CommandProcessorFactory.get(tokens[0]);
Driver qp = (Driver) proc;
ret = qp.run(cmd).getResponseCode();
}
Driver:
public CommandProcessorResponse run(String command) {
// command = LOAD DATA LOCAL INPATH '/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt' OVERWRITE INTO TABLE records
int ret = compile(command);
ret = execute();
}
Driver:
public int compile(String command) {
ctx = new Context(conf); //
SemanticAnalyzerFactory sfactory = new SemanticAnalyzerFactory(conf);
BaseSemanticAnalyzer sem = sfactory.get(tree); // return new LoadSemanticAnalyzer(conf);
sem.analyze(tree, ctx);
}
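The factory picks the analyzer from the root token of the parsed AST; because the root here is TOK_LOAD, a LoadSemanticAnalyzer is returned. A minimal sketch of that dispatch, adapted to the instance-style call shown above (the real factory covers many more token types):

public BaseSemanticAnalyzer get(ASTNode tree) throws SemanticException {
  switch (tree.getToken().getType()) {
    case HiveParser.TOK_LOAD:
      return new LoadSemanticAnalyzer(conf);    // our LOAD DATA ... statement
    case HiveParser.TOK_EXPLAIN:
      return new ExplainSemanticAnalyzer(conf); // used when the statement is wrapped in EXPLAIN
    default:
      return new SemanticAnalyzer(conf);        // ordinary queries
  }
}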
public Context(Configuration conf) throws IOException {
this(conf, generateExecutionId());
}
/**
* Create a Context with a given executionId. ExecutionId, together with
* user name and conf, will determine the temporary directory locations.
*/
public Context(Configuration conf, String executionId) throws IOException {
this.conf = conf;
this.executionId = executionId; //hive_2011-08-21_00-02-22_445_7799135143086468923
// non-local tmp location is configurable. however it is the same across
// all external file systems
nonLocalScratchPath =
new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR),
executionId); // /tmp/hive-tianzhao/hive_2011-08-21_00-02-22_445_7799135143086468923
// local tmp location is not configurable for now
localScratchDir = System.getProperty("java.io.tmpdir")
+ Path.SEPARATOR + System.getProperty("user.name") + Path.SEPARATOR
+ executionId; // /tmp/tianzhao/hive_2011-08-21_00-02-22_445_7799135143086468923
}
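The -ext-10000 and -ext-10001 URIs that appear throughout this trace come from ctx.getExternalTmpFileURI(toURI). A minimal sketch of how such a URI is assembled from the fields above, assuming a simple counter that starts at 10000 (the field and helper names are guesses based on the observed paths, not the exact Hive source):

private int pathId = 10000;  // assumed counter: first call yields -ext-10000, the next -ext-10001

public String getExternalTmpFileURI(URI extURI) {
  // take scheme + authority from the destination (hdfs://localhost:54310) and
  // append the non-local scratch path plus a fresh "-ext-" suffix
  String scratch = extURI.getScheme() + "://" + extURI.getAuthority() + nonLocalScratchPath;
  return scratch + Path.SEPARATOR + "-ext-" + (pathId++);
  // e.g. hdfs://localhost:54310/tmp/hive-tianzhao/<executionId>/-ext-10000
}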
public class LoadSemanticAnalyzer extends BaseSemanticAnalyzer {
public void analyzeInternal(ASTNode ast) throws SemanticException {
isLocal = false;
isOverWrite = false;
Tree fromTree = ast.getChild(0); // '/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt'
Tree tableTree = ast.getChild(1); // TOK_TAB
if (ast.getChildCount() == 4) { // true
isLocal = true; //
isOverWrite = true; //
}
if (ast.getChildCount() == 3) {
if (ast.getChild(2).getText().toLowerCase().equals("local")) {
isLocal = true;
} else {
isOverWrite = true;
}
}
// initialize load path
URI fromURI;
try {
String fromPath = stripQuotes(fromTree.getText()); ///home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt
fromURI = initializeFromURI(fromPath); // file:/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt
} catch (IOException e) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e
.getMessage()), e);
} catch (URISyntaxException e) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(fromTree, e
.getMessage()), e);
}
// initialize destination table/partition
tableSpec ts = new tableSpec(db, conf, (ASTNode) tableTree); //
Table tbl = ts.tableHandle;
if (tbl.isView()) {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
if (tbl.isNonNative()) {
throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
}
genAuthorizeEntry(db.getCurrentDatabase(), tbl.getTableName(), null, Privilege.INSERT_PRIV);
URI toURI = (ts.partHandle != null) ? ts.partHandle.getDataLocation()
: ts.tableHandle.getDataLocation(); // hdfs://localhost:54310/user/hive/warehouse/records
List<FieldSchema> parts = ts.tableHandle.getPartitionKeys();
if (isOverWrite && (parts != null && parts.size() > 0)
&& (ts.partSpec == null || ts.partSpec.size() == 0)) {
throw new SemanticException(ErrorMsg.NEED_PARTITION_ERROR.getMsg());
}
// make sure the arguments make sense
applyConstraints(fromURI, toURI, fromTree, isLocal);
Task<? extends Serializable> rTask = null;
// create copy work
if (isLocal) { //true
// if the local keyword is specified - we will always make a copy. this
// might seem redundant in the case
// that the hive warehouse is also located in the local file system - but
// that's just a test case.
String copyURIStr = ctx.getExternalTmpFileURI(toURI); //hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-32-25_911_8382925475653721697/-ext-10000
URI copyURI = URI.create(copyURIStr);
rTask = TaskFactory.get(new CopyWork(fromURI.toString(), copyURIStr),
conf);
fromURI = copyURI;
}
// create final load/move work
String loadTmpPath = ctx.getExternalTmpFileURI(toURI); // hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-32-25_911_8382925475653721697/-ext-10001
Map<String, String> partSpec = ts.getPartSpec();
if (partSpec == null) {
partSpec = new LinkedHashMap<String, String>();
}
LoadTableDesc loadTableWork = new LoadTableDesc(fromURI.toString(),
loadTmpPath, Utilities.getTableDesc(ts.tableHandle), partSpec, isOverWrite);
if (rTask != null) {
rTask.addDependentTask(TaskFactory.get(new MoveWork(getInputs(),
getOutputs(), loadTableWork, null, true), conf));
} else {
rTask = TaskFactory.get(new MoveWork(getInputs(), getOutputs(),
loadTableWork, null, true), conf);
}
rootTasks.add(rTask);
}
private URI initializeFromURI(String fromPath) throws IOException,
URISyntaxException {
URI fromURI = new Path(fromPath).toUri(); // /home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt
String fromScheme = fromURI.getScheme(); // null
String fromAuthority = fromURI.getAuthority(); // null
String path = fromURI.getPath(); // /home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt
// generate absolute path relative to current directory or hdfs home
// directory
if (!path.startsWith("/")) { // not an absolute path: make it absolute below
if (isLocal) {
path = new Path(System.getProperty("user.dir"), path).toString();
} else {
path = new Path(new Path("/user/" + System.getProperty("user.name")),
path).toString();
}
}
// set correct scheme and authority
if (StringUtils.isEmpty(fromScheme)) { // true
if (isLocal) { //true
// file for local
fromScheme = "file"; //
} else {
// use default values from fs.default.name
URI defaultURI = FileSystem.get(conf).getUri();
fromScheme = defaultURI.getScheme();
fromAuthority = defaultURI.getAuthority();
}
}
// if scheme is specified but not authority then use the default authority
if (fromScheme.equals("hdfs") && StringUtils.isEmpty(fromAuthority)) {
URI defaultURI = FileSystem.get(conf).getUri();
fromAuthority = defaultURI.getAuthority();
}
LOG.debug(fromScheme + "@" + fromAuthority + "@" + path);
return new URI(fromScheme, fromAuthority, path, null, null); //file:/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt
}
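Why fromScheme and fromAuthority start out null is easy to check outside Hive: Path.toUri() only fills in a scheme if the path string contains one. A small standalone check (illustrative, not Hive code):

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class UriCheck {
  public static void main(String[] args) {
    URI u = new Path("/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt").toUri();
    System.out.println(u.getScheme());    // null -> replaced by "file" because isLocal is true
    System.out.println(u.getAuthority()); // null
    System.out.println(u.getPath());      // /home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt
  }
}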
public tableSpec(Hive db, HiveConf conf, ASTNode ast)
throws SemanticException {
assert (ast.getToken().getType() == HiveParser.TOK_TAB);
int childIndex = 0;
numDynParts = 0;
try {
// get table metadata
tableName = unescapeIdentifier(ast.getChild(0).getText()); // records
boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE); // false
if (testMode) {
tableName = conf.getVar(HiveConf.ConfVars.HIVETESTMODEPREFIX)
+ tableName;
}
tableHandle = db.getTable(tableName);
} catch (InvalidTableException ite) {
throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(ast
.getChild(0)), ite);
} catch (HiveException e) {
throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg(ast
.getChild(childIndex), e.getMessage()), e);
}
// get partition metadata if partition specified
if (ast.getChildCount() == 2) { // false
childIndex = 1;
ASTNode partspec = (ASTNode) ast.getChild(1);
// partSpec is a mapping from partition column name to its value.
partSpec = new LinkedHashMap<String, String>(partspec.getChildCount());
for (int i = 0; i < partspec.getChildCount(); ++i) {
ASTNode partspec_val = (ASTNode) partspec.getChild(i);
String val = null;
if (partspec_val.getChildCount() < 2) { // DP in the form of T partition (ds, hr)
++numDynParts;
} else { // in the form of T partition (ds="2010-03-03")
val = stripQuotes(partspec_val.getChild(1).getText());
}
partSpec.put(unescapeIdentifier(partspec_val.getChild(0).getText().toLowerCase()), val);
}
// check if the partition spec is valid
if (numDynParts > 0) {
List<FieldSchema> parts = tableHandle.getPartitionKeys();
int numStaPart = parts.size() - numDynParts;
if (numStaPart == 0 &&
conf.getVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE).equalsIgnoreCase("strict")) {
throw new SemanticException(ErrorMsg.DYNAMIC_PARTITION_STRICT_MODE.getMsg());
}
for (FieldSchema fs: parts) {
if (partSpec.get(fs.getName().toLowerCase()) == null) {
if (numStaPart > 0) { // found a DP, but there exists ST as subpartition
throw new SemanticException(
ErrorMsg.PARTITION_DYN_STA_ORDER.getMsg(ast.getChild(childIndex)));
}
break;
} else {
--numStaPart;
}
}
partHandle = null;
} else {
try {
// this doesn't create partition. partition is created in MoveTask
partHandle = new Partition(tableHandle, partSpec, null);
} catch (HiveException e) {
throw new SemanticException(
ErrorMsg.INVALID_PARTITION.getMsg(ast.getChild(childIndex)));
}
}
}
}
Driver.execute() {
  // runs the task DAG produced by compile(): first the CopyTask (Stage-0),
  // then its dependent MoveTask (Stage-1) -- see the sketch below.
}
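A condensed sketch of what that block does with the two tasks built by LoadSemanticAnalyzer (names simplified; the real Driver tracks running tasks through a DriverContext and fires pre/post hooks): launch the root CopyTask, and once it succeeds launch its child MoveTask.

private int runTasks(List<Task<? extends Serializable>> rootTasks) throws Exception {
  Queue<Task<? extends Serializable>> runnable =
      new LinkedList<Task<? extends Serializable>>(rootTasks); // [CopyTask]
  while (!runnable.isEmpty()) {
    Task<? extends Serializable> tsk = runnable.remove();
    int exitVal = tsk.executeTask();        // CopyTask.execute(), then MoveTask.execute()
    if (exitVal != 0) {
      return exitVal;                       // stop at the first failing stage
    }
    if (tsk.getChildTasks() != null) {
      runnable.addAll(tsk.getChildTasks()); // the CopyTask's only child is the MoveTask
    }
  }
  return 0;
}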
CopyTask.execute()
public int execute(DriverContext driverContext) {
FileSystem dstFs = null;
Path toPath = null;
try {
Path fromPath = new Path(work.getFromPath()); //file:/home/tianzhao/book/hadoop-book/input/ncdc/micro-tab/sample.txt
toPath = new Path(work.getToPath()); //hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-32-25_911_8382925475653721697/-ext-10000
console.printInfo("Copying data from " + fromPath.toString(), " to "
+ toPath.toString());
FileSystem srcFs = fromPath.getFileSystem(conf); //org.apache.hadoop.fs.LocalFileSystem@1c9e4d2
dstFs = toPath.getFileSystem(conf);
FileStatus[] srcs = LoadSemanticAnalyzer.matchFilesOrDir(srcFs, fromPath);
if (srcs == null || srcs.length == 0) {
console.printError("No files matching path: " + fromPath.toString());
errorMessage = "No files matching path: " + fromPath.toString();
return 3;
}
if (!dstFs.mkdirs(toPath)) {
console
.printError("Cannot make target directory: " + toPath.toString());
errorMessage = "Cannot make target directory: " + toPath.toString();
return 2;
}
for (FileStatus oneSrc : srcs) {
LOG.debug("Copying file: " + oneSrc.getPath().toString());
if (!FileUtil.copy(srcFs, oneSrc.getPath(), dstFs, toPath, false, // delete
// source
true, // overwrite destination
conf)) {
console.printError("Failed to copy: '" + oneSrc.getPath().toString()
+ "to: '" + toPath.toString() + "'");
errorMessage = "Failed to copy: '" + oneSrc.getPath().toString()
+ "to: '" + toPath.toString() + "'";
return 1;
}
}
return 0;
} catch (Exception e) {
console.printError("Failed with exception " + e.getMessage(), "\n"
+ StringUtils.stringifyException(e));
errorMessage = "Failed with exception " + e.getMessage()+ "\n"
+ StringUtils.stringifyException(e);
return (1);
}
}
MoveTask:
public int execute(DriverContext driverContext) {
try {
// Do any hive related operations like moving tables and files
// to appropriate locations
LoadFileDesc lfd = work.getLoadFileWork(); // null
if (lfd != null) {
Path targetPath = new Path(lfd.getTargetDir());
Path sourcePath = new Path(lfd.getSourceDir());
FileSystem fs = sourcePath.getFileSystem(conf);
if (lfd.getIsDfsDir()) {
// Just do a rename on the URIs, they belong to the same FS
String mesg = "Moving data to: " + lfd.getTargetDir();
String mesg_detail = " from " + lfd.getSourceDir();
console.printInfo(mesg, mesg_detail);
// delete the output directory if it already exists
fs.delete(targetPath, true);
// if source exists, rename. Otherwise, create a empty directory
if (fs.exists(sourcePath)) {
if (!fs.rename(sourcePath, targetPath)) {
throw new HiveException("Unable to rename: " + sourcePath
+ " to: " + targetPath);
}
} else if (!fs.mkdirs(targetPath)) {
throw new HiveException("Unable to make directory: " + targetPath);
}
} else {
// This is a local file
String mesg = "Copying data to local directory " + lfd.getTargetDir();
String mesg_detail = " from " + lfd.getSourceDir();
console.printInfo(mesg, mesg_detail);
// delete the existing dest directory
LocalFileSystem dstFs = FileSystem.getLocal(conf);
if (dstFs.delete(targetPath, true) || !dstFs.exists(targetPath)) {
console.printInfo(mesg, mesg_detail);
// if source exists, rename. Otherwise, create a empty directory
if (fs.exists(sourcePath)) {
fs.copyToLocalFile(sourcePath, targetPath);
} else {
if (!dstFs.mkdirs(targetPath)) {
throw new HiveException("Unable to make local directory: "
+ targetPath);
}
}
} else {
throw new AccessControlException(
"Unable to delete the existing destination directory: "
+ targetPath);
}
}
}
// Next we do this for tables and partitions
LoadTableDesc tbd = work.getLoadTableWork();
if (tbd != null) {
StringBuilder mesg = new StringBuilder("Loading data to table ")
.append( tbd.getTable().getTableName());
if (tbd.getPartitionSpec().size() > 0) {
mesg.append(" partition (");
Map<String, String> partSpec = tbd.getPartitionSpec();
for (String key: partSpec.keySet()) {
mesg.append(key).append('=').append(partSpec.get(key)).append(", ");
}
mesg.setLength(mesg.length()-2);
mesg.append(')');
}
String mesg_detail = " from " + tbd.getSourceDir();
console.printInfo(mesg.toString(), mesg_detail); //11/08/21 21:58:44 INFO exec.MoveTask: Loading data to table records from hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-32-25_911_8382925475653721697/-ext-10000
Table table = db.getTable(db.getCurrentDatabase(), tbd
.getTable().getTableName());
if (work.getCheckFileFormat()) {
// Get all files from the src directory
FileStatus[] dirs;
ArrayList<FileStatus> files;
FileSystem fs;
try {
fs = FileSystem.get(table.getDataLocation(), conf);
dirs = fs.globStatus(new Path(tbd.getSourceDir()));
files = new ArrayList<FileStatus>();
for (int i = 0; (dirs != null && i < dirs.length); i++) {
files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath())));
// We only check one file, so exit the loop when we have at least
// one.
if (files.size() > 0) {
break;
}
}
} catch (IOException e) {
throw new HiveException(
"addFiles: filesystem error in check phase", e);
}
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVECHECKFILEFORMAT)) {
// Check if the file format of the file matches that of the table.
boolean flag = HiveFileFormatUtils.checkInputFormat(
fs, conf, tbd.getTable().getInputFileFormatClass(), files);
if (!flag) {
throw new HiveException(
"Wrong file format. Please check the file's format.");
}
}
}
// Create a data container
DataContainer dc = null;
if (tbd.getPartitionSpec().size() == 0) {
dc = new DataContainer(table.getTTable());
db.loadTable(new Path(tbd.getSourceDir()), tbd.getTable()
.getTableName(), tbd.getReplace(), new Path(tbd.getTmpDir())); // replace = true because of OVERWRITE
if (work.getOutputs() != null) {
work.getOutputs().add(new WriteEntity(table));
}
} else {
LOG.info("Partition is: " + tbd.getPartitionSpec().toString());
// deal with dynamic partitions
DynamicPartitionCtx dpCtx = tbd.getDPCtx();
if (dpCtx != null && dpCtx.getNumDPCols() > 0) { // dynamic partitions
// load the list of DP partitions and return the list of partition specs
ArrayList<LinkedHashMap<String, String>> dp =
db.loadDynamicPartitions(
new Path(tbd.getSourceDir()),
tbd.getTable().getTableName(),
tbd.getPartitionSpec(),
tbd.getReplace(),
new Path(tbd.getTmpDir()),
dpCtx.getNumDPCols());
// for each partition spec, get the partition
// and put it to WriteEntity for post-exec hook
for (LinkedHashMap<String, String> partSpec: dp) {
Partition partn = db.getPartition(table, partSpec, false);
WriteEntity enty = new WriteEntity(partn);
if (work.getOutputs() != null) {
work.getOutputs().add(enty);
}
// Need to update the queryPlan's output as well so that post-exec hook get executed.
// This is only needed for dynamic partitioning since for SP the WriteEntity is
// constructed at compile time and the queryPlan already contains that.
// For DP, WriteEntity creation is deferred at this stage so we need to update
// queryPlan here.
if (queryPlan.getOutputs() == null) {
queryPlan.setOutputs(new HashSet<WriteEntity>());
}
queryPlan.getOutputs().add(enty);
// update columnar lineage for each partition
dc = new DataContainer(table.getTTable(), partn.getTPartition());
if (SessionState.get() != null) {
SessionState.get().getLineageState().setLineage(tbd.getSourceDir(), dc,
table.getCols());
}
console.printInfo("\tLoading partition " + partSpec);
}
dc = null; // reset data container to prevent it being added again.
} else { // static partitions
db.loadPartition(new Path(tbd.getSourceDir()), tbd.getTable().getTableName(),
tbd.getPartitionSpec(), tbd.getReplace(), new Path(tbd.getTmpDir()));
Partition partn = db.getPartition(table, tbd.getPartitionSpec(), false);
dc = new DataContainer(table.getTTable(), partn.getTPartition());
// add this partition to post-execution hook
if (work.getOutputs() != null) {
work.getOutputs().add(new WriteEntity(partn));
}
}
}
if (SessionState.get() != null && dc != null) {
SessionState.get().getLineageState().setLineage(tbd.getSourceDir(), dc,
table.getCols());
}
}
return 0;
} catch (Exception e) {
console.printError("Failed with exception " + e.getMessage(), "\n"
+ StringUtils.stringifyException(e));
errorMessage = "Failed with exception " + e.getMessage() + "\n"
+ StringUtils.stringifyException(e);
return (1);
}
}
Hive.java:
public void loadTable(Path loadPath, String tableName, boolean replace,
Path tmpDirPath) throws HiveException {
Table tbl = getTable(tableName); //records
if (replace) { //true
tbl.replaceFiles(loadPath, tmpDirPath);
// loadPath=hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-32-25_911_8382925475653721697/-ext-10000
// tmpDirPath=hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-32-25_911_8382925475653721697/-ext-10001
} else {
tbl.copyFiles(loadPath);
}
}
Table:
protected void replaceFiles(Path srcf, Path tmpd) throws HiveException {
FileSystem fs;
try {
fs = FileSystem.get(getDataLocation(), Hive.get().getConf());
Hive.replaceFiles(srcf, new Path(getDataLocation().getPath()), fs, tmpd);
} catch (IOException e) {
throw new HiveException("addFiles: filesystem error in check phase", e);
}
}
Here getDataLocation() returns hdfs://localhost:54310/user/hive/warehouse/records.
static protected void replaceFiles(Path srcf, Path destf, FileSystem fs,
Path tmppath) throws HiveException {
FileStatus[] srcs;
try {
srcs = fs.globStatus(srcf); // srcf = hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-32-25_911_8382925475653721697/-ext-10000
} catch (IOException e) {
throw new HiveException("addFiles: filesystem error in check phase", e);
}
if (srcs == null) {
LOG.info("No sources specified to move: " + srcf);
return;
// srcs = new FileStatus[0]; Why is this needed?
}
checkPaths(fs, srcs, destf, true); // destf = /user/hive/warehouse/records
try {
fs.mkdirs(tmppath); // tmppath = hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-32-25_911_8382925475653721697/-ext-10001
for (FileStatus src : srcs) {
FileStatus[] items = fs.listStatus(src.getPath());
for (int j = 0; j < items.length; j++) {
if (!fs.rename(items[j].getPath(), new Path(tmppath, items[j]
.getPath().getName()))) {
//
// public boolean rename(Path src, Path dst) throws IOException {
// return dfs.rename(getPathName(src), getPathName(dst));
// }
// src = hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-32-25_911_8382925475653721697/-ext-10000/sample.txt
// dst = hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-32-25_911_8382925475653721697/-ext-10001/sample.txt
throw new HiveException("Error moving: " + items[j].getPath()
+ " into: " + tmppath);
}
}
}
// point of no return
boolean b = fs.delete(destf, true); // destf = /user/hive/warehouse/records
LOG.debug("Deleting:" + destf.toString() + ",Status:" + b);
// create the parent directory otherwise rename can fail if the parent
// doesn't exist
if (!fs.mkdirs(destf.getParent())) {
throw new HiveException("Unable to create destination directory: "
+ destf.getParent().toString());
}
b = fs.rename(tmppath, destf);
// tmppath = hdfs://localhost:54310/tmp/hive-tianzhao/hive_2011-08-21_18-32-25_911_8382925475653721697/-ext-10001
// destf = /user/hive/warehouse/records
if (!b) {
throw new HiveException("Unable to move results from " + tmppath
+ " to destination directory: " + destf.getParent().toString());
}
LOG.debug("Renaming:" + tmppath.toString() + ",Status:" + b);
} catch (IOException e) {
throw new HiveException("replaceFiles: error while moving files from "
+ tmppath + " to " + destf + "!!!", e);
}
// In case of error, we should leave the temporary data there, so
// that user can recover the data if necessary.
}
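Putting the OVERWRITE path together, the whole file movement boils down to five HDFS operations. The following standalone sketch replays them with the exact paths from this run (an illustration only; the real code above also globs the sources, runs checkPaths, and handles errors):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReplaceFilesSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:54310"), conf);

    Path scratch  = new Path("/tmp/hive-tianzhao/hive_2011-08-21_18-32-25_911_8382925475653721697");
    Path ext10000 = new Path(scratch, "-ext-10000");          // staged copy written by CopyTask
    Path ext10001 = new Path(scratch, "-ext-10001");          // tmppath passed to replaceFiles
    Path destf    = new Path("/user/hive/warehouse/records"); // the table's data location

    fs.mkdirs(ext10001);                                      // 1. create the tmp dir
    fs.rename(new Path(ext10000, "sample.txt"),               // 2. move the staged file into tmppath
              new Path(ext10001, "sample.txt"));
    fs.delete(destf, true);                                   // 3. OVERWRITE: drop the old table dir
    fs.mkdirs(destf.getParent());                             // 4. make sure the parent still exists
    fs.rename(ext10001, destf);                               // 5. swap tmppath in as the table dir
  }
}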
static private void checkPaths(FileSystem fs, FileStatus[] srcs, Path destf,
boolean replace) throws HiveException {
try {
for (FileStatus src : srcs) {
FileStatus[] items = fs.listStatus(src.getPath());
for (FileStatus item : items) {
if (Utilities.isTempPath(item)) {
// This check is redundant because temp files are removed by
// execution layer before
// calling loadTable/Partition. But leaving it in just in case.
fs.delete(item.getPath(), true);
continue;
}
if (item.isDir()) {
throw new HiveException("checkPaths: " + src.getPath()
+ " has nested directory" + item.getPath());
}
Path tmpDest = new Path(destf, item.getPath().getName()); ///user/hive/warehouse/records/sample.txt
if (!replace && fs.exists(tmpDest)) { // replace = true
throw new HiveException("checkPaths: " + tmpDest
+ " already exists");
}
}
}
} catch (IOException e) {
throw new HiveException("checkPaths: filesystem error in check phase", e);
}
}
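After the MoveTask returns 0, sample.txt should be sitting under the table's warehouse directory. A quick standalone check (illustrative; you could equally run dfs -ls from the Hive CLI):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CheckLoadResult {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:54310"), new Configuration());
    for (FileStatus st : fs.listStatus(new Path("/user/hive/warehouse/records"))) {
      // expect a single entry: /user/hive/warehouse/records/sample.txt
      System.out.println(st.getPath() + "\t" + st.getLen());
    }
  }
}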