hadoop学习--基于Hive的Hadoop日志分析

wbj0110

浏览: 1645535 次
性别:
来自: 上海

最近访客更多访客>>

一往无前bhz

ninja2006

loginboot

u012363178

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Hive

hive

本文将本地的hadoop日志，加载到Hive数据仓库中，再过滤日志中有用的日志信息转存到Mysql数据库里。

环境：hive-0.12.0 + Hadoop1.2.1

1、日志格式

2014-04-17 22:53:30,621 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_4856124673669777083 to 127.0.0.1:50010 
2014-04-17 22:53:30,621 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_3952951948255939678 to 127.0.0.1:50010 
2014-04-17 22:53:30,629 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_-7659295115271387204 to 127.0.0.1:50010 
2014-04-17 22:53:30,713 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_1247712269990837159 to 127.0.0.1:50010 
2014-04-17 22:53:30,713 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_3041853621376201540 to 127.0.0.1:50010 
2014-04-17 22:53:30,713 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_5165085626508844330 to 127.0.0.1:50010 
2014-04-17 22:53:30,713 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_5058515618805863349 to 127.0.0.1:50010

日志格式大致如上。这里以空格作为分隔符把每行拆分成多列；末尾的提示信息长度不固定、不好处理，暂时用 3 列（information1～information3）来存储。

表结构定义：

[java]view plaincopy 
//建立Hive表，用来存储日志信息  
        HiveUtil.createTable("create table if not exists loginfo11   
( rdate String,time ARRAY<string>,type STRING,relateclass STRING,  
information1 STRING,information2 STRING,information3 STRING)   
ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'");  

2、代码：

getConnect.java 类负责建立和关闭与 Hive、MySQL 的连接：

[java]view plaincopy 
//package com.my.hivetest;  
  
import java.sql.Connection;  
import java.sql.DriverManager;  
import java.sql.ResultSet;  
import java.sql.SQLException;  
import java.sql.Statement;  
  
/**
 * Lazily opens and caches one Hive connection and one MySQL connection for this example.
 *
 * <p>Each connection is created on first request and reused until it is closed through
 * {@link #closeHive()} / {@link #closemysql()}; after that the next request opens a new one.
 *
 * <p>Not thread-safe: the cached connections are plain static fields without synchronization.
 */
public class getConnect {

    private static final String HIVE_DRIVER = "org.apache.hadoop.hive.jdbc.HiveDriver";
    // The port must match the one the Hive server was started with.
    private static final String HIVE_URL = "jdbc:hive://localhost:50031/default";

    private static final String MYSQL_DRIVER = "com.mysql.jdbc.Driver";
    private static final String MYSQL_URL =
            "jdbc:mysql://localhost:3306/hadoop?createDatabaseIfNotExist=true&useUnicode=true&characterEncoding=GBK";
    // NOTE(review): credentials are embedded in source; move them to configuration
    // before using this outside a local demo.
    private static final String MYSQL_USER = "root";
    private static final String MYSQL_PASSWORD = "123456";

    private static Connection conn = null;
    private static Connection conntomysql = null;

    /** Static utility holder; not instantiable. */
    private getConnect() {
    }

    /**
     * Returns the cached Hive connection, opening it first if none is open.
     *
     * @return the shared Hive connection
     * @throws SQLException if the Hive JDBC driver class is not on the classpath (the cause is
     *         the {@link ClassNotFoundException}) or the connection cannot be established
     */
    public static Connection getHiveConn() throws SQLException {
        // Re-open when a previous connection was closed so callers never get a dead handle.
        if (conn == null || conn.isClosed()) {
            loadDriver(HIVE_DRIVER);
            conn = DriverManager.getConnection(HIVE_URL, "", "");
        }
        return conn;
    }

    /**
     * Returns the cached MySQL connection, opening it first if none is open.
     *
     * @return the shared MySQL connection
     * @throws SQLException if the MySQL JDBC driver class is not on the classpath (the cause is
     *         the {@link ClassNotFoundException}) or the connection cannot be established
     */
    public static Connection getMysqlConn() throws SQLException {
        if (conntomysql == null || conntomysql.isClosed()) {
            loadDriver(MYSQL_DRIVER);
            conntomysql = DriverManager.getConnection(MYSQL_URL, MYSQL_USER, MYSQL_PASSWORD);
        }
        return conntomysql;
    }

    /**
     * Closes the cached Hive connection, if any, and forgets it.
     *
     * @throws SQLException if closing the connection fails
     */
    public static void closeHive() throws SQLException {
        if (conn != null) {
            try {
                conn.close();
            } finally {
                // Drop the reference even when close() fails so it is never handed out again.
                conn = null;
            }
        }
    }

    /**
     * Closes the cached MySQL connection, if any, and forgets it.
     *
     * @throws SQLException if closing the connection fails
     */
    public static void closemysql() throws SQLException {
        if (conntomysql != null) {
            try {
                conntomysql.close();
            } finally {
                conntomysql = null;
            }
        }
    }

    /** Loads a JDBC driver class, reporting a missing class as an SQLException. */
    private static void loadDriver(String driverClass) throws SQLException {
        try {
            Class.forName(driverClass);
        } catch (ClassNotFoundException e) {
            // Propagate instead of exiting the JVM so the caller decides how to react.
            throw new SQLException("JDBC driver class not found on classpath: " + driverClass, e);
        }
    }
}

HiveUtil.java 类，提供创建 Hive 表、加载数据、按条件查询数据，以及将查询结果转存到 MySQL 的方法。

[java]view plaincopy 
//package com.my.hivetest;  
  
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
  
public class HiveUtil {  
    //创建hive表  
    public static void createTable(String hiveql) throws SQLException{  
         Connection con=getConnect.getHiveConn();  
      
         Statement stmt = con.createStatement();    
         ResultSet res = stmt.executeQuery(hiveql);   
    }  
    //查询hive表  
    public static ResultSet queryHive(String hiveql) throws SQLException{  
        Connection con=getConnect.getHiveConn();  
          
        Statement stmt = con.createStatement();    
        ResultSet res = stmt.executeQuery(hiveql);  
        return res;  
    }  
    //加载数据  
    public static void loadDate(String hiveql) throws SQLException{  
        Connection con=getConnect.getHiveConn();   
        Statement stmt = con.createStatement();  
        ResultSet res = stmt.executeQuery(hiveql);  
    }  
    //转存到mysql中  
    public static void hiveTomysql(ResultSet Hiveres) throws SQLException{  
        Connection con=getConnect.getMysqlConn();  
        Statement stmt = con.createStatement();   
        while (Hiveres.next()) {  
              String rdate=Hiveres.getString(1);  
              String time=Hiveres.getString(2);  
              String type=Hiveres.getString(3);  
              String relateclass=Hiveres.getString(4);  
              String information=Hiveres.getString(5)+Hiveres.getString(6)+Hiveres.getString(7);//信息组合  
              System.out.println(rdate+"    "+time+"    "+type+"    "+relateclass+" "+information+" ");  
              int i = stmt.executeUpdate(  
            "insert into hadooplog values(0,'"+rdate+"','"+time+"','"+type+"','"+relateclass+"','"+information+"')");  
            }  
    }  
}  

exeHiveQL.java类，执行类，实现main函数。

[java]view plaincopy 
//package com.my.hivetest;  
  
import java.sql.Connection;  
import java.sql.DriverManager;  
import java.sql.ResultSet;  
import java.sql.SQLException;  
import java.sql.Statement;  
  
public class exeHiveQL {  
    public static void main(String[] args) throws SQLException {  
  
        if (args.length < 2) {  
            System.out.print("请输入查询条件： 日志级别 日期");  
            System.exit(1);  
        }  
  
        String type = args[0];  
        String date = args[1];  
  
        //在hive中创建表  
        HiveUtil.createTable(  
            "create table if not exists loginfo11   
            ( rdate String,time ARRAY<string>,type STRING,  
            relateclass STRING,information1 STRING,information2 STRING,  
            information3 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '   
            COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'");  
        //加载hadoop日志  
        HiveUtil.loadDate("load data local inpath '/root/hadoop-1.2.1/logs/*.log.*' overwrite into table loginfo11");  
        //查询有用的信息  
        //test code  
        String str = "select rdate,time[0],type,relateclass,information1,information2,information3 from loginfo11 where type='"  
            + type + "' and rdate='" + date + "' ";  
        System.out.println(str + "----test");  
        ResultSet res1 = HiveUtil.queryHive(  
            "select rdate,time[0],type,relateclass,information1,  
            information2,information3 from loginfo11 where type='"+ type + "' and rdate='" + date + "' ");  
        //查询结果转存到mysql中  
        HiveUtil.hiveTomysql(res1);  
        //关闭hive连接  
        getConnect.closeHive();  
        //关闭mysql连接  
        getConnect.closemysql();  
    }  
}  

在运行之前需要先启动 Hive server 服务，这里的端口号 50031 要与 getConnect.java 类中连接串里的端口保持一致。

# bin/hive --service hiveserver -p 50031

然后在eclipse中运行起来，设置输入参数

ERROR 2014-04-14

不过在运行之前还需要导入各种包：

hive-jdbc-0.12.0.jar

hive-metastore-0.12.0.jar

mysql-connector-java-5.1.10.jar（mysql的jdbc驱动，可到http://dev.mysql.com/downloads/connector/j/下载）
以及hive/lib下所有的包。。。（为图省事全导入了。。。）
还有一点就是，要预先在 MySQL 中建立数据库 hadoop，并在其中建立表 hadooplog。表结构要与代码中的 insert 语句保持一致，即共 6 列：第一列（代码中插入 0，一般设为自增主键），其后依次是 rdate、time、type、relateclass、information。

源代码：https://github.com/y521263/Hadoop_in_Action

参考资料：

Apache Hive TM

http://blog.csdn.net/y521263/article/details/23969745

分享到：

hive日志分析实战（二） | Using Hive for Data Analysis

2014-09-02 14:34
浏览 1490
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论