wget http://mirrors.cnnic.cn/apache/gora/0.3/apache-gora-0.3-src.zip
unzip apache-gora-0.3-src.zip
cd apache-gora-0.3
mvn clean package
1、创建项目
mvn archetype:create -DgroupId=org.apdplat.demo.gora -DartifactId=gora-demo
2、增加依赖
vi gora-demo/pom.xml
在<dependencies>标签内增加:
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.2.1</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase</artifactId>
<version>0.94.12</version>
</dependency>
<dependency>
<groupId>org.apache.gora</groupId>
<artifactId>gora-core</artifactId>
<version>0.3</version>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.cxf</groupId>
<artifactId>cxf-rt-frontend-jaxrs</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.gora</groupId>
<artifactId>gora-hbase</artifactId>
<version>0.3</version>
<exclusions>
<exclusion>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-test</artifactId>
</exclusion>
</exclusions>
</dependency>
3、数据建模
mkdir -p gora-demo/src/main/avro
vi gora-demo/src/main/avro/person.json
输入:
{
"type": "record",
"name": "Person",
"namespace":"org.apdplat.demo.gora.generated",
"fields" : [
{"name":"idcard", "type": "string"},
{"name":"name", "type": "string"},
{"name":"age", "type": "string"}
]
}
4、生成JAVA类
bin/gora goracompiler gora-demo/src/main/avro/person.json gora-demo/src/main/java/
5、模型映射
mkdir -p gora-demo/src/main/resources/
vi gora-demo/src/main/resources/gora-hbase-mapping.xml
输入:
<gora-orm>
<table name="Person">
<family name="basic"/>
<family name="detail"/>
</table>
<class table="Person" name="org.apdplat.demo.gora.generated.Person" keyClass="java.lang.String">
<field name="idcard" family="basic" qualifier="idcard"/>
<field name="name" family="basic" qualifier="name"/>
<field name="age" family="detail" qualifier="age"/>
</class>
</gora-orm>
6、Gora配置
vi gora-demo/src/main/resources/gora.properties
输入:
gora.datastore.default=org.apache.gora.hbase.store.HBaseStore
gora.datastore.autocreateschema=true
7、Hbase配置
vi gora-demo/src/main/resources/hbase-site.xml
输入:
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>host001</value>
</property>
</configuration>
8、编写PersonManager.java和PersonAnalytics.java
vi gora-demo/src/main/java/org/apdplat/demo/gora/PersonManager.java
输入:
package org.apdplat.demo.gora;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.ParseException;
import org.apache.avro.util.Utf8;
import org.apache.gora.query.Query;
import org.apache.gora.query.Result;
import org.apache.gora.store.DataStore;
import org.apache.gora.store.DataStoreFactory;
import org.apache.hadoop.conf.Configuration;
import org.apdplat.demo.gora.generated.Person;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
publicclass PersonManager {
privatestaticfinal Logger log = LoggerFactory.getLogger(PersonManager.class);
private DataStore<String, Person> dataStore;
public PersonManager() {
try{
init();
} catch(IOException ex) {
thrownew RuntimeException(ex);
}
}
privatevoid init() throws IOException {
Configuration conf = new Configuration();
dataStore= DataStoreFactory.getDataStore(String.class, Person.class, conf);
}
privatevoid parse(String input) throws IOException,ParseException, Exception {
log.info("解析文件:" + input);
BufferedReader reader = new BufferedReader(new FileReader(input));
longlineCount = 0;
try{
String line = reader.readLine();
do {
Person person = parseLine(line);
if(person != null) {
//入库
storePerson(person.getIdcard().toString(), person);
}
lineCount++;
line = reader.readLine();
} while(line != null);
} finally{
reader.close();
}
log.info("文件解析完毕. 总人数:" + lineCount);
}
private Person parseLine(String line) throws ParseException {
String[] attrs = line.split(" ");
String idcard = attrs[0];
String name = attrs[1];
String age = attrs[2];
Person person = new Person();
person.setIdcard(new Utf8(idcard));
person.setName(new Utf8(name));
person.setAge(new Utf8(age));
return person;
}
privatevoid storePerson(String key,Person person) throwsIOException, Exception {
log.info("保存人员信息: " + person.getIdcard()+"\t"+person.getName()+"\t"+person.getAge());
dataStore.put(key,person);
}
privatevoid get(String key) throws IOException, Exception{
Person person = dataStore.get(key);
printPerson(person);
}
privatevoid query(String key) throws IOException, Exception{
Query<String, Person> query = dataStore.newQuery();
query.setKey(key);
Result<String, Person> result = query.execute();
printResult(result);
}
privatevoid query(String startKey,String endKey) throwsIOException, Exception {
Query<String, Person> query = dataStore.newQuery();
query.setStartKey(startKey);
query.setEndKey(endKey);
Result<String, Person> result = query.execute();
printResult(result);
}
privatevoid delete(String key) throws Exception {
dataStore.delete(key);
dataStore.flush();
log.info("身份证号码为:" + key + " 的人员信息被删除");
}
privatevoid deleteByQuery(StringstartKey, String endKey) throws IOException, Exception {
Query<String, Person> query = dataStore.newQuery();
query.setStartKey(startKey);
query.setEndKey(endKey);
dataStore.deleteByQuery(query);
log.info("身份证号码从 " + startKey + " 到 " + endKey + " 的人员信息被删除");
}
privatevoid printResult(Result<String, Person> result) throws IOException, Exception {
while(result.next()){
String resultKey =result.getKey();
Person resultPerson =result.get();
System.out.println(resultKey + ":");
printPerson(resultPerson);
}
System.out.println("人数:" + result.getOffset());
}
privatevoid printPerson(Personperson) {
if(person== null){
System.out.println("没有结果");
} else{
System.out.println(person.getIdcard()+"\t"+person.getName()+"\t"+person.getAge());
}
}
privatevoid close() throws IOException, Exception{
if(dataStore != null)
dataStore.close();
}
privatestaticfinal String USAGE = "PersonManager -parse<input_person_file>\n" +
" -get <idcard>\n" +
" -query <idcard>\n" +
" -query <startIdcard> <endIdcard>\n" +
" -delete <idcard>\n" +
" -deleteByQuery <startIdcard> <endIdcard>\n";
publicstaticvoid main(String[] args) throws Exception {
if(args.length < 2) {
System.err.println(USAGE);
System.exit(1);
}
PersonManager manager = new PersonManager();
if("-parse".equals(args[0])){
manager.parse(args[1]);
} elseif("-get".equals(args[0])){
manager.get(args[1]);
} elseif("-query".equals(args[0])){
if(args.length == 2)
manager.query(args[1]);
else
manager.query(args[1], args[2]);
} elseif("-delete".equals(args[0])){
manager.delete(args[1]);
} elseif("-deleteByQuery".equalsIgnoreCase(args[0])){
manager.deleteByQuery(args[1], args[2]);
} else{
System.err.println(USAGE);
System.exit(1);
}
manager.close();
}
}
vi gora-demo/src/main/java/org/apdplat/demo/gora/PersonAnalytics.java
输入:
package org.apdplat.demo.gora;
import java.io.IOException;
import org.apache.avro.util.Utf8;
import org.apache.gora.mapreduce.GoraMapper;
import org.apache.gora.store.DataStore;
import org.apache.gora.store.DataStoreFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apdplat.demo.gora.generated.Person;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
publicclass PersonAnalytics extends Configured implements Tool {
privatestaticfinal Logger log= LoggerFactory
.getLogger(PersonAnalytics.class);
publicstaticclassPersonAnalyticsMapper extends
GoraMapper<String,Person, Text, LongWritable> {
private LongWritable one = new LongWritable(1L);
@Override
protectedvoid map(String key, Person person, Contextcontext)
throws IOException,InterruptedException {
Utf8 age =person.getAge();
context.write(new Text(age.toString()), one);
};
}
publicstaticclassPersonAnalyticsReducer extends
Reducer<Text,LongWritable, Text, LongWritable> {
@Override
protectedvoid reduce(Text key,Iterable<LongWritable> values,
Context context) throws IOException,InterruptedException {
long sum = 0L;
for (LongWritable value :values) {
sum += value.get();
}
context.write(key, new LongWritable(sum));
};
}
public Job createJob(DataStore<String,Person> inStore, int numReducer)
throws IOException {
Job job = new Job(getConf());
job.setJobName("Person Analytics");
log.info("Creating Hadoop Job: " +job.getJobName());
job.setNumReduceTasks(numReducer);
job.setJarByClass(getClass());
GoraMapper.initMapperJob(job,inStore, Text.class,LongWritable.class,
PersonAnalyticsMapper.class, true);
job.setReducerClass(PersonAnalyticsReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
TextOutputFormat
.setOutputPath(job,newPath("person-analytics-output"));
return job;
}
@Override
publicint run(String[] args) throws Exception {
DataStore<String,Person> inStore;
Configuration conf = new Configuration();
if (args.length == 1) {
String dataStoreClass =args[0];
inStore =DataStoreFactory.getDataStore(dataStoreClass,
String.class, Person.class, conf);
} else {
inStore =DataStoreFactory.getDataStore(String.class, Person.class,
conf);
}
Job job = createJob(inStore,2);
boolean success = job.waitForCompletion(true);
inStore.close();
log.info("PersonAnalytics completed with "
+ (success ? "success": "failure"));
return success ? 0 : 1;
}
publicstaticvoidmain(String[] args) throws Exception {
int ret = ToolRunner.run(new PersonAnalytics(),args);
System.exit(ret);
}
}
9、准备数据
vi gora-demo/src/main/resources/persons.txt
输入:
533001198510125839 杨尚川 25
533001198510125840 杨尚华 22
533001198510125841 刘德华 55
533001198510125842 刘亦菲 25
533001198510125843 蔡卓妍 25
533001198510125844 林志玲 22
533001198510125845 李连杰 55
10、在Linux命令行使用maven2编译运行项目
cd gora-demo
mvn clean compile
mvn exec:java -Dexec.mainClass=org.apdplat.demo.gora.PersonManager
mvn exec:java -Dexec.mainClass="org.apdplat.demo.gora.PersonManager" -Dexec.args="-parse src/main/resources/persons.txt"
mvn exec:java -Dexec.mainClass=org.apdplat.demo.gora.PersonAnalytics
cat person-analytics-output/part-r-00000
mvn exec:java -Dexec.mainClass="org.apdplat.demo.gora.PersonManager" -Dexec.args="-get 533001198510125842"
mvn exec:java -Dexec.mainClass="org.apdplat.demo.gora.PersonManager" -Dexec.args="-query 533001198510125844"
mvn exec:java -Dexec.mainClass="org.apdplat.demo.gora.PersonManager" -Dexec.args="-query 533001198510125842 533001198510125845"
mvn exec:java -Dexec.mainClass="org.apdplat.demo.gora.PersonManager" -Dexec.args="-delete 533001198510125840"
mvn exec:java -Dexec.mainClass="org.apdplat.demo.gora.PersonManager" -Dexec.args="-deleteByQuery 533001198510125841 533001198510125842"
mvn exec:java -Dexec.mainClass="org.apdplat.demo.gora.PersonManager" -Dexec.args="-deleteByQuery 533001198510125845 533001198510125846"
mvn exec:java -Dexec.mainClass="org.apdplat.demo.gora.PersonManager" -Dexec.args="-query 533001198510125838 533001198510125848"
11、在windows下使用eclipse编译运行项目
mvn clean package
rm -r target
vi .classpath
删除所有包含path="M2_REPO的行
删除<classpathentry kind="src" path="target/maven-shared-archive-resources" excluding="**/*.java"/>
通过WinSCP把gora-demo传到windows
从http://yangshangchuan.iteye.com/blog/1839784下载修改过的hadoop-core-1.2.1.jar替换文件gora-demo\lib\hadoop-core-1.2.1.jar
将gora-demo导入eclipse
将lib下的所有jar加入构建路径
12、打包项目并提交Hadoop运行
cd gora-demo
mvn clean package
mkdir job
cp -r lib job/lib
cp -r target/classes/* job
hadoop fs -put persons.txt persons.txt
jar -cvf gora-demo.job *
hadoop jar gora-demo.job org.apdplat.demo.gora.PersonAnalytics
相关推荐
大数据工具持久化borm – 大数据的对象持久化[捐赠致谢]( https://github.com/ysc/QuestionAnsweringSystem/wiki/donation )HBase必1、建立Hadoop环境2、建立HBase环境3、启动项目建立读步骤如何切换底层存储为HBase...
2. **Gora持久化**:Gora的持久化机制负责在内存和存储之间转换数据。这通常涉及到定义数据存储的配置,以及如何将数据对象序列化和反序列化。 3. **Spring MVC与Gora集成**:如何在Spring MVC控制器中使用Gora操作...
Tika使用多种现有的开源内容解析项目来实现从多种格式的文件中提取元数据和结构化文本,Gora支持把大数据持久化到多种存储实现,Crawler Commons是一个通用的网络爬虫组件。大数据这个术语最早的引用可追溯到Nutch。...
Tika使用多种现有的开源内容解析项目来实现从多种格式的文件中提取元数据和结构化文本,Gora支持把大数据持久化到多种存储实现,Crawler Commons是一个通用的网络爬虫组件。 大数据这个术语最早的引用可追溯到Nutch...
Apache Gora项目 Apache Gora开源框架提供了内存数据模型和大数据持久性。 Gora支持持久存储到列存储,键值存储,文档存储和RDBMS,并通过广泛的Apache Hadoop MapReduce,Apache Spark,Apache Flink和Apache Pig...
大数据里的ORM --Gora使用于各种数据库
大数据相关框架讲义,包括:hadoop,hbase,pig,hive,mahout,storm,sqoop,spark,gora等
大数据图标大全中列举了...56. **Gora**: 大数据持久化框架,支持多种数据存储后端,如HBase、Cassandra等。 这些工具和框架共同构成了大数据生态系统,帮助企业及组织处理、分析海量数据,提升业务洞察力和决策能力。
Gora通过为用户提供易于使用的内存中数据模型以及具有特定于数据存储的映射并内置Apache Hadoop支持的大数据框架持久性来填补这一空白。https://mirrors.tuna.tsinghua.edu.cn/apache/gora/0.9/
- **Apache Gora**:内存中的数据模型框架,支持多种数据源的持久化。 - **Apache Hama**:用于BSP计算,适合矩阵运算和图算法。 - **Apache MapReduce**:Hadoop的原始分布式计算模型,处理大规模数据集。 4. *...
Gora是一个类似Hibernate的ORM框架,但是不只是支持关系数据库,更重要支持NoSQL之类大数据的存储。最新的Gora并不支持Mysql,本包是Gora支持Mysql的最新版本,需要的童鞋可以下载使用。
Tika使用多种现有的开源内容解析项目来实现从多种格式的文件中提取元数据和结构化文本,Gora支持把大数据持久化到多种存储实现,Crawler Commons是一个通用的网络爬虫组件。 大数据这个术语最早的引用可追溯到...
Tika使用多种现有的开源内容解析项目来实现从多种格式的文件中提取元数据和结构化文本,Gora支持把大数据持久化到多种存储实现,Crawler Commons是一个通用的网络爬虫组件。 大数据这个术语最早的引用可追溯到...
Gora,全称为Generic Object Relational Mapping,是一种针对大数据的开放源码对象持久层框架。它为NoSQL数据存储提供了一种统一的ORM(对象关系映射)解决方案。Gora的设计目标是允许开发者以面向对象的方式操作...
Gora 是一个开源的持久化层,支持多种分布式数据存储系统,如 HBase、Cassandra 和 MongoDB,提供了统一的接口来访问这些存储。 4. **Segment 分割**: 网页抓取过程中,Nutch 将数据分割成多个 Segment,每个 ...
gora-gradle-插件用于处理 文件的插件概述从描述符(.avsc 文件)生成 java 类型。 该插件会将所有模式读取和文件生成委托给 GoraCompiler。配置在你的项目中配置插件如下: buildscript { repositories { jcenter()...