Flume使用范例

raymond.chen

浏览: 1451044 次
性别:
来自: 广州

最近访客更多访客>>

林祥纤

whzresponse

loginboot

vicento4

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

大数据

范例主要的处理流程说明如下：

1、web应用的日志信息实时输出到ActiveMQ

2、Flume的Source从MQ指定队列中获取消息，并提交到内存型Channel中

3、自定义Sink从Channel提取event，对event进行转换处理，并写入到Oracle数据库

开发自定义的Sink，编译打包成jar包，并上传到/opt/apache-flume-1.6.0-bin/plugins.d/flume-oracle-sink/lib，自定义sink的存放目录规范请查阅官方相关文档。

自定义Sink的代码骨架如下：

public class OracleSink extends AbstractSink implements Configurable {
	private int batchSize;
	private String charset;
	
	public void configure(Context context) {
		this.batchSize = context.getInteger(OracleSinkConfiguration.BATCH_SIZE, new Integer(OracleSinkConfiguration.BATCH_SIZE_DEFAULT)).intValue();
		this.charset = context.getString(OracleSinkConfiguration.CHARSET, OracleSinkConfiguration.CHARSET_DEFAULT);
	}
	
	public synchronized void start() {
		super.start();
	}

	public Status process() throws EventDeliveryException {
	    Sink.Status result = Sink.Status.READY;
	    Channel channel = getChannel();
	    Transaction transaction = null;
	    Event event = null;
	    
	    try{
	    	transaction = channel.getTransaction();
	      	transaction.begin();

	      	for(int i=0; i<this.batchSize; i++){
		        event = channel.take();
		        //Map headers = event.getHeaders();
		        
	      		if(event == null){
	      			break;
	      		}else{
		      		byte[] eventBody = event.getBody();
					String bodyString = new String(eventBody, this.charset);
					
					//something code here
	      		}
	      	}
	        
	        transaction.commit();
	    } catch (Exception ex) {
	    	System.out.println(ex.toString());x
	    	result = Sink.Status.BACKOFF;
	    	if (transaction != null) {
		        try {
		        	transaction.rollback();
		        } catch (Exception e) {
		        	throw Throwables.propagate(e);
		        }
	    	}
	    } finally {
	    	if (transaction != null) {
	    		transaction.close();
	    	}
	    }
	    
	    return result;
	}
	
	public synchronized void stop() {
		super.stop();
	}
}

在Flume根目录的conf文件夹内新建一个Flume配置文件flume-agent.conf，内容如下：

# Flume agent definition: one JMS source, one memory channel and two
# OracleSink instances grouped under a load-balancing sink processor.
# The agent name used as the key prefix is "agent".

# Name the components on this agent
agent.sources = source1
agent.sinks = sink1 sink2
agent.channels = channel1


# Source: reads messages from the ActiveMQ queue "my_queue" over JMS,
# using the broker's failover transport URL.
agent.sources.source1.type = jms
agent.sources.source1.initialContextFactory = org.apache.activemq.jndi.ActiveMQInitialContextFactory
agent.sources.source1.connectionFactory = ConnectionFactory
agent.sources.source1.providerURL = failover://(tcp://192.168.247.2:61616?tcpNoDelay=true)
agent.sources.source1.destinationType = QUEUE
agent.sources.source1.destinationName = my_queue
agent.sources.source1.batchSize = 200
agent.sources.source1.pollTimeout = 2000

 
# Channel, alternative 1 (disabled): file channel.
# NOTE(review): transactionCapacity here (100) is smaller than
# source1.batchSize (200); presumably it must be raised before enabling
# this alternative - confirm against the Flume channel documentation.
#agent.channels.channel1.type = file   
#agent.channels.channel1.checkpointDir = /tmp/flume/loadcheckpoint  
#agent.channels.channel1.dataDirs = /tmp/flume/loaddata
#agent.channels.channel1.capacity = 1000
#agent.channels.channel1.transactionCapacity = 100

# Channel, alternative 2 (active): memory channel.
# transactionCapacity equals source1.batchSize (200).
agent.channels.channel1.type = memory
agent.channels.channel1.capacity = 10000
agent.channels.channel1.transactionCapacity = 200


# Sink group g1 containing both sinks.
agent.sinkgroups = g1
# Alternative 1 (disabled): failover processor, sink1 has the higher priority value.
#agent.sinkgroups.g1.sinks = sink1 sink2
#agent.sinkgroups.g1.processor.type = failover
#agent.sinkgroups.g1.processor.priority.sink1 = 10
#agent.sinkgroups.g1.processor.priority.sink2 = 5
#agent.sinkgroups.g1.processor.maxpenalty = 10000

# Alternative 2 (active): load_balance processor with round_robin selection
# and backoff enabled.
agent.sinkgroups.g1.sinks = sink1 sink2
agent.sinkgroups.g1.processor.type = load_balance
agent.sinkgroups.g1.processor.backoff = true
agent.sinkgroups.g1.processor.selector = round_robin

# Describe the sinks (both use the custom OracleSink class).
# sink1 sets neither batchSize nor charset.
# NOTE(review): credentials are stored here in plain text.
agent.sinks.sink1.type = com.cjm.flume.oraclesink.OracleSink
agent.sinks.sink1.username = cjm
agent.sinks.sink1.password = 111

# sink2 sets batchSize and charset explicitly.
agent.sinks.sink2.type = com.cjm.flume.oraclesink.OracleSink
agent.sinks.sink2.username = cjm
agent.sinks.sink2.password = 222
agent.sinks.sink2.batchSize= 100
agent.sinks.sink2.charset = UTF-8


# Bind the source and sink to the channel
agent.sources.source1.channels = channel1
agent.sinks.sink1.channel = channel1
agent.sinks.sink2.channel = channel1

启动ActiveMQ

启动Flume

cd /opt/apache-flume-1.6.0-bin

bin/flume-ng agent -c conf -f conf/flume-agent.conf -n agent -Dflume.root.logger=INFO,console

后台服务方式启动 nohup bin/flume-ng agent -c conf -f conf/flume-agent.conf -n agent >flume.log &

模拟发送信息到MQ

/**
 * Test driver that repeatedly sends the content of the classpath resource
 * {@code /data.txt} as a text message to the ActiveMQ queue {@code my_queue}.
 */
public class UMProducer {
	private MessageProducer producer = null;
	private Connection connection = null;
	private Session session = null;
	private String data = null;
	/** Cleared by {@link #stop()} so the send loop in {@link #start()} ends. */
	private volatile boolean running = false;

	/**
	 * Loads the payload, connects to the broker and sends the payload in a
	 * loop until {@link #stop()} is called or an error occurs. All JMS
	 * resources are released before this method returns.
	 */
	public void start(){
		running = true;
		try{
			// Close the resource stream once read, and fail with a clear
			// message when data.txt is missing from the classpath.
			try (InputStream in = UMProducer.class.getResourceAsStream("/data.txt")) {
				if (in == null) {
					throw new IllegalStateException("classpath resource /data.txt not found");
				}
				// Explicit charset: do not depend on the platform default.
				// Assumes data.txt is UTF-8 encoded.
				this.data = IOUtils.toString(in, "UTF-8");
			}

			ConnectionFactory connectionFactory = new ActiveMQConnectionFactory(
					ActiveMQConnection.DEFAULT_USER,
					ActiveMQConnection.DEFAULT_PASSWORD,
					"failover://(tcp://192.168.247.2:61616?tcpNoDelay=true)");

			connection = connectionFactory.createConnection();
			((ActiveMQConnection)connection).setUseAsyncSend(true);
			connection.start();

			// Non-transacted session: the original created a transacted session
			// but never called commit(), so sent messages were never released
			// to consumers.
			session = connection.createSession(false, Session.AUTO_ACKNOWLEDGE);
			Destination destination = session.createQueue("my_queue");

			producer = session.createProducer(destination);
			producer.setDeliveryMode(DeliveryMode.NON_PERSISTENT);
			producer.setTimeToLive(1000 * 60 * 60 * 24);

			while(running){
				TextMessage message = session.createTextMessage(this.data);
				producer.send(message);
			}

		}catch(Exception ex){
			ex.printStackTrace();
		}finally{
			// Release broker resources on both the normal and the error path.
			stop();
		}
	}

	/**
	 * Ends the send loop and closes the producer, session and connection.
	 * Each resource is closed independently so that one failing close does
	 * not leave the others open.
	 */
	public void stop(){
		running = false;

		// Close in reverse order of creation: producer, session, connection.
		try{
			if(producer != null){
				producer.close();
			}
		}catch(Exception ex){
			ex.printStackTrace();
		}

		try{
			if(session != null) {
				session.close();
			}
		}catch(Exception ex){
			ex.printStackTrace();
		}

		try{
			if(connection != null) {
				connection.close();
			}
		}catch(Exception ex){
			ex.printStackTrace();
		}
	}

	public static void main(String[] args) {
		UMProducer producer = new UMProducer();
		producer.start();
	}
}