erlang抽象码与basho的protobuf（五）执行过程 -

wqtn22

浏览: 102087 次
性别:
来自: 杭州

最近访客更多访客>>

daotadefeng

igamebox

westsum

mmoo

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

erlang抽象码与basho的protobuf（五）执行过程

博客分类：

application

上文介绍了代码生成过程，成功的从erlang抽象码生成了erlang源文件，抽象码的替换步骤很少，这主要得益于模板文件pokemon_pb.erl的设计。这里将继续分析pokemon_pb.erl的执行过程，从中学习它的编程技巧。

编码过程：

pokemon_pb.erl

%% Encodes a pikachu record by delegating to encode/2 with the message name
%% `pikachu`.
%% The is_record/2 guard means any argument that is not a pikachu record
%% matches no clause and raises function_clause.
encode_pikachu(Record) when is_record(Record, pikachu) ->

encode(pikachu, Record).

%% Builds the encoded fields of the record with iolist/2 and flattens the
%% result into a single binary with iolist_to_binary/1.
encode(pikachu, Record) ->

iolist_to_binary(iolist(pikachu, Record)).

%% Returns a list with one pack/5 result per message field; here there is a
%% single field.
%% pack/5 arguments: field number 1, rule `required`, the value of the `abc`
%% record field passed through with_default/2, type `string`, and an empty
%% accumulator.
%% NOTE(review): with_default/2 is not visible here; presumably `none` means
%% "no default value" -- confirm against its definition.
iolist(pikachu, Record) ->

[pack(1, required, with_default(Record#pikachu.abc, none), string, [])].

主要的编码工作交给了pack函数，因此iolist函数仅仅需要保存message各个域的域编号，操作类型（required等），默认值，域类型（string等）。

%% @doc Encodes a single message field.
%%
%% Arguments, in order:
%%   FNum - field number handed to the protobuffs encoder.
%%   Rule - optional | required | repeated | repeated_packed.
%%   Data - the field value; `undefined' means "not set".
%%   Type - a scalar type atom, or the enum type passed to enum_to_int/2.
%%   Acc  - accumulator used only while walking a `repeated' list.
%%
%% Returns [] for an unset optional/repeated field and for an empty packed
%% list, a list of per-element encodings for `repeated', and otherwise
%% whatever protobuffs:encode/3 or protobuffs:encode_packed/3 returns.
%% Exits with {error, {required_field_is_undefined, FNum, Type}} when a
%% required field is `undefined'.
pack(_, optional, undefined, _, _) ->
    [];
pack(_, repeated, undefined, _, _) ->
    [];
pack(_, repeated_packed, undefined, _, _) ->
    [];
pack(_, repeated_packed, [], _, _) ->
    [];
pack(FNum, required, undefined, Type, _) ->
    exit({error, {required_field_is_undefined, FNum, Type}});
pack(_, repeated, [], _, Acc) ->
    lists:reverse(Acc);
pack(FNum, repeated, [Head | Tail], Type, Acc) ->
    %% Each element is encoded as if it were an optional field with the same
    %% number; results are collected in reverse and flipped by the clause above.
    pack(FNum, repeated, Tail, Type, [pack(FNum, optional, Head, Type, []) | Acc]);
pack(FNum, repeated_packed, Data, Type, _) ->
    protobuffs:encode_packed(FNum, Data, Type);
pack(FNum, _, Data, _, _) when is_tuple(Data) ->
    %% A tuple value is treated as a nested message record: its first element
    %% is the record name, which selects the encode/2 clause. element/2 reads
    %% the tag directly instead of converting the whole tuple to a list.
    RecName = element(1, Data),
    protobuffs:encode(FNum, encode(RecName, Data), bytes);
pack(FNum, _, Data, Type, _) when Type =:= bool; Type =:= int32; Type =:= uint32;
                                  Type =:= int64; Type =:= uint64; Type =:= sint32;
                                  Type =:= sint64; Type =:= fixed32; Type =:= sfixed32;
                                  Type =:= fixed64; Type =:= sfixed64; Type =:= string;
                                  Type =:= bytes; Type =:= float; Type =:= double ->
    protobuffs:encode(FNum, Data, Type);
pack(FNum, _, Data, Type, _) when is_atom(Data) ->
    %% Any remaining atom value is taken to be an enum symbol of type Type.
    protobuffs:encode(FNum, enum_to_int(Type, Data), enum).

enum_to_int(pikachu,value) ->

pack函数有五个地方值得注意：

1.对于optional的域，值不存在（undefined）时返回空列表，不产生任何编码输出，也不会引发异常；

2.对于required的域，若值不存在，则应该引发异常；

3.对于repeated的域，由于其值是一个列表，因此在编码时，应该对列表的每一个元素都进行pack；

4.对于内建类型，直接调用protobuffs:encode进行编码，protobuffs:encode编码方式是protocol buffers编码方式的erlang实现，分析起来也比较枯燥，读者可以自行查阅；

5.对于枚举类型，需要先通过enum_to_int将枚举类型替换为具体的值，这个函数也是通过抽象码替换过程生成的，message的每个枚举定义都有一个独立的值。

解码过程：

%% Decodes a binary into a pikachu record by delegating to decode/2 with the
%% message name `pikachu`.
%% The is_binary/1 guard means a non-binary argument matches no clause and
%% raises function_clause.
decode_pikachu(Bytes) when is_binary(Bytes) ->

decode(pikachu, Bytes).

%% Decodes a binary holding a pikachu message.
%% Types is the field table consumed by decode/3: one
%% {FieldNumber, FieldName, Type, Opts} tuple per field.
%% decode/3 returns the decoded {FieldNumber, FieldName, Value} tuples, which
%% to_record/2 then folds into a #pikachu{} record.
decode(pikachu, Bytes) when is_binary(Bytes) ->

Types = [{1, abc, int32, []}, {2, def, double, []}],

Decoded = decode(Bytes, Types, []),

to_record(pikachu, Decoded).

decode/2保存了解码时遇到的每一个域的处理方法，这相当于一个简单的语法定义，具体的解析过程由decode/3完成。

%% @doc Decodes every field found in Bytes according to the field table Types.
%%
%% Types is a list of {FNum, Name, Type, Opts} tuples keyed by field number.
%% Acc collects {FNum, Name, Value} tuples, most recently decoded field first;
%% for a field whose Opts contain `repeated', Value is the list of all
%% occurrences in wire order.
%%
%% Returns Acc once the input is exhausted. Exits with
%% {error, {unexpected_field_index, FNum}} when the next field number has no
%% entry in Types.
decode(<<>>, _, Acc) ->
    Acc;
decode(Bytes, Types, Acc) ->
    {ok, FNum} = protobuffs:next_field_num(Bytes),
    case lists:keyfind(FNum, 1, Types) of
        {FNum, Name, Type, Opts} ->
            {Raw, Rest} = decode_field_value(FNum, Type, Opts, Bytes),
            Value = int_to_enum(Type, Raw),
            decode(Rest, Types, store_decoded_field(FNum, Name, Value, Opts, Acc));
        false ->
            exit({error, {unexpected_field_index, FNum}})
    end.

%% Reads the next field from Bytes and returns {Value, RemainingBytes}.
%% The decoding strategy is chosen from Opts: `is_record' takes precedence
%% over `repeated_packed'; with neither, the field is decoded as a plain value
%% and passed through unpack_value/2. Every branch asserts that the decoded
%% field number equals FNum.
decode_field_value(FNum, Type, Opts, Bytes) ->
    case {lists:member(is_record, Opts), lists:member(repeated_packed, Opts)} of
        {true, _} ->
            {{FNum, Nested}, Rest} = protobuffs:decode(Bytes, bytes),
            %% Type comes from the Types table, not from the wire, so the atom
            %% created here is drawn from a fixed set of message names.
            MsgName = list_to_atom(string:to_lower(atom_to_list(Type))),
            {decode(MsgName, Nested), Rest};
        {false, true} ->
            {{FNum, Values}, Rest} = protobuffs:decode_packed(Bytes, Type),
            {Values, Rest};
        {false, false} ->
            {{FNum, Raw}, Rest} = protobuffs:decode(Bytes, Type),
            {unpack_value(Raw, Type), Rest}
    end.

%% Adds one decoded value to the accumulator.
%% A `repeated' field keeps a single entry whose value list grows at the tail,
%% and that entry is moved to the head of the accumulator; any other field is
%% simply prepended as a new entry.
store_decoded_field(FNum, Name, Value, Opts, Acc) ->
    case lists:member(repeated, Opts) of
        false ->
            [{FNum, Name, Value} | Acc];
        true ->
            case lists:keytake(FNum, 1, Acc) of
                {value, {FNum, Name, List}, Acc1} ->
                    [{FNum, Name, List ++ [Value]} | Acc1];
                false ->
                    [{FNum, Name, [Value]} | Acc]
            end
    end.

%% The clause shown here ignores the type argument and returns Val unchanged.
%% NOTE(review): the article text says enum-specific clauses are produced by
%% the abstract-code substitution step; only this pass-through clause is
%% visible here.
int_to_enum(_,Val) ->

Val.

这个函数是最大的一个函数，Types即是语法定义，而该函数相当于一个语法分析器，通过protobuffs:decode/2得到的每一个“符号”，都将根据Types的语法定义进行相应的动作：

1.对于嵌套定义的message，将进行递归类型解码；

2.对于repeated的域，将合并到原先找到的域的值列表中；

3.对于枚举类型，需要先通过int_to_enum将枚举值替换为具体的atom，这个函数也是通过抽象码替换过程生成的，message的每个枚举定义都有一个独立的atom。

4.对于标量类型，将直接记录到message的域值中。

最终，decode/3将生成一个列表，记录了每一个message的各个域，一个message的所有域放置在一个列表内。

%% Decodes a binary holding a pikachu message.
%% Types is the field table consumed by decode/3: one
%% {FieldNumber, FieldName, Type, Opts} tuple per field.
%% The tuples returned by decode/3 are handed to to_record/2, which produces
%% the final record.
decode(pikachu, Bytes) when is_binary(Bytes) ->

Types = [{1, abc, int32, []}, {2, def, double, []}],

Decoded = decode(Bytes, Types, []),

to_record(pikachu, Decoded).

%% @doc Builds a #pikachu{} record from the {FNum, Name, Value} tuples
%% produced by decode/3.
%%
%% decode/3 prepends each decoded field to its accumulator, so DecodedTuples
%% lists the most recently decoded field first. Folding from the right applies
%% the fields in wire order, so when a non-repeated field occurs more than once
%% the last occurrence ends up in the record, as the protobuf encoding rules
%% require. A left fold would let the first occurrence overwrite the later ones.
to_record(pikachu, DecodedTuples) ->
    lists:foldr(
        fun({_FNum, Name, Val}, Record) ->
            set_record_field(record_info(fields, pikachu), Record, Name, Val)
        end,
        #pikachu{},
        DecodedTuples
    ).

%% @doc Returns Record with the field named Field set to Value.
%%
%% Fields is the ordered list of field names of the record (as returned by
%% record_info(fields, ...)). A record is a tuple whose first element is the
%% record name, so field number N lives at tuple position N + 1.
%%
%% Raises error({unknown_record_field, Field}) when Field is not in Fields.
%% list_index/2 reports a miss as 0, and writing to position 0 + 1 would
%% silently overwrite the record name instead of a field.
set_record_field(Fields, Record, Field, Value) ->
    case list_index(Field, Fields) of
        0 ->
            erlang:error({unknown_record_field, Field});
        Index ->
            erlang:setelement(Index + 1, Record, Value)
    end.

%% @doc Returns the 1-based position of the first element of List that is
%% exactly equal to Target, or 0 when there is no such element.
list_index(Target, List) ->
    list_index(Target, List, 1).

%% Worker for list_index/2: Position is the index of the current list head.
list_index(_Target, [], _Position) ->
    0;
list_index(Target, [Head | _Rest], Position) when Head =:= Target ->
    Position;
list_index(Target, [_Head | Rest], Position) ->
    list_index(Target, Rest, Position + 1).

to_record是一个语义分析过程，根据Types定义，遍历decode/3生成的语法结构，提取每个message的每个域及其值的绑定关系，然后填充到具体的record中去。这个填充的过程也较为巧妙：由于record实质也是一个元组，to_record将遍历一个message的域值绑定列表，由于每一个域在proto文件中都有顺序编号，取出域值绑定关系时也就得到了域在message里面的位置，继而也就得到了域在record元组中的位置，set_record_field即用于填充元组指定字段的值。

至此，erlang的protocol buffers实现protobuffs就已经分析完毕了，其设计思路较为新颖，从中可以学到很多编程技巧，感兴趣的读者可以深挖并应用。

抽象码是连接erlang代码与erlang虚拟机代码的纽带，抽象码的执行过程即代表了虚拟机的执行过程，分析抽象码可以有效帮助我们理解erlang代码的执行方式、性能等，利用抽象码替换（可参考smerl的实现），可以实现erlang代码trace、profile插桩，帮助我们找到代码的bug，调试代码的性能。

分享到：

erlang NIF部分接口实现（一）加载过程及编 ... | erlang抽象码与basho的protobuf（四）代码 ...

2012-07-07 22:54
浏览 3506
评论(0)
分类:互联网
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

erlang抽象码与basho的protobuf（五）执行过程

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

erlang抽象码与basho的protobuf（五）执行过程

评论

发表评论

相关推荐

erlang抽象码与basho的protobuf（四）代码生成原理之代码生成

erlang抽象码与basho的protobuf（三）代码生成原理之语义分析

erlang抽象码与basho的protobuf（二）代码生成原理之词法与语法分析

erlang抽象码与basho的protobuf（一）使用

最近访客更多访客>>