递归和尾递归汇编层面的差别

mryufeng

浏览: 989526 次
性别:
来自: 广州

最近访客更多访客>>

bekda

wszt

antsmall

funing

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

erlang

Erlang EXT F#C C++

erlang的标准用法是尽可能的把函数调用写出尾递归的方式，实际的结果靠参数传递。尾递归的方式对进程的堆栈使用很小，只要一个WORD, 但是非尾递归就要看递归的层数，如果数量很大，会把堆栈撑的很大。我们在汇编一级看下如何实现的：

root@nd-desktop:~# cat tailcall.erl
-module(tailcall).
-export([start/1]).
-compile(export_all).

start(N)->
    X = loop(N),
    Y = tail_loop(N),
    X = Y,
    done.

loop(0)->
    1;
loop(N) when N >0 ->
   N * loop(N-1).

tail_loop(N)->
    tail_loop2(N, 1).
tail_loop2(0, R)->
    R;
tail_loop2(N, R) ->
    tail_loop2(N-1, N *R).

root@nd-desktop:~# erlc +"'S'" tailcall.erl
root@nd-desktop:~# cat tailcall.S
{module, tailcall}. %% version = 0

{exports, [{loop,1},
           {module_info,0},
           {module_info,1},
           {start,1},
           {tail_loop,1},
           {tail_loop2,2}]}.

{attributes, []}.

{labels, 16}.

{function, start, 1, 2}.
{label,1}.
    {func_info,{atom,tailcall},{atom,start},1}.
{label,2}.
    {allocate,1,1}.
    {move,{x,0},{y,0}}.
    {call,1,{f,5}}.
    {move,{x,0},{x,1}}.
    {move,{y,0},{x,0}}.
    {move,{x,1},{y,0}}.
    {call,1,{f,8}}.
    {test,is_eq_exact,{f,3},[{x,0},{y,0}]}.
    {move,{atom,done},{x,0}}.
    {deallocate,1}.
    return.
{label,3}.
    {badmatch,{x,0}}.

{function, loop, 1, 5}.
{label,4}.
    {func_info,{atom,tailcall},{atom,loop},1}.
{label,5}.
    {test,is_eq_exact,{f,6},[{x,0},{integer,0}]}.
    {move,{integer,1},{x,0}}.
    return.
{label,6}.
    {test,is_lt,{f,4},[{integer,0},{x,0}]}.
    {allocate_zero,1,1}.
%% 主要是这条 allocate_zero 这个指令把当前的调用栈保存同时分配个参数空间
%% 对应 AllocateZero 这个操作

    {gc_bif,'-',{f,0},1,[{x,0},{integer,1}],{x,1}}.
    {move,{x,0},{y,0}}.
    {move,{x,1},{x,0}}.
    {call,1,{f,5}}.
%% call opcode, 再次调用堆栈层数+1
    {gc_bif,'*',{f,0},1,[{y,0},{x,0}],{x,0}}.
    {deallocate,1}.
%% 恢复调用栈
%% 对应 deallocate_I操作

    return.

{function, tail_loop, 1, 8}.
{label,7}.
    {func_info,{atom,tailcall},{atom,tail_loop},1}.
{label,8}.
    {move,{integer,1},{x,1}}.
    {call_only,2,{f,10}}.

{function, tail_loop2, 2, 10}.
{label,9}.
    {func_info,{atom,tailcall},{atom,tail_loop2},2}.
{label,10}.
    {test,is_eq_exact,{f,11},[{x,0},{integer,0}]}.
    {move,{x,1},{x,0}}.
    return.
{label,11}.
    {gc_bif,'-',{f,0},2,[{x,0},{integer,1}],{x,2}}.
    {gc_bif,'*',{f,0},3,[{x,0},{x,1}],{x,1}}.
    {move,{x,2},{x,0}}.
    {call_only,2,{f,10}}.
%% 调用 call_only opcode 没有建立堆栈的过程

{function, module_info, 0, 13}.
{label,12}.
    {func_info,{atom,tailcall},{atom,module_info},0}.
{label,13}.
    {move,{atom,tailcall},{x,0}}.
    {call_ext_only,1,{extfunc,erlang,get_module_info,1}}.

{function, module_info, 1, 15}.
{label,14}.
    {func_info,{atom,tailcall},{atom,module_info},1}.
{label,15}.
    {move,{x,0},{x,1}}.
    {move,{atom,tailcall},{x,0}}.
    {call_ext_only,2,{extfunc,erlang,get_module_info,2}}.

我们在beam_emu.c中可以看到：

#define AllocateZero(Ns, Live)             \
do { Eterm* ptr;                          \
      int i = (Ns);                        \
      AH(i, 0, Live);                      \
      for (ptr = E + i; ptr > E; ptr--) { \
         make_blank(*ptr);                 \
     }                                     \
} while (0)

#define AH(StackNeed, HeapNeed, M) \
do { \
     int needed; \
     needed = (StackNeed) + 1; \
     if (E - HTOP < (needed + (HeapNeed))) { \
           SWAPOUT; \
           reg[0] = r(0); \
           PROCESS_MAIN_CHK_LOCKS(c_p); \
           FCALLS -= erts_garbage_collect(c_p, needed + (HeapNeed), reg, (M)); \
           PROCESS_MAIN_CHK_LOCKS(c_p); \
           r(0) = reg[0]; \
           SWAPIN; \
     } \
     E -= needed; \
     SAVE_CP(E); \
} while (0)

#define SAVE_CP(X)                              \
   do {                                         \
      *(X) = make_cp(c_p->cp);                  \
      c_p->cp = 0;                              \
   } while(0)

#define RESTORE_CP(X)           SET_CP(c_p, cp_val(*(X)))

#define D(N)             \
   RESTORE_CP(E);      \
     E += (N) + 1;

OpCase(deallocate_I): {
     Eterm* next;

     PreFetch(1, next);
     D(Arg(0));
     NextPF(1, next);
}

结论是：
尾递归的效率要比递归的好很多，包括堆栈的使用和参数的分配，就是代码写起来不那么直观。

分享到：

erlang对行读的支持 | R13B02的线程stack size设置

2009-09-24 22:11
浏览 3067
评论(11)
论坛回复 / 浏览 (11 / 7047)
分类:编程语言
查看更多

11 楼 EQualizer 2009-10-01

The Book 写道

编译尾递归的函数可以使一系列语句最后的一个函数调用可以被替换为一个简单的跳转指令，指向被调用函数的开头。这就意味着一个尾递归的函数可以无限循环而不需要消耗栈空间。

10 楼 mryufeng 2009-09-25

编译器可以做些明显的可以转换的但是人工又懒的去转的那种就可以了。我很喜欢erlang factory推荐的那个tidier 很强大，有人工的智能来帮你做erlang的最佳实践，不知道什么时候可以release给大众。

9 楼 t0uch 2009-09-25

Trustno1 写道

weiqingfei 写道

编译器能不能考虑把非尾递归优化为尾递归？

noway

并不是所有的递归都能转化成尾递归吧

8 楼 mryufeng 2009-09-25

尾递归和递归在erlang vm里占用的空间不同一个使用堆一个使用栈但是都在一个进程的空间里，所以效率差不多。只是使用heap会很灵活效率更高些些。

7 楼 argan 2009-09-25

受教了，新版本的erlang在有些情况下，非尾递归的性能不一定差的，所以有时候不写尾递归也无所谓

6 楼 mryufeng 2009-09-25

大部分是尾递归呀部分的数据集很小也无所谓的

5 楼 Trustno1 2009-09-25

xvyu 写道

但是erlang的很多标准库都没有用尾递归，比方lists，这是为什么呢

good enough.

4 楼 xvyu 2009-09-25

但是erlang的很多标准库都没有用尾递归，比方lists，这是为什么呢

3 楼 litaocheng 2009-09-25

开始只是知道要怎么做，在老大的剖析下知道了为什么这么做...
继续啊！

2 楼 Trustno1 2009-09-25

weiqingfei 写道

编译器能不能考虑把非尾递归优化为尾递归？

noway

1 楼 weiqingfei 2009-09-25

编译器能不能考虑把非尾递归优化为尾递归？

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论