浏览 4468 次
锁定老帖子 主题:头文字D,Spider
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
|
|
---|---|
作者 | 正文 |
发表时间:2007-10-12
越来越爱D了。
import std.stdio;
import std.string;
import std.conv;
import std.socket;
import std.socketstream;
import std.stream;
import std.regexp;
import std.thread;
import std.c.time;

/// URLs waiting to be fetched; guarded by g_lock.
char[][] g_queue;
/// Number of URLs queued or currently being processed; guarded by g_lock.
int g_task_amount=0;
const char[] homepage="http://mobile.younet.com/";
const ushort max_thread=20;
alias std.string.find strfind;

/// Single lock object shared by every critical section below. A bare
/// `synchronized{}` statement owns a private mutex per statement, so separate
/// statements would not exclude each other; locking on one object does.
Object g_lock;

static this(){
    g_lock=new Object;
}

/**
 * Fetches url with a plain HTTP/1.0 GET and returns the response body.
 *
 * Params:
 *   url            = absolute URL; an optional ":port" after the host is honoured.
 *   redirects_left = how many more "Location: " / "URL=" redirects may be followed.
 *
 * Returns: the text after the first blank line of the response, the whole
 *          response if it contains no blank line, or null when isURL rejects url.
 *
 * Throws: Exception when the redirect budget is exhausted; socket and stream
 *         errors propagate to the caller.
 */
char[] getHTML(char[] url, int redirects_left=5){
    char[] domain,html;
    ubyte[1024] buf;
    ushort port=80;
    if(!isURL(url))return null;
    char[] requested=url; // kept for the error message; url is rewritten below

    int i=strfind(url,"://")+3;
    url=url[i..$];
    int e=strfind(url,"/");
    if(e<0){
        e=url.length;
    }
    int j=strfind(url,":");
    // Only a colon that precedes the path separates host and port.
    if(j>0 && j<e){
        port=toUshort(url[j+1..e]);
        domain=url[0..j];
    }
    else{
        domain=url[0..e];
    }
    if(e==url.length){
        url="/";
    }
    else{
        url=url[e..$];
    }
    debug(younet){
        writefln("%s %s %s",port,domain,url);
    }

    Socket sock=new TcpSocket(new InternetAddress(domain,port));
    scope(exit) sock.close();
    Stream ss=new SocketStream(sock);
    scope(exit) ss.close(); // runs before sock.close(), also when a read throws
    ss.writeString("GET " ~ url ~ " HTTP/1.0\r\n"
        "Host: " ~ domain ~ "\r\n"
        "Connection: close\r\n"
        "Referer: http://" ~ domain ~ url ~ "\r\n"
        "\r\n\r\n\r\n\r\n");
    int recv_amount=ss.read(buf);
    while(recv_amount>0){
        html ~= cast(char[])buf[0..recv_amount];
        recv_amount=ss.read(buf);
    }

    // Follow an HTTP "Location: " header or a meta-refresh "URL=" target.
    char[][] mc=RegExp("(URL=|Location: )(.*?)[\"\r]").match(html);
    if(mc.length==3){
        if(redirects_left<=0){
            throw new Exception("too many redirects: " ~ requested);
        }
        return getHTML(mc[2],redirects_left-1);
    }

    int start_pos=strfind(html,"\r\n\r\n");
    if(start_pos<0){
        return html; // no header/body separator: nothing to strip
    }
    return html[start_pos+4 .. $];
}

/**
 * Worker thread body: repeatedly takes a URL from g_queue and fetches it.
 * Pages whose URL does not contain "files/list" are scanned for
 * files/list_N.html links, which are queued relative to homepage.
 * The worker returns once g_task_amount has dropped to zero.
 *
 * Params:
 *   p = unused thread argument.
 *
 * Returns: always 1.
 */
int crawl(void * p){
    while(true){
        char[] url,html;
        bool idle=false;
        synchronized(g_lock){
            if(g_task_amount<=0) break; // nothing queued and nothing in flight
            if(g_queue.length==0){
                idle=true; // other workers may still enqueue links
            }
            else{
                url=g_queue[0];
                g_queue=g_queue[1..$];
            }
        }
        if(idle){
            sleep(1); // wait without holding the lock
            continue;
        }
        // URL text is passed as an argument, never as the format string.
        writefln("begin:%s",url);

        try{
            html=getHTML(url);
        }
        catch(Exception ex){
            int remaining;
            synchronized(g_lock){
                if(g_task_amount>0) g_task_amount-=1;
                remaining=g_task_amount;
            }
            writefln(ex);
            writefln("failed:%s",url);
            writefln("remains" ~ toString(remaining));
            continue;
        }
        debug(younet){
            int shown=html.length<200 ? html.length : 200;
            writef("!!!%s",html[0..shown]);
        }

        if(strfind(url,"files/list")<0){
            // Run the regex outside the lock; publish the results in one step.
            char[][] found;
            foreach(m;RegExp("files/list_\\d+\\.html").search(html)){
                found ~= homepage ~ m.match(0);
            }
            synchronized(g_lock){
                g_queue ~= found;
                // +1 per new link, -1 for the page just finished
                g_task_amount+=cast(int)found.length-1;
            }
            writefln("done:%s",url);
            debug(younet){
                synchronized(g_lock){
                    writefln(g_queue);
                }
            }
        }
        else{
            writefln("done:%s",url);
            synchronized(g_lock){
                g_task_amount-=1;
                writefln("remains" ~ toString(g_task_amount));
            }
        }
    }
    return 1;
}

/**
 * Seeds the queue with homepage, starts max_thread workers and polls once a
 * second until no tasks remain.
 */
int main(char[][] args){
    // Seed before any worker starts so the task count is never seen as zero.
    g_queue ~= homepage;
    g_task_amount+=1;

    Thread[] tds;
    for(int i=0;i<max_thread;i++){
        Thread t=new Thread(&crawl,null);
        t.start();
        tds ~= t;
    }

    while(true){
        sleep(1);
        int remaining;
        synchronized(g_lock){
            remaining=g_task_amount;
        }
        if(remaining<=0)break;
    }
    return 0;
}
声明:ITeye文章版权属于作者,受法律保护。没有作者书面许可不得转载。
推荐链接
|
|
返回顶楼 | |
发表时间:2008-09-23
编译提示如下错误:
OPTLINK (R) for Win32 Release 8.00.1
Copyright (C) Digital Mars 1989-2004 All rights reserved.
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _getprotobyname@4
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _getprotobynumber@4
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _getservbyname@8
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _getservbyport@8
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _WSAGetLastError@0
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _gethostbyname@4
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _gethostbyaddr@12
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _inet_addr@4
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _inet_ntoa@4
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _ioctlsocket@12
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _getsockopt@20
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _bind@12
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _connect@12
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _listen@8
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _accept@12
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _closesocket@4
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _shutdown@8
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _getpeername@12
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _getsockname@12
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _send@16
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _sendto@24
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _recv@16
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _recvfrom@24
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _setsockopt@20
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _socket@12
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _WSAStartup@8
D:\d\dmd\bin\..\lib\phobos.lib(socket) Error 42: Symbol Undefined _WSACleanup@0
--- errorlevel 27 |
|
返回顶楼 | |
发表时间:2008-10-04
缺少引用的库了.
试试 dmd Spider.d ws2_32.lib |
|
返回顶楼 | |