- 浏览: 39427 次
- 性别:
- 来自: 南京
最新评论
-
kely39:
不可能让所有用户都去设置IE8安全级别吧,这方法不明智。
js限制本地上传文件大小(支持ie6,7,8 firefox 谷歌) -
ldl_xz:
http://www.9958.pw/post/wenku 参 ...
php利用openoffice把doc转换为PDF, txt or HTML
自己以前写的一个采集程序,比较简单。
<?php
/**
 * Scrapes one result page of an agent's sale listings from zsb.house365.com
 * and stores every listing that is not yet present in the `hou_mrent` table.
 *
 * GET parameters:
 *   company  - stored as the `author` of each inserted row
 *   memberid - stored as `memberid` and used in the duplicate check
 *   zj_num   - sent to the remote site as the `page` parameter
 *   zj_code  - sent to the remote site as the `agentcode` parameter
 *
 * Output: an inline script that writes a summary note into the parent
 * document's `note` element and re-enables `btn_sc` and `btn_zq`.
 *
 * NOTE(review): the Chinese literals below are compared with raw bytes of the
 * fetched pages, and the final note is converted with iconv("GBK", "UTF-8"),
 * so this file is presumably saved as GBK - confirm before re-encoding it.
 */
include('global.php');

if (!function_exists('zsb_get_param')) {
    /**
     * Returns a scalar GET parameter as a string, or '' when it is missing
     * or not a scalar (for example an array supplied as `name[]=...`).
     */
    function zsb_get_param($name)
    {
        if (isset($_GET[$name]) && is_scalar($_GET[$name])) {
            return (string)$_GET[$name];
        }
        return '';
    }
}

if (!function_exists('zsb_match_groups')) {
    /**
     * Runs $pattern once against $subject and returns capture groups
     * 1..$groupCount, trimmed. Every group is '' when nothing matched, so
     * callers never index into a missing match.
     */
    function zsb_match_groups($pattern, $subject, $groupCount)
    {
        $groups = array();
        $found = preg_match($pattern, $subject, $m);
        for ($g = 1; $g <= $groupCount; $g++) {
            $groups[$g] = ($found && isset($m[$g])) ? trim($m[$g]) : '';
        }
        return $groups;
    }
}

if (!function_exists('zsb_first_match')) {
    /**
     * Returns capture group $group of the first match of $pattern in
     * $subject, trimmed, or '' when there is no match.
     */
    function zsb_first_match($pattern, $subject, $group = 1)
    {
        $groups = zsb_match_groups($pattern, $subject, $group);
        return $groups[$group];
    }
}

$conn = new db();

$company  = zsb_get_param('company');
$memberid = zsb_get_param('memberid');
$zj_num   = zsb_get_param('zj_num');
$zj_code  = zsb_get_param('zj_code');

// Request values are URL-encoded so they cannot inject extra query parameters.
$url = "http://zsb.house365.com/main.php?infotype=0&price=0&buildarea=0&district=0&keyword=&order_=1&page="
    . urlencode($zj_num) . "&agentcode=" . urlencode($zj_code)
    . "&pkind=selllist&roomtype=&topic=&order=";

// Warnings stay suppressed so they do not leak into the script output;
// the failure itself is handled explicitly.
$text = @file_get_contents($url);
if ($text === false) {
    $text = '';
}

// Group 1 of every match is the href of one listing's detail page.
preg_match_all('/<td align="center" valign="middle"><a href=\'(.*?)\' target=\'_blank\' title="(.*?)"><img src="http:\/\/sell.house365.com\/images\/sellesflist_12.gif" width="77" height="18" border="0" \/><\/a><\/td>/i', $text, $row);
$detailUrls = isset($row[1]) ? $row[1] : array();

// Lookup tables; they do not depend on the listing, so they are built once.
$fitmentCodes = array(
    "毛坯" => 1,
    "简装" => 2,
    "精装" => 3,
    "豪华装" => 4,
);
$categories = array(
    "2" => "住宅",
    "31" => "写字楼",
    "32" => "商铺",
    "33" => "别墅",
);
$district = array(
    "46" => "玄武区",
    "45" => "鼓楼区",
    "48" => "白下区",
    "49" => "建邺区",
    "47" => "秦淮区",
    "44" => "下关区",
    "51" => "雨花台区",
    "50" => "栖霞区",
    "52" => "江宁区",
    "53" => "浦口区",
    "54" => "六合区",
    "55" => "溧水县",
    "56" => "高淳县",
    "60" => "其它",
);
// Facility label on the page => flag column in hou_mrent.
$facilityFields = array(
    "宽带" => 'facnet',
    "管道煤气" => 'facgas',
    "有线电视" => 'factvnet',
    "电话" => 'facphone',
    "冰箱" => 'facfridge',
    "电视机" => 'factv',
    "洗衣机" => 'facwasher',
    "热水器" => 'facwheater',
    "空调" => 'facaircon',
    "家具" => 'facfurniture',
);

$num_all = 0;

foreach ($detailUrls as $url1) {
    // The href comes from a remote page: only follow real HTTP(S) links so a
    // crafted value cannot make file_get_contents() read a local file.
    if (!preg_match('#^https?://#i', $url1)) {
        continue;
    }
    $text1 = @file_get_contents($url1);
    if ($text1 === false || $text1 === '') {
        // Nothing to parse. The original skipped the insert in this case too,
        // because the listing name came out empty.
        continue;
    }

    $mrent = array();
    $mrent['coltype'] = 'second';
    $mrent['menuid'] = '10';
    $mrent['memberid'] = $memberid;
    $mrent['memberprop'] = '2';
    $mrent['shangquan'] = '0';
    $mrent['infotype'] = 'sale';
    $mrent['jz'] = '1';
    $mrent['author'] = $company;

    // Listing name
    $louopan = zsb_first_match('/<td colspan="2" class="fy_name"><h1 style="text-align:center;font-size:20px;font-family:黑体;font-weight:normal">(.*?)<\/h1><\/td>/i', $text1);
    $mrent['loupan'] = iconv("GBK", "UTF-8", $louopan);

    // Sale price
    $mrent['rentall'] = zsb_first_match('/<td width="215" class="dash_line">售价:<span>(.*?)<\/span> 万元/i', $text1);

    // Floor area
    $mrent['area'] = zsb_first_match('/<td class="dash_line">面积:<span>(.*?)<\/span> 平方米<\/td>/i', $text1);

    // Unit price
    $mrent['rentaverage'] = zsb_first_match('/<td class="dash_line">单价:(\d*) 元\/平方米<\/td>/i', $text1);

    // Layout: rooms / halls / bathrooms / balconies
    $layout = zsb_match_groups('/<td class="dash_line">户型:(\d)房(\d)厅(\d)卫(\d)阳台<\/td>/i', $text1, 4);
    $mrent['shi'] = $layout[1];
    $mrent['ting'] = $layout[2];
    $mrent['wei'] = $layout[3];
    $mrent['tai'] = $layout[4];

    // Floor: current floor and total floors
    $floor = zsb_match_groups('/<td class="dash_line">楼层:(\d*)楼,总高(\d*)层<\/td>/i', $text1, 2);
    $mrent['nowfloor'] = $floor[1];
    $mrent['allfloors'] = $floor[2];

    // Year built
    $mrent['buildtime'] = zsb_first_match('/<td class="dash_line">年代:(\d*)年<\/td>/i', $text1);

    // Decoration level; unknown labels fall back to code 1, as before
    $upholster = zsb_first_match('/<td class="dash_line">装修:(.*?)<\/td>/i', $text1);
    $mrent['fitment'] = isset($fitmentCodes[$upholster]) ? $fitmentCodes[$upholster] : 1;

    // Facing direction
    $face_to = zsb_first_match('/<td class="dash_line">朝向:(.*?)<\/td>/i', $text1);
    $mrent['chaoxiang'] = iconv("GBK", "UTF-8", $face_to);

    // Property type, mapped to a category id (0 when the type is unknown)
    $housetype = zsb_first_match('/<td class="dash_line">类型:<a (.*?)>(.*?)<\/a><\/td>/i', $text1, 2);
    $mrent['housetype'] = iconv("GBK", "UTF-8", $housetype);
    $catid = array_search($housetype, $categories, true);
    $mrent['catid'] = ($catid === false) ? 0 : $catid;

    // Last update time; strtotime() yields false when the text is missing
    // or unparsable, same as the original.
    $mrent['uptime'] = strtotime(zsb_first_match('/<td class="dash_line">更新时间:(.*?)<\/td>/i', $text1));

    // District, mapped to a zone id. An unknown district now becomes 0
    // instead of false so the SQL below stays well-formed.
    $cat = zsb_first_match('/<td width="220" class="dash_line">区属:<a (.*?)>(.*?)<\/a>\s<\/td>/i', $text1, 2);
    $zoneid = array_search($cat, $district, true);
    if ($zoneid === false) {
        $zoneid = 0;
    }
    $mrent['zoneid'] = $zoneid;

    // Sub-area ("board"): resolved to an id through the `quyu` table.
    $board = zsb_first_match('/<td width="150" class="dash_line">板块:<a (.*?)>(.*?)<\/a> <\/td>/i', $text1, 2);
    $board = iconv("GBK", "UTF-8", $board);
    // NOTE(review): addslashes() is a stop-gap because the db class API is not
    // visible here; prefer its own escaping or prepared statements if it has them.
    $sql = "SELECT id FROM `quyu` where sort =" . (int)$zoneid . " and `name` like '%" . addslashes((string)$board) . "%'";
    $quyu = $conn->Query2SingleRowArray($sql);
    $mrent['quyu'] = (is_array($quyu) && isset($quyu['id'])) ? $quyu['id'] : null;

    // Residential community: only its address (group 3) is stored
    $xq_address = zsb_first_match('/<td class="dash_line">小区:<a (.*?)>(.*?)<\/a><\/td>\s*<td class="dash_line">(.*?)<\/td>/i', $text1, 3);
    $mrent['address'] = iconv("GBK", "UTF-8", $xq_address);

    // Contact: group 1 is the phone markup, group 2 the contact name
    $contact = zsb_match_groups('/<td width="245" align="center"><strong><span>(.*?)<\/span><\/strong><\/td>\s*<td width="185">联系人:<span>(.*?)<\/span><\/td>/i', $text1, 2);
    $mrent['lxr'] = iconv("GBK", "UTF-8", $contact[2]);
    $lxr_tel = str_replace(array('<span style="font-size:14px">', '</span>'), "", $contact[1]);
    $tel_arr = explode("-", $lxr_tel);
    // The original loop assigned `lxdh` in both branches, so the last
    // "-"-separated segment always won; that result is kept.
    // NOTE(review): this drops any prefix before the last "-" - confirm intent.
    $mrent['lxdh'] = end($tel_arr);

    // Detail block: the first <br>-separated line is the publish date, later
    // lines carry a label prefix followed by a value.
    $infoBlock = zsb_first_match('/<div class="infor_fp_con">\s*(.*?)\s*<\/div>/i', $text1);
    $info = explode("<br>", $infoBlock);
    $fbdate = trim($info[0]);
    // Reset per listing so values from a previous listing cannot leak in.
    $base = '';
    $attach = '';
    foreach (array_slice($info, 1) as $infoLine) {
        // Trim before cutting, so the label and the value are taken from the
        // same string (the original cut the label from the untrimmed line).
        $infoLine = trim($infoLine);
        // NOTE(review): a 10-byte prefix implies each label is 10 bytes in the
        // file's encoding - verify the colon character in these literals.
        $if_type = substr($infoLine, 0, 10);
        $if_value = (string)substr($infoLine, 10);
        if ($if_type == "基础配套:") {
            $base = $if_value;
        } elseif ($if_type == "附属设施:") {
            $attach = $if_value;
        }
        // Other labels (e.g. "交通线路:") are not stored.
    }
    $mrent['fbdate'] = iconv("GBK", "UTF-8", $fbdate);

    // Facility flags. They start at 0 for every listing, and each label sets
    // only its own flag: the original switch had no `break`, so one match
    // also set every flag listed after it.
    $mrent['facfurniture'] = 0;
    $mrent['factvnet'] = 0;
    $mrent['factv'] = 0;
    $mrent['facnet'] = 0;
    $mrent['facphone'] = 0;
    $mrent['facwheater'] = 0;
    $mrent['facaircon'] = 0;
    $mrent['facwasher'] = 0;
    $mrent['facfridge'] = 0;
    $mrent['facgas'] = 0;
    foreach (explode(",", $base . ',' . $attach) as $jc_value) {
        $jc_value = trim($jc_value);
        if (isset($facilityFields[$jc_value])) {
            $mrent[$facilityFields[$jc_value]] = 1;
        }
    }

    if ($mrent['loupan'] != '') {
        // Duplicate check. The original statement lacked the `and` between
        // its two conditions, so it was invalid SQL and could never find an
        // existing row.
        $sql = "SELECT id FROM `hou_mrent` where memberid =" . (int)$memberid . " and loupan ='" . addslashes((string)$mrent['loupan']) . "'";
        $chk = $conn->Query2SingleRowArray($sql);
        $exists = is_array($chk) && isset($chk['id']) && $chk['id'] != '';
        if (!$exists) {
            // Third argument kept as in the original call (`$debug = true`).
            // NOTE(review): its effect depends on db::insert - confirm.
            $conn->insert('hou_mrent', $mrent, true);
            $rows = $conn->GetQueryAffectedRows();
            if ($rows > 0) {
                $num_all++;
            }
        }
    }
}

$note = "抓取完成,本页一共抓取" . $num_all . "条房源";
$note = iconv("GBK", "UTF-8", $note);
?> <script type="text/javascript"> parent.document.all('note').innerHTML="<?php echo $note; ?>"; parent.document.all('btn_sc').disabled=""; parent.document.all('btn_zq').disabled=""; </script>
- 365.rar (3 KB)
- 下载次数: 0
发表评论
-
php数据采集
2012-11-03 10:59 847<?php function get_pa ... -
零碎积累
2012-11-05 09:01 845iframe 中运行父页面的函数---> 一句很有 ... -
Yii框架--file提交
2012-11-06 17:28 11821.表单文件(本例是_form.php,表单提交页面) ... -
PHP 文件下载
2012-11-02 10:22 700// $file = "/folder/filen ... -
在Linux上安装Memcached服务
2012-11-01 23:00 840ubutu下可以直接: apt-get install ... -
php利用openoffice把doc转换为PDF, txt or HTML
2012-11-01 22:42 3602参考文档:http://www.lampdeveloper.c ...
相关推荐
蚂蚁种类如Temnothorax albipennis在选择新巢址时采取分布式的方法,如果正确建模,这种行为可以为机器人集群和其他领域的选址算法提供有价值的信息。研究并复制这种蚂蚁的觅居行为,也将揭示自然进化出的有用分布式...
广东省博罗县泰美中学七年级英语上册 Module 10 Unit 2 My mother’s cleaning our houses and sweeping away bad luck导学案(无答案)(新版)外研版
在IT领域,尤其是在计算机视觉和图像处理中,"standard_test_images"是一个常见概念,它指的是用于测试和验证图像处理算法或计算机视觉系统性能的一组标准化图像。这些图像通常具有已知特性,比如特定的几何形状、...
首先,灰度图像是一种单通道图像,它只包含亮度信息,没有颜色信息。在计算机视觉和图像处理中,灰度图像通常用于简化分析,因为它们降低了数据的复杂性,使得算法更容易处理。这些经典的测试图像对于验证和比较不同...
Generic x86-64 需要使用balena-etcher及ubuntu live写入emmc
程序员常刷题目标 构建利用流量控制的方法 阅读并理解测试输出以开发工作程序 更加熟悉测试驱动开发的概念 关于本指南的说明 我们之前已经了解了测试驱动开发以及阅读和理解 RSpec 测试的概念。...
emgucv是对opencv的封装,他支持以C#为开发语言的图像处理系统的开发。Emgu CV是.NET平台下对OpenCV图像处理库的封装,也就是.NET版的OpenCV。解压密码:hous. 在官网下载太慢了
离线数据处理:项目内容为通过对网站访问日志的采集和清洗,结合数据库中的结构化用户数据,统计并展示网站的PV、UV情况,以对网站的运行情况进行监控。通过此项目,回顾并串联前面讲述的离线数据处理相关技术,如:...
邮差或您选择的任何其他HTTP客户端 如何使用它您可以使用以下命令克隆此存储库通过HTTPS: git clone https://github.com/gabrielloppes/housee_backend.git 通过SSH: git clone git@github.com:gabrielloppes/hous
cs.SymbolBitmapName := 'HOUS2-32.BMP'; cs.SymbolBitmapSize := 40; ``` 需要注意的是,自定义的位图必须放置在 MapInfo 安装目录下的默认共享路径 `C:\Program Files\Common Files\MapInfo Shared\MapXCommon\...
语言:English ...使用条款:https://ultra-tab.com/terms-and-conditions/隐私政策:https://ultra-tab.com/privacy-policy/ Syndic8 info@syndic8.asia +65 8522 2265 Maxwell Road 20 #09-17 Maxwell Hous
FRED 有一个数据平台,他们根据引入的数据量更新其信息。使用Kaggle浏览美国人口普查局以及通过美国人口普查局组织页面提供的所有数据源! file/opensearch/documents/92721/CHIC917BPPRIV.csv file/opensearch/...
在IT领域,尤其是在自动化控制和数据采集系统中,C#是一种常见的编程语言,用于与各种设备和控制系统进行交互。力控(ForceControl)是应用于工业自动化领域的专业人机界面(HMI)和SCADA(Supervisory Control and ...
1. 显示MIB信息:MIB浏览器能够解析并显示MIB文件的内容,其中包括各个OID及其对应的名称、描述、语法、访问权限和状态等。这对于网络管理员来说非常有价值,因为它们提供了理解网络设备配置和状态的途径。 2. OID...
在实际应用中,AnalogMachine可能被广泛应用于工业自动化、物联网设备控制、远程数据采集等领域。例如,通过网口连接的PC可以借助该服务与远端的串口设备(如PLC、传感器等)进行通信,使得数据传输不受物理距离限制...
4. 节点(Nodes):是OPCUA信息模型的基本单位,可以是变量、方法、对象或数据类型。 5. 安全(Security):提供认证和加密功能,确保数据传输的安全性。 在C#中,开发者可以使用OPCFoundation的OPCUA Stack或UA-...