`
hudeyong926
  • 浏览: 2037171 次
  • 来自: 武汉
社区版块
存档分类
最新评论

DedeHttpDown下载类

阅读更多

这个类的使用方法:
下载网页

<?php
$httpdown = new DedeHttpDown();
$httpdown->OpenUrl("http://www.dedecms.com");
echo $httpdown->GetHtml();

//如果保存为文件则用 $hd->SaveBin("dede.html");
$httpdown->Close();
?>

如果下载图片 并保存,可以用

<?php
$httpdown = new DedeHttpDown();
$httpdown->OpenUrl("http://prato.bokele.com/0/0/399/bGluMi5qcGc=.jpg");
echo $httpdown->SaveBin("test.jpg");
$httpdown->Close();
echo "<img src='test.jpg'>";
?>

 DedeHttpDown

<?php
@set_time_limit(0);

class DedeHttpDown
{
	var $m_url = '';
	var $m_urlpath = '';
	var $m_scheme = 'http';
	var $m_host = '';
	var $m_port = '80';
	var $m_user = '';
	var $m_pass = '';
	var $m_path = '/';
	var $m_query = '';
	var $m_fp = '';
	var $m_error = '';
	var $m_httphead = '';
	var $m_html = '';
	var $m_puthead = '';
	var $BaseUrlPath = '';
	var $HomeUrl = '';
	var $reTry = 0;
	var $JumpCount = 0;

	//初始化系统
	function PrivateInit($url)
	{
		if($url=='') {
			return ;
		}
		$urls = '';
		$urls = @parse_url($url);
		$this->m_url = $url;
		if(is_array($urls))
		{
			$this->m_host = $urls["host"];
			if(!empty($urls["scheme"]))
			{
				$this->m_scheme = $urls["scheme"];
			}
			if(!empty($urls["user"]))
			{
				$this->m_user = $urls["user"];
			}
			if(!empty($urls["pass"]))
			{
				$this->m_pass = $urls["pass"];
			}
			if(!empty($urls["port"]))
			{
				$this->m_port = $urls["port"];
			}
			if(!empty($urls["path"]))
			{
				$this->m_path = $urls["path"];
			}
			$this->m_urlpath = $this->m_path;
			if(!empty($urls["query"]))
			{
				$this->m_query = $urls["query"];
				$this->m_urlpath .= "?".$this->m_query;
			}
			$this->HomeUrl = $urls["host"];
			$this->BaseUrlPath = $this->HomeUrl.$urls["path"];
			$this->BaseUrlPath = preg_replace("/\/([^\/]*)\.(.*)$/","/",$this->BaseUrlPath);
			$this->BaseUrlPath = preg_replace("/\/$/","",$this->BaseUrlPath);
		}
	}

	function ResetAny()
	{
		//重设各参数
		$this->m_url = "";
		$this->m_urlpath = "";
		$this->m_scheme = "http";
		$this->m_host = "";
		$this->m_port = "80";
		$this->m_user = "";
		$this->m_pass = "";
		$this->m_path = "/";
		$this->m_query = "";
		$this->m_error = "";
	}

	//打开指定网址
	function OpenUrl($url,$requestType="GET")
	{
		$this->ResetAny();
		$this->JumpCount = 0;
		$this->m_httphead = Array() ;
		$this->m_html = '';
		$this->reTry = 0;
		$this->Close();

		//初始化系统
		$this->PrivateInit($url);
		$this->PrivateStartSession($requestType);
	}

	//转到303重定向网址
	function JumpOpenUrl($url)
	{
		$this->ResetAny();
		$this->JumpCount++;
		$this->m_httphead = Array() ;
		$this->m_html = "";
		$this->Close();

		//初始化系统
		$this->PrivateInit($url);
		$this->PrivateStartSession('GET');
	}

	//获得某操作错误的原因
	function printError()
	{
		echo "错误信息:".$this->m_error;
		echo "<br/>具体返回头:<br/>";
		foreach($this->m_httphead as $k=>$v){ echo "$k => $v <br/>\r\n"; }
	}

	//判别用Get方法发送的头的应答结果是否正确
	function IsGetOK()
	{
		if( ereg("^2",$this->GetHead("http-state")) )
		{
			return true;
		}
		else
		{
			$this->m_error .= $this->GetHead("http-state")." - ".$this->GetHead("http-describe")."<br/>";
			return false;
		}
	}

	//看看返回的网页是否是text类型
	function IsText()
	{
		if( ereg("^2",$this->GetHead("http-state")) && eregi("text|xml",$this->GetHead("content-type")) )
		{
			return true;
		}
		else
		{
			$this->m_error .= "内容为非文本类型或网址重定向<br/>";
			return false;
		}
	}

	//判断返回的网页是否是特定的类型
	function IsContentType($ctype)
	{
		if(ereg("^2",$this->GetHead("http-state"))
		&& $this->GetHead("content-type")==strtolower($ctype))
		{	return true; }
		else
		{
			$this->m_error .= "类型不对 ".$this->GetHead("content-type")."<br/>";
			return false;
		}
	}

	//用Http协议下载文件
	function SaveToBin($savefilename)
	{
		if(!$this->IsGetOK())
		{
			return false;
		}
		if(@feof($this->m_fp))
		{
			$this->m_error = "连接已经关闭!"; return false;
		}
		$fp = fopen($savefilename,"w");
		while(!feof($this->m_fp))
		{
			fwrite($fp,fread($this->m_fp,1024));
		}
		fclose($this->m_fp);
		fclose($fp);
		return true;
	}

	//保存网页内容为Text文件
	function SaveToText($savefilename)
	{
		if($this->IsText())
		{
			$this->SaveBinFile($savefilename);
		}
		else
		{
			return "";
		}
	}

	//用Http协议获得一个网页的内容
	function GetHtml()
	{
		if(!$this->IsText())
		{
			return '';
		}
		if($this->m_html!='')
		{
			return $this->m_html;
		}
		if(!$this->m_fp||@feof($this->m_fp))
		{
			return '';
		}
		while(!feof($this->m_fp))
		{
			$this->m_html .= fgets($this->m_fp,256);
		}
		@fclose($this->m_fp);
		return $this->m_html;
	}

	//开始HTTP会话
	function PrivateStartSession($requestType="GET")
	{
		if(!$this->PrivateOpenHost())
		{
			$this->m_error .= "打开远程主机出错!";
			return false;
		}
		$this->reTry++;
		if($this->GetHead("http-edition")=="HTTP/1.1")
		{
			$httpv = "HTTP/1.1";
		}
		else
		{
			$httpv = "HTTP/1.0";
		}
		$ps = explode('?',$this->m_urlpath);

		$headString = '';

		//发送固定的起始请求头GET、Host信息
		if($requestType=="GET")
		{
			$headString .= "GET ".$this->m_urlpath." $httpv\r\n";
		}
		else
		{
			$headString .= "POST ".$ps[0]." $httpv\r\n";
		}
		$this->m_puthead["Host"] = $this->m_host;

		//发送用户自定义的请求头
		if(!isset($this->m_puthead["Accept"]))
		{
			$this->m_puthead["Accept"] = "*/*";
		}
		if(!isset($this->m_puthead["User-Agent"]))
		{
			$this->m_puthead["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)";
		}
		if(!isset($this->m_puthead["Refer"]))
		{
			$this->m_puthead["Refer"] = "http://".$this->m_puthead["Host"];
		}

		foreach($this->m_puthead as $k=>$v)
		{
			$k = trim($k);
			$v = trim($v);
			if($k!=""&&$v!="")
			{
				$headString .= "$k: $v\r\n";
			}
		}
		fputs($this->m_fp, $headString);
		if($requestType=="POST")
		{
			$postdata = "";
			if(count($ps)>1)
			{
				for($i=1;$i<count($ps);$i++)
				{
					$postdata .= $ps[$i];
				}
			}
			else
			{
				$postdata = "OK";
			}
			$plen = strlen($postdata);
			fputs($this->m_fp,"Content-Type: application/x-www-form-urlencoded\r\n");
			fputs($this->m_fp,"Content-Length: $plen\r\n");
		}

		//发送固定的结束请求头
		//HTTP1.1协议必须指定文档结束后关闭链接,否则读取文档时无法使用feof判断结束
		if($httpv=="HTTP/1.1")
		{
			fputs($this->m_fp,"Connection: Close\r\n\r\n");
		}
		else
		{
			fputs($this->m_fp,"\r\n");
		}
		if($requestType=="POST")
		{
			fputs($this->m_fp,$postdata);
		}

		//获取应答头状态信息
		$httpstas = explode(" ",fgets($this->m_fp,256));
		$this->m_httphead["http-edition"] = trim($httpstas[0]);
		$this->m_httphead["http-state"] = trim($httpstas[1]);
		$this->m_httphead["http-describe"] = "";
		for($i=2;$i<count($httpstas);$i++)
		{
			$this->m_httphead["http-describe"] .= " ".trim($httpstas[$i]);
		}

		//获取详细应答头
		while(!feof($this->m_fp))
		{
			$line = trim(fgets($this->m_fp,256));
			if($line == "")
			{
				break;
			}
			$hkey = "";
			$hvalue = "";
			$v = 0;
			for($i=0;$i<strlen($line);$i++)
			{
				if($v==1)
				{
					$hvalue .= $line[$i];
				}
				if($line[$i]==":")
				{
					$v = 1;
				}
				if($v==0)
				{
					$hkey .= $line[$i];
				}
			}
			$hkey = trim($hkey);
			if($hkey!="")
			{
				$this->m_httphead[strtolower($hkey)] = trim($hvalue);
			}
		}

		//如果连接被不正常关闭,重试
		if(feof($this->m_fp))
		{
			if($this->reTry > 10)
			{
				return false;
			}
			$this->PrivateStartSession($requestType);
		}

		//判断是否是3xx开头的应答
		if(ereg("^3",$this->m_httphead["http-state"]))
		{
			if($this->JumpCount > 3)
			{
				return;
			}
			if(isset($this->m_httphead["location"]))
			{
				$newurl = $this->m_httphead["location"];
				if(eregi("^http",$newurl))
				{
					$this->JumpOpenUrl($newurl);
				}
				else
				{
					$newurl = $this->FillUrl($newurl);
					$this->JumpOpenUrl($newurl);
				}
			}
			else
			{
				$this->m_error = "无法识别的答复!";
			}
		}
	}

	//获得一个Http头的值
	function GetHead($headname)
	{
		$headname = strtolower($headname);
		return isset($this->m_httphead[$headname]) ? $this->m_httphead[$headname] : '';
	}

	//设置Http头的值
	function SetHead($skey,$svalue)
	{
		$this->m_puthead[$skey] = $svalue;
	}

	//打开连接
	function PrivateOpenHost()
	{
		if($this->m_host=="")
		{
			return false;
		}
		$errno = "";
		$errstr = "";
		$this->m_fp = @fsockopen($this->m_host, $this->m_port, $errno, $errstr,10);
		if(!$this->m_fp)
		{
			$this->m_error = $errstr;
			return false;
		}
		else
		{
			return true;
		}
	}

	//关闭连接
	function Close()
	{
		@fclose($this->m_fp);
	}

	//补全相对网址
	function FillUrl($surl)
	{
		$i = 0;
		$dstr = "";
		$pstr = "";
		$okurl = "";
		$pathStep = 0;
		$surl = trim($surl);
		if($surl=="")
		{
			return "";
		}
		$pos = strpos($surl,"#");
		if($pos>0)
		{
			$surl = substr($surl,0,$pos);
		}
		if($surl[0]=="/")
		{
			$okurl = "http://".$this->HomeUrl.$surl;
		}
		else if($surl[0]==".")
		{
			if(strlen($surl)<=1)
			{
				return "";
			}
			else if($surl[1]=="/")
			{
				$okurl = "http://".$this->BaseUrlPath."/".substr($surl,2,strlen($surl)-2);
			}
			else
			{
				$urls = explode("/",$surl);
				foreach($urls as $u)
				{
					if($u=="..")
					{
						$pathStep++;
					}
					else if($i<count($urls)-1)
					{
						$dstr .= $urls[$i]."/";
					}
					else
					{
						$dstr .= $urls[$i];
					}
					$i++;
				}
				$urls = explode("/",$this->BaseUrlPath);
				if(count($urls) <= $pathStep)
				{
					return "";
				}
				else
				{
					$pstr = "http://";
					for($i=0;$i<count($urls)-$pathStep;$i++)
					{
						$pstr .= $urls[$i]."/";
					}
					$okurl = $pstr.$dstr;
				}
			}
		}
		else
		{
			if(strlen($surl)<7)
			{
				$okurl = "http://".$this->BaseUrlPath."/".$surl;
			}
			else if(strtolower(substr($surl,0,7))=="http://")
			{
				$okurl = $surl;
			}
			else
			{
				$okurl = "http://".$this->BaseUrlPath."/".$surl;
			}
		}
		$okurl = eregi_replace("^(http://)","",$okurl);
		$okurl = eregi_replace("/{1,}","/",$okurl);
		return "http://".$okurl;
	}
}

?>
分享到:
评论

相关推荐

    发布一个用PHP fsockopen写的HTTP下载的类

    为了实现一个HTTP下载类,我们可以创建一个名为`DedeHttpDown`的类,该类使用`fsockopen`来处理HTTP请求和下载。以下是`DedeHttpDown`类的部分代码: ```php class DedeHttpDown { // ... function OpenUrl($url)...

    dede采集https

    改动的文件有4个,由于改动地方有好几处,推荐用下载文件覆盖的方式,避免引起不必要的错误 提示:覆盖之前建议先备份你的这4个PHP文件后再覆盖我的 /include/dedecollection.class.php /include/dedecollection....

    爱不语CMS内容管理系统 v2.6.3.1.zip

    修复得到total错误造成的前台列表翻页数目错误 修改找不到DedeHttpDown类   爱不语CMS内容管理系统是一款基于PHP MySQL开发符合W3C标准的建站系统,体积小巧, 简介的标签语句, 最重要的是本系统开源,无任何加密. ...

    爱不语CMS内容管理系统 v2.6.3.1

    爱不语CMS内容管理系统更新说明: 增加对搜索选项的注入检测 修改部分模板的显示问题 增加自定义表单显示整数...修复得到total错误造成的前台列表翻页数目错误 修改找不到DedeHttpDown类 爱不语CMS内容管理系统是

    爱不语CMS内容管理系统 2.6.3.1

    一款基于PHP+MySQL开发符合W3C标准的建站系统,体积小巧,简介的标签语句,最重要的是本系统开源,无任何加密,为中国企业重新设计的人性化后台...修复得到total错误造成的前台列表翻页数目错误修改找不到DedeHttpDown类

Global site tag (gtag.js) - Google Analytics