`

利用AJAX根据所选分类显示子分类下拉列表,特殊情况下显示多选框

阅读更多

本文介绍如何利用AJAX实现分类联动:在一个下拉列表中选择某个选项后,在下一级子下拉列表中显示对应的数据;如果需要同时选择多个子类,则改为显示多选框。

1、FreeMarker 模板(.ftl)页面内容

<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8" />
<title>畅销品抓取-商品列表</title>
<#include  "/WEB-INF/templates/common/include/css.ftl">
<#include  "/WEB-INF/templates/common/include/js.ftl">
</head>

<script type="text/javascript" src="js/category/category.js"></script>
<script type="text/javascript" src="js/jquery/jquery-1.4.2.min.js"></script>
<script type="text/javascript" src="js/jquery/jquery.min.js"></script>
<script type="text/javascript">
	/**
	 * Shows the second- and third-level category selects only while they
	 * contain at least one option; a select without options is hidden.
	 */
	function initPage()
	{
		var selectIds = ["second", "third"];
		for (var idx = 0; idx < selectIds.length; idx++)
		{
			var selectEl = document.getElementById(selectIds[idx]);
			// For a <select>, length is the number of options it holds.
			selectEl.style.display = (selectEl.length == 0) ? "none" : "block";
		}
	}
	
    // Request object shared with the response handlers on this page.
    var xmlHttpRequest;

    /**
     * Asks the server for the first-level categories of the site currently
     * selected in the "siteId" dropdown; handleCategory processes the reply.
     */
    function getFirstLevelCat()
    {
        var siteSelect = document.getElementById("siteId");
        var requestUrl = "${base}/firstLevelCateAjax.action?siteId=" + siteSelect.value;

        xmlHttpRequest = createXmlHttpRequest();
        xmlHttpRequest.onreadystatechange = handleCategory;
        xmlHttpRequest.open("GET", requestUrl, true);
        xmlHttpRequest.send(null);
    }
    
    /**
     * readystatechange handler for the first-level category request.
     * Once the shared request has completed with HTTP 200, fills the
     * "first" select and refreshes the visibility of the lower levels.
     */
    function handleCategory()
    {
        if (xmlHttpRequest.readyState != 4 || xmlHttpRequest.status != 200)
        {
            return;
        }

        // NOTE(review): eval() runs the response body as script; consider
        // JSON.parse once the server is confirmed to emit strict JSON.
        var categoryGroups = eval(xmlHttpRequest.responseText);
        // The third argument is the text of the leading placeholder option.
        addOption(categoryGroups[0], 'first', '\u8bf7\u9009\u62e9');

        // Show or hide the dependent selects.
        initPage();
    }
    
    /**
     * Requests the child categories of a category and renders them into the
     * given select once the response arrives.
     *
     * @param categoryLevel id of the parent category (sent as parentCategoryId)
     * @param selectId      id of the <select> that receives the children
     */
    function getNextLevelCat(categoryLevel,selectId)
    {
        var url="${base}/nextLevelCateAjax.action?parentCategoryId="+encodeURIComponent(categoryLevel);
        // Keep a local reference: the shared global is overwritten as soon as
        // another request starts, so the callback must not read it.
        var request=createXmlHttpRequest();
        // Still publish it globally for handlers that rely on the shared object.
        xmlHttpRequest=request;
        request.onreadystatechange=function(){displayCategory(selectId,request);};
        request.open("GET",url,true);
        request.send(null);
    }

    /**
     * Renders a finished category response into the select with id selectId.
     *
     * @param selectId id of the <select> to fill
     * @param request  (optional) the request whose response is rendered;
     *                 defaults to the shared xmlHttpRequest for callers that
     *                 pass only selectId
     */
    function displayCategory(selectId,request)
    {
        var response=request || xmlHttpRequest;
        if(response.readyState != 4 || response.status != 200)
        {
            return;
        }

        // NOTE(review): eval() runs the response body as script; consider
        // JSON.parse once the server is confirmed to emit strict JSON.
        var array = eval(response.responseText);
        // The third argument is the text of the leading placeholder option.
        addOption(array[0],selectId,'\u8bf7\u9009\u62e9');

        // Show or hide the dependent selects.
        initPage();
    }
    
    /**
     * Validates the form, works out which category (or categories) to crawl
     * and submits the best-seller form.
     *
     * The category is taken, in order of precedence, from: the third-level
     * select, the checked multi-select checkboxes, the second-level select,
     * the first-level select.
     *
     * @param type "children" submits to crawlAllChildren.action; any other
     *             value submits to crawlBestseller.action
     */
    function crawlProduct(type)
    {
        var fetchSize = document.getElementById("fetchSize").value;
        if(!/^[\d]+$/.test(fetchSize))
        {
            alert("抓取数量必须为数字!");
            return;
        }
        // The digits-only check above already rules out a minus sign, so the
        // remaining invalid value is zero.
        if(parseInt(fetchSize, 10) <= 0)
        {
            alert("抓取数量必须大于0!");
            return;
        }

        var categoryId;
        var firstSelect = document.getElementById("first");
        var secondSelect = document.getElementById("second");
        var thirdSelect = document.getElementById("third");
        var checkboxes = $("input[type=checkbox]");

        if(thirdSelect != undefined && thirdSelect.value != '')
        {
            categoryId = thirdSelect.value;
        }
        else if(checkboxes.length>0)
        {
            // Multi-select mode: build "id,amount,id,amount,...," from every
            // checked box and the text field rendered next to it.
            // NOTE(review): the per-category amount is not validated here;
            // confirm how the server treats a blank amount.
            var pairs = "";
            for(var i=0;i<checkboxes.length;i++)
            {
                var box = checkboxes[i];
                // Read the DOM property: attr("checked") reflects the live
                // state only in some jQuery versions.
                if(box.checked)
                {
                    pairs = pairs + box.value + ",";
                    pairs = pairs + $("#checkbox"+box.value).val() + ",";
                }
            }
            categoryId = pairs;
        }
        else if(secondSelect != undefined && secondSelect.value != '')
        {
            categoryId = secondSelect.value;
        }
        else
        {
            categoryId = firstSelect.value;
        }

        if(categoryId == '')
        {
            alert("请选择分类!");
            return;
        }

        document.getElementById("categoryId").value = categoryId;
        var bestsellerForm = document.getElementById("bestsellerForm");
        bestsellerForm.action = (type=="children") ? "crawlAllChildren.action" : "crawlBestseller.action";
        bestsellerForm.submit();
    }
    
    /**
     * Re-targets the best-seller form at the export action and submits it.
     */
    function exportProduct()
    {
        var exportForm = document.getElementById("bestsellerForm");
        exportForm.action = "exportBestseller.action";
        exportForm.submit();
    }
</script>
<body onload="initPage(),getFirstLevelCat();">

<div class="pis_width">

<!-- head begin -->
<#include  "/WEB-INF/templates/head.ftl">
<!-- head end -->

<!-- left begin -->
<#include  "/WEB-INF/templates/left.ftl">
<!-- left end -->

<!-- content begin -->
<div class="pis_content">
<div><font color="red"><b>您的位置:畅销品抓取-商品列表</b></font></div>
</br>

<div><b>请设置待匹配一号店商品:</b></div>
<@s.form id="bestsellerForm" name="productForm" action="${base}/addCategoryMatch.action" method="post">
<table class="mt10" width="98%" align="center" border="0" cellpadding="0" cellspacing="0">
        <tbody valign="middle">
            <tr bgcolor="#edf5fa">
                <td width="20%">网站名称:</td>
                <td style="padding: 0px;" colspan="2">
                    <select id="siteId" name="siteCategory.siteId" value="" onchange="getFirstLevelCat();">
                        <#list siteList as site>
                        <option value="${site.siteId}">${site.siteName}</option>
                        </#list>
                    </select>
                </td>
            </tr>
            <tr bgcolor="#edf5fa">
                <td width="20%">抓取数量:</td>
                <td style="padding: 0px;" colspan="2">
                    <input type="text" id="fetchSize" name="siteCategory.fetchSize" value="100"/>
                    <font color="red">*</font>
                </td>
            </tr>
            <tr><td colspan="3"></td></tr>
            <tr bgcolor="#edf5fa">
                <input type="hidden" id="categoryId" name="siteCategory.ids" value="" />
                <td width="20%">一级分类:</td>
                <td style="padding: 0px;">
                    <select id="first"  onchange="getNextLevelCat(this.value,'second');"></select>
                </td>
                
                <td style="padding: 0px;">
                <input type="button" value="按分类抓取" class="button2" onclick="crawlProduct('this');"/>
                <input type="button" value="抓取子类商品" class="button2" onclick="crawlProduct('children');"/>
                </td>
            </tr>
            <tr bgcolor="#edf5fa">
            <td width="20%">二级分类:</td>
            <td width="80%" colspan="2">
                <span id="level2">
                    <select id="second"  onchange="getNextLevelCat(this.value,'third');"></select>
                </span>
            </td>
        </tr>
        
        <tr bgcolor="#edf5fa">
            <td width="20%">三级分类:</td>
            <td colspan="2">
                <span id="level3"><select id="third"></select>
                </span>
                <span id="level31"></span>
            </td>
        </tr>
        </tbody>
    </table>

    <hr/>

    <input type="hidden" id="pageUrl" name="matchProductDto.pageUrl" value="" />
       <#-- Result section: rendered only when bestsellerList is present. The closing
            </#if> sits after </table> so no stray end tags are emitted otherwise. -->
       <#if bestsellerList ??>
           <div><font color="red"><b>
           <#if siteCategory ??>
         	目标网站:${siteCategory.siteName}
           &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;分类名称:${siteCategory.categoryName}
           &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;抓取商品数目:${siteCategory.fetchSize}
           &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;实际商品数目:${size}
           </#if>
           </b></font></div>
           <br>
       <div><input type="button" value="导出畅销商品" class="button2" onclick="exportProduct();"/></div>
       <br>
         
        <table id="productTable" class="mt10" width="98%" align="center" border="0" cellpadding="0" cellspacing="1">
         <thead> 
         	<tr>
         	<th align="center" width="5%">畅销排名</th>
            <th align="center" width="20%">商品名称</th>
            <th align="center" width="10%">商品价格</th>
            <th align="center" width="10%">商品URL</th>
         	</tr>
         </thead> 
        <tbody id="searchProduct">
            <#list bestsellerList as crawledProduct>
            <tr bgcolor="#edf5fa">
            	<td align="center">${crawledProduct_index+1}</td>
                <td align="center">${crawledProduct.name}</td>

                <#-- Image price takes precedence; the parsed text price is shown next to it. -->
                <#if crawledProduct.imgPrice ??>
                	<td align="center"><img src="${crawledProduct.imgPrice!''}"/>(${crawledProduct.price!''})</td>
                <#elseif crawledProduct.price?exists>
                	<td align="center">${crawledProduct.price!''}</td>
                <#else>
                	<td align="center">缺货</td>	
                </#if>
                <#-- The URL comes from a crawled third-party page, so the new tab gets no opener. -->
                <td align="center"><a href="${crawledProduct.url}" target="_blank" rel="noopener noreferrer">查看商品</a></td>
            </tr>
            </#list>
            </tbody>
        </table>
        </#if>
    </div>
</@s.form>
</div>
</div>
<!-- content end -->
</body>
</html>

 2、JS控制显示和读取数据

//document.charset="UTF-8";

/**
 * Creates a new request object for an AJAX call.
 *
 * The standard XMLHttpRequest is preferred; the ActiveX object is used only
 * as a fallback for browsers that lack the native one.
 *
 * @return a new request object, or null when the browser offers neither
 */
function createXmlHttpRequest()
{
     if (window.XMLHttpRequest)
     {
        return new window.XMLHttpRequest();
     }
     if (window.ActiveXObject)
     {
         return new window.ActiveXObject("Microsoft.XMLHTTP");
     }
     return null;
}
/**
 * Resets the dependent category/product controls and requests the categories
 * for the current selection.
 *
 * @param process     value sent as the "process" query parameter
 * @param result      readystatechange handler that consumes the response
 * @param firstValue  selected first-level category id ("" when none)
 * @param secondValue selected second-level category id ("" when none)
 */
function getCategory(process,result,firstValue,secondValue)
{
    var level2=document.getElementById("level2");
    var level3=document.getElementById("level3");
    var product=document.getElementById("productName");

    // Cut the dependent lists back to their first (placeholder) option.
    document.getElementById('third').options.length=1;
    var productSelect=document.getElementById('product');
    if(productSelect)
    {
        productSelect.options.length=1;
    }

    // Without a first-level choice nothing below it is shown; without a
    // second-level choice the third level and the product area are hidden.
    if(firstValue=="")
    {
        level2.style.display="none";
    }
    if(firstValue=="" || secondValue=="")
    {
        level3.style.display="none";
        if(product)
        {
            product.style.display="none";
        }
    }

    // NOTE(review): "${base}" is only substituted if this script is rendered
    // through the template engine - confirm how this file is served.
    var url="${base}/category_getCategoryAjax.action?process="+encodeURIComponent(process)
        +"&firstId="+encodeURIComponent(firstValue)
        +"&secondId="+encodeURIComponent(secondValue);
    xmlHttpRequest=createXmlHttpRequest();
    xmlHttpRequest.onreadystatechange=result;
    xmlHttpRequest.open("GET",url,true);
    xmlHttpRequest.send(null);
}

/**
 * Requests the products of a category.
 *
 * @param categoryId    category id; when empty, the value of the "second"
 *                      select is used instead
 * @param processMethod readystatechange handler that consumes the response
 */
function getProduct(categoryId,processMethod)
{
    var effectiveId = (categoryId=="") ? document.getElementById("second").value : categoryId;
    var requestUrl = "${base}/getProduct.action?categoryId=" + effectiveId;

    xmlHttpRequest = createXmlHttpRequest();
    xmlHttpRequest.onreadystatechange = processMethod;
    xmlHttpRequest.open("GET", requestUrl, true);
    xmlHttpRequest.send(null);
}

/**
 * Resets the "product" select to its first entry and requests the products
 * to match for a category.
 *
 * @param categoryId    category id; when empty, the value of the "second"
 *                      select is used instead
 * @param processMethod readystatechange handler that consumes the response
 */
function getMatchProduct(categoryId,processMethod)
{
    document.getElementById('product').selectedIndex = 0;

    var effectiveId = (categoryId=="") ? document.getElementById("second").value : categoryId;
    var requestUrl = "${base}/matchProduct_getProductByCategory.action?matchProductDto.categoryId=" + effectiveId;

    xmlHttpRequest = createXmlHttpRequest();
    xmlHttpRequest.onreadystatechange = processMethod;
    xmlHttpRequest.open("GET", requestUrl, true);
    xmlHttpRequest.send(null);
}

/**
 * Encodes text with escape() and then rewrites the result so that "+"
 * becomes "%2B", every two-digit escape of a code >= 128 becomes the
 * "%u00XX" form, and the first "%25" becomes "%u0025".
 *
 * @param text value to encode (converted with toString())
 * @return the encoded string
 */
function uniencode(text)
{
     var encoded = escape(text.toString()).replace(/\+/g, "%2B");
     var hexEscapes = encoded.match(/(%([0-9A-F]{2}))/gi) || [];

     for (var k = 0; k < hexEscapes.length; k++)
     {
         var hexPair = hexEscapes[k].substring(1,3);
         if (parseInt(hexPair, 16) < 128)
         {
             continue;
         }
         // Replaces the first remaining occurrence of this escape.
         encoded = encoded.replace(hexEscapes[k], '%u00' + hexPair);
     }

     return encoded.replace('%25', '%u0025');
}
/**
 * Requests a keyword-based crawl for a product on a given site.
 *
 * @param productId     value sent as the "productId" query parameter
 * @param siteId        value sent as the "siteId" query parameter
 * @param keyword       search keyword, appended to the URL as given
 * @param processMethod readystatechange handler that consumes the response
 */
function getCrawledProduct(productId,siteId,keyword,processMethod)
{
    // NOTE(review): the keyword is sent without URL-encoding; earlier
    // attempts with escape/encodeURIComponent were abandoned here, which
    // suggests the server decoding has to change together with any encoding.
    var requestUrl = [
        "${base}/crawlProductByKeyword.action?productId=", productId,
        "&siteId=", siteId,
        "&keyword=", keyword
    ].join("");

    xmlHttpRequest = createXmlHttpRequest();
    xmlHttpRequest.onreadystatechange = processMethod;
    xmlHttpRequest.open("GET", requestUrl, true);

    // The same header is set three times, in this order, as before.
    var contentTypes = ['application/x-www-form-urlencoded', "text/html", "utf-8"];
    for (var h = 0; h < contentTypes.length; h++)
    {
        xmlHttpRequest.setRequestHeader("Content-Type", contentTypes[h]);
    }

    xmlHttpRequest.send(null);
}


/**
 * readystatechange handler used on page initialisation: loads the
 * first-level categories and hides the second/third-level areas and the
 * product area. The hiding runs on every invocation, not only on success.
 */
function InitResult()
{
    var finished = (xmlHttpRequest.readyState==4 && xmlHttpRequest.status==200);
    if(finished)
    {
       // NOTE(review): eval() runs the response body as script.
       var categoryGroups = eval(xmlHttpRequest.responseText);
       addOption(categoryGroups[0],'first','\u9009\u62e9\u76ee\u5f55');
    }

    // Look up all areas first, then hide them.
    var secondArea=document.getElementById("level2");
    var thirdArea=document.getElementById("level3");
    var productArea=document.getElementById("productName");
    secondArea.style.display="none";
    thirdArea.style.display="none";
    if(productArea != undefined)
    {
        productArea.style.display="none";
    }
}

/**
 * readystatechange handler for a second-level category request: on a
 * completed HTTP 200 response it fills the "second" select and shows the
 * "level2" area.
 */
function SecondResult()
{
    if(xmlHttpRequest.readyState!=4 || xmlHttpRequest.status!=200)
    {
        return;
    }

    // NOTE(review): eval() runs the response body as script.
    var categoryGroups = eval(xmlHttpRequest.responseText);
    addOption(categoryGroups[0],'second','\u5168\u90E8');

    // Make the second-level area visible.
    document.getElementById("level2").style.display="block";
}

/**
 * readystatechange handler for a third-level category request: on a
 * completed HTTP 200 response it fills the "third" select and shows the
 * "level3" area.
 */
function ThirdResult()
{
    if(xmlHttpRequest.readyState!=4 || xmlHttpRequest.status!=200)
    {
        return;
    }

    // NOTE(review): eval() runs the response body as script.
    var categoryGroups = eval(xmlHttpRequest.responseText);
    addOption(categoryGroups[0],'third','\u5168\u90E8');

    // Make the third-level area visible.
    document.getElementById("level3").style.display="block";
}

/**
 * Escapes text for safe use inside HTML content and double-quoted
 * attribute values.
 */
function _escapeCategoryHtml(value)
{
	return String(value)
		.replace(/&/g, "&amp;")
		.replace(/</g, "&lt;")
		.replace(/>/g, "&gt;")
		.replace(/"/g, "&quot;");
}

/**
 * Renders a list of categories into the select with the given id, or - for
 * the third level of the multi-select site - as checkboxes in "level31".
 *
 * @param array categories; each entry is read for id, categoryName and
 *              (first entry only) siteId
 * @param id    id of the <select> to fill
 * @param head  text of the leading placeholder option; '' adds none
 */
function addOption(array,id,head)
{
	var select = document.getElementById(id);
	var checkboxPanel = document.getElementById('level31');
	var selectPanel = document.getElementById('level3');
	select.options.length=0;

	// Site id '9' gets the checkbox UI for its third level (the original
	// note says the production id is 10).
	if(array.length>0 && array[0].siteId == '9' && id == 'third')
	{
		checkboxPanel.style.display="block";
		selectPanel.style.display="none";
		var html='';
		for(var n=0;n<array.length;n++)
		{
			// Names and ids come from crawled data, so escape them before
			// they go into markup.
			var safeId=_escapeCategoryHtml(array[n].id);
			html=html+'<input type="checkbox" name="checkbox" value="'+safeId+'"/>'+_escapeCategoryHtml(trim(array[n].categoryName));
			html=html+'<input type="text" id="checkbox'+safeId+'" name="checkboxvalue"/><br/>';
		}
		checkboxPanel.innerHTML=html;
		return;
	}

	// Dropdown mode. Drop any checkboxes left from an earlier selection so a
	// later form submit cannot pick up stale, hidden choices.
	checkboxPanel.style.display="none";
	checkboxPanel.innerHTML="";
	selectPanel.style.display="block";

	if(head!='')
	{
		var placeholder = document.createElement("OPTION");
		placeholder.text=head;
		placeholder.value='';
		select.options.add(placeholder);
	}
	for(var k=0;k<array.length;k++)
	{
		var option = document.createElement("OPTION");
		option.text=trim(array[k].categoryName);
		option.value=array[k].id;
		select.options.add(option);
	}
}

/**
 * readystatechange handler that fills the "product" select from the first
 * element of the response array and shows the "productName" area.
 */
function listProduct()
{
    if(xmlHttpRequest.readyState!=4 || xmlHttpRequest.status!=200)
    {
        return;
    }

    // NOTE(review): eval() runs the response body as script; consider
    // JSON.parse once the server is confirmed to emit strict JSON.
    var response = eval(xmlHttpRequest.responseText);
    var select = document.getElementById("product");
    select.options.length=0;

    // Leading placeholder option.
    var placeholder = document.createElement("OPTION");
    placeholder.text='\u9009\u62e9\u5546\u54c1';
    placeholder.value='';
    select.options.add(placeholder);

    var products = response[0];
    for(var k=0;k<products.length;k++)
    {
        var option = document.createElement("OPTION");
        option.text=trim(products[k].productCname);
        option.value=products[k].id;
        select.options.add(option);
    }

    // Make the product area visible.
    document.getElementById("productName").style.display="block";
}

/**
 * readystatechange handler that builds one table row per entry of the
 * response array and then shows the "productName" area.
 *
 * NOTE(review): this function looks unfinished:
 *  - it reads tdname and unit, which are not declared in this function;
 *  - the loop index i is an implicit global and is incremented a second
 *    time inside the body (tr.id=++i), so array[i] no longer refers to the
 *    element the iteration started with;
 *  - the created row is never appended to the table;
 *  - a debugging alert() is still in place.
 * Confirm whether it is still in use before relying on it.
 */
function listSearchProduct()
{
    if(xmlHttpRequest.readyState==4 && xmlHttpRequest.status==200)
    {
       // NOTE(review): eval() runs the response body as script.
       var array = eval(xmlHttpRequest.responseText);
       alert("array="+array);
	    for(i=0;i<array.length;i++)
	   	{
	        var table=document.getElementById("productTable");
			var length=table.rows.length;
			var tr=document.createElement("tr");
			// Advances the loop index as a side effect.
			tr.id=++i;

			var td0=document.createElement("td");
			td0.innerHTML="<input type=\"text\" value=\""+array[i]+"\"/>";
			// Original note: without a text input the value cannot be read back
			// later (the traversal used elsewhere expects it without one).
			tr.appendChild(td0);

			var td1=document.createElement("td");
			tdname="第"+i+"行 "+tdname;
			td1.innerHTML="<input type=\"text\" value=\""+tdname+"\" name=\"name\" id=\"name\"/>";
			tr.appendChild(td1);

			var td2=document.createElement("td");
			td2.innerHTML="<input type=\"text\" value=\""+unit+"\" name=\"unit\"/>";
			tr.appendChild(td2);
			var td3=document.createElement("td");
			// td3.id=tr.id;
			td3.innerHTML="<input type=\"button\" value=\"del\" onclick=\"del(this.parentElement.parentElement,this.parentElement.parentElement.id)\"/>";
			tr.appendChild(td3);
	   	}
	   	// Make the product area visible.
	    var product=document.getElementById("productName");
	    product.style.display="block";
    }
}

/**
 * Builds a new four-cell table row and appends it to the element with id
 * "newbody" when the global flag is true after addData(i) has run.
 *
 * NOTE(review): num, tdname, unit, flag, addData and del are not defined in
 * this function; they are presumably globals declared elsewhere - confirm.
 *
 * @param i row id; it is incremented before being assigned to the new row
 */
function addRow(i)
{
	// i is the id of the row
	// Add the row
	var table=document.getElementById("productTable");
	var length=table.rows.length;
	var tr=document.createElement("tr");
	tr.id=++i;

	var td0=document.createElement("td");
	td0.innerHTML="<input type=\"text\" value=\""+num+"\"/>";
	// Original note: without a text input the value cannot be read back
	// later (the traversal used elsewhere expects it without one).
	tr.appendChild(td0);

	var td1=document.createElement("td");
	tdname="第"+i+"行 "+tdname;
	td1.innerHTML="<input type=\"text\" value=\""+tdname+"\" name=\"name\" id=\"name\"/>";
	tr.appendChild(td1);

	var td2=document.createElement("td");
	td2.innerHTML="<input type=\"text\" value=\""+unit+"\" name=\"unit\"/>";
	tr.appendChild(td2);
	var td3=document.createElement("td");
	// td3.id=tr.id;
	td3.innerHTML="<input type=\"button\" value=\"del\" onclick=\"del(this.parentElement.parentElement,this.parentElement.parentElement.id)\"/>";
	tr.appendChild(td3);

	addData(i);
	// Includes data validation
	if(flag==true)
	{
		document.getElementById("newbody").appendChild (tr);
	}
	else
	{
		// Only changes the local parameter; the caller's value is unaffected.
		i--;
	}
}
/**
 * readystatechange handler that fills the "product" select with the
 * products from the first element of the response array (read for name and
 * productId) and shows the "productName" area.
 */
function listMatchProduct()
{
    if(xmlHttpRequest.readyState!=4 || xmlHttpRequest.status!=200)
    {
        return;
    }

    var select = document.getElementById("product");
    select.options.length=0;

    // NOTE(review): eval() runs the response body as script; consider
    // JSON.parse once the server is confirmed to emit strict JSON.
    var response = eval(xmlHttpRequest.responseText);

    // Leading placeholder option.
    var placeholder = document.createElement("OPTION");
    placeholder.text='\u9009\u62e9\u76ee\u5f55';
    placeholder.value='';
    select.options.add(placeholder);

    var products = response[0];
    for(var k=0;k<products.length;k++)
    {
        var option = document.createElement("OPTION");
        option.text=trim(products[k].name);
        option.value=products[k].productId;
        select.options.add(option);
    }

    // Make the product area visible.
    document.getElementById("productName").style.display="block";
}
/**
 * Returns str without leading and trailing whitespace.
 */
function trim(str)
{
	var withoutLeading = str.replace(/^\s+/, "");
	return withoutLeading.replace(/\s+$/, "");
}

 3、如果下拉列表中选中的只是一个分类链接,则直接根据其ID号从数据库读取该分类,然后再抓取对应的网页;如果需求是把多个子分类URL的数据合并显示在同一个页面上,则需要下面附带的Java代码

 /**
  * Crawls the best sellers for the category or categories submitted in
  * siteCategory.ids and stores the merged result for the result page.
  *
  * ids is either a single category id, or - when it contains a comma (the
  * multi-select form) - a list "id,amount,id,amount,...". Only complete
  * (id, amount) pairs are processed. For a single id the amount is
  * siteCategory.fetchSize.
  *
  * Side effects: replaces siteCategory with the last category loaded (its
  * category name becomes all names joined by "-", its fetch size the sum of
  * all amounts), stores it in the session as "params", stores the merged
  * list in the session as "topProducts" and its size in the context as "size".
  *
  * @return SUCCESS
  */
 public String crawlBestseller()
    {
        String rawIds = siteCategory.getIds();
        int defaultAmount = siteCategory.getFetchSize();

        // A comma after the first character marks the multi-select format.
        String[] tokens = rawIds.indexOf(",") > 0 ? rawIds.split(",") : new String[] { rawIds };
        boolean multiSelect = tokens.length > 1;
        // A dangling id without an amount is ignored rather than read past
        // the end of the array.
        int requestCount = multiSelect ? tokens.length / 2 : 1;

        // Build the site lookup once instead of once per category.
        List<SiteDto> siteList = (List<SiteDto>)ActionContext.getContext().getSession().get("siteList");
        Map<Integer,SiteDto> siteMap = new HashMap<Integer,SiteDto>();
        if (siteList != null)
        {
            for(SiteDto site : siteList)
            {
                siteMap.put(site.getSiteId(), site);
            }
        }

        // Results are collected in a list owned by this method so that the
        // lists returned by the service are never modified.
        List<BestSellerDto> merged = new java.util.ArrayList<BestSellerDto>();
        String cateName = "";
        int fetchSize = 0;

        for (int n = 0; n < requestCount; n++)
        {
            int id = Integer.parseInt(tokens[multiSelect ? 2 * n : 0].trim());
            int amount = multiSelect ? Integer.parseInt(tokens[2 * n + 1].trim()) : defaultAmount;

            // Load the category for this id.
            siteCategory = siteCategorySvc.getSiteCategoryById(id);
            fetchSize += amount;
            cateName = cateName.length() == 0
                    ? siteCategory.getCategoryName()
                    : cateName + "-" + siteCategory.getCategoryName();

            // Attach the site name when the site is known.
            SiteDto site = siteMap.get(siteCategory.getSiteId());
            if (site != null)
            {
                siteCategory.setSiteName(site.getSiteName());
            }
            ActionContext.getContext().getSession().put("params", siteCategory);

            // Fetch the best sellers; the service may return null.
            List<BestSellerDto> transferList = bestsellerSvc.fetchBestSeller(id, amount);
            if (transferList == null)
            {
                continue;
            }
            for(BestSellerDto bestseller : transferList)
            {
                String price = bestseller.getPrice();
                if( price != null && price.contains("http"))
                {
                    // The price is an image URL: keep it and try to read the
                    // numeric value from the image.
                    bestseller.setImgPrice(price);
                    BigDecimal decimal = ImgUtils.getDecimalFromImg(price);
                    if(decimal != null)
                    {
                        bestseller.setPrice(decimal.toString());
                    }
                }
            }
            merged.addAll(transferList);
        }

        siteCategory.setCategoryName(cateName);
        siteCategory.setFetchSize(fetchSize);
        bestsellerList = merged;
        ActionContext.getContext().getSession().put("topProducts", bestsellerList);
        ServletActionContext.getContext().put("size", bestsellerList.size());
        return SUCCESS;
    }
 
 /**
  * Crawls the best-seller listing of a category and returns at most
  * amount entries.
  *
  * Pages are fetched until enough entries are collected, a page yields no
  * entries, a page ends with the same product URL as the entries collected
  * so far, or the page limit is reached. Crawl and parse errors are logged
  * and whatever was collected up to that point is returned.
  *
  * @param id     category id
  * @param amount maximum number of entries wanted; must be positive
  * @return the collected entries, never null; empty when amount is not
  *         positive, the category is unknown or has no listing URL
  */
 public List<BestSellerDto> fetchBestSeller(Integer id, int amount) {
        List<BestSellerDto> bestSellers = new ArrayList<BestSellerDto>();

        if (amount <= 0) {
            logger.warn("畅销品数目不能为负数!");
            return bestSellers;
        }
        SiteCategoryDto siteCategory = getSiteCategoryById(id);
        if (siteCategory == null) {
            return bestSellers;
        }

        // Page parameters merged with the crawl parameters of the site.
        Map<String, String> params = pageParamItemDao.getPageConfigBySite(siteCategory.getSiteId());
        params.putAll(crawlerParamItemDao.getCrawlConfigBySite(siteCategory.
                getSiteId()));

        String charset = params.get(PageParamNames.CONTENT_ENCODING);
        // Fixed page size; the configured BS_PAGE_SIZE is not used.
        int pageSize = 20;
        int pages = amount / pageSize + 1;

        logger.info("畅销榜页数:" + pages);

        Crawler crawler = new Crawler(charset);

        try {
            String categoryUrl = siteCategory.getCategoryUrl();
            if (categoryUrl == null || categoryUrl.equals("")) {
                logger.info("此分类不支持畅销榜!");
                return bestSellers;
            }

            String bestSellerHtml = crawler.crawl(categoryUrl);
            PageParser pageParser = PageParserFactory.createPageParser(null, charset,
                    params);
            String nextPageUrlPattern = pageParser.extractNextPageUrlPattern(bestSellerHtml);
            if (categoryUrl.indexOf("suning") > 0) {
                // For this site the first page is re-fetched through the
                // paging URL before it is parsed.
                String nextPageUrl0 = pageParser.getNextPageUrl(
                        nextPageUrlPattern, 1);
                bestSellerHtml = crawler.crawl(nextPageUrl0);
            }

            // Parsers may return null when nothing matched.
            List<BestSellerDto> firstPage = pageParser.extractBestSeller(bestSellerHtml);
            if (firstPage != null) {
                bestSellers.addAll(firstPage);
            }

            if (nextPageUrlPattern != null) {
                // Stop as soon as enough entries are collected; anything
                // beyond amount would be cut off below anyway.
                for (int pageNum = 2; pageNum <= pages
                        && bestSellers.size() < amount; pageNum++) {
                    logger.info("抓取畅销榜第 " + pageNum + " 页");

                    String nextPageUrl = pageParser.getNextPageUrl(
                            nextPageUrlPattern, pageNum);
                    bestSellerHtml = crawler.crawl(nextPageUrl);
                    List<BestSellerDto> moreBestSellers = pageParser.
                            extractBestSeller(bestSellerHtml);
                    if (moreBestSellers == null || moreBestSellers.isEmpty()) {
                        break;
                    }
                    // Same last product as before: the site served the same
                    // page again, so there are no further pages.
                    if (!bestSellers.isEmpty()) {
                        String lastUrl = bestSellers.get(bestSellers.size() - 1).getUrl();
                        String newLastUrl = moreBestSellers.get(moreBestSellers.size() - 1).getUrl();
                        if (lastUrl != null && lastUrl.equals(newLastUrl)) {
                            break;
                        }
                    }
                    bestSellers.addAll(moreBestSellers);
                }
            }
        } catch (IOException e) {
            logger.error("抓取 " + siteCategory.getCategoryName() + " 畅销榜时出现异常!"
                    + "URL为:" + siteCategory.getCategoryUrl(), e);
        } catch (ParserException e) {
            logger.error("解析畅销榜页面时出现异常!" + "URL为:"
                    + siteCategory.getCategoryUrl(), e);
        }

        logger.info("++++++++++++++++++++++++++++++++++");
        logger.info("畅销集合大小:" + bestSellers.size());
        if (bestSellers.size() <= amount) {
            return bestSellers;
        }

        // Return a detached copy: a subList view is tied to its backing list
        // and is not serializable.
        List<BestSellerDto> subList = new ArrayList<BestSellerDto>(bestSellers.subList(0, amount));
        logger.info("----------------------------------");
        logger.info("畅销集合大小:" + subList.size());

        return subList;
    }

 下面是解析器

package com.yihaodian.pis.crawler;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.tags.Bullet;
import org.htmlparser.tags.BulletList;
import org.htmlparser.tags.Div;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.ScriptTag;
import org.htmlparser.tags.Span;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

import com.yihaodian.pis.dto.BestSellerDto;

public class SuningPageParser extends PageParser{
	private static final Logger logger = Logger.getLogger(SuningPageParser.class);

	public SuningPageParser(String html, String charset) {
		super(html, charset);
		// TODO Auto-generated constructor stub
	}

	@Override
	public List<BestSellerDto> extractBestSeller(String bestSellerHtml)
			throws ParserException {
		  List<BestSellerDto> sellers = new ArrayList<BestSellerDto>();

	        Parser parser = Parser.createParser(bestSellerHtml, charset);
	        NodeFilter filter = new HasAttributeFilter("class", "product_list02 profix02 clearfix");
	        NodeList nodeList = parser.extractAllNodesThatMatch(filter);

	        if (nodeList == null || nodeList.size() == 0) {
	            return null;
	        }

	        Node div = nodeList.elementAt(0);
	        NodeList divchildren = div.getChildren();
	        BulletList ul = (BulletList)divchildren.elementAt(1);
	        NodeList children = ul.getChildren();

	        BestSellerDto bestSeller = null;
	        for (int i = 0; i < children.size(); i++) {
	            bestSeller = new BestSellerDto();

	            Node child = children.elementAt(i);

	            if (child instanceof Bullet) {
	                Bullet li = (Bullet) child;
	                
	                Span nameDiv = (Span) findTagByClassName(li, "pro_intro");
	                //寻找tagName是 LinkTag 的那个 
	                LinkTag link = (LinkTag)findTagByName(nameDiv,"LinkTag");               
	                bestSeller.setName(link.getLinkText());
	                bestSeller.setUrl("http://www.suning.cn"+link.getLink());

	                Span pricespan = (Span) findTagByClassName(li, "pro_price");
	                String priceteString =pricespan.getChildrenHTML().replace("<em>", "").replace("</em>", "").replace("¥", "");
	                bestSeller.setPrice(priceteString);
	                logger.info("畅销单品:" + bestSeller);
	                sellers.add(bestSeller);
	            } else {
	                continue;
	            }
	        }

	        return sellers;
	}

	/**
	 * Builds the URL pattern for subsequent best-seller pages from the inline
	 * JavaScript of the given page.
	 *
	 * <p>Every tag carrying {@code type="text/javascript"} is inspected. For each
	 * script whose first child node contains {@code "&currentPage="}, the string
	 * literals assigned to the script variables {@code jumpUrl} and {@code dfy}
	 * are extracted and concatenated (jumpUrl first). If several scripts qualify,
	 * the last one wins. A fixed sort/query suffix ending in
	 * {@code "&currentPage="} is then appended; the page number itself is not
	 * part of the returned pattern.
	 *
	 * <p>A variable that cannot be found contributes an empty string, so the
	 * literal text "null" is never embedded in the result.
	 *
	 * @param bestSellerHtml HTML source of a best-seller list page
	 * @return the URL pattern, always ending in {@code "&currentPage="}
	 * @throws ParserException if the HTML parser fails while extracting nodes
	 */
	@Override
	public String extractNextPageUrlPattern(String bestSellerHtml)
			throws ParserException {
		String nextPageUrl = "";
		Parser parser = Parser.createParser(bestSellerHtml, "utf-8");
		NodeFilter filter = new HasAttributeFilter("type", "text/javascript");
		NodeList children = parser.extractAllNodesThatMatch(filter);

		// Treat a null list like an empty one so the loop below cannot dereference null.
		int scriptCount = (children == null) ? 0 : children.size();
		if (scriptCount == 0) {
			System.out.println("没有值");
		} else {
			System.out.println("有值");
		}

		// Compiled once, outside the loop. The bounded {0,100} quantifiers keep the
		// look-behind of finite length, which java.util.regex requires.
		Pattern jumpUrlPattern =
				Pattern.compile("(?<=var[ \\s]{0,100}(jumpUrl)[\\s]{0,100}[=][\\s]{0,100}[\"])(.*?)(?=\"\\s{0,100}[+])");
		Pattern dfyPattern =
				Pattern.compile("(?<=var[ \\s]{0,100}dfy\\s{0,100}=\\s{0,100}[\"])(.*?)(?=[\"][\\s]{0,100})");

		for (int i = 0; i < scriptCount; i++) {
			// The filter matches on an attribute, not on the tag name, so verify the
			// node type instead of casting blindly.
			Object candidate = children.elementAt(i);
			if (!(candidate instanceof ScriptTag)) {
				continue;
			}
			ScriptTag child = (ScriptTag) candidate;

			// Only scripts whose first child node mentions the paging parameter are relevant.
			if (child.findPositionOf("&currentPage=") != 0) {
				continue;
			}

			String scriptBody = child.getChildrenHTML();

			String dfyValue = "";
			Matcher dfyMatcher = dfyPattern.matcher(scriptBody);
			if (dfyMatcher.find()) {
				dfyValue = dfyMatcher.group(0).trim();
			}

			String jumpUrlValue = "";
			Matcher jumpUrlMatcher = jumpUrlPattern.matcher(scriptBody);
			if (jumpUrlMatcher.find()) {
				jumpUrlValue = jumpUrlMatcher.group(0).trim();
			}

			nextPageUrl = jumpUrlValue + dfyValue;
		}

		// The pattern deliberately ends at "currentPage="; no page number is added here.
		nextPageUrl += "&ip_sortBy=salevolumn0&sortType=4&currentPage=";
		logger.info("畅销榜下一页URL模式:" + nextPageUrl);

		return nextPageUrl;
	}

	/**
	 * Produces the absolute URL of a result page.
	 *
	 * <p>The result is the fixed servlet base URL, followed by the supplied
	 * pattern, followed by {@code pageNum - 1}.
	 *
	 * @param nextPageUrlPattern relative URL pattern to which the page index is appended
	 * @param pageNum page number; the value written into the URL is {@code pageNum - 1}
	 * @return the assembled URL
	 */
	@Override
	public String getNextPageUrl(String nextPageUrlPattern, int pageNum) {
		final String servletBase = "http://www.suning.cn/webapp/wcs/stores/servlet/";
		final int pageIndex = pageNum - 1;

		return servletBase + nextPageUrlPattern + pageIndex;
	}

	/**
	 * Unimplemented stub: the argument is ignored.
	 *
	 * @param params not read by this implementation
	 * @return always {@code null}
	 * @throws ParserException declared by the signature; this stub never throws it
	 */
	@Override
	public String extractName(Map<String, String> params) throws ParserException {
		return null;
	}

	/**
	 * Unimplemented stub: the argument is ignored.
	 *
	 * @param params not read by this implementation
	 * @return always {@code null}
	 * @throws ParserException declared by the signature; this stub never throws it
	 */
	@Override
	public String extractPrice(Map<String, String> params) throws ParserException {
		return null;
	}

	/**
	 * Unimplemented stub: the argument is ignored.
	 *
	 * @param params not read by this implementation
	 * @return always {@code null}
	 * @throws ParserException declared by the signature; this stub never throws it
	 */
	@Override
	public String extractBrand(Map<String, String> params) throws ParserException {
		return null;
	}

	/**
	 * Unimplemented stub: the argument is ignored.
	 *
	 * @param params not read by this implementation
	 * @return always {@code null}
	 * @throws ParserException declared by the signature; this stub never throws it
	 */
	@Override
	public String extractImageUrl(Map<String, String> params) throws ParserException {
		return null;
	}

	/**
	 * Unimplemented stub: the argument is ignored.
	 *
	 * @param params not read by this implementation
	 * @return always {@code false}
	 * @throws ParserException declared by the signature; this stub never throws it
	 */
	@Override
	public boolean hasProduct(Map<String, String> params) throws ParserException {
		return false;
	}

}

以上代码用于抓取各大网站的分类目录,并对目录下的商品页面进行解析,最终得到一个商品列表(list)。

如有疑问,请联系 QQ:526151410

分享到:
评论

相关推荐

    带复选框且支持搜索功能的下拉列表1(1--14)

    标题“带复选框且支持搜索功能的下拉列表1(1--14)”提示我们,这个主题聚焦于一种特殊的下拉列表设计,它结合了复选框和搜索功能,适用于用户界面(UI)设计,特别是在网页开发中。这种组件允许用户在下拉菜单中多选...

    jQuery带分页的下拉选择框插件.zip

    当用户在输入框中输入字符时,插件会根据已有的数据集筛选出匹配的选项,并动态地展示出来,这样用户无需滚动长长的下拉列表就能找到想要的选项。 多选功能则让使用者可以同时选择多个选项,这对于需要进行复选操作...

    ExtJs 中文文档

    - **ComboBox**:下拉列表组件,常用于收集用户输入的选择。 #### 十一、Ajax 与 ComboBox - **Ext.Ajax 类**:用于处理异步请求,可以轻松地与服务器端进行数据交互。 - **文件上传**:利用 Ajax 实现文件的上传...

    轻松搞定Extjs

    - **带复选框的节点**: 介绍了如何在树节点中添加复选框功能,以便进行更精细的选择控制。 本书通过对Extjs框架各个方面的详细介绍,不仅为读者提供了一个全面的学习指南,也为实际开发工作打下了坚实的基础。无论...

    form表单标签大全

    - **单个表单元素标签**:这类标签用于创建表单中的各种元素,如文本框、复选框、下拉列表等。 #### 表单标签的通用属性 所有表单标签处理类都继承自 `UIBean` 类,这使得它们共享了一组通用属性,主要包括: 1. ...

    Wicket.pdf(Wicket开发指南)

    - **DropDownChoice:** 下拉列表。 - **ListBox:** 列表选择框。 - **SubmitLink:** 提交按钮。 #### 五、部署 Wicket 应用程序 - **部署 Wicket 1.2 程序:** - 针对 Wicket 1.2 版本的应用程序,部署过程...

    jquery模拟SELECT下拉框取值效果

    在这段脚本中,`slideDown`和`slideUp`方法分别用于显示和隐藏下拉列表。同时,通过`.val()`方法可以更新输入框的值,并通过`.text()`获取选中项的文本内容。 #### 总结 通过上述示例代码和解释,我们可以理解到...

    BootStrap智能表单demo示例详解

    级联下拉是指一个下拉列表的选择会影响另一个下拉列表的选项。这通常涉及到两个或多个下拉框的联动,可以通过监听事件和服务器通信来实现。 9. **图片上传**: Bootstrap提供了基础的文件输入组件,但图片上传...

    JQuery从头学起第三讲

    下拉列表框(select元素)的使用相对特殊一些,获取选中的下拉项的value值使用的是.val()方法,但若想获取选中项的显示文本,则需要先通过.val()获取到value值,再利用这个value值作为参数,使用JQuery的.text()或....

    Java_Web开发实战1200例第1卷.part2

    18.2 下拉列表与菜单的应用 706 18.3 单选按钮 713 18.4 复选框 716 18.5 密码域 717 18.6 表单的应用 718 第19章 表格的操作 724 19.1 应用JavaScript操作表格 725 19.2 对单元格进行控制 737 19.3 表格的特殊效果 ...

    Java_Web开发实战1200例第1卷.part3

    18.2 下拉列表与菜单的应用 706 18.3 单选按钮 713 18.4 复选框 716 18.5 密码域 717 18.6 表单的应用 718 第19章 表格的操作 724 19.1 应用JavaScript操作表格 725 19.2 对单元格进行控制 737 19.3 表格的特殊效果 ...

    Jquery 表单取值赋值的一些基本操作

    对于下拉框、单选框和多选框,我们同样可以利用 JQuery 来获取和设置它们的值。 **示例代码:** ```javascript // 获取下拉框选中项的文本 var cc1 = $(".formcselect option:selected").text(); // 获取单选按钮...

Global site tag (gtag.js) - Google Analytics