`

浏览器指纹等一些反爬手段的调研

阅读更多
很多时候我们做反爬虫都会建立ip黑名单,防止爬虫抓取信息,但是道高一尺魔高一丈:爬虫会使用代理ip跟拨号服务器去抓取内容;这时我们就会使用前端js生成类似浏览器指纹的标识,再配合代码混淆,来判断爬虫;除了浏览器指纹,还有其他手段,见文末;

啥是浏览器指纹?就是说不同电脑的浏览器生成的指纹数值是不一致的;常用的指纹有Canvas指纹;webgl指纹;硬件指纹;AudioContext指纹;
类似:canvas指纹:https://browserleaks.com/canvas


以下是 canvas指纹算法实现:
/**
 * Computes a signed 32-bit integer hash of a string.
 *
 * Uses the recurrence hash = hash * 31 + charCode (written as
 * ((hash << 5) - hash) + code), truncated to 32 bits after every step.
 *
 * @param {string} s - The string to hash.
 * @returns {number} A signed 32-bit integer; 0 for the empty string.
 */
function hashstr(s){
    var hash = 0;
    if (s.length === 0) return hash;
    // Loop variables are declared locally so nothing leaks into the global
    // scope (and the function also works in strict mode / ES modules).
    for (var i = 0; i < s.length; i++) {
        var code = s.charCodeAt(i);
        hash = ((hash << 5) - hash) + code;
        hash = hash & hash; // Convert to 32bit integer
    }
    return hash;
}

/**
 * Draws a fixed scene on a canvas element and returns the hash of the
 * resulting fingerprint string.
 *
 * The fingerprint string is 'canvas winding:yes|no' followed, when
 * canvas.toDataURL exists, by ';canvas fp:' plus the canvas data URL. The
 * string is reduced to a number with hashstr().
 *
 * Adapted from http://t.cn/AiFHoZGV
 *
 * Requires a browser environment (uses the global `document`).
 * NOTE(review): if getContext('2d') returns null the drawing calls below
 * will throw — confirm whether callers need a guard.
 *
 * @returns {number} hashstr() of the fingerprint string.
 */
function getCanvasFp() {
    var result = "";
    // Very simple now, need to make it more complex (geo shapes etc)
    var canvas = document.createElement('canvas');
    canvas.width = 2000;
    canvas.height = 200;
    canvas.style.display = 'inline';
    var ctx = canvas.getContext('2d');

    // detect browser support of canvas winding
    // http://t.cn/R7wzrRy
    // http://t.cn/AiFHoZG5
    ctx.rect(0, 0, 10, 10);
    ctx.rect(2, 2, 6, 6);
    // With the even-odd rule the point (5, 5) lies in the hole between the
    // two rectangles, so `false` means the fill-rule argument is honoured.
    var windingSupported = ctx.isPointInPath(5, 5, 'evenodd') === false;
    result += 'canvas winding:' + (windingSupported ? 'yes' : 'no');

    ctx.textBaseline = 'alphabetic';
    ctx.fillStyle = '#f60';
    ctx.fillRect(125, 1, 62, 20);
    ctx.fillStyle = '#069';
    // http://t.cn/AiFHoZGx
    // Presumably a deliberately nonexistent font name, so the text renders
    // in the browser's fallback font — see the link above.
    ctx.font = '11pt no-real-font-123';

    ctx.fillText('Cwm fjordbank glyphs vext quiz, \ud83d\ude03', 2, 15);
    ctx.fillStyle = 'rgba(111, 204, 0, 0.2)';
    ctx.font = '18pt Arial';
    ctx.fillText('Cwm fjordbank glyphs vext quiz, \ud83d\ude03', 4, 45);

    // canvas blending
    // http://t.cn/AiFHoZGt
    // http://t.cn/AiFHoZGM
    ctx.globalCompositeOperation = 'multiply';
    ctx.fillStyle = 'rgb(255,0,255)';
    ctx.beginPath();
    ctx.arc(50, 50, 50, 0, Math.PI * 2, true);
    ctx.closePath();
    ctx.fill();
    ctx.fillStyle = 'rgb(0,255,255)';
    ctx.beginPath();
    ctx.arc(100, 50, 50, 0, Math.PI * 2, true);
    ctx.closePath();
    ctx.fill();
    ctx.fillStyle = 'rgb(255,255,0)';
    ctx.beginPath();
    ctx.arc(75, 100, 50, 0, Math.PI * 2, true);
    ctx.closePath();
    ctx.fill();
    ctx.fillStyle = 'rgb(255,0,255)';
    // canvas winding
    // http://t.cn/R7wzrRy
    // http://t.cn/AiFHoZGf
    // No beginPath() here: these arcs are appended to the current path
    // before the even-odd fill, exactly as in the original drawing sequence.
    ctx.arc(75, 75, 75, 0, Math.PI * 2, true);
    ctx.arc(75, 75, 25, 0, Math.PI * 2, true);
    ctx.fill('evenodd');

    if (canvas.toDataURL) {
        result += ';canvas fp:' + canvas.toDataURL();
    }
    return hashstr(result);
}

// Demo: compute the canvas fingerprint hash and log it to the console.
console.info(getCanvasFp())



除了指纹,还有webrtc;显卡信息;电池信息;cpu;内存;font校验;plugin;字符串正则校验;digest前后端签名校验;等等都可以作为前端js反爬或者反刷的手段;

字体校验(百度商桥其中一项验证算法)
/**
 * Font availability detector (constructor; use with `new`).
 *
 * On construction it measures a test string rendered in each of three
 * generic base fonts. `detect(font)` then renders the same string with
 * `font` placed ahead of each base font and reports whether the measured
 * size differs from the base measurement for at least one of them.
 *
 * Requires a browser environment where the document already has a <body>
 * element; the measuring span is temporarily appended to it.
 *
 * @constructor
 */
var Detector = function() {
    // A font is compared against all three default fonts; if the rendering
    // matches every one of them, the font is considered not available.
    var baseFonts = ['monospace', 'sans-serif', 'serif'];
    // Fixed string whose rendered size is compared across fonts.
    var testString = "haoroomstestfonts";
    // We test using 72px font size; any size works, larger is presumably
    // better for telling fonts apart.
    var testSize = '72px';
    var h = document.getElementsByTagName("body")[0];

    // Create a SPAN to measure the rendered size of the test string.
    var s = document.createElement("span");
    s.style.fontSize = testSize;
    s.innerHTML = testString;
    var defaultWidth = {};
    var defaultHeight = {};
    // Index loop rather than for...in, so enumerable properties added to
    // Array.prototype are never mistaken for base fonts.
    for (var i = 0; i < baseFonts.length; i++) {
        var base = baseFonts[i];
        s.style.fontFamily = base;
        h.appendChild(s);
        defaultWidth[base] = s.offsetWidth;   // width for the default font
        defaultHeight[base] = s.offsetHeight; // height for the default font
        h.removeChild(s);
    }

    /**
     * Reports whether `font` appears to be available.
     *
     * @param {string} font - Font family name to probe.
     * @returns {boolean} true if rendering with `font` differs in width or
     *     height from at least one base-font rendering.
     */
    function detect(font) {
        for (var j = 0; j < baseFonts.length; j++) {
            var fallback = baseFonts[j];
            // Name of the font along with the base font for fallback.
            s.style.fontFamily = font + ',' + fallback;
            h.appendChild(s);
            var matched = (s.offsetWidth !== defaultWidth[fallback] || s.offsetHeight !== defaultHeight[fallback]);
            h.removeChild(s);
            // One differing measurement is enough; the span has already
            // been detached, so returning early leaves the DOM clean.
            if (matched) {
                return true;
            }
        }
        return false;
    }

    this.detect = detect;
};
	
	/**
	 * Probes a fixed list of candidate font names with Detector and returns
	 * the ones reported as available.
	 *
	 * Only the fonts in the candidate list are checked; this does not
	 * enumerate every font installed on the system.
	 *
	 * @returns {string} Detected font names joined by ", " in list order;
	 *     an empty string if none are detected.
	 */
	var getFonts = function() {
		// Candidate font names to probe.
		var fontArray = ['Samsung Telugu', 'VivoFont', 'ComingSoon', 'SmartZawgyi', 'HTCHand', 'hifont', 'Chococooky', 'slim', 'LockClock', 'FangZhengLTH', 'ifont', 'Cour', 'whatsfont', 'lthjt', 'Amazon Thin', 'iphoneLockClock', 'UIFont', 'mylikefonts', 'Clockopia', 'AndroidClock', 'UDMincho', 'Rosemary', 'fzmwfull', 'Verdana', 'Times', 'ArialMT', 'Zapfino', 'AppleGothic'];
		var detector = new Detector();
		var found = [];
		for (var i = 0; i < fontArray.length; i++) {
			if (detector.detect(fontArray[i])) {
				found.push(fontArray[i]);
			}
		}
		return found.join(", ");
	};
	
	/**
	 * Checks whether a single font is detected as available.
	 *
	 * Builds a fresh Detector on every call.
	 *
	 * @param {string} font - Font family name to probe.
	 * @returns {boolean} The result of Detector#detect for `font`.
	 */
	// Declared with `var` so the assignment does not create an implicit
	// global (which is a ReferenceError in strict mode).
	var isFont = function(font) {
		return new Detector().detect(font);
	};
	
	// Demo: log the comma-separated list of detected fonts to the console.
	console.info(getFonts())




分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics