프리이미지
사이트 내 전체검색

php robot 감지 함수 모음

페이지 정보

작성일14-07-16 08:40

본문

preg_match('/robot|spider|crawler|curl|^$/i', $_SERVER['HTTP_USER_AGENT']));

function _bot_detected() {

  if (isset($_SERVER['HTTP_USER_AGENT']) && preg_match('/bot|crawl|slurp|spider/i', $_SERVER['HTTP_USER_AGENT'])) {
    return TRUE;
  }
  else {
    return FALSE;
  }

}

$interestingCrawlers = array( 'google', 'yahoo' );
$pattern = '/(' . implode('|', $interestingCrawlers) .')/';
$matches = array();
$numMatches = preg_match($pattern, strtolower($_SERVER['HTTP_USER_AGENT']), $matches, 'i');
if($numMatches > 0) // Found a match
{
  // $matches[1] contains an array of all text matches to either 'google' or 'yahoo'
}


<?php // IPCLOACK HOOK
if (CLOAKING_LEVEL != 4) {
    $lastupdated = date("Ymd", filemtime(FILE_BOTS));
    if ($lastupdated != date("Ymd")) {
    $lists = array(
    'http://labs.getyacg.com/spiders/google.txt',
    'http://labs.getyacg.com/spiders/inktomi.txt',
    'http://labs.getyacg.com/spiders/lycos.txt',
    'http://labs.getyacg.com/spiders/msn.txt',
    'http://labs.getyacg.com/spiders/altavista.txt',
    'http://labs.getyacg.com/spiders/askjeeves.txt',
    'http://labs.getyacg.com/spiders/wisenut.txt',
    );
    foreach($lists as $list) {
    $opt .= fetch($list);
    }
    $opt = preg_replace("/(^[\r\n]*|[\r\n]+)[\s\t]*[\r\n]+/", "\n", $opt);
    $fp =  fopen(FILE_BOTS,"w");
    fwrite($fp,$opt);
    fclose($fp);
    }
    $ip = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
    $ref = isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
    $agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
    $host = strtolower(gethostbyaddr($ip));
    $file = implode(" ", file(FILE_BOTS));
    $exp = explode(".", $ip);
    $class = $exp[0].'.'.$exp[1].'.'.$exp[2].'.';
    $threshold = CLOAKING_LEVEL;
    $cloak = 0;
    if (stristr($host, "googlebot") && stristr($host, "inktomi") && stristr($host, "msn")) {
    $cloak++;
    }
    if (stristr($file, $class)) {
    $cloak++;
    }
    if (stristr($file, $agent)) {
    $cloak++;
    }
    if (strlen($ref) > 0) {
    $cloak = 0;
    }

    if ($cloak >= $threshold) {
    $cloakdirective = 1;
    } else {
    $cloakdirective = 0;
    }
}
?>


<?php
function crawlerDetect($USER_AGENT)
{
$crawlers = array(
'Google' => 'Google',
'MSN' => 'msnbot',
      'Rambler' => 'Rambler',
      'Yahoo' => 'Yahoo',
      'AbachoBOT' => 'AbachoBOT',
      'accoona' => 'Accoona',
      'AcoiRobot' => 'AcoiRobot',
      'ASPSeek' => 'ASPSeek',
      'CrocCrawler' => 'CrocCrawler',
      'Dumbot' => 'Dumbot',
      'FAST-WebCrawler' => 'FAST-WebCrawler',
      'GeonaBot' => 'GeonaBot',
      'Gigabot' => 'Gigabot',
      'Lycos spider' => 'Lycos',
      'MSRBOT' => 'MSRBOT',
      'Altavista robot' => 'Scooter',
      'AltaVista robot' => 'Altavista',
      'ID-Search Bot' => 'IDBot',
      'eStyle Bot' => 'eStyle',
      'Scrubby robot' => 'Scrubby',
      'Facebook' => 'facebookexternalhit',
  );
  // to get crawlers string used in function uncomment it
  // it is better to save it in string than use implode every time
  // global $crawlers
   $crawlers_agents = implode('|',$crawlers);
  if (strpos($crawlers_agents, $USER_AGENT) === false)
      return false;
    else {
    return TRUE;
    }
}
?>

<?php $USER_AGENT = $_SERVER['HTTP_USER_AGENT'];
  if(crawlerDetect($USER_AGENT)) return "no need to lang redirection";?>




function _bot_detected() {

  if (isset($_SERVER['HTTP_USER_AGENT']) && preg_match('/bot|crawl|slurp|spider/i', $_SERVER['HTTP_USER_AGENT'])) {
    return TRUE;
  }
  else {
    return FALSE;
  }

}


interestingCrawlers = array( 'google', 'yahoo' );
$pattern = '/(' . implode('|', $interestingCrawlers) .')/';
$matches = array();
$numMatches = preg_match($pattern, strtolower($_SERVER['HTTP_USER_AGENT']), $matches, 'i');
if($numMatches > 0) // Found a match
{
  // $matches[1] contains an array of all text matches to either 'google' or 'yahoo'
}
추천 0
게시물 검색