|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166 |
- <?php
- if (!defined('DEDEINC')) exit('dedebiz');
- /*
- * This file is part of Crawler Detect - the web crawler detection library.
- *
- * (c) Mark Beech <m@rkbee.ch>
- *
- * This source file is subject to the MIT license that is bundled
- * with this source code in the file LICENSE.
- */
- require_once(DEDEINC."/libraries/fixtures/crawlers.php");
- require_once(DEDEINC."/libraries/fixtures/exclusions.php");
- require_once(DEDEINC."/libraries/fixtures/headers.php");
/**
 * Detects web crawlers by matching a user agent string against a compiled
 * list of crawler patterns, after stripping a compiled list of exclusions.
 *
 * The pattern lists and the list of user-agent-bearing headers come from the
 * Crawlers, Exclusions and Headers fixture objects created in the constructor.
 */
class CrawlerDetect
{
    /**
     * The user agent.
     *
     * @var string|null
     */
    protected $userAgent;

    /**
     * Headers that contain a user agent (only keys starting with "HTTP_").
     *
     * @var array
     */
    protected $httpHeaders = array();

    /**
     * Regex matches captured by the most recent isCrawler() call.
     *
     * @var array
     */
    protected $matches = array();

    /**
     * Crawlers object.
     *
     * @var Crawlers
     */
    protected $crawlers;

    /**
     * Exclusions object.
     *
     * @var Exclusions
     */
    protected $exclusions;

    /**
     * Headers object.
     *
     * @var Headers
     */
    protected $uaHttpHeaders;

    /**
     * The compiled regex string.
     *
     * @var string
     */
    protected $compiledRegex;

    /**
     * The compiled exclusions regex string.
     *
     * @var string
     */
    protected $compiledExclusions;

    /**
     * Class constructor.
     *
     * The $headers parameter intentionally carries no "array" type hint: an
     * "array $headers = null" declaration is an implicitly nullable type,
     * which newer PHP versions deprecate. setHttpHeaders() already falls back
     * to $_SERVER for anything that is not a non-empty array.
     *
     * @param array|null  $headers   $_SERVER-style headers, or null to use $_SERVER
     * @param string|null $userAgent explicit user agent, or null to build it from the headers
     */
    public function __construct($headers = null, $userAgent = null)
    {
        $this->crawlers = new Crawlers();
        $this->exclusions = new Exclusions();
        $this->uaHttpHeaders = new Headers();

        $this->compiledRegex = $this->compileRegex($this->crawlers->getAll());
        $this->compiledExclusions = $this->compileRegex($this->exclusions->getAll());

        // Headers must be stored first: setUserAgent() reads them when no
        // explicit user agent is given.
        $this->setHttpHeaders($headers);
        $this->setUserAgent($userAgent);
    }

    /**
     * Compile the regex patterns into one regex string.
     *
     * The patterns are joined verbatim as alternatives inside one group; no
     * escaping is applied here. isCrawler() wraps the result in "/" delimiters.
     *
     * @param array $patterns regex fragments
     *
     * @return string
     */
    public function compileRegex($patterns)
    {
        return '('.implode('|', $patterns).')';
    }

    /**
     * Set HTTP headers.
     *
     * Replaces any previously stored headers.
     *
     * @param array|null $httpHeaders
     */
    public function setHttpHeaders($httpHeaders)
    {
        // Use global _SERVER if $httpHeaders aren't defined.
        if (! is_array($httpHeaders) || ! count($httpHeaders)) {
            $httpHeaders = $_SERVER;
        }

        // Clear existing headers.
        $this->httpHeaders = array();

        // Only save HTTP headers. In PHP land, that means
        // only _SERVER vars that start with HTTP_.
        foreach ($httpHeaders as $key => $value) {
            if (strpos($key, 'HTTP_') === 0) {
                $this->httpHeaders[$key] = $value;
            }
        }
    }

    /**
     * Return user agent headers.
     *
     * @return array
     */
    public function getUaHttpHeaders()
    {
        return $this->uaHttpHeaders->getAll();
    }

    /**
     * Set the user agent.
     *
     * When $userAgent is null, the value is built by concatenating every
     * stored header named by getUaHttpHeaders(), each followed by a space.
     * If none of those headers is present the user agent stays null.
     *
     * @param string|null $userAgent
     *
     * @return string|null the user agent that was stored
     */
    public function setUserAgent($userAgent)
    {
        if (is_null($userAgent)) {
            foreach ($this->getUaHttpHeaders() as $altHeader) {
                if (isset($this->httpHeaders[$altHeader])) {
                    $userAgent .= $this->httpHeaders[$altHeader].' ';
                }
            }
        }

        return $this->userAgent = $userAgent;
    }

    /**
     * Check user agent string against the regex.
     *
     * Exclusion patterns are removed from the agent first (case-insensitive);
     * whatever remains is matched against the crawler patterns. A falsy
     * $userAgent argument falls back to the stored user agent.
     *
     * @param string|null $userAgent
     *
     * @return bool
     */
    public function isCrawler($userAgent = null)
    {
        // Forget the previous result so that getMatches() never reports a
        // match left over from an earlier call when this one returns early.
        $this->matches = array();

        $stripped = preg_replace(
            "/{$this->compiledExclusions}/i",
            '',
            $userAgent ?: $this->userAgent ?: ''
        );

        // preg_replace() yields null when PCRE fails; there is nothing to
        // match in that case, and null must not be handed to trim().
        if ($stripped === null) {
            return false;
        }

        $agent = trim($stripped);

        if ($agent === '') {
            return false;
        }

        return (bool) preg_match("/{$this->compiledRegex}/i", $agent, $this->matches);
    }

    /**
     * Return the matches.
     *
     * @return string|null the text matched by the last isCrawler() call, or null if it found nothing
     */
    public function getMatches()
    {
        return isset($this->matches[0]) ? $this->matches[0] : null;
    }
}
- ?>
|