Skip to content

Commit

Permalink
Improved CS and architecture here and there
Browse files Browse the repository at this point in the history
  • Loading branch information
Toflar committed Oct 22, 2019
1 parent 5753cb1 commit 82f734f
Show file tree
Hide file tree
Showing 19 changed files with 214 additions and 117 deletions.
27 changes: 19 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -263,19 +263,30 @@ $escargot->addSubscriber(new HtmlCrawlerSubscriber());

There are different configurations you can apply to the `Escargot` instance:

* `Escargot::setMaxRequests(int $maxRequests)`
* `Escargot::withMaxRequests(int $maxRequests)`

Lets you allow the maximum total requests that are going to be executed. It can be useful if you have limited
resources and only want to execute e.g. `100` requests in this run and continue later on.
Returns a clone of the `Escargot` instance with a limit on the total number of requests that are going to be executed.
It can be useful if you have limited resources and only want to execute e.g. `100` requests in this run and continue later on.

* `Escargot::withUserAgent(string $userAgent)`

Returns a clone of the `Escargot` instance with a different `User-Agent` header. The header is sent with all the
requests and by default configured to `terminal42/escargot`.

* `Escargot::setConcurrency(int $concurrency)`
* `Escargot::withConcurrency(int $concurrency)`

Returns a clone of the `Escargot` instance with a limit on the number of concurrent requests that are sent at a time.
By default, this is configured to `10`.

* `Escargot::withRequestDelay(int $requestDelay)`

Lets you configure the maximum concurrent requests that are being sent. By default, this is configured to `10`.
Returns a clone of the `Escargot` instance with an added delay between requests, specified in microseconds. By default,
there's no extra delay. It can be useful to make sure `Escargot` does not run into some (D)DoS protection or similar issues.

* `Escargot::setRequestDelay(int $requestDelay)`
* `Escargot::withEventDispatcher(EventDispatcherInterface $eventDispatcher)`

Lets you configure the delay between requests in microseconds. By default, it's `0` so there's no extra
delay. It can be useful to make sure `Escargot` does not run into some (D)DOS protection or similar issues.
Returns a clone of the `Escargot` instance with your custom implementation of the `EventDispatcherInterface` in case
you don't want to use the default `EventDispatcher`.

* `Escargot::setLogger(LoggerInterface $logger)`

Expand Down
1 change: 1 addition & 0 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"require-dev": {
"doctrine/dbal": "^2.9",
"friendsofphp/php-cs-fixer": "^2.15",
"nunomaduro/phpinsights": "^1.9",
"symfony/finder": "^4.3",
"symfony/phpunit-bridge": "^4.3"
},
Expand Down
63 changes: 63 additions & 0 deletions phpinsights.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?php

declare(strict_types=1);

use PHP_CodeSniffer\Standards\Generic\Sniffs\Formatting\SpaceAfterNotSniff;
use SlevomatCodingStandard\Sniffs\Classes\SuperfluousAbstractClassNamingSniff;
use SlevomatCodingStandard\Sniffs\Classes\SuperfluousExceptionNamingSniff;
use SlevomatCodingStandard\Sniffs\Classes\SuperfluousInterfaceNamingSniff;
use SlevomatCodingStandard\Sniffs\ControlStructures\DisallowYodaComparisonSniff;

// PHP Insights configuration for this project: selects the preset and lists
// the insights that are added, removed or configured on top of it.
return [

/*
|--------------------------------------------------------------------------
| Default Preset
|--------------------------------------------------------------------------
|
| This option controls the default preset that will be used by PHP Insights
| to make your code reliable, simple, and clean. However, you can always
| adjust the `Metrics` and `Insights` below in this configuration file.
|
| Supported: "default", "laravel", "symfony", "magento2", "drupal"
|
*/

'preset' => 'symfony',

/*
|--------------------------------------------------------------------------
| Configuration
|--------------------------------------------------------------------------
|
| Here you may adjust all the various `Insights` that will be used by PHP
| Insights. You can either add, remove or configure `Insights`. Keep in
| mind, that all added `Insights` must belong to a specific `Metric`.
|
*/

// Nothing is excluded from analysis; the entry below is only a placeholder.
'exclude' => [
// 'path/to/directory-or-file'
],

// No additional insights are registered; the entry below is only a placeholder.
'add' => [
// ExampleMetric::class => [
// ExampleInsight::class,
// ]
],

// Insights removed from the preset. Each one appears to conflict with a
// convention the code base follows (examples taken from the project sources).
'remove' => [
// Comparisons are written Yoda-style, e.g. `null !== $foundOn`.
DisallowYodaComparisonSniff::class,
// Abstract classes carry the "Abstract" prefix, e.g. `AbstractEscargotEvent`.
SuperfluousAbstractClassNamingSniff::class,
// Exceptions carry the "Exception" suffix, e.g. `InvalidJobIdException`.
SuperfluousExceptionNamingSniff::class,
// Interfaces carry the "Interface" suffix, e.g. `QueueInterface`.
SuperfluousInterfaceNamingSniff::class,
// Negations are written without a space after `!`, e.g. `!$queue->isJobIdValid($jobId)`.
SpaceAfterNotSniff::class
],

// No per-insight options are overridden; the entry below is only a placeholder.
'config' => [
// ExampleInsight::class => [
// 'key' => 'value',
// ],
],

];
12 changes: 7 additions & 5 deletions src/BaseUriCollection.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@

use Psr\Http\Message\UriInterface;

class BaseUriCollection implements \IteratorAggregate, \Countable
final class BaseUriCollection implements \IteratorAggregate, \Countable
{
/**
* @var UriInterface[]
* @var array<UriInterface>
*/
private $baseUris = [];

/**
* @param UriInterface[] $baseUris
* @param array<UriInterface> $baseUris
*/
public function __construct(array $baseUris = [])
{
Expand All @@ -31,10 +31,12 @@ public function __construct(array $baseUris = [])
}
}

public function add(UriInterface $baseUri)
public function add(UriInterface $baseUri): self
{
$baseUri = CrawlUri::normalizeUri($baseUri);
$this->baseUris[(string) $baseUri] = $baseUri;

return $this;
}

public function contains(UriInterface $baseUri): bool
Expand Down Expand Up @@ -71,7 +73,7 @@ public function mergeWith(self $collection): self
}

/**
* @return UriInterface[]
* @return array<UriInterface>
*/
public function all(): array
{
Expand Down
12 changes: 6 additions & 6 deletions src/CrawlUri.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

use Psr\Http\Message\UriInterface;

class CrawlUri
final class CrawlUri
{
/**
* @var UriInterface
Expand All @@ -41,24 +41,24 @@ class CrawlUri
*/
private $foundOn = null;

public function __construct(UriInterface $uri, int $level, bool $processed = false, UriInterface $foundOn = null)
public function __construct(UriInterface $uri, int $level, bool $processed = false, ?UriInterface $foundOn = null)
{
$this->uri = static::normalizeUri($uri);
$this->uri = self::normalizeUri($uri);
$this->level = $level;
$this->processed = $processed;

if (null !== $foundOn) {
$this->foundOn = static::normalizeUri($foundOn);
$this->foundOn = self::normalizeUri($foundOn);
}
}

public function __toString()
public function __toString(): string
{
return sprintf('URI: %s (Level: %d, Processed: %s, Found on: %s)',
(string) $this->getUri(),
$this->getLevel(),
$this->isProcessed() ? 'yes' : 'no',
(string) $this->getFoundOn() ?: 'root'
(string) ($this->getFoundOn() ? $this->getFoundOn() : 'root')
);
}

Expand Down
95 changes: 55 additions & 40 deletions src/Escargot.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
use Symfony\Component\HttpClient\HttpClient;
use Symfony\Contracts\HttpClient\ChunkInterface;
use Symfony\Contracts\HttpClient\Exception\ClientExceptionInterface;
use Symfony\Contracts\HttpClient\Exception\RedirectionExceptionInterface;
use Symfony\Contracts\HttpClient\Exception\ServerExceptionInterface;
use Symfony\Contracts\HttpClient\Exception\TransportExceptionInterface;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Symfony\Contracts\HttpClient\ResponseInterface;
use Terminal42\Escargot\Event\FinishedCrawlingEvent;
use Terminal42\Escargot\Event\PreRequestEvent;
use Terminal42\Escargot\Event\RequestExceptionEvent;
Expand Down Expand Up @@ -105,21 +107,22 @@ final class Escargot implements LoggerAwareInterface
*/
private $runningRequests = 0;

private function __construct(QueueInterface $queue, string $jobId, BaseUriCollection $baseUris, HttpClientInterface $client = null)
private function __construct(QueueInterface $queue, string $jobId, BaseUriCollection $baseUris, ?HttpClientInterface $client = null)
{
$this->client = $client;
$this->queue = $queue;
$this->jobId = $jobId;
$this->baseUris = $baseUris;

$this->setUserAgent(self::DEFAULT_USER_AGENT);
$this->userAgent = self::DEFAULT_USER_AGENT;
}

public function setEventDispatcher(EventDispatcherInterface $eventDispatcher): self
public function withEventDispatcher(EventDispatcherInterface $eventDispatcher): self
{
$this->eventDispatcher = $eventDispatcher;
$new = clone $this;
$new->eventDispatcher = $eventDispatcher;

return $this;
return $new;
}

public function getEventDispatcher(): EventDispatcherInterface
Expand All @@ -136,33 +139,41 @@ public function getUserAgent(): string
return $this->userAgent;
}

public function setUserAgent(string $userAgent): self
public function withUserAgent(string $userAgent): self
{
$this->userAgent = $userAgent;
$new = clone $this;
$new->userAgent = $userAgent;

return $this;
return $new;
}

public function setMaxRequests(int $maxRequests): void
public function withMaxRequests(int $maxRequests): self
{
$this->maxRequests = $maxRequests;
$new = clone $this;
$new->maxRequests = $maxRequests;

return $new;
}

public function setConcurrency(int $concurrency): void
public function withConcurrency(int $concurrency): self
{
$this->concurrency = $concurrency;
$new = clone $this;
$new->concurrency = $concurrency;

return $new;
}

public function getRequestDelay(): int
{
return $this->requestDelay;
}

public function setRequestDelay(int $requestDelay): self
public function withRequestDelay(int $requestDelay): self
{
$this->requestDelay = $requestDelay;
$new = clone $this;
$new->requestDelay = $requestDelay;

return $this;
return $new;
}

public function addSubscriber(EventSubscriberInterface $subscriber): self
Expand Down Expand Up @@ -216,10 +227,7 @@ public function getRequestsSent(): int
return $this->requestsSent;
}

/**
* @throws InvalidJobIdException if the provided job ID could not be retrieved by the queue
*/
public static function createFromJobId(string $jobId, QueueInterface $queue, HttpClientInterface $client = null): self
public static function createFromJobId(string $jobId, QueueInterface $queue, ?HttpClientInterface $client = null): self
{
if (!$queue->isJobIdValid($jobId)) {
throw new InvalidJobIdException(sprintf('Job ID "%s" is invalid!', $jobId));
Expand All @@ -233,7 +241,7 @@ public static function createFromJobId(string $jobId, QueueInterface $queue, Htt
);
}

public static function create(BaseUriCollection $baseUris, QueueInterface $queue, HttpClientInterface $client = null): self
public static function create(BaseUriCollection $baseUris, QueueInterface $queue, ?HttpClientInterface $client = null): self
{
if (0 === \count($baseUris)) {
throw new InvalidJobIdException('Cannot create an Escargot instance with an empty BaseUriCollection!');
Expand Down Expand Up @@ -286,6 +294,8 @@ public function addUriToQueue(UriInterface $uri, CrawlUri $foundOn, bool $proces

/**
* Logs a message to the logger if one was provided.
*
* @param array<string,array|string|int> $context
*/
public function log(string $level, string $message, array $context = []): void
{
Expand All @@ -296,33 +306,38 @@ public function log(string $level, string $message, array $context = []): void
$this->logger->log($level, $message, $context);
}

/**
* @param array<ResponseInterface> $responses
*/
private function processResponses(array $responses): void
{
foreach ($this->getClient()->stream($responses) as $response => $chunk) {
try {
// Dispatch event
$event = new ResponseEvent($this, $response, $chunk);
$this->getEventDispatcher()->dispatch($event);

// Response was canceled by listener
if ($event->responseWasCanceled()) {
--$this->runningRequests;
continue;
}

if ($chunk->isLast()) {
--$this->runningRequests;
}
} catch (TransportExceptionInterface | RedirectionExceptionInterface | ClientExceptionInterface | ServerExceptionInterface $e) {
--$this->runningRequests;
$this->getEventDispatcher()->dispatch(new RequestExceptionEvent($this, $e, $response));
}
$this->processResponseChunk($response, $chunk);
}

// Continue crawling
$this->crawl();
}

/**
 * Handles a single streamed chunk of a response.
 *
 * A ResponseEvent is dispatched for the chunk. The running request counter
 * is decremented once a listener canceled the response or the chunk is the
 * last one. If one of the HTTP client exceptions listed in the catch clause
 * is thrown, the counter is decremented as well and a RequestExceptionEvent
 * is dispatched instead.
 */
private function processResponseChunk(ResponseInterface $response, ChunkInterface $chunk): void
{
    try {
        $responseEvent = new ResponseEvent($this, $response, $chunk);
        $this->getEventDispatcher()->dispatch($responseEvent);

        // Still in flight: neither canceled by a listener nor the final chunk.
        if (!$responseEvent->responseWasCanceled() && !$chunk->isLast()) {
            return;
        }

        --$this->runningRequests;
    } catch (TransportExceptionInterface | RedirectionExceptionInterface | ClientExceptionInterface | ServerExceptionInterface $exception) {
        --$this->runningRequests;

        $failureEvent = new RequestExceptionEvent($this, $exception, $response);
        $this->getEventDispatcher()->dispatch($failureEvent);
    }
}

/**
* @return array<ResponseInterface>
*/
private function prepareResponses(): array
{
$responses = [];
Expand Down Expand Up @@ -360,10 +375,10 @@ private function prepareResponses(): array
]);
++$this->runningRequests;
++$this->requestsSent;
} catch (TransportExceptionInterface $e) {
} catch (TransportExceptionInterface $exception) {
--$this->runningRequests;

$this->getEventDispatcher()->dispatch(new RequestExceptionEvent($this, $e));
$this->getEventDispatcher()->dispatch(new RequestExceptionEvent($this, $exception));
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/Event/FinishedCrawlingEvent.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@

namespace Terminal42\Escargot\Event;

class FinishedCrawlingEvent extends AbstractEscargotEvent
final class FinishedCrawlingEvent extends AbstractEscargotEvent
{
}
Loading

0 comments on commit 82f734f

Please sign in to comment.