Skip to content
This repository has been archived by the owner on Jul 6, 2020. It is now read-only.

Commit

Permalink
udpate picofeed
Browse files Browse the repository at this point in the history
  • Loading branch information
Bernhard Posselt committed Dec 22, 2014
1 parent d2d16c4 commit 5697f7c
Show file tree
Hide file tree
Showing 36 changed files with 902 additions and 114 deletions.
9 changes: 5 additions & 4 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion vendor/autoload.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

require_once __DIR__ . '/composer' . '/autoload_real.php';

return ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef::getLoader();
return ComposerAutoloaderInit473bffa75e8c08e86770574b2fe57877::getLoader();
10 changes: 5 additions & 5 deletions vendor/composer/autoload_real.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

// autoload_real.php @generated by Composer

class ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef
class ComposerAutoloaderInit473bffa75e8c08e86770574b2fe57877
{
private static $loader;

Expand All @@ -19,9 +19,9 @@ public static function getLoader()
return self::$loader;
}

spl_autoload_register(array('ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef', 'loadClassLoader'), true, true);
spl_autoload_register(array('ComposerAutoloaderInit473bffa75e8c08e86770574b2fe57877', 'loadClassLoader'), true, true);
self::$loader = $loader = new \Composer\Autoload\ClassLoader();
spl_autoload_unregister(array('ComposerAutoloaderInitb70f37963a41b6db289ef240676024ef', 'loadClassLoader'));
spl_autoload_unregister(array('ComposerAutoloaderInit473bffa75e8c08e86770574b2fe57877', 'loadClassLoader'));

$includePaths = require __DIR__ . '/include_paths.php';
array_push($includePaths, get_include_path());
Expand All @@ -46,14 +46,14 @@ public static function getLoader()

$includeFiles = require __DIR__ . '/autoload_files.php';
foreach ($includeFiles as $file) {
composerRequireb70f37963a41b6db289ef240676024ef($file);
composerRequire473bffa75e8c08e86770574b2fe57877($file);
}

return $loader;
}
}

function composerRequireb70f37963a41b6db289ef240676024ef($file)
function composerRequire473bffa75e8c08e86770574b2fe57877($file)
{
require $file;
}
8 changes: 4 additions & 4 deletions vendor/composer/installed.json
Original file line number Diff line number Diff line change
Expand Up @@ -119,18 +119,18 @@
"source": {
"type": "git",
"url": "https://github.com/fguillot/picoFeed.git",
"reference": "6485f32d62698be73c3f0456bb87d960fcae1586"
"reference": "11589851f91cc3f04c84ba873484486d1457e638"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/6485f32d62698be73c3f0456bb87d960fcae1586",
"reference": "6485f32d62698be73c3f0456bb87d960fcae1586",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/11589851f91cc3f04c84ba873484486d1457e638",
"reference": "11589851f91cc3f04c84ba873484486d1457e638",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"time": "2014-12-16 23:53:59",
"time": "2014-12-22 03:23:04",
"type": "library",
"installation-source": "dist",
"autoload": {
Expand Down
1 change: 1 addition & 0 deletions vendor/fguillot/picofeed/README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Authors
- Major Contributors:
- [Bernhard Posselt](https://github.com/Raydiation)
- [David Pennington](https://github.com/Xeoncross)
- [Mathias Kresin](https://github.com/mkresin)

Real world usage
----------------
Expand Down
1 change: 1 addition & 0 deletions vendor/fguillot/picofeed/docs/feed-parsing.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Feed::language = en-US
Feed::description =
Feed::logo =
Feed::items = 15 items
Feed::isRTL() = false
----
Item::id = 38d8f48284fb03940cbb3aff9101089b81e44efb1281641bdd7c3e7e4bf3b0cd
Item::title = openSUSE 13.2 : nouvelle version du caméléon disponible !
Expand Down
28 changes: 26 additions & 2 deletions vendor/fguillot/picofeed/docs/grabber.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,36 @@ How the content grabber works?

**The best results are obtained with XPath rules file.**

How to use the content scraper?
-------------------------------
Standalone usage
----------------

```php
<?php

use PicoFeed\Client\Grabber;

$grabber = new Grabber($item_url);
$grabber->download();
$grabber->parse();

// Get raw HTML content
echo $grabber->getRawContent();

// Get relevant content
echo $grabber->getContent();

// Get filtered relevant content
echo $grabber->getFilteredContent();
```

Fetch full item contents during feed parsing
--------------------------------------------

Before parsing all items, just call the method `$parser->enableContentGrabber()`:

```php
<?php

use PicoFeed\Reader\Reader;
use PicoFeed\PicoFeedException;

Expand Down
50 changes: 33 additions & 17 deletions vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php
Original file line number Diff line number Diff line change
Expand Up @@ -199,16 +199,9 @@ public function handleNotModifiedResponse(array $response)
$this->is_modified = false;
}
else if ($response['status'] == 200) {

$etag = $this->getHeader($response, 'ETag');
$last_modified = $this->getHeader($response, 'Last-Modified');

if ($this->isPropertyEquals('etag', $etag) || $this->isPropertyEquals('last_modified', $last_modified)) {
$this->is_modified = false;
}

$this->etag = $etag;
$this->last_modified = $last_modified;
$this->is_modified = $this->hasBeenModified($response, $this->etag, $this->last_modified);
$this->etag = $this->getHeader($response, 'ETag');
$this->last_modified = $this->getHeader($response, 'Last-Modified');
}

if ($this->is_modified === false) {
Expand Down Expand Up @@ -245,16 +238,39 @@ public function handleNormalResponse(array $response)
}

/**
* Check if a class property equals to a value
* Check if a request has been modified according to the parameters
*
* @access public
* @param string $property Class property
* @param string $value Value
* @param array $response
* @param string $etag
* @param string $lastModified
* @return boolean
*/
private function isPropertyEquals($property, $value)
private function hasBeenModified($response, $etag, $lastModified)
{
    // Previously stored value for each cache validator header
    $expected = array(
        'Etag' => $etag,
        'Last-Modified' => $lastModified
    );

    // Becomes true once at least one validator header is found in the response
    $validatorSeen = false;

    foreach ($expected as $name => $previous) {
        // Headers missing from the response are ignored
        if (! isset($response['headers'][$name])) {
            continue;
        }

        // Any present header with a different value means the content changed
        if ($response['headers'][$name] !== $previous) {
            return true;
        }

        $validatorSeen = true;
    }

    // Not modified only when a validator was present and every present one matched;
    // without any validator header the response is treated as modified
    return ! $validatorSeen;
}

/**
Expand Down Expand Up @@ -324,7 +340,7 @@ public function parseHeaders(array $lines)
Logger::setMessage(get_called_class().' HTTP header: '.$name.' => '.$value);
}

return array($status, $headers);
return array($status, new HttpHeaders($headers));
}

/**
Expand Down Expand Up @@ -552,7 +568,7 @@ public function setProxyPassword($password)
*
* @access public
* @param \PicoFeed\Config\Config $config Config instance
* @return \PicoFeed\Config\Config
* @return \PicoFeed\Client\Client
*/
public function setConfig($config)
{
Expand Down
9 changes: 6 additions & 3 deletions vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ public function readHeaders($ch, $buffer)
* Prepare HTTP headers
*
* @access private
* @return array
* @return string[]
*/
private function prepareHeaders()
{
Expand All @@ -123,7 +123,7 @@ private function prepareHeaders()
* Prepare curl proxy context
*
* @access private
* @return resource
* @return resource $ch
*/
private function prepareProxyContext($ch)
{
Expand Down Expand Up @@ -199,6 +199,9 @@ private function executeContext()
$this->handleError($curl_errno);
}

// Update the url if there were redirects
$this->url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);

curl_close($ch);
}

Expand All @@ -215,7 +218,7 @@ public function doRequest($follow_location = true)

list($status, $headers) = $this->parseHeaders(explode("\r\n", $this->headers[$this->headers_counter - 1]));

// When resticted with open_basedir
// When restricted with open_basedir
if ($this->needToHandleRedirection($follow_location, $status)) {
return $this->handleRedirection($headers['Location']);
}
Expand Down
19 changes: 16 additions & 3 deletions vendor/fguillot/picofeed/lib/PicoFeed/Client/Grabber.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
namespace PicoFeed\Client;

use DOMXPath;

use PicoFeed\Encoding\Encoding;
use PicoFeed\Logging\Logger;
use PicoFeed\Filter\Filter;
Expand Down Expand Up @@ -148,7 +147,7 @@ public function __construct($url, $html = '', $encoding = 'utf-8')
*
* @access public
* @param \PicoFeed\Config\Config $config Config instance
* @return \PicoFeed\Grabber
* @return Grabber
*/
public function setConfig($config)
{
Expand Down Expand Up @@ -178,6 +177,19 @@ public function getRawContent()
return $this->html;
}

/**
 * Get the relevant content after it has been run through the HTML filter
 *
 * An HTML filter is created from the relevant content and the current url,
 * it receives the grabber configuration and its output is returned.
 *
 * @access public
 * @return string
 */
public function getFilteredContent()
{
    $htmlFilter = Filter::html($this->content, $this->url);
    $htmlFilter->setConfig($this->config);

    return $htmlFilter->execute();
}

/**
* Parse the HTML content
*
Expand All @@ -191,8 +203,8 @@ public function parse()
Logger::setMessage(get_called_class().' Fix encoding');
Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'"');

$this->html = Filter::stripHeadTags($this->html);
$this->html = Encoding::convert($this->html, $this->encoding);
$this->html = Filter::stripHeadTags($this->html);

Logger::setMessage(get_called_class().' Content length: '.strlen($this->html).' bytes');
$rules = $this->getRules();
Expand Down Expand Up @@ -228,6 +240,7 @@ public function download()
$client->setConfig($this->config);
$client->execute($this->url);

$this->url = $client->getUrl();
$this->html = $client->getContent();
$this->encoding = $client->getEncoding();

Expand Down
43 changes: 43 additions & 0 deletions vendor/fguillot/picofeed/lib/PicoFeed/Client/HttpHeaders.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?php

namespace PicoFeed\Client;

use ArrayAccess;

/**
* Class to handle http headers case insensitivity
*
* @author Bernhard Posselt
* @package Client
*/
class HttpHeaders implements ArrayAccess
{
    /**
     * Header values indexed by lowercased header name
     *
     * @access private
     * @var array
     */
    private $headers = array();

    /**
     * Constructor
     *
     * Header names are lowercased before being stored, so later lookups
     * do not depend on the case used by the server or by the caller.
     *
     * @access public
     * @param  array   $headers   Header values indexed by header name
     */
    public function __construct(array $headers)
    {
        foreach ($headers as $key => $value) {
            $this->headers[strtolower($key)] = $value;
        }
    }

    /**
     * Get a header value (case insensitive)
     *
     * Returns null when the header is not present, instead of reading an
     * undefined index and raising a notice.
     *
     * The line starting with "#[" is an attribute on PHP 8 and a plain
     * comment on older versions; it silences the PHP 8.1 deprecation
     * about missing return types on ArrayAccess methods.
     *
     * @access public
     * @param  string  $offset   Header name
     * @return mixed
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        $key = strtolower($offset);

        return isset($this->headers[$key]) ? $this->headers[$key] : null;
    }

    /**
     * Set a header value (case insensitive)
     *
     * @access public
     * @param  string  $offset   Header name
     * @param  mixed   $value    Header value
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        $this->headers[strtolower($offset)] = $value;
    }

    /**
     * Check if a header is present (case insensitive)
     *
     * @access public
     * @param  string  $offset   Header name
     * @return boolean
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return isset($this->headers[strtolower($offset)]);
    }

    /**
     * Remove a header (case insensitive)
     *
     * @access public
     * @param  string  $offset   Header name
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        unset($this->headers[strtolower($offset)]);
    }
}
8 changes: 4 additions & 4 deletions vendor/fguillot/picofeed/lib/PicoFeed/Client/Stream.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class Stream extends Client
* Prepare HTTP headers
*
* @access private
* @return array
* @return string[]
*/
private function prepareHeaders()
{
Expand Down Expand Up @@ -128,11 +128,11 @@ public function doRequest()
* Decode body response according to the HTTP headers
*
* @access public
* @param string $body Raw body
* @param array $headers HTTP headers
* @param string $body Raw body
* @param HttpHeaders $headers HTTP headers
* @return string
*/
public function decodeBody($body, array $headers)
public function decodeBody($body, HttpHeaders $headers)
{
if (isset($headers['Transfer-Encoding']) && $headers['Transfer-Encoding'] === 'chunked') {
$body = $this->decodeChunked($body);
Expand Down
Loading

0 comments on commit 5697f7c

Please sign in to comment.