Merge branch 'feature/parse-webmentions-command' into develop

This commit is contained in:
Jonny Barnes 2016-09-17 21:14:56 +01:00
commit 9f76de5c42
8 changed files with 269 additions and 50 deletions

View file

@ -0,0 +1,70 @@
<?php
namespace App\Console\Commands;
use App\WebMention;
use Illuminate\Console\Command;
use Illuminate\FileSystem\FileSystem;
class ParseCachedWebMentions extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'webmentions:parsecached';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Re-parse the webmentions cached HTML';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Walks every file below storage/HTML, re-runs the microformats parser
     * over the cached markup and stores the JSON-encoded result on the
     * webmention whose `source` equals the URL derived from the file path.
     *
     * The parameter is hinted with the fully-qualified, correctly-cased
     * framework class name so that it can be autoloaded on case-sensitive
     * filesystems.
     *
     * @param  \Illuminate\Filesystem\Filesystem  $filesystem
     * @return void
     *
     * @throws \Illuminate\Database\Eloquent\ModelNotFoundException
     *         when a cached file has no webmention with a matching source
     */
    public function handle(\Illuminate\Filesystem\Filesystem $filesystem)
    {
        $HTMLfiles = $filesystem->allFiles(storage_path() . '/HTML');

        foreach ($HTMLfiles as $file) {
            $filepath = $file->getPathname();
            $html = $filesystem->get($filepath);
            $url = $this->URLFromFilename($filepath);

            // The source URL doubles as the base URL for resolving relative links.
            $microformats = \Mf2\parse($html, $url);

            $webmention = WebMention::where('source', $url)->firstOrFail();
            $webmention->mf2 = json_encode($microformats);
            $webmention->save();
        }
    }

    /**
     * Determine the source URL from a filename.
     *
     * This reverses the mapping used when the HTML was cached: the scheme is
     * stored as a leading `http/` or `https/` directory, and URLs that ended
     * in a slash were stored with `index.html` appended.
     *
     * @param  string  $filepath  Absolute path of a file below storage/HTML
     * @return string  The URL the cached file was downloaded from
     */
    private function URLFromFilename($filepath)
    {
        $relative = mb_substr($filepath, mb_strlen(storage_path() . '/HTML/'));
        $url = str_replace(['http/', 'https/'], ['http://', 'https://'], $relative);

        // Compare the whole suffix: a single trailing character can never
        // equal 'index.html', so the appended file name was never removed.
        $index = 'index.html';
        if (mb_substr($url, -mb_strlen('/' . $index)) === '/' . $index) {
            // Drop only the file name; the trailing slash belongs to the URL.
            $url = mb_substr($url, 0, -mb_strlen($index));
        }

        return $url;
    }
}

View file

@ -0,0 +1,47 @@
<?php
namespace App\Console\Commands;
use App\WebMention;
use Illuminate\Console\Command;
use App\Jobs\DownloadWebMention;
class ReDownloadWebMentions extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'webmentions:redownload';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Redownload the HTML content of webmentions';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Queues one DownloadWebMention job per stored webmention so the HTML of
     * each source URL is fetched again.
     *
     * @return void
     */
    public function handle()
    {
        foreach (WebMention::all() as $webmention) {
            // This class pulls in no job-dispatching trait, so `$this->dispatch()`
            // is not defined here; use the framework's global helper instead.
            dispatch(new DownloadWebMention($webmention->source));
        }
    }
}

View file

@ -14,6 +14,8 @@ class Kernel extends ConsoleKernel
*/
protected $commands = [
Commands\SecurityCheck::class,
Commands\ParseCachedWebMentions::class, // artisan webmentions:parsecached
Commands\ReDownloadWebMentions::class, // artisan webmentions:redownload
];
/**

View file

@ -0,0 +1,67 @@
<?php
namespace App\Jobs;
use GuzzleHttp\Client;
use Illuminate\Bus\Queueable;
use Illuminate\Queue\SerializesModels;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Contracts\Queue\ShouldQueue;
class DownloadWebMention implements ShouldQueue
{
    use InteractsWithQueue, Queueable, SerializesModels;

    /**
     * The webmention source URL.
     *
     * @var string
     */
    protected $source;

    /**
     * Create a new job instance.
     *
     * @param  string  $source  The webmention source URL to download
     * @return void
     */
    public function __construct(string $source)
    {
        $this->source = $source;
    }

    /**
     * Execute the job.
     *
     * Downloads the source URL and, on a 200 response, caches the body below
     * storage/HTML, which is the directory the `webmentions:parsecached`
     * command reads from.
     *
     * @param  \GuzzleHttp\Client  $guzzle
     * @return void
     */
    public function handle(Client $guzzle)
    {
        $response = $guzzle->request('GET', $this->source);

        //4XX and 5XX responses should get Guzzle to throw an exception,
        //Laravel should catch and retry these automatically.
        if ($response->getStatusCode() !== 200) {
            return;
        }

        $filesystem = new \Illuminate\Filesystem\Filesystem();
        $path = storage_path() . '/HTML/' . $this->createFilenameFromURL($this->source);

        // put() does not create missing parent directories, so make sure the
        // scheme/host/path directory tree exists before writing.
        $directory = dirname($path);
        if (! $filesystem->isDirectory($directory)) {
            $filesystem->makeDirectory($directory, 0755, true);
        }

        $filesystem->put($path, (string) $response->getBody());
    }

    /**
     * Create a file path from a URL. This is used when caching the HTML
     * response.
     *
     * The scheme separator is turned into a directory (`https://` becomes
     * `https/`) and URLs ending in a slash get `index.html` appended so they
     * name a file rather than a directory.
     *
     * @param  string  $url  The URL
     * @return string  The path name, relative to the HTML cache directory
     */
    private function createFilenameFromURL($url)
    {
        $url = str_replace(['https://', 'http://'], ['https/', 'http/'], $url);
        if (substr($url, -1) === '/') {
            $url = $url . 'index.html';
        }

        return $url;
    }
}

View file

@ -4,10 +4,8 @@ namespace App\Jobs;
use Mf2;
use App\Note;
use HTMLPurifier;
use App\WebMention;
use GuzzleHttp\Client;
use HTMLPurifier_Config;
use Illuminate\Queue\SerializesModels;
use Illuminate\Queue\InteractsWithQueue;
use Jonnybarnes\WebmentionsParser\Parser;
@ -22,7 +20,6 @@ class ProcessWebMention extends Job implements ShouldQueue
protected $note;
protected $source;
protected $guzzle;
/**
* Create a new job instance.
@ -31,28 +28,26 @@ class ProcessWebMention extends Job implements ShouldQueue
* @param string $source
* @return void
*/
public function __construct(Note $note, $source, Client $guzzle = null)
public function __construct(Note $note, $source)
{
$this->note = $note;
$this->source = $source;
$this->guzzle = $guzzle ?? new Client();
}
/**
* Execute the job.
*
* @param \Jonnybarnes\WebmentionsParser\Parser $parser
* @param \Jonnybarnes\WebmentionsParser\Parser $parser
* @param \GuzzleHttp\Client $guzzle
* @return void
*/
public function handle(Parser $parser)
public function handle(Parser $parser, Client $guzzle)
{
$sourceURL = parse_url($this->source);
$baseURL = $sourceURL['scheme'] . '://' . $sourceURL['host'];
$remoteContent = $this->getRemoteContent($this->source);
$remoteContent = $this->getRemoteContent($this->source, $guzzle);
if ($remoteContent === null) {
throw new RemoteContentNotFoundException;
}
$microformats = Mf2\parse($remoteContent, $baseURL);
$microformats = Mf2\parse($remoteContent, $this->source);
$webmentions = WebMention::where('source', $this->source)->get();
foreach ($webmentions as $webmention) {
//check webmention still references target
@ -65,7 +60,6 @@ class ProcessWebMention extends Job implements ShouldQueue
return;
}
//webmention is still a reply, so update content
$microformats = $this->filterHTML($microformats);
$this->dispatch(new SaveProfileImage($microformats));
$webmention->mf2 = json_encode($microformats);
$webmention->save();
@ -94,7 +88,6 @@ class ProcessWebMention extends Job implements ShouldQueue
$webmention = new WebMention();
$type = $parser->getMentionType($microformats); //throw error here?
$this->dispatch(new SaveProfileImage($microformats));
$microformats = $this->filterHTML($microformats);
$webmention->source = $this->source;
$webmention->target = $this->note->longurl;
$webmention->commentable_id = $this->note->id;
@ -107,13 +100,14 @@ class ProcessWebMention extends Job implements ShouldQueue
/**
* Retrieve the remote content from a URL and cache the result.
*
* @param string The URL to retreive content from
* @return string|null The HTML from the URL (or null if error)
* @param string $url
* @param \GuzzleHttp\Client $guzzle
* @return string|null
*/
private function getRemoteContent($url)
private function getRemoteContent($url, Client $guzzle)
{
try {
$response = $this->guzzle->request('GET', $url);
$response = $guzzle->request('GET', $url);
} catch (RequestException $e) {
return;
}
@ -139,43 +133,11 @@ class ProcessWebMention extends Job implements ShouldQueue
*/
private function createFilenameFromURL($url)
{
    // Keep the scheme as a leading directory (`https://` becomes `https/`).
    // Stripping the scheme first would make this replacement a no-op and
    // lose the information needed to rebuild the URL from the cached path.
    $url = str_replace(['https://', 'http://'], ['https/', 'http/'], $url);

    // A URL ending in a slash names a directory; give it a file name.
    if (substr($url, -1) === '/') {
        $url = $url . 'index.html';
    }

    return $url;
}
/**
* Filter the HTML in a reply webmention.
*
* @param array The unfiltered microformats
* @return array The filtered microformats
*/
private function filterHTML($microformats)
{
    // Only the first item's first content entry is considered.
    $rawHTML = $microformats['items'][0]['properties']['content'][0]['html'] ?? null;

    // Nothing to purify: hand the microformats back untouched.
    if ($rawHTML === null) {
        return $microformats;
    }

    // Store the purified markup next to the raw HTML.
    $microformats['items'][0]['properties']['content'][0]['html_purified'] = $this->useHTMLPurifier($rawHTML);

    return $microformats;
}
/**
* Set up and use HTMLPurifier on some HTML.
*
* @param string The HTML to be processed
* @return string The processed HTML
*/
private function useHTMLPurifier($html)
{
    // Start from the library defaults and point its serializer cache at the
    // application's storage directory.
    $settings = HTMLPurifier_Config::createDefault();
    $settings->set('Cache.SerializerPath', storage_path() . '/HTMLPurifier');

    return (new HTMLPurifier($settings))->purify($html);
}
}

View file

@ -0,0 +1,65 @@
<?php
namespace App\Observers;
use HTMLPurifier;
use App\WebMention;
use HTMLPurifier_Config;
class WebMentionObserver
{
    /**
     * Listen for the created event.
     *
     * @param  WebMention  $webmention
     * @return void
     */
    public function created(WebMention $webmention)
    {
        $this->addFilteredHTML($webmention);
    }

    /**
     * Listen for the updated event.
     *
     * @param  WebMention  $webmention
     * @return void
     */
    public function updated(WebMention $webmention)
    {
        $this->addFilteredHTML($webmention);
    }

    /**
     * Filter the HTML in a reply webmention.
     *
     * Decodes the stored microformats JSON and, when the first item carries
     * HTML content, stores a purified copy beside it under `html_purified`.
     * The model is saved only when that value actually changes.
     *
     * @param  WebMention  $webmention  The WebMention model
     * @return void
     */
    private function addFilteredHTML(WebMention $webmention)
    {
        // Decode to associative arrays: the lookups below use array syntax,
        // which is an error on the stdClass objects json_decode returns by default.
        $mf2 = json_decode($webmention->mf2, true);

        if (! isset($mf2['items'][0]['properties']['content'][0]['html'])) {
            return;
        }

        $content = $mf2['items'][0]['properties']['content'][0];
        $purified = $this->useHTMLPurifier($content['html']);

        // Saving from inside the `updated` listener fires `updated` again, so
        // stop as soon as the stored value is already current.
        if (isset($content['html_purified']) && $content['html_purified'] === $purified) {
            return;
        }

        $mf2['items'][0]['properties']['content'][0]['html_purified'] = $purified;
        $webmention->mf2 = json_encode($mf2);
        $webmention->save();
    }

    /**
     * Set up and use HTMLPurifier on some HTML.
     *
     * @param  string  $html  The HTML to be processed
     * @return string  The processed HTML
     */
    private function useHTMLPurifier($html)
    {
        $config = HTMLPurifier_Config::createDefault();
        $config->set('Cache.SerializerPath', storage_path() . '/HTMLPurifier');
        $purifier = new HTMLPurifier($config);

        return $purifier->purify($html);
    }
}

View file

@ -5,6 +5,8 @@ namespace App\Providers;
use App\Tag;
use App\Note;
use Validator;
use App\WebMention;
use App\Observers\WebMentionObserver;
use Illuminate\Support\ServiceProvider;
class AppServiceProvider extends ServiceProvider
@ -45,6 +47,9 @@ class AppServiceProvider extends ServiceProvider
$note->tags()->attach($tagsToAdd);
}
});
//observe the WebMention model
WebMention::observe(WebMentionObserver::class);
}
/**

View file

@ -5,6 +5,7 @@
- Added `integrity` values to external assets (issue#10)
- Move mapbox links into own sub-view (issue#11)
- Updated mapbox version (issue#12)
- Massive refactor of webmention code, allowing for re-parse command (issue#8)
## Version 0.0.10 (2016-09-10)
- Add an artisan command for sensiolabs security check