Merge branch 'feature/parse-webmentions-command' into develop
This commit is contained in:
commit
9f76de5c42
8 changed files with 269 additions and 50 deletions
70
app/Console/Commands/ParseCachedWebMentions.php
Normal file
70
app/Console/Commands/ParseCachedWebMentions.php
Normal file
|
@ -0,0 +1,70 @@
|
|||
<?php
|
||||
|
||||
namespace App\Console\Commands;
|
||||
|
||||
use App\WebMention;
|
||||
use Illuminate\Console\Command;
|
||||
use Illuminate\FileSystem\FileSystem;
|
||||
|
||||
class ParseCachedWebMentions extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'webmentions:parsecached';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Re-parse the webmention’s cached HTML';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Walks every file below storage/HTML, re-runs the microformats parser
     * over the cached HTML and stores the result on the webmention whose
     * `source` matches the URL derived from the file path.
     *
     * The type-hint is written fully qualified with the framework's real
     * casing so that class autoloading does not depend on a
     * case-insensitive filesystem.
     *
     * @param  \Illuminate\Filesystem\Filesystem  $filesystem
     * @return mixed
     *
     * @throws \Illuminate\Database\Eloquent\ModelNotFoundException
     *         when a cached file has no webmention with a matching source
     */
    public function handle(\Illuminate\Filesystem\Filesystem $filesystem)
    {
        $HTMLfiles = $filesystem->allFiles(storage_path() . '/HTML');

        foreach ($HTMLfiles as $file) {
            $filepath = $file->getPathname();
            $html = $filesystem->get($filepath);
            $url = $this->URLFromFilename($filepath);

            // The source URL doubles as the base URL for resolving relative links.
            $microformats = \Mf2\parse($html, $url);

            $webmention = WebMention::where('source', $url)->firstOrFail();
            $webmention->mf2 = json_encode($microformats);
            $webmention->save();
        }
    }

    /**
     * Determine the source URL from a filename.
     *
     * Reverses the path layout used when the HTML was cached: the leading
     * "http/" or "https/" directory becomes the URL scheme, and a trailing
     * "index.html" (added for URLs that end in "/") is removed again.
     *
     * @param  string  $filepath  Absolute path of a file below storage/HTML
     * @return string
     */
    private function URLFromFilename($filepath)
    {
        // Drop the cache directory prefix, leaving e.g. "https/example.org/post".
        $url = mb_substr($filepath, mb_strlen(storage_path() . '/HTML/'));

        // Restore the scheme. Only the leading segment is rewritten so that a
        // later "http/" or "https/" path segment of the URL is left untouched.
        foreach (['https/' => 'https://', 'http/' => 'http://'] as $prefix => $scheme) {
            if (mb_strpos($url, $prefix) === 0) {
                $url = $scheme . mb_substr($url, mb_strlen($prefix));
                break;
            }
        }

        // Compare the whole suffix (not just the last character) and keep the
        // trailing slash that the original URL ended with.
        $suffix = '/index.html';
        if (mb_substr($url, -mb_strlen($suffix)) === $suffix) {
            $url = mb_substr($url, 0, -mb_strlen('index.html'));
        }

        return $url;
    }
}
|
47
app/Console/Commands/ReDownloadWebMentions.php
Normal file
47
app/Console/Commands/ReDownloadWebMentions.php
Normal file
|
@ -0,0 +1,47 @@
|
|||
<?php
|
||||
|
||||
namespace App\Console\Commands;
|
||||
|
||||
use App\WebMention;
|
||||
use Illuminate\Console\Command;
|
||||
use App\Jobs\DownloadWebMention;
|
||||
|
||||
class ReDownloadWebMentions extends Command
{
    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'webmentions:redownload';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Redownload the HTML content of webmentions';

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Dispatches one DownloadWebMention job per stored webmention, passing
     * the webmention's source URL to the job.
     *
     * @return mixed
     */
    public function handle()
    {
        foreach (WebMention::all() as $webmention) {
            // This class uses no job-dispatching trait, so calling
            // $this->dispatch() would hit an undefined method; use the
            // framework's global dispatch() helper instead.
            dispatch(new DownloadWebMention($webmention->source));
        }
    }
}
|
|
@ -14,6 +14,8 @@ class Kernel extends ConsoleKernel
|
|||
*/
|
||||
protected $commands = [
|
||||
Commands\SecurityCheck::class,
|
||||
Commands\ParseCachedWebMentions::class,
|
||||
Commands\ReDownloadWebMentions::class,
|
||||
];
|
||||
|
||||
/**
|
||||
|
|
67
app/Jobs/DownloadWebMention.php
Normal file
67
app/Jobs/DownloadWebMention.php
Normal file
|
@ -0,0 +1,67 @@
|
|||
<?php
|
||||
|
||||
namespace App\Jobs;
|
||||
|
||||
use GuzzleHttp\Client;
|
||||
use Illuminate\Bus\Queueable;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
use Illuminate\Queue\InteractsWithQueue;
|
||||
use Illuminate\Contracts\Queue\ShouldQueue;
|
||||
|
||||
class DownloadWebMention implements ShouldQueue
{
    use InteractsWithQueue, Queueable, SerializesModels;

    /**
     * The webmention source URL.
     *
     * @var string
     */
    protected $source;

    /**
     * Create a new job instance.
     *
     * @param  string  $source  The webmention source URL to download
     * @return void
     */
    public function __construct(string $source)
    {
        $this->source = $source;
    }

    /**
     * Execute the job.
     *
     * Fetches the source URL and, on a 200 response, stores the body below
     * storage/HTML using the path produced by createFilenameFromURL().
     *
     * @param  \GuzzleHttp\Client  $guzzle
     * @return void
     */
    public function handle(Client $guzzle)
    {
        $response = $guzzle->request('GET', $this->source);

        //4XX and 5XX responses should get Guzzle to throw an exception,
        //Laravel should catch and retry these automatically.
        if ($response->getStatusCode() !== 200) {
            return;
        }

        $filesystem = new \Illuminate\Filesystem\Filesystem();

        // Cache below storage/HTML, the directory the re-parse command reads.
        $filename = storage_path() . '/HTML/' . $this->createFilenameFromURL($this->source);

        // The nested scheme/host/path directories may not exist yet.
        $directory = dirname($filename);
        if (! $filesystem->isDirectory($directory)) {
            $filesystem->makeDirectory($directory, 0755, true);
        }

        $filesystem->put($filename, (string) $response->getBody());
    }

    /**
     * Create a file path from a URL. This is used when caching the HTML
     * response.
     *
     * The scheme separator "://" is turned into a directory separator, and
     * URLs ending in "/" get "index.html" appended so they map to a file.
     *
     * @param  string  $url  The URL
     * @return string  The path name, relative to the cache directory
     */
    private function createFilenameFromURL($url)
    {
        $url = str_replace(['https://', 'http://'], ['https/', 'http/'], $url);
        if (substr($url, -1) == '/') {
            $url = $url . 'index.html';
        }

        return $url;
    }
}
|
|
@ -4,10 +4,8 @@ namespace App\Jobs;
|
|||
|
||||
use Mf2;
|
||||
use App\Note;
|
||||
use HTMLPurifier;
|
||||
use App\WebMention;
|
||||
use GuzzleHttp\Client;
|
||||
use HTMLPurifier_Config;
|
||||
use Illuminate\Queue\SerializesModels;
|
||||
use Illuminate\Queue\InteractsWithQueue;
|
||||
use Jonnybarnes\WebmentionsParser\Parser;
|
||||
|
@ -22,7 +20,6 @@ class ProcessWebMention extends Job implements ShouldQueue
|
|||
|
||||
protected $note;
|
||||
protected $source;
|
||||
protected $guzzle;
|
||||
|
||||
/**
|
||||
* Create a new job instance.
|
||||
|
@ -31,28 +28,26 @@ class ProcessWebMention extends Job implements ShouldQueue
|
|||
* @param string $source
|
||||
* @return void
|
||||
*/
|
||||
public function __construct(Note $note, $source, Client $guzzle = null)
|
||||
public function __construct(Note $note, $source)
|
||||
{
|
||||
$this->note = $note;
|
||||
$this->source = $source;
|
||||
$this->guzzle = $guzzle ?? new Client();
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute the job.
|
||||
*
|
||||
* @param \Jonnybarnes\WebmentionsParser\Parser $parser
|
||||
* @param \Jonnybarnes\WebmentionsParser\Parser $parser
|
||||
* @param \GuzzleHttp\Client $guzzle
|
||||
* @return void
|
||||
*/
|
||||
public function handle(Parser $parser)
|
||||
public function handle(Parser $parser, Client $guzzle)
|
||||
{
|
||||
$sourceURL = parse_url($this->source);
|
||||
$baseURL = $sourceURL['scheme'] . '://' . $sourceURL['host'];
|
||||
$remoteContent = $this->getRemoteContent($this->source);
|
||||
$remoteContent = $this->getRemoteContent($this->source, $guzzle);
|
||||
if ($remoteContent === null) {
|
||||
throw new RemoteContentNotFoundException;
|
||||
}
|
||||
$microformats = Mf2\parse($remoteContent, $baseURL);
|
||||
$microformats = Mf2\parse($remoteContent, $this->source);
|
||||
$webmentions = WebMention::where('source', $this->source)->get();
|
||||
foreach ($webmentions as $webmention) {
|
||||
//check webmention still references target
|
||||
|
@ -65,7 +60,6 @@ class ProcessWebMention extends Job implements ShouldQueue
|
|||
return;
|
||||
}
|
||||
//webmenion is still a reply, so update content
|
||||
$microformats = $this->filterHTML($microformats);
|
||||
$this->dispatch(new SaveProfileImage($microformats));
|
||||
$webmention->mf2 = json_encode($microformats);
|
||||
$webmention->save();
|
||||
|
@ -94,7 +88,6 @@ class ProcessWebMention extends Job implements ShouldQueue
|
|||
$webmention = new WebMention();
|
||||
$type = $parser->getMentionType($microformats); //throw error here?
|
||||
$this->dispatch(new SaveProfileImage($microformats));
|
||||
$microformats = $this->filterHTML($microformats);
|
||||
$webmention->source = $this->source;
|
||||
$webmention->target = $this->note->longurl;
|
||||
$webmention->commentable_id = $this->note->id;
|
||||
|
@ -107,13 +100,14 @@ class ProcessWebMention extends Job implements ShouldQueue
|
|||
/**
|
||||
* Retrieve the remote content from a URL, and cache the result.
|
||||
*
|
||||
* @param string The URL to retreive content from
|
||||
* @return string|null The HTML from the URL (or null if error)
|
||||
* @param string $url
|
||||
* @param GuzzleHttp\client $guzzle
|
||||
* @return string|null
|
||||
*/
|
||||
private function getRemoteContent($url)
|
||||
private function getRemoteContent($url, Client $guzzle)
|
||||
{
|
||||
try {
|
||||
$response = $this->guzzle->request('GET', $url);
|
||||
$response = $guzzle->request('GET', $url);
|
||||
} catch (RequestException $e) {
|
||||
return;
|
||||
}
|
||||
|
@ -139,43 +133,11 @@ class ProcessWebMention extends Job implements ShouldQueue
|
|||
*/
|
||||
private function createFilenameFromURL($url)
|
||||
{
|
||||
$url = str_replace(['https://', 'http://'], ['', ''], $url);
|
||||
$url = str_replace(['https://', 'http://'], ['https/', 'http/'], $url);
|
||||
if (substr($url, -1) == '/') {
|
||||
$url = $url . 'index.html';
|
||||
}
|
||||
|
||||
return $url;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter the HTML in a reply webmention.
|
||||
*
|
||||
* @param array The unfiltered microformats
|
||||
* @return array The filtered microformats
|
||||
*/
|
||||
private function filterHTML($microformats)
|
||||
{
|
||||
if (isset($microformats['items'][0]['properties']['content'][0]['html'])) {
|
||||
$microformats['items'][0]['properties']['content'][0]['html_purified'] = $this->useHTMLPurifier(
|
||||
$microformats['items'][0]['properties']['content'][0]['html']
|
||||
);
|
||||
}
|
||||
|
||||
return $microformats;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up and use HTMLPurifer on some HTML.
|
||||
*
|
||||
* @param string The HTML to be processed
|
||||
* @return string The processed HTML
|
||||
*/
|
||||
private function useHTMLPurifier($html)
|
||||
{
|
||||
$config = HTMLPurifier_Config::createDefault();
|
||||
$config->set('Cache.SerializerPath', storage_path() . '/HTMLPurifier');
|
||||
$purifier = new HTMLPurifier($config);
|
||||
|
||||
return $purifier->purify($html);
|
||||
}
|
||||
}
|
||||
|
|
65
app/Observers/WebMentionObserver.php
Normal file
65
app/Observers/WebMentionObserver.php
Normal file
|
@ -0,0 +1,65 @@
|
|||
<?php
|
||||
|
||||
namespace App\Observers;
|
||||
|
||||
use HTMLPurifier;
|
||||
use App\WebMention;
|
||||
use HTMLPurifier_Config;
|
||||
|
||||
class WebMentionObserver
{
    /**
     * Listen for the created event.
     *
     * @param  WebMention  $webmention
     * @return void
     */
    public function created(WebMention $webmention)
    {
        $this->addFilteredHTML($webmention);
    }

    /**
     * Listen for the updated event.
     *
     * @param  WebMention  $webmention
     * @return void
     */
    public function updated(WebMention $webmention)
    {
        $this->addFilteredHTML($webmention);
    }

    /**
     * Filter the HTML in a reply webmention.
     *
     * Reads the first item's content HTML from the stored mf2 JSON, runs it
     * through HTMLPurifier and stores the result alongside it under the
     * `html_purified` key. Nothing is saved when there is no content HTML or
     * when the stored purified HTML is already up to date.
     *
     * @param  WebMention  $webmention  The WebMention model
     * @return void
     */
    private function addFilteredHTML(WebMention $webmention)
    {
        // Decode to associative arrays; the lookups below use array syntax.
        $mf2 = json_decode($webmention->mf2, true);

        if (! isset($mf2['items'][0]['properties']['content'][0]['html'])) {
            return;
        }

        $purified = $this->useHTMLPurifier(
            $mf2['items'][0]['properties']['content'][0]['html']
        );
        $existing = $mf2['items'][0]['properties']['content'][0]['html_purified'] ?? null;

        // Saving from inside this observer fires the model events again, so
        // stop once the purified HTML is stored to avoid endless re-entry.
        if ($existing === $purified) {
            return;
        }

        $mf2['items'][0]['properties']['content'][0]['html_purified'] = $purified;
        $webmention->mf2 = json_encode($mf2);
        $webmention->save();
    }

    /**
     * Set up and use HTMLPurifier on some HTML.
     *
     * @param  string  $html  The HTML to be processed
     * @return string  The processed HTML
     */
    private function useHTMLPurifier($html)
    {
        $config = HTMLPurifier_Config::createDefault();
        // Point HTMLPurifier's serializer cache at storage/HTMLPurifier.
        $config->set('Cache.SerializerPath', storage_path() . '/HTMLPurifier');
        $purifier = new HTMLPurifier($config);

        return $purifier->purify($html);
    }
}
|
|
@ -5,6 +5,8 @@ namespace App\Providers;
|
|||
use App\Tag;
|
||||
use App\Note;
|
||||
use Validator;
|
||||
use App\WebMention;
|
||||
use App\Observers\WebMentionObserver;
|
||||
use Illuminate\Support\ServiceProvider;
|
||||
|
||||
class AppServiceProvider extends ServiceProvider
|
||||
|
@ -45,6 +47,9 @@ class AppServiceProvider extends ServiceProvider
|
|||
$note->tags()->attach($tagsToAdd);
|
||||
}
|
||||
});
|
||||
|
||||
//observe the webmention model
|
||||
WebMention::observe(WebMentionObserver::class);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
- Added `integrity` values to external assets (issue#10)
|
||||
- Move mapbox links into own sub-view (issue#11)
|
||||
- Updated mapbox version (issue#12)
|
||||
- Massive refactor of webmention code, allowing for re-parse command (issue#8)
|
||||
|
||||
## Version 0.0.10 (2016-09-10)
|
||||
- Add an artisan command for sensiolab’s security check
|
||||
|
|
Loading…
Add table
Reference in a new issue