From bd6af021701590614e53a7e2843f00244f2cc3ca Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Thu, 15 Sep 2016 15:41:54 +0100 Subject: [PATCH 01/15] =?UTF-8?q?Artisan=20command=20to=20initiaite=20redo?= =?UTF-8?q?wnload=20of=20webmention=E2=80=99s=20HTML?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/Console/Commands/DownloadWebMentions.php | 47 ++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 app/Console/Commands/DownloadWebMentions.php diff --git a/app/Console/Commands/DownloadWebMentions.php b/app/Console/Commands/DownloadWebMentions.php new file mode 100644 index 00000000..4b9a8701 --- /dev/null +++ b/app/Console/Commands/DownloadWebMentions.php @@ -0,0 +1,47 @@ +dispatch(new DownloadWebMention($webmention->source)); + } + } +} From b340d5a07637f40b77acd30d54f7ac97f5eedf95 Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Thu, 15 Sep 2016 15:54:57 +0100 Subject: [PATCH 02/15] Add job that downloads and saves the HTML of a webmention --- app/Jobs/DownloadWebMention.php | 65 +++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 app/Jobs/DownloadWebMention.php diff --git a/app/Jobs/DownloadWebMention.php b/app/Jobs/DownloadWebMention.php new file mode 100644 index 00000000..ac7779df --- /dev/null +++ b/app/Jobs/DownloadWebMention.php @@ -0,0 +1,65 @@ +source = $source; + } + + /** + * Execute the job. + * + * @return void + */ + public function handle(Client $guzzle) + { + $response = $guzzle->request('GET', $source); + if ($response->getStatusCode() == '200') { + $filesystem = \Illuminate\FileSystem\FileSystem(); + $filesystem->put( + $this->createFilenameFromURL($source), + (string) $response->getBody()) + } + } + } + + /** + * Create a file path from a URL. This is used when caching the HTML + * response. 
+ * + * @param string The URL + * @return string The path name + */ + private function createFilenameFromURL($url) + { + $url = str_replace(['https://', 'http://'], ['', ''], $url); + if (substr($url, -1) == '/') { + $url = $url . 'index.html'; + } + + return $url; + } +} From 7abf8383de0e72f4951ee7785b2e81afee2f7631 Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Fri, 16 Sep 2016 16:33:05 +0100 Subject: [PATCH 03/15] leave http/https in folder names so we know which scheme to use --- app/Jobs/DownloadWebMention.php | 2 +- app/Jobs/ProcessWebMention.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/Jobs/DownloadWebMention.php b/app/Jobs/DownloadWebMention.php index ac7779df..0b73384f 100644 --- a/app/Jobs/DownloadWebMention.php +++ b/app/Jobs/DownloadWebMention.php @@ -55,7 +55,7 @@ class DownloadWebMention implements ShouldQueue */ private function createFilenameFromURL($url) { - $url = str_replace(['https://', 'http://'], ['', ''], $url); + $url = str_replace(['https://', 'http://'], ['https/', 'http/'], $url); if (substr($url, -1) == '/') { $url = $url . 'index.html'; } diff --git a/app/Jobs/ProcessWebMention.php b/app/Jobs/ProcessWebMention.php index fadd8399..24c03d53 100644 --- a/app/Jobs/ProcessWebMention.php +++ b/app/Jobs/ProcessWebMention.php @@ -139,7 +139,7 @@ class ProcessWebMention extends Job implements ShouldQueue */ private function createFilenameFromURL($url) { - $url = str_replace(['https://', 'http://'], ['', ''], $url); + $url = str_replace(['https://', 'http://'], ['https/', 'http/'], $url); if (substr($url, -1) == '/') { $url = $url . 
'index.html'; } From 7cfcc0f899ca393412c2831f71d54efd0436f9c1 Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Sat, 17 Sep 2016 18:18:53 +0100 Subject: [PATCH 04/15] We should be parsing the saved HTML now --- .../Commands/ParseCachedWebMentions.php | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 app/Console/Commands/ParseCachedWebMentions.php diff --git a/app/Console/Commands/ParseCachedWebMentions.php b/app/Console/Commands/ParseCachedWebMentions.php new file mode 100644 index 00000000..262e26eb --- /dev/null +++ b/app/Console/Commands/ParseCachedWebMentions.php @@ -0,0 +1,70 @@ +allFiles(storage_path() . '/HTML'); + foreach($HTMLfiles as $file) { + $filepath = $file->getPathname(); + $html = $filesystem->get($filepath); + $url = $this->URLFromFilename($filepath); + $microformats = \Mf2\parse($html, $url); + $webmention = WebMention::where('source' $url)->firstOrFail(); + $webmention->mf2 = json_encode($microformats); + $webmention->save(); + } + } + + /** + * Determine the source URL from a filename. + * + * @param string + * @return string + */ + private function URLFromFilename($filepath) + { + $dir = mb_substr($filepath, mb_strlen(storage_path() . 
'/HTML/')); + $url = str_replace(['http/', 'https/'], ['http://', 'https://'], $dir); + if (mb_substr($url, -1) == 'index.html') { + $url = mb_substr($url, 0, mb_strlen($url) - 10); + } + + return $url; + } +} From 23d7d97ed06b1086701b6e07d93a4af0d5024156 Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Sat, 17 Sep 2016 18:19:54 +0100 Subject: [PATCH 05/15] Register an observer for the webmention model --- app/Providers/AppServiceProvider.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app/Providers/AppServiceProvider.php b/app/Providers/AppServiceProvider.php index 3c5c8377..a83013ab 100644 --- a/app/Providers/AppServiceProvider.php +++ b/app/Providers/AppServiceProvider.php @@ -5,6 +5,8 @@ namespace App\Providers; use App\Tag; use App\Note; use Validator; +use App\WebMention; +use App\Observers\WebMentionObserver; use Illuminate\Support\ServiceProvider; class AppServiceProvider extends ServiceProvider @@ -45,6 +47,9 @@ class AppServiceProvider extends ServiceProvider $note->tags()->attach($tagsToAdd); } }); + + //observer the webmention model + WebMention::observe(WebMentionObserver::class); } /** From a3d6767ccb540b1f3addbc57e81704fc43243a8f Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Sat, 17 Sep 2016 19:58:00 +0100 Subject: [PATCH 06/15] Move HTML filtering into an observer --- app/Jobs/ProcessWebMention.php | 36 ---------------------------------- 1 file changed, 36 deletions(-) diff --git a/app/Jobs/ProcessWebMention.php b/app/Jobs/ProcessWebMention.php index 24c03d53..5dd51a3e 100644 --- a/app/Jobs/ProcessWebMention.php +++ b/app/Jobs/ProcessWebMention.php @@ -4,10 +4,8 @@ namespace App\Jobs; use Mf2; use App\Note; -use HTMLPurifier; use App\WebMention; use GuzzleHttp\Client; -use HTMLPurifier_Config; use Illuminate\Queue\SerializesModels; use Illuminate\Queue\InteractsWithQueue; use Jonnybarnes\WebmentionsParser\Parser; @@ -65,7 +63,6 @@ class ProcessWebMention extends Job implements ShouldQueue return; } //webmenion is still a reply, so 
update content - $microformats = $this->filterHTML($microformats); $this->dispatch(new SaveProfileImage($microformats)); $webmention->mf2 = json_encode($microformats); $webmention->save(); @@ -94,7 +91,6 @@ class ProcessWebMention extends Job implements ShouldQueue $webmention = new WebMention(); $type = $parser->getMentionType($microformats); //throw error here? $this->dispatch(new SaveProfileImage($microformats)); - $microformats = $this->filterHTML($microformats); $webmention->source = $this->source; $webmention->target = $this->note->longurl; $webmention->commentable_id = $this->note->id; @@ -146,36 +142,4 @@ class ProcessWebMention extends Job implements ShouldQueue return $url; } - - /** - * Filter the HTML in a reply webmention. - * - * @param array The unfiltered microformats - * @return array The filtered microformats - */ - private function filterHTML($microformats) - { - if (isset($microformats['items'][0]['properties']['content'][0]['html'])) { - $microformats['items'][0]['properties']['content'][0]['html_purified'] = $this->useHTMLPurifier( - $microformats['items'][0]['properties']['content'][0]['html'] - ); - } - - return $microformats; - } - - /** - * Set up and use HTMLPurifer on some HTML. - * - * @param string The HTML to be processed - * @return string The processed HTML - */ - private function useHTMLPurifier($html) - { - $config = HTMLPurifier_Config::createDefault(); - $config->set('Cache.SerializerPath', storage_path() . 
'/HTMLPurifier'); - $purifier = new HTMLPurifier($config); - - return $purifier->purify($html); - } } From 2d1565f6a163ed73a64a998d92d272ddf955d718 Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Sat, 17 Sep 2016 19:58:59 +0100 Subject: [PATCH 07/15] An observer that generates the filtered HTML on model creation/updating --- app/Observers/WebMentionObserver.php | 65 ++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 app/Observers/WebMentionObserver.php diff --git a/app/Observers/WebMentionObserver.php b/app/Observers/WebMentionObserver.php new file mode 100644 index 00000000..b15c3b3e --- /dev/null +++ b/app/Observers/WebMentionObserver.php @@ -0,0 +1,65 @@ +addFilteredHTML($webmention); + } + + /** + * Listen for the updated event. + * + * @param WebMention $webmention + * @return void + */ + public function updated(WebMention $webmention) + { + $this->addFilteredHTML($webmention); + } + + /** + * Filter the HTML in a reply webmention. + * + * @param WebMention The WebMention model + * @return void + */ + private function addFilteredHTML(WebMention $webmention) + { + $mf2 = json_decode($webmention->mf2); + if (isset($mf2['items'][0]['properties']['content'][0]['html'])) { + $mf2['items'][0]['properties']['content'][0]['html_purified'] = $this->useHTMLPurifier( + $mf2['items'][0]['properties']['content'][0]['html'] + ); + } + $webmention->mf2 = json_encode($mf2); + $webmetion->save(); + } + + /** + * Set up and use HTMLPurifer on some HTML. + * + * @param string The HTML to be processed + * @return string The processed HTML + */ + private function useHTMLPurifier($html) + { + $config = HTMLPurifier_Config::createDefault(); + $config->set('Cache.SerializerPath', storage_path() . 
'/HTMLPurifier'); + $purifier = new HTMLPurifier($config); + + return $purifier->purify($html); + } +} From 6e04296e2d96a328a4eaa35fde927933a67e2300 Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Sat, 17 Sep 2016 20:12:16 +0100 Subject: [PATCH 08/15] =?UTF-8?q?Pass=20full=20URL=20into=20php-mf2?= =?UTF-8?q?=E2=80=99s=20`parse`=20method?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/Jobs/ProcessWebMention.php | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/Jobs/ProcessWebMention.php b/app/Jobs/ProcessWebMention.php index 5dd51a3e..81bd5043 100644 --- a/app/Jobs/ProcessWebMention.php +++ b/app/Jobs/ProcessWebMention.php @@ -44,13 +44,11 @@ class ProcessWebMention extends Job implements ShouldQueue */ public function handle(Parser $parser) { - $sourceURL = parse_url($this->source); - $baseURL = $sourceURL['scheme'] . '://' . $sourceURL['host']; $remoteContent = $this->getRemoteContent($this->source); if ($remoteContent === null) { throw new RemoteContentNotFoundException; } - $microformats = Mf2\parse($remoteContent, $baseURL); + $microformats = Mf2\parse($remoteContent, $this->source); $webmentions = WebMention::where('source', $this->source)->get(); foreach ($webmentions as $webmention) { //check webmention still references target From 769d2aabd46141f81227deb1fd600f26899f439b Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Sat, 17 Sep 2016 20:50:22 +0100 Subject: [PATCH 09/15] Register the new commands --- app/Console/Kernel.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php index ad10b8f8..affc9de7 100644 --- a/app/Console/Kernel.php +++ b/app/Console/Kernel.php @@ -14,6 +14,8 @@ class Kernel extends ConsoleKernel */ protected $commands = [ Commands\SecurityCheck::class, + Commands\ParseCachedWebMentions::class, + Commands\DownloadWebMentions::class, ]; /** From fb81b37f4d64bc6b8ec1ae20fb55dc067d49af5f Mon Sep 17 00:00:00 2001 
From: Jonny Barnes Date: Sat, 17 Sep 2016 20:50:56 +0100 Subject: [PATCH 10/15] More refactoring of the initial webmention processing job --- app/Jobs/ProcessWebMention.php | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/app/Jobs/ProcessWebMention.php b/app/Jobs/ProcessWebMention.php index 81bd5043..3b28b337 100644 --- a/app/Jobs/ProcessWebMention.php +++ b/app/Jobs/ProcessWebMention.php @@ -20,7 +20,6 @@ class ProcessWebMention extends Job implements ShouldQueue protected $note; protected $source; - protected $guzzle; /** * Create a new job instance. @@ -29,22 +28,22 @@ class ProcessWebMention extends Job implements ShouldQueue * @param string $source * @return void */ - public function __construct(Note $note, $source, Client $guzzle = null) + public function __construct(Note $note, $source) { $this->note = $note; $this->source = $source; - $this->guzzle = $guzzle ?? new Client(); } /** * Execute the job. * - * @param \Jonnybarnes\WebmentionsParser\Parser $parser + * @param \Jonnybarnes\WebmentionsParser\Parser $parser + * @param \GuzzleHttp\Client $guzzle * @return void */ - public function handle(Parser $parser) + public function handle(Parser $parser, Client $guzzle) { - $remoteContent = $this->getRemoteContent($this->source); + $remoteContent = $this->getRemoteContent($this->source, $guzzle); if ($remoteContent === null) { throw new RemoteContentNotFoundException; } @@ -101,13 +100,14 @@ class ProcessWebMention extends Job implements ShouldQueue /** * Retreive the remote content from a URL, and caches the result. 
* - * @param string The URL to retreive content from - * @return string|null The HTML from the URL (or null if error) + * @param string $url + * @param GuzzleHttp\client $guzzle + * @return string|null */ - private function getRemoteContent($url) + private function getRemoteContent($url, Client $guzzle) { try { - $response = $this->guzzle->request('GET', $url); + $response = $guzzle->request('GET', $url); } catch (RequestException $e) { return; } From 756f40fb551f28042154e51a34b3f57aadf75469 Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Sat, 17 Sep 2016 20:56:57 +0100 Subject: [PATCH 11/15] Tweak command description --- app/Console/Commands/ParseCachedWebMentions.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/Console/Commands/ParseCachedWebMentions.php b/app/Console/Commands/ParseCachedWebMentions.php index 262e26eb..1d219035 100644 --- a/app/Console/Commands/ParseCachedWebMentions.php +++ b/app/Console/Commands/ParseCachedWebMentions.php @@ -20,7 +20,7 @@ class ParseCachedWebMentions extends Command * * @var string */ - protected $description = 'Re-parse the cached webmention’s HTML'; + protected $description = 'Re-parse the webmention’s cached HTML'; /** * Create a new command instance. 
From c5e0b621a69c5cbe1efb41343f4b6e3616584d73 Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Sat, 17 Sep 2016 20:58:47 +0100 Subject: [PATCH 12/15] Add missing comma --- app/Console/Commands/ParseCachedWebMentions.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/Console/Commands/ParseCachedWebMentions.php b/app/Console/Commands/ParseCachedWebMentions.php index 1d219035..c4541c44 100644 --- a/app/Console/Commands/ParseCachedWebMentions.php +++ b/app/Console/Commands/ParseCachedWebMentions.php @@ -45,7 +45,7 @@ class ParseCachedWebMentions extends Command $html = $filesystem->get($filepath); $url = $this->URLFromFilename($filepath); $microformats = \Mf2\parse($html, $url); - $webmention = WebMention::where('source' $url)->firstOrFail(); + $webmention = WebMention::where('source', $url)->firstOrFail(); $webmention->mf2 = json_encode($microformats); $webmention->save(); } From f9fc24dd0426de1601e4ba80010403b902145b02 Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Sat, 17 Sep 2016 21:01:23 +0100 Subject: [PATCH 13/15] Give the redownload command a better name --- .../{DownloadWebMentions.php => ReDownloadWebMentions.php} | 4 ++-- app/Console/Kernel.php | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename app/Console/Commands/{DownloadWebMentions.php => ReDownloadWebMentions.php} (89%) diff --git a/app/Console/Commands/DownloadWebMentions.php b/app/Console/Commands/ReDownloadWebMentions.php similarity index 89% rename from app/Console/Commands/DownloadWebMentions.php rename to app/Console/Commands/ReDownloadWebMentions.php index 4b9a8701..12622fa3 100644 --- a/app/Console/Commands/DownloadWebMentions.php +++ b/app/Console/Commands/ReDownloadWebMentions.php @@ -6,14 +6,14 @@ use App\WebMention; use Illuminate\Console\Command; use App\Jobs\DownloadWebMention; -class DownloadWebMentions extends Command +class ReDownloadWebMentions extends Command { /** * The name and signature of the console command. 
* * @var string */ - protected $signature = 'webmentions:download'; + protected $signature = 'webmentions:redownload'; /** * The console command description. diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php index affc9de7..51ec553b 100644 --- a/app/Console/Kernel.php +++ b/app/Console/Kernel.php @@ -15,7 +15,7 @@ class Kernel extends ConsoleKernel protected $commands = [ Commands\SecurityCheck::class, Commands\ParseCachedWebMentions::class, - Commands\DownloadWebMentions::class, + Commands\ReDownloadWebMentions::class, ]; /** From 99a6f665b0f1884920c3ab2e0604258b65153fa3 Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Sat, 17 Sep 2016 21:03:53 +0100 Subject: [PATCH 14/15] Add explanatory comment --- app/Jobs/DownloadWebMention.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/Jobs/DownloadWebMention.php b/app/Jobs/DownloadWebMention.php index 0b73384f..3ff505bc 100644 --- a/app/Jobs/DownloadWebMention.php +++ b/app/Jobs/DownloadWebMention.php @@ -37,6 +37,8 @@ class DownloadWebMention implements ShouldQueue public function handle(Client $guzzle) { $response = $guzzle->request('GET', $source); + //4XX and 5XX responses should get Guzzle to throw an exception, + //Laravel should catch and retry these automatically. 
if ($response->getStatusCode() == '200') { $filesystem = \Illuminate\FileSystem\FileSystem(); $filesystem->put( From 17edd7000317e3d1981314b861daf682216f204a Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Sat, 17 Sep 2016 21:14:10 +0100 Subject: [PATCH 15/15] Summarize webmention changes --- changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/changelog.md b/changelog.md index ebaf048c..d6ddaedb 100644 --- a/changelog.md +++ b/changelog.md @@ -5,6 +5,7 @@ - Added `integrity` values to external assets (issue#10) - Move mapbox links into own sub-view (issue#11) - Updated mapbox version (issue#12) + - Massive refactor of webmention code, allowing for re-parse command (issue#8) ## Version 0.0.10 (2016-09-10) - Add an artisan command for sensiolab’s security check