diff --git a/app/Console/Commands/ParseCachedWebMentions.php b/app/Console/Commands/ParseCachedWebMentions.php
new file mode 100644
index 00000000..c4541c44
--- /dev/null
+++ b/app/Console/Commands/ParseCachedWebMentions.php
@@ -0,0 +1,70 @@
+allFiles(storage_path() . '/HTML');
+        foreach ($HTMLfiles as $file) {
+            $filepath = $file->getPathname();
+            $html = $filesystem->get($filepath);
+            $url = $this->URLFromFilename($filepath);
+            $microformats = \Mf2\parse($html, $url);
+            $webmention = WebMention::where('source', $url)->firstOrFail();
+            $webmention->mf2 = json_encode($microformats);
+            $webmention->save();
+        }
+    }
+
+    /**
+     * Determine the source URL from a cached filename: restore the scheme and drop a trailing `index.html`.
+     *
+     * @param  string  $filepath  Path of a cached file below storage_path() . '/HTML/'
+     * @return string  The URL; only a leading `http/` or `https/` segment is turned back into a scheme
+     */
+    private function URLFromFilename($filepath)
+    {
+        $dir = mb_substr($filepath, mb_strlen(storage_path() . '/HTML/'));
+        $url = preg_replace('#^(https?)/#', '$1://', $dir);
+        if (mb_substr($url, -11) === '/index.html') {
+            $url = mb_substr($url, 0, mb_strlen($url) - 10);
+        }
+
+        return $url;
+    }
+}
diff --git a/app/Console/Commands/ReDownloadWebMentions.php b/app/Console/Commands/ReDownloadWebMentions.php
new file mode 100644
index 00000000..12622fa3
--- /dev/null
+++ b/app/Console/Commands/ReDownloadWebMentions.php
@@ -0,0 +1,47 @@
+dispatch(new DownloadWebMention($webmention->source));
+        }
+    }
+}
diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php
index ad10b8f8..51ec553b 100644
--- a/app/Console/Kernel.php
+++ b/app/Console/Kernel.php
@@ -14,6 +14,8 @@ class Kernel extends ConsoleKernel
      */
     protected $commands = [
         Commands\SecurityCheck::class,
+        Commands\ParseCachedWebMentions::class,
+        Commands\ReDownloadWebMentions::class,
     ];
 
     /**
diff --git a/app/Jobs/DownloadWebMention.php b/app/Jobs/DownloadWebMention.php
new file mode 100644
index 00000000..3ff505bc
--- /dev/null
+++ b/app/Jobs/DownloadWebMention.php
@@ -0,0 +1,67 @@
+source = $source;
+    }
+
+    /**
+     * Execute the job.
+     *
+     * @return void
+     */
+    public function handle(Client $guzzle)
+    {
+        $response = $guzzle->request('GET', $this->source);
+        //4XX and 5XX responses should get Guzzle to throw an exception,
+        //Laravel should catch and retry these automatically.
+        if ($response->getStatusCode() === 200) {
+            $filesystem = new \Illuminate\Filesystem\Filesystem();
+            $path = storage_path() . '/HTML/' . $this->createFilenameFromURL($this->source);
+            //cache under storage/HTML (where the parse command reads); create the directory first
+            $filesystem->makeDirectory(dirname($path), 0755, true, true);
+            $filesystem->put($path, (string) $response->getBody());
+        }
+    }
+
+    /**
+     * Create a file path from a URL. This is used when caching the HTML
+     * response.
+     *
+     * @param  string  The URL
+     * @return string  The path name
+     */
+    private function createFilenameFromURL($url)
+    {
+        $url = str_replace(['https://', 'http://'], ['https/', 'http/'], $url);
+        if (substr($url, -1) == '/') {
+            $url = $url . 'index.html';
+        }
+
+        return $url;
+    }
+}
diff --git a/app/Jobs/ProcessWebMention.php b/app/Jobs/ProcessWebMention.php
index fadd8399..3b28b337 100644
--- a/app/Jobs/ProcessWebMention.php
+++ b/app/Jobs/ProcessWebMention.php
@@ -4,10 +4,8 @@ namespace App\Jobs;
 
 use Mf2;
 use App\Note;
-use HTMLPurifier;
 use App\WebMention;
 use GuzzleHttp\Client;
-use HTMLPurifier_Config;
 use Illuminate\Queue\SerializesModels;
 use Illuminate\Queue\InteractsWithQueue;
 use Jonnybarnes\WebmentionsParser\Parser;
@@ -22,7 +20,6 @@ class ProcessWebMention extends Job implements ShouldQueue
 
     protected $note;
     protected $source;
-    protected $guzzle;
 
     /**
      * Create a new job instance.
@@ -31,28 +28,26 @@ class ProcessWebMention extends Job implements ShouldQueue
      * @param string $source
      * @return void
      */
-    public function __construct(Note $note, $source, Client $guzzle = null)
+    public function __construct(Note $note, $source)
     {
         $this->note = $note;
         $this->source = $source;
-        $this->guzzle = $guzzle ?? new Client();
     }
 
     /**
      * Execute the job.
+     *
-     * @param \Jonnybarnes\WebmentionsParser\Parser $parser
+     * @param  \Jonnybarnes\WebmentionsParser\Parser  $parser
+     * @param  \GuzzleHttp\Client  $guzzle
      * @return void
      */
-    public function handle(Parser $parser)
+    public function handle(Parser $parser, Client $guzzle)
     {
-        $sourceURL = parse_url($this->source);
-        $baseURL = $sourceURL['scheme'] . '://' . $sourceURL['host'];
-        $remoteContent = $this->getRemoteContent($this->source);
+        $remoteContent = $this->getRemoteContent($this->source, $guzzle);
         if ($remoteContent === null) {
             throw new RemoteContentNotFoundException;
         }
-        $microformats = Mf2\parse($remoteContent, $baseURL);
+        $microformats = Mf2\parse($remoteContent, $this->source);
         $webmentions = WebMention::where('source', $this->source)->get();
         foreach ($webmentions as $webmention) {
             //check webmention still references target
@@ -65,7 +60,6 @@ class ProcessWebMention extends Job implements ShouldQueue
                     return;
                 }
                 //webmenion is still a reply, so update content
-                $microformats = $this->filterHTML($microformats);
                 $this->dispatch(new SaveProfileImage($microformats));
                 $webmention->mf2 = json_encode($microformats);
                 $webmention->save();
@@ -94,7 +88,6 @@ class ProcessWebMention extends Job implements ShouldQueue
             $webmention = new WebMention();
             $type = $parser->getMentionType($microformats); //throw error here?
             $this->dispatch(new SaveProfileImage($microformats));
-            $microformats = $this->filterHTML($microformats);
             $webmention->source = $this->source;
             $webmention->target = $this->note->longurl;
             $webmention->commentable_id = $this->note->id;
@@ -107,13 +100,14 @@ class ProcessWebMention extends Job implements ShouldQueue
     /**
      * Retreive the remote content from a URL, and caches the result.
      *
-     * @param string The URL to retreive content from
-     * @return string|null The HTML from the URL (or null if error)
+     * @param  string  $url
+     * @param  \GuzzleHttp\Client  $guzzle
+     * @return string|null
      */
-    private function getRemoteContent($url)
+    private function getRemoteContent($url, Client $guzzle)
     {
         try {
-            $response = $this->guzzle->request('GET', $url);
+            $response = $guzzle->request('GET', $url);
         } catch (RequestException $e) {
             return;
         }
@@ -139,43 +133,11 @@ class ProcessWebMention extends Job implements ShouldQueue
      */
     private function createFilenameFromURL($url)
     {
-        $url = str_replace(['https://', 'http://'], ['', ''], $url);
+        $url = str_replace(['https://', 'http://'], ['https/', 'http/'], $url);
         if (substr($url, -1) == '/') {
             $url = $url . 'index.html';
         }
 
         return $url;
     }
-
-    /**
-     * Filter the HTML in a reply webmention.
-     *
-     * @param array The unfiltered microformats
-     * @return array The filtered microformats
-     */
-    private function filterHTML($microformats)
-    {
-        if (isset($microformats['items'][0]['properties']['content'][0]['html'])) {
-            $microformats['items'][0]['properties']['content'][0]['html_purified'] = $this->useHTMLPurifier(
-                $microformats['items'][0]['properties']['content'][0]['html']
-            );
-        }
-
-        return $microformats;
-    }
-
-    /**
-     * Set up and use HTMLPurifer on some HTML.
-     *
-     * @param string The HTML to be processed
-     * @return string The processed HTML
-     */
-    private function useHTMLPurifier($html)
-    {
-        $config = HTMLPurifier_Config::createDefault();
-        $config->set('Cache.SerializerPath', storage_path() . '/HTMLPurifier');
-        $purifier = new HTMLPurifier($config);
-
-        return $purifier->purify($html);
-    }
 }
diff --git a/app/Observers/WebMentionObserver.php b/app/Observers/WebMentionObserver.php
new file mode 100644
index 00000000..b15c3b3e
--- /dev/null
+++ b/app/Observers/WebMentionObserver.php
@@ -0,0 +1,65 @@
+addFilteredHTML($webmention);
+    }
+
+    /**
+     * Listen for the updated event.
+     *
+     * @param  WebMention  $webmention
+     * @return void
+     */
+    public function updated(WebMention $webmention)
+    {
+        $this->addFilteredHTML($webmention);
+    }
+
+    /**
+     * Add a purified copy of a reply's HTML content to the stored mf2, saving only when one was added.
+     *
+     * @param  WebMention  $webmention  The model whose `mf2` JSON is decoded, extended and re-encoded
+     * @return void
+     */
+    private function addFilteredHTML(WebMention $webmention)
+    {
+        $mf2 = json_decode($webmention->mf2, true);
+        if (isset($mf2['items'][0]['properties']['content'][0]['html'])) {
+            $mf2['items'][0]['properties']['content'][0]['html_purified'] = $this->useHTMLPurifier(
+                $mf2['items'][0]['properties']['content'][0]['html']
+            );
+            $webmention->mf2 = json_encode($mf2);
+            $webmention->save();
+        }
+    }
+
+    /**
+     * Set up and use HTMLPurifier on some HTML.
+     *
+     * @param  string  $html  The HTML to be processed
+     * @return string  The processed HTML
+     */
+    private function useHTMLPurifier($html)
+    {
+        $config = HTMLPurifier_Config::createDefault();
+        $config->set('Cache.SerializerPath', storage_path() . '/HTMLPurifier');
+        $purifier = new HTMLPurifier($config);
+
+        return $purifier->purify($html);
+    }
+}
diff --git a/app/Providers/AppServiceProvider.php b/app/Providers/AppServiceProvider.php
index 3c5c8377..a83013ab 100644
--- a/app/Providers/AppServiceProvider.php
+++ b/app/Providers/AppServiceProvider.php
@@ -5,6 +5,8 @@ namespace App\Providers;
 use App\Tag;
 use App\Note;
 use Validator;
+use App\WebMention;
+use App\Observers\WebMentionObserver;
 use Illuminate\Support\ServiceProvider;
 
 class AppServiceProvider extends ServiceProvider
@@ -45,6 +47,9 @@ class AppServiceProvider extends ServiceProvider
                 $note->tags()->attach($tagsToAdd);
             }
         });
+
+        //observe the WebMention model
+        WebMention::observe(WebMentionObserver::class);
     }
 
     /**
diff --git a/changelog.md b/changelog.md
index ebaf048c..d6ddaedb 100644
--- a/changelog.md
+++ b/changelog.md
@@ -5,6 +5,7 @@
 - Added `integrity` values to external assets (issue#10)
 - Move mapbox links into own sub-view (issue#11)
 - Updated mapbox version (issue#12)
+ - Massive refactor of 
webmention code, allowing for re-parse command (issue#8) ## Version 0.0.10 (2016-09-10) - Add an artisan command for sensiolab’s security check