jonnybarnes.uk/app/Jobs/ProcessWebMention.php

<?php

namespace App\Jobs;

use Mf2;
use App\Note;
use HTMLPurifier;
use App\WebMention;
use GuzzleHttp\Client;
use HTMLPurifier_Config;
use Illuminate\Queue\SerializesModels;
use Illuminate\Queue\InteractsWithQueue;
use Jonnybarnes\WebmentionsParser\Parser;
use GuzzleHttp\Exception\RequestException;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\DispatchesJobs;
use App\Exceptions\RemoteContentNotFoundException;

/**
 * Queued job that processes an incoming webmention for a note.
 *
 * The source page is fetched and parsed for microformats. Any webmention
 * already stored for that source is re-validated (and updated or deleted);
 * otherwise a new webmention record is created.
 */
class ProcessWebMention extends Job implements ShouldQueue
{
    use InteractsWithQueue, SerializesModels, DispatchesJobs;

    /**
     * The note the webmention targets.
     *
     * @var \App\Note
     */
    protected $note;

    /**
     * The URL of the page that mentions the note.
     *
     * @var string
     */
    protected $source;

    /**
     * The HTTP client used to fetch the source; created on first use when
     * none was injected.
     *
     * @var \GuzzleHttp\Client|null
     */
    protected $guzzle;

    /**
     * Create a new job instance.
     *
     * @param  \App\Note $note
     * @param  string $source
     * @param  \GuzzleHttp\Client|null $guzzle  Optional client (e.g. a mock for tests)
     * @return void
     */
    public function __construct(Note $note, $source, Client $guzzle = null)
    {
        $this->note = $note;
        $this->source = $source;
        // A default client is built lazily in getRemoteContent() so that it
        // is not part of the job's state when none was injected.
        $this->guzzle = $guzzle;
    }

    /**
     * Execute the job.
     *
     * @param  \Jonnybarnes\WebmentionsParser\Parser $parser
     * @return void
     *
     * @throws \App\Exceptions\RemoteContentNotFoundException  When the source cannot be fetched
     */
    public function handle(Parser $parser)
    {
        $sourceURL = parse_url($this->source);
        $baseURL = $sourceURL['scheme'] . '://' . $sourceURL['host'];
        $remoteContent = $this->getRemoteContent($this->source);
        if ($remoteContent === null) {
            throw new RemoteContentNotFoundException;
        }
        $microformats = Mf2\parse($remoteContent, $baseURL);
        $target = $this->note->longurl;

        // NOTE(review): existing webmentions are looked up by source only,
        // not by target; confirm a source never mentions more than one note.
        $webmentions = WebMention::where('source', $this->source)->get();
        foreach ($webmentions as $webmention) {
            // check the webmention still references the target;
            // we try each type of mention (reply/like/repost)
            if ($webmention->type == 'in-reply-to') {
                if ($parser->checkInReplyTo($microformats, $target) == false) {
                    // it doesn't, so delete
                    $webmention->delete();

                    return;
                }
                // webmention is still a reply, so update the stored content
                $microformats = $this->filterHTML($microformats);
                $this->dispatch(new SaveProfileImage($microformats));
                $webmention->mf2 = json_encode($microformats);
                $webmention->save();

                return;
            }
            if ($webmention->type == 'like-of') {
                if ($parser->checkLikeOf($microformats, $target) == false) {
                    // it doesn't, so delete
                    $webmention->delete();
                }

                // a like that is still valid needs no update, but we must
                // not fall through and store a duplicate webmention
                return;
            }
            if ($webmention->type == 'repost-of') {
                if ($parser->checkRepostOf($microformats, $target) == false) {
                    // it doesn't, so delete
                    $webmention->delete();
                }

                // as with likes: nothing to update, and no duplicate wanted
                return;
            }
        }

        // no matching webmention in the db, so create a new one
        $webmention = new WebMention();
        $type = $parser->getMentionType($microformats); //throw error here?
        $this->dispatch(new SaveProfileImage($microformats));
        $microformats = $this->filterHTML($microformats);
        $webmention->source = $this->source;
        $webmention->target = $target;
        $webmention->commentable_id = $this->note->id;
        $webmention->commentable_type = 'App\Note';
        $webmention->type = $type;
        $webmention->mf2 = json_encode($microformats);
        $webmention->save();
    }

    /**
     * Retrieve the remote content from a URL, and cache the result on disk.
     *
     * @param  string       The URL to retrieve content from
     * @return string|null  The HTML from the URL (or null if the request failed)
     */
    private function getRemoteContent($url)
    {
        if ($this->guzzle === null) {
            $this->guzzle = new Client();
        }

        try {
            $response = $this->guzzle->request('GET', $url);
        } catch (RequestException $e) {
            return null;
        }
        $html = (string) $response->getBody();

        $filename = $this->createFilenameFromURL($url);
        // The URL comes from a remote party: never let ".." segments move
        // the cache file outside of the storage/HTML directory.
        if (preg_match('#(^|/)\.\.(/|$)#', $filename) === 1) {
            return $html;
        }

        $path = storage_path() . '/HTML/' . $filename;
        $dir = dirname($path);
        if (! is_dir($dir)) {
            mkdir($dir, 0755, true);
        }
        file_put_contents($path, $html);

        return $html;
    }

    /**
     * Create a file path from a URL. This is used when caching the HTML
     * response.
     *
     * The scheme is kept as the leading folder name ("https/" or "http/"),
     * and a URL ending in a slash is mapped to an "index.html" file.
     *
     * @param  string  The URL
     * @return string  The path name
     */
    private function createFilenameFromURL($url)
    {
        $url = str_replace(['https://', 'http://'], ['https/', 'http/'], $url);
        if (substr($url, -1) == '/') {
            $url = $url . 'index.html';
        }

        return $url;
    }

    /**
     * Filter the HTML in a reply webmention.
     *
     * Only the first item's first content entry is considered; its purified
     * HTML is stored alongside the original under the "html_purified" key.
     *
     * @param  array  The unfiltered microformats
     * @return array  The filtered microformats
     */
    private function filterHTML($microformats)
    {
        if (isset($microformats['items'][0]['properties']['content'][0]['html'])) {
            $microformats['items'][0]['properties']['content'][0]['html_purified'] = $this->useHTMLPurifier(
                $microformats['items'][0]['properties']['content'][0]['html']
            );
        }

        return $microformats;
    }

    /**
     * Set up and use HTMLPurifier on some HTML.
     *
     * @param  string  The HTML to be processed
     * @return string  The processed HTML
     */
    private function useHTMLPurifier($html)
    {
        $config = HTMLPurifier_Config::createDefault();
        $config->set('Cache.SerializerPath', storage_path() . '/HTMLPurifier');
        $purifier = new HTMLPurifier($config);

        return $purifier->purify($html);
    }
}
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`<?php`

			`namespace App\Jobs;`

Start work on better webmention support 2016-07-29 09:55:27 +01:00			`use Mf2;`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`use App\Note;`
			`use HTMLPurifier;`
			`use App\WebMention;`
			`use GuzzleHttp\Client;`
			`use HTMLPurifier_Config;`
			`use Illuminate\Queue\SerializesModels;`
			`use Illuminate\Queue\InteractsWithQueue;`
			`use Jonnybarnes\WebmentionsParser\Parser;`
Start work on better webmention support 2016-07-29 09:55:27 +01:00			`use GuzzleHttp\Exception\RequestException;`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`use Illuminate\Contracts\Queue\ShouldQueue;`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`use Illuminate\Foundation\Bus\DispatchesJobs;`
			`use App\Exceptions\RemoteContentNotFoundException;`
Initial commit to new repo 2016-05-19 15:01:28 +01:00
			`class ProcessWebMention extends Job implements ShouldQueue`
			`{`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`use InteractsWithQueue, SerializesModels, DispatchesJobs;`
Initial commit to new repo 2016-05-19 15:01:28 +01:00
			`protected $note;`
			`protected $source;`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`protected $guzzle;`
Initial commit to new repo 2016-05-19 15:01:28 +01:00
			`/**`
			`* Create a new job instance.`
			`*`
			`* @param \App\Note $note`
			`* @param string $source`
			`* @return void`
			`*/`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`public function __construct(Note $note, $source, Client $guzzle = null)`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`{`
			`$this->note = $note;`
			`$this->source = $source;`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`$this->guzzle = $guzzle ?? new Client();`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`}`

			`/**`
			`* Execute the job.`
			`*`
			`* @param \Jonnybarnes\WebmentionsParser\Parser $parser`
			`* @return void`
			`*/`
			`public function handle(Parser $parser)`
			`{`
			`$sourceURL = parse_url($this->source);`
			`$baseURL = $sourceURL['scheme'] . '://' . $sourceURL['host'];`
			`$remoteContent = $this->getRemoteContent($this->source);`
Start work on better webmention support 2016-07-29 09:55:27 +01:00			`if ($remoteContent === null) {`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`throw new RemoteContentNotFoundException;`
Start work on better webmention support 2016-07-29 09:55:27 +01:00			`}`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`$microformats = Mf2\parse($remoteContent, $baseURL);`
			`$webmentions = WebMention::where('source', $this->source)->get();`
			`foreach ($webmentions as $webmention) {`
			`//check webmention still references target`
			`//we try each type of mention (reply/like/repost)`
			`if ($webmention->type == 'in-reply-to') {`
			`if ($parser->checkInReplyTo($microformats, $this->note->longurl) == false) {`
			`//it doesn't so delete`
			`$webmention->delete();`

			`return;`
			`}`
			`//webmenion is still a reply, so update content`
			`$microformats = $this->filterHTML($microformats);`
			`$this->dispatch(new SaveProfileImage($microformats));`
			`$webmention->mf2 = json_encode($microformats);`
			`$webmention->save();`

			`return;`
			`}`
			`if ($webmention->type == 'like-of') {`
			`if ($parser->checkLikeOf($microformats, $note->longurl) == false) {`
			`//it doesn't so delete`
			`$webmention->delete();`

			`return;`
			`} //note we don't need to do anything if it still is a like`
			`}`
			`if ($webmention->type == 'repost-of') {`
			`if ($parser->checkRepostOf($microformats, $note->longurl) == false) {`
			`//it doesn't so delete`
			`$webmention->delete();`

			`return;`
			`} //again, we don't need to do anything if it still is a repost`
			`}`
			`}//foreach`

Initial commit to new repo 2016-05-19 15:01:28 +01:00			`//no wemention in db so create new one`
			`$webmention = new WebMention();`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`$type = $parser->getMentionType($microformats); //throw error here?`
			`$this->dispatch(new SaveProfileImage($microformats));`
			`$microformats = $this->filterHTML($microformats);`
			`$webmention->source = $this->source;`
			`$webmention->target = $this->note->longurl;`
			`$webmention->commentable_id = $this->note->id;`
			`$webmention->commentable_type = 'App\Note';`
			`$webmention->type = $type;`
			`$webmention->mf2 = json_encode($microformats);`
			`$webmention->save();`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`}`

			`/**`
			`* Retreive the remote content from a URL, and caches the result.`
			`*`
Start work on better webmention support 2016-07-29 09:55:27 +01:00			`* @param string The URL to retreive content from`
			`* @return string\|null The HTML from the URL (or null if error)`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`*/`
			`private function getRemoteContent($url)`
			`{`
Start work on better webmention support 2016-07-29 09:55:27 +01:00			`try {`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`$response = $this->guzzle->request('GET', $url);`
Add a missing space (PSR-2) 2016-07-29 10:48:05 +01:00			`} catch (RequestException $e) {`
Start work on better webmention support 2016-07-29 09:55:27 +01:00			`return;`
			`}`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`$html = (string) $response->getBody();`
			`$path = storage_path() . '/HTML/' . $this->createFilenameFromURL($url);`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`$parts = explode('/', $path);`
			`$name = array_pop($parts);`
			`$dir = implode('/', $parts);`
			`if (! is_dir($dir)) {`
			`mkdir($dir, 0755, true);`
			`}`
			`file_put_contents("$dir/$name", $html);`
Initial commit to new repo 2016-05-19 15:01:28 +01:00
			`return $html;`
			`}`

			`/**`
			`* Create a file path from a URL. This is used when caching the HTML`
			`* response.`
			`*`
			`* @param string The URL`
			`* @return string The path name`
			`*/`
			`private function createFilenameFromURL($url)`
			`{`
leave http/https in folder names so we know which sheme to use 2016-09-16 16:33:05 +01:00			`$url = str_replace(['https://', 'http://'], ['https/', 'http/'], $url);`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`if (substr($url, -1) == '/') {`
			`$url = $url . 'index.html';`
			`}`

			`return $url;`
			`}`

			`/**`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`* Filter the HTML in a reply webmention.`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`*`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`* @param array The unfiltered microformats`
			`* @return array The filtered microformats`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`*/`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`private function filterHTML($microformats)`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`{`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`if (isset($microformats['items'][0]['properties']['content'][0]['html'])) {`
			`$microformats['items'][0]['properties']['content'][0]['html_purified'] = $this->useHTMLPurifier(`
			`$microformats['items'][0]['properties']['content'][0]['html']`
			`);`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`}`

Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`return $microformats;`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`}`

			`/**`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`* Set up and use HTMLPurifer on some HTML.`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`*`
			`* @param string The HTML to be processed`
			`* @return string The processed HTML`
			`*/`
Work on webmwtion code refactoring 2016-08-03 16:08:30 +01:00			`private function useHTMLPurifier($html)`
Initial commit to new repo 2016-05-19 15:01:28 +01:00			`{`
			`$config = HTMLPurifier_Config::createDefault();`
			`$config->set('Cache.SerializerPath', storage_path() . '/HTMLPurifier');`
			`$purifier = new HTMLPurifier($config);`

			`return $purifier->purify($html);`
			`}`
			`}`