jonnybarnes.uk/app/Jobs/ProcessWebMention.php

182 lines
5.8 KiB
PHP
Raw Normal View History

2016-05-19 15:01:28 +01:00
<?php
namespace App\Jobs;
use Mf2;
2016-05-19 15:01:28 +01:00
use App\Note;
use HTMLPurifier;
use App\WebMention;
use GuzzleHttp\Client;
use HTMLPurifier_Config;
use Illuminate\Queue\SerializesModels;
use Illuminate\Queue\InteractsWithQueue;
use Jonnybarnes\WebmentionsParser\Parser;
use GuzzleHttp\Exception\RequestException;
2016-05-19 15:01:28 +01:00
use Illuminate\Contracts\Queue\ShouldQueue;
2016-08-03 16:08:30 +01:00
use Illuminate\Foundation\Bus\DispatchesJobs;
use App\Exceptions\RemoteContentNotFoundException;
2016-05-19 15:01:28 +01:00
class ProcessWebMention extends Job implements ShouldQueue
{
    use InteractsWithQueue, SerializesModels, DispatchesJobs;

    /**
     * The note the webmention is targeting.
     *
     * @var \App\Note
     */
    protected $note;

    /**
     * The URL of the page that mentions the note.
     *
     * @var string
     */
    protected $source;

    /**
     * Optional HTTP client override. When null, a client is created on
     * demand the first time remote content is fetched.
     *
     * @var \GuzzleHttp\Client|null
     */
    protected $guzzle;

    /**
     * Create a new job instance.
     *
     * @param  \App\Note  $note
     * @param  string  $source
     * @param  \GuzzleHttp\Client|null  $guzzle  Client to use instead of a default one
     * @return void
     */
    public function __construct(Note $note, $source, Client $guzzle = null)
    {
        $this->note = $note;
        $this->source = $source;
        // The default client is created lazily in getRemoteContent() so a
        // job built without an explicit client carries no HTTP client in
        // its queued payload.
        $this->guzzle = $guzzle;
    }

    /**
     * Execute the job.
     *
     * Fetches the source page, parses its microformats and then either
     * deletes/updates a stored webmention for that source or stores a new one.
     *
     * @param  \Jonnybarnes\WebmentionsParser\Parser  $parser
     * @return void
     *
     * @throws \App\Exceptions\RemoteContentNotFoundException When the source cannot be fetched
     */
    public function handle(Parser $parser)
    {
        $sourceURL = parse_url($this->source);
        $baseURL = $sourceURL['scheme'] . '://' . $sourceURL['host'];

        $remoteContent = $this->getRemoteContent($this->source);
        if ($remoteContent === null) {
            throw new RemoteContentNotFoundException;
        }

        $microformats = Mf2\parse($remoteContent, $baseURL);
        $target = $this->note->longurl;

        // NOTE(review): the lookup matches on source only, so rows stored for
        // the same source against a different target are also checked against
        // this note's URL. Confirm whether it should filter on target as well.
        $webmentions = WebMention::where('source', $this->source)->get();
        foreach ($webmentions as $webmention) {
            // Check the stored webmention still references the target; each
            // type of mention (reply/like/repost) has its own parser check.
            if ($webmention->type === 'in-reply-to') {
                if (! $parser->checkInReplyTo($microformats, $target)) {
                    // The source no longer replies to the note, so delete.
                    $webmention->delete();

                    return;
                }

                // The webmention is still a reply, so refresh the stored content.
                $microformats = $this->filterHTML($microformats);
                $this->dispatch(new SaveProfileImage($microformats));
                $webmention->mf2 = json_encode($microformats);
                $webmention->save();

                return;
            }

            if ($webmention->type === 'like-of') {
                if (! $parser->checkLikeOf($microformats, $target)) {
                    // The source no longer likes the note, so delete.
                    $webmention->delete();

                    return;
                }
                // Nothing to update when it is still a like.
            }

            if ($webmention->type === 'repost-of') {
                if (! $parser->checkRepostOf($microformats, $target)) {
                    // The source no longer reposts the note, so delete.
                    $webmention->delete();

                    return;
                }
                // Nothing to update when it is still a repost.
            }
        }

        // NOTE(review): a like/repost that is still valid falls out of the
        // loop and reaches this point, so a further row is stored for the
        // same source. Confirm whether that is intended.

        // No stored webmention was deleted or updated, so create a new one.
        $webmention = new WebMention();
        $type = $parser->getMentionType($microformats); //throw error here?
        $this->dispatch(new SaveProfileImage($microformats));
        $microformats = $this->filterHTML($microformats);

        $webmention->source = $this->source;
        $webmention->target = $target;
        $webmention->commentable_id = $this->note->id;
        $webmention->commentable_type = 'App\Note';
        $webmention->type = $type;
        $webmention->mf2 = json_encode($microformats);
        $webmention->save();
    }

    /**
     * Retrieve the remote content from a URL, and cache the result on disk.
     *
     * @param  string  $url  The URL to retrieve content from
     * @return string|null  The HTML from the URL (or null if the request failed)
     */
    private function getRemoteContent($url)
    {
        $client = $this->guzzle ?? new Client();

        try {
            $response = $client->request('GET', $url);
        } catch (RequestException $e) {
            return null;
        }

        $html = (string) $response->getBody();

        // Keep a copy of the fetched HTML under storage/HTML, creating the
        // directory tree for it when it does not exist yet.
        $path = storage_path() . '/HTML/' . $this->createFilenameFromURL($url);
        $dir = dirname($path);
        if (! is_dir($dir)) {
            mkdir($dir, 0755, true);
        }
        file_put_contents($path, $html);

        return $html;
    }

    /**
     * Create a relative file path from a URL. This is used when caching the
     * HTML response.
     *
     * The scheme separator is turned into a directory ("https://" becomes
     * "https/"), a URL ending in "/" gets "index.html" appended, and "." and
     * ".." path segments are removed so the result cannot climb out of the
     * directory it is appended to.
     *
     * @param  string  $url  The URL
     * @return string  The path name
     */
    private function createFilenameFromURL($url)
    {
        $filepath = str_replace(['https://', 'http://'], ['https/', 'http/'], $url);

        if (substr($filepath, -1) == '/') {
            $filepath .= 'index.html';
        }

        // The URL is supplied by a remote party; dot segments would otherwise
        // be resolved by the filesystem when the path is written to.
        $segments = array_filter(explode('/', $filepath), function ($segment) {
            return $segment !== '.' && $segment !== '..';
        });

        return implode('/', $segments);
    }

    /**
     * Filter the HTML in a reply webmention.
     *
     * When the first item has HTML content, a purified copy is stored next to
     * it under the "html_purified" key; the original HTML is left in place.
     *
     * @param  array  $microformats  The unfiltered microformats
     * @return array  The filtered microformats
     */
    private function filterHTML($microformats)
    {
        if (isset($microformats['items'][0]['properties']['content'][0]['html'])) {
            $microformats['items'][0]['properties']['content'][0]['html_purified'] = $this->useHTMLPurifier(
                $microformats['items'][0]['properties']['content'][0]['html']
            );
        }

        return $microformats;
    }

    /**
     * Set up and use HTMLPurifier on some HTML.
     *
     * @param  string  $html  The HTML to be processed
     * @return string  The processed HTML
     */
    private function useHTMLPurifier($html)
    {
        $config = HTMLPurifier_Config::createDefault();
        // Point the purifier's serializer cache at the storage directory.
        $config->set('Cache.SerializerPath', storage_path() . '/HTMLPurifier');
        $purifier = new HTMLPurifier($config);

        return $purifier->purify($html);
    }
}