From 45c71bbb470a974777e932502d4c0f7afe3b99e5 Mon Sep 17 00:00:00 2001 From: Jonny Barnes Date: Mon, 2 Jan 2023 09:39:23 +0000 Subject: [PATCH] Setup CloudConvert to take webpage screenshots --- app/Jobs/ProcessBookmark.php | 10 +- app/Jobs/SaveScreenshot.php | 107 ++++++++++++++++++++++ app/Providers/AppServiceProvider.php | 33 +++++++ app/Services/BookmarkService.php | 30 +----- config/services.php | 4 + tests/Feature/Jobs/SaveScreenshotTest.php | 22 +++++ tests/Unit/ArticlesTest.php | 9 +- 7 files changed, 177 insertions(+), 38 deletions(-) create mode 100755 app/Jobs/SaveScreenshot.php create mode 100755 tests/Feature/Jobs/SaveScreenshotTest.php diff --git a/app/Jobs/ProcessBookmark.php b/app/Jobs/ProcessBookmark.php index ce7d7637..de3c65ed 100644 --- a/app/Jobs/ProcessBookmark.php +++ b/app/Jobs/ProcessBookmark.php @@ -20,8 +20,7 @@ class ProcessBookmark implements ShouldQueue use Queueable; use SerializesModels; - /** @var Bookmark */ - protected $bookmark; + protected Bookmark $bookmark; /** * Create a new job instance. @@ -38,14 +37,13 @@ class ProcessBookmark implements ShouldQueue * * @return void */ - public function handle() + public function handle(): void { - $uuid = (resolve(BookmarkService::class))->saveScreenshot($this->bookmark->url); - $this->bookmark->screenshot = $uuid; + SaveScreenshot::dispatch($this->bookmark); try { $archiveLink = (resolve(BookmarkService::class))->getArchiveLink($this->bookmark->url); - } catch (InternetArchiveException $e) { + } catch (InternetArchiveException) { $archiveLink = null; } $this->bookmark->archive = $archiveLink; diff --git a/app/Jobs/SaveScreenshot.php b/app/Jobs/SaveScreenshot.php new file mode 100755 index 00000000..691cb02f --- /dev/null +++ b/app/Jobs/SaveScreenshot.php @@ -0,0 +1,107 @@ +bookmark = $bookmark; + } + + /** + * Execute the job. 
+ * + * @return void + * @throws JsonException + */ + public function handle(): void + { + // A normal Guzzle client + $client = resolve(Client::class); + // A Guzzle client with a custom Middleware to retry the CloudConvert API requests (bound as 'RetryGuzzle' in AppServiceProvider) + $retryClient = resolve('RetryGuzzle'); + + // First request that CloudConvert takes a screenshot of the URL + $takeScreenshotJobResponse = $client->request('POST', 'https://api.cloudconvert.com/v2/capture-website', [ + 'headers' => [ + 'Authorization' => 'Bearer ' . config('services.cloudconvert.token'), + ], + 'json' => [ + 'url' => $this->bookmark->url, + 'output_format' => 'png', + 'screen_width' => 1440, + 'screen_height' => 900, + 'wait_until' => 'networkidle0', + 'wait_time' => 100 + ], + ]); + + $jobId = json_decode($takeScreenshotJobResponse->getBody()->getContents(), false, 512, JSON_THROW_ON_ERROR)->data->id; + + // Now wait till the status job is finished + $screenshotJobStatusResponse = $retryClient->request('GET', 'https://api.cloudconvert.com/v2/tasks/' . $jobId, [ + 'headers' => [ + 'Authorization' => 'Bearer ' . config('services.cloudconvert.token'), + ], + 'query' => [ + 'include' => 'payload', + ], + ]); + + $finishedCaptureId = json_decode($screenshotJobStatusResponse->getBody()->getContents(), false, 512, JSON_THROW_ON_ERROR)->data->id; + + // Now we can create a new job to request that the screenshot is exported to a temporary URL we can download the screenshot from + $exportImageJob = $client->request('POST', 'https://api.cloudconvert.com/v2/export/url', [ + 'headers' => [ + 'Authorization' => 'Bearer ' . 
config('services.cloudconvert.token'), + ], + 'json' => [ + 'input' => $finishedCaptureId, + 'archive_multiple_files' => false, + ], + ]); + + $exportImageJobId = json_decode($exportImageJob->getBody()->getContents(), false, 512, JSON_THROW_ON_ERROR)->data->id; + + // Again, wait till the status of this export job is finished + $finalImageUrlResponse = $retryClient->request('GET', 'https://api.cloudconvert.com/v2/tasks/' . $exportImageJobId, [ + 'headers' => [ + 'Authorization' => 'Bearer ' . config('services.cloudconvert.token'), + ], + 'query' => [ + 'include' => 'payload', + ], + ]); + + // Now we can download the screenshot and save it to the storage; a finished export/url task lists its files under data.result.files + $finalImageUrl = json_decode($finalImageUrlResponse->getBody()->getContents(), false, 512, JSON_THROW_ON_ERROR)->data->result->files[0]->url; + + $finalImageUrlContent = $client->request('GET', $finalImageUrl); + + Storage::disk('public')->put('/assets/img/bookmarks/' . $jobId . '.png', $finalImageUrlContent->getBody()->getContents()); + + $this->bookmark->forceFill(['screenshot' => $jobId])->save(); // persist here: an assignment alone is lost when the job ends + } +} diff --git a/app/Providers/AppServiceProvider.php b/app/Providers/AppServiceProvider.php index f75ee2ae..f2046a66 100644 --- a/app/Providers/AppServiceProvider.php +++ b/app/Providers/AppServiceProvider.php @@ -5,6 +5,8 @@ namespace App\Providers; use App\Models\Note; use App\Observers\NoteObserver; use Codebird\Codebird; +use GuzzleHttp\Client; +use GuzzleHttp\Middleware; use Illuminate\Database\Eloquent\Model; use Illuminate\Http\Request; use Illuminate\Pagination\LengthAwarePaginator; @@ -104,6 +106,46 @@ class AppServiceProvider extends ServiceProvider ); }); + // Configure Guzzle + $this->app->bind('RetryGuzzle', function () { + $handlerStack = \GuzzleHttp\HandlerStack::create(); + $handlerStack->push(Middleware::retry( + function ($retries, $request, $response, $exception) { + // Limit the number of retries to 5 + if ($retries >= 5) { + return false; + } + + // Retry connection exceptions + if ($exception instanceof 
\GuzzleHttp\Exception\ConnectException) { + return true; + } + + // Retry on server errors + if ($response && $response->getStatusCode() >= 500) { + return true; + } + + // Nothing to inspect when the request failed without a response + if ($response === null) { + return false; + } + + // Finally for CloudConvert, retry if status is not final + $body = (string) $response->getBody(); + // Rewind so the caller can still read the body after this peek + $response->getBody()->rewind(); + + return json_decode($body, false, 512, JSON_THROW_ON_ERROR)->data->status !== 'finished'; + }, + function () { + // Retry after 1 second + return 1000; + } + )); + return new Client(['handler' => $handlerStack]); + }); + // Turn on Eloquent strict mode when developing Model::shouldBeStrict(! $this->app->isProduction()); } diff --git a/app/Services/BookmarkService.php b/app/Services/BookmarkService.php index d08d96cf..17a03a1b 100644 --- a/app/Services/BookmarkService.php +++ b/app/Services/BookmarkService.php @@ -15,15 +15,14 @@ use GuzzleHttp\Exception\ClientException; use Illuminate\Support\Arr; use Illuminate\Support\Str; use Ramsey\Uuid\Uuid; -use Spatie\Browsershot\Browsershot; -use Spatie\Browsershot\Exceptions\CouldNotTakeBrowsershot; class BookmarkService extends Service { /** * Create a new Bookmark. * - * @param array $request Data from request()->all() + * @param array $request Data from request()->all() + * @param string|null $client * @return Bookmark */ public function create(array $request, ?string $client = null): Bookmark @@ -75,31 +74,6 @@ class BookmarkService extends Service return $bookmark; } - /** - * Given a URL, use `browsershot` to save an image of the page. - * - * @param string $url - * @return string The uuid for the screenshot - * - * @throws CouldNotTakeBrowsershot - * - * @codeCoverageIgnore - */ - public function saveScreenshot(string $url): string - { - $browsershot = new Browsershot(); - - $uuid = Uuid::uuid4(); - - $browsershot->url($url) - ->setIncludePath('$PATH:/usr/local/bin') - ->noSandbox() - ->windowSize(960, 640) - ->save(public_path() . '/assets/img/bookmarks/' . $uuid . '.png'); - - return $uuid->toString(); - } - /** * Given a URL, attempt to save it to the Internet Archive. 
* diff --git a/config/services.php b/config/services.php index 0ace530e..83889931 100644 --- a/config/services.php +++ b/config/services.php @@ -31,4 +31,8 @@ return [ 'region' => env('AWS_DEFAULT_REGION', 'us-east-1'), ], + 'cloudconvert' => [ + 'token' => env('CLOUDCONVERT_API_TOKEN'), + ], + ]; diff --git a/tests/Feature/Jobs/SaveScreenshotTest.php b/tests/Feature/Jobs/SaveScreenshotTest.php new file mode 100755 index 00000000..23ad26b2 --- /dev/null +++ b/tests/Feature/Jobs/SaveScreenshotTest.php @@ -0,0 +1,22 @@ +get('/'); + + $response->assertStatus(200); + } +} diff --git a/tests/Unit/ArticlesTest.php b/tests/Unit/ArticlesTest.php index 3229985a..2327eac2 100644 --- a/tests/Unit/ArticlesTest.php +++ b/tests/Unit/ArticlesTest.php @@ -85,14 +85,15 @@ class ArticlesTest extends TestCase public function dateScopeReturnsExpectedArticlesForDecember(): void { Article::factory()->create([ - 'created_at' => Carbon::now()->setMonth(11)->toDateTimeString(), - 'updated_at' => Carbon::now()->setMonth(11)->toDateTimeString(), + 'created_at' => Carbon::now()->setDay(11)->setMonth(11)->toDateTimeString(), + 'updated_at' => Carbon::now()->setDay(11)->setMonth(11)->toDateTimeString(), ]); Article::factory()->create([ - 'created_at' => Carbon::now()->setMonth(12)->toDateTimeString(), - 'updated_at' => Carbon::now()->setMonth(12)->toDateTimeString(), + 'created_at' => Carbon::now()->setMonth(12)->setDay(12)->toDateTimeString(), + 'updated_at' => Carbon::now()->setMonth(12)->setDay(12)->toDateTimeString(), ]); + $this->assertCount(1, Article::date(date('Y'), 12)->get()); } }