Set up CloudConvert to take webpage screenshots

This commit is contained in:
Jonny Barnes 2023-01-02 09:39:23 +00:00
parent 0d393bd172
commit 45c71bbb47
Signed by: jonny
SSH key fingerprint: SHA256:CTuSlns5U7qlD9jqHvtnVmfYV3Zwl2Z7WnJ4/dqOaL8
7 changed files with 177 additions and 38 deletions

View file

@ -20,8 +20,7 @@ class ProcessBookmark implements ShouldQueue
use Queueable; use Queueable;
use SerializesModels; use SerializesModels;
/** @var Bookmark */ protected Bookmark $bookmark;
protected $bookmark;
/** /**
* Create a new job instance. * Create a new job instance.
@ -38,14 +37,13 @@ class ProcessBookmark implements ShouldQueue
* *
* @return void * @return void
*/ */
public function handle() public function handle(): void
{ {
$uuid = (resolve(BookmarkService::class))->saveScreenshot($this->bookmark->url); SaveScreenshot::dispatch($this->bookmark);
$this->bookmark->screenshot = $uuid;
try { try {
$archiveLink = (resolve(BookmarkService::class))->getArchiveLink($this->bookmark->url); $archiveLink = (resolve(BookmarkService::class))->getArchiveLink($this->bookmark->url);
} catch (InternetArchiveException $e) { } catch (InternetArchiveException) {
$archiveLink = null; $archiveLink = null;
} }
$this->bookmark->archive = $archiveLink; $this->bookmark->archive = $archiveLink;

107
app/Jobs/SaveScreenshot.php Executable file
View file

@ -0,0 +1,107 @@
<?php
declare(strict_types=1);
namespace App\Jobs;
use App\Models\Bookmark;
use GuzzleHttp\Client;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use Illuminate\Support\Facades\Storage;
use JsonException;
/**
 * Queued job that asks CloudConvert to capture a PNG screenshot of a
 * bookmark's URL, downloads the result to the `public` storage disk, and
 * records the capture task ID on the bookmark.
 */
class SaveScreenshot implements ShouldQueue
{
    use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;

    /** Base URL of the CloudConvert v2 REST API. */
    private const API_BASE = 'https://api.cloudconvert.com/v2';

    private Bookmark $bookmark;

    /**
     * Create a new job instance.
     *
     * @param Bookmark $bookmark The bookmark whose URL should be captured
     */
    public function __construct(Bookmark $bookmark)
    {
        $this->bookmark = $bookmark;
    }

    /**
     * Execute the job.
     *
     * Flow: create a capture task, poll it, create an export task for the
     * capture, poll that, download the exported file, store it, then persist
     * the capture task ID as the bookmark's screenshot identifier.
     *
     * @return void
     *
     * @throws JsonException When a CloudConvert response body is not valid JSON
     * @throws \RuntimeException When the export task carries no download URL
     */
    public function handle(): void
    {
        // A normal Guzzle client
        $client = resolve(Client::class);
        // A Guzzle client with a custom Middleware to retry the CloudConvert API requests.
        // The container binding is registered under the name 'RetryGuzzle'.
        $retryClient = resolve('RetryGuzzle');

        // First request that CloudConvert takes a screenshot of the URL
        $captureResponse = $client->request('POST', self::API_BASE . '/capture-website', [
            'headers' => $this->authHeaders(),
            'json' => [
                'url' => $this->bookmark->url,
                'output_format' => 'png',
                'screen_width' => 1440,
                'screen_height' => 900,
                'wait_until' => 'networkidle0',
                'wait_time' => 100,
            ],
        ]);
        $jobId = $this->decodeTask((string) $captureResponse->getBody())->id;

        // Now wait till the capture task is finished
        $captureStatusResponse = $retryClient->request('GET', self::API_BASE . '/tasks/' . $jobId, [
            'headers' => $this->authHeaders(),
            'query' => [
                'include' => 'payload',
            ],
        ]);
        $finishedCaptureId = $this->decodeTask((string) $captureStatusResponse->getBody())->id;

        // Now we can create a new task to request that the screenshot is exported
        // to a temporary URL we can download the screenshot from
        $exportResponse = $client->request('POST', self::API_BASE . '/export/url', [
            'headers' => $this->authHeaders(),
            'json' => [
                'input' => $finishedCaptureId,
                'archive_multiple_files' => false,
            ],
        ]);
        $exportImageJobId = $this->decodeTask((string) $exportResponse->getBody())->id;

        // Again, wait till the status of this export task is finished
        $exportStatusResponse = $retryClient->request('GET', self::API_BASE . '/tasks/' . $exportImageJobId, [
            'headers' => $this->authHeaders(),
            'query' => [
                'include' => 'payload',
            ],
        ]);
        $exportTask = $this->decodeTask((string) $exportStatusResponse->getBody());

        // The download URL of an export task lives in its result file list, not
        // directly on the task. `??` keeps a missing key from raising a warning
        // so we can fail with a meaningful message instead.
        $finalImageUrl = $exportTask->result->files[0]->url ?? null;
        if (! is_string($finalImageUrl) || $finalImageUrl === '') {
            throw new \RuntimeException(
                'CloudConvert export task ' . $exportImageJobId . ' did not return a download URL'
            );
        }

        // Now we can download the screenshot and save it to the storage
        $imageResponse = $client->request('GET', $finalImageUrl);
        Storage::disk('public')->put(
            '/assets/img/bookmarks/' . $jobId . '.png',
            (string) $imageResponse->getBody()
        );

        // Persist the identifier; assigning the attribute alone does not write it.
        $this->bookmark->screenshot = $jobId;
        $this->bookmark->save();
    }

    /**
     * Build the Authorization header sent with every CloudConvert API call.
     *
     * @return array<string, string>
     */
    private function authHeaders(): array
    {
        return [
            'Authorization' => 'Bearer ' . config('services.cloudconvert.token'),
        ];
    }

    /**
     * Decode a CloudConvert JSON response body and return its `data` member.
     *
     * @param string $body Raw response body
     * @return object The decoded `data` object
     *
     * @throws JsonException When the body is not valid JSON
     */
    private function decodeTask(string $body): object
    {
        return json_decode($body, false, 512, JSON_THROW_ON_ERROR)->data;
    }
}

View file

@ -5,6 +5,8 @@ namespace App\Providers;
use App\Models\Note; use App\Models\Note;
use App\Observers\NoteObserver; use App\Observers\NoteObserver;
use Codebird\Codebird; use Codebird\Codebird;
use GuzzleHttp\Client;
use GuzzleHttp\Middleware;
use Illuminate\Database\Eloquent\Model; use Illuminate\Database\Eloquent\Model;
use Illuminate\Http\Request; use Illuminate\Http\Request;
use Illuminate\Pagination\LengthAwarePaginator; use Illuminate\Pagination\LengthAwarePaginator;
@ -104,6 +106,37 @@ class AppServiceProvider extends ServiceProvider
); );
}); });
// Configure Guzzle
// Bind a Guzzle client whose handler stack retries requests: up to 5 retries,
// one second apart, on connection failures, 5xx responses, and CloudConvert
// task payloads whose status is not yet `finished`.
$this->app->bind('RetryGuzzle', function () {
    $handlerStack = \GuzzleHttp\HandlerStack::create();
    $handlerStack->push(Middleware::retry(
        function ($retries, $request, $response, $exception) {
            // Limit the number of retries to 5
            if ($retries >= 5) {
                return false;
            }

            // Retry connection exceptions
            if ($exception instanceof \GuzzleHttp\Exception\ConnectException) {
                return true;
            }

            // Any other failure without a response leaves nothing to inspect;
            // stop retrying so the underlying exception reaches the caller.
            if ($response === null) {
                return false;
            }

            // Retry on server errors
            if ($response->getStatusCode() >= 500) {
                return true;
            }

            // Client errors will not fix themselves and their payload has no
            // task status, so hand them straight back.
            if ($response->getStatusCode() >= 400) {
                return false;
            }

            // Decode the body text (not the response object), then rewind the
            // stream so the caller can still read the body afterwards.
            $stream = $response->getBody();
            $payload = json_decode((string) $stream, false, 512, JSON_THROW_ON_ERROR);
            if ($stream->isSeekable()) {
                $stream->rewind();
            }

            // Finally for CloudConvert, retry if status is not final
            return ($payload->data->status ?? null) !== 'finished';
        },
        function () {
            // Retry after 1 second (value is in milliseconds)
            return 1000;
        }
    ));

    return new Client(['handler' => $handlerStack]);
});
// Turn on Eloquent strict mode when developing // Turn on Eloquent strict mode when developing
Model::shouldBeStrict(! $this->app->isProduction()); Model::shouldBeStrict(! $this->app->isProduction());
} }

View file

@ -15,8 +15,6 @@ use GuzzleHttp\Exception\ClientException;
use Illuminate\Support\Arr; use Illuminate\Support\Arr;
use Illuminate\Support\Str; use Illuminate\Support\Str;
use Ramsey\Uuid\Uuid; use Ramsey\Uuid\Uuid;
use Spatie\Browsershot\Browsershot;
use Spatie\Browsershot\Exceptions\CouldNotTakeBrowsershot;
class BookmarkService extends Service class BookmarkService extends Service
{ {
@ -24,6 +22,7 @@ class BookmarkService extends Service
* Create a new Bookmark. * Create a new Bookmark.
* *
* @param array $request Data from request()->all() * @param array $request Data from request()->all()
* @param string|null $client
* @return Bookmark * @return Bookmark
*/ */
public function create(array $request, ?string $client = null): Bookmark public function create(array $request, ?string $client = null): Bookmark
@ -75,31 +74,6 @@ class BookmarkService extends Service
return $bookmark; return $bookmark;
} }
/**
* Given a URL, use `browsershot` to save an image of the page.
*
* @param string $url
* @return string The uuid for the screenshot
*
* @throws CouldNotTakeBrowsershot
*
* @codeCoverageIgnore
*/
public function saveScreenshot(string $url): string
{
$browsershot = new Browsershot();
$uuid = Uuid::uuid4();
$browsershot->url($url)
->setIncludePath('$PATH:/usr/local/bin')
->noSandbox()
->windowSize(960, 640)
->save(public_path() . '/assets/img/bookmarks/' . $uuid . '.png');
return $uuid->toString();
}
/** /**
* Given a URL, attempt to save it to the Internet Archive. * Given a URL, attempt to save it to the Internet Archive.
* *

View file

@ -31,4 +31,8 @@ return [
'region' => env('AWS_DEFAULT_REGION', 'us-east-1'), 'region' => env('AWS_DEFAULT_REGION', 'us-east-1'),
], ],
'cloudconvert' => [
'token' => env('CLOUDCONVERT_API_TOKEN'),
],
]; ];

View file

@ -0,0 +1,22 @@
<?php
namespace Tests\Feature\Jobs;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Foundation\Testing\WithFaker;
use Tests\TestCase;
class SaveScreenshotTest extends TestCase
{
    /**
     * Smoke test: a GET request to the application root responds with HTTP 200.
     *
     * NOTE(review): this is scaffold code and does not exercise the
     * SaveScreenshot job; replace it with a test of the job itself.
     *
     * @return void
     */
    public function test_example(): void
    {
        $this->get('/')->assertStatus(200);
    }
}

View file

@ -85,14 +85,15 @@ class ArticlesTest extends TestCase
public function dateScopeReturnsExpectedArticlesForDecember(): void public function dateScopeReturnsExpectedArticlesForDecember(): void
{ {
Article::factory()->create([ Article::factory()->create([
'created_at' => Carbon::now()->setMonth(11)->toDateTimeString(), 'created_at' => Carbon::now()->setDay(11)->setMonth(11)->toDateTimeString(),
'updated_at' => Carbon::now()->setMonth(11)->toDateTimeString(), 'updated_at' => Carbon::now()->setDay(11)->setMonth(11)->toDateTimeString(),
]); ]);
Article::factory()->create([ Article::factory()->create([
'created_at' => Carbon::now()->setMonth(12)->toDateTimeString(), 'created_at' => Carbon::now()->setMonth(12)->setDay(12)->toDateTimeString(),
'updated_at' => Carbon::now()->setMonth(12)->toDateTimeString(), 'updated_at' => Carbon::now()->setMonth(12)->setDay(12)->toDateTimeString(),
]); ]);
$this->assertCount(1, Article::date(date('Y'), 12)->get()); $this->assertCount(1, Article::date(date('Y'), 12)->get());
} }
} }