Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
cae47e0
feat: scaffold jobs for processing mw jobs in background
m90 May 2, 2023
88a2641
fix: ensure jobs can run in parallel for different wikis
m90 May 2, 2023
9b870d9
feat: query for running mediawiki job
m90 May 2, 2023
ba4e4a8
fix: fix some syntax mistakes
m90 May 2, 2023
877690c
feat: port mediawiki job spec
m90 May 3, 2023
cf9c7db
feat: query wikis for pending jobs using http
m90 May 3, 2023
1102404
feat: merge env and image from running mediawiki pod
m90 May 3, 2023
c042a80
feat: implement naive waiting mechanism for k8s jobs
m90 May 3, 2023
19ea494
chore(k8s): keep job objects in the api for an hour
m90 May 8, 2023
2ccf1d8
chore(queues): raise timeout values so mw jobs can run
m90 May 8, 2023
f6241bd
test(mw-jobs): scaffold test case
m90 May 8, 2023
1feb785
refactor: use data_get helper
m90 May 8, 2023
a2b0d42
refactor: handle deduplication of jobs on kubernetes level
m90 May 9, 2023
380fef1
feat(mw-jobs): limit concurrent number of jobs
m90 May 10, 2023
daa8893
fix: completed jobs need to be filtered out manually
m90 May 10, 2023
2c46b47
test: use RefreshDatabase trait in sibling test
m90 May 11, 2023
790a4a6
test: add further expectations for zero job test case
m90 May 11, 2023
9f65553
test: add test case for fanning out
m90 May 11, 2023
f1fb42a
test: http error should fail job and dispatch nothing
m90 May 11, 2023
374f4ef
test: reset changes to foreign test case
m90 May 11, 2023
6cca3e4
fix: do not fail entire job when unable to poll single wiki
m90 May 15, 2023
d6ef9df
test: add test case for spawning k8s jobs
m90 May 15, 2023
d187f0c
fix: delete model in teardown
m90 May 15, 2023
2a5ffd4
docs: add changelog entry
m90 May 15, 2023
8d0e591
test: unskip login test again
m90 May 15, 2023
8dedf28
refactor: improve naming
m90 May 15, 2023
01b0942
feat: make job namespace configurable
m90 May 15, 2023
2bab770
test: tear down is not required when using RefreshDatabase
m90 May 15, 2023
a6b6e6f
refactor: laravel env helper can provide defaults
m90 May 16, 2023
3efdca7
feat: job concurrency should be handled by k8s
m90 May 16, 2023
df08fb6
refactor(jobs): filter finished jobs by excluding completed ones instead
m90 May 16, 2023
5cc6dd3
feat: jobs can expire instantly, removing the need to filter
m90 May 17, 2023
38caebe
feat: deduplicate mw jobs on k8s level
m90 May 17, 2023
3bee2e8
fix(mw-jobs): sha256 hash exceeds k8s limits for resource names
m90 May 31, 2023
0e10644
fix(mw-jobs): fail when it's not possible to look up job name
m90 May 31, 2023
edf976c
fix(mw-jobs): log message is misleading for existing jobs
m90 Jun 1, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# api

## 8x.12.0 - TBD
- Poll wikis for pending MediaWiki jobs and create Kubernetes jobs to process them if needed

## 8x.11.1 - 18 April 2023
- Do not disable elastic search on wikis after a failure

Expand Down
3 changes: 3 additions & 0 deletions app/Console/Kernel.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use App\Jobs\PruneEventPageUpdatesTable;
use App\Jobs\PruneQueryserviceBatchesTable;
use App\Jobs\SandboxCleanupJob;
use App\Jobs\PollForMediaWikiJobsJob;
use Illuminate\Console\Scheduling\Schedule;
use Illuminate\Foundation\Console\Kernel as ConsoleKernel;
use App\Jobs\PlatformStatsSummaryJob;
Expand Down Expand Up @@ -43,6 +44,8 @@ protected function schedule(Schedule $schedule)

// Schedule site stat updates for each wiki and platform-summary
$schedule->command('schedule:stats')->daily();

$schedule->job(new PollForMediaWikiJobsJob)->everyMinute();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we wanted to have this run at a higher frequency, we'd have two options:

I would guess the former is a bit cleaner.

}

/**
Expand Down
45 changes: 45 additions & 0 deletions app/Jobs/PollForMediaWikiJobsJob.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?php

namespace App\Jobs;

use App\Wiki;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Log;

class PollForMediaWikiJobsJob extends Job
{
/**
 * Poll every known wiki and dispatch a processing job for each one that
 * reports pending MediaWiki jobs.
 */
public function handle (): void
{
    // Read only the "domain" column through the query builder instead of
    // hydrating every Wiki model just to pluck a single attribute from it.
    $allWikiDomains = Wiki::query()->pluck('domain');
    foreach ($allWikiDomains as $wikiDomain) {
        if ($this->hasPendingJobs($wikiDomain)) {
            $this->enqueueWiki($wikiDomain);
        }
    }
}

/**
 * Ask a single wiki's MediaWiki API whether it has jobs waiting in its queue.
 *
 * The request is sent to the host configured in PLATFORM_MW_BACKEND_HOST and
 * carries the wiki domain in the "host" header.
 *
 * @param string $wikiDomain Domain of the wiki to poll.
 * @return bool True when the reported job count is greater than zero; false
 *              when it is zero, absent from the response, or the request failed.
 */
private function hasPendingJobs (string $wikiDomain): bool
{
    $response = Http::withHeaders([
        'host' => $wikiDomain
    ])->get(
        getenv('PLATFORM_MW_BACKEND_HOST').'/w/api.php?action=query&meta=siteinfo&siprop=statistics&format=json'
    );

    if ($response->failed()) {
        // Flag the queue job as failed, but return false instead of throwing
        // so the caller can carry on with the remaining wikis.
        $this->job->markAsFailed();
        // Log the HTTP status code. The previous message concatenated
        // clientError(), a boolean, which rendered as "1" or an empty string.
        Log::error(
            'Failure polling wiki '.$wikiDomain.' for pending MediaWiki jobs: HTTP status '.$response->status()
        );
        return false;
    }

    // A response without the statistics entry counts as "no pending jobs".
    $pendingJobsCount = data_get($response->json(), 'query.statistics.jobs', 0);
    return $pendingJobsCount > 0;
}

/**
 * Dispatch a ProcessMediaWikiJobsJob for the given wiki.
 *
 * @param string $wikiDomain Domain of the wiki whose pending jobs should be processed.
 */
private function enqueueWiki (string $wikiDomain): void
{
dispatch(new ProcessMediaWikiJobsJob($wikiDomain));
Copy link
Contributor Author

@m90 m90 May 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want/need this to fan out to another job (as is) or should we rather merge the two jobs into one?

}
}
118 changes: 118 additions & 0 deletions app/Jobs/ProcessMediaWikiJobsJob.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
<?php

namespace App\Jobs;

use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Contracts\Queue\ShouldBeUnique;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Support\Facades\Log;
use Maclof\Kubernetes\Client;
use Maclof\Kubernetes\Models\Job as KubernetesJob;

class ProcessMediaWikiJobsJob implements ShouldQueue, ShouldBeUnique
{
use InteractsWithQueue, Queueable;

private string $wikiDomain;
private string $jobsKubernetesNamespace;

/**
 * @param string $wikiDomain Domain of the wiki whose MediaWiki jobs should be run.
 */
public function __construct (string $wikiDomain)
{
    // Namespace for the Kubernetes job; falls back to "api-jobs" when
    // API_JOB_NAMESPACE is not set.
    $this->jobsKubernetesNamespace = env('API_JOB_NAMESPACE', 'api-jobs');
    $this->wikiDomain = $wikiDomain;
}

/**
 * Identifier that distinguishes instances of this job from each other:
 * the domain of the wiki it was created for.
 *
 * @return string The wiki domain.
 */
public function uniqueId(): string
{
    $domain = $this->wikiDomain;

    return $domain;
}

/**
 * Create (or re-apply) a Kubernetes job that runs all pending MediaWiki jobs
 * for this wiki.
 *
 * Looks up a running MediaWiki backend pod in the "default" namespace, copies
 * the image and environment of its first container into a job spec, adds the
 * wiki domain as WBS_DOMAIN and applies the spec in the configured jobs
 * namespace. The queue job is failed when no running pod is found or when the
 * applied job comes back without a name.
 *
 * @param Client $kubernetesClient Kubernetes API client used for the pod lookup and the job apply.
 */
public function handle (Client $kubernetesClient): void
{
// The MediaWiki pods are looked up in the "default" namespace.
$kubernetesClient->setNamespace('default');
$mediawikiPod = $kubernetesClient->pods()->setFieldSelector([
'status.phase' => 'Running'
])->setLabelSelector([
'app.kubernetes.io/name' => 'mediawiki',
'app.kubernetes.io/component' => 'app-backend'
])->first();

// Without a running pod there is no image/env to copy, so give up.
if ($mediawikiPod === null) {
$this->fail(
new \RuntimeException(
'Unable to find a running MediaWiki pod in the cluster, '.
'cannot continue.'
)
);
return;
}
$mediawikiPod = $mediawikiPod->toArray();

// From here on the client operates on the namespace the jobs are created in.
$kubernetesClient->setNamespace($this->jobsKubernetesNamespace);
$jobSpec = new KubernetesJob([
'metadata' => [
// One name per wiki, derived from a sha1 hash of the domain.
'name' => 'run-all-mw-jobs-'.hash('sha1', $this->wikiDomain),
'namespace' => $this->jobsKubernetesNamespace,
'labels' => [
'app.kubernetes.io/instance' => $this->wikiDomain,
'app.kubernetes.io/name' => 'run-all-mw-jobs'
]
],
'spec' => [
// NOTE(review): presumably 0 so that a finished job is removed right away and a
// later apply under the same name can start a fresh one - confirm.
'ttlSecondsAfterFinished' => 0,
'template' => [
'metadata' => [
'name' => 'run-all-mw-jobs'
],
'spec' => [
'containers' => [
0 => [
'name' => 'run-all-mw-jobs',
// Image and environment are taken from the first container of the pod found above.
'image' => $mediawikiPod['spec']['containers'][0]['image'],
'env' => array_merge(
$mediawikiPod['spec']['containers'][0]['env'],
[['name' => 'WBS_DOMAIN', 'value' => $this->wikiDomain]]
),
// Shell loop: run jobs in batches of up to 1000 until showJobs.php prints 0.
'command' => [
0 => 'bash',
1 => '-c',
2 => <<<'CMD'
JOBS_TO_GO=1
while [ "$JOBS_TO_GO" != "0" ]
do
echo "Running 1000 jobs"
php w/maintenance/runJobs.php --maxjobs 1000
echo Waiting for 1 seconds...
sleep 1
JOBS_TO_GO=$(php w/maintenance/showJobs.php | tr -d '[:space:]')
echo $JOBS_TO_GO jobs to go
done
CMD
],
]
],
'restartPolicy' => 'Never'
]
]
]
]);

// apply() is used instead of create(); together with the per-wiki name this
// is meant to let Kubernetes deduplicate jobs for the same wiki.
$job = $kubernetesClient->jobs()->apply($jobSpec);
Copy link
Contributor Author

@m90 m90 May 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turns out that when using apply instead of create and using a unique .metadata.name per wiki, we can have Kubernetes deduplicate jobs for us and we don't have to check anything at all. Only downside is that we have to use a .spec.ttlSecondsAfterFinished of 0. Still think being able to keep this job free from any logic at all is worth it.

$jobName = data_get($job, 'metadata.name');
if (!$jobName) {
// The k8s client does not fail reliably on 4xx responses, so checking the name
// currently serves as poor man's error handling.
$this->fail(
new \RuntimeException('Job creation for wiki "'.$this->wikiDomain.'" failed.')
);
return;
}
// Reaching this point means the API returned a named job, either newly created or already present.
Log::info(
'MediaWiki Job for wiki "'.$this->wikiDomain.'" exists or was created with name "'.$jobName.'".'
);

return;
}

}
3 changes: 1 addition & 2 deletions config/queue.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,9 @@
'driver' => 'redis',
'connection' => 'default',
'queue' => env('REDIS_QUEUE', 'default'),
'retry_after' => 90,
'retry_after' => 100,
'block_for' => null,
],

],

/*
Expand Down
1 change: 1 addition & 0 deletions phpunit.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@
<env name="APP_ENV" value="testing"/>
<env name="CACHE_DRIVER" value="array"/>
<env name="QUEUE_CONNECTION" value="sync"/>
<env name="PLATFORM_MW_BACKEND_HOST" value="mediawiki-139-app-backend.default.svc.cluster.default"/>
</php>
</phpunit>
94 changes: 94 additions & 0 deletions tests/Jobs/PollForMediaWikiJobsJobTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
<?php

namespace Tests\Jobs;

use App\Wiki;
use App\Jobs\PollForMediaWikiJobsJob;
use App\Jobs\ProcessMediaWikiJobsJob;
use Tests\TestCase;
use Illuminate\Contracts\Queue\Job;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Bus;
use Illuminate\Database\Eloquent\Model;

/**
 * Covers PollForMediaWikiJobsJob: polling a wiki's job statistics over HTTP
 * and fanning out to ProcessMediaWikiJobsJob when jobs are pending.
 */
class PollForMediaWikiJobsJobTest extends TestCase
{
    use RefreshDatabase;

    private Model $wiki;

    public function setUp(): void
    {
        parent::setUp();
        // A single wiki in the database is enough for the poll loop to run once.
        $this->wiki = Wiki::factory()->create();
    }

    public function testNoJobs()
    {
        $this->fakeStatisticsEndpoint(['query' => ['statistics' => ['jobs' => 0]]], 200);
        Bus::fake();

        $queueJob = $this->createMock(Job::class);
        $queueJob->expects($this->never())->method('fail');
        $queueJob->expects($this->never())->method('markAsFailed');

        $this->runPollJob($queueJob);

        Bus::assertNothingDispatched();
    }

    public function testWithJobs()
    {
        $this->fakeStatisticsEndpoint(['query' => ['statistics' => ['jobs' => 3]]], 200);
        Bus::fake();

        $queueJob = $this->createMock(Job::class);
        $queueJob->expects($this->never())->method('fail');
        $queueJob->expects($this->never())->method('markAsFailed');

        $this->runPollJob($queueJob);

        Bus::assertDispatched(ProcessMediaWikiJobsJob::class);
    }

    public function testWithFailure()
    {
        $this->fakeStatisticsEndpoint(['error' => 'Something went wrong'], 500);
        Bus::fake();

        $queueJob = $this->createMock(Job::class);
        $queueJob->expects($this->once())->method('markAsFailed');
        $queueJob->expects($this->never())->method('fail');

        $this->runPollJob($queueJob);

        Bus::assertNothingDispatched();
    }

    /**
     * Stub the MediaWiki siteinfo statistics endpoint with a canned response.
     *
     * @param array $body   Payload returned as the response body.
     * @param int   $status HTTP status code of the faked response.
     */
    private function fakeStatisticsEndpoint(array $body, int $status): void
    {
        $url = getenv('PLATFORM_MW_BACKEND_HOST').'/w/api.php?action=query&meta=siteinfo&siprop=statistics&format=json';

        Http::fake([
            $url => Http::response($body, $status)
        ]);
    }

    /**
     * Attach the given queue job mock to a fresh poll job and execute it.
     *
     * @param Job $queueJob Mocked queue job the expectations were registered on.
     */
    private function runPollJob(Job $queueJob): void
    {
        $pollJob = new PollForMediaWikiJobsJob();
        $pollJob->setJob($queueJob);
        $pollJob->handle();
    }
}
86 changes: 86 additions & 0 deletions tests/Jobs/ProcessMediaWikiJobsJobTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
<?php

namespace Tests\Jobs;

use Illuminate\Foundation\Testing\RefreshDatabase;
use Tests\TestCase;
use Illuminate\Contracts\Queue\Job;
use App\Jobs\ProcessMediaWikiJobsJob;
use Maclof\Kubernetes\Client;
use Http\Adapter\Guzzle6\Client as Guzzle6Client;
use GuzzleHttp\HandlerStack;
use GuzzleHttp\Handler\MockHandler;
use GuzzleHttp\Psr7\Response;

/**
 * Covers ProcessMediaWikiJobsJob against a Kubernetes client whose HTTP
 * layer is replaced by a queue of canned responses.
 */
class ProcessMediaWikiJobsJobTest extends TestCase
{
    use RefreshDatabase;

    public function testJobFailOnNoMediaWikiPod()
    {
        $queueJob = $this->createMock(Job::class);
        $queueJob->expects($this->once())->method('fail');

        $job = new ProcessMediaWikiJobsJob('test.wikibase.cloud');
        $job->setJob($queueJob);

        // The pod lookup returns an empty list, so the job has to fail.
        $job->handle($this->kubernetesClientReplying([
            new Response(200, [], json_encode([ 'items' => [] ])),
        ]));
    }

    public function testJobDoesNotFail()
    {
        $queueJob = $this->createMock(Job::class);
        $queueJob->expects($this->never())->method('fail');

        $job = new ProcessMediaWikiJobsJob('test.wikibase.cloud');
        $job->setJob($queueJob);

        $runningPod = [
            'kind' => 'Pod',
            'spec' => [
                'containers' => [
                    [
                        'image' => 'helloworld',
                        'env' => [
                            'SOMETHING' => 'something'
                        ]
                    ]
                ]
            ]
        ];
        $createdJob = [
            'metadata' => [
                'name' => 'some-job-name'
            ]
        ];

        // Responses are consumed in order: the pod lookup first, then
        // (presumably) the existence check made by apply(), then the creation.
        $job->handle($this->kubernetesClientReplying([
            new Response(200, [], json_encode([ 'items' => [ $runningPod ] ])),
            new Response(200, [], json_encode([ 'items' => [] ])),
            new Response(201, [], json_encode($createdJob)),
        ]));
    }

    /**
     * Build a Kubernetes client that answers requests from the given queue
     * of responses instead of talking to a real API server.
     *
     * @param Response[] $responses Responses handed out in order, one per request.
     */
    private function kubernetesClientReplying(array $responses): Client
    {
        $httpClient = Guzzle6Client::createWithConfig([
            'handler' => HandlerStack::create(new MockHandler($responses)),
            'verify' => '/var/run/secrets/kubernetes.io/serviceaccount/ca.crt',
        ]);

        return new Client([
            'master' => 'https://kubernetes.default.svc',
            'token' => '/var/run/secrets/kubernetes.io/serviceaccount/token',
        ], null, $httpClient);
    }
}