Skip to content

Commit

Permalink
PageTriage should not compile article not in queue
Browse files Browse the repository at this point in the history
1. Fix it so it only compiles data for articles in the queue
2. Add a maintenance script to clean up the junk data

Change-Id: I0bf48b1cf75c96720e24e27b07751282dec58dd2
  • Loading branch information
bsitu authored and kaldari committed Aug 28, 2012
1 parent 00ecdd3 commit 00a7523
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 3 deletions.
6 changes: 5 additions & 1 deletion PageTriage.hooks.php
Expand Up @@ -131,7 +131,11 @@ public static function onArticleSaveComplete( $article, $user, $text, $summary,
return true;
}

$acp = ArticleCompileProcessor::newFromPageId( array( $article->getId() ) );
// if there is a previous revision, it's safe to check against slave database
$validateDb = $revision->getPrevious() ? DB_SLAVE : DB_MASTER;

// false will enforce a validation against pagetriage_page table
$acp = ArticleCompileProcessor::newFromPageId( array( $article->getId() ), false, $validateDb );
if ( $acp ) {
// Register the article object so we can get the content and other useful information
// this is primarily for replication delay from slave
Expand Down
4 changes: 2 additions & 2 deletions includes/ArticleMetadata.php
Expand Up @@ -9,7 +9,7 @@ class ArticleMetadata {

/**
* @param $pageId array - list of page id
* @param $validated bool - whether the page ids are validated
* @param $validated bool - whether the page ids have been validated
* @param $validateDb const - DB_MASTER/DB_SLAVE
*/
public function __construct( array $pageId, $validated = true, $validateDb = DB_MASTER ) {
Expand Down Expand Up @@ -186,7 +186,7 @@ public function getMetadata() {
// Compile the data if it is not available, this is a very rare case unless
// the metadata gets deleted manually
if ( $articles ) {
$acp = ArticleCompileProcessor::newFromPageId( $articles );
$acp = ArticleCompileProcessor::newFromPageId( $articles, false, DB_SLAVE );
if ( $acp ) {
$pageData += $acp->compileMetadata();
}
Expand Down
62 changes: 62 additions & 0 deletions tools/cleanupPageTriagePageTags.php
@@ -0,0 +1,62 @@
<?php
/**
* Remove rows from pagetriage_page_tags whose page id is not in pagetriage_page
*
* @ingroup Maintenance
*/

require_once( dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );

/**
 * Maintenance script that removes rows from pagetriage_page_tags whose
 * page id has no matching row in pagetriage_page.
 *
 * @ingroup Maintenance
 */
class CleanupPageTriagePageTags extends Maintenance {

	/**
	 * @var int Maximum number of distinct page ids processed per batch
	 */
	protected $batchSize = 100;

	/**
	 * Walk pagetriage_page_tags in ascending page id order, one batch at a
	 * time, and delete the tag rows of every page id that has no matching
	 * row in pagetriage_page.
	 *
	 * Candidate ids are read from DB_SLAVE; deletions are issued on DB_MASTER.
	 * The loop stops as soon as a batch comes back smaller than $batchSize.
	 */
	public function execute() {
		$dbw = wfGetDB( DB_MASTER );
		$dbr = wfGetDB( DB_SLAVE );

		// Seed $count with the batch size so the loop body runs at least once
		$count = $this->batchSize;
		// Highest page id handled so far; the next batch starts right after it
		$start = 0;

		while ( $count === $this->batchSize ) {
			// The LEFT JOIN combined with "ptrp_page_id IS NULL" keeps only
			// the tag rows that have no counterpart in pagetriage_page
			$res = $dbr->select(
				array( 'pagetriage_page_tags', 'pagetriage_page' ),
				array( 'DISTINCT ptrpt_page_id AS page_id' ),
				array(
					// Cast defensively: the value is embedded in raw SQL
					'ptrpt_page_id > ' . intval( $start ),
					'ptrp_page_id IS NULL'
				),
				__METHOD__,
				array( 'LIMIT' => $this->batchSize, 'ORDER BY' => 'ptrpt_page_id' ),
				array( 'pagetriage_page' => array( 'LEFT JOIN', 'ptrp_page_id = ptrpt_page_id' ) )
			);

			$page = array();
			foreach ( $res as $row ) {
				$page[] = $row->page_id;
				// Rows arrive ordered by page id, so the last one seen is the
				// cursor for the next batch
				$start = intval( $row->page_id );
			}
			$count = count( $page );

			if ( $count > 0 ) {
				$dbw->delete(
					'pagetriage_page_tags',
					array( 'ptrpt_page_id' => $page ),
					__METHOD__
				);

				$this->output( "processing " . $count . "\n" );
				// Pause between batches so the deletes issued on the master
				// can replicate before the next read from the slave
				wfWaitForSlaves();
			}
		}
	}
}

// Tells the maintenance framework which class to run
$maintClass = 'CleanupPageTriagePageTags';
require_once RUN_MAINTENANCE_IF_MAIN;

0 comments on commit 00a7523

Please sign in to comment.