Skip to content

Commit

Permalink
Merge pull request #55 from wp-cli/append_next
Browse files Browse the repository at this point in the history
Lessen context duplication in db search.
  • Loading branch information
danielbachhuber committed Oct 13, 2017
2 parents 8728a1f + 44b9d79 commit 16e42ed
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 14 deletions.
40 changes: 38 additions & 2 deletions features/db-search.feature
Original file line number Diff line number Diff line change
Expand Up @@ -956,7 +956,7 @@ Feature: Search through the database
"""
And STDOUT should contain:
"""
:1234_XYXYX_2345678_X [...] X_2345678_XYXYX_234567890 [...] 345678901_XYXYX_2345
:1234_XYXYX_2345678_XYXYX_234567890 [...] 345678901_XYXYX_2345
"""
And STDERR should be empty

Expand All @@ -967,6 +967,42 @@ Feature: Search through the database
"""
And STDOUT should contain:
"""
:1234_XYXYX_2345678_X [...] X_2345678_XYXYX_234567890 [...] 345678901_XYXYX_2345
:1234_XYXYX_2345678_XYXYX_234567890 [...] 345678901_XYXYX_2345
"""
And STDERR should be empty

Scenario: Search with large data
Given a WP install
# Note "_utf8 X'CC88'" is a combining umlaut. Doing it this way as non-ASCII stuff gets stripped due to (eventually) being put through `escapeshellarg()` with a default C locale.
# Also restricted by the default MySQL values for the version-dependent size of the InnoDB redo log file (a single transaction can use at most 10% of it) and the `max_allowed_packet` size (16MB).
And I run `wp db query "INSERT INTO wp_options (option_name, option_value) VALUES ('opt_large', CONCAT(REPEAT('a', 1024 * 1024 * 8 - 9), 'o', _utf8 X'CC88', 'XYXYX'));"`

When I run `wp db search XYXYX --before_context=1 --stats`
Then STDOUT should contain:
"""
Success: Found 1 match
"""
And STDOUT should contain:
"""
:öXYXYX
"""
And STDOUT should not contain:
"""
:aöXYXYX
"""
And STDERR should be empty

When I run `wp db search XYXYX --regex --before_context=1 --stats`
Then STDOUT should contain:
"""
Success: Found 1 match
"""
And STDOUT should contain:
"""
:öXYXYX
"""
And STDOUT should not contain:
"""
:aöXYXYX
"""
And STDERR should be empty
46 changes: 34 additions & 12 deletions src/DB_Command.php
Original file line number Diff line number Diff line change
Expand Up @@ -905,21 +905,43 @@ public function search( $args, $assoc_args ) {

$bits = array();
$col_encoding = $encoding;
if ( null === $col_encoding ) {
$col_encoding = false;
if ( ( $before_context || $after_context ) && function_exists( 'mb_detect_encoding' ) ) {
$col_encoding = mb_detect_encoding( $col_val, null, true /*strict*/ );
}
if ( ! $col_encoding && ( $before_context || $after_context ) && function_exists( 'mb_detect_encoding' ) ) {
$col_encoding = mb_detect_encoding( $col_val, null, true /*strict*/ );
}
foreach ( $matches[0] as $match_arr ) {
$match = $match_arr[0];
$offset = $match_arr[1];
$append_next = false;
$last_offset = 0;
$match_cnt = count( $matches[0] );
for ( $i = 0; $i < $match_cnt; $i++ ) {
$match = $matches[0][ $i ][0];
$offset = $matches[0][ $i ][1];
$log = $colors['match'][0] . $match . $colors['match'][1];
$before = $after = '';
$after_shortened = false;

// Offsets are in bytes, so need to use `strlen()` and `substr()` before using `safe_substr()`.
$before = $before_context && $offset ? \cli\safe_substr( substr( $col_val, 0, $offset ), -$before_context, null /*length*/, false /*is_width*/, $col_encoding ) : '';
$after = $after_context ? \cli\safe_substr( substr( $col_val, $offset + strlen( $match ) ), 0, $after_context, false /*is_width*/, $col_encoding ) : '';
$bits[] = $before . $colors['match'][0] . $match . $colors['match'][1] . $after;
if ( $before_context && $offset && ! $append_next ) {
$before = \cli\safe_substr( substr( $col_val, $last_offset, $offset - $last_offset ), -$before_context, null /*length*/, false /*is_width*/, $col_encoding );
}
if ( $after_context ) {
$end_offset = $offset + strlen( $match );
$after = \cli\safe_substr( substr( $col_val, $end_offset ), 0, $after_context, false /*is_width*/, $col_encoding );
// To lessen context duplication in output, shorten the after context if it overlaps with the next match.
if ( $i + 1 < $match_cnt && $end_offset + strlen( $after ) > $matches[0][ $i + 1 ][1] ) {
$after = substr( $after, 0, $matches[0][ $i + 1 ][1] - $end_offset );
$after_shortened = true;
// On the next iteration, will append with no before context.
}
}
if ( $append_next ) {
$cnt = count( $bits );
$bits[ $cnt - 1 ] .= $log . $after;
} else {
$bits[] = $before . $log . $after;
}
$append_next = $after_shortened;
$last_offset = $offset;
}
$match_count += count( $bits );
$match_count += $match_cnt;
$col_val = implode( ' [...] ', $bits );

WP_CLI::log( $matches_only ? $col_val : ( $one_line ? "{$table_column_val}:{$pk_val}{$col_val}" : "{$pk_val}{$col_val}" ) );
Expand Down

0 comments on commit 16e42ed

Please sign in to comment.