Skip to content

Commit

Permalink
Ähnlichkeitssuche bei konjunktiver Suche (AND) jetzt korrekt implemen…
Browse files Browse the repository at this point in the history
…tiert
  • Loading branch information
tyrant88 committed Dec 27, 2016
1 parent e25d82b commit cba53e5
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 63 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
# Änderungen von search_it

## Version 6.1.3 (2016-12-27)
- Ähnlichkeitssuche bei konjunktiver Suche (AND) jetzt korrekt implementiert
- Hinweis, dass der Suchindex erneuert werden muss, wenn die Ähnlichkeitssuche eingeschaltet wird

## Version 6.1.2 (2016-11-21)
- beim Löschen von Tabellen wird jetzt `TRUNCATE` benutzt, damit die Auto-Increment-Werte zurückgesetzt werden @Flo
- update.php löscht die alten Plugins "search_highlighter" und "reindex" aus den 5er Versionen
Expand Down
54 changes: 54 additions & 0 deletions install.sql
@@ -0,0 +1,54 @@
/*
 * Search index table. Holds the text columns (plaintext, unchangedtext)
 * that the search queries match against, plus a teaser and a values blob.
 *
 * fid, ftable, fcolumn, catid, clang, filename and fileext are all nullable,
 * presumably because each texttype only fills the subset that applies to it
 * (TODO confirm against the indexing code).
 *
 * NOTE(review): the four longtext columns declare a literal empty-string
 * default. MySQL does not accept literal defaults on TEXT columns and,
 * depending on server version and sql_mode, either rejects the statement or
 * drops the default with a warning. Verify on the supported servers.
 *
 * NOTE(review): this is the only table created as MyISAM, the others use
 * InnoDB. Presumably chosen for FULLTEXT support on older servers, confirm
 * before changing the engine.
 */
CREATE TABLE IF NOT EXISTS `%TABLE_PREFIX%search_it_index` (
`id` int(11) NOT NULL auto_increment,
`fid` varchar(255) NULL,
`catid` int(11) NULL,
`ftable` varchar(255) NULL,
`fcolumn` varchar(255) NULL,
`texttype` varchar(255) NOT NULL,
`clang` int(11) NULL,
`filename` varchar(255) NULL,
`fileext` varchar(255) NULL,
`plaintext` longtext NOT NULL default '',
`unchangedtext` longtext NOT NULL default '',
`teaser` longtext NOT NULL default '',
`values` longtext NOT NULL default '',
PRIMARY KEY (`id`),
INDEX (`fid`),
/*
 * One FULLTEXT index per column list that a MATCH() expression may use:
 * plaintext alone, unchangedtext alone, and both together. A natural-language
 * MATCH() needs an index whose column list is exactly the one it names.
 */
FULLTEXT (`plaintext`),
FULLTEXT (`unchangedtext`),
FULLTEXT (`plaintext`,`unchangedtext`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;

/*
 * Re-declares fid as a nullable varchar(255). On a fresh install this repeats
 * the definition from CREATE TABLE above. Presumably it exists to migrate
 * tables created by an earlier version, where CREATE TABLE IF NOT EXISTS
 * does nothing (TODO confirm the previous column type).
 */
ALTER TABLE `%TABLE_PREFIX%search_it_index` CHANGE COLUMN fid fid varchar(255) NULL;
/* ALTER TABLE `%TABLE_PREFIX%search_it_index` ADD COLUMN `values` text NOT NULL default ''; */

/*DROP TRIGGER IF EXISTS minfid;
CREATE TRIGGER minfid BEFORE INSERT ON `%TABLE_PREFIX%search_it_index`
FOR EACH ROW
SET NEW.fid = CASE WHEN NEW.fid IS NULL THEN (SELECT IF(IFNULL(MIN(fid), 0) > 0, 0, IFNULL(MIN(fid), 0)) FROM `%TABLE_PREFIX%search_it_index`) - 1 ELSE NEW.fid END;*/

/*
 * Search result cache. Each row pairs a 32-character hash with a nullable
 * longtext payload (returnarray).
 * NOTE(review): presumably the hash is an md5 of the search parameters and
 * returnarray a serialized result, confirm against the caching code.
 * The hash column has no index, so lookups by hash scan the table.
 */
CREATE TABLE IF NOT EXISTS `%TABLE_PREFIX%search_it_cache` (
`id` int(11) NOT NULL auto_increment,
`hash` char(32) NOT NULL,
`returnarray` longtext NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

/*
 * Link table between cache entries and index rows, presumably so that cached
 * results can be invalidated when an index row changes (TODO confirm).
 * NOTE(review): cache_id is varchar(255) while search_it_cache.id is int(11).
 * It is not visible here whether cache_id stores that id or something else,
 * verify before adding a foreign key or index.
 */
CREATE TABLE IF NOT EXISTS `%TABLE_PREFIX%search_it_cacheindex_ids` (
`id` int(11) NOT NULL auto_increment,
`index_id` int(11) NULL,
`cache_id` varchar(255) NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

/*
 * Keyword table used by the similar-words lookup. Each keyword is stored
 * once per language (unique on keyword plus clang) together with three
 * phonetic codes (soundex, metaphone, colognephone) and a counter.
 * The similarity query compares the phonetic columns for equality and
 * sums count.
 *
 * NOTE(review): clang is NOT NULL with default -1, yet the similarity query
 * in lib/search_it.php filters on a concrete clang OR clang IS NULL. That
 * NULL branch can never match rows of this table, check whether -1 was the
 * intended wildcard value.
 */
CREATE TABLE IF NOT EXISTS `%TABLE_PREFIX%search_it_keywords` (
`id` int(11) NOT NULL auto_increment,
`keyword` varchar(255) NOT NULL,
`soundex` varchar(255) NOT NULL,
`metaphone` varchar(255) NOT NULL,
`colognephone` varchar(255) NOT NULL,
`clang` int(11) NOT NULL DEFAULT -1,
`count` int(11) NOT NULL DEFAULT 1,
PRIMARY KEY (`id`),
UNIQUE (`keyword`,`clang`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
4 changes: 1 addition & 3 deletions lib/cronjob.php
@@ -1,7 +1,5 @@
<?php
/**
* @package redaxo\search_it
*/

class rex_cronjob_reindex extends rex_cronjob
{
public function execute()
Expand Down
2 changes: 1 addition & 1 deletion lib/pdf2txt.php
Expand Up @@ -7,7 +7,7 @@ class pdf2txt
var $data;

// constructor
function pdf2txt($_src = false, $_dest = false)
function __construct($_src = false, $_dest = false)
{
$this->setSource($_src);
$this->setDestination($_dest);
Expand Down
124 changes: 73 additions & 51 deletions lib/search_it.php
Expand Up @@ -878,8 +878,8 @@ public function parseSearchString($_searchString)
preg_match_all('~(?:(\+*)"([^"]*)")|(?:(\+*)(\S+))~isu', $_searchString, $matches, PREG_SET_ORDER);

$count = 0;
$replaceValues = array();
$sql = rex_sql::factory();
$searchWords = array();

foreach ($matches as $match) {
if (count($match) == 5) {
// words without double quotes (foo)
Expand All @@ -892,6 +892,7 @@ public function parseSearchString($_searchString)
} else {
continue;
}
if ( in_array($word,$searchWords) ) { continue; } else { $searchWords[] = $word; }

$notBlacklisted = true;
// blacklisted words are excluded
Expand Down Expand Up @@ -1012,6 +1013,7 @@ public function setSearchAllArticlesAnyway($_bool = false)
$this->searchAllArticlesAnyway = $_bool;
$this->hashMe .= $_bool;
}

/**
* deprecated: use setSearchAllArticlesAnyway()
*
Expand Down Expand Up @@ -1738,9 +1740,14 @@ public function deleteKeywords()
function search($_search)
{
$startTime = microtime(true);
$this->searchString = trim(stripslashes($_search));

$keywordCount = $this->parseSearchString($this->searchString);
$return = array();
$return['errormessages'] = '';
$return['simwordsnewsearch'] = '';
$return['simwords'] = array();

$this->searchString = trim(stripslashes($_search));
$keywordCount = $this->parseSearchString($this->searchString); // setzt $this->searchArray

if (empty($this->searchString) OR empty($this->searchArray)) {
return array(
Expand All @@ -1767,17 +1774,14 @@ function search($_search)

// EP registrieren
rex_extension::registerPoint(new rex_extension_point('SEARCH_IT_SEARCH_EXECUTED', $this->cachedArray));
//var_dump($this->cachedArray['sql']);

return $this->cachedArray;
}

$return = array();
$return['errormessages'] = '';
$return['simwordsnewsearch'] = '';
$return['simwords'] = array();

if ($this->similarwords) {
$simWordsSQL = rex_sql::factory();
$simwords = array();
$simwordQuerys = array();
foreach ($this->searchArray as $keyword) {
$sounds = array();
if ($this->similarwordsMode & SEARCH_IT_SIMILARWORDS_SOUNDEX) {
Expand All @@ -1791,39 +1795,47 @@ function search($_search)
if ($this->similarwordsMode & SEARCH_IT_SIMILARWORDS_COLOGNEPHONE) {
$sounds[] = "colognephone = '" . soundex_ger($keyword['search']) . "'";
}
$simwords[] = sprintf("
SELECT
$simwordQuerys[] = sprintf("
(SELECT
GROUP_CONCAT(DISTINCT keyword SEPARATOR ' ') as keyword,
%s AS typedin,
SUM(count) as count
FROM `%s`
WHERE 1
%s
AND (%s)",
AND (%s))",
$simWordsSQL->escape($keyword['search']),
$this->tablePrefix . 'search_it_keywords',
($this->clang !== false) ? 'AND (clang = ' . intval($this->clang) . ' OR clang IS NULL)' : '',
implode(' OR ', $sounds)
);
}
//echo '<br><pre>'; var_dump($simwordQuerys);echo '</pre>'; // Eine SQL-Abfrage pro Suchwort

// simwords
$simWordsSQL = rex_sql::factory();
foreach ($simWordsSQL->getArray(sprintf("
%s
GROUP BY %s
ORDER BY SUM(count)",
implode(' UNION ', $simwords),
$this->similarwordsPermanent ? "''" : 'keyword, typedin'
)
) as $simword) {
SELECT * FROM (%s) AS t
%s
ORDER BY count",
implode(' UNION ', $simwordQuerys),
$this->similarwordsPermanent ? '' : 'GROUP BY keyword, typedin'
)
) as $simword) {
//echo '<br><pre>'; var_dump($simword);echo '</pre>';
$return['simwords'][$simword['typedin']] = array(
'keyword' => $simword['keyword'],
'typedin' => $simword['typedin'],
'count' => $simword['count'],
);
}

/*echo '<br><pre>' .sprintf("
SELECT * FROM (%s) AS t
%s
ORDER BY count",
implode(' UNION ', $simwordQuerys),
$this->similarwordsPermanent ? '' : 'GROUP BY keyword, typedin'
).'</pre>'; die();*/
$newsearch = array();
foreach ($this->searchArray as $keyword) {
if (preg_match('~\s~isu', $keyword['search'])) {
Expand All @@ -1842,9 +1854,12 @@ function search($_search)
$return['simwordsnewsearch'] = implode(' ', $newsearch);
}

//print_r($this->searchArray);echo '<br><br>';
if ($this->similarwordsPermanent) {
$keywordCount = $this->parseSearchString($this->searchString . ' ' . $return['simwordsnewsearch']);
}
//echo '<br><pre>'; print_r($return['simwords']); echo '</pre>';


$searchColumns = array();
switch ($this->textMode) {
Expand All @@ -1861,48 +1876,55 @@ function search($_search)
$searchColumns[] = 'plaintext';
}

$sql = rex_sql::factory();

$Awhere = array();
$sql = rex_sql::factory();
$A2Where = array();
$Amatch = array();

foreach ($this->searchArray as $keyword) {
// build MATCH-Array
$match = sprintf("(( MATCH (`%s`) AGAINST (%s)) * %d)", implode('`,`', $searchColumns), $sql->escape($keyword['search']), $keyword['weight']);

if ($this->searchHtmlEntities) {
$match .= ' + ' . sprintf("(( MATCH (`%s`) AGAINST (%s)) * %d)", implode('`,`', $searchColumns), $sql->escape(htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8')), $keyword['weight']);
}

$Amatch[] = $match;
foreach ($this->searchArray as $searchword) {
$AWhere = array();
$similarkeywords = '';
if ( $this->similarwords && !isset($return['simwords'][$searchword['search']])) { continue; }
if ( isset($return['simwords'][$searchword['search']]['keyword']) ) { $similarkeywords = $return['simwords'][$searchword['search']]['keyword']; }
foreach ($this->searchArray as $keyword) {
if ( $keyword['search'] !== $searchword['search'] && !in_array( $keyword['search'], explode(' ', $similarkeywords)) ) { continue; }
// build MATCH-Array
$match = sprintf("(( MATCH (`%s`) AGAINST (%s)) * %d)", implode('`,`', $searchColumns), $sql->escape($keyword['search']), $keyword['weight']);
if ($this->searchHtmlEntities) {
$match .= ' + ' . sprintf("(( MATCH (`%s`) AGAINST (%s)) * %d)", implode('`,`', $searchColumns), $sql->escape(htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8')), $keyword['weight']);
}
$Amatch[] = $match;

// build WHERE-Array
if ($this->searchMode == 'match') {
$AWhere[] = $match;
} else {
$tmpWhere = array();
foreach ($searchColumns as $searchColumn) {
$tmpWhere[] = sprintf("(`%s` LIKE '%%%s%%')", $searchColumn, str_replace(array('%', '_'), array('\%', '\_'), substr($sql->escape($keyword['search']), 1, -1)));
// build WHERE-Array
if ($this->searchMode == 'match') {
$AWhere[] = $match;
} else {
$tmpWhere = array();
foreach ($searchColumns as $searchColumn) {
$tmpWhere[] = sprintf("(`%s` LIKE '%%%s%%')", $searchColumn, str_replace(array('%', '_'), array('\%', '\_'), substr($sql->escape($keyword['search']), 1, -1)));

if ($this->searchHtmlEntities) {
$tmpWhere[] = sprintf("(`%s` LIKE '%%%s%%')", $searchColumn, str_replace(array('%', '_'), array('\%', '\_'), htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8')));
if ($this->searchHtmlEntities) {
$tmpWhere[] = sprintf("(`%s` LIKE '%%%s%%')", $searchColumn, str_replace(array('%', '_'), array('\%', '\_'), htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8')));
}
}
$AWhere[] = '(' . implode(' OR ', $tmpWhere) . ')';
}
$AWhere[] = '(' . implode(' OR ', $tmpWhere) . ')';
}
// echo '<br><pre>'; print_r($keyword); var_dump($AWhere);echo '</pre>';

/*if($this->logicalMode == ' AND ')
$Awhere[] = '+*'.$keyword['search'].'*';
else
$AWhere[] = '*'.$keyword['search'].'*';*/
/*if($this->logicalMode == ' AND ')
$Awhere[] = '+*'.$keyword['search'].'*';
else
$AWhere[] = '*'.$keyword['search'].'*';*/
}
$A2Where[] = '(' . implode(' OR ', $AWhere) . ')';
}

// build MATCH-String
$match = '(' . implode(' + ', $Amatch) . ' + 1)';

// build WHERE-String
$where = '(' . implode($this->logicalMode, $AWhere) . ')';
#$where = sprintf("( MATCH (%s) AGAINST ('%s' IN BOOLEAN MODE)) > 0",implode(',',$searchColumns),implode(' ',$Awhere));
$where = '(' . implode($this->logicalMode, $A2Where) . ')';
//$where = sprintf("( MATCH (%s) AGAINST ('%s' IN BOOLEAN MODE)) > 0",implode(',',$searchColumns),implode(' ',$Awhere));


// language
if ($this->clang !== false) {
Expand Down Expand Up @@ -2017,7 +2039,7 @@ function search($_search)
$this->limit[0], $this->limit[1]
);
}
//echo '<pre>'.$query.'</pre>';
//echo '<pre>'.$query.'</pre>';die();
//echo '<pre>'.implode(",\n",$selectFields).'</pre>';
try {
$sqlResult = $sql->getArray($query);
Expand Down
2 changes: 1 addition & 1 deletion package.yml
@@ -1,5 +1,5 @@
package: search_it
version: '6.1.1'
version: '6.1.3'
author: Robert Rupf/Norbert Micheel
supportpage: https://github.com/tyrant88/search_it

Expand Down
1 change: 1 addition & 0 deletions pages/settings.mode.php
Expand Up @@ -20,6 +20,7 @@

$changed = array_keys(array_merge(array_diff_assoc($posted_config,$this->getConfig()), array_diff_assoc($this->getConfig(),$posted_config)));
foreach ( array(
'similarwordsmode',
'indexmode',
'indexoffline',
'ep_outputfilter',
Expand Down
2 changes: 1 addition & 1 deletion plugins/documentation/package.yml
@@ -1,5 +1,5 @@
package: search_it/documentation
version: '6.1.1'
version: '6.1.3'
author: Alexander Walther/Norbert Micheel

title: 'translate:search_it_documentation_title'
Expand Down
7 changes: 3 additions & 4 deletions plugins/documentation/pages/index.php
Expand Up @@ -4,12 +4,11 @@
$path = rex_path::plugin('search_it','documentation','docs/'.$langpath.'/');

$files = [];
foreach(scandir($path) as $i_file) {
if ($i_file != "." && $i_file != "..") {
$files[$i_file] = $i_file;
}
foreach(rex_finder::factory($path)->filesOnly() as $file) {
$files[$file->getFileName()] = $file->getFileName();
}


if ( rex_request("search_it_document_image","string") != "" && isset($files[rex_request("search_it_document_image","string")]) ) {
ob_end_clean();
$content = rex_file::get($path.basename(rex_request("search_it_document_image","string")));
Expand Down
2 changes: 1 addition & 1 deletion plugins/plaintext/package.yml
@@ -1,5 +1,5 @@
package: search_it/plaintext
version: '6.1.1'
version: '6.1.3'
author: Robert Rupf/Norbert Micheel

title: 'translate:search_it_plaintext_title'
Expand Down
2 changes: 1 addition & 1 deletion plugins/stats/package.yml
@@ -1,5 +1,5 @@
package: search_it/stats
version: '6.1.1'
version: '6.1.3'
author: Robert Rupf/Norbert Micheel

title: 'translate:search_it_stats_plugin_title'
Expand Down

0 comments on commit cba53e5

Please sign in to comment.