Skip to content

Commit

Permalink
minor changes
Browse files Browse the repository at this point in the history
- correct wiki link
- utf-8 support only
  • Loading branch information
xong committed Jan 7, 2014
1 parent d2dbf57 commit 1e7850e
Show file tree
Hide file tree
Showing 8 changed files with 20 additions and 236 deletions.
44 changes: 14 additions & 30 deletions classes/class.rexsearch.inc.php
Expand Up @@ -87,7 +87,6 @@ class RexSearch
var $surroundTags = array('<strong>','</strong>');
var $tablePrefix;
var $textMode = 'plain';
var $utf8 = true;
var $whitelist = array();
var $where = '';

Expand Down Expand Up @@ -186,26 +185,18 @@ function RexSearch($_clang = false, $_loadSettings = true, $_useStopwords = true
$this->tablePrefix = $REX['TABLE_PREFIX'];
$this->includePath = $REX['INCLUDE_PATH'];
$this->generatedPath = $REX['GENERATED_PATH'];
$this->utf8 = rex_lang_is_utf8();
$this->documentRoot = realpath($_SERVER['DOCUMENT_ROOT']);
$this->mediaFolder = $REX['MEDIAFOLDER'];

#$I18N->appendFile($REX['INCLUDE_PATH'].'/addons/rexsearch/lang/');
if($this->utf8)
$locale = 'de_de_utf8';
else
$locale = 'de_de';
$locale = 'de_de';

$langfile = new i18n($locale, $REX['INCLUDE_PATH'].'/addons/rexsearch/lang/');
$this->ellipsis = $langfile->Msg('a587_ellipsis');

// german stopwords
if($_useStopwords)
{
if($this->utf8)
include $this->includePath.'/addons/rexsearch/lang/stopwords_utf8.inc.php';
else
include $this->includePath.'/addons/rexsearch/lang/stopwords.inc.php';
include $this->includePath.'/addons/rexsearch/lang/stopwords.inc.php';
$this->stopwords = $german_stopwords;
}
}
Expand Down Expand Up @@ -426,16 +417,14 @@ function setSearchInIDs($_searchInIDs, $_reset = false)

/**
* If utf8-encoding is used, the parameter will be appended with an "u".
* Since only UTF-8 is supported, the "u" modifier is always appended.
*
* @param string $_regex
* @return string
*/
function encodeRegex($_regex)
{
if($this->utf8)
return $_regex.'u';
else
return $_regex;
return $_regex.'u';
}

/**
Expand Down Expand Up @@ -675,7 +664,7 @@ function indexArticle($_id,$_clang = false)

foreach(preg_split($this->encodeRegex('~[[:punct:][:space:]]+~ism'), $plaintext) as $keyword)
{
if($this->significantCharacterCount <= mb_strlen($keyword,$this->utf8?'UTF-8':mb_internal_encoding()))
if($this->significantCharacterCount <= mb_strlen($keyword,'UTF-8'))
$keywords[] = array('search'=>$keyword,'clang'=>$langID);
}

Expand Down Expand Up @@ -831,7 +820,7 @@ function indexColumn($_table, $_column, $_idcol = false, $_id = false, $_start =

foreach(preg_split($this->encodeRegex('~[[:punct:][:space:]]+~ism'), $plaintext) as $keyword)
{
if($this->significantCharacterCount <= mb_strlen($keyword,$this->utf8?'UTF-8':mb_internal_encoding()))
if($this->significantCharacterCount <= mb_strlen($keyword,'UTF-8'))
$keywords[] = array('search'=>$keyword,'clang'=>is_null($indexData['clang'])?false:$indexData['clang']);
}

Expand Down Expand Up @@ -950,10 +939,7 @@ function indexFile($_filename, $_doPlaintext = false, $_clang = false, $_fid = f
{
$tempFile = tempnam($this->generatedPath.'/files/', 'rexsearch');

if($this->utf8)
$encoding = 'UTF-8';
else
$encoding = 'Latin1';
$encoding = 'UTF-8';

exec('pdftotext '.escapeshellarg($this->documentRoot.'/'.$_filename).' '.escapeshellarg($tempFile).' -enc '.$encoding, $dummy, $return);
if($return > 0)
Expand Down Expand Up @@ -987,8 +973,6 @@ function indexFile($_filename, $_doPlaintext = false, $_clang = false, $_fid = f
{
require_once 'class.pdf2txt.inc.php';
$text = pdf2txt::directConvert($pdfContent);
if(!$this->utf8)
$text = utf8_decode($text);
$error = false;
}
}
Expand Down Expand Up @@ -1016,7 +1000,7 @@ function indexFile($_filename, $_doPlaintext = false, $_clang = false, $_fid = f
return A587_FILE_NOEXIST;
}

$text = @iconv(mb_detect_encoding($text), $this->utf8?'UTF-8':'ISO-8859-15', $text);
$text = @iconv(mb_detect_encoding($text), 'UTF-8', $text);

// Plaintext
if(empty($plaintext))
Expand Down Expand Up @@ -1053,7 +1037,7 @@ function indexFile($_filename, $_doPlaintext = false, $_clang = false, $_fid = f
$keywords = array();
foreach(preg_split($this->encodeRegex('~[[:punct:][:space:]]+~ism'), $plaintext) as $keyword)
{
if($this->significantCharacterCount <= mb_strlen($keyword,$this->utf8?'UTF-8':mb_internal_encoding()))
if($this->significantCharacterCount <= mb_strlen($keyword,'UTF-8'))
$keywords[] = array('search'=>$keyword,'clang'=>!isset($fileData['clang'])?false:$fileData['clang']);
}
$this->storeKeywords($keywords, false);
Expand Down Expand Up @@ -1624,7 +1608,7 @@ function getHighlightedText($_text)
{
foreach($this->searchArray as $keyword)
{
$this->searchArray[] = array('search' => htmlentities($keyword['search'], ENT_COMPAT, $this->utf8?'UTF-8':mb_internal_encoding()));
$this->searchArray[] = array('search' => htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8'));
}
}

Expand Down Expand Up @@ -1933,8 +1917,8 @@ function storeKeywords($_keywords, $_doCount = true)
foreach($_keywords as $keyword)
{
if(
!in_array(mb_strtolower($keyword['search'], $this->utf8?'UTF-8':mb_internal_encoding()), $this->blacklist) AND
!in_array(mb_strtolower($keyword['search'], $this->utf8?'UTF-8':mb_internal_encoding()), $this->stopwords)
!in_array(mb_strtolower($keyword['search'], 'UTF-8'), $this->blacklist) AND
!in_array(mb_strtolower($keyword['search'], 'UTF-8'), $this->stopwords)
)
{
$simWords[] = sprintf(
Expand Down Expand Up @@ -2124,7 +2108,7 @@ function search($_search)

if($this->searchEntities)
{
$match .= ' + '.sprintf("(( MATCH (`%s`) AGAINST ('%s')) * %d)", implode('`,`',$searchColumns), $sql->escape(htmlentities($keyword['search'], ENT_COMPAT, $this->utf8?'UTF-8':mb_internal_encoding())), $keyword['weight']);
$match .= ' + '.sprintf("(( MATCH (`%s`) AGAINST ('%s')) * %d)", implode('`,`',$searchColumns), $sql->escape(htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8')), $keyword['weight']);
}

$Amatch[] = $match;
Expand All @@ -2143,7 +2127,7 @@ function search($_search)

if($this->searchEntities)
{
$tmpWhere[] = sprintf("(`%s` LIKE '%%%s%%')", $searchColumn, str_replace(array('%','_'),array('\%','\_'),$sql->escape(htmlentities($keyword['search'], ENT_COMPAT, $this->utf8?'UTF-8':mb_internal_encoding()))));
$tmpWhere[] = sprintf("(`%s` LIKE '%%%s%%')", $searchColumn, str_replace(array('%','_'),array('\%','\_'),$sql->escape(htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8'))));
}
}

Expand Down
4 changes: 2 additions & 2 deletions lang/de_de.lang
Expand Up @@ -94,5 +94,5 @@ a587_ellipsis =

a587_cat_deleted = Alle Artikel der Kategorie sind noch im Suchindex enthalten. Sie müssen den <a href="index.php?page=rexsearch&subpage=generate">Suchindex erneuern</a>.

a587_help_wiki = Die Dokumentation des Addons finden Sie unter <a href="http://wiki.redaxo.de/index.php?n=R4.RexSearch">http://wiki.redaxo.de/index.php?n=R4.RexSearch</a>.
a587_help_forum = Sollten Sie Fragen oder Probleme haben, können Sie diese im Redaxo-Forum stellen: <a href="http://forum.redaxo.de/ftopic12965">http://forum.redaxo.de/ftopic12965</a>.
a587_help_wiki = Die Dokumentation des Addons finden Sie unter <a href="http://www.redaxo.org/de/wiki/index.php?n=R4.Rexsearch">http://www.redaxo.org/de/wiki/index.php?n=R4.Rexsearch</a>.
a587_help_forum = Sollten Sie Fragen oder Probleme haben, können Sie diese im Redaxo-Forum stellen: <a href="http://www.redaxo.org/de/forum/addons-f30/addon-rexsearch-fulltext-search-addon-t12965.html">http://www.redaxo.org/de/forum/addons-f30/addon-rexsearch-fulltext-search-addon-t12965.html</a>.
98 changes: 0 additions & 98 deletions lang/de_de_utf8.lang

This file was deleted.

4 changes: 2 additions & 2 deletions lang/en_gb.lang
Expand Up @@ -94,5 +94,5 @@ a587_ellipsis =

a587_cat_deleted = All articles are still included in the search index. You have to <a href="index.php?page=rexsearch&subpage=generate">regenerate the search index</a>.

a587_help_wiki = The documentation of this addon can be found at <a href="http://wiki.redaxo.de/index.php?n=R4.RexSearch">http://wiki.redaxo.de/index.php?n=R4.RexSearch</a>.
a587_help_forum = Any questions or suggestions can be asked/made at <a href="http://forum.redaxo.de/ftopic12965">http://forum.redaxo.de/ftopic12965</a>.
a587_help_wiki = The documentation of this addon can be found at <a href="http://www.redaxo.org/de/wiki/index.php?n=R4.Rexsearch">http://www.redaxo.org/de/wiki/index.php?n=R4.Rexsearch</a>.
a587_help_forum = Any questions or suggestions can be asked/made at <a href="http://www.redaxo.org/de/forum/addons-f30/addon-rexsearch-fulltext-search-addon-t12965.html">http://www.redaxo.org/de/forum/addons-f30/addon-rexsearch-fulltext-search-addon-t12965.html</a>.

0 comments on commit 1e7850e

Please sign in to comment.