diff --git a/classes/class.rexsearch.inc.php b/classes/class.rexsearch.inc.php index 1c8565c..746517c 100644 --- a/classes/class.rexsearch.inc.php +++ b/classes/class.rexsearch.inc.php @@ -87,7 +87,6 @@ class RexSearch var $surroundTags = array('',''); var $tablePrefix; var $textMode = 'plain'; - var $utf8 = true; var $whitelist = array(); var $where = ''; @@ -186,15 +185,10 @@ function RexSearch($_clang = false, $_loadSettings = true, $_useStopwords = true $this->tablePrefix = $REX['TABLE_PREFIX']; $this->includePath = $REX['INCLUDE_PATH']; $this->generatedPath = $REX['GENERATED_PATH']; - $this->utf8 = rex_lang_is_utf8(); $this->documentRoot = realpath($_SERVER['DOCUMENT_ROOT']); $this->mediaFolder = $REX['MEDIAFOLDER']; - #$I18N->appendFile($REX['INCLUDE_PATH'].'/addons/rexsearch/lang/'); - if($this->utf8) - $locale = 'de_de_utf8'; - else - $locale = 'de_de'; + $locale = 'de_de'; $langfile = new i18n($locale, $REX['INCLUDE_PATH'].'/addons/rexsearch/lang/'); $this->ellipsis = $langfile->Msg('a587_ellipsis'); @@ -202,10 +196,7 @@ function RexSearch($_clang = false, $_loadSettings = true, $_useStopwords = true // german stopwords if($_useStopwords) { - if($this->utf8) - include $this->includePath.'/addons/rexsearch/lang/stopwords_utf8.inc.php'; - else - include $this->includePath.'/addons/rexsearch/lang/stopwords.inc.php'; + include $this->includePath.'/addons/rexsearch/lang/stopwords.inc.php'; $this->stopwords = $german_stopwords; } } @@ -426,16 +417,14 @@ function setSearchInIDs($_searchInIDs, $_reset = false) /** * If utf8-encoding is used, the parameter will be appended with an "u". + * Since there is only UTF-8 supported, it always appends the "u". * * @param string $_regex * @return string */ function encodeRegex($_regex) { - if($this->utf8) - return $_regex.'u'; - else - return $_regex; + return $_regex.'u'; } /** @@ -675,7 +664,7 @@ function indexArticle($_id,$_clang = false) foreach(preg_split($this->encodeRegex('~[[:punct:][:space:]]+~ism'), $plaintext) as $keyword) { - if($this->significantCharacterCount <= mb_strlen($keyword,$this->utf8?'UTF-8':mb_internal_encoding())) + if($this->significantCharacterCount <= mb_strlen($keyword,'UTF-8')) $keywords[] = array('search'=>$keyword,'clang'=>$langID); } @@ -831,7 +820,7 @@ function indexColumn($_table, $_column, $_idcol = false, $_id = false, $_start = foreach(preg_split($this->encodeRegex('~[[:punct:][:space:]]+~ism'), $plaintext) as $keyword) { - if($this->significantCharacterCount <= mb_strlen($keyword,$this->utf8?'UTF-8':mb_internal_encoding())) + if($this->significantCharacterCount <= mb_strlen($keyword,'UTF-8')) $keywords[] = array('search'=>$keyword,'clang'=>is_null($indexData['clang'])?false:$indexData['clang']); } @@ -950,10 +939,7 @@ function indexFile($_filename, $_doPlaintext = false, $_clang = false, $_fid = f { $tempFile = tempnam($this->generatedPath.'/files/', 'rexsearch'); - if($this->utf8) - $encoding = 'UTF-8'; - else - $encoding = 'Latin1'; + $encoding = 'UTF-8'; exec('pdftotext '.escapeshellarg($this->documentRoot.'/'.$_filename).' '.escapeshellarg($tempFile).' -enc '.$encoding, $dummy, $return); if($return > 0) @@ -987,8 +973,6 @@ function indexFile($_filename, $_doPlaintext = false, $_clang = false, $_fid = f { require_once 'class.pdf2txt.inc.php'; $text = pdf2txt::directConvert($pdfContent); - if(!$this->utf8) - $text = utf8_decode($text); $error = false; } } @@ -1016,7 +1000,7 @@ function indexFile($_filename, $_doPlaintext = false, $_clang = false, $_fid = f return A587_FILE_NOEXIST; } - $text = @iconv(mb_detect_encoding($text), $this->utf8?'UTF-8':'ISO-8859-15', $text); + $text = @iconv(mb_detect_encoding($text), 'UTF-8', $text); // Plaintext if(empty($plaintext)) @@ -1053,7 +1037,7 @@ function indexFile($_filename, $_doPlaintext = false, $_clang = false, $_fid = f $keywords = array(); foreach(preg_split($this->encodeRegex('~[[:punct:][:space:]]+~ism'), $plaintext) as $keyword) { - if($this->significantCharacterCount <= mb_strlen($keyword,$this->utf8?'UTF-8':mb_internal_encoding())) + if($this->significantCharacterCount <= mb_strlen($keyword,'UTF-8')) $keywords[] = array('search'=>$keyword,'clang'=>!isset($fileData['clang'])?false:$fileData['clang']); } $this->storeKeywords($keywords, false); @@ -1624,7 +1608,7 @@ function getHighlightedText($_text) { foreach($this->searchArray as $keyword) { - $this->searchArray[] = array('search' => htmlentities($keyword['search'], ENT_COMPAT, $this->utf8?'UTF-8':mb_internal_encoding())); + $this->searchArray[] = array('search' => htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8')); } } @@ -1933,8 +1917,8 @@ function storeKeywords($_keywords, $_doCount = true) foreach($_keywords as $keyword) { if( - !in_array(mb_strtolower($keyword['search'], $this->utf8?'UTF-8':mb_internal_encoding()), $this->blacklist) AND - !in_array(mb_strtolower($keyword['search'], $this->utf8?'UTF-8':mb_internal_encoding()), $this->stopwords) + !in_array(mb_strtolower($keyword['search'], 'UTF-8'), $this->blacklist) AND + !in_array(mb_strtolower($keyword['search'], 'UTF-8'), $this->stopwords) ) { $simWords[] = sprintf( @@ -2124,7 +2108,7 @@ function search($_search) if($this->searchEntities) { - $match .= ' + '.sprintf("(( MATCH (`%s`) AGAINST ('%s')) * %d)", implode('`,`',$searchColumns), $sql->escape(htmlentities($keyword['search'], ENT_COMPAT, $this->utf8?'UTF-8':mb_internal_encoding())), $keyword['weight']); + $match .= ' + '.sprintf("(( MATCH (`%s`) AGAINST ('%s')) * %d)", implode('`,`',$searchColumns), $sql->escape(htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8')), $keyword['weight']); } $Amatch[] = $match; @@ -2143,7 +2127,7 @@ function search($_search) if($this->searchEntities) { - $tmpWhere[] = sprintf("(`%s` LIKE '%%%s%%')", $searchColumn, str_replace(array('%','_'),array('\%','\_'),$sql->escape(htmlentities($keyword['search'], ENT_COMPAT, $this->utf8?'UTF-8':mb_internal_encoding())))); + $tmpWhere[] = sprintf("(`%s` LIKE '%%%s%%')", $searchColumn, str_replace(array('%','_'),array('\%','\_'),$sql->escape(htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8')))); } } diff --git a/lang/de_de.lang b/lang/de_de.lang index 8accd63..4107cf9 100644 --- a/lang/de_de.lang +++ b/lang/de_de.lang @@ -94,5 +94,5 @@ a587_ellipsis = a587_cat_deleted = Alle Artikel der Kategorie sind noch im Suchindex enthalten. Sie müssen den Suchindex erneuern. -a587_help_wiki = Die Dokumentation des Addons finden Sie unter http://wiki.redaxo.de/index.php?n=R4.RexSearch. -a587_help_forum = Sollten Sie Fragen oder Probleme haben, können Sie diese im Redaxo-Forum stellen: http://forum.redaxo.de/ftopic12965. +a587_help_wiki = Die Dokumentation des Addons finden Sie unter http://www.redaxo.org/de/wiki/index.php?n=R4.Rexsearch. +a587_help_forum = Sollten Sie Fragen oder Probleme haben, können Sie diese im Redaxo-Forum stellen: http://www.redaxo.org/de/forum/addons-f30/addon-rexsearch-fulltext-search-addon-t12965.html. diff --git a/lang/de_de_utf8.lang b/lang/de_de_utf8.lang deleted file mode 100644 index 0b13470..0000000 --- a/lang/de_de_utf8.lang +++ /dev/null @@ -1,98 +0,0 @@ -a587_title_settings = Einstellungen -a587_title_generate = Wartung -a587_title_help = Hilfe - -a587_settings_logicalmode = Logischer Suchmodus -a587_settings_logicalmode_and = Konjunktive Suche (AND) -a587_settings_logicalmode_or = Disjunktive Suche (OR) - -a587_settings_modi_header = Suchmodi - -a587_settings_textmode = Textmodus -a587_settings_textmode_plain = Durchsuche Text ohne HTML-Tags (Plain) -a587_settings_textmode_html = Durchsuche Text mit HTML-Tags (HTML) -a587_settings_textmode_both = Durchsuche beides (HTML und Plain) - -a587_settings_title_indexmode = Indexierung -a587_settings_indexmode_label = Art und Weise -a587_settings_indexmode_viahttp = Indexierung der Artikel über eine HTTP-GET-Anfrage -a587_settings_indexmode_viacache = Indexierung der Artikel über den Redaxo-Cache (ohne Template, nur der Artikel) -a587_settings_indexmode_viacachetpl = Indexierung der Artikel über den Redaxo-Cache (mit Template, liefert das gleiche Ergebnis wie per HTTP-GET-Anfrage) -a587_settings_indexoffline = Offline-Artikel indexieren -a587_settings_automaticindex_label = Artikel (ADD, EDIT, DELETE) automatisch (de)indexieren -a587_settings_ep_outputfilter_label = Extension Point "OUTPUT_FILTER" aufrufen - -a587_settings_searchmode = MySQL-Suchmodus -a587_settings_searchmode_like = LIKE (findet auch Teilwörter, aber langsamer) -a587_settings_searchmode_match = MATCH AGAINST (findet nur ganze Wörter, schneller) - -a587_settings_highlight_header = Erscheinungsbild des Highlight-Texts -a587_settings_surroundtags_start = Start-Tag -a587_settings_surroundtags_end = End-Tag - -a587_settings_limit = Maximale Trefferanzahl -a587_settings_maxteaserchars = Maximale Zeichenanzahl für Teaser -a587_settings_maxhighlightchars = Maximale Zeichenanzahl um hervorgehobene Suchbegriffe herum - -a587_settings_highlight_label = Highlighttyp -a587_settings_highlight_sentence = Ab Anfang des Satzes, in dem mindestens einer der Suchbegriffe auftaucht -a587_settings_highlight_paragraph = Ab Anfang des Absatzes, in dem mindestens einer der Suchbegriffe auftaucht -a587_settings_highlight_surroundtext = Alle gefundenen Suchbegriffe werden mit den sie umgebenden Wörtern dargestellt -a587_settings_highlight_surroundtextsingle = Für jeden gefundenen Suchbegriff wird genau eine Textstelle wiedergegeben -a587_settings_highlight_teaser = Als Teaser, in dem eventuell vorkommende Suchbegriffe hervorgehoben sind -a587_settings_highlight_array = Als Array mit allen Suchbegriffen und Textstellen -a587_settings_highlight_sample = Beispieltext mit Sucheingabe "velit esse" accusam - -a587_settings_similarwords_label = Ähnlichkeitssuche -a587_settings_similarwords_none = Deaktivieren -a587_settings_similarwords_soundex = Soundex -a587_settings_similarwords_metaphone = Metaphone -a587_settings_similarwords_cologne = Kölner Phonetik -a587_settings_similarwords_all = Alle -a587_settings_similarwords_permanent = Die Ähnlichkeitssuche auch dann durchführen, wenn Ergebnisse vorhanden sind. - -a587_settings_exclude = Wörter, Kategorien und Artikel von der Suche ausschließen -a587_settings_exclude_blacklist = Wörter (kommasepariert) -a587_settings_exclude_categories = Kategorien -a587_settings_exclude_articles = Artikel - -a587_settings_include = Datenbankspalten in die Suche einschließen - -a587_settings_submitbutton = Einstellungen speichern -a587_settings_saved = Einstellungen wurden gespeichert -a587_settings_saved_warning = Der Suchindex muss erneuert werden. - -a587_settings_generate_full = Index vollständig erstellen -a587_settings_generate_full_text = Die Index-Tabelle wird gelöscht und neu aufgebaut. -a587_settings_generate_incremental = Index schrittweise erstellen -a587_settings_generate_incremental_text = Bei Problemen mit der maximalen Skriptlaufzeit (max_execution_time) kann der Index schrittweise erstellt werden. -a587_settings_generate_incremental_confirm = Die schrittweise Indexierung starten? -a587_settings_generate_delete_cache = Suchcache löschen -a587_settings_generate_delete_cache_text = Wenn eine Neuindexierung nicht erforderlich ist, kann auch ausschließlich der Cache gelöscht werden. -a587_settings_generate_cache_deleted = Suchcache wurde gelöscht -a587_settings_generate_delete_keywords = Keyword-Index leeren -a587_settings_generate_delete_keywords_text = Löscht alle Keywords, die bei der Indexierung oder über Suchanfragen gesammelt wurden. Diese Keywords werden unter anderem für die Ähnlichkeitssuche gebraucht und sollten nicht gelöscht werden. -a587_settings_generate_delete_keywords_confirm = Wirklich alle Keywords löschen? -a587_settings_generate_keywords_deleted = Alle Keywords wurden gelöscht -a587_settings_generate_inprogress = Index wird erstellt. Es erscheint eine Nachricht, wenn der Index erstellt wurde. -a587_settings_generate_done = Index wurde erstellt -a587_settings_generate_cancel = Erstellung des Index abgebrochen -a587_settings_generate_duration = Dauer:  -a587_settings_generate_timeleft = Verbleibend:  - -a587_settings_fileext_header = Dateisuche -a587_settings_fileext_label = Dateiendungen
(frei lassen für beliebige Dateien) -a587_settings_file_mediapool = Medienpool indexieren -a587_settings_file_dirdepth_label = Verzeichnistiefe -a587_settings_folders_label = Folgende Ordner in die Suche einschließen -a587_settings_folders_dirselect_label = Unterordner der Tiefe %DEPTH% auswählen - -a587_settings_show_all = Alle aufklappen -a587_settings_show_none = Alle zuklappen - -a587_ellipsis = … - -a587_cat_deleted = Alle Artikel der Kategorie sind noch im Suchindex enthalten. Sie müssen den Suchindex erneuern. - -a587_help_wiki = Die Dokumentation des Addons finden Sie unter http://wiki.redaxo.de/index.php?n=R4.RexSearch. -a587_help_forum = Sollten Sie Fragen oder Probleme haben, können Sie diese im Redaxo-Forum stellen: http://forum.redaxo.de/ftopic12965. diff --git a/lang/en_gb.lang b/lang/en_gb.lang index f69d3bc..43ee6f5 100644 --- a/lang/en_gb.lang +++ b/lang/en_gb.lang @@ -94,5 +94,5 @@ a587_ellipsis = a587_cat_deleted = All articles are still included in the search index. You have to regenerate the search index. -a587_help_wiki = The documentation of this addon can be found at http://wiki.redaxo.de/index.php?n=R4.RexSearch. -a587_help_forum = Any questions or suggestions can be asked/made at http://forum.redaxo.de/ftopic12965. +a587_help_wiki = The documentation of this addon can be found at http://www.redaxo.org/de/wiki/index.php?n=R4.Rexsearch. +a587_help_forum = Any questions or suggestions can be asked/made at http://www.redaxo.org/de/forum/addons-f30/addon-rexsearch-fulltext-search-addon-t12965.html. diff --git a/lang/en_gb_utf8.lang b/lang/en_gb_utf8.lang deleted file mode 100644 index 5aa525c..0000000 --- a/lang/en_gb_utf8.lang +++ /dev/null @@ -1,98 +0,0 @@ -a587_title_settings = Settings -a587_title_generate = Maintenance -a587_title_help = Help - -a587_settings_logicalmode = Logical search mode -a587_settings_logicalmode_and = Conjunctive search (AND) -a587_settings_logicalmode_or = Disjunctive Suche (OR) - -a587_settings_modi_header = Search modes - -a587_settings_textmode = Text mode -a587_settings_textmode_plain = Search in text without HTML-Tags (plain) -a587_settings_textmode_html = Search in text with HTML-Tags (HTML) -a587_settings_textmode_both = Search in both (HTML and plain) - -a587_settings_title_indexmode = Indexing -a587_settings_indexmode_label = Type -a587_settings_indexmode_viahttp = Indexing of articles via a HTTP-GET-Request -a587_settings_indexmode_viacache = Indexing of articles via redaxo cache (without template, article only) -a587_settings_indexmode_viacachetpl = Indexing of articles ia redaxo cache (with template, same result as with HTTP-GET-Request) -a587_settings_indexoffline = Index offline articles -a587_settings_automaticindex_label = de(index) articles automatically (ADD, EDIT, DELETE) -a587_settings_ep_outputfilter_label = Execute extension point "OUTPUT_FILTER" - -a587_settings_searchmode = MySQL search modus -a587_settings_searchmode_like = LIKE (finds subwords, but slower) -a587_settings_searchmode_match = MATCH AGAINST (finds only complete words, faster) - -a587_settings_highlight_header = Appearance of highlighted text -a587_settings_surroundtags_start = start tag -a587_settings_surroundtags_end = end tag - -a587_settings_limit = Maximum hit count -a587_settings_maxteaserchars = Maximum character count for teaser -a587_settings_maxhighlightchars = Maximum character count around highlighted texts - -a587_settings_highlight_label = Highlight type -a587_settings_highlight_sentence = Starting at the beginning of the sentence, which contains at least one found search term -a587_settings_highlight_paragraph = Starting at the beginning of the paragraph, which contains at least one found search term -a587_settings_highlight_surroundtext = Every found text is displayed with the surrounding words -a587_settings_highlight_surroundtextsingle = For every found search term only one text is displayed with the surrounding words -a587_settings_highlight_teaser = Teaser with, if present, highlighted search terms -a587_settings_highlight_array = Array with every found text for every search term -a587_settings_highlight_sample = Sample text with search term "velit esse" accusam - -a587_settings_similarwords_label = Fuzzy search -a587_settings_similarwords_none = Deactivate -a587_settings_similarwords_soundex = Soundex -a587_settings_similarwords_metaphone = Metaphone -a587_settings_similarwords_cologne = Kölner Phonetik -a587_settings_similarwords_all = All -a587_settings_similarwords_permanent = Perform fuzzy search even if results are found. - -a587_settings_exclude = Exclude words, Categories and Articles from the search -a587_settings_exclude_blacklist = Words (comma-separated) -a587_settings_exclude_categories = Categories -a587_settings_exclude_articles = Articles - -a587_settings_include = Include database tables - -a587_settings_submitbutton = Save settings -a587_settings_saved = Settings saved -a587_settings_saved_warning = The search index has to be regenerated. - -a587_settings_generate_full = Generate total index -a587_settings_generate_full_text = Deletes and rebuilds the index-table. -a587_settings_generate_incremental = Generate index incrementally -a587_settings_generate_incremental_text = When having problems with maximum execution time the index can be generated incrementally. -a587_settings_generate_incremental_confirm = Start incremental indexing? -a587_settings_generate_delete_cache = Clear search cache -a587_settings_generate_delete_cache_text = If regenerating of the index is not nescessary, the search cache can be deleted exclusively. -a587_settings_generate_cache_deleted = Search cache cleared -a587_settings_generate_delete_keywords = Truncate keyword-index -a587_settings_generate_delete_keywords_text = Deletes all keywords collected during the indexing process and with searches. These keywords are nescessary for the fuzzy search and should not be deleted. -a587_settings_generate_delete_keywords_confirm = Do you really want to delete all keywords? -a587_settings_generate_keywords_deleted = Keywords deleted -a587_settings_generate_inprogress = Generating index. There will be a message when the process is complete. -a587_settings_generate_done = Index generated -a587_settings_generate_cancel = Indexing aborted -a587_settings_generate_duration = Time:  -a587_settings_generate_timeleft = Remaining:  - -a587_settings_fileext_header = File search -a587_settings_fileext_label = Extensions
(keep empty for any files) -a587_settings_file_mediapool = Index mediapool -a587_settings_file_dirdepth_label = Directory depth -a587_settings_folders_label = Include following directories -a587_settings_folders_dirselect_label = Select Subdirectories of depth %DEPTH% - -a587_settings_show_all = Show all -a587_settings_show_none = Hide all - -a587_ellipsis = … - -a587_cat_deleted = All articles are still included in the search index. You have to regenerate the search index. - -a587_help_wiki = The documentation of this addon can be found at http://wiki.redaxo.de/index.php?n=R4.RexSearch. -a587_help_forum = Any questions or suggestions can be asked/made at http://forum.redaxo.de/ftopic12965. diff --git a/lang/stopwords.inc.php b/lang/stopwords.inc.php index d22f096..d931725 100644 --- a/lang/stopwords.inc.php +++ b/lang/stopwords.inc.php @@ -1 +1 @@ -