Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt: full text index compact #1040

Merged
merged 2 commits into from
Aug 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/common/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,23 @@ std::string basename( std::string const & str )
return std::string( str, x + 1 );
}

/// Deletes @p directory together with all files and sub-directories inside it.
///
/// Removal is best-effort: entries that cannot be deleted are left in place and
/// no error is reported to the caller.
///
/// @param directory Path of the directory to delete. An empty path is ignored.
void removeDirectory( QString const & directory )
{
  // QDir treats an empty path as the current working directory; refuse to
  // touch it rather than wiping whatever directory the process runs in.
  if ( directory.isEmpty() )
    return;

  // QDir::removeRecursively() replaces the previous hand-written walk, which
  //  - called dir.rmdir( directory ), resolving a relative path against the
  //    directory itself so the directory was never removed,
  //  - did not list hidden/system entries, leaving them (and the directory) behind,
  //  - descended into symbolic links that point at directories.
  QDir( directory ).removeRecursively();
}

/// Overload for callers that hold the path as a std::string: converts it to a
/// QString and forwards to the QString overload.
void removeDirectory( string const & directory )
{
  QString const path = QString::fromStdString( directory );
  removeDirectory( path );
}

} // namespace Utils::Fs
2 changes: 2 additions & 0 deletions src/common/utils.hh
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,9 @@ char separator();

/// Returns the name part of the given filename.
string basename( string const & );
/// Removes the files and sub-directories found inside the given directory and
/// then the directory itself. Does not report failures to the caller.
void removeDirectory( QString const & directory );

/// Same as above for a path held in a std::string; the path is converted with
/// QString::fromStdString() before removal.
void removeDirectory( string const & directory );
} // namespace Fs

} // namespace Utils
Expand Down
53 changes: 5 additions & 48 deletions src/ftshelpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
#include <vector>
#include <string>


#include <QRegularExpression>


using std::vector;
using std::string;

Expand Down Expand Up @@ -49,49 +45,6 @@ bool ftsIndexIsOldOrBad( BtreeIndexing::BtreeDictionary * dict )
}
}


/// Builds the list of index terms from @p list.
///
/// Words without any CJK character are kept whole, filtered through
/// @p wordRegExp and de-duplicated. Words that contain CJK characters are not
/// kept as words; instead every CJK character in them (a surrogate pair counts
/// as one character) becomes its own term.
///
/// @param indexWords Output; overwritten with the filtered words followed by
///                   the de-duplicated single-character CJK terms.
/// @param wordRegExp Pattern a non-CJK word must match to be kept.
/// @param list       Input words.
void tokenizeCJK( QStringList & indexWords, QRegularExpression wordRegExp, QStringList list )
{
  QStringList wordList, hieroglyphList;
  for ( QString const & word : list ) {
    // Set once the word yields at least one CJK term.
    bool parsed = false;

    for ( int x = 0; x < word.size(); x++ ) {
      if ( !Utils::isCJKChar( word.at( x ).unicode() ) )
        continue;

      parsed = true;
      QString hieroglyph( word.at( x ) );

      // Keep both halves of a surrogate pair together. The bounds check guards
      // against a lone high surrogate at the very end of the word, which
      // previously caused a read past the last character.
      if ( word.at( x ).isHighSurrogate() && x + 1 < word.size() && word.at( x + 1 ).isLowSurrogate() )
        hieroglyph.append( word.at( ++x ) );

      hieroglyphList.append( hieroglyph );
    }

    // A word without CJK symbols goes into the list as is.
    if ( !parsed )
      wordList.append( word );
  }

  indexWords = wordList.filter( wordRegExp );
  indexWords.removeDuplicates();

  hieroglyphList.removeDuplicates();
  indexWords += hieroglyphList;
}

/// Returns true if at least one UTF-16 code unit of @p str is reported as CJK
/// by Utils::isCJKChar(), false otherwise (including for an empty string).
bool containCJK( QString const & str )
{
  for ( auto ch : str ) {
    // The first hit decides the answer; no need to scan the rest.
    if ( Utils::isCJKChar( ch.unicode() ) )
      return true;
  }
  return false;
}

void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancelled )
{
QMutexLocker _( &dict->getFtsMutex() );
Expand All @@ -105,7 +58,7 @@ void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancell
throw exUserAbort();

// Open the database for update, creating a new database if necessary.
Xapian::WritableDatabase db( dict->ftsIndexName(), Xapian::DB_CREATE_OR_OPEN );
Xapian::WritableDatabase db( dict->ftsIndexName() + "_temp", Xapian::DB_CREATE_OR_OPEN );

Xapian::TermGenerator indexer;
// Xapian::Stem stemmer("english");
Expand Down Expand Up @@ -206,6 +159,10 @@ void makeFTSIndex( BtreeIndexing::BtreeDictionary * dict, QAtomicInt & isCancell
offsets.clear();

db.commit();

db.compact( dict->ftsIndexName() );

Utils::Fs::removeDirectory( dict->ftsIndexName() + "_temp" );
}
catch ( Xapian::Error & e ) {
qWarning() << "create xapian index:" << QString::fromStdString( e.get_description() );
Expand Down