Permalink
Browse files

Turns out brute force is *much* faster than SQLite :-)

  • Loading branch information...
1 parent d5bdcf5 commit 7f860011bba82c2fbf59810003f56faaa5782e64 @xolox committed Nov 22, 2011
Showing with 214 additions and 187 deletions.
  1. +4 −5 README.md
  2. +4 −4 autoload/xolox/notes.vim
  3. +22 −23 doc/notes.txt
  4. +0 −149 misc/notes/scanner.py
  5. +178 −0 misc/notes/search-notes.py
  6. +3 −3 misc/notes/shadow/Note taking commands
  7. +3 −3 plugin/notes.vim
View
@@ -12,7 +12,7 @@ The notes.vim plug-in for the [Vim text editor] [vim] makes it easy to manage yo
* **Searching notes:** `:SearchNotes keyword …` searches for keywords and `:SearchNotes /pattern/` searches for regular expressions
* **Smart defaults:** Without an argument `:SearchNotes` searches for the word under the cursor (if the word starts with `@`, that character will be included in the search, which means you can easily search for *@tagged* notes)
* **Back-references:** The `:RelatedNotes` command finds all notes referencing the current file
- * A [Python 2] [python] script is included that accelerates keyword searches using an [SQLite] [sqlite] database
+ * A [Python 2] [python] script is included that accelerates keyword searches using a keyword index
* The `:RecentNotes` command lists your notes by modification date, starting with the most recently edited note
* **Navigating between notes:** The included file type plug-in redefines [gf] [gf] to jump between notes and the syntax script highlights note names as hyper links
* **Writing aids:** The included file type plug-in contains mappings for automatic curly quotes, arrows and list bullets and supports completion of note titles using Control-X Control-U and completion of tags using Control-X Control-O
@@ -135,11 +135,11 @@ If you don't pass any arguments to the `:SearchNotes` command it will search for
These mappings are currently not enabled by default because they conflict with already useful key mappings, but if you have any suggestions for alternatives feel free to contact me through GitHub or at <peter@peterodding.com>.
-#### Accelerated searching with Python and SQLite
+#### Accelerated searching with Python
-After collecting a fair amount of notes (say more than 5 MB) you will probably start to get annoyed at how long it takes Vim to search through all of your notes. To make searching more scalable the notes plug-in includes a Python script which uses a full text index of your notes stored in an SQLite database.
+After collecting a fair amount of notes (say more than 5 MB) you will probably start to get annoyed at how long it takes Vim to search through all of your notes. To make searching more scalable the notes plug-in includes a Python script which uses a persistent full text index of your notes stored in a file.
-The first time the Python script is run it will need to build the complete index which can take a few minutes, but after the index has been initialized updates and searches should be more or less instantaneous.
+The first time the Python script is run it will need to build the complete index which can take a moment, but after the index has been initialized updates and searches should be more or less instantaneous.
### The `:RelatedNotes` command
@@ -187,7 +187,6 @@ This software is licensed under the [MIT license] [mit].
[shell]: http://www.vim.org/scripts/script.php?script_id=3123
[slate]: http://code.google.com/p/vim/source/browse/runtime/colors/slate.vim
[split]: http://vimdoc.sourceforge.net/htmldoc/windows.html#:split
-[sqlite]: http://sqlite.org/
[tabedit]: http://vimdoc.sourceforge.net/htmldoc/tabpage.html#:tabedit
[update]: http://vimdoc.sourceforge.net/htmldoc/editing.html#:update
[utl]: http://www.vim.org/scripts/script.php?script_id=293
View
@@ -1,12 +1,12 @@
" Vim auto-load script
" Author: Peter Odding <peter@peterodding.com>
-" Last Change: November 21, 2011
+" Last Change: November 22, 2011
" URL: http://peterodding.com/code/vim/notes/
" Note: This file is encoded in UTF-8 including a byte order mark so
" that Vim loads the script using the right encoding transparently.
-let g:xolox#notes#version = '0.12.12'
+let g:xolox#notes#version = '0.14'
function! xolox#notes#shortcut() " {{{1
" The "note:" pseudo protocol is just a shortcut for the :Note command.
@@ -551,7 +551,7 @@ function! s:run_scanner(keywords, matches) " {{{2
if !(executable(python) && filereadable(scanner))
call xolox#misc#msg#debug("notes.vim %s: The %s script isn't executable.", g:xolox#notes#version, scanner)
else
- let arguments = [scanner, g:notes_indexfile, g:notes_directory, a:keywords]
+ let arguments = [scanner, '--database', g:notes_indexfile, '--notes', g:notes_directory, a:keywords]
call map(arguments, 'xolox#misc#escape#shell(v:val)')
let output = xolox#misc#str#trim(system(join([python] + arguments)))
if !v:shell_error
@@ -582,7 +582,7 @@ function! xolox#notes#get_fnames(include_shadow_notes) " {{{3
let starttime = xolox#misc#timer#start()
let pattern = xolox#misc#path#merge(g:notes_directory, '*')
let listing = glob(xolox#misc#path#absolute(pattern))
- call extend(s:cached_fnames, split(listing, '\n'))
+ call extend(s:cached_fnames, filter(split(listing, '\n'), 'filereadable(v:val)'))
let s:have_cached_names = 1
call xolox#misc#timer#stop('notes.vim %s: Cached note filenames in %s.', g:xolox#notes#version, starttime)
endif
View
@@ -38,8 +38,8 @@ notes in Vim:
- Back-references: The |:RelatedNotes| command finds all notes referencing the
current file
- - A Python 2 [1] script is included that accelerates keyword searches using an
- SQLite [2] database
+ - A Python 2 [1] script is included that accelerates keyword searches using a
+ keyword index
- The |:RecentNotes| command lists your notes by modification date, starting
with the most recently edited note
@@ -57,15 +57,15 @@ notes in Vim:
highlighting using blocks marked with '{{{type … }}}' which allows you to
embed highlighted code and configuration snippets in your notes
-Here's a screen shot of the syntax mode using the slate [3] color scheme:
+Here's a screen shot of the syntax mode using the slate [2] color scheme:
- Syntax mode screen shot, see reference [4]
+ Syntax mode screen shot, see reference [3]
===============================================================================
*notes-install-usage*
Install & usage ~
-Unzip the most recent ZIP archive [5] file inside your Vim profile directory
+Unzip the most recent ZIP archive [4] file inside your Vim profile directory
(usually this is '~/.vim' on UNIX and '%USERPROFILE%\vimfiles' on Windows),
restart Vim and execute the command ':helptags ~/.vim/doc' (use ':helptags
~\vimfiles\doc' instead on Windows). To get started execute |:Note| or ':edit
@@ -257,16 +257,16 @@ already useful key mappings, but if you have any suggestions for alternatives
feel free to contact me through GitHub or at peter@peterodding.com.
-------------------------------------------------------------------------------
-Accelerated searching with Python and SQLite ~
+ *notes-accelerated-searching-with-python*
+Accelerated searching with Python ~
After collecting a fair amount of notes (say more than 5 MB) you will probably
start to get annoyed at how long it takes Vim to search through all of your
notes. To make searching more scalable the notes plug-in includes a Python
-script which uses a full text index of your notes stored in an SQLite
-database.
+script which uses a persistent full text index of your notes stored in a file.
The first time the Python script is run it will need to build the complete
-index which can take a few minutes, but after the index has been initialized
+index which can take a moment, but after the index has been initialized
updates and searches should be more or less instantaneous.
-------------------------------------------------------------------------------
@@ -317,14 +317,14 @@ If for any reason you want to recreate the list of tags you can execute the
===============================================================================
Other plug-ins that work well with the notes plug-in ~
- - The utl.vim [6] universal text linking plug-in enables links between your
+ - The utl.vim [5] universal text linking plug-in enables links between your
notes, other local files and remote resources like web pages
- - My shell.vim [7] plug-in also enables easy navigation between your notes and
+ - My shell.vim [6] plug-in also enables easy navigation between your notes and
environment like local files and directories, web pages and e-mail
addresses
- - The VOoM [8] outlining plug-in should work well for notes if you use the
+ - The VOoM [7] outlining plug-in should work well for notes if you use the
Markdown style headers starting with '#', however it has been reported that
this combination may not always work so well in practice (sometimes losing
notes!)
@@ -336,28 +336,27 @@ Contact ~
If you have questions, bug reports, suggestions, etc. the author can be
contacted at peter@peterodding.com. The latest version is available at
http://peterodding.com/code/vim/notes/ and http://github.com/xolox/vim-notes.
-If you like the script please vote for it on Vim Online [9].
+If you like the script please vote for it on Vim Online [8].
===============================================================================
*notes-license*
License ~
-This software is licensed under the MIT license [10]. Copyright 2011 Peter
+This software is licensed under the MIT license [9]. Copyright 2011 Peter
Odding <peter@peterodding.com>.
===============================================================================
*notes-references*
References ~
[1] http://python.org/
-[2] http://sqlite.org/
-[3] http://code.google.com/p/vim/source/browse/runtime/colors/slate.vim
-[4] http://peterodding.com/code/vim/notes/syntax.png
-[5] http://peterodding.com/code/vim/downloads/notes.zip
-[6] http://www.vim.org/scripts/script.php?script_id=293
-[7] http://www.vim.org/scripts/script.php?script_id=3123
-[8] http://www.vim.org/scripts/script.php?script_id=2657
-[9] http://www.vim.org/scripts/script.php?script_id=3375
-[10] http://en.wikipedia.org/wiki/MIT_License
+[2] http://code.google.com/p/vim/source/browse/runtime/colors/slate.vim
+[3] http://peterodding.com/code/vim/notes/syntax.png
+[4] http://peterodding.com/code/vim/downloads/notes.zip
+[5] http://www.vim.org/scripts/script.php?script_id=293
+[6] http://www.vim.org/scripts/script.php?script_id=3123
+[7] http://www.vim.org/scripts/script.php?script_id=2657
+[8] http://www.vim.org/scripts/script.php?script_id=3375
+[9] http://en.wikipedia.org/wiki/MIT_License
vim: ft=help
View
@@ -1,149 +0,0 @@
-#!/usr/bin/env python
-
-"""
-Copyright 2011 Peter Odding <peter@peterodding.com>
-This program is licensed under the MIT license.
-
-This Python script can be used by the notes.vim plug-in to perform fast
-keyword searches in the user's notes. It has two advantages over just using
-Vim's internal :vimgrep command to search all of the user's notes:
-
- * Very large notes don't slow searching down so much;
- * Hundreds of notes can be searched in less than a second.
-
-For more information see http://peterodding.com/code/vim/notes/
-"""
-
-# The character encoding of the command line arguments passed to this script
-# and the text files read by this script (needed for accurate word splitting).
-CHARACTER_ENCODING = 'UTF-8'
-
-# Load the required standard library modules.
-import fnmatch, os, re, sqlite3, sys
-
-# Parse command line arguments. {{{1
-
-script_name = os.path.split(sys.argv[0])[1]
-if len(sys.argv) < 4:
- sys.stderr.write("%s: Not enough arguments!\n" % script_name)
- sys.exit(1)
-
-def mungepath(p):
- return os.path.abspath(os.path.expanduser(p))
-
-def decodestr(s):
- return s.decode(CHARACTER_ENCODING, 'ignore')
-
-database_file = mungepath(sys.argv[1])
-user_directory = mungepath(sys.argv[2])
-keywords = decodestr(' '.join(sys.argv[3:]))
-
-# Create or open SQLite database. {{{1
-
-first_use = not os.path.exists(database_file)
-connection = sqlite3.connect(database_file)
-connection.text_factory = str
-
-# Initialize database schema?
-if first_use:
- connection.execute('create table if not exists files (file_id integer primary key, filename text, last_modified integer)')
- connection.execute('create table if not exists keywords (keyword_id integer primary key, value text)')
- connection.execute('create table if not exists occurrences (file_id integer, keyword_id integer, primary key (file_id, keyword_id))')
-
-# Function to scan text files for keywords. {{{1
-
-UNSAVED_CHANGES = False
-CACHED_KEYWORDS = {}
-
-def scan_note(note):
- global UNSAVED_CHANGES
- with open(note['filename']) as handle:
- result = connection.execute('select file_id from files where filename = ?', (note['filename'],)).fetchone()
- if result:
- file_id = result[0]
- connection.execute('delete from occurrences where file_id = ?', (file_id,))
- connection.execute('update files set last_modified = ? where file_id = ?', (note['last_modified'], file_id))
- else:
- connection.execute('insert into files (filename, last_modified) values (?, ?)', (note['filename'], note['last_modified']))
- file_id = connection.execute('select last_insert_rowid()').fetchone()[0]
- for keyword in tokenize(decodestr(handle.read())):
- if keyword in CACHED_KEYWORDS:
- keyword_id = CACHED_KEYWORDS[keyword]
- else:
- record = connection.execute('select keyword_id from keywords where value = ?', (keyword,)).fetchone()
- if not record:
- connection.execute('insert into keywords (value) values (?)', (keyword,))
- record = connection.execute('select last_insert_rowid()').fetchone()
- keyword_id = record[0]
- CACHED_KEYWORDS[keyword] = keyword_id
- connection.execute('insert into occurrences (file_id, keyword_id) values (?, ?)', (file_id, keyword_id))
- UNSAVED_CHANGES = True
-
-# Function to tokenize text strings into words. {{{1
-
-def tokenize(text):
- words = set()
- for word in re.findall(r'\w+', text.lower(), re.UNICODE):
- word = word.strip()
- if word != '' and not word.isspace():
- words.add(word)
- return words
-
-# Find filenames & last modified times of existing notes. {{{1
-
-notes_on_disk = {}
-for filename in os.listdir(user_directory):
- if filename != '.swp' and not fnmatch.fnmatch(filename, '.*.s??'): # (Vim swap files are ignored)
- filename = os.path.join(user_directory, filename)
- notes_on_disk[filename] = dict(filename=filename, last_modified=os.path.getmtime(filename))
-if first_use:
- for note in notes_on_disk.itervalues():
- scan_note(note)
-else:
- deleted_notes = []
- updated_notes = []
- for file_id, filename, last_modified in connection.execute('select file_id, filename, last_modified from files order by filename'):
- filename = str(filename)
- if filename not in notes_on_disk:
- deleted_notes.append((file_id, filename))
- else:
- note = notes_on_disk[filename]
- del notes_on_disk[filename]
- if note['last_modified'] > last_modified:
- updated_notes.append(note)
- created_notes = notes_on_disk.values()
- for file_id, filename in deleted_notes:
- connection.execute('delete from files where file_id = ?', (file_id,))
- connection.execute('delete from occurrences where file_id = ?', (file_id,))
- UNSAVED_CHANGES = True
- for note in sorted(created_notes + updated_notes, key=lambda x: x['filename']):
- scan_note(note)
-
-# Commit unsaved changes to SQLite database?
-if UNSAVED_CHANGES:
- connection.commit()
-
-# Query database for given keyword(s), print matching files. {{{1
-
-if keywords != '' and not keywords.isspace():
- query = """
- select filename from files where file_id in (
- select file_id from occurrences where keyword_id in (
- select keyword_id from keywords where value like ?
- )
- ) """
- global_matches = set()
- for i, keyword in enumerate(tokenize(keywords)):
- current_matches = set()
- for result in connection.execute(query, ('%' + keyword + '%',)):
- filename = str(result[0])
- current_matches.add(filename)
- if i == 0:
- global_matches = current_matches
- else:
- global_matches &= current_matches
- print '\n'.join(sorted(global_matches))
-
-connection.close()
-
-# vim: ts=2 sw=2 et
Oops, something went wrong.

0 comments on commit 7f86001

Please sign in to comment.