Skip to content
This repository has been archived by the owner on May 13, 2020. It is now read-only.

Commit

Permalink
Compute scaled_int the same way everywhere.
Browse files Browse the repository at this point in the history
  • Loading branch information
Tim Peters committed May 17, 2002
1 parent e61b9d9 commit 5d2064d
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 35 deletions.
20 changes: 3 additions & 17 deletions CosineIndex.py
Expand Up @@ -21,28 +21,14 @@

from Products.ZCTextIndex.IIndex import IIndex
from Products.ZCTextIndex import WidCode
from Products.ZCTextIndex.BaseIndex import BaseIndex, inverse_doc_frequency
from Products.ZCTextIndex.BaseIndex import BaseIndex, \
inverse_doc_frequency, \
scaled_int, SCALE_FACTOR
from Products.ZCTextIndex.SetOps import mass_weightedIntersection, \
mass_weightedUnion

import ZODB

# Instead of storing floats, we generally store scaled ints. Binary pickles
# can store those more efficiently. The default SCALE_FACTOR of 1024
# is large enough to get about 3 decimal digits of fractional info, and
# small enough so that scaled values should almost always fit in a signed
# 16-bit int (we're generally storing logs, so a few bits before the radix
# point go a long way; on the flip side, for reasonably small numbers x
# most of the info in log(x) is in the fractional bits, so we do want to
# save a lot of those).
SCALE_FACTOR = 1024.0

def scaled_int(f, scale=SCALE_FACTOR):
    """Return f * scale rounded to the nearest int.

    f is the float to convert; scale is the multiplier applied before
    rounding and defaults to SCALE_FACTOR.  Rounding is done by adding
    0.5 and truncating with int(), which only rounds to nearest for
    non-negative products, because int() truncates toward zero.
    """
    # We expect only positive inputs, so "add a half and chop" is the
    # same as round().  Surprisingly, calling round() is significantly
    # more expensive.
    return int(f * scale + 0.5)

class CosineIndex(BaseIndex):

__implements__ = IIndex
Expand Down
20 changes: 3 additions & 17 deletions OkapiIndex.py
Expand Up @@ -24,28 +24,14 @@

from Products.ZCTextIndex.IIndex import IIndex
from Products.ZCTextIndex import WidCode
from Products.ZCTextIndex.BaseIndex import BaseIndex, inverse_doc_frequency
from Products.ZCTextIndex.BaseIndex import BaseIndex, \
inverse_doc_frequency, \
scaled_int
from Products.ZCTextIndex.SetOps import mass_weightedIntersection, \
mass_weightedUnion

import ZODB

# Instead of storing floats, we generally store scaled ints. Binary pickles
# can store those more efficiently. The default SCALE_FACTOR of 1024
# is large enough to get about 3 decimal digits of fractional info, and
# small enough so that scaled values should almost always fit in a signed
# 16-bit int (we're generally storing logs, so a few bits before the radix
# point go a long way; on the flip side, for reasonably small numbers x
# most of the info in log(x) is in the fractional bits, so we do want to
# save a lot of those).
SCALE_FACTOR = 1024.0

def scaled_int(f, scale=SCALE_FACTOR):
    """Return f * scale rounded to the nearest int.

    f is the float to convert; scale is the multiplier applied before
    rounding and defaults to SCALE_FACTOR.  Rounding is done by adding
    0.5 and truncating with int(), which only rounds to nearest for
    non-negative products, because int() truncates toward zero.
    """
    # We expect only positive inputs, so "add a half and chop" is the
    # same as round().  Surprisingly, calling round() is significantly
    # more expensive.
    return int(f * scale + 0.5)

class OkapiIndex(BaseIndex):

__implements__ = IIndex
Expand Down
2 changes: 1 addition & 1 deletion tests/testZCTextIndex.py
@@ -1,7 +1,7 @@
from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex
from Products.ZCTextIndex.tests \
import testIndex, testQueryEngine, testQueryParser
from Products.ZCTextIndex.CosineIndex import scaled_int, SCALE_FACTOR
from Products.ZCTextIndex.BaseIndex import scaled_int, SCALE_FACTOR
from Products.ZCTextIndex.CosineIndex import CosineIndex
from Products.ZCTextIndex.OkapiIndex import OkapiIndex
from Products.ZCTextIndex.Lexicon import Lexicon, Splitter
Expand Down

0 comments on commit 5d2064d

Please sign in to comment.