This repository has been archived by the owner on May 1, 2020. It is now read-only.

Commit 5067493
Merge 45f0903 into 5be9289
Marcel Kurovski committed Nov 21, 2017
2 parents: 5be9289 + 45f0903
Showing 4 changed files with 21 additions and 13 deletions.
python/lopq/model.py: 18 additions & 11 deletions
@@ -240,7 +240,7 @@ def compute_residuals(data, C):
     return residuals, assignments
 
 
-def train_coarse(data, V=8, kmeans_coarse_iters=10, n_init=10, random_state=None):
+def train_coarse(data, V=8, kmeans_coarse_iters=10, n_init=10, random_state=None, n_jobs=1):
     """
     Train a kmeans model.
 
@@ -252,6 +252,8 @@ def train_coarse(data, V=8, kmeans_coarse_iters=10, n_init=10, random_state=None
         the number of iterations
     :param int random_state:
         a random state to seed the clustering
+    :param int n_jobs:
+        the number of jobs to use for KMeans cluster computation
 
     :returns ndarray:
         a VxD matrix of cluster centroids
@@ -260,23 +262,23 @@ def train_coarse(data, V=8, kmeans_coarse_iters=10, n_init=10, random_state=None
     logger.info('Fitting coarse quantizer...')
 
     # Fit coarse model
-    model = KMeans(n_clusters=V, init="k-means++", max_iter=kmeans_coarse_iters, n_init=n_init, n_jobs=1, verbose=False, random_state=random_state)
+    model = KMeans(n_clusters=V, init="k-means++", max_iter=kmeans_coarse_iters, n_init=n_init, n_jobs=n_jobs, verbose=False, random_state=random_state)
     model.fit(data)
 
     logger.info('Done fitting coarse quantizer.')
 
     return model.cluster_centers_
 
 
-def train_subquantizers(data, num_buckets, subquantizer_clusters=256, kmeans_local_iters=20, n_init=10, random_state=None):
+def train_subquantizers(data, num_buckets, subquantizer_clusters=256, kmeans_local_iters=20, n_init=10, random_state=None, n_jobs=1):
     """
     Fit a set of num_buckets subquantizers for corresponding subvectors.
     """
 
     subquantizers = list()
     for i, d in enumerate(np.split(data, num_buckets, axis=1)):
         model = KMeans(n_clusters=subquantizer_clusters, init="k-means++", max_iter=kmeans_local_iters,
-                       n_init=n_init, n_jobs=1, verbose=False, random_state=random_state)
+                       n_init=n_init, n_jobs=n_jobs, verbose=False, random_state=random_state)
         model.fit(d)
         subquantizers.append(model.cluster_centers_)
         logger.info('Fit subquantizer %d of %d.' % (i + 1, num_buckets))
@@ -286,7 +288,8 @@ def train_subquantizers(data, num_buckets, subquantizer_clusters=256, kmeans_loc
 
 
 def train(data, V=8, M=4, subquantizer_clusters=256, parameters=None,
           kmeans_coarse_iters=10, kmeans_local_iters=20, n_init=10,
-          subquantizer_sample_ratio=1.0, random_state=None, verbose=False):
+          subquantizer_sample_ratio=1.0, random_state=None, verbose=False,
+          n_jobs=1):
     """
     Fit an LOPQ model.
@@ -316,6 +319,8 @@ def train(data, V=8, M=4, subquantizer_clusters=256, parameters=None,
         a random seed used in all random operations during training if provided
     :param bool verbose:
         a bool enabling verbose output during training
+    :param int n_jobs:
+        the number of jobs to use for KMeans cluster computation
 
     :returns tuple:
         a tuple of model parameters that can be used to instantiate an LOPQModel object
@@ -343,8 +348,8 @@ def train(data, V=8, M=4, subquantizer_clusters=256, parameters=None,
         logger.info('Using existing coarse quantizers.')
         C1, C2 = Cs
     else:
-        C1 = train_coarse(first_half, V, kmeans_coarse_iters, n_init, random_state)
-        C2 = train_coarse(second_half, V, kmeans_coarse_iters, n_init, random_state)
+        C1 = train_coarse(first_half, V, kmeans_coarse_iters, n_init, random_state, n_jobs)
+        C2 = train_coarse(second_half, V, kmeans_coarse_iters, n_init, random_state, n_jobs)
 
     # Compute local rotations
     if Rs is not None and mus is not None:
@@ -379,8 +384,8 @@ def train(data, V=8, M=4, subquantizer_clusters=256, parameters=None,
     projected2 = project_residuals_to_local(residuals2, assignments2, Rs2, mu2)
 
     logger.info('Fitting subquantizers...')
-    subquantizers1 = train_subquantizers(projected1, M / 2, subquantizer_clusters, kmeans_local_iters, n_init, random_state=random_state)
-    subquantizers2 = train_subquantizers(projected2, M / 2, subquantizer_clusters, kmeans_local_iters, n_init, random_state=random_state)
+    subquantizers1 = train_subquantizers(projected1, M / 2, subquantizer_clusters, kmeans_local_iters, n_init, random_state=random_state, n_jobs=n_jobs)
+    subquantizers2 = train_subquantizers(projected2, M / 2, subquantizer_clusters, kmeans_local_iters, n_init, random_state=random_state, n_jobs=n_jobs)
     logger.info('Done fitting subquantizers.')
 
     return (C1, C2), (Rs1, Rs2), (mu1, mu2), (subquantizers1, subquantizers2)
@@ -441,7 +446,7 @@ def __init__(self, V=8, M=4, subquantizer_clusters=256, parameters=None):
         self.M = M
         self.subquantizer_clusters = subquantizer_clusters
 
-    def fit(self, data, kmeans_coarse_iters=10, kmeans_local_iters=20, n_init=10, subquantizer_sample_ratio=1.0, random_state=None, verbose=False):
+    def fit(self, data, kmeans_coarse_iters=10, kmeans_local_iters=20, n_init=10, subquantizer_sample_ratio=1.0, random_state=None, verbose=False, n_jobs=1):
         """
         Fit a model with the current model parameters. This method will use existing parameters and only
         train missing parameters.
@@ -459,12 +464,14 @@ def fit(self, data, kmeans_coarse_iters=10, kmeans_local_iters=20, n_init=10, su
             a random seed used in all random operations during training if provided
         :param bool verbose:
             a bool enabling verbose output during training
+        :param int n_jobs:
+            the number of jobs to use for KMeans cluster computation
         """
         existing_parameters = (self.Cs, self.Rs, self.mus, self.subquantizers)
 
         parameters = train(data, self.V, self.M, self.subquantizer_clusters, existing_parameters,
                            kmeans_coarse_iters, kmeans_local_iters, n_init, subquantizer_sample_ratio,
-                           random_state, verbose)
+                           random_state, verbose, n_jobs)
 
         self.Cs, self.Rs, self.mus, self.subquantizers = parameters
 
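For context, here is a minimal usage sketch of the new parameter (the toy data, shapes, and values below are illustrative assumptions, not part of the commit). In scikit-learn releases of this era, KMeans parallelizes across its n_init restarts, and n_jobs=-1 uses all available cores:

    import numpy as np
    from lopq.model import LOPQModel

    # Toy data: 10000 points in 128 dimensions (placeholder values).
    data = np.random.RandomState(0).rand(10000, 128)

    model = LOPQModel(V=8, M=4, subquantizer_clusters=256)
    # n_jobs is forwarded to every underlying sklearn KMeans fit;
    # -1 means use all available cores.
    model.fit(data, n_init=10, random_state=0, n_jobs=-1)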
python/lopq/package_metadata.json: 1 addition & 0 deletions
@@ -0,0 +1 @@
{"git_hash": "5be92898ae856b8c75fb5ec577f8cd0c95754488", "version": "1.0.36", "git_branch": "python2_parallel", "git_origin": "https://github.com/squall-1002/lopq.git"}
python/requirements.txt: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 nose>=1.3.4
 numpy>=1.9
 protobuf>=2.6
-scikit-learn>=0.15
+scikit-learn>=0.18
 scipy>=0.14
 lmdb>=0.87
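The scikit-learn floor rises from 0.15 to 0.18, the first release to ship sklearn.model_selection (used by the test change below). A quick sanity check, assuming scikit-learn is already importable:

    import sklearn
    from distutils.version import LooseVersion

    # requirements.txt now demands scikit-learn>=0.18.
    assert LooseVersion(sklearn.__version__) >= LooseVersion('0.18')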
python/test/tests.py: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
 import sys
 import os
 import numpy as np
-from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 
 sys.path.insert(1, os.path.abspath('..'))
 from lopq.model import LOPQModel, eigenvalue_allocation, accumulate_covariance_estimators, compute_rotations_from_accumulators
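The test import moves to sklearn.model_selection; sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in 0.20, while the train_test_split call itself is unchanged. A minimal sketch of the equivalent usage (array contents are placeholders):

    import numpy as np
    from sklearn.model_selection import train_test_split  # was sklearn.cross_validation

    X = np.arange(20).reshape(10, 2)
    # Same call signature as before the module move.
    train, test = train_test_split(X, test_size=0.2, random_state=0)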
