Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some updates to #13099 #5

Merged
merged 5 commits into from
May 17, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions python/docs/pyspark.ml.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ pyspark.ml.clustering module
:undoc-members:
:inherited-members:

pyspark.ml.linalg module
------------------------

.. automodule:: pyspark.ml.linalg
:members:
:undoc-members:
:inherited-members:

pyspark.ml.recommendation module
--------------------------------

Expand Down
132 changes: 1 addition & 131 deletions python/pyspark/ml/linalg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@


__all__ = ['Vector', 'DenseVector', 'SparseVector', 'Vectors',
'Matrix', 'DenseMatrix', 'SparseMatrix', 'Matrices',
'QRDecomposition']
'Matrix', 'DenseMatrix', 'SparseMatrix', 'Matrices']


if sys.version_info[:2] == (2, 7):
Expand Down Expand Up @@ -278,28 +277,6 @@ def __init__(self, ar):
ar = ar.astype(np.float64)
self.array = ar

@staticmethod
def parse(s):
    """
    Build a DenseVector from its bracketed string form.

    The text between the first '[' and the first ']' is split on commas
    and every non-empty piece is converted with float().

    >>> DenseVector.parse(' [ 0.0,1.0,2.0, 3.0]')
    DenseVector([0.0, 1.0, 2.0, 3.0])
    """
    open_pos = s.find('[')
    if open_pos < 0:
        raise ValueError("Array should start with '['.")
    close_pos = s.find(']')
    if close_pos < 0:
        raise ValueError("Array should end with ']'.")
    # Keep only the payload between the brackets; the error message below
    # reports this trimmed text.
    s = s[open_pos + 1: close_pos]

    values = []
    try:
        for token in s.split(','):
            if token:
                values.append(float(token))
    except ValueError:
        raise ValueError("Unable to parse values from %s" % s)
    return DenseVector(values)

def __reduce__(self):
    """Pickle support: rebuild via DenseVector from the array's raw bytes."""
    ctor_args = (self.array.tostring(),)
    return DenseVector, ctor_args

Expand Down Expand Up @@ -557,55 +534,6 @@ def __reduce__(self):
SparseVector,
(self.size, self.indices.tostring(), self.values.tostring()))

@staticmethod
def parse(s):
    """
    Parse string representation back into the SparseVector.

    The expected form is ``(size, [indices], [values])``. Whitespace
    around the individual numbers is tolerated because ``int()`` and
    ``float()`` ignore it; empty pieces between commas are skipped.

    :param s: string such as ``'(4,[0,1],[4.0,5.0])'``.
    :return: SparseVector built from the parsed size, indices and values.
    :raises ValueError: if a delimiter ``(``, ``)``, ``[`` or ``]`` is
        missing, or if the size, an index or a value cannot be converted.

    >>> SparseVector.parse(' (4, [0,1 ],[ 4.0,5.0] )')
    SparseVector(4, {0: 4.0, 1: 5.0})
    """
    start = s.find('(')
    if start == -1:
        raise ValueError("Tuple should start with '('")
    end = s.find(')')
    # The closing parenthesis must be checked via `end`; re-testing `start`
    # here could never fire and let a missing ')' silently chop the last
    # character off the input.
    if end == -1:
        raise ValueError("Tuple should end with ')'")
    s = s[start + 1: end].strip()

    # Everything before the first comma is the vector size.
    size = s[: s.find(',')]
    try:
        size = int(size)
    except ValueError:
        raise ValueError("Cannot parse size %s." % size)

    ind_start = s.find('[')
    if ind_start == -1:
        raise ValueError("Indices array should start with '['.")
    ind_end = s.find(']')
    if ind_end == -1:
        raise ValueError("Indices array should end with ']'")
    new_s = s[ind_start + 1: ind_end]
    ind_list = new_s.split(',')
    try:
        indices = [int(ind) for ind in ind_list if ind]
    except ValueError:
        raise ValueError("Unable to parse indices from %s." % new_s)
    # Drop the consumed indices array so the next '[' / ']' search finds
    # the values array.
    s = s[ind_end + 1:].strip()

    val_start = s.find('[')
    if val_start == -1:
        raise ValueError("Values array should start with '['.")
    val_end = s.find(']')
    if val_end == -1:
        raise ValueError("Values array should end with ']'.")
    val_list = s[val_start + 1: val_end].split(',')
    try:
        values = [float(val) for val in val_list if val]
    except ValueError:
        raise ValueError("Unable to parse values from %s." % s)
    return SparseVector(size, indices, values)

def dot(self, other):
"""
Dot product with a SparseVector or 1- or 2-dimensional Numpy array.
Expand Down Expand Up @@ -845,19 +773,6 @@ def dense(*elements):
elements = elements[0]
return DenseVector(elements)

@staticmethod
def stringify(vector):
    """
    Render a vector as text by delegating to ``str()``; the result is the
    format that Vectors.parse() reads back.

    >>> Vectors.stringify(Vectors.sparse(2, [1], [1.0]))
    '(2,[1],[1.0])'
    >>> Vectors.stringify(Vectors.dense([0.0, 1.0]))
    '[0.0,1.0]'
    """
    rendered = str(vector)
    return rendered

@staticmethod
def squared_distance(v1, v2):
"""
Expand All @@ -880,23 +795,6 @@ def norm(vector, p):
"""
return _convert_to_vector(vector).norm(p)

@staticmethod
def parse(s):
    """Turn a vector's string form back into a Vector.

    A string containing '(' is handed to SparseVector.parse; otherwise a
    string containing '[' is handed to DenseVector.parse.

    >>> Vectors.parse('[2,1,2 ]')
    DenseVector([2.0, 1.0, 2.0])
    >>> Vectors.parse(' ( 100, [0], [2])')
    SparseVector(100, {0: 2.0})
    """
    has_paren = s.find('(') != -1
    has_bracket = s.find('[') != -1
    # A parenthesis always wins: the sparse form also contains brackets.
    if has_paren:
        return SparseVector.parse(s)
    if has_bracket:
        return DenseVector.parse(s)
    raise ValueError(
        "Cannot find tokens '[' or '(' from the input string.")

@staticmethod
def zeros(size):
    """Return a DenseVector wrapping ``np.zeros(size)``."""
    blank = np.zeros(size)
    return DenseVector(blank)
Expand Down Expand Up @@ -1237,34 +1135,6 @@ def sparse(numRows, numCols, colPtrs, rowIndices, values):
return SparseMatrix(numRows, numCols, colPtrs, rowIndices, values)


class QRDecomposition(object):
    """
    .. note:: Experimental

    Holder for the two factors of a QR decomposition, exposed through the
    read-only properties :py:attr:`Q` and :py:attr:`R`.
    """
    def __init__(self, Q, R):
        # Stored as given; no validation or copying is performed here.
        self._Q, self._R = Q, R

    @property
    @since('2.0.0')
    def Q(self):
        """
        The orthogonal factor Q of the decomposition.
        May be None if it was not computed.
        """
        return self._Q

    @property
    @since('2.0.0')
    def R(self):
        """
        The upper triangular factor R of the decomposition.
        """
        return self._R


def _test():
import doctest
(failure_count, test_count) = doctest.testmod(optionflags=doctest.ELLIPSIS)
Expand Down
16 changes: 0 additions & 16 deletions python/pyspark/ml/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,22 +1348,6 @@ def test_dense_matrix_is_transposed(self):
self.assertTrue(array_equal(sm.colPtrs, [0, 2, 5]))
self.assertTrue(array_equal(sm.values, [1, 3, 4, 6, 9]))

def test_parse_vector(self):
    """Check str() output of vectors and that parsing it round-trips."""
    # (vector, expected str() form, parser used for the round trip)
    round_trips = [
        (DenseVector([]), '[]', Vectors.parse),
        (DenseVector([3, 4, 6, 7]), '[3.0,4.0,6.0,7.0]', Vectors.parse),
        (SparseVector(4, [], []), '(4,[],[])', SparseVector.parse),
        (SparseVector(4, [0, 2], [3, 4]), '(4,[0,2],[3.0,4.0])',
         Vectors.parse),
    ]
    for vec, expected_text, parser in round_trips:
        self.assertEqual(str(vec), expected_text)
        self.assertEqual(parser(str(vec)), vec)

    # Extra whitespace inside the sparse form must be tolerated.
    expected = SparseVector(10, [0, 1], [4, 5])
    self.assertEqual(SparseVector.parse(' (10, [0,1 ],[ 4.0,5.0] )'), expected)

def test_norms(self):
a = DenseVector([0, 2, 3, -1])
self.assertAlmostEqual(a.norm(2), 3.742, 3)
Expand Down