Skip to content
This repository
Browse code

cleanup python client and add JSON serialization support to it

  • Loading branch information...
commit 2100b2050184ef354ff156fcb3319d4392af192d 1 parent edc7f3a
Jonathan Traupman authored

Showing 22 changed files with 2,352 additions and 533 deletions. Show diff stats Hide diff stats

  1. +1 0  build.properties
  2. +10 10 build.xml
  3. +63 0 clients/python/README
  4. +5 0 clients/python/setup.cfg
  5. +15 0 clients/python/setup.py
  6. +0 59 clients/python/test.py
  7. +302 0 clients/python/tests/test_client.py
  8. +11 0 clients/python/tests/voldemort_config/config/cluster.xml
  9. +27 0 clients/python/tests/voldemort_config/config/server.properties
  10. +32 0 clients/python/tests/voldemort_config/config/stores.xml
  11. +0 399 clients/python/voldemort.py
  12. +1 0  clients/python/voldemort/__init__.py
  13. +478 0 clients/python/voldemort/client.py
  14. 0  clients/python/voldemort/protocol/__init__.py
  15. 0  clients/python/{ → voldemort/protocol}/slop_pb2.py
  16. +65 65 clients/python/{ → voldemort/protocol}/voldemort_admin_pb2.py
  17. 0  clients/python/{ → voldemort/protocol}/voldemort_client_pb2.py
  18. +8 0 clients/python/voldemort/serialization/__init__.py
  19. +3 0  clients/python/voldemort/serialization/common.py
  20. +1,252 0 clients/python/voldemort/serialization/json_serializer.py
  21. +69 0 clients/python/voldemort/serialization/ordered_dict.py
  22. +10 0 clients/python/voldemort/serialization/string_serializer.py
1  build.properties
@@ -2,6 +2,7 @@
2 2 src.dir=src
3 3 java.dir=src/java
4 4 python.dir=clients/python
  5 +python.proto.dir=clients/python/voldemort/protocol
5 6 protobuff.dir=src/proto
6 7 lib.dir=lib
7 8 classes.dir=dist/classes
20 build.xml
@@ -22,7 +22,7 @@
22 22 <isset property="env.BUILD_NUMBER" />
23 23 <not>
24 24 <equals arg1="" arg2="${env.BUILD_NUMBER}" trim="yes"/>
25   - </not>
  25 + </not>
26 26 </and>
27 27 </condition>
28 28
@@ -112,7 +112,7 @@
112 112 <fileset dir="${testclasses.dir}" />
113 113 </jar>
114 114 </target>
115   -
  115 +
116 116 <target name="protobuff" description="Generate source files from .proto files">
117 117 <pathconvert property="proto.sources" pathsep=" ">
118 118 <path id="proto-files">
@@ -120,9 +120,9 @@
120 120 </path>
121 121 </pathconvert>
122 122
123   - <property name="proto.path" location="${protobuff.dir}"/>
  123 + <property name="proto.path" location="${protobuff.dir}"/>
124 124 <property name="javaout.path" location="${java.dir}"/>
125   - <property name="pythonout.path" location="${python.dir}"/>
  125 + <property name="pythonout.path" location="${python.proto.dir}"/>
126 126 <exec executable="protoc" failonerror="true">
127 127 <arg value="--proto_path=${proto.path}"/>
128 128 <arg value="--java_out=${javaout.path}"/>
@@ -151,7 +151,7 @@
151 151 </fileset>
152 152 </jar>
153 153 </target>
154   -
  154 +
155 155 <target name="alljar" depends="build, contrib-build" description="Build a jar file that includes all contrib code.">
156 156 <jar destfile="${dist.dir}/${name}-${curr.release}-all.jar">
157 157 <fileset dir="${classes.dir}">
@@ -166,7 +166,7 @@
166 166 </fileset>
167 167 </jar>
168 168 </target>
169   -
  169 +
170 170 <target name="war" depends="build" description="Build server war file">
171 171 <war destfile="${dist.dir}/${name}.war" webxml="web.xml" basedir="${classes.dir}">
172 172 <classes dir="${classes.dir}"/>
@@ -312,7 +312,7 @@
312 312 <report todir="${contribtesthtml.dir}" format="frames" />
313 313 </junitreport>
314 314 </target>
315   -
  315 +
316 316 <macrodef name="create-release-artifacts">
317 317 <attribute name="version" />
318 318 <sequential>
@@ -340,12 +340,12 @@
340 340 <target name="snapshot" description="Create a release-snapshot zip file with everything pre-built.">
341 341 <create-release-artifacts version="${curr.release.snapshot}" />
342 342 </target>
343   -
  343 +
344 344 <target name="release" description="Create a release zip file with everything pre-built.">
345 345 <create-release-artifacts version="${curr.release}" />
346 346 </target>
347   -
348   - <target name="hadoop-benchmark-jar" depends="build, contrib-build"
  347 +
  348 + <target name="hadoop-benchmark-jar" depends="build, contrib-build"
349 349 description="Build a jar file that includes all contrib code plus the necessary jars for running the hadoop benchmark.">
350 350 <jar destfile="${dist.dir}/hadoop-benchmark.jar">
351 351 <fileset dir="${classes.dir}">
63 clients/python/README
... ... @@ -0,0 +1,63 @@
  1 +ABOUT
  2 +
  3 +This directory contains a pure Python implementation of a voldemort client.
  4 +It supports both raw (string) and JSON serialized stores. Only the protocol
  5 +buffer interface over TCP is supported for talking to the server. Only
  6 +server-side routing is supported.
  7 +
  8 +INSTALLING
  9 +
  10 +To install the module, you will need the following dependencies:
  11 +- nose >= 0.11
  12 +- simplejson >= 2.1.1
  13 +- Google protobuf > 2.3.0
  14 +
  15 +The setup process will automatically install nose and simplejson, since they
  16 +are well behaved Python packages. The protobuf module will need to be downloaded
  17 +from https://code.google.com/p/protobuf/downloads/list and installed manually.
  18 +
  19 +Once the dependencies, run the test suite to sanity check things. You need to
  20 +first start up a Voldemort server locally, pointing to the config files in
  21 +tests/voldemort_config. From the root voldemort of the voldemort source tree, run:
  22 +
  23 +> bin/voldemort-server clients/python/tests/voldemort_config
  24 +
  25 +In a separate shell, change into the clients/python directory and run:
  26 +
  27 +> python setup.py nosetests
  28 +
  29 +If all tests pass, you can install the package with the command:
  30 +
  31 +> python setup.py install
  32 +
  33 +This may need to be run as root if you don't have permissions to install to your
  34 +local python library.
  35 +
  36 +USING THE MODULE
  37 +
  38 +To use the client, simple import it into your program with the statement:
  39 +
  40 +import voldemort
  41 +
  42 +To create a client connection, instantiate a StoreClient object:
  43 +
  44 +client = voldemort.StoreClient('store_name', [('node1', 6666), ('node2', 6666)])
  45 +
  46 +The values of the store name and cluster nodes/ports will depend on your particular
  47 +Voldemort setup. The key and value serialization type will be determined
  48 +automatically during client initialization using the values in your cluster's
  49 +stores.xml file.
  50 +
  51 +The StoreClient object implements the get(), get_all(), put(), maybe_put(),
  52 +and delete() methods. For example:
  53 +
  54 +> v1 = client.put("foo", "hello")
  55 +> resp = client.get("foo")
  56 +> resp[0][0]
  57 +"hello"
  58 +
  59 +> client.delete("foo")
  60 +> client.get("foo")
  61 +[]
  62 +
  63 +The test suite contains many other usage examples.
5 clients/python/setup.cfg
... ... @@ -0,0 +1,5 @@
  1 +[nosetests]
  2 +detailed-errors=1
  3 +with-doctest=1
  4 +
  5 +
15 clients/python/setup.py
... ... @@ -0,0 +1,15 @@
  1 +from setuptools import setup
  2 +
  3 +setup(name='voldemort',
  4 + version='0.1',
  5 + description='Python Voldemort Client',
  6 + long_description=('Pure python client for accessing Voldemort key/value stores. ' +
  7 + 'Supports both raw and JSON stores. Only supports the tcp protocol ' +
  8 + 'buffer interface with server-side routing.'),
  9 + packages=['voldemort', 'voldemort.protocol', 'voldemort.serialization'],
  10 + author='LinkedIn Corporation',
  11 + license='Apache 2.0',
  12 + url='http://project-voldemort.com',
  13 + install_requires=['protobuf>=2.3.0', 'simplejson>=2.1.1'],
  14 + setup_requires=['nose>=0.11'],
  15 +)
59 clients/python/test.py
... ... @@ -1,59 +0,0 @@
1   -import logging
2   -import time
3   -from voldemort import StoreClient
4   -
5   -if __name__ == '__main__':
6   -
7   - logging.basicConfig(level=logging.INFO,)
8   -
9   - ## some random tests
10   - s = StoreClient('test', [('localhost', 6666)])
11   - version = s.put("hello", "1")
12   - assert s.get("hello")[0][0] == "1"
13   - s.put("hello", "2", version)
14   - assert s.get("hello")[0][0] == "2"
15   - s.put("hello", "3")
16   - assert s.get("hello")[0][0] == "3"
17   - s.delete("hello")
18   - assert len(s.get("hello")) == 0
19   -
20   - ## test get_all
21   - pairs = [("a1", "1"), ("a2", "2"), ("a3", "3"), ("a4", "4")]
22   - for k, v in pairs:
23   - s.put(k, v)
24   -
25   - vals = s.get_all([k for k, v in pairs])
26   - for k, v in pairs:
27   - assert vals[k][0][0] == v
28   -
29   - requests = 10000
30   -
31   - ## Time get requests
32   - s.put("hello", "world")
33   - start = time.time()
34   - for i in xrange(requests):
35   - s.get('hello')
36   - print requests/(time.time() - start), ' get requests per second'
37   -
38   - ## Time put requests
39   - version = s.put('abc', 'def')
40   - start = time.time()
41   - for i in xrange(requests):
42   - version = s.put('abc', 'def', version)
43   - print requests/(time.time() - start), ' put requests per second'
44   -
45   - ## Time get_all requests
46   - keys = [k for k,v in pairs]
47   - start = time.time()
48   - for i in xrange(requests):
49   - vals = s.get_all(keys)
50   - print requests/(time.time() - start), ' get_all requests per second'
51   -
52   - ## Time delete requests
53   - version = None
54   - for i in xrange(requests):
55   - version = s.put(str(i), str(i))
56   - start = time.time()
57   - for i in xrange(requests):
58   - vals = s.delete(str(i), version)
59   - print requests/(time.time() - start), ' delete requests per second'
302 clients/python/tests/test_client.py
... ... @@ -0,0 +1,302 @@
  1 +# Tests of the client code.
  2 +#
  3 +# To run these tests, you must have a local Voldemort server running using the configuration files
  4 +# in tests/voldemort_config.
  5 +
  6 +import unittest
  7 +import datetime
  8 +
  9 +from voldemort import StoreClient, VoldemortException
  10 +
  11 +def _vector_clock_equal(clock1, clock2):
  12 + """
  13 + Compares two vector clocks, ignoring the timestamp field, which may be skewed.
  14 + """
  15 +
  16 + clock1_entries = dict((entry.node_id, entry.version) for entry in clock1.entries)
  17 + clock2_entries = dict((entry.node_id, entry.version) for entry in clock2.entries)
  18 + return clock1_entries == clock2_entries
  19 +
  20 +class VoldemortClientTest(unittest.TestCase):
  21 + def _reinit_raw_client(self):
  22 + s = StoreClient('test', [('localhost', 6666)])
  23 + for k in ['a', 'b', 'c']:
  24 + s.delete(k)
  25 + return s
  26 +
  27 + def _reinit_json_client(self):
  28 + s = StoreClient('json_test', [('localhost', 6666)])
  29 + for k in [1, 2, 3]:
  30 + s.delete(k)
  31 + return s
  32 +
  33 + def test_raw_get(self):
  34 + """
  35 + Tests basic puts/gets in raw (non-serialized) mode.
  36 + """
  37 +
  38 + s = self._reinit_raw_client()
  39 +
  40 + s.put('a', '1')
  41 + resp = s.get('a')
  42 + self.assertEquals(len(resp), 1)
  43 + self.assertEquals(len(resp[0]), 2)
  44 + self.assertEquals(resp[0][0], '1')
  45 +
  46 + s.put('b', '2')
  47 + resp = s.get('b')
  48 + self.assertEquals(len(resp), 1)
  49 + self.assertEquals(len(resp[0]), 2)
  50 + self.assertEquals(resp[0][0], '2')
  51 +
  52 +
  53 + def test_raw_get_all(self):
  54 + """
  55 + Tests the get_all() method in raw mode.
  56 + """
  57 +
  58 + s = self._reinit_raw_client()
  59 +
  60 + pairs = [('a', '1'), ('b', '2'), ('c', '3')]
  61 + for k, v in pairs:
  62 + s.put(k, v)
  63 +
  64 + resp = s.get_all([k for k, v in pairs])
  65 + self.assertEquals(len(resp), len(pairs))
  66 +
  67 + for k, v in pairs:
  68 + self.assertTrue(k in resp)
  69 + self.assertEquals(len(resp[k]), 1)
  70 + self.assertEquals(len(resp[k][0]), 2)
  71 +
  72 + self.assertEquals(resp[k][0][0], v)
  73 +
  74 +
  75 + def test_raw_versions(self):
  76 + """
  77 + Tests the put_maybe() method in raw mode.
  78 + """
  79 +
  80 + s = self._reinit_raw_client()
  81 +
  82 + v1 = s.put('a', '1')
  83 + self.assertTrue(v1 is not None)
  84 +
  85 + v2 = s.put('a', '2')
  86 + self.assertTrue(v2 is not None)
  87 + self.assertFalse(_vector_clock_equal(v2, v1))
  88 +
  89 + v3 = s.put('a', '3')
  90 + self.assertTrue(v3 is not None)
  91 + self.assertFalse(_vector_clock_equal(v3, v1))
  92 + self.assertFalse(_vector_clock_equal(v3, v2))
  93 +
  94 + resp = s.get('a')
  95 + self.assertEquals(resp[0][0], '3')
  96 +
  97 + # put() should fail because v2 is not the current version
  98 + self.assertRaises(VoldemortException, s.put, 'a', '4', version=v2)
  99 +
  100 + # maybe_put() won't raise an exception, but will return None
  101 + v4 = s.maybe_put('a', '4', version=v2)
  102 + self.assertTrue(v4 is None)
  103 +
  104 + # this put() should succeed
  105 + v4 = s.put('a', '4', version=v3)
  106 + self.assertTrue(v4 is not None)
  107 + self.assertFalse(_vector_clock_equal(v4, v1))
  108 + self.assertFalse(_vector_clock_equal(v4, v2))
  109 + self.assertFalse(_vector_clock_equal(v4, v2))
  110 +
  111 + # and this maybe_put() should not return None
  112 + v5 = s.maybe_put('a', '5', version=v4)
  113 + self.assertTrue(v5 is not None)
  114 + self.assertFalse(_vector_clock_equal(v5, v1))
  115 + self.assertFalse(_vector_clock_equal(v5, v2))
  116 + self.assertFalse(_vector_clock_equal(v5, v3))
  117 + self.assertFalse(_vector_clock_equal(v5, v4))
  118 +
  119 + # the value at the latest version should be "5"
  120 + resp = s.get('a')
  121 + self.assertEquals(resp[0][0], '5')
  122 + self.assertTrue(_vector_clock_equal(resp[0][1], v5))
  123 +
  124 + # deleting old versions should have no effect
  125 + s.delete('a', version=v3)
  126 + resp = s.get('a')
  127 + self.assertEquals(len(resp), 1)
  128 + self.assertEquals(resp[0][0], '5')
  129 + self.assertTrue(_vector_clock_equal(resp[0][1], v5))
  130 +
  131 + # deleting the current version should erase the entry
  132 + s.delete('a', version=v5)
  133 + resp = s.get('a')
  134 + self.assertEquals(resp, [])
  135 +
  136 + val1 = {
  137 + 'a': 0.25,
  138 + 'b': [1,2,3],
  139 + 'c': u'foo',
  140 + 'd': { 'foo': True,
  141 + 'bar': datetime.datetime(2010, 11, 24, 20, 8, 7, 155000)
  142 + }
  143 + }
  144 +
  145 + val2 = {
  146 + 'a': 4.0,
  147 + 'b': [5,6],
  148 + 'c': u'bar',
  149 + 'd': { 'foo': None,
  150 + 'bar': datetime.datetime(2003, 5, 5, 1, 23, 45, 678000)
  151 + }
  152 + }
  153 +
  154 + val3 = {
  155 + 'a': 8.0,
  156 + 'b': [],
  157 + 'c': u'',
  158 + 'd': None
  159 + }
  160 +
  161 + val4 = {
  162 + 'a': 4.0,
  163 + 'b': [5,6],
  164 + 'c': 'bar',
  165 + 'd': { 'foo': True,
  166 + 'bar': datetime.datetime(2003, 5, 5, 1, 23, 45, 678123)
  167 + }
  168 + }
  169 +
  170 + def test_json_get(self):
  171 + """
  172 + Tests the JSON serialization with put()/get()
  173 + """
  174 +
  175 + s = self._reinit_json_client()
  176 +
  177 + s.put(1, self.val1)
  178 + resp = s.get(1)
  179 + self.assertEquals(len(resp), 1)
  180 + self.assertEquals(len(resp[0]), 2)
  181 + self.assertEquals(resp[0][0], self.val1)
  182 +
  183 + s.put(2, self.val2)
  184 + resp = s.get(2)
  185 + self.assertEquals(len(resp), 1)
  186 + self.assertEquals(len(resp[0]), 2)
  187 + self.assertEquals(resp[0][0], self.val2)
  188 +
  189 + def test_json_get_all(self):
  190 + """
  191 + Tests JSON serialized get_all()
  192 + """
  193 + s = self._reinit_json_client()
  194 +
  195 + pairs = [(1, self.val1), (2, self.val2), (3, self.val3)]
  196 + for k, v in pairs:
  197 + s.put(k, v)
  198 +
  199 + resp = s.get_all([k for k, v in pairs])
  200 + self.assertEquals(len(resp), len(pairs))
  201 +
  202 + for k, v in pairs:
  203 + self.assertTrue(k in resp)
  204 + self.assertEquals(len(resp[k]), 1)
  205 + self.assertEquals(len(resp[k][0]), 2)
  206 +
  207 + self.assertEquals(resp[k][0][0], v)
  208 +
  209 + def test_json_mismatches(self):
  210 + """
  211 + Sometimes the result we get out of Voldemort is a little different than what
  212 + went in, but it's not always a problem.
  213 + """
  214 +
  215 + s = self._reinit_json_client()
  216 +
  217 + s.put(1, self.val4)
  218 + resp = s.get(1)
  219 + self.assertEquals(len(resp), 1)
  220 + self.assertEquals(len(resp[0]), 2)
  221 +
  222 + output = resp[0][0]
  223 + # the input and output won't be the same
  224 + self.assertNotEquals(output, self.val4)
  225 +
  226 + # the float should have survived the trip
  227 + self.assertEquals(output['a'], self.val4['a'])
  228 +
  229 + # as should the list
  230 + self.assertEquals(output['b'], self.val4['b'])
  231 +
  232 + # the string is now a unicode, but it should still compare equal to the original
  233 + self.assertTrue(isinstance(output['c'], unicode))
  234 + self.assertEquals(output['c'], self.val4['c'])
  235 +
  236 + # the boolean should be the same
  237 + self.assertEquals(output['d']['foo'], self.val4['d']['foo'])
  238 +
  239 + # but the date gets truncated:
  240 + self.assertNotEquals(output['d']['bar'], self.val4['d']['bar'])
  241 +
  242 + # the difference should be small
  243 + td = self.val4['d']['bar'] - output['d']['bar']
  244 + self.assertEquals(td.days, 0)
  245 + self.assertEquals(td.seconds, 0)
  246 + self.assertEquals(td.microseconds, 123)
  247 +
  248 + def test_raw_versions(self):
  249 + """
  250 + Tests versioning in JSON mode.
  251 + """
  252 +
  253 + s = self._reinit_json_client()
  254 +
  255 + v1 = s.put(1, self.val4)
  256 + self.assertTrue(v1 is not None)
  257 +
  258 + v2 = s.put(1, self.val3)
  259 + self.assertTrue(v2 is not None)
  260 + self.assertFalse(_vector_clock_equal(v2, v1))
  261 +
  262 + resp = s.get(1)
  263 + self.assertEquals(resp[0][0], self.val3)
  264 + self.assertTrue(_vector_clock_equal(resp[0][1], v2))
  265 +
  266 + # put() should fail because v1 is not the current version
  267 + self.assertRaises(VoldemortException, s.put, 1, self.val2, version=v1)
  268 +
  269 + # maybe_put() won't raise an exception, but will return None
  270 + v3 = s.maybe_put(1, self.val2, version=v1)
  271 + self.assertTrue(v3 is None)
  272 +
  273 + # this put() should succeed
  274 + v3 = s.put(1, self.val2, version=v2)
  275 + self.assertTrue(v3 is not None)
  276 + self.assertFalse(_vector_clock_equal(v3, v1))
  277 + self.assertFalse(_vector_clock_equal(v3, v2))
  278 +
  279 + # and this maybe_put() should not return None
  280 + v4 = s.maybe_put(1, self.val1, version=v3)
  281 + self.assertTrue(v4 is not None)
  282 + self.assertFalse(_vector_clock_equal(v4, v1))
  283 + self.assertFalse(_vector_clock_equal(v4, v2))
  284 + self.assertFalse(_vector_clock_equal(v4, v3))
  285 +
  286 + # the value at the latest version should be val1 at version v4
  287 + resp = s.get(1)
  288 + self.assertEquals(resp[0][0], self.val1)
  289 + self.assertTrue(_vector_clock_equal(resp[0][1], v4))
  290 +
  291 + # deleting old versions should have no effect
  292 + s.delete(1, version=v2)
  293 + resp = s.get(1)
  294 + self.assertEquals(len(resp), 1)
  295 + self.assertEquals(resp[0][0], self.val1)
  296 + self.assertTrue(_vector_clock_equal(resp[0][1], v4))
  297 +
  298 + # deleting the current version should erase the entry
  299 + s.delete(1, version=v4)
  300 + resp = s.get(1)
  301 + self.assertEquals(resp, [])
  302 +
11 clients/python/tests/voldemort_config/config/cluster.xml
... ... @@ -0,0 +1,11 @@
  1 +<cluster>
  2 + <name>mycluster</name>
  3 + <server>
  4 + <id>0</id>
  5 + <host>localhost</host>
  6 + <http-port>8081</http-port>
  7 + <socket-port>6666</socket-port>
  8 + <partitions>0, 1</partitions>
  9 + </server>
  10 +</cluster>
  11 +
27 clients/python/tests/voldemort_config/config/server.properties
... ... @@ -0,0 +1,27 @@
  1 +# The ID of *this* particular cluster node
  2 +node.id=0
  3 +
  4 +max.threads=100
  5 +
  6 +############### DB options ######################
  7 +
  8 +http.enable=true
  9 +socket.enable=true
  10 +
  11 +# BDB
  12 +bdb.write.transactions=false
  13 +bdb.flush.transactions=false
  14 +bdb.cache.size=1G
  15 +
  16 +# Mysql
  17 +mysql.host=localhost
  18 +mysql.port=1521
  19 +mysql.user=root
  20 +mysql.password=3306
  21 +mysql.database=test
  22 +
  23 +#NIO connector settings.
  24 +enable.nio.connector=false
  25 +
  26 +request.format=vp3
  27 +storage.configs=voldemort.store.bdb.BdbStorageConfiguration, voldemort.store.readonly.ReadOnlyStorageConfiguration
32 clients/python/tests/voldemort_config/config/stores.xml
... ... @@ -0,0 +1,32 @@
  1 +<stores>
  2 + <store>
  3 + <name>test</name>
  4 + <persistence>bdb</persistence>
  5 + <routing>client</routing>
  6 + <replication-factor>1</replication-factor>
  7 + <required-reads>1</required-reads>
  8 + <required-writes>1</required-writes>
  9 + <key-serializer>
  10 + <type>string</type>
  11 + </key-serializer>
  12 + <value-serializer>
  13 + <type>string</type>
  14 + </value-serializer>
  15 + </store>
  16 + <store>
  17 + <name>json_test</name>
  18 + <persistence>bdb</persistence>
  19 + <routing>client</routing>
  20 + <replication-factor>1</replication-factor>
  21 + <required-reads>1</required-reads>
  22 + <required-writes>1</required-writes>
  23 + <key-serializer>
  24 + <type>json</type>
  25 + <schema-info version="0">"int32"</schema-info>
  26 + </key-serializer>
  27 + <value-serializer>
  28 + <type>json</type>
  29 + <schema-info version="0">{ "a":"float32", "b":["int16"], "c":"string", "d":{ "foo":"boolean", "bar":"date" }}</schema-info>
  30 + </value-serializer>
  31 + </store>
  32 +</stores>
399 clients/python/voldemort.py
... ... @@ -1,399 +0,0 @@
1   -import socket
2   -import struct
3   -import time
4   -import sys
5   -import re
6   -import random
7   -import logging
8   -import sets
9   -
10   -import voldemort_client_pb2 as protocol
11   -from xml.dom import minidom
12   -from datetime import datetime
13   -
14   -##################################################################
15   -# A Voldemort client. Each client uses a single connection to one
16   -# Voldemort server. All routing is done server-side.
17   -##################################################################
18   -
19   -
20   -## Extract all the child text of the given element
21   -def _extract_text(elm):
22   - if elm.nodeType == minidom.Node.TEXT_NODE:
23   - return elm.data
24   - elif elm.nodeType == minidom.Node.ELEMENT_NODE:
25   - text = ""
26   - for child in elm.childNodes:
27   - text += _extract_text(child)
28   - return text
29   -
30   -
31   -## Get a single child from the element, if there are multiple children, explode.
32   -def _child(elmt, name):
33   - children = elmt.getElementsByTagName(name)
34   - if len(children) != 1:
35   - raise Exception, "No child '" + str(name) + "' for element " + str(elmt.nodeName)
36   - return children[0]
37   -
38   -
39   -## Get the child text of a single element
40   -def _child_text(elmt, name):
41   - return _extract_text(_child(elmt, name))
42   -
43   -
44   -
45   -##################################################################
46   -# A node class representing a single voldemort server in the
47   -# cluster. The cluster itself is just a list of nodes
48   -##################################################################
49   -class Node:
50   - """A Voldemort node with the appropriate host and port information for contacting that node"""
51   -
52   - def __init__(self, id, host, socket_port, http_port, partitions, is_available = True, last_contact = None):
53   - self.id = id
54   - self.host = host
55   - self.socket_port = socket_port
56   - self.http_port = http_port
57   - self.partitions = partitions
58   - self.is_available = True
59   - if not last_contact:
60   - self.last_contact = time.clock()
61   -
62   - def __repr__(self):
63   - return 'node(id = ' + str(self.id) + ', host = ' + self.host + ', socket_port = ' + str(self.socket_port) + \
64   - ', http_port = ' + str(self.http_port) + ', partitions = ' + ', '.join(map(str, self.partitions)) + ')'
65   -
66   - @staticmethod
67   - def parse_cluster(xml):
68   - """Parse the cluster.xml file and return a dictionary of the nodes in the cluster indexed by node id """
69   - doc = minidom.parseString(xml)
70   - nodes = {}
71   - for curr in doc.getElementsByTagName('server'):
72   - id = int(_child_text(curr, 'id'))
73   - host = _child_text(curr, 'host')
74   - http_port = int(_child_text(curr, 'http-port'))
75   - socket_port = int(_child_text(curr, 'socket-port'))
76   - partition_str = _child_text(curr, 'partitions')
77   - partitions = [int(p) for p in re.split('[\s,]+', partition_str)]
78   - nodes[id] = Node(id = id, host = host, socket_port = socket_port, http_port = http_port, partitions = partitions)
79   - return nodes
80   -
81   -
82   -
83   -class VoldemortException(Exception):
84   - def __init__(self, message, code = 1):
85   - self.code = code
86   - self.message = message
87   -
88   - def __str__(self):
89   - return repr(self.message)
90   -
91   -
92   -
93   -class StoreClient:
94   - """A simple Voldemort client. It is single-threaded and supports only server-side routing."""
95   -
96   - def __init__(self, store_name, bootstrap_urls, reconnect_interval = 500, conflict_resolver = None):
97   - self.store_name = store_name
98   - self.request_count = 0
99   - self.conflict_resolver = conflict_resolver
100   - self.nodes = self._bootstrap_metadata(bootstrap_urls)
101   - self.node_id = random.randint(0, len(self.nodes) - 1)
102   - self.node_id, self.connection = self._reconnect()
103   - self.reconnect_interval = reconnect_interval
104   - self.open = True
105   -
106   - def _make_connection(self, host, port):
107   - protocol = 'pb0'
108   - logging.debug('Attempting to connect to ' + host + ':' + str(port))
109   - connection = socket.socket()
110   - connection.connect((host, port))
111   - logging.debug('Connection succeeded, negotiating protocol')
112   - connection.send(protocol)
113   - resp = connection.recv(2)
114   - if resp != 'ok':
115   - raise VoldemortException('Server does not understand the protocol ' + protocol)
116   - logging.debug('Protocol negotiation suceeded')
117   - return connection
118   -
119   -
120   - ## Connect to a the next available node in the cluster
121   - ## returns a tuple of (node_id, connection)
122   - def _reconnect(self):
123   - num_nodes = len(self.nodes)
124   - attempts = 0
125   - new_node_id = self.node_id
126   - while attempts < num_nodes:
127   - new_node_id = (new_node_id + 1) % num_nodes
128   - new_node = self.nodes[new_node_id]
129   - connection = None
130   - try:
131   - connection = self._make_connection(new_node.host, new_node.socket_port)
132   - self.request_count = 0
133   - return new_node_id, connection
134   - except socket.error, (err_num, message):
135   - self._close_socket(connection)
136   - logging.warn('Error connecting to node ' + str(new_node_id) + ': ' + message)
137   - attempts += 1
138   -
139   - # If we get here all nodes have failed us, explode
140   - raise VoldemortException('Connections to all nodes failed.')
141   -
142   -
143   - ## Safely close the socket, catching and logging any exceptions
144   - def _close_socket(self, socket):
145   - try:
146   - if socket:
147   - socket.close()
148   - except socket.error, exp:
149   - logging.error('Error while closing socket: ' + str(exp))
150   -
151   -
152   - ## Check if the the number of requests made on this connection is greater than the reconnect interval.
153   - ## If so reconnect to a random node in the cluster. No attempt is made at preventing the reconnecting
154   - ## from going back to the same node
155   - def _maybe_reconnect(self):
156   - if self.request_count >= self.reconnect_interval:
157   - logging.debug('Completed ' + str(self.request_count) + ' requests using this connection, reconnecting...')
158   - self._close_socket(self.connection)
159   - self.node_id, self.connection = self._reconnect()
160   -
161   -
162   - ## send a request to the server using the given connection
163   - def _send_request(self, connection, req_bytes):
164   - connection.send(struct.pack('>i', len(req_bytes)) + req_bytes)
165   - self.request_count += 1
166   -
167   -
168   - ## read a response from the connection
169   - def _receive_response(self, connection):
170   - size_bytes = connection.recv(4)
171   - size = struct.unpack('>i', size_bytes)
172   -
173   - if not size[0]:
174   - return ''
175   -
176   - return connection.recv(size[0])
177   -
178   - ## Bootstrap cluster metadata from a list of urls of nodes in the cluster.
179   - ## The urls are tuples in the form (host, port).
180   - ## A dictionary of node_id => node is returned.
181   - def _bootstrap_metadata(self, bootstrap_urls):
182   - random.shuffle(bootstrap_urls)
183   - for host, port in bootstrap_urls:
184   - logging.debug('Attempting to bootstrap metadata from ' + host + ':' + str(port))
185   - connection = None
186   - try:
187   - connection = self._make_connection(host, port)
188   - cluster_xmls = self._get_with_connection(connection, 'metadata', 'cluster.xml', should_route = False)
189   - if len(cluster_xmls) != 1:
190   - raise VoldemortException('Expected exactly one version of the metadata but found ' + str(cluster_xmls))
191   - nodes = Node.parse_cluster(cluster_xmls[0][0])
192   - logging.debug('Bootstrap from ' + host + ':' + str(port) + ' succeeded, found ' + str(len(nodes)) + " nodes.")
193   - return nodes
194   - except socket.error, (err_num, message):
195   - logging.warn('Metadata bootstrap from ' + host + ':' + str(port) + " failed: " + message)
196   - finally:
197   - self._close_socket(connection)
198   - raise VoldemortException('All bootstrap attempts failed')
199   -
200   -
201   - ## check if the server response has an error, if so throw an exception
202   - def _check_error(self, resp):
203   - if resp._has_error:
204   - raise VoldemortException(resp.error.error_message, resp.error.error_code)
205   -
206   -
207   - ## Increment the version for a vector clock
208   - def _increment(self, clock):
209   - # See if we already have a version for this guy, if so increment it
210   - for entry in clock.entries:
211   - if entry.node_id == self.node_id:
212   - entry.version += 1
213   - return
214   - # Otherwise add a version
215   - entry = clock.entries.add()
216   - entry.node_id = self.node_id
217   - entry.version = 1
218   - clock.timestamp = int(time.time() * 1000)
219   -
220   -
221   - ## Take a list of versions, and, if a conflict resolver has been given, resolve any conflicts that can be resolved
222   - def _resolve_conflicts(self, versions):
223   - if self.conflict_resolver and versions:
224   - return self.conflict_resolver(versions)
225   - else:
226   - return versions
227   -
228   -
229   - ## Turn a protocol buffer list of versioned items into a python list of items
230   - def _extract_versions(self, pb_versioneds):
231   - versions = []
232   - for versioned in pb_versioneds:
233   - versions.append((versioned.value, versioned.version))
234   - return self._resolve_conflicts(versions)
235   -
236   - ## A basic request wrapper, that handles reconnection logic and failures
237   - def _execute_request(self, fun, args):
238   - assert self.open, 'Store has been closed.'
239   - self._maybe_reconnect()
240   -
241   - failures = 0
242   - num_nodes = len(self.nodes)
243   - while failures < num_nodes:
244   - try:
245   - return apply(fun, args)
246   - except socket.error, (err_num, message):
247   - logging.warn('Error while performing ' + fun.__name__ + ' on node ' + str(self.node_id) + ': ' + message)
248   - self._reconnect()
249   - failures += 1
250   - raise VoldemortException('All nodes are down, ' + fun.__name__ + ' failed.')
251   -
252   -
253   - ## An internal get function that take the connection and store name as parameters. This is
254   - ## used by both the public get() method and also the metadata bootstrap process
255   - def _get_with_connection(self, connection, store_name, key, should_route):
256   - """Execute get request to the given store. Returns a (value, version) pair."""
257   -
258   - req = protocol.VoldemortRequest()
259   - req.should_route = should_route
260   - req.store = store_name
261   - req.type = protocol.GET
262   - req.get.key = key
263   -
264   - # send request
265   - self._send_request(connection, req.SerializeToString())
266   -
267   - # read and parse response
268   - resp_str = self._receive_response(connection)
269   - resp = protocol.GetResponse()
270   - resp.ParseFromString(resp_str)
271   - self._check_error(resp)
272   -
273   - return self._extract_versions(resp.versioned)
274   -
275   -
276   - ## Inner helper function for get
277   - def _get(self, key):
278   - return self._get_with_connection(self.connection, self.store_name, key, True)
279   -
280   -
281   - def get(self, key):
282   - """Execute a get request. Returns a list of (value, version) pairs."""
283   - return self._execute_request(self._get, [key])
284   -
285   -
286   - ## Inner get_all method that takes the connection and store_name as parameters
287   - def _get_all(self, keys):
288   - req = protocol.VoldemortRequest()
289   - req.should_route = True
290   - req.store = self.store_name
291   - req.type = protocol.GET_ALL
292   - for key in keys:
293   - req.getAll.keys.append(key)
294   -
295   - # send request
296   - self._send_request(self.connection, req.SerializeToString())
297   -
298   - # read and parse response
299   - resp_str = self._receive_response(self.connection)
300   - resp = protocol.GetAllResponse()
301   - resp.ParseFromString(resp_str)
302   - self._check_error(resp)
303   - values = {}
304   - for key_val in resp.values:
305   - values[key_val.key] = self._extract_versions(key_val.versions)
306   - return values
307   -
308   -
309   - def get_all(self, keys):
310   - """Execute get request for multiple keys given as a list or tuple.
311   - Returns a dictionary of key => [(value, version), ...] pairs."""
312   - return self._execute_request(self._get_all, [keys])
313   -
314   -
315   - ## Get the current version of the given key by doing a get request to the store
316   - def _fetch_version(self, key):
317   - versioned = self.get(key)
318   - if versioned:
319   - version = versioned[0][1]
320   - else:
321   - version = protocol.VectorClock()
322   - version.timestamp = int(time.time() * 1000)
323   - return version
324   -
325   -
326   - def _put(self, key, value, version):
327   - req = protocol.VoldemortRequest()
328   - req.should_route = True
329   - req.store = self.store_name
330   - req.type = protocol.PUT
331   - req.put.key = key
332   - req.put.versioned.value = value
333   - req.put.versioned.version.MergeFrom(version)
334   -
335   - # send request
336   - self._send_request(self.connection, req.SerializeToString())
337   -
338   - # read and parse response
339   - resp_str = self._receive_response(self.connection)
340   - resp = protocol.PutResponse()
341   - resp.ParseFromString(resp_str)
342   - self._check_error(resp)
343   - self._increment(version)
344   - return version
345   -
346   - def put (self, key, value, version = None):
347   - """Execute a put request using the given key and value. If no version is specified a get(key) request
348   - will be done to get the current version. The updated version is returned."""
349   - # if we don't have a version, fetch one
350   - if not version:
351   - version = self._fetch_version(key)
352   - return self._execute_request(self._put, [key, value, version])
353   -
354   -
355   - def maybe_put(self, key, value, version = None):
356   - """Execute a put request using the given key and value. If the version being put is obsolete,
357   - no modification will be made and this function will return None. Otherwise it will return the new version."""
358   - try:
359   - return self.put(key, value, version)
360   - except:
361   - return None
362   -
363   - def _delete(self, key, version):
364   - req = protocol.VoldemortRequest()
365   - req.should_route = True
366   - req.store = self.store_name
367   - req.type = protocol.DELETE
368   - req.delete.key = key
369   - req.delete.version.MergeFrom(version)
370   -
371   - # send request
372   - self._send_request(self.connection, req.SerializeToString())
373   -
374   - # read and parse response
375   - resp_str = self._receive_response(self.connection)
376   - resp = protocol.DeleteResponse()
377   - resp.ParseFromString(resp_str)
378   - self._check_error(resp)
379   -
380   - return resp.success
381   -
382   -
383   - def delete(self, key, version = None):
384   - """Execute a delete request, deleting all keys up to and including the given version.
385   - If no version is given a get(key) request will be done to find the latest version."""
386   - # if we don't have a version, fetch one
387   - if version == None:
388   - version = self._fetch_version(key)
389   - return self._execute_request(self._delete, [key, version])
390   -
391   -
392   - def close(self):
393   - """Close the connection this store maintains."""
394   - self.open = False
395   - self.connection.close()
396   -
397   -
398   -
399   -
1  clients/python/voldemort/__init__.py
... ... @@ -0,0 +1 @@
  1 +from client import StoreClient, VoldemortException
478 clients/python/voldemort/client.py
... ... @@ -0,0 +1,478 @@
  1 +import socket
  2 +import struct
  3 +import time
  4 +import sys
  5 +import re
  6 +import random
  7 +import logging
  8 +
  9 +import protocol.voldemort_client_pb2 as protocol
  10 +from xml.dom import minidom
  11 +from datetime import datetime
  12 +
  13 +import serialization
  14 +
  15 +##################################################################
  16 +# A Voldemort client. Each client uses a single connection to one
  17 +# Voldemort server. All routing is done server-side.
  18 +##################################################################
  19 +
  20 +
  21 +## Extract all the child text of the given element
  22 +def _extract_text(elm):
  23 + if elm.nodeType == minidom.Node.TEXT_NODE:
  24 + return elm.data
  25 + elif elm.nodeType == minidom.Node.ELEMENT_NODE:
  26 + text = ""
  27 + for child in elm.childNodes:
  28 + text += _extract_text(child)
  29 + return text
  30 +
  31 +
  32 +## Get a single child from the element, if there are multiple children, explode.
  33 +def _child(elmt, name, required=True):
  34 + children = elmt.getElementsByTagName(name)
  35 + if not children:
  36 + if required:
  37 + raise VoldemortException("No child '%s' for element '%s'." % (name, elmt.nodeName))
  38 + else:
  39 + return None
  40 +
  41 + if len(children) > 1:
  42 + raise VoldemortException("Multiple children '%s' for element '%s'." % (name, elmt.nodeName))
  43 + return children[0]
  44 +
  45 +
  46 +## Get the child text of a single element
  47 +def _child_text(elmt, name, required=True, default=None):
  48 + if default:
  49 + required = False
  50 +
  51 + child = _child(elmt, name, required=required)
  52 + if not child:
  53 + return default
  54 +
  55 + return _extract_text(child)
  56 +
  57 +
  58 +def _int_or_none(s):
  59 + if s is None:
  60 + return s
  61 + return int(s)
  62 +
  63 +
  64 +##################################################################
  65 +# A node class representing a single voldemort server in the
  66 +# cluster. The cluster itself is just a list of nodes
  67 +##################################################################
  68 +class Node:
  69 + """A Voldemort node with the appropriate host and port information for contacting that node"""
  70 +
  71 + def __init__(self, id, host, socket_port, http_port, partitions, is_available = True, last_contact = None):
  72 + self.id = id
  73 + self.host = host
  74 + self.socket_port = socket_port
  75 + self.http_port = http_port
  76 + self.partitions = partitions
  77 + self.is_available = True
  78 + if not last_contact:
  79 + self.last_contact = time.clock()
  80 +
  81 + def __repr__(self):
  82 + return 'node(id = ' + str(self.id) + ', host = ' + self.host + ', socket_port = ' + str(self.socket_port) + \
  83 + ', http_port = ' + str(self.http_port) + ', partitions = ' + ', '.join(map(str, self.partitions)) + ')'
  84 +
  85 + @staticmethod
  86 + def parse_cluster(xml):
  87 + """Parse the cluster.xml file and return a dictionary of the nodes in the cluster indexed by node id """
  88 + doc = minidom.parseString(xml)
  89 + nodes = {}
  90 + for curr in doc.getElementsByTagName('server'):
  91 + id = int(_child_text(curr, 'id'))
  92 + host = _child_text(curr, 'host')
  93 + http_port = int(_child_text(curr, 'http-port'))
  94 + socket_port = int(_child_text(curr, 'socket-port'))
  95 + partition_str = _child_text(curr, 'partitions')
  96 + partitions = [int(p) for p in re.split('[\s,]+', partition_str)]
  97 + nodes[id] = Node(id = id, host = host, socket_port = socket_port, http_port = http_port, partitions = partitions)
  98 + return nodes
  99 +
  100 +
  101 +class Store:
  102 + def __init__(self, store_node):
  103 + self.name = _child_text(store_node, "name")
  104 + self.persistence = _child_text(store_node, "persistence")
  105 + self.routing = _child_text(store_node, "routing")
  106 + self.routing_strategy = _child_text(store_node, "routing-strategy", default="consistent-routing")
  107 + self.replication_factor = int(_child_text(store_node, "replication-factor"))
  108 + self.required_reads = int(_child_text(store_node, "required-reads"))
  109 + self.preferred_reads = _int_or_none(_child_text(store_node, "preferred-reads", required=False))
  110 + self.required_writes = int(_child_text(store_node, "required-writes"))
  111 + self.preferred_writes = _int_or_none(_child_text(store_node, "preferred-writes", required=False))
  112 +
  113 + key_serializer_node = _child(store_node, "key-serializer")
  114 + self.key_serializer_type = _child_text(key_serializer_node, "type")
  115 + self.key_serializer = self._create_serializer(self.key_serializer_type, key_serializer_node)
  116 +
  117 + value_serializer_node = _child(store_node, "value-serializer")
  118 + self.value_serializer_type = _child_text(value_serializer_node, "type")
  119 + self.value_serializer = self._create_serializer(self.value_serializer_type, value_serializer_node)
  120 +
  121 + def _create_serializer(self, serializer_type, serializer_node):
  122 + if serializer_type not in serialization.SERIALIZER_CLASSES:
  123 + raise VoldemortException("Unknown serializer type: %s" % serializer_type)
  124 +
  125 + return serialization.SERIALIZER_CLASSES[serializer_type].create_from_xml(serializer_node)
  126 +
  127 + @staticmethod
  128 + def parse_stores_xml(xml):
  129 + doc = minidom.parseString(xml)
  130 + stores = [Store(store_node) for store_node in doc.getElementsByTagName("store")]
  131 + return dict((store.name, store) for store in stores)
  132 +
  133 +class VoldemortException(Exception):
  134 + def __init__(self, msg, code = 1):
  135 + self.code = code
  136 + self.msg = msg
  137 +
  138 + def __str__(self):
  139 + return repr(self.msg)
  140 +
  141 +
  142 +class StoreClient: