Initial commit

xmunoz · Feb 23, 2015 · a3395cb · a3395cb
1 parent e4e4eb3
commit a3395cb
Show file tree

Hide file tree

Showing 10 changed files with 387 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,41 @@
+*.py[cod]
+MANIFEST
+
+# virtualenv
+venv_*
+
+# C extensions
+*.so
+
+# Packages
+*.egg
+*.egg-info
+dist
+build
+eggs
+parts
+var
+sdist
+develop-eggs
+.installed.cfg
+lib
+lib64
+
+# Installer logs
+pip-log.txt
+
+# Unit test / coverage reports
+.coverage
+.tox
+nosetests.xml
+
+# Translations
+*.mo
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# vim
+*.swp
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,13 @@
+Copyright (c) 2014 Cristina Munoz
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/README.md b/README.md
@@ -0,0 +1,65 @@
+# sodapy
+Python bindings for the Socrata Open Data API
+
+## Installation
+You can install with `pip install sodapy`.
+
+If you want to install from source, then `python setup.py install`.
+
+## Requirements
+
+## Documentation
+
+The [official Socrata API docs](http://dev.socrata.com/) provide thorough documentation of the available methods, as well as other client libraries. A quick list of eligible domains to use with the API is available [here](https://opendata.socrata.com/dataset/Socrata-Customer-Spotlights/6wk3-4ija).
+
+## Examples
+
+Retrieving data is easy!
+
+    >>> from sodapy import Socrata
+    >>> client = Socrata("sandbox.demo.socrata.com", "FakeAppToken", username="fakeuser@somedomain.com", password="ndKS92mS01msjJKs")
+
+    >>> client.get("/resource/nimj-3ivp.json", limit=2)
+	[{u'geolocation': {u'latitude': u'41.1085', u'needs_recoding': False, u'longitude': u'-117.6135'}, u'version': u'9', u'source': u'nn', u'region': u'Nevada', u'occurred_at': u'2012-09-14T22:38:01', u'number_of_stations': u'15', u'depth': u'7.60', u'magnitude': u'2.7', u'earthquake_id': u'00388610'}, {u'geolocation': {u'latitude': u'34.525', u'needs_recoding': False, u'longitude': u'-118.1527'}, u'version': u'0', u'source': u'ci', u'region': u'Southern California', u'occurred_at': u'2012-09-14T22:14:45', u'number_of_stations': u'35', u'depth': u'10.60', u'magnitude': u'1.5', u'earthquake_id': u'15215753'}]
+
+	>>> client.get("/resource/nimj-3ivp.json", where="depth > 300", order="magnitude DESC", exclude_system_fields=False)
+	[{u'geolocation': {u'latitude': u'-15.563', u'needs_recoding': False, u'longitude': u'-175.6104'}, u'version': u'9', u':updated_at': 1348778988, u'number_of_stations': u'275', u'region': u'Tonga', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T21:16:43', u':id': 132, u'source': u'us', u'depth': u'328.30', u'magnitude': u'4.8', u':meta': u'{\n}', u':updated_meta': u'21484', u'earthquake_id': u'c000cnb5', u':created_at': 1348778988}, {u'geolocation': {u'latitude': u'-23.5123', u'needs_recoding': False, u'longitude': u'-179.1089'}, u'version': u'3', u':updated_at': 1348778988, u'number_of_stations': u'93', u'region': u'south of the Fiji Islands', u':created_meta': u'21484', u'occurred_at': u'2012-09-14T16:14:58', u':id': 32, u'source': u'us', u'depth': u'387.00', u'magnitude': u'4.6', u':meta': u'{\n}', u':updated_meta': u'21484', u'earthquake_id': u'c000cp1z', u':created_at': 1348778988}, {u'geolocation': {u'latitude': u'21.6711', u'needs_recoding': False, u'longitude': u'142.9236'}, u'version': u'C', u':updated_at': 1348778988, u'number_of_stations': u'136', u'region': u'Mariana Islands region', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T11:19:07', u':id': 193, u'source': u'us', u'depth': u'300.70', u'magnitude': u'4.4', u':meta': u'{\n}', u':updated_meta': u'21484', u'earthquake_id': u'c000cmsq', u':created_at': 1348778988}]
+
+Query an individual row by simply appending the row identifier to the resource endpoint for that dataset.
+	>>> client.get("/resource/nimj-3ivp/193.json", exclude_system_fields=False)
+	{u'geolocation': {u'latitude': u'21.6711', u'needs_recoding': False, u'longitude': u'142.9236'}, u'version': u'C', u':updated_at': 1348778988, u'number_of_stations': u'136', u'region': u'Mariana Islands region', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T11:19:07', u':id': 193, u'source': u'us', u'depth': u'300.70', u'magnitude': u'4.4', u':meta': u'{\n}', u':updated_meta': u'21484', u':position': 193, u'earthquake_id': u'c000cmsq', u':created_at': 1348778988}
+
+
+Create a new dataset.
+	>>> client.create(file_object)  # not implemented yet; raises NotImplementedError
+
+Create a new row in an existing dataset
+    >>> data = [{'Delegation': 'AJU', 'Name': 'Alaska', 'Key': 'AL', 'Entity': 'Juneau'}]
+    >>> client.upsert("/resource/eb9n-hr43.json", data)
+	{u'Errors': 0, u'Rows Deleted': 0, u'Rows Updated': 0, u'By SID': 0, u'Rows Created': 1, u'By RowIdentifier': 0}
+
+Update/Delete rows in a dataset.
+    >>> data = [{'Delegation': 'sfa', ':id': 8, 'Name': 'bar', 'Key': 'doo', 'Entity': 'dsfsd'}, {':id': 7, ':deleted': True}]
+	>>> client.upsert("/resource/eb9n-hr43.json", data)
+	{u'Errors': 0, u'Rows Deleted': 1, u'Rows Updated': 1, u'By SID': 2, u'Rows Created': 0, u'By RowIdentifier': 0}
+
+Upserts can even be performed with a CSV file.
+	>>> data = open("upsert_test.csv")
+	>>> client.upsert("/resource/eb9n-hr43.json", data)
+	{u'Errors': 0, u'Rows Deleted': 0, u'Rows Updated': 1, u'By SID': 1, u'Rows Created': 0, u'By RowIdentifier': 0}
+
+The same is true for full replace.
+	>>> data = open("replace_test.csv")
+	>>> client.replace("/resource/eb9n-hr43.json", data)
+	{u'Errors': 0, u'Rows Deleted': 0, u'Rows Updated': 0, u'By SID': 0, u'Rows Created': 12, u'By RowIdentifier': 0}
+
+Delete an individual row.
+	>>> client.delete("/resource/nimj-3ivp.json", id=2)
+	<Response [200]>
+
+Delete the entire dataset.
+	>>> client.delete("/resource/nimj-3ivp.json")
+	<Response [200]>
+
+Wrap up when you're finished.
+	>>> client.close()
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1 @@
+requests==2.5.1
diff --git a/runtests b/runtests
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+PYTHONPATH=. exec py.test "$@"
diff --git a/setup.py b/setup.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+
+import setuptools
+from distutils.core import setup
+
+execfile('sodapy/version.py')
+
+with open('requirements.txt') as requirements:
+    required = requirements.read().splitlines()
+
+kwargs = {
+    "name": "sodapy",
+    "version": str(__version__),
+    "packages": ["sodapy"]
+    "description": "Python bindings for the Socrata Open Data API",
+    "long_description": open("README").read(),
+    "author": "Cristina Munoz",
+    "maintainer": "Cristina Munoz",
+    "author_email": "hi@xmunoz.com",
+    "maintainer_email": "hi@xmunoz.com",
+    "license": "Apache",
+    "install_requires": required,
+    "url": "https://github.com/xmunoz/sodapy",
+    "download_url": "https://github.com/xmunoz/sodapy/archive/master.tar.gz",
+    "classifiers": [
+        "Programming Language :: Python",
+        "Topic :: Software Development",
+        "Topic :: Software Development :: Libraries",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+    ]
+}
+
+setup(**kwargs)
+
diff --git a/sodapy/__init__.py b/sodapy/__init__.py
@@ -0,0 +1,219 @@
+import requests
+from cStringIO import StringIO
+import csv
+import json
+
+from .constants import MAX_LIMIT
+
+__author__ = "Cristina Munoz <hi@xmunoz.com>"
+from version import __version__, version_info
+
+class Socrata(object):
+	def __init__(self, domain, app_token, username=None, password=None, access_token=None, 
+		session_adapter=None):
+		'''
+		The required arguments are:
+			domain: the domain you wish you to access
+			app_token: your Socrata application token
+		Simple requests are possible without an app_token, though these requests will be rate-
+		limited.
+
+		For write/update/delete operations or private datasets, the Socrata API currently supports
+		basic HTTP authentication, which requires these additional parameters.
+			username: your Socrata username
+			password: your Socrata password
+
+		The basic HTTP authentication comes with a deprecation warning, and the current
+		recommended authentication method is OAuth 2.0. To make requests on behalf of the user
+		using OAuth 2.0 authentication, follow the recommended procedure and provide the final
+		access_token to the client.
+
+		More information about authentication can be found in the official docs:
+			http://dev.socrata.com/docs/authentication.html
+		'''
+		if not domain:
+			raise Exception("A domain is required.")
+		self.domain = domain
+
+		# set up the session with proper authentication crendentials
+		self.session = requests.Session()
+		if not app_token:
+			print "Warning: requests made without an app_token will be subject to strict"
+			" throttling limits."
+		else:
+			self.session.headers.update({"X-App-token": app_token})
+
+		self.authentication_validation(username, password, access_token)
+
+		# use either basic HTTP auth or OAuth2.0
+		if username and password:
+			self.session.auth = (username, password)
+		elif access_token:
+			self.session.headers.update({"Authorization": "OAuth {}".format(access_token)})
+
+		if session_adapter:
+			self.session.mount(session_adapter["prefix"], session_adapter["adapter"])
+			self.uri_prefix = session_adapter["prefix"]
+		else:
+			self.uri_prefix = "https"
+
+
+	def authentication_validation(self, username, password, access_token): 
+		'''
+		Only accept one form of authentication.
+		'''
+		if bool(username) != bool(password):
+			raise Exception("Basic authentication requires a username AND password.")
+		if (username and access_token) or (password and access_token):
+			raise Exception("Cannot use both Basic Authentication and OAuth 2.0. Please use only"
+			" one authentication method.")
+
+
+	def create(self, file_object):
+		'''
+		Placeholder for dataset creation; not implemented yet.
+
+		Always raises NotImplementedError.
+		'''
+		raise NotImplementedError()
+
+	def get(self, resource, **kwargs):
+		'''
+		Read data from the requested resource. Optionally, specify a keyword arg to filter results:
+			select : the set of columns to be returned, defaults to *
+			where : filters the rows to be returned, defaults to limit
+			order : specifies the order of results
+			group : column to group results on
+			limit : max number of results to return, defaults to 1000
+			offset : offset, used for paging. Defaults to 0
+			q : performs a full text search for a value
+			exclude_system_fields : defaults to true. If set to false, the response will include 
+				system fields (:id, :created_at, and :updated_at)
+		More information about the SoQL parameters can be found at the official docs: 
+		http://dev.socrata.com/docs/queries.html
+
+		More information about system fields can be found here:
+		http://dev.socrata.com/docs/system-fields.html
+		'''
+		headers = _clear_empty_values({"Accept": kwargs.pop("format", None)})
+
+		params = {
+			"$select"	: kwargs.pop("select", None),
+			"$where"	: kwargs.pop("where", None),
+			"$order"	: kwargs.pop("order", None),
+			"$group"	: kwargs.pop("group", None),
+			"$limit" 	: kwargs.pop("limit", None) ,
+			"$offset"	: kwargs.pop("offset", None),
+			"$q"		: kwargs.pop("q", None),
+			"$$exclude_system_fields" : kwargs.pop("exclude_system_fields", None)
+		}
+
+		params.update(kwargs)
+		params = _clear_empty_values(params)
+
+		if params.get("$limit") and params["$limit"] > MAX_LIMIT:
+			raise Exception("Max limit exceeded! {} is greater than the Socrata API limit of {}. "
+			"More information on the official API docs: http://dev.socrata.com/docs/paging.html"
+			.format(params["$limit"], MAX_LIMIT))
+
+		response = self._perform_request("get", resource, headers=headers, params=params)
+		return response 
+
+
+	def upsert(self, resource, payload):
+		'''
+		Insert, update or delete data to/from an existing dataset by sending the payload to
+		the resource with POST.
+
+		The payload is either a list (sent as JSON) or a file object (sent as text/csv); any
+		other payload type raises an Exception. Returns the parsed response.
+
+		See here for the upsert documentation:
+			http://dev.socrata.com/publishers/upsert.html
+		'''
+		return self._perform_update("post", resource, payload)
+
+
+	def replace(self, resource, payload):
+		'''
+		Same logic as upsert, but overwrites existing data with the payload using PUT instead of
+		POST.
+
+		The payload is either a list (sent as JSON) or a file object (sent as text/csv); any
+		other payload type raises an Exception. Returns the parsed response.
+		'''
+		return self._perform_update("put", resource, payload)
+
+
+	def _perform_update(self, method, resource, payload):
+		if isinstance(payload, list):
+			response = self._perform_request(method, resource, data=json.dumps(payload))
+		elif isinstance(payload, file):
+			headers = {
+				"content-type": "text/csv",
+			}
+			response = self._perform_request(method, resource, data=payload, headers=headers)
+		else:
+			raise Exception("Unrecognized payload {}. Currently only lists and files are "
+				"supported.".format(type(payload)))
+
+		return response 
+
+
+	def delete(self, resource, id=None):
+		'''
+		Delete the entire dataset, e.g.
+			client.delete("/resource/nimj-3ivp.json")
+		or a single row, e.g.
+			client.delete("/resource/nimj-3ivp.json", id=4)
+		'''
+		if id:
+			base, content_type = resource.rsplit(".", 1)
+			delete_uri = "{}/{}.{}".format(base, id, content_type) 
+		else:
+			delete_uri = resource.replace("resource", "api/views")
+
+		return self._perform_request("delete", delete_uri)
+
+	@property
+	def response_formats(self):
+		return set(["application/json; charset=utf-8", "text/csv; charset=utf-8",
+			"application/rdf+xml"])
+
+	def unaunthorized(self):
+		'''
+		Placeholder that currently does nothing and returns None.
+
+		NOTE(review): the name looks like a misspelling of "unauthorized" - confirm before
+		renaming, since it is part of the public interface.
+		'''
+		pass
+
+	def _perform_request(self, request_type, resource, **kwargs):
+		'''
+		Utility method that performs all requests.
+		'''
+		request_type_methods = set(["get", "post", "put", "delete"])
+		if request_type not in request_type_methods:
+			raise Exception("Unknown request type. Supported request types are: {}".format(", ".join(request_type_methods)))
+
+		uri = "{}://{}{}".format(self.uri_prefix, self.domain, resource)
+
+		# set a timeout, just to be safe	
+		kwargs["timeout"] = 10
+
+		response = getattr(self.session, request_type)(uri, **kwargs)
+
+		# handle errors
+		if response.status_code not in (200, 202):
+			# TODO: handle this better
+			print response.json()
+			response.raise_for_status()
+
+		# deletes have no content body, simple return the whole response	
+		if request_type == "delete":
+			return response
+
+		# for other request types, analyze the contents to return most useful data
+		content_type = response.headers.get('content-type').strip().lower()
+		if content_type == "application/json; charset=utf-8":
+			return response.json()
+		elif content_type == "text/csv; charset=utf-8":
+			csv_stream = StringIO(response.text)
+			return [line for line in csv.reader(csv_stream)]
+		elif content_type == "application/rdf+xml; charset=utf-8":
+			return response.content
+		else:
+			raise Exception("Unknown response format: {}".format(content_type))
+
+	def close(self):
+		'''
+		Close the underlying requests session.
+		'''
+		self.session.close()
+
+
+def _clear_empty_values(args):
+	result = {}
+	for param in args:
+		if args[param] is not None:
+			result[param] = args[param]
+	return result
diff --git a/sodapy/constants.py b/sodapy/constants.py
@@ -0,0 +1,3 @@
+# NOTE(review): presumably the number of rows returned when no limit is given - confirm
+DEFAULT_LIMIT = 1000
+# Largest value the client accepts for the limit argument of Socrata.get
+MAX_LIMIT = 50000
+# NOTE(review): presumably the paging offset used when none is given - confirm
+DEFAULT_OFFSET = None
diff --git a/sodapy/version.py b/sodapy/version.py
@@ -0,0 +1,2 @@
+# Version as a tuple of integers; the dotted string below is derived from it.
+version_info = (0, 1)
+# Dotted version string built from version_info, i.e. "0.1" for (0, 1).
+__version__ = '.'.join(str(v) for v in version_info)
diff --git a/tests/test_soda.py b/tests/test_soda.py
@@ -0,0 +1,6 @@
+
+from sodapy import Socrata
+
+def test_client():
+	'''Smoke test: a client can be constructed from a domain and an app token.'''
+	client = Socrata("something.com", "FakeAppToken")
+	assert isinstance(client, Socrata)