From a3395cb1d20669d3ed624924b629bb0a87f3aada Mon Sep 17 00:00:00 2001 From: Cristina Munoz Date: Mon, 23 Feb 2015 12:36:29 -0800 Subject: [PATCH] Initial commit --- .gitignore | 41 +++++++++ LICENSE | 13 +++ README.md | 65 +++++++++++++ requirements.txt | 1 + runtests | 3 + setup.py | 34 +++++++ sodapy/__init__.py | 219 ++++++++++++++++++++++++++++++++++++++++++++ sodapy/constants.py | 3 + sodapy/version.py | 2 + tests/test_soda.py | 6 ++ 10 files changed, 387 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 requirements.txt create mode 100755 runtests create mode 100644 setup.py create mode 100644 sodapy/__init__.py create mode 100644 sodapy/constants.py create mode 100644 sodapy/version.py create mode 100644 tests/test_soda.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c5ff5d5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,41 @@ +*.py[cod] +MANIFEST + +# virtualenv +venv_* + +# C extensions +*.so + +# Packages +*.egg +*.egg-info +dist +build +eggs +parts +var +sdist +develop-eggs +.installed.cfg +lib +lib64 + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.tox +nosetests.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# vim +*.swp diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9a99b33 --- /dev/null +++ b/LICENSE @@ -0,0 +1,13 @@ +Copyright (c) 2014 Cristina Munoz + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..8cd474c --- /dev/null +++ b/README.md @@ -0,0 +1,65 @@ +# sodapy +Python bindings for the Socrata Open Data API + +## Installation +You can install with `pip install sodapy`. + +If you want to install from source, then `python setup.py install`. + +## Requirements + +## Documentation + +The [official Socrata API docs](http://dev.socrata.com/) provide thorough documentation of the available methods, as well as other client libraries. A quick list of eligible domains to use with the API is available [here](https://opendata.socrata.com/dataset/Socrata-Customer-Spotlights/6wk3-4ija). + +## Examples + +Retrieving data is easy! + + >>> from sodapy import Socrata + >>> client = Socrata("sandbox.demo.socrata.com", "FakeAppToken", username="fakeuser@somedomain.com", password="ndKS92mS01msjJKs") + + >>> client.get("/resource/nimj-3ivp.json", limit=2) + [{u'geolocation': {u'latitude': u'41.1085', u'needs_recoding': False, u'longitude': u'-117.6135'}, u'version': u'9', u'source': u'nn', u'region': u'Nevada', u'occurred_at': u'2012-09-14T22:38:01', u'number_of_stations': u'15', u'depth': u'7.60', u'magnitude': u'2.7', u'earthquake_id': u'00388610'}, {u'geolocation': {u'latitude': u'34.525', u'needs_recoding': False, u'longitude': u'-118.1527'}, u'version': u'0', u'source': u'ci', u'region': u'Southern California', u'occurred_at': u'2012-09-14T22:14:45', u'number_of_stations': u'35', u'depth': u'10.60', u'magnitude': u'1.5', u'earthquake_id': u'15215753'}] + + >>> client.get("/resource/nimj-3ivp.json", where="depth > 300", order="magnitude DESC", exclude_system_fields=False) + [{u'geolocation': {u'latitude': u'-15.563', u'needs_recoding': False, u'longitude': u'-175.6104'}, u'version': u'9', u':updated_at': 1348778988, u'number_of_stations': u'275', u'region': u'Tonga', u':created_meta': u'21484', 
u'occurred_at': u'2012-09-13T21:16:43', u':id': 132, u'source': u'us', u'depth': u'328.30', u'magnitude': u'4.8', u':meta': u'{\n}', u':updated_meta': u'21484', u'earthquake_id': u'c000cnb5', u':created_at': 1348778988}, {u'geolocation': {u'latitude': u'-23.5123', u'needs_recoding': False, u'longitude': u'-179.1089'}, u'version': u'3', u':updated_at': 1348778988, u'number_of_stations': u'93', u'region': u'south of the Fiji Islands', u':created_meta': u'21484', u'occurred_at': u'2012-09-14T16:14:58', u':id': 32, u'source': u'us', u'depth': u'387.00', u'magnitude': u'4.6', u':meta': u'{\n}', u':updated_meta': u'21484', u'earthquake_id': u'c000cp1z', u':created_at': 1348778988}, {u'geolocation': {u'latitude': u'21.6711', u'needs_recoding': False, u'longitude': u'142.9236'}, u'version': u'C', u':updated_at': 1348778988, u'number_of_stations': u'136', u'region': u'Mariana Islands region', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T11:19:07', u':id': 193, u'source': u'us', u'depth': u'300.70', u'magnitude': u'4.4', u':meta': u'{\n}', u':updated_meta': u'21484', u'earthquake_id': u'c000cmsq', u':created_at': 1348778988}] + +Query an individual row by simply appending the row identifier to the resource endpoint for that dataset. + >>> client.get("/resource/nimj-3ivp/193.json", exclude_system_fields=False) + {u'geolocation': {u'latitude': u'21.6711', u'needs_recoding': False, u'longitude': u'142.9236'}, u'version': u'C', u':updated_at': 1348778988, u'number_of_stations': u'136', u'region': u'Mariana Islands region', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T11:19:07', u':id': 193, u'source': u'us', u'depth': u'300.70', u'magnitude': u'4.4', u':meta': u'{\n}', u':updated_meta': u'21484', u':position': 193, u'earthquake_id': u'c000cmsq', u':created_at': 1348778988} + + +Create a new dataset. 
+ >>> client.create(file_object)  # not yet implemented; raises NotImplementedError + +Create a new row in an existing dataset. + >>> data = [{'Delegation': 'AJU', 'Name': 'Alaska', 'Key': 'AL', 'Entity': 'Juneau'}] + >>> client.upsert("/resource/eb9n-hr43.json", data) + {u'Errors': 0, u'Rows Deleted': 0, u'Rows Updated': 0, u'By SID': 0, u'Rows Created': 1, u'By RowIdentifier': 0} + +Update/Delete rows in a dataset. + >>> data = [{'Delegation': 'sfa', ':id': 8, 'Name': 'bar', 'Key': 'doo', 'Entity': 'dsfsd'}, {':id': 7, ':deleted': True}] + >>> client.upsert("/resource/eb9n-hr43.json", data) + {u'Errors': 0, u'Rows Deleted': 1, u'Rows Updated': 1, u'By SID': 2, u'Rows Created': 0, u'By RowIdentifier': 0} + +Upserts can even be performed with a csv file. + >>> data = open("upsert_test.csv") + >>> client.upsert("/resource/eb9n-hr43.json", data) + {u'Errors': 0, u'Rows Deleted': 0, u'Rows Updated': 1, u'By SID': 1, u'Rows Created': 0, u'By RowIdentifier': 0} + +The same is true for full replace. + >>> data = open("replace_test.csv") + >>> client.replace("/resource/eb9n-hr43.json", data) + {u'Errors': 0, u'Rows Deleted': 0, u'Rows Updated': 0, u'By SID': 0, u'Rows Created': 12, u'By RowIdentifier': 0} + +Delete an individual row. + >>> client.delete("/resource/nimj-3ivp.json", id=2) + + +Delete the entire dataset. + >>> client.delete("/resource/nimj-3ivp.json") + + +Wrap up when you're finished. + >>> client.close() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..be04c9a --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +requests==2.5.1 diff --git a/runtests b/runtests new file mode 100755 index 0000000..ac97f8a --- /dev/null +++ b/runtests @@ -0,0 +1,3 @@ +#!/bin/bash + +PYTHONPATH=. 
class Socrata(object):
    '''
    Client for the Socrata Open Data API.

    Holds a requests session carrying the app token and credentials, and
    exposes get/upsert/replace/delete helpers that all funnel through
    _perform_request.
    '''

    def __init__(self, domain, app_token, username=None, password=None, access_token=None,
                 session_adapter=None):
        '''
        The required arguments are:
            domain: the domain you wish to access
            app_token: your Socrata application token
        Simple requests are possible with an empty app_token (e.g. None), though these
        requests will be rate-limited.

        For write/update/delete operations or private datasets, the Socrata API currently
        supports basic HTTP authentication, which requires these additional parameters.
            username: your Socrata username
            password: your Socrata password

        The basic HTTP authentication comes with a deprecation warning, and the current
        recommended authentication method is OAuth 2.0. To make requests on behalf of the user
        using OAuth 2.0 authentication, follow the recommended procedure and provide the final
        access_token to the client.

        session_adapter, when given, is a dict with the keys "prefix" and "adapter". The
        adapter is mounted on the session under that prefix, and the same prefix is used as
        the URI scheme of every request instead of the default "https".

        Raises Exception when the domain is empty or the credentials are inconsistent
        (see authentication_validation).

        More information about authentication can be found in the official docs:
            http://dev.socrata.com/docs/authentication.html
        '''
        if not domain:
            raise Exception("A domain is required.")
        self.domain = domain

        # Validate before the session exists, so a bad credential combination
        # does not leave an unclosed session behind.
        self.authentication_validation(username, password, access_token)

        # set up the session with proper authentication credentials
        self.session = requests.Session()
        if not app_token:
            # Parenthesised so that both halves of the message are printed,
            # on Python 2 as well as Python 3.
            print("Warning: requests made without an app_token will be subject to strict"
                  " throttling limits.")
        else:
            self.session.headers.update({"X-App-token": app_token})

        # use either basic HTTP auth or OAuth2.0
        if username and password:
            self.session.auth = (username, password)
        elif access_token:
            self.session.headers.update({"Authorization": "OAuth {}".format(access_token)})

        if session_adapter:
            self.session.mount(session_adapter["prefix"], session_adapter["adapter"])
            self.uri_prefix = session_adapter["prefix"]
        else:
            self.uri_prefix = "https"

    def authentication_validation(self, username, password, access_token):
        '''
        Only accept one form of authentication.

        Raises Exception when only one of username/password is given, or when basic
        authentication credentials are combined with an OAuth access_token.
        '''
        if bool(username) != bool(password):
            raise Exception("Basic authentication requires a username AND password.")
        if (username and access_token) or (password and access_token):
            raise Exception("Cannot use both Basic Authentication and OAuth 2.0. Please use only"
                            " one authentication method.")

    def create(self, file_object):
        '''
        Create a new dataset. Not implemented yet: always raises NotImplementedError.
        '''
        raise NotImplementedError()

    def get(self, resource, **kwargs):
        '''
        Read data from the requested resource. Optionally, specify a keyword arg to filter
        results:
            select : the set of columns to be returned, defaults to *
            where : filters the rows to be returned
            order : specifies the order of results
            group : column to group results on
            limit : max number of results to return, defaults to 1000
            offset : offset, used for paging. Defaults to 0
            q : performs a full text search for a value
            exclude_system_fields : defaults to true. If set to false, the response will
                include system fields (:id, :created_at, and :updated_at)
            format : sent as the Accept header of the request
        Any other keyword argument is passed through as a query parameter unchanged.

        Raises Exception when limit is greater than MAX_LIMIT.

        More information about the SoQL parameters can be found at the official docs:
            http://dev.socrata.com/docs/queries.html

        More information about system fields can be found here:
            http://dev.socrata.com/docs/system-fields.html
        '''
        headers = _clear_empty_values({"Accept": kwargs.pop("format", None)})

        params = {
            "$select": kwargs.pop("select", None),
            "$where": kwargs.pop("where", None),
            "$order": kwargs.pop("order", None),
            "$group": kwargs.pop("group", None),
            "$limit": kwargs.pop("limit", None),
            "$offset": kwargs.pop("offset", None),
            "$q": kwargs.pop("q", None),
            "$$exclude_system_fields": kwargs.pop("exclude_system_fields", None),
        }

        # whatever is left over is forwarded verbatim as a query parameter
        params.update(kwargs)
        params = _clear_empty_values(params)

        if params.get("$limit") and params["$limit"] > MAX_LIMIT:
            raise Exception("Max limit exceeded! {} is greater than the Socrata API limit of {}. "
                "More information on the official API docs: http://dev.socrata.com/docs/paging.html"
                .format(params["$limit"], MAX_LIMIT))

        return self._perform_request("get", resource, headers=headers, params=params)

    def upsert(self, resource, payload):
        '''
        Insert, update or delete data to/from an existing dataset. Currently supports json
        and csv file objects. See here for the upsert documentation:
            http://dev.socrata.com/publishers/upsert.html
        '''
        return self._perform_update("post", resource, payload)

    def replace(self, resource, payload):
        '''
        Same logic as upsert, but overwrites existing data with the payload using PUT instead of
        POST.
        '''
        return self._perform_update("put", resource, payload)

    def _perform_update(self, method, resource, payload):
        '''
        Send payload to resource with the given HTTP method.

        A list is serialised to JSON; any file-like object (anything with a read() method)
        is streamed as text/csv. Every other payload type raises Exception.
        '''
        if isinstance(payload, list):
            return self._perform_request(method, resource, data=json.dumps(payload))

        # Duck-typed instead of isinstance(payload, file): the `file` builtin
        # only exists on Python 2 and excludes other file-like objects.
        if hasattr(payload, "read"):
            headers = {
                "content-type": "text/csv",
            }
            return self._perform_request(method, resource, data=payload, headers=headers)

        raise Exception("Unrecognized payload {}. Currently only lists and files are "
                        "supported.".format(type(payload)))

    def delete(self, resource, id=None):
        '''
        Delete the entire dataset, e.g.
            client.delete("/resource/nimj-3ivp.json")
        or a single row, e.g.
            client.delete("/resource/nimj-3ivp.json", id=4)
        '''
        # Compare against None explicitly: a falsy row id such as 0 must
        # delete that row, never fall through to deleting the whole dataset.
        if id is not None:
            base, dot, content_type = resource.rpartition(".")
            if dot:
                delete_uri = "{}/{}.{}".format(base, id, content_type)
            else:
                # no extension to preserve, simply append the row id
                delete_uri = "{}/{}".format(resource, id)
        else:
            # only the leading path segment is meant to be rewritten
            delete_uri = resource.replace("resource", "api/views", 1)

        return self._perform_request("delete", delete_uri)

    @property
    def response_formats(self):
        '''
        The set of content types listed as response formats.
        '''
        return set(["application/json; charset=utf-8", "text/csv; charset=utf-8",
                    "application/rdf+xml"])

    def unaunthorized(self):
        '''
        Placeholder that currently does nothing.
        '''
        pass

    def _perform_request(self, request_type, resource, **kwargs):
        '''
        Utility method that performs all requests.

        request_type is one of "get", "post", "put" or "delete"; resource is the path that
        is appended to the domain. Remaining keyword arguments go to the session method.

        Returns the raw response for deletes, the decoded JSON for JSON responses, a list of
        rows for CSV responses and the raw body for RDF/XML responses. Raises Exception for
        an unknown request type or an unknown response format, and whatever
        response.raise_for_status() raises for HTTP errors.
        '''
        # a tuple keeps the order of the error message deterministic
        request_type_methods = ("get", "post", "put", "delete")
        if request_type not in request_type_methods:
            raise Exception("Unknown request type. Supported request types are: {}".format(
                ", ".join(request_type_methods)))

        uri = "{}://{}{}".format(self.uri_prefix, self.domain, resource)

        # set a timeout, just to be safe
        kwargs.setdefault("timeout", 10)

        response = getattr(self.session, request_type)(uri, **kwargs)

        # handle errors
        if response.status_code not in (200, 202):
            # TODO: handle this better
            try:
                print(response.json())
            except ValueError:
                # The error body is not JSON; show it raw so that the HTTP
                # error below is not masked by a decoding failure.
                print(response.text)
            response.raise_for_status()

        # deletes have no content body, simply return the whole response
        if request_type == "delete":
            return response

        # for other request types, analyze the contents to return most useful data
        content_type = (response.headers.get("content-type") or "").strip().lower()
        # Dispatch on the media type alone so that a missing or differently
        # formatted charset parameter does not make a valid response fail.
        media_type = content_type.split(";", 1)[0].strip()
        if media_type == "application/json":
            return response.json()
        if media_type == "text/csv":
            return list(csv.reader(StringIO(response.text)))
        if media_type == "application/rdf+xml":
            return response.content
        raise Exception("Unknown response format: {}".format(content_type))

    def close(self):
        '''
        Close the underlying session.
        '''
        self.session.close()


def _clear_empty_values(args):
    '''
    Return a new dict holding only the entries of args whose value is not None.
    '''
    return {key: value for key, value in args.items() if value is not None}