This repository has been archived by the owner on Aug 31, 2022. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
387 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
*.py[cod] | ||
MANIFEST | ||
|
||
# virtualenv | ||
venv_* | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Packages | ||
*.egg | ||
*.egg-info | ||
dist | ||
build | ||
eggs | ||
parts | ||
var | ||
sdist | ||
develop-eggs | ||
.installed.cfg | ||
lib | ||
lib64 | ||
|
||
# Installer logs | ||
pip-log.txt | ||
|
||
# Unit test / coverage reports | ||
.coverage | ||
.tox | ||
nosetests.xml | ||
|
||
# Translations | ||
*.mo | ||
|
||
# Mr Developer | ||
.mr.developer.cfg | ||
.project | ||
.pydevproject | ||
|
||
# vim | ||
*.swp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
Copyright (c) 2014 Cristina Munoz | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# sodapy | ||
Python bindings for the Socrata Open Data API | ||
|
||
## Installation | ||
You can install with `pip install sodapy`. | ||
|
||
If you want to install from source, then `python setup.py install`. | ||
|
||
## Requirements | ||
|
||
## Documentation | ||
|
||
The [official Socrata API docs](http://dev.socrata.com/) provide thorough documentation of the available methods, as well as other client libraries. A quick list of eligible domains to use with the API is available [here](https://opendata.socrata.com/dataset/Socrata-Customer-Spotlights/6wk3-4ija). | ||
|
||
## Examples | ||
|
||
Retrieving data is easy! | ||
|
||
>>> from sodapy import Socrata | ||
>>> client = Socrata("sandbox.demo.socrata.com", "FakeAppToken", username="fakeuser@somedomain.com", password="ndKS92mS01msjJKs") | ||
|
||
>>> client.get("/resource/nimj-3ivp.json", limit=2) | ||
[{u'geolocation': {u'latitude': u'41.1085', u'needs_recoding': False, u'longitude': u'-117.6135'}, u'version': u'9', u'source': u'nn', u'region': u'Nevada', u'occurred_at': u'2012-09-14T22:38:01', u'number_of_stations': u'15', u'depth': u'7.60', u'magnitude': u'2.7', u'earthquake_id': u'00388610'}, {u'geolocation': {u'latitude': u'34.525', u'needs_recoding': False, u'longitude': u'-118.1527'}, u'version': u'0', u'source': u'ci', u'region': u'Southern California', u'occurred_at': u'2012-09-14T22:14:45', u'number_of_stations': u'35', u'depth': u'10.60', u'magnitude': u'1.5', u'earthquake_id': u'15215753'}] | ||
|
||
>>> client.get("/resource/nimj-3ivp.json", where="depth > 300", order="magnitude DESC", exclude_system_fields=False) | ||
[{u'geolocation': {u'latitude': u'-15.563', u'needs_recoding': False, u'longitude': u'-175.6104'}, u'version': u'9', u':updated_at': 1348778988, u'number_of_stations': u'275', u'region': u'Tonga', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T21:16:43', u':id': 132, u'source': u'us', u'depth': u'328.30', u'magnitude': u'4.8', u':meta': u'{\n}', u':updated_meta': u'21484', u'earthquake_id': u'c000cnb5', u':created_at': 1348778988}, {u'geolocation': {u'latitude': u'-23.5123', u'needs_recoding': False, u'longitude': u'-179.1089'}, u'version': u'3', u':updated_at': 1348778988, u'number_of_stations': u'93', u'region': u'south of the Fiji Islands', u':created_meta': u'21484', u'occurred_at': u'2012-09-14T16:14:58', u':id': 32, u'source': u'us', u'depth': u'387.00', u'magnitude': u'4.6', u':meta': u'{\n}', u':updated_meta': u'21484', u'earthquake_id': u'c000cp1z', u':created_at': 1348778988}, {u'geolocation': {u'latitude': u'21.6711', u'needs_recoding': False, u'longitude': u'142.9236'}, u'version': u'C', u':updated_at': 1348778988, u'number_of_stations': u'136', u'region': u'Mariana Islands region', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T11:19:07', u':id': 193, u'source': u'us', u'depth': u'300.70', u'magnitude': u'4.4', u':meta': u'{\n}', u':updated_meta': u'21484', u'earthquake_id': u'c000cmsq', u':created_at': 1348778988}] | ||
|
||
Query an individual row by simply appending the row identifier to the resource endpoint for that dataset.
>>> client.get("/resource/nimj-3ivp/193.json", exclude_system_fields=False) | ||
{u'geolocation': {u'latitude': u'21.6711', u'needs_recoding': False, u'longitude': u'142.9236'}, u'version': u'C', u':updated_at': 1348778988, u'number_of_stations': u'136', u'region': u'Mariana Islands region', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T11:19:07', u':id': 193, u'source': u'us', u'depth': u'300.70', u'magnitude': u'4.4', u':meta': u'{\n}', u':updated_meta': u'21484', u':position': 193, u'earthquake_id': u'c000cmsq', u':created_at': 1348778988} | ||
|
||
|
||
Create a new dataset (not yet implemented).
>>> client.create
|
||
Create a new row in an existing dataset | ||
>>> data = [{'Delegation': 'AJU', 'Name': 'Alaska', 'Key': 'AL', 'Entity': 'Juneau'}] | ||
>>> client.upsert("/resource/eb9n-hr43.json", data) | ||
{u'Errors': 0, u'Rows Deleted': 0, u'Rows Updated': 0, u'By SID': 0, u'Rows Created': 1, u'By RowIdentifier': 0} | ||
|
||
Update/Delete rows in a dataset. | ||
>>> data = [{'Delegation': 'sfa', ':id': 8, 'Name': 'bar', 'Key': 'doo', 'Entity': 'dsfsd'}, {':id': 7, ':deleted': True}] | ||
>>> client.upsert("/resource/eb9n-hr43.json", data) | ||
{u'Errors': 0, u'Rows Deleted': 1, u'Rows Updated': 1, u'By SID': 2, u'Rows Created': 0, u'By RowIdentifier': 0} | ||
|
||
Upserts can even be performed with a csv file.
>>> data = open("upsert_test.csv") | ||
>>> client.upsert("/resource/eb9n-hr43.json", data)
{u'Errors': 0, u'Rows Deleted': 0, u'Rows Updated': 1, u'By SID': 1, u'Rows Created': 0, u'By RowIdentifier': 0} | ||
|
||
The same is true for full replace. | ||
>>> data = open("replace_test.csv") | ||
>>> client.replace("/resource/eb9n-hr43.json", data) | ||
{u'Errors': 0, u'Rows Deleted': 0, u'Rows Updated': 0, u'By SID': 0, u'Rows Created': 12, u'By RowIdentifier': 0} | ||
|
||
Delete an individual row. | ||
>>> client.delete("/resource/nimj-3ivp.json", id=2) | ||
<Response [200]> | ||
|
||
Delete the entire dataset. | ||
>>> client.delete("/resource/nimj-3ivp.json") | ||
<Response [200]> | ||
|
||
Wrap up when you're finished. | ||
>>> client.close() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
requests==2.5.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Run the test suite with the repository root on PYTHONPATH so the
# `sodapy` package is importable without installation. Extra CLI args
# are forwarded to py.test; `exec` replaces the shell so py.test's exit
# status is the script's exit status.
PYTHONPATH=. exec py.test "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
#!/usr/bin/env python

import setuptools
from distutils.core import setup

# Load __version__ from the package without importing sodapy itself
# (importing would pull in third-party runtime dependencies).
# NOTE: the original used execfile(), which is Python 2 only;
# exec(open(...).read()) is behaviorally identical and also works on Python 3.
exec(open('sodapy/version.py').read())

# Mirror the runtime dependencies declared in requirements.txt.
with open('requirements.txt') as requirements:
    required = requirements.read().splitlines()

kwargs = {
    "name": "sodapy",
    "version": str(__version__),
    # BUG FIX: the original dict was missing the comma after this entry,
    # which is a SyntaxError and made the package uninstallable.
    "packages": ["sodapy"],
    "description": "Python bindings for the Socrata Open Data API",
    "long_description": open("README").read(),
    "author": "Cristina Munoz",
    "maintainer": "Cristina Munoz",
    "author_email": "hi@xmunoz.com",
    "maintainer_email": "hi@xmunoz.com",
    "license": "Apache",
    "install_requires": required,
    "url": "https://github.com/xmunoz/sodapy",
    "download_url": "https://github.com/xmunoz/sodapy/archive/master.tar.gz",
    "classifiers": [
        "Programming Language :: Python",
        "Topic :: Software Development",
        "Topic :: Software Development :: Libraries",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ]
}

setup(**kwargs)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,219 @@ | ||
import requests | ||
from cStringIO import StringIO | ||
import csv | ||
import json | ||
|
||
from .constants import MAX_LIMIT | ||
|
||
__author__ = "Cristina Munoz <hi@xmunoz.com>" | ||
from version import __version__, version_info | ||
|
||
class Socrata(object):
    '''
    Client for the Socrata Open Data API.

    Wraps a requests.Session configured with the caller's credentials and
    exposes read (get), write (upsert/replace/create) and delete operations
    against a single Socrata domain.
    '''
    def __init__(self, domain, app_token, username=None, password=None,
                 access_token=None, session_adapter=None):
        '''
        The required arguments are:
            domain: the domain you wish you to access
            app_token: your Socrata application token
        Simple requests are possible without an app_token, though these
        requests will be rate-limited.

        For write/update/delete operations or private datasets, the Socrata
        API currently supports basic HTTP authentication, which requires these
        additional parameters.
            username: your Socrata username
            password: your Socrata password

        The basic HTTP authentication comes with a deprecation warning, and
        the current recommended authentication method is OAuth 2.0. To make
        requests on behalf of the user using OAuth 2.0 authentication, follow
        the recommended procedure and provide the final access_token to the
        client.

        More information about authentication can be found in the official
        docs:
            http://dev.socrata.com/docs/authentication.html
        '''
        if not domain:
            raise Exception("A domain is required.")
        self.domain = domain

        # set up the session with proper authentication credentials
        self.session = requests.Session()
        if not app_token:
            # BUG FIX: the original Python 2 print statement ended at the
            # first string literal; the continuation line was a dangling
            # expression statement, so half the warning was never printed.
            # The parenthesized call prints the whole message and is also
            # valid Python 3 syntax.
            print("Warning: requests made without an app_token will be "
                  "subject to strict throttling limits.")
        else:
            self.session.headers.update({"X-App-token": app_token})

        self.authentication_validation(username, password, access_token)

        # use either basic HTTP auth or OAuth2.0
        if username and password:
            self.session.auth = (username, password)
        elif access_token:
            self.session.headers.update(
                {"Authorization": "OAuth {}".format(access_token)})

        # A custom session adapter lets tests and proxies intercept
        # requests; its prefix replaces the default "https" scheme.
        if session_adapter:
            self.session.mount(session_adapter["prefix"],
                               session_adapter["adapter"])
            self.uri_prefix = session_adapter["prefix"]
        else:
            self.uri_prefix = "https"

    def authentication_validation(self, username, password, access_token):
        '''
        Only accept one form of authentication.

        Raises Exception when only one of username/password is supplied,
        or when basic auth credentials are combined with an OAuth 2.0
        access token.
        '''
        if bool(username) != bool(password):
            raise Exception("Basic authentication requires a username AND"
                            " password.")
        if (username and access_token) or (password and access_token):
            raise Exception("Cannot use both Basic Authentication and OAuth"
                            " 2.0. Please use only one authentication"
                            " method.")

    def create(self, file_object):
        # Dataset creation is not supported yet.
        raise NotImplementedError()

    def get(self, resource, **kwargs):
        '''
        Read data from the requested resource. Optionally, specify a keyword
        arg to filter results:
            select : the set of columns to be returned, defaults to *
            where : filters the rows to be returned, defaults to limit
            order : specifies the order of results
            group : column to group results on
            limit : max number of results to return, defaults to 1000
            offset : offset, used for paging. Defaults to 0
            q : performs a full text search for a value
            exclude_system_fields : defaults to true. If set to false, the
                response will include system fields (:id, :created_at, and
                :updated_at)

        More information about the SoQL parameters can be found at the
        official docs:
            http://dev.socrata.com/docs/queries.html

        More information about system fields can be found here:
            http://dev.socrata.com/docs/system-fields.html
        '''
        # An explicit "format" kwarg becomes the Accept header.
        headers = _clear_empty_values({"Accept": kwargs.pop("format", None)})

        # Translate friendly keyword args into SoQL "$"-prefixed params.
        params = {
            "$select": kwargs.pop("select", None),
            "$where": kwargs.pop("where", None),
            "$order": kwargs.pop("order", None),
            "$group": kwargs.pop("group", None),
            "$limit": kwargs.pop("limit", None),
            "$offset": kwargs.pop("offset", None),
            "$q": kwargs.pop("q", None),
            "$$exclude_system_fields": kwargs.pop("exclude_system_fields",
                                                  None)
        }

        # Any remaining kwargs are passed through untouched (simple
        # column-equality filters).
        params.update(kwargs)
        params = _clear_empty_values(params)

        # Reject limits the Socrata API would refuse anyway.
        if params.get("$limit") and params["$limit"] > MAX_LIMIT:
            raise Exception("Max limit exceeded! {} is greater than the "
                            "Socrata API limit of {}. More information on "
                            "the official API docs: "
                            "http://dev.socrata.com/docs/paging.html"
                            .format(params["$limit"], MAX_LIMIT))

        response = self._perform_request("get", resource, headers=headers,
                                         params=params)
        return response

    def upsert(self, resource, payload):
        '''
        Insert, update or delete data to/from an existing dataset. Currently
        supports json and csv file objects. See here for the upsert
        documentation:
            http://dev.socrata.com/publishers/upsert.html
        '''
        return self._perform_update("post", resource, payload)

    def replace(self, resource, payload):
        '''
        Same logic as upsert, but overwrites existing data with the payload
        using PUT instead of POST.
        '''
        return self._perform_update("put", resource, payload)

    def _perform_update(self, method, resource, payload):
        '''
        Shared implementation for upsert/replace: lists are sent as JSON,
        file-like objects are streamed as CSV.
        '''
        if isinstance(payload, list):
            response = self._perform_request(method, resource,
                                             data=json.dumps(payload))
        elif hasattr(payload, "read"):
            # BUG FIX / portability: the original tested
            # isinstance(payload, file), which only matches the Python 2
            # builtin `file` type and rejects other file-like objects
            # (StringIO, Python 3 file objects). Duck-typing on .read()
            # accepts all of them.
            headers = {
                "content-type": "text/csv",
            }
            response = self._perform_request(method, resource, data=payload,
                                             headers=headers)
        else:
            raise Exception("Unrecognized payload {}. Currently only lists "
                            "and files are supported.".format(type(payload)))

        return response

    def delete(self, resource, id=None):
        '''
        Delete the entire dataset, e.g.
            client.delete("/resource/nimj-3ivp.json")
        or a single row, e.g.
            client.delete("/resource/nimj-3ivp.json", id=4)
        '''
        # BUG FIX: the original used `if id:`, so a legitimate row id of 0
        # fell through and deleted the ENTIRE dataset. Test identity with
        # None instead of truthiness.
        if id is not None:
            base, content_type = resource.rsplit(".", 1)
            delete_uri = "{}/{}.{}".format(base, id, content_type)
        else:
            # Whole-dataset deletes go through the views API.
            delete_uri = resource.replace("resource", "api/views")

        return self._perform_request("delete", delete_uri)

    @property
    def response_formats(self):
        # Content types this client knows how to decode.
        # NOTE(review): _perform_request compares against
        # "application/rdf+xml; charset=utf-8", while this set lists the
        # bare "application/rdf+xml" — confirm which form the API returns.
        return set(["application/json; charset=utf-8",
                    "text/csv; charset=utf-8",
                    "application/rdf+xml"])

    # NOTE(review): name is misspelled ("unauthorized") but kept unchanged
    # to preserve the public interface; the method is currently a stub.
    def unaunthorized(self):
        pass

    def _perform_request(self, request_type, resource, **kwargs):
        '''
        Utility method that performs all requests.

        Returns decoded JSON, a list of CSV rows, raw RDF content, or the
        bare response object for deletes; raises on HTTP errors and on
        unknown content types.
        '''
        request_type_methods = set(["get", "post", "put", "delete"])
        if request_type not in request_type_methods:
            raise Exception("Unknown request type. Supported request types "
                            "are: {}"
                            .format(", ".join(request_type_methods)))

        uri = "{}://{}{}".format(self.uri_prefix, self.domain, resource)

        # set a timeout, just to be safe
        kwargs["timeout"] = 10

        response = getattr(self.session, request_type)(uri, **kwargs)

        # handle errors
        if response.status_code not in (200, 202):
            # TODO: handle this better
            print(response.json())
            response.raise_for_status()

        # deletes have no content body, simply return the whole response
        if request_type == "delete":
            return response

        # for other request types, analyze the contents to return the most
        # useful data
        content_type = response.headers.get('content-type').strip().lower()
        if content_type == "application/json; charset=utf-8":
            return response.json()
        elif content_type == "text/csv; charset=utf-8":
            csv_stream = StringIO(response.text)
            return [line for line in csv.reader(csv_stream)]
        elif content_type == "application/rdf+xml; charset=utf-8":
            return response.content
        else:
            raise Exception("Unknown response format: {}"
                            .format(content_type))

    def close(self):
        # Release the underlying connection pool.
        self.session.close()
|
||
|
||
def _clear_empty_values(args): | ||
result = {} | ||
for param in args: | ||
if args[param] is not None: | ||
result[param] = args[param] | ||
return result |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Default number of rows the Socrata API returns per request.
DEFAULT_LIMIT = 1000
# Hard upper bound on the $limit query parameter enforced by the client.
MAX_LIMIT = 50000
# No offset by default — paging starts at the first row.
DEFAULT_OFFSET = None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
version_info = (0, 1) | ||
__version__ = '.'.join(str(v) for v in version_info) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
|
||
from sodapy import Socrata | ||
|
||
def test_client():
    """Smoke test: a Socrata client can be constructed with a domain and
    an app token, and is an instance of the Socrata class."""
    instance = Socrata("something.com", "FakeAppToken")
    assert isinstance(instance, Socrata)