Skip to content
This repository has been archived by the owner on Aug 31, 2022. It is now read-only.

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
xmunoz committed Feb 23, 2015
1 parent e4e4eb3 commit a3395cb
Show file tree
Hide file tree
Showing 10 changed files with 387 additions and 0 deletions.
41 changes: 41 additions & 0 deletions .gitignore
@@ -0,0 +1,41 @@
*.py[cod]
MANIFEST

# virtualenv
venv_*

# C extensions
*.so

# Packages
*.egg
*.egg-info
dist
build
eggs
parts
var
sdist
develop-eggs
.installed.cfg
lib
lib64

# Installer logs
pip-log.txt

# Unit test / coverage reports
.coverage
.tox
nosetests.xml

# Translations
*.mo

# Mr Developer
.mr.developer.cfg
.project
.pydevproject

# vim
*.swp
13 changes: 13 additions & 0 deletions LICENSE
@@ -0,0 +1,13 @@
Copyright (c) 2014 Cristina Munoz

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
65 changes: 65 additions & 0 deletions README.md
@@ -0,0 +1,65 @@
# sodapy
Python bindings for the Socrata Open Data API

## Installation
You can install with `pip install sodapy`.

If you want to install from source, then `python setup.py install`.

## Requirements

The only dependency is the `requests` library (pinned to version 2.5.1 in `requirements.txt`).

## Documentation

The [official Socrata API docs](http://dev.socrata.com/) provide thorough documentation of the available methods, as well as other client libraries. A quick list of eligible domains to use with the API is available [here](https://opendata.socrata.com/dataset/Socrata-Customer-Spotlights/6wk3-4ija).

## Examples

Retrieving data is easy!

>>> from sodapy import Socrata
>>> client = Socrata("sandbox.demo.socrata.com", "FakeAppToken", username="fakeuser@somedomain.com", password="ndKS92mS01msjJKs")

>>> client.get("/resource/nimj-3ivp.json", limit=2)
[{u'geolocation': {u'latitude': u'41.1085', u'needs_recoding': False, u'longitude': u'-117.6135'}, u'version': u'9', u'source': u'nn', u'region': u'Nevada', u'occurred_at': u'2012-09-14T22:38:01', u'number_of_stations': u'15', u'depth': u'7.60', u'magnitude': u'2.7', u'earthquake_id': u'00388610'}, {u'geolocation': {u'latitude': u'34.525', u'needs_recoding': False, u'longitude': u'-118.1527'}, u'version': u'0', u'source': u'ci', u'region': u'Southern California', u'occurred_at': u'2012-09-14T22:14:45', u'number_of_stations': u'35', u'depth': u'10.60', u'magnitude': u'1.5', u'earthquake_id': u'15215753'}]

>>> client.get("/resource/nimj-3ivp.json", where="depth > 300", order="magnitude DESC", exclude_system_fields=False)
[{u'geolocation': {u'latitude': u'-15.563', u'needs_recoding': False, u'longitude': u'-175.6104'}, u'version': u'9', u':updated_at': 1348778988, u'number_of_stations': u'275', u'region': u'Tonga', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T21:16:43', u':id': 132, u'source': u'us', u'depth': u'328.30', u'magnitude': u'4.8', u':meta': u'{\n}', u':updated_meta': u'21484', u'earthquake_id': u'c000cnb5', u':created_at': 1348778988}, {u'geolocation': {u'latitude': u'-23.5123', u'needs_recoding': False, u'longitude': u'-179.1089'}, u'version': u'3', u':updated_at': 1348778988, u'number_of_stations': u'93', u'region': u'south of the Fiji Islands', u':created_meta': u'21484', u'occurred_at': u'2012-09-14T16:14:58', u':id': 32, u'source': u'us', u'depth': u'387.00', u'magnitude': u'4.6', u':meta': u'{\n}', u':updated_meta': u'21484', u'earthquake_id': u'c000cp1z', u':created_at': 1348778988}, {u'geolocation': {u'latitude': u'21.6711', u'needs_recoding': False, u'longitude': u'142.9236'}, u'version': u'C', u':updated_at': 1348778988, u'number_of_stations': u'136', u'region': u'Mariana Islands region', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T11:19:07', u':id': 193, u'source': u'us', u'depth': u'300.70', u'magnitude': u'4.4', u':meta': u'{\n}', u':updated_meta': u'21484', u'earthquake_id': u'c000cmsq', u':created_at': 1348778988}]

Query an individual row by simply appending the row identifier to the resource endpoint for that dataset.
>>> client.get("/resource/nimj-3ivp/193.json", exclude_system_fields=False)
{u'geolocation': {u'latitude': u'21.6711', u'needs_recoding': False, u'longitude': u'142.9236'}, u'version': u'C', u':updated_at': 1348778988, u'number_of_stations': u'136', u'region': u'Mariana Islands region', u':created_meta': u'21484', u'occurred_at': u'2012-09-13T11:19:07', u':id': 193, u'source': u'us', u'depth': u'300.70', u'magnitude': u'4.4', u':meta': u'{\n}', u':updated_meta': u'21484', u':position': 193, u'earthquake_id': u'c000cmsq', u':created_at': 1348778988}


Create a new dataset (not implemented yet: `create` currently raises `NotImplementedError`).
>>> client.create(file_object)

Create a new row in an existing dataset
>>> data = [{'Delegation': 'AJU', 'Name': 'Alaska', 'Key': 'AL', 'Entity': 'Juneau'}]
>>> client.upsert("/resource/eb9n-hr43.json", data)
{u'Errors': 0, u'Rows Deleted': 0, u'Rows Updated': 0, u'By SID': 0, u'Rows Created': 1, u'By RowIdentifier': 0}

Update/Delete rows in a dataset.
>>> data = [{'Delegation': 'sfa', ':id': 8, 'Name': 'bar', 'Key': 'doo', 'Entity': 'dsfsd'}, {':id': 7, ':deleted': True}]
>>> client.upsert("/resource/eb9n-hr43.json", data)
{u'Errors': 0, u'Rows Deleted': 1, u'Rows Updated': 1, u'By SID': 2, u'Rows Created': 0, u'By RowIdentifier': 0}

Upserts can even be performed with a csv file.
>>> data = open("upsert_test.csv")
>>> client.upsert("/resource/eb9n-hr43.json", data)
{u'Errors': 0, u'Rows Deleted': 0, u'Rows Updated': 1, u'By SID': 1, u'Rows Created': 0, u'By RowIdentifier': 0}

The same is true for full replace.
>>> data = open("replace_test.csv")
>>> client.replace("/resource/eb9n-hr43.json", data)
{u'Errors': 0, u'Rows Deleted': 0, u'Rows Updated': 0, u'By SID': 0, u'Rows Created': 12, u'By RowIdentifier': 0}

Delete an individual row.
>>> client.delete("/resource/nimj-3ivp.json", id=2)
<Response [200]>

Delete the entire dataset.
>>> client.delete("/resource/nimj-3ivp.json")
<Response [200]>

Wrap up when you're finished.
>>> client.close()
1 change: 1 addition & 0 deletions requirements.txt
@@ -0,0 +1 @@
requests==2.5.1
3 changes: 3 additions & 0 deletions runtests
@@ -0,0 +1,3 @@
#!/bin/bash
# Run the test suite with the current directory on the Python import path,
# forwarding every command-line argument to py.test unchanged.

export PYTHONPATH=.
exec py.test "$@"
34 changes: 34 additions & 0 deletions setup.py
@@ -0,0 +1,34 @@
#!/usr/bin/env python

import setuptools
from distutils.core import setup

# Load __version__ by executing sodapy/version.py in a scratch namespace, so
# that setup.py never has to import the sodapy package itself (the package
# imports requests, which may not be installed yet). exec(compile(...)) is
# used instead of execfile() because execfile() only exists on Python 2.
version_namespace = {}
with open('sodapy/version.py') as version_file:
    exec(compile(version_file.read(), 'sodapy/version.py', 'exec'), version_namespace)
__version__ = version_namespace['__version__']

# One requirement per non-blank line; blank lines would otherwise end up as
# empty requirement strings in install_requires.
with open('requirements.txt') as requirements:
    required = [line.strip() for line in requirements if line.strip()]

# The repository ships README.md (there is no plain "README" file).
with open('README.md') as readme:
    long_description = readme.read()

kwargs = {
    "name": "sodapy",
    "version": str(__version__),
    "packages": ["sodapy"],
    "description": "Python bindings for the Socrata Open Data API",
    "long_description": long_description,
    "author": "Cristina Munoz",
    "maintainer": "Cristina Munoz",
    "author_email": "hi@xmunoz.com",
    "maintainer_email": "hi@xmunoz.com",
    "license": "Apache",
    "install_requires": required,
    "url": "https://github.com/xmunoz/sodapy",
    "download_url": "https://github.com/xmunoz/sodapy/archive/master.tar.gz",
    "classifiers": [
        "Programming Language :: Python",
        "Topic :: Software Development",
        "Topic :: Software Development :: Libraries",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
}

setup(**kwargs)

219 changes: 219 additions & 0 deletions sodapy/__init__.py
@@ -0,0 +1,219 @@
import requests
from cStringIO import StringIO
import csv
import json

from .constants import MAX_LIMIT

__author__ = "Cristina Munoz <hi@xmunoz.com>"
from version import __version__, version_info

class Socrata(object):
    '''
    Client for the Socrata Open Data API. All HTTP traffic goes through a single
    requests.Session that carries the app token and the authentication credentials.
    '''

    def __init__(self, domain, app_token, username=None, password=None, access_token=None,
                 session_adapter=None):
        '''
        The required arguments are:
            domain: the domain you wish to access
            app_token: your Socrata application token
        Simple requests are possible without an app_token, though these requests will be rate-
        limited.
        For write/update/delete operations or private datasets, the Socrata API currently supports
        basic HTTP authentication, which requires these additional parameters.
            username: your Socrata username
            password: your Socrata password
        The basic HTTP authentication comes with a deprecation warning, and the current
        recommended authentication method is OAuth 2.0. To make requests on behalf of the user
        using OAuth 2.0 authentication, follow the recommended procedure and provide the final
        access_token to the client.
        Optionally, session_adapter is a dict with the keys "prefix" and "adapter". The adapter
        is mounted on the session for that prefix, and the prefix is also used as the scheme
        when request URIs are built.
        Raises an Exception when the domain is missing or when the authentication arguments are
        inconsistent (see authentication_validation).
        More information about authentication can be found in the official docs:
            http://dev.socrata.com/docs/authentication.html
        '''
        if not domain:
            raise Exception("A domain is required.")
        self.domain = domain

        # set up the session with proper authentication credentials
        self.session = requests.Session()
        if not app_token:
            # Both halves of the message live inside one parenthesised expression so the
            # complete sentence is printed (works as a statement on Python 2 as well).
            print("Warning: requests made without an app_token will be subject to strict"
                  " throttling limits.")
        else:
            self.session.headers.update({"X-App-token": app_token})

        self.authentication_validation(username, password, access_token)

        # use either basic HTTP auth or OAuth2.0
        if username and password:
            self.session.auth = (username, password)
        elif access_token:
            self.session.headers.update({"Authorization": "OAuth {}".format(access_token)})

        if session_adapter:
            self.session.mount(session_adapter["prefix"], session_adapter["adapter"])
            self.uri_prefix = session_adapter["prefix"]
        else:
            self.uri_prefix = "https"

    def authentication_validation(self, username, password, access_token):
        '''
        Only accept one form of authentication.
        Raises an Exception when only one of username/password is given, or when basic
        authentication credentials are combined with an OAuth 2.0 access_token.
        '''
        if bool(username) != bool(password):
            raise Exception("Basic authentication requires a username AND password.")
        if (username and access_token) or (password and access_token):
            raise Exception("Cannot use both Basic Authentication and OAuth 2.0. Please use only"
                            " one authentication method.")

    def create(self, file_object):
        '''
        Placeholder for dataset creation; always raises NotImplementedError.
        '''
        raise NotImplementedError()

    def get(self, resource, **kwargs):
        '''
        Read data from the requested resource. Optionally, specify a keyword arg to filter results:
            select : the set of columns to be returned, defaults to *
            where : filters the rows to be returned
            order : specifies the order of results
            group : column to group results on
            limit : max number of results to return, defaults to 1000
            offset : offset, used for paging. Defaults to 0
            q : performs a full text search for a value
            exclude_system_fields : defaults to true. If set to false, the response will include
                system fields (:id, :created_at, and :updated_at)
            format : if given, sent as the value of the "Accept" request header
        Any other keyword argument is passed through as a query parameter under its own name.
        Raises an Exception when limit is greater than MAX_LIMIT.
        More information about the SoQL parameters can be found at the official docs:
            http://dev.socrata.com/docs/queries.html
        More information about system fields can be found here:
            http://dev.socrata.com/docs/system-fields.html
        '''
        headers = _clear_empty_values({"Accept": kwargs.pop("format", None)})

        params = {
            "$select": kwargs.pop("select", None),
            "$where": kwargs.pop("where", None),
            "$order": kwargs.pop("order", None),
            "$group": kwargs.pop("group", None),
            "$limit": kwargs.pop("limit", None),
            "$offset": kwargs.pop("offset", None),
            "$q": kwargs.pop("q", None),
            "$$exclude_system_fields": kwargs.pop("exclude_system_fields", None),
        }

        # whatever is left in kwargs is forwarded untouched; None values are dropped so they
        # never reach the query string
        params.update(kwargs)
        params = _clear_empty_values(params)

        if params.get("$limit") and params["$limit"] > MAX_LIMIT:
            raise Exception("Max limit exceeded! {} is greater than the Socrata API limit of {}. "
                            "More information on the official API docs: "
                            "http://dev.socrata.com/docs/paging.html"
                            .format(params["$limit"], MAX_LIMIT))

        return self._perform_request("get", resource, headers=headers, params=params)

    def upsert(self, resource, payload):
        '''
        Insert, update or delete data to/from an existing dataset. Currently supports json
        and csv file objects. See here for the upsert documentation:
            http://dev.socrata.com/publishers/upsert.html
        '''
        return self._perform_update("post", resource, payload)

    def replace(self, resource, payload):
        '''
        Same logic as upsert, but overwrites existing data with the payload using PUT instead of
        POST.
        '''
        return self._perform_update("put", resource, payload)

    def _perform_update(self, method, resource, payload):
        '''
        Send payload to resource with the given HTTP method. A list is serialized to JSON; an
        object with a read() method (an open file or any file-like object) is sent as-is with
        a text/csv content type. Anything else raises an Exception.
        '''
        if isinstance(payload, list):
            return self._perform_request(method, resource, data=json.dumps(payload))

        # Duck-typed check instead of isinstance(payload, file): the "file" builtin only
        # exists on Python 2 and would also reject file-like objects such as StringIO.
        if hasattr(payload, "read"):
            headers = {
                "content-type": "text/csv",
            }
            return self._perform_request(method, resource, data=payload, headers=headers)

        raise Exception("Unrecognized payload {}. Currently only lists and files are "
                        "supported.".format(type(payload)))

    def delete(self, resource, id=None):
        '''
        Delete the entire dataset, e.g.
            client.delete("/resource/nimj-3ivp.json")
        or a single row, e.g.
            client.delete("/resource/nimj-3ivp.json", id=4)
        Returns the raw response object.
        '''
        # Compare against None rather than truthiness: a row id of 0 must delete that row,
        # not fall through to the branch that deletes the whole dataset.
        if id is not None:
            base, content_type = resource.rsplit(".", 1)
            delete_uri = "{}/{}.{}".format(base, id, content_type)
        else:
            # only the leading path segment is rewritten
            delete_uri = resource.replace("resource", "api/views", 1)

        return self._perform_request("delete", delete_uri)

    @property
    def response_formats(self):
        '''
        The set of response content types listed as supported by this client.
        '''
        return set(["application/json; charset=utf-8", "text/csv; charset=utf-8",
                    "application/rdf+xml"])

    def unaunthorized(self):
        # Empty placeholder; the (misspelled) name is kept because it is part of the class's
        # public interface.
        pass

    def _perform_request(self, request_type, resource, **kwargs):
        '''
        Utility method that performs all requests.
            request_type: one of "get", "post", "put" or "delete"
            resource: the path appended to the domain to build the request URI
            kwargs: passed on to the matching requests.Session method; a timeout of 10 is
                used unless the caller supplies one
        Returns the raw response for deletes; otherwise the parsed JSON, a list of csv rows,
        or the raw body for RDF/XML, depending on the response content type.
        Raises an Exception for an unknown request type or response format; HTTP error
        statuses are raised through response.raise_for_status().
        '''
        # a tuple keeps the order of the methods in the error message stable
        request_type_methods = ("get", "post", "put", "delete")
        if request_type not in request_type_methods:
            raise Exception("Unknown request type. Supported request types are: {}".format(
                ", ".join(request_type_methods)))

        uri = "{}://{}{}".format(self.uri_prefix, self.domain, resource)

        # set a timeout, just to be safe
        kwargs.setdefault("timeout", 10)

        response = getattr(self.session, request_type)(uri, **kwargs)

        # handle errors
        if response.status_code not in (200, 202):
            # TODO: handle this better
            try:
                print(response.json())
            except ValueError:
                # the error body is not JSON; show it raw instead of letting the decode
                # error hide the HTTP error raised below
                print(repr(response.text))
            response.raise_for_status()

        # deletes have no content body, simply return the whole response
        if request_type == "delete":
            return response

        # for other request types, analyze the contents to return most useful data
        content_type = (response.headers.get("content-type") or "").strip().lower()
        # compare the media type only, so a missing or different charset parameter does not
        # turn a supported format into an "unknown" one
        media_type = content_type.split(";", 1)[0].strip()
        if media_type == "application/json":
            return response.json()
        elif media_type == "text/csv":
            csv_stream = StringIO(response.text)
            return list(csv.reader(csv_stream))
        elif media_type == "application/rdf+xml":
            return response.content
        else:
            raise Exception("Unknown response format: {}".format(content_type))

    def close(self):
        '''
        Close the underlying session.
        '''
        self.session.close()


def _clear_empty_values(args):
    '''
    Return a new dict holding every entry of args whose value is not None.
    Other falsy values (0, False, "") are kept; args itself is left untouched.
    '''
    return dict((key, value) for key, value in args.items() if value is not None)
3 changes: 3 additions & 0 deletions sodapy/constants.py
@@ -0,0 +1,3 @@
# Matches the default limit documented in Socrata.get() ("defaults to 1000").
# NOTE(review): not imported by sodapy/__init__.py in this commit - confirm whether it is used.
DEFAULT_LIMIT = 1000
# Largest "limit" value Socrata.get() accepts; a larger value raises an Exception there.
MAX_LIMIT = 50000
# NOTE(review): presumably the default paging offset; not imported by sodapy/__init__.py
# in this commit - confirm intended use.
DEFAULT_OFFSET = None
2 changes: 2 additions & 0 deletions sodapy/version.py
@@ -0,0 +1,2 @@
# Version number as a tuple of integer components (presumably major, minor).
version_info = (0, 1)
# The same version as a dot-separated string, i.e. "0.1" for (0, 1).
__version__ = '.'.join(str(v) for v in version_info)
6 changes: 6 additions & 0 deletions tests/test_soda.py
@@ -0,0 +1,6 @@

from sodapy import Socrata

def test_client():
    '''Building a client from a domain and an app token yields a Socrata instance.'''
    assert isinstance(Socrata("something.com", "FakeAppToken"), Socrata)

0 comments on commit a3395cb

Please sign in to comment.