In [27]:
# Make sure this runs ok before proceeding.
# If you get an import error, do `pip install ckanapi`
import ckanapi

In [28]:
import os

# Connection settings for the CKAN instance.
# The API key is taken from the CKAN_API_KEY environment variable when it is
# set, so the real key never has to be saved inside the notebook; otherwise
# the placeholder below is used and has to be replaced by hand.
API_KEY = os.environ.get('CKAN_API_KEY', r'put your API key here')
SERVER = r'http://data2.vta.org'
USER_AGENT = r'CKAN SHP Uploader'

# Client object used by every helper function defined further down.
ckan_inst = ckanapi.RemoteCKAN(
            SERVER,
            apikey=API_KEY,
            user_agent=USER_AGENT
        )

In [7]:
# Sample CSV files, given as paths relative to the current working directory.
# SAMPLE_FILE_1a and SAMPLE_FILE_1b are referenced by the upload/update cells
# further down; SAMPLE_FILE_2 is not referenced in the cells shown here.
SAMPLE_FILE_1a = 'samples/Sacramentorealestatetransactions_a.csv'
SAMPLE_FILE_1b = 'samples/Sacramentorealestatetransactions_b.csv'
SAMPLE_FILE_2 = 'samples/SalesJan2009.csv'

import os.path
def check_preview_file(filename):
    """
    Print a short preview of a file so it can be checked before uploading.

    filename -- path of the file to preview

    Prints a list holding up to the first five lines of the file (line
    endings included), or 'file not found' when `filename` is not an
    existing regular file. Returns None.
    """
    # function-scope import keeps this cell runnable on its own
    from itertools import islice

    if os.path.isfile(filename):
        with open(filename) as f:
            # islice simply stops at end-of-file, so a file with fewer than
            # five lines is previewed instead of raising StopIteration
            head = list(islice(f, 5))
        print(head)
    else:
        print('file not found')

# preview the first sample file before uploading anything
check_preview_file(SAMPLE_FILE_1a)

['street,city,zip,state,beds,baths,sq__ft,type,sale_date,price,latitude,longitude\n', '3526 HIGH ST,SACRAMENTO,95838,CA,2,1,836,Residential,Wed May 21 00:00:00 EDT 2008,59222,38.631913,-121.434879\n', '51 OMAHA CT,SACRAMENTO,95823,CA,3,1,1167,Residential,Wed May 21 00:00:00 EDT 2008,68212,38.478902,-121.431028\n', '2796 BRANCH ST,SACRAMENTO,95815,CA,2,1,796,Residential,Wed May 21 00:00:00 EDT 2008,68880,38.618305,-121.443839\n', '2805 JANETTE WAY,SACRAMENTO,95815,CA,2,1,852,Residential,Wed May 21 00:00:00 EDT 2008,69307,38.616835,-121.439146\n']


In [25]:
def create_dataset(dataset_name, dataset_title, owner_org='vta'):
    """
    Create a new dataset (no resource is attached by this function).

    dataset_name  -- passed to package_create as `name`
    dataset_title -- passed to package_create as `title`
    owner_org     -- passed to package_create as `owner_org`

    Prints 'done' only when package_create succeeded. A ValidationError or
    NotAuthorized error is printed instead of being raised. Returns None
    in every case.
    """
    try:
        ckan_inst.action.package_create(
                name=dataset_name,
                title=dataset_title,
                owner_org=owner_org)
    except ckanapi.ValidationError as ex:
        print(ex)
        # stop here so a rejected request is not reported as 'done'
        return
    except ckanapi.NotAuthorized as ex:
        print('access denied. Is your API key valid?')
        print(ex)
        return
    print('done')

def add_resource_to_dataset(package_id, filepath, name=None, url='dummy-value', data_format='csv'):
    """
    Upload a local file as a new resource of an existing dataset.

    package_id  -- passed to resource_create as `package_id`
    filepath    -- path of the local file to upload
    name        -- resource name; defaults to the base name of `filepath`
    url         -- passed to resource_create as `url`
    data_format -- passed to resource_create as `format`

    Returns whatever resource_create returns on success. A ValidationError
    or NotAuthorized error is printed instead of being raised and None is
    returned. 'done' is printed only after a successful upload.
    """
    if name is None:
        name = os.path.basename(filepath)
    try:
        print('uploading...')
        # the context manager closes the file once the request has finished,
        # whether it succeeded or failed
        with open(filepath, 'rb') as upload_file:
            res = ckan_inst.action.resource_create(
                package_id=package_id,
                name=name,
                upload=upload_file,
                url=url,
                format=data_format)
        print('done')
        return res
    except ckanapi.ValidationError as ex:
        print(ex)
    except ckanapi.NotAuthorized as ex:
        print('access denied. Is your API key valid?')
        print(ex)
    # reached only after a handled error: no 'done' message, no result
    return None

    
def update_resource(dataset_title, filepath, owner_org='vta', name=None, url='dummy-value', data_format='csv'):
    """
    Replace the uploaded file of an existing resource, looked up by name.

    The dataset is located with a package_search on title and organization;
    exactly one match is required. The resource is then located by `name`
    among that dataset's resources.

    For this to work, the resource names should be unique (this is not enforced).
    If the names are not unique, only the last one with the same name will be updated.

    dataset_title -- title used in the search query
    filepath      -- path of the local file to upload
    owner_org     -- organization used in the search query
    name          -- resource name to look for; defaults to the base name of
                     `filepath` (same default as add_resource_to_dataset)
    url           -- passed to resource_update as `url`
    data_format   -- passed to resource_update as `format`

    Returns whatever resource_update returns on success. Returns None when
    the dataset or resource cannot be found, or when a ValidationError or
    NotAuthorized error occurs (the error is printed, not raised).

    http://docs.ckan.org/en/latest/api/index.html#ckan.logic.action.update.resource_update
    """
    if name is None:
        # without this the lookup below would search for a resource called "None"
        name = os.path.basename(filepath)

    # run a SOLR search for the package
    # http://data2.vta.org/api/3/action/package_search?q=&fq=title:ins_sample%20AND%20organization:city-of-san-jose
    solr_query = 'title:{0} AND organization:{1}'.format(dataset_title, owner_org)
    res = ckan_inst.action.package_search(q=solr_query)
    # compare by value; `is not 1` tests object identity and only works by accident
    if res.get('count') != 1:
        print('could not find the requested dataset; dataset title and organization not specific enough')
        return

    print('looking for file "{0}" inside the "{1}" dataset'.format( name, dataset_title))
    resource_id = None
    for r in res.get('results')[0].get('resources'):
        print (str(r.get('name'))+' : '+str(r.get('id')))
        if str(r.get('name')) == str(name):
            # no break: when several resources share the name, the last one wins
            resource_id = r.get('id')

    if resource_id is None:
        print('could not find the requested resource')
        return
    else:
        print('found resource id "{0}"'.format(resource_id))

    print('uploading...')
    try:
        # the context manager closes the file once the request has finished
        with open(filepath, 'rb') as upload_file:
            res = ckan_inst.action.resource_update(
                id=resource_id,
                name=name,
                upload=upload_file,
                url=url,
                format=data_format)
        print('done')
        return res
    except ckanapi.ValidationError as ex:
        print(ex)
    except ckanapi.NotAuthorized as ex:
        print('access denied. Is your API key valid?')
        print(ex)
    # reached only after a handled error: no 'done' message, no result
    return None


    
def purge_dataset(dataset_id):
    """
    Purge a dataset by calling the 'dataset_purge' action with its id.

    WARNING: cannot be undone
    This frees up the URL of the resource

    A ValidationError or NotAuthorized error is printed rather than raised.
    Returns None.
    """
    payload = {'id': dataset_id}
    try:
        ckan_inst.call_action('dataset_purge', payload)
    except ckanapi.ValidationError as validation_err:
        print(validation_err)
    except ckanapi.NotAuthorized as auth_err:
        # hint first, then the error itself, each on its own line
        print('access denied. Is your API key valid?', auth_err, sep='\n')

    
def delete_dataset(dataset_id):
    """
    Delete a dataset by calling package_delete with its id.

    A ValidationError or NotAuthorized error is printed rather than raised.
    Returns None.
    """
    try:
        ckan_inst.action.package_delete(id=dataset_id)
    except ckanapi.ValidationError as validation_err:
        print(validation_err)
    except ckanapi.NotAuthorized as auth_err:
        # hint first, then the error itself, each on its own line
        print('access denied. Is your API key valid?', auth_err, sep='\n')

In [188]:
# create a dataset named 'ins_sample' (title 'ins sample') under the 'city-of-san-jose' organization
create_dataset('ins_sample', 'ins sample', owner_org='city-of-san-jose')

done


In [189]:
# upload the contents of SAMPLE_FILE_1a to a resource named after SAMPLE_FILE_1a under the 'ins_sample' dataset
# (the file passed here must be SAMPLE_FILE_1a, otherwise the later update with SAMPLE_FILE_1b changes nothing)
add_resource_to_dataset('ins_sample', SAMPLE_FILE_1a, name=os.path.basename(SAMPLE_FILE_1a))

done


In [24]:
# now we replace the contents of the resource named after SAMPLE_FILE_1a with the contents of the file in SAMPLE_FILE_1b
# (the dataset is looked up by title and organization, the resource by its name)
update_resource('ins_sample', filepath=SAMPLE_FILE_1b, name=os.path.basename(SAMPLE_FILE_1a), owner_org='city-of-san-jose')

looking for file "Sacramentorealestatetransactions_a.csv" inside the "ins_sample" dataset
Sacramentorealestatetransactions_a.csv : ecc12dca-d81a-4ee8-9e99-f0903cda095e
found resource id "ecc12dca-d81a-4ee8-9e99-f0903cda095e"
uploading...
done


{'cache_last_updated': None,
 'cache_url': None,
 'created': '2016-09-25T05:22:57.985253',
 'datastore_active': False,
 'description': '',
 'format': 'CSV',
 'hash': '',
 'id': 'ecc12dca-d81a-4ee8-9e99-f0903cda095e',
 'last_modified': '2016-09-26T02:03:30.078056',
 'mimetype': None,
 'mimetype_inner': None,
 'name': 'Sacramentorealestatetransactions_a.csv',
 'package_id': 'ccd74d26-6c3d-42f1-a93d-b00c9cfaa207',
 'position': 0,
 'resource_type': None,
 'revision_id': 'f81869c7-27cc-4525-84ae-6cbd47882fee',
 'size': None,
 'state': 'active',
 'url': 'http://52.42.208.85/dataset/ccd74d26-6c3d-42f1-a93d-b00c9cfaa207/resource/ecc12dca-d81a-4ee8-9e99-f0903cda095e/download/sacramentorealestatetransactionsb.csv',
 'url_type': 'upload',
 'webstore_last_updated': None,
 'webstore_url': None}

In [26]:
# Uncomment and run these if you want to clear and start over (purging cannot be undone)
#delete_dataset('ins_sample')
#purge_dataset('ins_sample')