In [1]:
# we're going to use the Requests Python module, which is a simple library for making HTTP requests.
# https://docs.python-requests.org/en/latest/
# so let's first import the library

import requests

In [2]:
# the basic form of the call is

# `requests.get(URL)`

# let's request a simple webpage to see how it works.
# because the result is not assigned to anything, the notebook displays the returned object

requests.get("http://www.example.com/")

<Response [200]>

In [3]:
# when the response object isn't stored in a variable, the notebook just displays its
# representation, which shows the HTTP response status code.  in this case, 200 means the request succeeded
# let's run that get request again, but this time store it in a variable

response = requests.get("http://www.example.com/")

In [4]:
# now we can explore the attributes and methods available on the response using dot notation:

# type `response.` and press Tab to list them

# and, let's just select `text` to see the body of the page that was stored, as a string.
response.text

'<!doctype html>\n<html>\n<head>\n    <title>Example Domain</title>\n\n    <meta charset="utf-8" />\n    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />\n    <meta name="viewport" content="width=device-width, initial-scale=1" />\n    <style type="text/css">\n    body {\n        background-color: #f0f0f2;\n        margin: 0;\n        padding: 0;\n        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;\n        \n    }\n    div {\n        width: 600px;\n        margin: 5em auto;\n        padding: 2em;\n        background-color: #fdfdff;\n        border-radius: 0.5em;\n        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);\n    }\n    a:link, a:visited {\n        color: #38488f;\n        text-decoration: none;\n    }\n    @media (max-width: 700px) {\n        div {\n            margin: 0 auto;\n            width: auto;\n        }\n    }\n    </style>    \n</head>\n\n<body>\n<div>\n    <

In [5]:
# we can look at the headers to discover a little more about the response,
# such as its content type, encoding, and caching information
response.headers

{'Content-Encoding': 'gzip', 'Accept-Ranges': 'bytes', 'Age': '573005', 'Cache-Control': 'max-age=604800', 'Content-Type': 'text/html; charset=UTF-8', 'Date': 'Sat, 25 Dec 2021 18:08:06 GMT', 'Etag': '"3147526947"', 'Expires': 'Sat, 01 Jan 2022 18:08:06 GMT', 'Last-Modified': 'Thu, 17 Oct 2019 07:18:26 GMT', 'Server': 'ECS (sab/5693)', 'Vary': 'Accept-Encoding', 'X-Cache': 'HIT', 'Content-Length': '648'}

In [6]:
# `content` holds the same page body as `text`, but as raw bytes (note the b'...' prefix in the output)
response.content

b'<!doctype html>\n<html>\n<head>\n    <title>Example Domain</title>\n\n    <meta charset="utf-8" />\n    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />\n    <meta name="viewport" content="width=device-width, initial-scale=1" />\n    <style type="text/css">\n    body {\n        background-color: #f0f0f2;\n        margin: 0;\n        padding: 0;\n        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;\n        \n    }\n    div {\n        width: 600px;\n        margin: 5em auto;\n        padding: 2em;\n        background-color: #fdfdff;\n        border-radius: 0.5em;\n        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);\n    }\n    a:link, a:visited {\n        color: #38488f;\n        text-decoration: none;\n    }\n    @media (max-width: 700px) {\n        div {\n            margin: 0 auto;\n            width: auto;\n        }\n    }\n    </style>    \n</head>\n\n<body>\n<div>\n    

In [8]:
# `url` is the address the response came from
response.url

'http://www.example.com/'

In [7]:
# now that we've used Requests to obtain an unstructured response, let's see how we can use it to 
# request a structured object from an API, specifically the Zenodo API

The Zenodo REST API supports:
- Deposit — upload and publishing of research outputs (identical to functionality available in the user interface).
- Records — search published records.
- Files — download/upload of files.

Zenodo's [quickstart guide](https://developers.zenodo.org/#quickstart-upload), which is the first thing you see in their documentation, describes how to upload, which requires a key. We want to download instead, so we need to [jump to the Records section](https://developers.zenodo.org/#records) to find the URL structure for an API records call. That reference is buried a little in the documentation, which is a good example of why you should familiarize yourself with an API's documentation before jumping right in.

The structure is

`GET /api/records/`

so our Requests query will be:

In [9]:
# Query the Zenodo records endpoint.
# The timeout keeps the notebook from hanging indefinitely if the server never
# answers, and raise_for_status() stops here with a clear HTTPError on a 4xx/5xx
# reply instead of letting a later .json() call fail on an error page.
response = requests.get("https://zenodo.org/api/records", timeout=30)
response.raise_for_status()

In [10]:
# we know from the developer documentation that the call returns a JSON object,
# so to see its contents parsed into Python dictionaries and lists we use
response.json()

{'aggregations': {'access_right': {'buckets': [{'doc_count': 2182876,
     'key': 'open'},
    {'doc_count': 47453, 'key': 'closed'},
    {'doc_count': 6591, 'key': 'restricted'},
    {'doc_count': 1400, 'key': 'embargoed'}],
   'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0},
  'file_type': {'buckets': [{'doc_count': 995551, 'key': 'pdf'},
    {'doc_count': 371754, 'key': 'jpg'},
    {'doc_count': 362714, 'key': 'png'},
    {'doc_count': 231468, 'key': 'html'},
    {'doc_count': 110394, 'key': 'zip'},
    {'doc_count': 26392, 'key': 'xlsx'},
    {'doc_count': 23038, 'key': 'txt'},
    {'doc_count': 19247, 'key': 'docx'},
    {'doc_count': 18875, 'key': 'csv'},
    {'doc_count': 17926, 'key': 'xml'}],
   'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 149107},
  'keywords': {'buckets': [{'doc_count': 906874, 'key': 'Taxonomy'},
    {'doc_count': 905692, 'key': 'Biodiversity'},
    {'doc_count': 566792, 'key': 'Animalia'},
    {'doc_count': 437668, 'key': 'Art

In [11]:
# The data in the object is structured, but can end up being quite large, so let's use a dataframe 
# to help view the content

import pandas as pd

In [12]:
# first attempt: hand the response object straight to pandas.
# NOTE: this passes the whole Response rather than its parsed JSON, so the rows
# in the output are just raw byte chunks of the body
pd.DataFrame(response)

Unnamed: 0,0
0,"b'{""aggregations"":{""access_right"":{""buckets"":[..."
1,"b'591,""key"":""restricted""},{""doc_count"":1400,""k..."
2,"b'e"":{""buckets"":[{""doc_count"":995551,""key"":""pd..."
3,"b'31468,""key"":""html""},{""doc_count"":110394,""key..."
4,"b't"":19247,""key"":""docx""},{""doc_count"":18875,""k..."
...,...
248,"b'"",""type"":""dataset""},""title"":""IDSL.UFA result..."
249,"b'""unique_downloads"":0.0,""unique_views"":0.0,""v..."
250,"b'sion_views"":0.0,""version_volume"":0.0,""views""..."
251,"b'links"":{""next"":""https://zenodo.org/api/recor..."


In [13]:
# oops, we put the whole response into the dataframe, when we actually just want the structured JSON data.
# here the top-level keys (aggregations, hits, links) become the columns, and the
# nested values stay packed inside individual cells

pd.DataFrame(response.json())

Unnamed: 0,aggregations,hits,links
access_right,"{'buckets': [{'doc_count': 2182876, 'key': 'op...",,
file_type,"{'buckets': [{'doc_count': 995551, 'key': 'pdf...",,
keywords,"{'buckets': [{'doc_count': 906874, 'key': 'Tax...",,
type,"{'buckets': [{'doc_count': 1251375, 'key': 'pu...",,
hits,,"[{'conceptdoi': '10.5281/zenodo.5804524', 'con...",
total,,2238320,
next,,,https://zenodo.org/api/records/?sort=mostrecen...
self,,,https://zenodo.org/api/records/?sort=mostrecen...


In [17]:
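# nested JSON objects are difficult to work with directly: in the dataframe above, whole
# dictionaries and lists end up packed inside single cells, so we have to dig into the structure by hand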

In [14]:
# keep the parsed JSON (a Python dictionary) in a variable so we can explore it
output = response.json()

In [15]:
# list every top-level value of the dictionary at once -- still far too much to read comfortably
output.values()

dict_values([{'access_right': {'buckets': [{'doc_count': 2182876, 'key': 'open'}, {'doc_count': 47453, 'key': 'closed'}, {'doc_count': 6591, 'key': 'restricted'}, {'doc_count': 1400, 'key': 'embargoed'}], 'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0}, 'file_type': {'buckets': [{'doc_count': 995551, 'key': 'pdf'}, {'doc_count': 371754, 'key': 'jpg'}, {'doc_count': 362714, 'key': 'png'}, {'doc_count': 231468, 'key': 'html'}, {'doc_count': 110394, 'key': 'zip'}, {'doc_count': 26392, 'key': 'xlsx'}, {'doc_count': 23038, 'key': 'txt'}, {'doc_count': 19247, 'key': 'docx'}, {'doc_count': 18875, 'key': 'csv'}, {'doc_count': 17926, 'key': 'xml'}], 'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 149107}, 'keywords': {'buckets': [{'doc_count': 906874, 'key': 'Taxonomy'}, {'doc_count': 905692, 'key': 'Biodiversity'}, {'doc_count': 566792, 'key': 'Animalia'}, {'doc_count': 437668, 'key': 'Arthropoda'}, {'doc_count': 298167, 'key': 'Insecta'}, {'doc_count': 270753, 'key': 'Ter

In [18]:
 # select a single top-level key to narrow the view down to the aggregation counts
 output['aggregations']

{'access_right': {'buckets': [{'doc_count': 2182876, 'key': 'open'},
   {'doc_count': 47453, 'key': 'closed'},
   {'doc_count': 6591, 'key': 'restricted'},
   {'doc_count': 1400, 'key': 'embargoed'}],
  'doc_count_error_upper_bound': 0,
  'sum_other_doc_count': 0},
 'file_type': {'buckets': [{'doc_count': 995551, 'key': 'pdf'},
   {'doc_count': 371754, 'key': 'jpg'},
   {'doc_count': 362714, 'key': 'png'},
   {'doc_count': 231468, 'key': 'html'},
   {'doc_count': 110394, 'key': 'zip'},
   {'doc_count': 26392, 'key': 'xlsx'},
   {'doc_count': 23038, 'key': 'txt'},
   {'doc_count': 19247, 'key': 'docx'},
   {'doc_count': 18875, 'key': 'csv'},
   {'doc_count': 17926, 'key': 'xml'}],
  'doc_count_error_upper_bound': 0,
  'sum_other_doc_count': 149107},
 'keywords': {'buckets': [{'doc_count': 906874, 'key': 'Taxonomy'},
   {'doc_count': 905692, 'key': 'Biodiversity'},
   {'doc_count': 566792, 'key': 'Animalia'},
   {'doc_count': 437668, 'key': 'Arthropoda'},
   {'doc_count': 298167, 'key': 

In [24]:
# we're going to use a package to flatten the JSON, so we can work with it.
# https://anaconda.org/conda-forge/flatten_json
# it should already be installed.  The other libraries we use can be installed through the
# Anaconda interface, but if this one is missing it has to be installed from the command line with
# `conda install -c conda-forge flatten_json`

from flatten_json import flatten