In [None]:
# ensure required packages are installed in this notebook environment
# link for website: https://www.usitc.gov/applications/dataweb/api/dataweb_query_api.html
%pip install --quiet pandas requests

import copy  # deep copies of the nested query template
import os  # reading the API token from the environment

import pandas as pd      # primary library for managing and manipulating data
import requests  # library for calling API endpoints

In [None]:
# Connection settings shared by every request in this notebook.
# The token is read from the TRADE_API_KEY environment variable; surrounding
# whitespace (e.g. a trailing newline from a copy/paste) is removed because it
# would otherwise end up inside the Authorization header.
token = os.getenv('TRADE_API_KEY', '').strip()  # Get from API tab in DataWeb
if not token:
    # An empty token still builds a "Bearer " header, so flag the problem here
    # rather than letting it show up as an opaque failure on the first API call.
    print("WARNING: TRADE_API_KEY is not set; DataWeb API requests will not be authenticated.")

baseUrl = 'https://datawebws.usitc.gov/dataweb'
headers = {
    "Content-Type": "application/json; charset=utf-8",
    "Authorization": "Bearer " + token
}

# Every request below is sent with verify=False (TLS certificate verification
# disabled); silence the warning urllib3 would otherwise emit for each call.
# NOTE(review): verify=False weakens transport security - confirm it is still required.
requests.packages.urllib3.disable_warnings()

In [None]:
# Template request body for the runReport endpoint. The example cells below
# copy this structure and override individual settings.
basicQuery = {
    "savedQueryName": "",
    "savedQueryDesc": "",
    "isOwner": True,
    "runMonthly": False,
    # trade flow and classification system
    "reportOptions": {"tradeType": "Import", "classificationSystem": "HTS"},
    "searchOptions": {
        # districts, import programs and rate provision codes
        "MiscGroup": {
            "districts": {
                "aggregation": "Aggregate District",
                "districtGroups": {"userGroups": []},
                "districts": [],
                "districtsExpanded": [{"name": "All Districts", "value": "all"}],
                "districtsSelectType": "all",
            },
            "importPrograms": {
                "aggregation": None,
                "importPrograms": [],
                "programsSelectType": "all",
            },
            "extImportPrograms": {
                "aggregation": "Aggregate CSC",
                "extImportPrograms": [],
                "extImportProgramsExpanded": [],
                "programsSelectType": "all",
            },
            "provisionCodes": {
                "aggregation": "Aggregate RPCODE",
                "provisionCodesSelectType": "all",
                "rateProvisionCodes": [],
                "rateProvisionCodesExpanded": [],
            },
        },
        # commodity selection
        "commodities": {
            "aggregation": "Aggregate Commodities",
            "codeDisplayFormat": "YES",
            "commodities": [],
            "commoditiesExpanded": [],
            "commoditiesManual": "",
            "commodityGroups": {"systemGroups": [], "userGroups": []},
            "commoditySelectType": "all",
            "granularity": "2",
            "groupGranularity": None,
            "searchGranularity": None,
        },
        # data series and timeframe
        "componentSettings": {
            "dataToReport": ["CONS_FIR_UNIT_QUANT"],
            "scale": "1",
            "timeframeSelectType": "fullYears",
            "years": ["2022", "2023"],
            "startDate": None,
            "endDate": None,
            "startMonth": None,
            "endMonth": None,
            "yearsTimeline": "Annual",
        },
        # country selection
        "countries": {
            "aggregation": "Aggregate Countries",
            "countries": [],
            "countriesExpanded": [{"name": "All Countries", "value": "all"}],
            "countriesSelectType": "all",
            "countryGroups": {"systemGroups": [], "userGroups": []},
        },
    },
    # sorting and export/report formatting
    "sortingAndDataFormat": {
        "DataSort": {"columnOrder": [], "fullColumnOrder": [], "sortOrder": []},
        "reportCustomizations": {
            "exportCombineTables": False,
            "showAllSubtotal": True,
            "subtotalRecords": "",
            "totalRecords": "20000",
            "exportRawData": False,
        },
    },
}

In [None]:
# Work on an independent deep copy of the template. A plain alias (or a shallow
# .copy()) shares the nested dicts, so edits made for one example would leak
# back into basicQuery and into every other example built from it.
requestData = copy.deepcopy(basicQuery)

In [None]:
# Request the saved queries for the configured token and print the Response
# object as a quick check that the call went through.
response = requests.get(
    url=f"{baseUrl}/api/v2/savedQuery/getAllSavedQueries",
    headers=headers,
    verify=False,
)
print(response)

First API Call (Basic Query)

In [None]:
# Submit the basic query to the report endpoint; the parsed body is used by
# the next cell to build a DataFrame.
response = requests.post(
    url=f"{baseUrl}/api/v2/report2/runReport",
    headers=headers,
    json=requestData,
    verify=False,
)
print(response)

In [None]:
# Build a three-column DataFrame by hand from the first table of the report.
firstTable = response.json()['dto']['tables'][0]  # parse the body once and reuse it
columnGroups = firstTable['column_groups']

# One label from the first column group, two from the second.
columns = [
    columnGroups[0]['columns'][0]['label'],
    columnGroups[1]['columns'][0]['label'],
    columnGroups[1]['columns'][1]['label'],
]

# Keep the first three cell values of every row in the first row group.
data = [
    [entries[0]['value'], entries[1]['value'], entries[2]['value']]
    for entries in (row['rowEntries'] for row in firstTable['row_groups'][0]['rowsNew'])
]

df = pd.DataFrame(data, columns=columns)

df.head()  # Shows first 5 rows in table

Building a more complex query step by step, as on the DataWeb website

In [None]:
# make an independent copy of the basic query to modify for the TFCS example
# (dict.copy() is shallow: the nested 'reportOptions' dict would still be shared
# with basicQuery, so the edits below would silently change the template too)
tfcsExampleQuery = copy.deepcopy(basicQuery)

In [None]:
# set the trade type; the template already holds 'Import', so this line only
# shows where the value would be changed
tfcsExampleQuery['reportOptions'].update(tradeType='Import')

In [None]:
# set the classification system; 'HTS' is also the template value
# NOTE(review): "TFCS" presumably stands for "Trade Flow and Classification
# System" (the step name), not a classification value - confirm
tfcsExampleQuery['reportOptions'].update(classificationSystem='HTS')
# run the TFCS example query
response = requests.post(
    url=f"{baseUrl}/api/v2/report2/runReport",
    headers=headers,
    json=tfcsExampleQuery,
    verify=False,
)
response

In [None]:
# helper functions for turning report responses into tables: example for testing
def getData(dataGroups):
    """Collect the cell values of each report row.

    Args:
        dataGroups: iterable of row dicts; each row has a 'rowEntries' list
            whose items are dicts with a 'value' key.

    Returns:
        A list with one inner list per row, holding that row's 'value'
        entries in their original order.
    """
    return [[entry['value'] for entry in row['rowEntries']] for row in dataGroups]

def getColumns(columnGroups, prevCols = None):
    """Flatten a nested column-group structure into a list of labels.

    Args:
        columnGroups: iterable whose items are dicts or lists. A dict with a
            'columns' key is descended into; otherwise a dict with a 'label'
            key contributes that label; a list is descended into. Any other
            item is ignored.
        prevCols: optional list to append to. When given, it is extended in
            place and returned (this is how the recursion accumulates labels).

    Returns:
        The list of labels in traversal order.
    """
    columns = [] if prevCols is None else prevCols
    for group in columnGroups:
        if isinstance(group, list):
            getColumns(group, columns)
        elif isinstance(group, dict):
            # 'columns' takes precedence over 'label' when both are present
            if 'columns' in group:
                getColumns(group['columns'], columns)
            elif 'label' in group:
                columns.append(group['label'])
    return columns

def printQueryResults(headers, requestData):
    """Run a report query and return the first result table as a DataFrame.

    Args:
        headers: HTTP headers to send with the request.
        requestData: query dict, sent as the JSON body of the request.

    Returns:
        pandas.DataFrame built from the first table of the response: column
        labels come from its 'column_groups', rows from the 'rowsNew' entries
        of its first row group.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    response = requests.post(baseUrl+"/api/v2/report2/runReport",
                            headers=headers, json=requestData, verify=False)
    # Fail with the HTTP status instead of a confusing KeyError/JSON error
    # further down when the request was rejected.
    response.raise_for_status()

    # Parse the body once and reuse it for both columns and rows.
    table = response.json()['dto']['tables'][0]

    columns = getColumns(table['column_groups'])
    data = getData(table['row_groups'][0]['rowsNew'])

    return pd.DataFrame(data, columns = columns)

In [None]:
# run the basic query through the helper and preview the first rows
printQueryResults(headers=headers, requestData=requestData).head()

Step 2: Data and Years

In [None]:
# Independent deep copy of the template: dict.copy() is shallow, so the nested
# 'componentSettings' edited in the following cells would otherwise be the very
# same dict object that basicQuery (and every other example) uses.
timeFrameExample = copy.deepcopy(basicQuery)

In [None]:
# modify timeframe settings
# To select entire years, set timeframeSelectType to fullYears and provide the
# list of years in years. Setting yearsTimeline to Annual will aggregate the
# data by calendar year.
timeFrameExample['searchOptions']['componentSettings'].update({
    'timeframeSelectType': 'fullYears',
    'years': ['2020', '2021', '2022', '2023'],
    'yearsTimeline': 'Annual',
})

In [None]:
# Variation: keep the full-year selection but restrict it to 2023 and set
# yearsTimeline to Monthly. This cell only stages the settings on the query;
# nothing is requested or printed here.
timeFrameExample['searchOptions']['componentSettings'].update({
    'timeframeSelectType': 'fullYears',
    'years': ['2023'],
    'yearsTimeline': 'Monthly',
})

In [None]:
# To select a specific date range, set timeframeSelectType to specificDateRange
# and provide the startDate and endDate in MM/YYYY format. Setting
# yearsTimeline to Monthly will aggregate the data by month.
timeFrameExample['searchOptions']['componentSettings'].update({
    'startDate': '06/2022',
    'endDate': '10/2023',
    'timeframeSelectType': 'specificDateRange',
    'yearsTimeline': 'Monthly',
})

In [None]:
# run the query with the timeframe settings currently staged on
# timeFrameExample and preview the first rows
printQueryResults(headers=headers, requestData=timeFrameExample).head()

# Step 3: Countries
# Countries can be specified in the query in a couple of different ways. 
# The first is to select individual countries manually, but you can also select specific country groups that are managed by the DataWeb application or groups that you saved to your DataWeb user account.

In [None]:
# Independent deep copy of the template: dict.copy() is shallow, so the nested
# 'countries' settings edited below would otherwise also change basicQuery.
countriesExampleQuery = copy.deepcopy(basicQuery)

In [None]:
# get list of all countries and choose them manually
response = requests.get(baseUrl+"/api/v2/country/getAllCountries",
                         headers=headers, verify=False)
countryOptions = response.json()['options']  # parse the body once and reuse it
df = pd.DataFrame(countryOptions)
# Only the last expression of a cell is rendered automatically, so the preview
# needs an explicit display() to show up from the middle of the cell.
display(df.head())

# select specific countries
# (positions 1 and 4 of the returned list are arbitrary picks for this example)
countries = [countryOptions[1], countryOptions[4]]

# Add selected countries to the query
countriesExampleQuery['searchOptions']['countries']['countries'] = [x['value'] for x in countries]
# NOTE(review): 'countriesSelectType' is still "all" from the template - confirm
# whether the API needs a different select type for this explicit list to apply.

# get list of all user country groups
response = requests.get(baseUrl+"/api/v2/country/getAllUserGroupsWithCountries",
                         headers=headers, verify=False)
response.json()

In [None]:
# or instead you can use a user country group
response = requests.get(baseUrl+"/api/v2/country/getAllUserGroupsWithCountries",
                         headers=headers, verify=False)
# json is a method and must be called; indexing the bound method itself
# (response.json['options']) raises a TypeError.
userGroupOptions = response.json()['options']

# Take up to the first two user groups; slicing avoids an IndexError for
# accounts that have fewer than two saved groups.
countryGroups = userGroupOptions[:2]
display(countryGroups)  # a bare expression mid-cell would not be rendered

# The template's 'countryGroups' entry is a dict with 'systemGroups' and
# 'userGroups' lists, so the selection goes under 'userGroups' rather than
# replacing the whole dict with a list.
# NOTE(review): confirm against the API docs that 'userGroups' takes the group 'value' entries.
countriesExampleQuery['searchOptions']['countries']['countryGroups']['userGroups'] = [x['value'] for x in countryGroups]

printQueryResults(headers, countriesExampleQuery).head()

In [None]:

# Step 4: Commodities
# Independent deep copy of the template: dict.copy() is shallow, so the nested
# 'commodities' settings edited later would otherwise also change basicQuery.
commoditiesExampleQuery = copy.deepcopy(basicQuery)
# request options sent as the JSON body to the commodity-group endpoints
options = {'tradeType': "Import", 'classificationSystem': "HTS", 'timeframesSelectedTab': "fullYears"}
# get all system commodity groups together with their commodities
response = requests.post(baseUrl+"/api/v2/commodity/getAllSystemGroupsWithCommodities",
                         headers=headers, json=options, verify=False)
response.json()

In [None]:
# If you have user-defined commodity groups, retrieve them with the request
# below (there are no system groups); the next cell reads this response.
response = requests.post(
    url=f"{baseUrl}/api/v2/commodity/getAllUserGroupsWithCommodities",
    headers=headers,
    json=options,
    verify=False,
)

In [None]:
# or instead you can use a user commodity group
# Take the first user group if there is one; slicing avoids an IndexError for
# accounts without any saved commodity groups.
commodityGroups = response.json()['options'][:1]
# and add it to the query
# The template's 'commodityGroups' entry is a dict with 'systemGroups' and
# 'userGroups' lists, so the selection goes under 'userGroups' rather than
# replacing the whole dict with a list.
# NOTE(review): confirm against the API docs that 'userGroups' takes the group 'value' entries.
commoditiesExampleQuery['searchOptions']['commodities']['commodityGroups']['userGroups'] = [x['value'] for x in commodityGroups]

printQueryResults(headers, commoditiesExampleQuery).head()