In [1]:
import requests
import pandas as pd

from yitian.datasource import DATA_WAREHOUSE_LOC
from yitian.datasource.quandl import QUANDL_API_VERSION, QUANDL_API_HTTPS, QUANDL_API_KEY, COMMODITY
from yitian.datasource.quandl.opec import OPEC_DATABASE_CODE, OPEC_DATASET_CODE

In [2]:
# Required parameters: the date window sent to the API as start_date / end_date.
start_date, end_date = "2000-01-01", "2020-01-01"

# Optional parameters: reuse a return_format already present in the namespace,
# otherwise fall back to "json".
return_format = locals().get("return_format", "json")

In [3]:
# Build the request URL for the OPEC dataset (data + metadata).
#
# - The template is assembled from adjacent string literals so the URL contains
#   no embedded newlines.
# - `return_format` is now part of the path (`<dataset>.<format>`); previously it
#   was passed to .format() but never referenced, so the parameter had no effect.
#   NOTE: downstream cells parse the response as JSON, so keep it at "json"
#   unless that parsing is adapted as well.
call = (
    "{quan_api_https}/{version}/datasets/{db_name}/{ds_name}.{return_format}"
    "?start_date={start_date}&end_date={end_date}&api_key={api_key}"
).format(quan_api_https=QUANDL_API_HTTPS,
         version=QUANDL_API_VERSION,
         db_name=OPEC_DATABASE_CODE,
         ds_name=OPEC_DATASET_CODE,
         return_format=return_format,
         start_date=start_date,
         end_date=end_date,
         api_key=QUANDL_API_KEY)


In [4]:
# Fetch the dataset and decode the JSON body.
# - Newlines are stripped because `call` may come from a multi-line template.
# - A timeout prevents the cell from hanging forever on a stalled connection.
# - raise_for_status() surfaces HTTP errors (bad key, rate limit, ...) here,
#   instead of as a confusing KeyError when the payload is accessed later.
response = requests.get(call.replace("\n", ""), timeout=30)
response.raise_for_status()
extraction = response.json()

In [6]:
# Show which fields are available under the top-level "dataset" entry.
dataset_meta = extraction["dataset"]
print(dataset_meta.keys())

dict_keys(['database_id', 'database_code', 'start_date', 'name', 'description', 'collapse', 'refreshed_at', 'id', 'type', 'column_index', 'dataset_code', 'data', 'column_names', 'newest_available_date', 'end_date', 'frequency', 'transform', 'premium', 'order', 'limit', 'oldest_available_date'])


In [7]:
# Turn the payload into a DataFrame: rows come from 'data',
# column headers from 'column_names'.
dataset_payload = extraction['dataset']
opec_pd = pd.DataFrame(dataset_payload['data'],
                       columns=dataset_payload['column_names'])

In [8]:
# Order the rows by the 'Date' column, oldest first.
# Reassign instead of using inplace=True so the frame built by the previous
# cell is not mutated behind the reader's back; the original index labels are
# kept, exactly as before.
opec_pd = opec_pd.sort_values(by='Date', ascending=True)

In [13]:
# File name encodes the frequency and date range reported in the dataset metadata.
dataset_meta = extraction['dataset']
outfile_name = "opec_crude_oil_{frequency}_{start}_{end}".format(
    frequency=dataset_meta['frequency'],
    start=dataset_meta['start_date'],
    end=dataset_meta['end_date'],
)

# Despite its name, output_dir holds the full path of the CSV file, not a directory.
output_dir = "{dw_loc}/{commodity}/opec/{outfile_name}.csv".format(
    dw_loc=DATA_WAREHOUSE_LOC,
    commodity=COMMODITY,
    outfile_name=outfile_name,
)

In [18]:
# Write the frame to the CSV path, overwriting any existing file.
# DataFrame.to_csv returns None when given a path, so the result is not bound
# to a name (the former `example_pd` was always None).
opec_pd.to_csv(output_dir, header=True, mode='w', encoding='utf-8')

In [None]:
# Preview the first five rows (head() defaults to n=5).
opec_pd.head()

In [None]:
# Display pandas' descriptive statistics for the extracted frame.
opec_pd.describe()

In [15]:
# Smallest and largest values of the "Date" column, shown as a (min, max) tuple.
(opec_pd["Date"].min(), opec_pd["Date"].max())

('2003-01-02', '2019-12-31')

In [21]:
# Recap of the metadata returned with the data.
# Maps each placeholder in the template to the dataset field that fills it.
dataset_meta = extraction['dataset']
summary_sources = {
    "name": "name",
    "description": "description",
    "start": "start_date",
    "end": "end_date",
    "oa": "oldest_available_date",
    "na": "newest_available_date",
    "frequency": "frequency",
}
extraction_summary = """
Data Extracted From {name};
Described as: {description};
Start from {start} & End at {end};
Oldest Available on {oa} & Newest Available on {na};
Data Frequency is {frequency};
""".format(**{placeholder: dataset_meta[field]
              for placeholder, field in summary_sources.items()})
print(extraction_summary)


Data Extracted From OPEC Crude Oil Price;
Described as: Reference Price for the OPEC Crude Oil Basket.  Currently includes: Saharan Blend (Algeria), Girassol (Angola), Oriente (Ecuador), Iran Heavy (Islamic Republic of Iran), Basra Light (Iraq), Kuwait Export (Kuwait), Es Sider (Libya), Bonny Light (Nigeria), Qatar Marine (Qatar), Arab Light (Saudi Arabia), Murban (UAE) and Merey (Venezuela).;
Start from 2003-01-02 & End at 2020-01-01;
Oldest Available on 2003-01-02 & Newest Available on 2020-02-20;
Data Frequency is daily;

