In [1]:
import csv
import numpy as np
import datetime
import time
from core.plot import Plotter2D 
from core.data import Data

In [2]:
def date_to_timestamp(x, format='%Y-%m-%d'):
    """Parse a date string and return it as a Unix timestamp.

    Args:
        x: Date string to parse.
        format: ``strptime`` pattern describing ``x``.

    Returns:
        Seconds since the epoch as a float. The parsed (naive) datetime is
        handed to ``time.mktime``, so it is interpreted in the local timezone
        of the machine running the notebook.
    """
    parsed = datetime.datetime.strptime(x, format)
    struct = parsed.timetuple()
    return time.mktime(struct)


def to_numeric(x):
    """Convert ``x`` to a float, returning NaN when it cannot be converted.

    Args:
        x: Value to convert, typically a raw CSV cell.

    Returns:
        ``float(x)`` when the conversion succeeds, otherwise ``np.nan``.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        # ValueError: text that is not a number (e.g. '' or '.').
        # TypeError: input that is neither text nor a number (e.g. None).
        return np.nan


def read_csv(path, x_index, y_indices, x_convert, y_convert, skip=0):
    """Load selected columns of a CSV file as normalised ``Data`` series.

    The first ``skip`` lines are discarded, the following line is used as the
    header, and every remaining row contributes one sample to each series.

    Args:
        path: Path of the CSV file to read.
        x_index: Column index of the x value within each row.
        y_indices: Sequence of column indices, one per series to extract.
        x_convert: Callable applied to the raw x cell of each row.
        y_convert: Callable applied to each selected raw y cell.
        skip: Number of lines to discard before the header line.

    Returns:
        A ``(ds, header)`` tuple. ``ds`` is a list holding one ``Data`` per
        entry of ``y_indices``, each with its ``mean`` subtracted and divided
        by its ``std``. ``header`` is the array of header cells taken at
        ``y_indices``.
    """
    xs, ys = [], []

    with open(path) as f:
        # csv.reader is already an iterator, so next() can be used directly.
        reader = csv.reader(f)
        for _ in range(skip):
            next(reader)

        header = np.take(next(reader), y_indices)

        for row in reader:
            xs.append(x_convert(row[x_index]))
            # Build a real list per row: on Python 3 ``map`` is lazy, and a
            # list of map objects would become a 1-D object array that cannot
            # be sliced by column below.
            ys.append([y_convert(cell) for cell in np.take(row, y_indices)])

    xs, ys = np.array(xs), np.array(ys)
    ds = [Data(xs, ys[:, i]) for i in range(ys.shape[1])]

    # Normalise each series using its own mean and std.
    ds = [(d - d.mean) / d.std for d in ds]

    return ds, header


def plot_read_csv(ds, header, reset_x=False):
    """Plot each series and its scaled autocorrelation in two stacked panels.

    Args:
        ds: Series objects to plot; each must provide ``x`` and
            ``autocorrelation()``.
        header: Legend labels, indexed in step with ``ds``.
        reset_x: When true, every series in ``ds`` is modified in place
            before plotting: ``_evenly_spaced`` is set to ``True`` and ``x``
            is replaced by ``0 .. len(x) - 1``.
    """
    if reset_x:
        # Force evenly spaced and replace the x axis with integer positions.
        for series in ds:
            series._evenly_spaced = True
            series.x = np.arange(len(series.x))

    plotter = Plotter2D()

    def draw_panel(position, caption, transform):
        # Draw one of the two stacked subplots with a legend entry per series.
        plotter.subplot(2, 1, position)
        plotter.title(caption)
        for idx, series in enumerate(ds):
            plotter.plot(transform(series), label=header[idx])
        plotter.show_legend()

    def scaled_autocorrelation(series):
        # Autocorrelation divided by its own ``max`` attribute.
        acf = series.autocorrelation()
        return acf / acf.max

    draw_panel(1, 'Time Series', lambda series: series)
    draw_panel(2, 'Autocorrelation', scaled_autocorrelation)
    plotter.show()

In [13]:
# data/currency.csv: discard one line before the header, use column 0 (parsed
# with int) as x and columns 4..15 as the series, then plot against integer
# positions instead of the original x values.
plot_read_csv(
    *read_csv(
        path='data/currency.csv',
        x_index=0,
        y_indices=range(4, 16),
        x_convert=int,
        y_convert=to_numeric,
        skip=1
    ),
    reset_x=True
)

In [12]:
# data/DCOILBRENTEU.csv: column 0 holds dates (parsed to timestamps) and
# column 1 is the single series to load.
ds, header = read_csv(
    path='data/DCOILBRENTEU.csv',
    x_index=0,
    y_indices=[1],
    x_convert=date_to_timestamp,
    y_convert=to_numeric
)
# Keep only the first element returned by fragment(500, 800).
ds[0] = ds[0].fragment(500, 800)[0]
plot_read_csv(ds=ds, header=header, reset_x=True)