# Open Source in Quantitative Finance 2015 (Frankfurt/Eschborn)

# Quandl WIKI dataset - A first look at the data

In [None]:
import numpy as np
import pandas as pd

In [None]:
# TODO: adjust directory
# Download file from: https://www.quandl.com/api/v1/datasets/WIKI/MSFT.csv and save as MSFT.csv
# Alternatively, load data directly via quandl package
# Keep the trailing slash: later cells build the file path by plain string
# concatenation (data_dir + 'MSFT.csv').
data_dir = '/Users/Felix/quandl_wiki/'

In [None]:
# Load only the first 20 rows as a small preview. The first CSV column becomes
# the index and pandas is asked to parse it as dates.
head = pd.read_csv(data_dir + 'MSFT.csv', nrows=20, index_col=0, parse_dates=True)
# Last expression of the cell: the notebook displays the preview
head

## qgrid (by Quantopian)

In [None]:
import qgrid
# NOTE(review): presumably a one-time setup step that installs qgrid's notebook
# assets - confirm against the installed qgrid version
qgrid.nbinstall()
# Show the preview DataFrame in qgrid's grid view instead of the static HTML table
qgrid.show_grid(head)

# Let's have a second look...this time in Excel

## By default, `pandas` uses
* ### `XlsxWriter` to write `.xlsx`
* ### `openpyxl` to write `.xlsm`
* ### `xlwt` to write `.xls`

In [None]:
# Write the 20-row preview to a new workbook. The sheet name is passed by
# keyword: newer pandas releases deprecate positional arguments after the
# target path, while the keyword form works on old and new versions alike.
head.to_excel('pandas_out.xlsx', sheet_name='Sheet1')

In [None]:
# Open the workbook that was just written with the system's default application
!open pandas_out.xlsx  # on Windows: !start pandas_out.xlsx

## Manipulating existing Excel sheets is ... limited

* ### `openpyxl` can do it, but not everything is supported (e.g. charts are not)

In [None]:
from openpyxl import load_workbook

# Add a second sheet to the workbook written earlier instead of replacing it:
# load the existing file and hand it to the writer as the workbook to write into.
writer = pd.ExcelWriter('pandas_out.xlsx', engine='openpyxl')
writer.book = load_workbook('pandas_out.xlsx')
# Sheet name passed by keyword; the positional form is deprecated in newer pandas.
head.to_excel(writer, sheet_name='Sheet2')

# Persist the modified workbook back to pandas_out.xlsx.
# NOTE(review): assigning `writer.book` and calling `writer.save()` follow the
# pandas API this notebook was written against; newer pandas releases are
# understood to offer ExcelWriter(..., mode='a') used as a context manager
# instead - confirm against the installed pandas version.
writer.save()

In [None]:
# Reopen the workbook to check that Sheet2 has been added next to Sheet1
!open pandas_out.xlsx  # on Windows: !start pandas_out.xlsx

# `xlwings`: interacts with an open/unsaved Workbook
* ### Windows: by wrapping `pywin32` (COM interface)
* ### Mac: by wrapping `appscript` (AppleScript)
* ### In turn, xlwings needs an installation of Microsoft Excel

In [None]:
from xlwings import Application, Workbook, Range, Sheet, Chart, ChartType

In [None]:
# Create a new workbook; the unqualified Range call below then addresses it
wb = Workbook()
# Only the top-left cell is named: assigning the DataFrame writes the whole
# frame starting from that cell
Range("A1").value = head
# Fit the column widths of the first sheet to the data just written
Sheet(1).autofit()

In [None]:
# Add a second sheet and write the same preview there, this time starting at B2.
# Range takes the sheet (by name here) as an optional first argument.
Sheet.add('Sheet2')
Range('Sheet2', 'B2').value = head
Sheet(2).autofit()

### 2d arrays: nested lists

In [None]:
# A1 notation: sheet name plus an Excel-style address.
# Reading a multi-cell range returns a nested list, one inner list per row.
Sheet(1).activate()
Range('Sheet1', 'A1:B2').value

In [None]:
# The same range in index notation (1-based, as in Excel):
# sheet index, then (row, column) of the top-left and of the bottom-right cell
Range(1, (1,1),(2,2)).value

In [None]:
# Get a contiguous Range of cells (as in: "Ctrl-Shift-Down-and-Right"),
# starting from K2 on the first sheet, and read its values
out = Range(1, 'K2').table.value
out

In [None]:
# Assign to top-left corner: only B25 is named, the nested list in `out`
# fills the cells to the right of and below it
Range(1, 'B25').value = out

### `table` returns a `Range` object, so we can use any `Range` attribute/method on it:

In [None]:
# Expand from B25 to the contiguous block written in the previous cell and
# clear that block's contents
Range('B25').table.clear_contents()

### `NumPy` Arrays

In [None]:
# Same table as before, but asarray=True asks for the values as a NumPy array
# rather than nested lists
Range(1, 'K2', asarray=True).table.value

### `pandas` DataFrames

In [None]:
# Read the whole table that starts at A1 as nested lists, then use the first
# row as the column labels and the remaining rows as the data
data = Range('A1').table.value
df = pd.DataFrame(data[1:], columns=data[0])
df

# Let's do some `pandas` magic

In [None]:
# Load the complete file this time (no nrows limit), indexed by the parsed
# dates in the first column. This rebinds `df` from the previous section.
df = pd.read_csv(data_dir + 'MSFT.csv', index_col=0, parse_dates=True)
df.head()

### Let's create a "behavior" table: Adj. Volume by month x weekday

In [None]:
# Sum the adjusted volume for every (calendar month, weekday) pair, then pivot
# the weekday level into columns: rows are months 1-12, columns weekdays 0-4.
grouped = df.groupby([df.index.month, df.index.weekday])
behavior = grouped['Adj. Volume'].sum().unstack()

# Relabel by key rather than by position (pandas numbers weekdays from
# Monday=0 and months from 1). Positional assignment would silently shift the
# labels, or fail on a length mismatch, if a month or weekday were absent
# from the data.
behavior = behavior.rename(
    index=dict(enumerate(['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
                          'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'], 1)),
    columns=dict(enumerate(['MO', 'TU', 'WE', 'TH', 'FR'])),
)
behavior

### Now let's use Excel to create a heatmap

In [None]:
# New workbook for the heatmap; write the month x weekday table starting at A1
wb_heatmap = Workbook()
Range('A1').value = behavior
# NOTE(review): column A presumably ends up holding the month labels (the
# frame's index), so a time format should have no visible effect there -
# confirm whether this line is intended or a leftover
Range('A:A').number_format = 'HH:MM'
Sheet(1).autofit()

## Let's add a Chart, too

In [None]:
# Add a line chart whose source data is the contiguous table starting at A1
chart = Chart.add(source_data=Range('A1').table,
                  chart_type=ChartType.xlLine)

### Manipulate attributes

In [None]:
# Read the name the new chart currently has
chart.name

In [None]:
# Rename the chart, then read the name back to confirm the change
chart.name = 'AdjVolume'
chart.name

# One more thing: PDF Reporting with ReportLab

In [None]:
from reportlab.platypus import SimpleDocTemplate, Table

# Pull the contiguous table that starts at B1 out of Excel and wrap it in a
# reportlab Table
data = Range('B1').table.value
table = Table(data)

# A reportlab document is built from a list of Flowables; the table is the
# only one in this report
elements = [table]

# Lay the content out and write it to report_basics.pdf
doc = SimpleDocTemplate("report_basics.pdf")
doc.build(elements)

In [None]:
# Open the generated PDF with the system's default application
!open report_basics.pdf  # on Windows: !start report_basics.pdf