# Welcome!

### Let's render a nice Word document to skim some samples from our dataset


If you haven't already, install the main package we'll be using:
```
! pip install python-docx
```



In [3]:
# @title Imports

# we'll use pandas in Python to handle the spreadsheet
import pandas as pd

# this is the important one!
from docx import Document

# we'll first try this out on an existing dataset, and then apply it to an imported spreadsheet
from sklearn.datasets import load_iris

In [4]:
# @title Let's start with making a formatted document to browse some classic tabular data
# load the bundled iris dataset and wrap its feature matrix in a DataFrame,
# using the dataset's own feature names as the column labels
iris = load_iris()
iris_data = pd.DataFrame(iris.data, columns=iris.feature_names)

In [5]:
# start from a fresh, empty Word document
doc = Document()

# document title (heading level 0 is the highest-level heading)
doc.add_heading('Iris Dataset', 0)

# label shown in the document -> column name in the iris DataFrame,
# listed in the order the bullets should appear
feature_labels = {
    'Sepal Length': 'sepal length (cm)',
    'Sepal Width': 'sepal width (cm)',
    'Petal Length': 'petal length (cm)',
    'Petal Width': 'petal width (cm)',
}

# one numbered section per sample, counting from 1
for sample_number, (_, sample) in enumerate(iris_data.iterrows(), start=1):
    doc.add_heading(f'Iris Sample #{sample_number}', level=1)

    # one bullet point per feature of this sample
    for label, column in feature_labels.items():
        doc.add_paragraph(f'{label}: {sample[column]}', style='List Bullet')

    # empty paragraph as a visual separator between samples
    doc.add_paragraph('')

In [6]:
# write the iris document built above to a .docx file;
# the path is relative, so it lands in the current working directory
filename = 'iris_data_skim-able.docx'
doc.save(filename)

In [9]:
# @title Now, let's make a general implementation to make a similar format for an arbitrary spreadsheet

# configurations
SPREADSHEET_FILEPATH = '/content/top_attractions.csv' # put the path to your spreadsheet here
DOCX_FILEPATH = 'attractions_skimmable.docx'  # choose a destination docx filepath
SAMPLE_TYPE = 'attraction'  # label for one row/sample; in this example, each sample is a tourist attraction

In [None]:
# load the spreadsheet into a DataFrame (CSV by default)
data = pd.read_csv(SPREADSHEET_FILEPATH)

# if your spreadsheet is an Excel file, use this line instead:
# data = pd.read_excel(SPREADSHEET_FILEPATH)

# create Document object and give it a title-level heading (level 0)
doc = Document()
doc.add_heading(f'Readable format of {SPREADSHEET_FILEPATH}', 0)

# iterate through the rows / samples; number them from 1 (as in the iris
# example) instead of exposing the 0-based DataFrame index in the headings
for sample_number, (_, row) in enumerate(data.iterrows(), start=1):
    # give each sample a custom heading, numbered
    doc.add_heading(f'{SAMPLE_TYPE} #{sample_number}', level=1)

    # iterate through all the columns for each sample
    for col, val in row.items():
        # start an empty bullet so the name and value can be styled separately
        p = doc.add_paragraph(style='List Bullet')

        # the column name is bolded, and is followed by a ':'
        p.add_run(f'{col}: ').bold = True

        # display the column value (as text) after the column name and ':'
        p.add_run(str(val))

    # add an aesthetic blank line to make skimming easier
    doc.add_paragraph('')

# write the finished document to the configured destination
doc.save(DOCX_FILEPATH)

## Thank you for stopping by!