# Operators and Expressions in Python

## Libraries and settings

In [1]:
# Libraries
# NOTE: in the visible notebook, shutil is only referenced by the disabled
# Codespaces setup further down in this cell.
import os
import shutil
import json
import numpy as np
import pandas as pd
from tabulate import tabulate

# API credentials for Kaggle
# Reads 'kaggle.json' relative to the current working directory; the file must
# be a JSON object providing the keys 'username' and 'key' used below.
with open('kaggle.json') as f:
    data = json.load(f)

# Export the credentials as environment variables for this process.
# NOTE(review): presumably the kaggle client picks these up when it is
# imported/authenticated, which would explain why the import below comes
# after the assignments -- confirm before reordering this cell.
os.environ['KAGGLE_USERNAME'] = data['username']
os.environ['KAGGLE_KEY'] = data['key']

from kaggle.api.kaggle_api_extended import KaggleApi

# Set up Kaggle API on GitHub Codespaces
# (Disabled alternative: copies kaggle.json into /home/vscode/.kaggle and
# restricts the copied file's permissions to 0o600.)
# source_file = os.path.join(os.getcwd(), 'kaggle.json')
# destination_dir = os.path.join(os.getcwd(), '/home/vscode/.kaggle')
# shutil.copy(source_file, destination_dir)
# file_path = '/home/vscode/.kaggle/kaggle.json'
# os.chmod(file_path, 0o600)

# Ignore warnings
# NOTE: the 'ignore' filter applies to every warning category, so deprecation
# notices raised by later cells are hidden as well.
import warnings
warnings.filterwarnings('ignore')

# Show current working directory
# (relative paths such as 'kaggle.json' are resolved against this directory)
print(os.getcwd())


/Users/ivesbrunner/Documents/Studium/01_Bachelor/04_Semester/04_ScientificProgramming/scientific_programming/Week_02/exercises


## Get data from Kaggle via Kaggle's Web API

In [2]:
# Dataset on Kaggle and the single file to fetch from it
dataset_slug = 'vittoriogiatti/bigmacprice'
csv_name = 'BigmacPrice.csv'

# Create the API client and authenticate it
api = KaggleApi()
api.authenticate()

# Download the file (no target path is given, so the API's default location is used)
api.dataset_download_file(dataset_slug, csv_name)

# Load the downloaded CSV into a pandas data frame and display it
df = pd.read_csv(csv_name, sep=',')
df

Unnamed: 0,date,currency_code,name,local_price,dollar_ex,dollar_price
0,2000-04-01,ARS,Argentina,2.50,1,2.50
1,2000-04-01,AUD,Australia,2.59,1,2.59
2,2000-04-01,BRL,Brazil,2.95,1,2.95
3,2000-04-01,GBP,Britain,1.90,1,1.90
4,2000-04-01,CAD,Canada,2.85,1,2.85
...,...,...,...,...,...,...
1941,2022-07-01,AED,United Arab Emirates,18.00,3,6.00
1942,2022-07-01,USD,United States,5.15,1,5.15
1943,2022-07-01,UYU,Uruguay,255.00,41,6.22
1944,2022-07-01,VES,Venezuela,10.00,5,2.00


## Use Python operators and expressions to create subsets of the data

### Subset based on date & dollar price

In [3]:
# Boolean mask shared by both subsets: rows dated 2022-07-01
on_2022_07_01 = df['date'] == '2022-07-01'

# Countries with lower Big Mac prices (dollar price of at most 2.00)
df_sub_01 = df.loc[on_2022_07_01 & (df['dollar_price'] <= 2.00)]
print('\nCountries with lower Big Mac prices\n')
print(tabulate(df_sub_01, headers=list(df_sub_01.columns)))

# Countries with higher Big Mac prices (dollar price of at least 6.00)
df_sub_02 = df.loc[on_2022_07_01 & (df['dollar_price'] >= 6.00)]
print('\nCountries with higher Big Mac prices\n')
print(tabulate(df_sub_02, headers=list(df_sub_02.columns)))


Countries with lower Big Mac prices

      date        currency_code    name         local_price    dollar_ex    dollar_price
----  ----------  ---------------  ---------  -------------  -----------  --------------
1880  2022-07-01  BHD              Bahrain             1.6             1            1.6
1909  2022-07-01  KWD              Kuwait              1.3             1            1.3
1920  2022-07-01  OMR              Oman                1.42            1            1.42
1944  2022-07-01  VES              Venezuela          10               5            2

Countries with higher Big Mac prices

      date        currency_code    name                    local_price    dollar_ex    dollar_price
----  ----------  ---------------  --------------------  -------------  -----------  --------------
1877  2022-07-01  AUD              Australia                      6.7             1            6.7
1884  2022-07-01  CAD              Canada                         6.77            1            

### Subset based on country

In [4]:
# Inflation can be read off the development of 'local_price' over time,
# so keep only the rows of a single country (Venezuela) and preview them.
is_venezuela = df['name'] == 'Venezuela'
df_sub_03 = df.loc[is_venezuela]
df_sub_03.head()

Unnamed: 0,date,currency_code,name,local_price,dollar_ex,dollar_price
88,2002-04-01,VEF,Venezuela,2500.0,857,2.92
120,2003-04-01,VEF,Venezuela,3700.0,1598,2.32
160,2004-05-01,VEF,Venezuela,4400.0,2977,1.48
201,2005-06-01,VEF,Venezuela,5600.0,2629,2.13
233,2006-01-01,VEF,Venezuela,5900.0,2615,2.26


### Subset based on multiple variables and operators

In [16]:
# Note the operators here: == (is equal to), & (and), | (OR)
# Each comparison yields a boolean mask; the masks are then combined element-wise.
# NOTE: the 'dollar_price_category' column in the saved output of this cell only
# exists once the cell that creates it further down has been run.
on_2002_04_01 = df['date'] == '2002-04-01'
on_2022_07_01 = df['date'] == '2022-07-01'
is_switzerland = df['name'] == 'Switzerland'

df_sub_04 = df.loc[(on_2002_04_01 | on_2022_07_01) & is_switzerland]
df_sub_04

Unnamed: 0,date,currency_code,name,local_price,dollar_ex,dollar_price,dollar_price_category
83,2002-04-01,CHF,Switzerland,6.3,1,6.3,higher price
1937,2022-07-01,CHF,Switzerland,6.5,1,6.5,higher price


## Use Python operators and expressions to create new variables

### Create a new variable 'dollar_price_category' based on the dollar price


In [6]:
# Here, pandas apply() method is used in combination with the lambda function (we will come back to this later in the module)
# Dollar prices of at least the threshold are labelled 'higher price', all others 'lower price'.
price_threshold = 5.00
df['dollar_price_category'] = df['dollar_price'].apply(
    lambda price: 'higher price' if price >= price_threshold else 'lower price'
)
df

Unnamed: 0,date,currency_code,name,local_price,dollar_ex,dollar_price,dollar_price_category
0,2000-04-01,ARS,Argentina,2.50,1,2.50,lower price
1,2000-04-01,AUD,Australia,2.59,1,2.59,lower price
2,2000-04-01,BRL,Brazil,2.95,1,2.95,lower price
3,2000-04-01,GBP,Britain,1.90,1,1.90,lower price
4,2000-04-01,CAD,Canada,2.85,1,2.85,lower price
...,...,...,...,...,...,...,...
1941,2022-07-01,AED,United Arab Emirates,18.00,3,6.00,higher price
1942,2022-07-01,USD,United States,5.15,1,5.15,higher price
1943,2022-07-01,UYU,Uruguay,255.00,41,6.22,higher price
1944,2022-07-01,VES,Venezuela,10.00,5,2.00,lower price


### Create a pivot table with the new variable 'dollar_price_category'  

In [7]:
# Subset only the latest prices (rows dated 2022-07-01)
df_sub_05 = df.loc[(df['date'] == '2022-07-01')]

# Create pivot table with mean prices, grouped by price category and country.
# The aggregation is named by the string 'mean' instead of passing np.mean:
# recent pandas releases deprecate NumPy callables here (FutureWarning, hidden
# in this notebook by the global warnings filter); the result is the same.
table = pd.pivot_table(df_sub_05[['name', 'dollar_price', 'dollar_price_category']],
                       index=['dollar_price_category', 'name'],
                       values=['dollar_price'],
                       aggfunc='mean')
print(table)

                                 dollar_price
dollar_price_category name                   
higher price          Australia          6.70
                      Canada             6.77
                      Finland            5.25
                      Ireland            5.00
                      Israel             5.67
...                                       ...
lower price           Taiwan             2.59
                      Thailand           3.56
                      Turkey             2.76
                      Venezuela          2.00
                      Vietnam            2.95

[70 rows x 1 columns]


### Task 02: Big Mac price change in Switzerland

In [17]:
# Note the operators here: == (is equal to), & (and), | (OR)
# Rows from either of the two dates ...
selected_dates = (df['date'] == '2002-04-01') | (df['date'] == '2022-07-01')
# ... restricted to a single country
selected_country = df['name'] == 'Switzerland'

df_sub_04 = df.loc[selected_dates & selected_country]
df_sub_04

Unnamed: 0,date,currency_code,name,local_price,dollar_ex,dollar_price,dollar_price_category
83,2002-04-01,CHF,Switzerland,6.3,1,6.3,higher price
1937,2022-07-01,CHF,Switzerland,6.5,1,6.5,higher price


### Task 02: Big Mac price change in Switzerland compared to the USA

In [22]:
# Note the operators here: == (is equal to), & (and), | (OR)
# Rows from either of the two dates ...
selected_dates = (df['date'] == '2002-04-01') | (df['date'] == '2022-07-01')
# ... for either of the two countries being compared
selected_countries = (df['name'] == 'Switzerland') | (df['name'] == 'United States')

df_sub_04 = df.loc[selected_dates & selected_countries]
df_sub_04

Unnamed: 0,date,currency_code,name,local_price,dollar_ex,dollar_price,dollar_price_category
83,2002-04-01,CHF,Switzerland,6.3,1,6.3,higher price
87,2002-04-01,USD,United States,2.35,1,2.35,lower price
1937,2022-07-01,CHF,Switzerland,6.5,1,6.5,higher price
1942,2022-07-01,USD,United States,5.15,1,5.15,higher price


### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [18]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Separator line framing the footer, and the time at which the cell runs
footer_rule = '-----------------------------------'
run_timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# OS family, platform name and release, execution time and interpreter version
print(footer_rule)
print(os.name.upper())
print(f"{platform.system()} | {platform.release()}")
print(f"Datetime: {run_timestamp}")
print(f"Python Version: {python_version()}")
print(footer_rule)

-----------------------------------
POSIX
Darwin | 23.2.0
Datetime: 2024-03-06 14:08:00
Python Version: 3.10.13
-----------------------------------
