# Chapter 4: Programming Fundamentals 



Let us begin with simple computations in Python using +,-,* and / operators and functions.

In [4]:
import numpy as np
import pandas as pd
import math


In [14]:
v = np.array([1,33,4,29,9,90])
gmean = (math.prod(v))**(1/len(v))
gmean

12.075621482045438

Commonly used functions like sqrt(),exp(), and log() are made available in the `math` package.

In [21]:
def geomean(v):
    gmean = (math.prod(v))**(1/len(v))
    return(gmean)

In [22]:
x = np.array([1,3,54,9,29,234,6,2,8,2456,2,8])
geomean(x)

13.520702172120803

In [16]:
d = np.array([2,4,63,6,3])
g = 1/d
hmean = len(d)/(np.sum(g))
hmean

3.949843260188088

In [24]:
def harmean(v):
  a = 1/v
  hmean = len(v)/(np.sum(a))
  return(hmean)

In [27]:
v = np.array([2,23,3456,1,4,6])
geomean(v)

12.500396862365385

In [28]:
harmean(v)

3.0605463611219847

In [29]:
v = np.array([4,6,23,6,375,9])
cutoff = 6
vsub = v[v>cutoff]
vsub

array([ 23, 375,   9])

In [31]:
len(vsub)

3

In [34]:
def vecsize(v, cutoff = 1):
  vsub = v[v>cutoff]
  return(len(vsub))

In [37]:
v = np.array([.2,5,2,.6,3,57,34,5])
vecsize(v,10)

2

In [38]:
vecsize(v)

6

In [40]:
vec1 = np.array([2,2,6,3,546,2346,22,34,7,21,4])
cutoff1 = 5
cutoff2 = 100 
vsub = vec1[(vec1>cutoff1) & (vec1<cutoff2)]
vsub

array([ 6, 22, 34,  7, 21])

In [41]:
def intermediate (vec1, cutoff1, cutoff2):
  vsub = vec1[(vec1>cutoff1) & (vec1<cutoff2)]
  return(vsub)

vec1 = np.array([2,2,6,3,546,2346,22,34,7,21,4])
intermediate(vec1,5,100)

array([ 6, 22, 34,  7, 21])

In [42]:
def intermediate (vec1, cutoff1, 
                           cutoff2):
  vsub = vec1[(vec1>cutoff1) & 
             (vec1<cutoff2)]
  return(vsub)

vec1 = np.array(
     [2,2,6,3,546,2346,22,34,7,21,4])
intermediate(vec1,5,100)

array([ 6, 22, 34,  7, 21])

In [27]:
f1 = (student['gender']==0) & (student['prog']==3) & (student['math']>60)
f1

0      False
1      False
2      False
3      False
4      False
       ...  
309    False
310    False
311    False
312    False
313    False
Length: 314, dtype: bool

In [28]:
x3 = student[f1]
print(x3.head())

       id  gender  math  prog  daysabs
88   1089       0    84     3        4
164  2007       0    71     3        0
166  2009       0    71     3        0
168  2011       0    77     3        2
172  2015       0    65     3        1


## Factor Variables

In [29]:
arthritis = pd.read_csv("../data/Arthritis.csv")
arthritis.head()

Unnamed: 0.1,Unnamed: 0,ID,Treatment,Sex,Age,Improved
0,1,57,Treated,Male,27,Some
1,2,46,Treated,Male,29,
2,3,77,Treated,Male,30,
3,4,17,Treated,Male,32,Marked
4,5,36,Treated,Male,46,Marked


In [30]:
arthritis['Sex'].value_counts()

Female    59
Male      25
Name: Sex, dtype: int64

In [44]:
arthritis[['Treatment','Improved']].value_counts().unstack()

Improved,Marked,None,Some
Treatment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Placebo,7,29,7
Treated,21,13,7


In [32]:
arthritis[['Treatment','Improved']].value_counts(normalize=True).unstack()

Improved,Marked,None,Some
Treatment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Placebo,0.083333,0.345238,0.083333
Treated,0.25,0.154762,0.083333


In [46]:
arthritis.groupby('Improved')['Treatment'].value_counts(normalize=True).unstack()

Treatment,Placebo,Treated
Improved,Unnamed: 1_level_1,Unnamed: 2_level_1
Marked,0.25,0.75
,0.690476,0.309524
Some,0.5,0.5


In [47]:
arthritis.groupby('Treatment')['Improved'].value_counts(normalize=True).unstack()

Improved,Marked,None,Some
Treatment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Placebo,0.162791,0.674419,0.162791
Treated,0.512195,0.317073,0.170732


In [49]:
df3 = arthritis[['Sex','Treatment','Improved']].value_counts().unstack()[['None','Some','Marked']]
df3

Unnamed: 0_level_0,Improved,None,Some,Marked
Sex,Treatment,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,Placebo,19.0,7.0,6.0
Female,Treated,6.0,5.0,16.0
Male,Placebo,10.0,,1.0
Male,Treated,7.0,2.0,5.0


In [37]:
#df4 = df3.reset_index(level='Improved')
#display(df4)
df4 = df3.unstack()
df4

Improved,Marked,Marked,None,None,Some,Some
Treatment,Placebo,Treated,Placebo,Treated,Placebo,Treated
Sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Female,6.0,16.0,19.0,6.0,7.0,5.0
Male,1.0,5.0,10.0,7.0,,2.0


## Numeric Variables

In [110]:
whiteside = pd.read_csv('../data/whiteside.csv',index_col=0)
whiteside.head()

Unnamed: 0_level_0,Temp,Gas
Insul,Unnamed: 1_level_1,Unnamed: 2_level_1
Before,-0.8,7.2
Before,-0.7,6.9
Before,0.4,6.4
Before,2.5,6.0
Before,2.9,5.8


In [14]:
whiteside.groupby(['Insul'])['Temp'].agg(np.mean)

Insul
After     4.463333
Before    5.350000
Name: Temp, dtype: float64

In [23]:
whiteside.groupby(['Insul'])['Temp'].mean()

Insul
After     4.463333
Before    5.350000
Name: Temp, dtype: float64