In [1]:
from sklearn import datasets
import thinkplot
import thinkstats2
import pandas as pd
import numpy as np

In [2]:
# First kind: a Series built from a plain list of floats. No index is given,
# so pandas labels the six values with the default integer index 0..5.
series_data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0, 0.11, 0.44],
)
series_data

0    0.25
1    0.50
2    0.75
3    1.00
4    0.11
5    0.44
dtype: float64

In [3]:
# Inspect the two parts of the Series separately: the stored values and the
# index object (here the automatically generated RangeIndex).
print('values: ', series_data.values)
print('index: ', series_data.index)

values:  [0.25 0.5  0.75 1.   0.11 0.44]
index:  RangeIndex(start=0, stop=6, step=1)


In [4]:
# Second kind: a Series of random integers.
# Five integers are drawn from the half-open range [0, 100). No seed is set,
# so the values change every time this cell is run.
random_items = np.random.randint(0, 100, size=5)
series_data3 = pd.Series(data=random_items)
series_data3

0    78
1     1
2     7
3    51
4    47
dtype: int32

In [5]:
# Reuse the random values in `random_items`, but label them with an explicit
# letter index ('a'..'e') instead of the default integer positions.
letter_index = list('abcde')
new_series_data = pd.Series(data=random_items, index=letter_index)
new_series_data

a    78
b     1
c     7
d    51
e    47
dtype: int32

In [6]:
# A Series with user-defined labels: every key of the mapping becomes the
# index label of its value.
series_data2 = pd.Series({'a': 0.25, 'b': 0.5, 'c': 0.75, 'd': 1.0, 'e': 0.44})
series_data2

a    0.25
b    0.50
c    0.75
d    1.00
e    0.44
dtype: float64

In [7]:
# Label-based lookup: fetch the value stored under index label 'b'.
series_data2.loc['b']

0.5

In [8]:
# Build a two-column DataFrame from two Series; pandas aligns the columns on
# their index labels.
# NOTE(review): series_data2 is labelled 'a'..'e' while series_data3 carries
# the default integer index 0..4, so no labels match. The result is the union
# of both indexes (10 rows) with NaN wherever a column has no entry. If a
# side-by-side 5-row table was intended, new_series_data (letter-indexed)
# looks like the series to use here — confirm.
combine = pd.DataFrame({'first_series' : series_data2, 'second_series' : series_data3}) 
combine

Unnamed: 0,first_series,second_series
a,0.25,
b,0.5,
c,0.75,
d,1.0,
e,0.44,
0,,78.0
1,,1.0
2,,7.0
3,,51.0
4,,47.0


In [9]:
# Evaluating Differences in Subgroups
# Effect Size
# CohenEffectSize
def CohenEffectSize(group1, group2):
    """
    Cohen's effect size: the difference between the group means, expressed
    in units of a pooled standard deviation.

    Each group's variance (as returned by its `.var()` method) is weighted
    by the group's length; the weighted sum is divided by the combined
    length, and the difference in means is divided by the square root of
    that pooled value.

    group1: Series or DataFrame
    group2: Series or DataFrame

    returns: float if the arguments are Series;
             Series if the arguments are DataFrames
    """
    mean_gap = group1.mean() - group2.mean()

    size1 = len(group1)
    size2 = len(group2)
    total_size = size1 + size2

    # Length-weighted average of the two group variances.
    weighted_var = (size1 * group1.var() + size2 * group2.var()) / total_size

    return mean_gap / np.sqrt(weighted_var)