In [4]:
# pd.Series is equivalent to numpy.array

import pandas as pd
import numpy as np

# Creating an empty series, will result in DeprecationWarning
#series = pd.Series()

# Passing dtype as a parameter to Series for an empty series to avoid DeprecationWarning
# Creating an empty series
series = pd.Series(dtype='float64')
# Newline to separate series print statements
print('{}\n'.format(series))
series = pd.Series(5)
print('{}\n'.format(series))
series = pd.Series([1, 2, 3])
print('{}\n'.format(series))

series = pd.Series([1, 2.2]) # upcasting
print('{}\n'.format(series))

arr = np.array([1, 2])
series = pd.Series(arr, dtype=np.float32)
print('{}\n'.format(series))

series = pd.Series([[1, 2], [3, 4]])
print('{}\n'.format(series))


Series([], dtype: float64)

0    5
dtype: int64

0    1
1    2
2    3
dtype: int64

0    1.0
1    2.2
dtype: float64

0    1.0
1    2.0
dtype: float32

0    [1, 2]
1    [3, 4]
dtype: object



In [5]:
# Set a custom index

series = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
print('{}\n'.format(series))

series = pd.Series([1, 2, 3], index=['a', 8, 0.3])
print('{}\n'.format(series))

a    1
b    2
c    3
dtype: int64

a      1
8      2
0.3    3
dtype: int64



In [7]:
# You can also use a dictionary

series = pd.Series({'a':1, 'b':2, 'c':3})
print('{}\n'.format(series))

series = pd.Series({'b':2, 'a':1, 'c':3})
print('{}\n'.format(series))

a    1
b    2
c    3
dtype: int64

b    2
a    1
c    3
dtype: int64



In [8]:
df = pd.DataFrame()
# Newline added to separate DataFrames
print('{}\n'.format(df))

df = pd.DataFrame([5, 6])
print('{}\n'.format(df))

df = pd.DataFrame([[5,6]])
print('{}\n'.format(df))

df = pd.DataFrame([[5, 6], [1, 3]],
                  index=['r1', 'r2'],
                  columns=['c1', 'c2'])
print('{}\n'.format(df))

df = pd.DataFrame({'c1': [1, 2], 'c2': [3, 4]},
                  index=['r1', 'r2'])
print('{}\n'.format(df))

Empty DataFrame
Columns: []
Index: []

   0
0  5
1  6

   0  1
0  5  6

    c1  c2
r1   5   6
r2   1   3

    c1  c2
r1   1   3
r2   2   4



In [9]:
upcast = pd.DataFrame([[5, 6], [1.2, 3]])
print('{}\n'.format(upcast))
# Datatypes of each column
print(upcast.dtypes)

     0  1
0  5.0  6
1  1.2  3

0    float64
1      int64
dtype: object


In [32]:
df = pd.DataFrame([[5, 6], [1.2, 3]])
ser = pd.Series([0, 0], name='r3')

print(pd.concat(
    [df, pd.DataFrame([ser])]
))

df2 = pd.DataFrame([[0,0],[9,9]])
df_app = pd.concat([df, df2])
print('{}\n'.format(df_app))

      0  1
0   5.0  6
1   1.2  3
r3  0.0  0
     0  1
0  5.0  6
1  1.2  3
0  0.0  0
1  9.0  9



In [33]:
df = pd.DataFrame({'c1': [1, 2], 'c2': [3, 4],
                   'c3': [5, 6]},
                  index=['r1', 'r2'])
# Drop row r1
df_drop = df.drop(labels='r1')
print('{}\n'.format(df_drop))

# Drop columns c1, c3
df_drop = df.drop(labels=['c1', 'c3'], axis=1)
print('{}\n'.format(df_drop))

df_drop = df.drop(index='r2')
print('{}\n'.format(df_drop))

df_drop = df.drop(columns='c2')
print('{}\n'.format(df_drop))

df.drop(index='r2', columns='c2')
print('{}\n'.format(df_drop))

    c1  c2  c3
r2   2   4   6

    c2
r1   3
r2   4

    c1  c2  c3
r1   1   3   5

    c1  c3
r1   1   5
r2   2   6

    c1  c3
r1   1   5
r2   2   6



In [34]:
mlb_df1 = pd.DataFrame({'name': ['john doe', 'al smith', 'sam black', 'john doe'],
                        'pos': ['1B', 'C', 'P', '2B'],
                        'year': [2000, 2004, 2008, 2003]})
mlb_df2 = pd.DataFrame({'name': ['john doe', 'al smith', 'jack lee'],
                        'year': [2000, 2004, 2012],
                        'rbi': [80, 100, 12]})
                        
print('{}\n'.format(mlb_df1))
print('{}\n'.format(mlb_df1))

mlb_merged = pd.merge(mlb_df1, mlb_df2)
print('{}\n'.format(mlb_merged))

        name pos  year
0   john doe  1B  2000
1   al smith   C  2004
2  sam black   P  2008
3   john doe  2B  2003

        name pos  year
0   john doe  1B  2000
1   al smith   C  2004
2  sam black   P  2008
3   john doe  2B  2003

       name pos  year  rbi
0  john doe  1B  2000   80
1  al smith   C  2004  100



In [2]:
import pandas as pd
df1 = pd.DataFrame({'c1':[1,2], 'c2':[3,4]},
                   index=['r1','r2'])
df2 = pd.DataFrame({'c1':[5,6], 'c2':[7,8]},
                   index=['r1','r2'])
df3 = pd.DataFrame({'c1':[5,6], 'c2':[7,8]})

concat = pd.concat([df1, df2], axis=1)
# Newline to separate print statements
print('{}\n'.format(concat))

concat = pd.concat([df2, df1, df3])
print('{}\n'.format(concat))

concat = pd.concat([df1, df3], axis=1)
print('{}\n'.format(concat))

    c1  c2  c1  c2
r1   1   3   5   7
r2   2   4   6   8

    c1  c2
r1   5   7
r2   6   8
r1   1   3
r2   2   4
0    5   7
1    6   8

     c1   c2   c1   c2
r1  1.0  3.0  NaN  NaN
r2  2.0  4.0  NaN  NaN
0   NaN  NaN  5.0  7.0
1   NaN  NaN  6.0  8.0

