In [5]:
import numpy as np
import pandas as pd

In [3]:
# Series
# A Series is a one-dimensional labelled array. It can hold any data type
# (integers, strings, floating point numbers, Python objects, ...).
# Its axis labels are collectively called the index.
# General constructor form: s = pd.Series(data, index=index)

# Build a Series from an ndarray, supplying one explicit label per value.
labels = ['a', 'b', 'c', 'd', 'e']
s = pd.Series(np.random.randn(5), index=labels)

# Print, in order: the labelled Series, its index, and a second Series created
# without an index argument (it gets the default integer labels 0..4).
for shown in (s, s.index, pd.Series(np.random.randn(5))):
    print(shown)

a    0.597540
b   -0.561347
c    1.701950
d    0.781344
e   -1.889387
dtype: float64
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
0   -1.094458
1   -0.198573
2    0.200964
3   -0.226398
4    0.816361
dtype: float64


In [13]:
# Boolean mask: True where an element is strictly greater than the Series mean.
mean_of_s = s.mean()
s > mean_of_s

a     True
b    False
c     True
d     True
e    False
dtype: bool

In [17]:
# The data of the Series as a plain NumPy array; the index labels are not included.
s.values

array([ 0.59753988, -0.56134694,  1.70194981,  0.7813441 , -1.88938724])

In [18]:
# Element-wise addition of the Series with itself: every value is doubled
# and the labels are preserved.
s+s

a    1.195080
b   -1.122694
c    3.403900
d    1.562688
e   -3.778774
dtype: float64

In [19]:
# Everything except the first element. `.iloc` makes it explicit that the
# slice is positional rather than label-based (the index holds strings).
s.iloc[1:]

b   -0.561347
c    1.701950
d    0.781344
e   -1.889387
dtype: float64

In [20]:
# Everything except the last element. `.iloc` makes it explicit that the
# slice is positional rather than label-based (the index holds strings).
s.iloc[:-1]

a    0.597540
b   -0.561347
c    1.701950
d    0.781344
dtype: float64

In [21]:
# Arithmetic between Series aligns on index labels, not on position.
# The first operand lacks label 'a' and the second lacks label 'e', so those
# two entries of the result are NaN; the shared labels are summed.
# `.iloc` makes it explicit that both slices are positional.
s.iloc[1:] + s.iloc[:-1]

a         NaN
b   -1.122694
c    3.403900
d    1.562688
e         NaN
dtype: float64

In [22]:
# Zero-filled structured (record) array with two rows and three named fields:
#   'A' -> 4-byte integer, 'B' -> 4-byte float, 'C' -> 10-byte byte string.
# 'S10' is used instead of 'a10': the 'a' type character is a deprecated alias
# of 'S' (NumPy 2.0 warns about it); the resulting dtype is the same.
structured_zeros = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])
structured_zeros


array([(0, 0.0, b''), (0, 0.0, b'')], 
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [6]:
# Two small boolean DataFrames with the same columns ('a', 'b') and the
# default integer row labels.
df1 = pd.DataFrame(
    {'a': [True, False, True], 'b': [False, True, True]},
    dtype=bool,
)
df2 = pd.DataFrame(
    {'a': [False, True, True], 'b': [True, True, False]},
    dtype=bool,
)

# Plain-text dump of both frames, first df1 then df2.
for frame in (df1, df2):
    print(frame)

       a      b
0   True  False
1  False   True
2   True   True
       a      b
0  False   True
1   True   True
2   True  False


In [7]:
# Eight consecutive daily timestamps starting on 2000-01-01.
index = pd.date_range(start='1/1/2000', periods=8)

# 8 x 3 frame of standard-normal samples, one row per date, columns A, B, C.
samples = np.random.randn(8, 3)
df = pd.DataFrame(data=samples, index=index, columns=['A', 'B', 'C'])
df

Unnamed: 0,A,B,C
2000-01-01,1.009941,0.33629,0.978808
2000-01-02,1.136571,1.207545,1.319883
2000-01-03,-0.610897,2.341748,-0.410861
2000-01-04,0.011952,-0.347439,-0.807412
2000-01-05,-0.321612,0.56844,-0.507317
2000-01-06,-0.360231,0.33514,0.547875
2000-01-07,0.314263,-1.262228,-0.17267
2000-01-08,-0.282364,0.370363,-0.795625


In [11]:
# Print the built-in documentation of np.random.randn (interactive lookup).
# NOTE(review): this looks like an exploratory cell; consider dropping it from
# the finished notebook, since the help text is long.
help(np.random.randn)

Help on built-in function randn:

randn(...) method of mtrand.RandomState instance
    randn(d0, d1, ..., dn)
    
    Return a sample (or samples) from the "standard normal" distribution.
    
    If positive, int_like or int-convertible arguments are provided,
    `randn` generates an array of shape ``(d0, d1, ..., dn)``, filled
    with random floats sampled from a univariate "normal" (Gaussian)
    distribution of mean 0 and variance 1 (if any of the :math:`d_i` are
    floats, they are first converted to integers by truncation). A single
    float randomly sampled from the distribution is returned if no
    argument is provided.
    
    This is a convenience function.  If you want an interface that takes a
    tuple as the first argument, use `numpy.random.standard_normal` instead.
    
    Parameters
    ----------
    d0, d1, ..., dn : int, optional
        The dimensions of the returned array, should be all positive.
        If no argument is given a single Python float is ret

In [12]:
# Transpose: swap rows and columns, so the dates become the column labels
# and A, B, C become the row labels.
df.T

Unnamed: 0,2000-01-01 00:00:00,2000-01-02 00:00:00,2000-01-03 00:00:00,2000-01-04 00:00:00,2000-01-05 00:00:00,2000-01-06 00:00:00,2000-01-07 00:00:00,2000-01-08 00:00:00
A,1.009941,1.136571,-0.610897,0.011952,-0.321612,-0.360231,0.314263,-0.282364
B,0.33629,1.207545,2.341748,-0.347439,0.56844,0.33514,-1.262228,0.370363
C,0.978808,1.319883,-0.410861,-0.807412,-0.507317,0.547875,-0.17267,-0.795625


In [13]:
# DataFrame.dot implements matrix multiplication. Multiplying the transposed
# frame by the original one gives a square result labelled by the columns
# A, B, C on both axes.
df_transposed = df.T
df_transposed.dot(df)

Unnamed: 0,A,B,C
A,3.096804,-0.527417,2.866212
B,-0.527417,9.341583,1.05989
C,2.866212,1.05989,4.741247


In [14]:
# Element-wise NumPy ufuncs (log, exp, sqrt, ...) and many other NumPy
# functions can be applied directly to a DataFrame, as long as the data it
# holds are numeric.
exponentiated = np.exp(df)  # result is still a labelled DataFrame
print(exponentiated)

plain_array = np.asarray(df)  # bare 2-D ndarray, row/column labels dropped
print(plain_array)

                   A          B         C
2000-01-01  2.745439   1.399745  2.661283
2000-01-02  3.116066   3.345262  3.742982
2000-01-03  0.542864  10.399396  0.663079
2000-01-04  1.012024   0.706495  0.446011
2000-01-05  0.724979   1.765510  0.602109
2000-01-06  0.697515   1.398136  1.729574
2000-01-07  1.369249   0.283023  0.841415
2000-01-08  0.753999   1.448261  0.451299
[[ 1.00994101  0.33629015  0.97880817]
 [ 1.13657136  1.20754494  1.31988269]
 [-0.61089652  2.34174772 -0.41086093]
 [ 0.01195218 -0.34743904 -0.80741151]
 [-0.32161219  0.56843958 -0.50731714]
 [-0.36023074  0.33513973  0.54787493]
 [ 0.31426273 -1.26222808 -0.17267021]
 [-0.28236394  0.3703632  -0.79562451]]
