In [4]:
import numpy as np
import pandas as pd

frame = pd.DataFrame(np.arange(9).reshape((3, 3)), index=['a', 'c', 'd'], columns=['Ohio', 'Texas', 'California'])

frame


Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [5]:
frame2 = frame.reindex(['a', 'b', 'c', 'd'])

frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [8]:
states = ['Texas', 'Utah', 'California']

frame = frame.reindex(columns=states)

frame             

Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


In [17]:
frame.loc[['a', 'b', 'c', 'd'], states]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  """Entry point for launching an IPython kernel.


Unnamed: 0,Texas,Utah,California
a,1.0,,2.0
b,,,
c,4.0,,5.0
d,7.0,,8.0


In [18]:
frame.reindex(['a', 'b', 'c', 'd'])

Unnamed: 0,Texas,Utah,California
a,1.0,,2.0
b,,,
c,4.0,,5.0
d,7.0,,8.0


In [19]:
frame.drop('Utah', axis=1)


Unnamed: 0,Texas,California
a,1,2
c,4,5
d,7,8


In [23]:
frame.drop('Utah', axis=1).iat[2,1]

8

In [24]:
format = lambda x: '%.2f' % x

frame.applymap(format)

Unnamed: 0,Texas,Utah,California
a,1.0,,2.0
c,4.0,,5.0
d,7.0,,8.0


In [25]:
frame.index.is_unique


True

In [28]:
import numpy.random as npr


array([3, 2, 3, 0, 5, 3, 3, 6, 2, 0, 4, 6, 0, 3, 5, 0, 1, 0, 4, 6, 6])

In [47]:
npr.seed(69)
cub_frame = pd.DataFrame(npr.randint(1, 7, 3*7).reshape((7, 3)), columns=['1 серия', '2 серия', '3 серия'], index=['1', '2', '3', '4', '5', '6', '7'])
cub_frame

Unnamed: 0,1 серия,2 серия,3 серия
1,4,2,4
2,3,5,2
3,2,1,2
4,1,1,5
5,6,3,5
6,4,6,4
7,1,1,3


In [50]:
res = cub_frame.apply(pd.value_counts)

res

Unnamed: 0,1 серия,2 серия,3 серия
1,2.0,3.0,
2,1.0,1.0,2.0
3,1.0,1.0,1.0
4,2.0,,2.0
5,,1.0,2.0
6,1.0,1.0,


In [52]:
res.sum(axis=1) 

1    5.0
2    4.0
3    3.0
4    4.0
5    3.0
6    2.0
dtype: float64

In [53]:
npr.seed(69)
cub_frame = pd.DataFrame(npr.randint(1, 7, 3*7).reshape((3, 7)), index=['1 серия', '2 серия', '3 серия'], columns=['1', '2', '3', '4', '5', '6', '7'])
cub_frame

Unnamed: 0,1,2,3,4,5,6,7
1 серия,4,2,4,3,5,2,2
2 серия,1,2,1,1,5,6,3
3 серия,5,4,6,4,1,1,3


In [55]:
res = cub_frame.apply(pd.value_counts, axis=1)

res

Unnamed: 0,1,2,3,4,5,6
1 серия,,3.0,1.0,2.0,1.0,
2 серия,3.0,1.0,1.0,,1.0,1.0
3 серия,2.0,,1.0,2.0,1.0,1.0


In [56]:
res.sum(axis=0) 

1    5.0
2    4.0
3    3.0
4    4.0
5    3.0
6    2.0
dtype: float64

In [69]:
pd.Series(cub_frame.values.flatten()).value_counts()


1    5
4    4
2    4
5    3
3    3
6    2
dtype: int64

In [1]:
from datetime import datetime

In [8]:
df = pd.DataFrame(pd.date_range(start='1/1/2018', end='1/08/2018', freq='H'), columns=['date'])
df['data'] = np.random.randint(0,100,size=(len(date_rng)))
df = df.set_index('date')
df.head(10)

Unnamed: 0_level_0,data
date,Unnamed: 1_level_1
2018-01-01 00:00:00,8
2018-01-01 01:00:00,6
2018-01-01 02:00:00,61
2018-01-01 03:00:00,46
2018-01-01 04:00:00,4
2018-01-01 05:00:00,87
2018-01-01 06:00:00,0
2018-01-01 07:00:00,7
2018-01-01 08:00:00,57
2018-01-01 09:00:00,48


In [9]:
string_date_rng_2 = ['June-01-2018', 'June-02-2018', 'June-03-2018']
timestamp_date_rng_2 = [datetime.strptime(x,'%B-%d-%Y') for x in string_date_rng_2]
timestamp_date_rng_2

[datetime.datetime(2018, 6, 1, 0, 0),
 datetime.datetime(2018, 6, 2, 0, 0),
 datetime.datetime(2018, 6, 3, 0, 0)]

In [12]:
df.resample('D').sum()

Unnamed: 0_level_0,data
date,Unnamed: 1_level_1
2018-01-01,953
2018-01-02,1057
2018-01-03,1151
2018-01-04,887
2018-01-05,1198
2018-01-06,1243
2018-01-07,1117
2018-01-08,98


In [16]:
df['rolling_sum'] = df.rolling(3).sum()
df['rolling_center'] = df.data.rolling(4,center=True).sum()
df['rolling_sum_backfilled'] = df['rolling_sum'].fillna(method='backfill')
df.head(10)

Unnamed: 0_level_0,data,rolling_sum,rolling_center,rolling_sum_backfilled
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-01 00:00:00,8,,,75.0
2018-01-01 01:00:00,6,,,75.0
2018-01-01 02:00:00,61,75.0,121.0,75.0
2018-01-01 03:00:00,46,113.0,117.0,113.0
2018-01-01 04:00:00,4,111.0,198.0,111.0
2018-01-01 05:00:00,87,137.0,137.0,137.0
2018-01-01 06:00:00,0,91.0,98.0,91.0
2018-01-01 07:00:00,7,94.0,151.0,94.0
2018-01-01 08:00:00,57,64.0,112.0,64.0
2018-01-01 09:00:00,48,112.0,159.0,112.0


In [19]:
df.shift(1).head()

Unnamed: 0_level_0,data,rolling_sum,rolling_center,rolling_sum_backfilled
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-01 00:00:00,,,,
2018-01-01 01:00:00,8.0,,,75.0
2018-01-01 02:00:00,6.0,,,75.0
2018-01-01 03:00:00,61.0,75.0,121.0,75.0
2018-01-01 04:00:00,46.0,113.0,117.0,113.0
