In [1]:
import numpy as np
import pandas as pd

In [3]:
# Sample Series of lowercase names plus one numeric string ('4') used by the .str examples.
names = pd.Series(['andrew' , 'bobo' , 'claire' , 'david' , '4'])
names

0    andrew
1      bobo
2    claire
3     david
4         4
dtype: object

In [5]:
# Upper-case the first character of every element; the digit string '4' comes back unchanged.
names.str.capitalize()

0    Andrew
1      Bobo
2    Claire
3     David
4         4
dtype: object

In [6]:
# Element-wise test for strings consisting only of digits (True only for '4' here).
names.str.isdigit()

0    False
1    False
2    False
3    False
4     True
dtype: bool

In [7]:
# Each list element is ONE string holding several comma-separated ticker symbols.
tech_finance=['GOOG,APPL,AMZN' , 'JPM,BAC,GS']

In [8]:
# Wrap the list in a Series so the .str accessor can be applied to it.
tickers=pd.Series(tech_finance)
tickers

0    GOOG,APPL,AMZN
1        JPM,BAC,GS
dtype: object

In [9]:
# Split every string on commas; each element becomes a list of tickers.
tickers.str.split(',')

0    [GOOG, APPL, AMZN]
1        [JPM, BAC, GS]
dtype: object

In [10]:
# Chain .str[0] after the split to pick the first item of each resulting list.
tickers.str.split(',').str[0]

0    GOOG
1     JPM
dtype: object

In [11]:
# expand=True spreads the split pieces across DataFrame columns instead of returning lists.
tickers.str.split(',' , expand=True)

Unnamed: 0,0,1,2
0,GOOG,APPL,AMZN
1,JPM,BAC,GS


# Cleaning up and editing strings

In [16]:
# Names with stray leading/trailing whitespace and an unwanted ';' to be cleaned up.
messy_names= pd.Series(['andrew  ' , 'bo;bo' , '  claire  '])
messy_names

0      andrew  
1         bo;bo
2      claire  
dtype: object

In [17]:
# Remove ';' (regex=False: always treat the pattern as literal text, whatever the
# installed pandas version uses as its default), trim surrounding whitespace,
# then capitalize each name.
messy_names.str.replace(';' , '' , regex=False).str.strip().str.capitalize()

0    Andrew
1      Bobo
2    Claire
dtype: object

In [18]:
# Another (more reusable) way: put the cleaning steps into a function
def clean_up(name):
    """Return ``name`` with every ';' removed, outer whitespace trimmed, and capitalized.

    The steps run in that order: semicolon removal, ``strip``, then
    ``capitalize`` (first character upper-cased, the rest lower-cased).
    """
    return name.replace(';' , '').strip().capitalize()


# Call clean_up on each element of the Series; the result is a new Series.
messy_names.apply(clean_up)

0    Andrew
1      Bobo
2    Claire
dtype: object

In [20]:
# The original Series is unchanged: the cleaning calls returned new objects.
messy_names

0      andrew  
1         bo;bo
2      claire  
dtype: object

In [21]:
# Alternative: np.vectorize wraps clean_up so it can be called on the whole Series at once.
# The result is a plain NumPy array, not a Series (the index is dropped).
# NOTE(review): NumPy documents vectorize as a convenience wrapper rather than a
# performance tool, so time it (e.g. %timeit) before calling it the faster option.
np.vectorize(clean_up)(messy_names)

array(['Andrew', 'Bobo', 'Claire'], dtype='<U6')

# Dates and times in Python

In [33]:
from datetime import datetime

# Components of the example timestamp 2024-10-12 10:23:30.
my_year, my_month, my_day = 2024, 10, 12
my_hour, my_minutes, my_seconds = 10, 23, 30

In [34]:
# Build a standard-library datetime from the individual components (year down to seconds).
my_date=datetime(my_year , my_month , my_day , my_hour , my_minutes , my_seconds)
my_date

datetime.datetime(2024, 10, 12, 10, 23, 30)

In [35]:
type(my_date)

datetime.datetime

# Dates and times in pandas

In [29]:
# Dates written in two different string formats, plus a missing value.
my_time=pd.Series(['nov 3 , 2000' , '2000-01-01' , None])
my_time

0    nov 3 , 2000
1      2000-01-01
2            None
dtype: object

In [31]:
type(my_time[0])

str

In [32]:
# Convert every entry to a Timestamp: both string formats are parsed and None becomes NaT.
pd.to_datetime(my_time)

  pd.to_datetime(my_time)


0   2000-11-03
1   2000-01-01
2          NaT
dtype: datetime64[ns]

In [36]:
pd.to_datetime(my_time)[0]

  pd.to_datetime(my_time)[0]


Timestamp('2000-11-03 00:00:00')

In [37]:
# By default the ambiguous '10-12-2020' is read month-first, i.e. as October 12.
euro_date='10-12-2020'
pd.to_datetime(euro_date)

Timestamp('2020-10-12 00:00:00')

In [38]:
# dayfirst=True reads the same string day-first, i.e. as December 10.
pd.to_datetime(euro_date , dayfirst=True)

Timestamp('2020-12-10 00:00:00')

In [41]:
# An explicit format string handles the unusual '--' separators:
# %d = day of month, %b = abbreviated month name, %Y = four-digit year.
style_date= '12--Dec--2000'
pd.to_datetime(style_date , format= '%d--%b--%Y')

Timestamp('2000-12-12 00:00:00')

In [42]:
# Even this free-form text is parsed without a format string (result: 2000-12-12).
strange_date='12th of Dec 2000'
pd.to_datetime(strange_date)

Timestamp('2000-12-12 00:00:00')

In [65]:
# Load the CSV with default parsing; the DATE column is read in as plain strings.
sales = pd.read_csv('RetailSales_BeerWineLiquor.csv')
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [44]:
sales.iloc[0]['DATE']

'1992-01-01'

In [45]:
type(sales.iloc[0]['DATE'])

str

In [46]:
# Replace the string DATE column with parsed datetime values.
sales['DATE']=pd.to_datetime(sales['DATE'])

In [47]:
sales.iloc[0]['DATE']

Timestamp('1992-01-01 00:00:00')

In [48]:
type(sales.iloc[0]['DATE'])

pandas._libs.tslibs.timestamps.Timestamp

In [2]:
# Better way: let read_csv parse column 0 (DATE) as datetimes while loading,
# so no separate pd.to_datetime conversion step is needed.
sales= pd.read_csv('RetailSales_BeerWineLiquor.csv' , parse_dates=[0])
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [3]:
# Make DATE the index so the frame can be resampled by time.
sales=sales.set_index('DATE')

In [68]:
# Building the time series: the frame is now indexed by DATE
sales

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-01-01,1509
1992-02-01,1541
1992-03-01,1597
1992-04-01,1675
1992-05-01,1822
...,...
2019-12-01,6630
2020-01-01,4388
2020-02-01,4533
2020-03-01,5562


# Resampling: grouping time-indexed data by period (works much like groupby)

In [58]:
# Group the rows by calendar year and average each group; rows are labelled with the year-end date.
# NOTE(review): newer pandas releases deprecate the 'A' alias in favour of 'YE' — confirm against the installed version.
sales.resample(rule='A').mean()  # 'A' --> yearly (year-end) frequency

Unnamed: 0_level_0,MRTSSM4453USN
DATE,Unnamed: 1_level_1
1992-12-31,1807.25
1993-12-31,1794.833333
1994-12-31,1841.75
1995-12-31,1833.916667
1996-12-31,1929.75
1997-12-31,2006.75
1998-12-31,2115.166667
1999-12-31,2206.333333
2000-12-31,2375.583333
2001-12-31,2468.416667


In [4]:
# Turn DATE back into an ordinary column so it can be selected as sales['DATE'].
sales=sales.reset_index()
sales

Unnamed: 0,DATE,MRTSSM4453USN
0,1992-01-01,1509
1,1992-02-01,1541
2,1992-03-01,1597
3,1992-04-01,1675
4,1992-05-01,1822
...,...,...
335,2019-12-01,6630
336,2020-01-01,4388
337,2020-02-01,4533
338,2020-03-01,5562


In [11]:
# The .dt accessor exposes datetime components for every row; here, the month number.
sales['DATE'].dt.month

0       1
1       2
2       3
3       4
4       5
       ..
335    12
336     1
337     2
338     3
339     4
Name: DATE, Length: 340, dtype: int32

In [76]:
# True for rows whose date falls in a leap year.
sales['DATE'].dt.is_leap_year

0       True
1       True
2       True
3       True
4       True
       ...  
335    False
336     True
337     True
338     True
339     True
Name: DATE, Length: 340, dtype: bool

In [77]:
# Quarter number of each date.
# NOTE(review): this rebinds `sales` from the DataFrame to a Series of quarter numbers,
# so re-running this cell (or any earlier cell that expects the DataFrame) no longer
# works until the CSV is reloaded; consider a separate name such as `sales_quarter`.
sales= sales['DATE'].dt.quarter

In [78]:
sales

0      1
1      1
2      1
3      2
4      2
      ..
335    4
336    1
337    1
338    1
339    2
Name: DATE, Length: 340, dtype: int32