In [1]:
import pandas as pd
print(pd.__version__)

2.2.2


In [2]:
my_list = [10, 20, 30, 40]
series_from_list = pd.Series(my_list)
print(series_from_list)

0    10
1    20
2    30
3    40
dtype: int64


In [3]:
import numpy as np
my_array = np.array([10, 20, 30, 40])
series_from_array = pd.Series(my_array)
print(series_from_array)
my_dict = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
series_from_dict = pd.Series(my_dict)
print(series_from_dict)

0    10
1    20
2    30
3    40
dtype: int64
a    10
b    20
c    30
d    40
dtype: int64


In [4]:
df = series_from_array.reset_index()
print(df)

   index   0
0      0  10
1      1  20
2      2  30
3      3  40


In [5]:
df = pd.concat([series_from_dict, series_from_list], axis=1)
print(df)

      0     1
a  10.0   NaN
b  20.0   NaN
c  30.0   NaN
d  40.0   NaN
0   NaN  10.0
1   NaN  20.0
2   NaN  30.0
3   NaN  40.0


In [6]:
ser = pd.Series(['a', 'b', 'c', 'd'])
ser.name = 'alphabets'
print(ser)

0    a
1    b
2    c
3    d
Name: alphabets, dtype: object


In [7]:
serB = pd.Series(['a','b'])
result = ser[~ser.isin(serB)]
print(result)

2    c
3    d
Name: alphabets, dtype: object


In [8]:
ser_common = pd.Series(np.union1d(ser, serB))
print(ser_common)

Object `B` not found.
0    a
1    b
2    c
3    d
dtype: object


In [10]:
ser = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
minimum = ser.min()
maximum = ser.max()
percentile_25 = ser.quantile(0.25)
percentile_75 = ser.quantile(0.75)
median = ser.quantile(0.5)
print(minimum, maximum, percentile_25, median, percentile_75)

1 10 3.25 5.5 7.75


In [11]:
ser = pd.Series(['a','b','b','b','c','c','e','f','g'])
top_2 = ser.value_counts().nlargest(2).index
result = ser.where(ser.isin(top_2), other='Other')
print(result)

0    Other
1        b
2        b
3        b
4        c
5        c
6    Other
7    Other
8    Other
dtype: object


In [12]:
ser = pd.Series(np.random.random(20))
bins = pd.cut(ser, bins=10)
print(bins)

0     (0.821, 0.909]
1     (0.909, 0.997]
2     (0.205, 0.293]
3     (0.205, 0.293]
4     (0.645, 0.733]
5     (0.909, 0.997]
6     (0.557, 0.645]
7     (0.205, 0.293]
8     (0.116, 0.205]
9     (0.645, 0.733]
10    (0.116, 0.205]
11    (0.116, 0.205]
12    (0.205, 0.293]
13    (0.469, 0.557]
14    (0.381, 0.469]
15    (0.469, 0.557]
16    (0.733, 0.821]
17    (0.733, 0.821]
18    (0.909, 0.997]
19    (0.821, 0.909]
dtype: category
Categories (10, interval[float64, right]): [(0.116, 0.205] < (0.205, 0.293] < (0.293, 0.381] < (0.381, 0.469] ... (0.645, 0.733] < (0.733, 0.821] < (0.821, 0.909] < (0.909, 0.997]]


In [13]:
ser = pd.Series(np.random.randint(1, 10, 7))
positions = np.where(ser % 3 == 0)[0]
print(positions)

[2 4 5]


In [14]:
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])
answer = ser1.index.get_indexer(ser2)
print(answer)

[ 1  3 -1 -1]


In [15]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])
ser_capitalized = ser.str.capitalize()
print(ser_capitalized)

0     How
1      To
2    Kick
3    Ass?
dtype: object


In [16]:
lengths = ser.str.len()
print(lengths)

0    3
1    2
2    4
3    4
dtype: int64


In [19]:
ser = pd.Series(['01 Jan 2010', '02-02-2011', '20120303', '2013/04/04', '2014-05-05', '2015-06-06T12:20'])
formats = [
    '%d %b %Y',      
    '%d-%m-%Y',      
    '%Y%m%d',         
    '%Y/%m/%d',
    '%Y-%m-%d',         
    '%Y-%m-%dT%H:%M'    
]
def convert_date(date_str):
    for fmt in formats:
        try:
            return pd.to_datetime(date_str, format=fmt)
        except ValueError:
            continue
    return pd.NaT

timeseries = ser.apply(convert_date)
print(timeseries)

0   2010-01-01 00:00:00
1   2011-02-02 00:00:00
2   2012-03-03 00:00:00
3   2013-04-04 00:00:00
4   2014-05-05 00:00:00
5   2015-06-06 12:20:00
dtype: datetime64[ns]


In [21]:
ser = pd.Series(['Apple', 'Orange', 'Plan', 'Python', 'Money'])
def contains_at_least_two_vowels(word):
    word = word.lower()
    vowels = 'aeiou'
    count = sum(1 for char in word if char in vowels)
    return count >= 2
filtered_ser = ser[ser.apply(contains_at_least_two_vowels)]
print(filtered_ser)

0     Apple
1    Orange
4     Money
dtype: object


In [22]:
emails = pd.Series(['buying books at amazom.com', 'rameses@egypt.com', 'matt@t.co', 'narendra@modi.com'])
pattern = r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}'
valid_emails = emails[emails.str.contains(pattern, regex=True)]
print(valid_emails)

1    rameses@egypt.com
2            matt@t.co
3    narendra@modi.com
dtype: object


In [None]:
p = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
q = pd.Series([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])
distance = np.sqrt(((p - q) ** 2).sum())
print(distance)