In [3]:
import pandas as pd

# Build a Series from a plain Python list; pandas assigns a default 0..n-1 index.
s = pd.Series(data=[1, 2, 3, 4])
print(s)

0    1
1    2
2    3
3    4
dtype: int64


In [5]:
# A Series of strings of differing lengths.
s = pd.Series(data=['x', 'y', 'z', 'abc'])
print(s)

0      x
1      y
2      z
3    abc
dtype: object


In [6]:
# Two-element Series of strings.
s = pd.Series(data=['vaishnav', 'yadav'])
print(s)

0    vaishnav
1       yadav
dtype: object


In [7]:
# Building a Series from a dict: the keys become the index labels,
# kept in the dict's insertion order (b, a, c).
d = dict(b=1, a=0, c=2)
s = pd.Series(data=d)

print(s)

b    1
a    0
c    2
dtype: int64


In [11]:
import numpy as np

# Build a Series from a NumPy integer array (10, 20, ..., 50).
data = np.arange(10, 60, 10)
s = pd.Series(data=data)

print(s)

0    10
1    20
2    30
3    40
4    50
dtype: int32


## Creating Series From Scalar

In [12]:
# A scalar is repeated once for every label supplied in `index`.
pd.Series(data=5.0, index=list('abcd'))

a    5.0
b    5.0
c    5.0
d    5.0
dtype: float64

## Accessing Properties/Attributes and Methods of Series

In [13]:
import pandas as pd
import numpy as np

# Eight-element integer Series (10, 20, ..., 80) used by the attribute/method demos below.
data = np.arange(10, 90, 10)
s = pd.Series(data=data)

In [14]:
# Show the core attributes of the Series, one labelled line per attribute.
for label, value in (
    ("Data Type:", s.dtype),
    ("Shape:", s.shape),
    ("Values:", s.values),
    ("Array:", s.array),
):
    print(label, value)

Data Type: int32
Shape: (8,)
Values: [10 20 30 40 50 60 70 80]
Array: <PandasArray>
[10, 20, 30, 40, 50, 60, 70, 80]
Length: 8, dtype: int32


In [15]:
# to_numpy() returns the underlying data as a NumPy ndarray.
ndarray_values = s.to_numpy()
print("Method to extract actual numpy ndarray:", ndarray_values)

Method to extract actual numpy ndarray: [10 20 30 40 50 60 70 80]


In [16]:
# First five entries (n=5 is the default, spelled out here).
s.head(n=5)

0    10
1    20
2    30
3    40
4    50
dtype: int32

In [17]:
# Last five entries (n=5 is the default, spelled out here).
s.tail(n=5)

3    40
4    50
5    60
6    70
7    80
dtype: int32

In [18]:
# Print a concise summary of the Series: index range, non-null count, dtype and memory usage.
s.info()

<class 'pandas.core.series.Series'>
RangeIndex: 8 entries, 0 to 7
Series name: None
Non-Null Count  Dtype
--------------  -----
8 non-null      int32
dtypes: int32(1)
memory usage: 160.0 bytes


## Accessing data using Indexing and Slicing

In [19]:
# With the default integer index, label 2 is the third element.
s = pd.Series(data=[1, 2, 3, 4, 5])

print(s.loc[2])

3


In [20]:
# Positional slice: everything from position 1 onwards.
print(s.iloc[1:])

1    2
2    3
3    4
4    5
dtype: int64


In [21]:
# Positional slice: positions 1, 2 and 3 (the stop position 4 is excluded).
print(s.iloc[1:4])

1    2
2    3
3    4
dtype: int64


In [22]:
# Select several entries at once by passing a list of index labels.
print(s.loc[[1, 4]])

1    2
4    5
dtype: int64


In [23]:
# Same values as before, but with explicit string labels 'a'..'e' as the index.
s = pd.Series(data=[1, 2, 3, 4, 5], index=list('abcde'))

print(s)

a    1
b    2
c    3
d    4
e    5
dtype: int64


In [24]:
# Look up a single value by its index label.
print(s.loc['a'])

1


In [25]:
# Label-based slice: from label 'a' through the end of the Series.
print(s.loc['a':])

a    1
b    2
c    3
d    4
e    5
dtype: int64


In [26]:
# Pick out several entries by passing a list of labels.
print(s.loc[['a', 'b', 'e']])

a    1
b    2
e    5
dtype: int64


In [27]:
# .get() returns its default (None) instead of raising when the label is absent.
missing = s.get("f", default=None)
print(missing)

None


## Creating DataFrame using Python dict, list or tuple

In [30]:
# Dict of equal-length lists: each key becomes a column.
# The np.nan in 'Age' marks a missing value.
data = {
    'Name': ['vaishnav', 'Ashish', 'Anant'],
    'Age': [23, np.nan, 24],
    'Gender': ['Male'] * 3,
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,Gender
0,vaishnav,23.0,Male
1,Ashish,,Male
2,Anant,24.0,Male


In [31]:
# List of row tuples; with no column names given, pandas labels the columns 0..3.
data = [
    ('1/1/2019', 13, 6, 'Rain'),
    ('2/1/2019', 11, 7, 'Fog'),
    ('3/1/2019', 12, 8, 'Sunny'),
    ('4/1/2019', 8, 5, 'Snow'),
    ('5/1/2019', 9, 6, 'Rain'),
]
df = pd.DataFrame(data=data)

df

Unnamed: 0,0,1,2,3
0,1/1/2019,13,6,Rain
1,2/1/2019,11,7,Fog
2,3/1/2019,12,8,Sunny
3,4/1/2019,8,5,Snow
4,5/1/2019,9,6,Rain


In [32]:
# Tuple of row tuples, this time with explicit column names.
data = (
    ('1/1/2019', 13, 6, 'Rain'),
    ('2/1/2019', 11, 7, 'Fog'),
    ('3/1/2019', 12, 8, 'Sunny'),
    ('4/1/2019', 8, 5, 'Snow'),
    ('5/1/2019', 9, 6, 'Rain'),
)
column_names = ['Day', 'Temperature', 'WindSpeed', 'Event']
df = pd.DataFrame(data=data, columns=column_names)

df

Unnamed: 0,Day,Temperature,WindSpeed,Event
0,1/1/2019,13,6,Rain
1,2/1/2019,11,7,Fog
2,3/1/2019,12,8,Sunny
3,4/1/2019,8,5,Snow
4,5/1/2019,9,6,Rain


In [33]:
# Tuple of row lists, with both row labels (I1..I5) and column names supplied.
data = (
    ['1/1/2019', 13, 6, 'Rain'],
    ['2/1/2019', 11, 7, 'Fog'],
    ['3/1/2019', 12, 8, 'Sunny'],
    ['4/1/2019', 8, 5, 'Snow'],
    ['5/1/2019', 9, 6, 'Rain'],
)
row_labels = [f"I{i}" for i in range(1, 6)]
column_names = ['Day', 'Temperature', 'WindSpeed', 'Event']

df = pd.DataFrame(data=data, index=row_labels, columns=column_names)

df

Unnamed: 0,Day,Temperature,WindSpeed,Event
I1,1/1/2019,13,6,Rain
I2,2/1/2019,11,7,Fog
I3,3/1/2019,12,8,Sunny
I4,4/1/2019,8,5,Snow
I5,5/1/2019,9,6,Rain


## Creating DataFrame using Numpy Array

In [34]:
# Seed the generator so the random matrix, and every output derived from it,
# is identical on each "Restart Kernel -> Run All".
SEED = 42
rng = np.random.default_rng(SEED)

# 1000 x 100 matrix of integers drawn uniformly from the half-open range [100, 1999).
arr = rng.integers(100, 1999, size=(1000, 100))

print(arr.shape)

(1000, 100)


In [35]:
# Wrap the 2-D array in a DataFrame; rows and columns get default integer labels.
df = pd.DataFrame(data=arr)

df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,1408,304,1688,1244,559,608,901,1922,970,1894,...,1907,798,1267,571,1765,1856,948,1858,1119,805
1,1777,853,1298,1463,1256,1703,418,1281,505,1950,...,279,1326,988,1536,1839,610,1472,665,148,1584
2,1600,1325,248,1610,1339,224,1552,1367,287,429,...,1591,1687,1072,937,229,1805,523,867,1323,1778
3,152,1769,1505,334,347,1950,584,1210,1944,739,...,973,262,704,918,853,1818,533,1043,257,1021
4,1178,207,1258,1585,1601,126,530,442,1002,207,...,1571,526,1742,1542,1978,967,674,1274,1813,1666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1285,906,1779,1888,362,408,1177,1988,1132,1017,...,159,730,1092,1866,1047,1872,1297,790,1210,1746
996,551,1861,1740,1214,1488,1346,383,1647,1931,464,...,1515,1736,1609,617,1347,1776,1632,1870,661,1562
997,1696,434,1699,1344,789,1192,1448,419,561,1300,...,990,1357,1551,883,1418,1714,949,1165,325,1097
998,1223,332,1139,522,696,836,781,1444,1604,839,...,924,529,1422,1186,755,1523,1345,1078,643,1605


In [36]:
# Name the columns col_1 .. col_N, taking N from the array's own width so the
# labels always match `arr` even if its shape is changed in the cell above.
column_labels = [f"col_{i}" for i in range(1, arr.shape[1] + 1)]
df = pd.DataFrame(arr, columns=column_labels)

df

Unnamed: 0,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10,...,col_91,col_92,col_93,col_94,col_95,col_96,col_97,col_98,col_99,col_100
0,1408,304,1688,1244,559,608,901,1922,970,1894,...,1907,798,1267,571,1765,1856,948,1858,1119,805
1,1777,853,1298,1463,1256,1703,418,1281,505,1950,...,279,1326,988,1536,1839,610,1472,665,148,1584
2,1600,1325,248,1610,1339,224,1552,1367,287,429,...,1591,1687,1072,937,229,1805,523,867,1323,1778
3,152,1769,1505,334,347,1950,584,1210,1944,739,...,973,262,704,918,853,1818,533,1043,257,1021
4,1178,207,1258,1585,1601,126,530,442,1002,207,...,1571,526,1742,1542,1978,967,674,1274,1813,1666
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1285,906,1779,1888,362,408,1177,1988,1132,1017,...,159,730,1092,1866,1047,1872,1297,790,1210,1746
996,551,1861,1740,1214,1488,1346,383,1647,1931,464,...,1515,1736,1609,617,1347,1776,1632,1870,661,1562
997,1696,434,1699,1344,789,1192,1448,419,561,1300,...,990,1357,1551,883,1418,1714,949,1165,325,1097
998,1223,332,1139,522,696,836,781,1444,1604,839,...,924,529,1422,1186,755,1523,1345,1078,643,1605


## Accessing Attributes/Properties and Methods of DataFrame

In [37]:
# Build each column as its own Series, then combine them into one DataFrame.
# 'Rating' contains one missing value (np.nan).
names = pd.Series(['Tom', 'Jack', 'Steve', 'Ricky', 'Vin', 'James', 'Vin'])
ages = pd.Series([25, 26, 25, 35, 23, 33, 31])
ratings = pd.Series([4.23, 4.1, 3.4, 5, 2.9, np.nan, 3.1])

data = {'Name': names, 'Age': ages, 'Rating': ratings}
df = pd.DataFrame(data=data)

df

Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
1,Jack,26,4.1
2,Steve,25,3.4
3,Ricky,35,5.0
4,Vin,23,2.9
5,James,33,
6,Vin,31,3.1


In [38]:
# Print a concise summary of the DataFrame: index range, per-column non-null counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Name    7 non-null      object 
 1   Age     7 non-null      int64  
 2   Rating  6 non-null      float64
dtypes: float64(1), int64(1), object(1)
memory usage: 296.0+ bytes


In [39]:
# First five rows (n=5 is the default, spelled out here).
df.head(n=5)

Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
1,Jack,26,4.1
2,Steve,25,3.4
3,Ricky,35,5.0
4,Vin,23,2.9


In [40]:
# First two rows only.
df.head(n=2)

Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
1,Jack,26,4.1


In [41]:
# Last five rows (n=5 is the default, spelled out here).
df.tail(n=5)

Unnamed: 0,Name,Age,Rating
2,Steve,25,3.4
3,Ricky,35,5.0
4,Vin,23,2.9
5,James,33,
6,Vin,31,3.1


In [42]:
# Last two rows only.
df.tail(n=2)

Unnamed: 0,Name,Age,Rating
5,James,33,
6,Vin,31,3.1


## DataFrame to .csv & .xlsx

In [44]:
# Write the DataFrame to a CSV file.
# A relative path (resolved against the notebook's working directory) replaces the
# machine-specific absolute path, so the cell runs on any machine and no longer
# embeds a personal directory name.
# index=False leaves out the default 0..n-1 row index, which would otherwise be
# written as an extra unnamed first column.
df.to_csv('new_csv_file.csv', index=False)

## Reading .csv File - Iris Dataset

In [45]:
import pandas as pd

# Load the Iris dataset. Iris.csv is expected in the notebook's working directory;
# a relative path replaces the machine-specific absolute path so the cell runs
# wherever the notebook and its data file are kept together.
df = pd.read_csv('Iris.csv')

# Preview the first five rows.
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
