# Pandas DataFrame :-


# Creating a dataframe from dictionaries:-

In [2]:
import pandas as pd

# Column-oriented input: every key becomes a column label and every list
# supplies that column's values, one entry per row.
data = dict(
    Name=['Vansh', 'Dinesh', 'Simran', 'Davinder'],
    Age=[21, 23, 22, 30],
    City=['Jalandhar', 'Amritsar', 'Bathida', 'Batala'],
)

# Build the frame straight from the dictionary; rows get the default 0..3 index.
df = pd.DataFrame(data)
print(df)


       Name  Age       City
0     Vansh   21  Jalandhar
1    Dinesh   23   Amritsar
2    Simran   22    Bathida
3  Davinder   30     Batala


# Creating a DataFrame from a CSV File:-

In [3]:
import os

import pandas as pd

# Location of the Titanic training CSV.
# The original notebook hard-coded one user's Windows download folder, which
# breaks on any other machine. The TITANIC_CSV_PATH environment variable now
# overrides the location; when it is unset the original path is used, so the
# behaviour on the original machine is unchanged.
file_path = os.environ.get(
    "TITANIC_CSV_PATH",
    r"C:\Users\DELL\Downloads\titanic_train.csv",
)

# Only these columns are parsed from the file; every other column is skipped.
selected_columns = ["PassengerId", "Survived", "Name"]
df_from_csv = pd.read_csv(file_path, usecols=selected_columns)

# Preview the first five rows to confirm the load worked.
print(df_from_csv.head())


   PassengerId  Survived                                               Name
0            1         0                            Braund, Mr. Owen Harris
1            2         1  Cumings, Mrs. John Bradley (Florence Briggs Th...
2            3         1                             Heikkinen, Miss. Laina
3            4         1       Futrelle, Mrs. Jacques Heath (Lily May Peel)
4            5         0                           Allen, Mr. William Henry


# Data Inspection Functions:-

1. df.info() :- Provides a summary of the DataFrame, including the number of non-null entries and data types for each column.

In [7]:
# Print a concise summary of df_from_csv: index range, per-column non-null
# counts and dtypes, and the memory footprint.
df_from_csv.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   PassengerId  891 non-null    int64 
 1   Survived     891 non-null    int64 
 2   Name         891 non-null    object
dtypes: int64(2), object(1)
memory usage: 21.0+ KB


2. df.head(n) :- Displays the first n rows of the DataFrame (by default, it shows the first 5 rows if n is not specified).

In [8]:
# Display the first 10 rows instead of the default 5.
df_from_csv.head(10)

Unnamed: 0,PassengerId,Survived,Name
0,1,0,"Braund, Mr. Owen Harris"
1,2,1,"Cumings, Mrs. John Bradley (Florence Briggs Th..."
2,3,1,"Heikkinen, Miss. Laina"
3,4,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)"
4,5,0,"Allen, Mr. William Henry"
5,6,0,"Moran, Mr. James"
6,7,0,"McCarthy, Mr. Timothy J"
7,8,0,"Palsson, Master. Gosta Leonard"
8,9,1,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)"
9,10,1,"Nasser, Mrs. Nicholas (Adele Achem)"


3. df.describe() :- Generates descriptive statistics for numerical columns, including count, mean, standard deviation, min, max, and percentiles.

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text column "Name" is not part of the result.
df_from_csv.describe()


Unnamed: 0,PassengerId,Survived
count,891.0,891.0
mean,446.0,0.383838
std,257.353842,0.486592
min,1.0,0.0
25%,223.5,0.0
50%,446.0,0.0
75%,668.5,1.0
max,891.0,1.0


4. df.isnull().sum() :- Returns the count of missing values in each column.

In [10]:
# isnull() marks every missing cell as True; sum() then totals those flags
# per column, giving the number of missing values in each column.
df_from_csv.isnull().sum()


PassengerId    0
Survived       0
Name           0
dtype: int64

5. df.columns :- Lists the column names in the DataFrame.

In [11]:
# Column labels of df_from_csv, returned as a pandas Index.
df_from_csv.columns

Index(['PassengerId', 'Survived', 'Name'], dtype='object')

6. df.dtypes :- Shows the data type of each column.

In [12]:
# Data type of every column, returned as a Series indexed by column name.
df_from_csv.dtypes

PassengerId     int64
Survived        int64
Name           object
dtype: object

7. df["column"].value_counts() :- Returns the number of occurrences of each unique value in the column.

In [13]:

# Tally how many rows carry each distinct value of the "Survived" column.
survived_counts = df_from_csv["Survived"].value_counts()

# Heading and result emitted by one print call, each on its own line.
print("Survived Counts:", survived_counts, sep="\n")


Survived Counts:
Survived
0    549
1    342
Name: count, dtype: int64


8. df["column"].unique() :- Returns the distinct values present in the column.

In [14]:
# Collect the distinct values found in the "Name" column.
unique_name = df_from_csv["Name"].unique()

# Heading and result emitted by one print call, each on its own line.
print("Unique Name", unique_name, sep="\n")

Unique Name
['Braund, Mr. Owen Harris'
 'Cumings, Mrs. John Bradley (Florence Briggs Thayer)'
 'Heikkinen, Miss. Laina' 'Futrelle, Mrs. Jacques Heath (Lily May Peel)'
 'Allen, Mr. William Henry' 'Moran, Mr. James' 'McCarthy, Mr. Timothy J'
 'Palsson, Master. Gosta Leonard'
 'Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)'
 'Nasser, Mrs. Nicholas (Adele Achem)' 'Sandstrom, Miss. Marguerite Rut'
 'Bonnell, Miss. Elizabeth' 'Saundercock, Mr. William Henry'
 'Andersson, Mr. Anders Johan' 'Vestrom, Miss. Hulda Amanda Adolfina'
 'Hewlett, Mrs. (Mary D Kingcome) ' 'Rice, Master. Eugene'
 'Williams, Mr. Charles Eugene'
 'Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)'
 'Masselmani, Mrs. Fatima' 'Fynney, Mr. Joseph J' 'Beesley, Mr. Lawrence'
 'McGowan, Miss. Anna "Annie"' 'Sloper, Mr. William Thompson'
 'Palsson, Miss. Torborg Danira'
 'Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)'
 'Emir, Mr. Farred Chehab' 'Fortune, Mr. Charles Alexander'
 'O\'Dwyer, Miss. Ellen "Nell

9. df.corr() :- Computes the pairwise correlation between the columns of the DataFrame. Correlation is a statistical measure that describes the degree to which two or more variables move in relation to each other.

In [16]:
# Restrict the frame to its numeric columns so that the text column "Name"
# is left out of the correlation computation.
numeric_columns = df_from_csv.select_dtypes(include="number")

# Pairwise correlation between every pair of the remaining columns.
correlation_matrix = numeric_columns.corr()
print(f"\nCorrelation Matrix:\n{correlation_matrix}")


Correlation Matrix:
             PassengerId  Survived
PassengerId     1.000000 -0.005007
Survived       -0.005007  1.000000


# INDEXING AND SLICING :-

1. LOC :- loc is used to filter rows and select columns by label [rows: the row's index label, columns: the column's name].

In [18]:
# Label-based scalar lookup: the row labelled 0, column "Name".
df.loc[0,"Name"]

'Vansh'

In [21]:
# Label-based row slice: with loc the end label (1) is included, so this
# returns the rows labelled 0 and 1; ":" selects every column.
df.loc[0:1,:]

Unnamed: 0,Name,Age,City
0,Vansh,21,Jalandhar
1,Dinesh,23,Amritsar


In [22]:
# Boolean-mask filter: keep the rows whose Age equals 21 and return their
# "Name" values as a Series.
df.loc[df["Age"] == 21, "Name"]

0    Vansh
Name: Name, dtype: object

2. ILOC:-"iloc is used to filter rows and columns based on integer position"

In [26]:
# Position-based selection: rows at positions 0-3 (the end position 4 is
# excluded) and the columns at positions 0, 1 and 2.
df.iloc[0:4,[0,1,2]]

Unnamed: 0,Name,Age,City
0,Vansh,21,Jalandhar
1,Dinesh,23,Amritsar
2,Simran,22,Bathida
3,Davinder,30,Batala


In [27]:
# Column labels of df, returned as a pandas Index.
df.columns

Index(['Name', 'Age', 'City'], dtype='object')

pivot(): Rotates data from rows into columns based on a key column.

In [7]:
# Reshape long -> wide: one row per Name, one column per City, cells filled
# with Age. Name/City combinations that do not occur in df come out as NaN.
pivot_df = df.pivot(values='Age', index="Name", columns='City')
print("\nPivoted DataFrame:", pivot_df, sep="\n")



Pivoted DataFrame:
City      Amritsar  Batala  Bathida  Jalandhar
Name                                          
Davinder       NaN    30.0      NaN        NaN
Dinesh        23.0     NaN      NaN        NaN
Simran         NaN     NaN     22.0        NaN
Vansh          NaN     NaN      NaN       21.0


melt(): Converts data from wide format to long format.

In [8]:
# Reshape wide -> long: "Name" stays as the identifier column, while the Age
# and City columns are folded into (variable, value) pairs, giving one output
# row per Name per folded column.
melted_df = df.melt(
    id_vars=['Name'],
    value_vars=['Age', 'City'],
    var_name='variable',
    value_name='value',
)
print(f"\nMelted DataFrame:\n{melted_df}")



Melted DataFrame:
       Name variable      value
0     Vansh      Age         21
1    Dinesh      Age         23
2    Simran      Age         22
3  Davinder      Age         30
4     Vansh     City  Jalandhar
5    Dinesh     City   Amritsar
6    Simran     City    Bathida
7  Davinder     City     Batala


stack() and unstack(): stack() moves the column labels into the innermost level of the row index, turning a wide DataFrame into a long one; unstack() reverses this by moving an index level back out into columns.

In [15]:
# stack(): with Name and City moved into the index, the one remaining column
# (Age) is folded into a new innermost index level. reset_index() then turns
# every level back into an ordinary column; the unnamed level appears as
# "level_2" and the data column is called "value".
stacked_df = (
    df.set_index(['Name', 'City'])
    .stack()
    .reset_index(name='value')
)
print("\nStacked DataFrame:", stacked_df, sep="\n")

# unstack(): pivot the "level_2" index level back out into columns, then keep
# only the "value" part of the resulting two-level column index. City is not
# part of that selection, so it does not appear in the result.
unstacked_df = (
    stacked_df.set_index(['Name', 'level_2'])
    .unstack('level_2')['value']
)
print("\nUnstacked DataFrame:", unstacked_df, sep="\n")



Stacked DataFrame:
       Name       City level_2  value
0     Vansh  Jalandhar     Age     21
1    Dinesh   Amritsar     Age     23
2    Simran    Bathida     Age     22
3  Davinder     Batala     Age     30

Unstacked DataFrame:
level_2   Age
Name         
Davinder   30
Dinesh     23
Simran     22
Vansh      21
