## Introduction to Pandas

#### This notebook uses a sample dataset to practise common queries in Pandas

In [None]:
import pandas as pd

# Read the employee records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='employees.csv')

# Preview the first ten rows
first_ten = df.head(n=10)
print(first_ten)

#### How to select columns from a DataFrame

##### Print the First_Name and Gender columns

*syntax : pd.DataFrame(df, columns=['column1','column2'])*

In [None]:
# Build a new frame that carries only the two requested columns
wanted_columns = ['First_Name', 'Gender']
x = pd.DataFrame(data=df, columns=wanted_columns)
print(x)

#### Get the Size of a DataFrame
##### Print the number of rows and columns of the DataFrame as [number of rows, number of columns]

In [None]:
# df.shape is a (rows, columns) tuple; pick the two counts out by position
frame_shape = df.shape
num_rows = frame_shape[0]
num_columns = frame_shape[1]

print(num_rows, num_columns)

### Display the first three rows

In [None]:
# Show only the top three rows of the frame
top_three = df.head(n=3)
print(top_three)

### Select the Data
#### Print First_Name and Salary for the rows where Gender is Male

In [None]:
# The boolean mask picks the rows; the label list picks the columns
is_male = df['Gender'] == 'Male'
x = df.loc[is_male, ['First_Name', 'Salary']]

print(x)

#### Create a New Column
##### Create a new column named Bonus that is equal to Salary * 2

In [None]:
# Bonus is defined as twice each row's Salary and stored as a new column on df
df['Bonus'] = df['Salary'].mul(2)

print(df)

#### Drop Duplicate Rows

##### Drop duplicate rows on First_Name and keep the first record


In [None]:
# Where a First_Name repeats, only its earliest row is kept.
# The result is a new frame; df is not reassigned here.
unique_first_names = df.drop_duplicates(subset=['First_Name'], keep='first')
print(unique_first_names)



#### Drop missing Data
##### Drop the missing data in the First_Name

In [None]:
# Drop every row whose First_Name is missing and show the result.
# The column is passed as a list: a bare string for `subset` is only accepted
# by newer pandas releases, while a list of labels works on every version.
# The result is a new frame; df is not reassigned here.
print(df.dropna(subset=['First_Name']))

#### Modify Column
##### Modify the Salary Column

In [None]:
# Overwrite Salary with twice its current value.
# Note: this updates df itself, so re-running the cell doubles the column again.
df['Salary'] = df['Salary'].mul(2)

print(df)

#### Rename Columns
##### Rename the First_Name column to Name

In [None]:
# Map the old column label to the new one; the renamed frame is only printed,
# df is not reassigned here.
column_renames = {'First_Name': 'Name'}
print(df.rename(column_renames, axis='columns'))

#### Change the Data Type
##### Change the Bonus data type from float to int


In [None]:
# Convert Bonus to a plain integer column.
# Bonus was computed from Salary, so a missing Salary appears here as NaN.
# astype(int) raises on NaN, so missing bonuses are set to 0 before the cast.
# When there are no missing values the result is the same as a direct cast.
df['Bonus'] = df['Bonus'].fillna(0).astype(int)

print(df)

#### Fill the Missing Data
##### Fill the missing data in Salary with 0


In [None]:
# Replace missing Salary values with 0 and store the result back on df.
# The filled column is assigned explicitly rather than calling
# fillna(..., inplace=True) on df['Salary']: that form acts on an intermediate
# Series, triggers a FutureWarning on pandas 2.x, and leaves df unchanged when
# Copy-on-Write is enabled.
df['Salary'] = df['Salary'].fillna(0)

print(df)

#### Reshape Data: Concatenate
##### Join the two DataFrames vertically

In [None]:
# Two small student tables that share the same three columns,
# written out one record per row
df1 = pd.DataFrame([
    {'student_id': 1, 'name': 'Mason', 'age': 8},
    {'student_id': 2, 'name': 'Ava', 'age': 6},
    {'student_id': 3, 'name': 'Taylor', 'age': 15},
    {'student_id': 4, 'name': 'Georgia', 'age': 17},
])

df2 = pd.DataFrame([
    {'student_id': 5, 'name': 'Leo', 'age': 7},
    {'student_id': 6, 'name': 'Alex', 'age': 7},
])

# Stack the second table under the first; ignore_index renumbers the rows 0..n-1
frames_to_stack = [df1, df2]
result = pd.concat(frames_to_stack, axis=0, ignore_index=True)

# Show the combined table
print(result)

#### Reshape Data: Pivot

In [None]:

# Long-format sample: one temperature reading per (city, month) pair
months = ['January', 'February', 'March', 'April', 'May']
data = {
    'city': ['Jacksonville'] * 5 + ['ElPaso'] * 5,
    'month': months * 2,
    'temperature': [13, 23, 38, 5, 34, 20, 6, 26, 2, 43],
}

weather = pd.DataFrame(data)

# Turn each city into its own column, one row per month, then move
# 'month' out of the index and back into a regular column
pivoted_weather = (
    weather
    .pivot(index='month', columns='city', values='temperature')
    .reset_index()
)

# Show the wide table
print(pivoted_weather)

#### Reshape Data : Melt

In [None]:
# Wide-format sample: one row per product, one column per quarter
data = {
    'product': ['Umbrella', 'SleepingBag'],
    'quarter_1': [417, 800],
    'quarter_2': [224, 936],
    'quarter_3': [379, 93],
    'quarter_4': [611, 875],
}

report = pd.DataFrame(data)

# Unpivot the four quarter columns into (quarter, sales) pairs,
# keeping 'product' as the identifier on every row
quarter_columns = ['quarter_1', 'quarter_2', 'quarter_3', 'quarter_4']
reshaped_report = report.melt(
    id_vars=['product'],
    value_vars=quarter_columns,
    var_name='quarter',
    value_name='sales',
)

# Show the long table
print(reshaped_report)

#### Method Chaining

##### Display the First_Name of employees whose Salary is greater than 250000, sorted by Salary in descending order

In [None]:
# Chain the steps one per line: filter rows, sort by Salary (highest first),
# then keep only the First_Name column
high_earners = (
    df[df['Salary'] > 250000]
    .sort_values(by='Salary', ascending=False)
    .loc[:, ['First_Name']]
)
print(high_earners)