# Problem Statement 1

In [1]:
import pandas as pd

In [4]:
# Read the raw 2011 census CSV into a DataFrame.
# NOTE(review): absolute, machine-specific path; this cell only runs where that file exists.
census_data = pd.read_csv(filepath_or_buffer=r"D:\loudes\census_2011.csv")

In [5]:
# Columns retained for the analysis; list order is the column order of the subset.
columns_to_keep = [
    # Identifiers
    'State name',
    'District name',
    # Population
    'Population',
    'Male',
    'Female',
    # Literacy
    'Literate',
    'Male_Literate',
    'Female_Literate',
    # Households
    'Rural_Households',
    'Urban_Households',
    'Households',
    # Age groups
    'Age_Group_0_29',
    'Age_Group_30_49',
    'Age_Group_50',
    'Age not stated',
]

In [6]:
# Select every row but only the columns named in columns_to_keep
# (a KeyError is raised if any listed column is missing from census_data).
relevant_census_data = census_data.loc[:, columns_to_keep]

In [7]:
# Preview the column subset: print its first five rows as plain text
print(relevant_census_data.head(n=5))

          State name District name  Population      Male    Female  Literate  \
0  JAMMU AND KASHMIR       Kupwara    870354.0  474190.0  396164.0  439654.0   
1  JAMMU AND KASHMIR        Badgam    753745.0       NaN  355704.0  335649.0   
2  JAMMU AND KASHMIR   Leh(Ladakh)    133487.0   78971.0   54516.0   93770.0   
3  JAMMU AND KASHMIR        Kargil    140802.0       NaN   63017.0       NaN   
4  JAMMU AND KASHMIR         Punch         NaN  251899.0  224936.0  261724.0   

   Male_Literate  Female_Literate  Rural_Households  Urban_Households  \
0       282823.0         156831.0          158438.0               NaN   
1       207741.0         127908.0          160649.0           27190.0   
2        62834.0          30936.0           36920.0           17474.0   
3        56301.0          29935.0           40370.0            7774.0   
4       163333.0          98391.0          132139.0           15269.0   

   Households  Age_Group_0_29  Age_Group_30_49  Age_Group_50  Age not stated  
0

In [8]:
# Optional export: write the column subset to CSV in the working directory,
# leaving out the DataFrame index.
relevant_census_data.to_csv(path_or_buf='relevant_census_data.csv', index=False)

# Problem Statement 2

In [9]:
# Old column label -> new column label, listed as (old, new) pairs.
column_mapping = dict([
    ('State name', 'State/UT'),
    ('District name', 'District'),
    ('Male_Literate', 'Literate_Male'),
    ('Female_Literate', 'Literate_Female'),
    ('Rural_Households', 'Households_Rural'),
    ('Urban_Households', 'Households_Urban'),
    ('Age_Group_0_29', 'Young_and_Adult'),
    ('Age_Group_30_49', 'Middle_Aged'),
    ('Age_Group_50', 'Senior_Citizen'),
    ('Age not stated', 'Age_Not_Stated'),
])

In [10]:
# Relabel the columns using column_mapping; labels not in the mapping are left unchanged.
# NOTE(review): this renames the full census_data frame, not relevant_census_data; confirm that is intended.
census_data = census_data.rename(column_mapping, axis='columns')

In [19]:
# Preview the renamed frame: print its first five rows as plain text
print(census_data.head(n=5))

   District code           State/UT     District  Population      Male  \
0              1  Jammu and Kashmir      Kupwara    870354.0  474190.0   
1              2  Jammu and Kashmir       Badgam    753745.0       NaN   
2              3  Jammu and Kashmir  Leh(Ladakh)    133487.0   78971.0   
3              4  Jammu and Kashmir       Kargil    140802.0       NaN   
4              5  Jammu and Kashmir        Punch         NaN  251899.0   

     Female  Literate  Literate_Male  Literate_Female      SC  ...  \
0  396164.0  439654.0       282823.0         156831.0  1048.0  ...   
1  355704.0  335649.0       207741.0         127908.0     NaN  ...   
2   54516.0   93770.0        62834.0          30936.0   488.0  ...   
3   63017.0       NaN        56301.0          29935.0    18.0  ...   
4  224936.0  261724.0       163333.0          98391.0   556.0  ...   

   Power_Parity_Rs_90000_150000  Power_Parity_Rs_45000_150000  \
0                          94.0                         588.0   
1   

In [18]:
# Optional export: write the frame with renamed columns to CSV in the working
# directory, leaving out the DataFrame index.
census_data.to_csv(path_or_buf='census2011_renamed.csv', index=False)

# Problem Statement 3

In [13]:
# Define a function to capitalize State/UT names according to the specified format
def capitalize_state_name(state_name):
    """Return ``state_name`` with each word capitalized, keeping "and" lowercase.

    The name is lowercased and split on whitespace; every word except the
    conjunction "and" gets an upper-case first character, and the words are
    re-joined with single spaces (so leading, trailing and repeated whitespace
    is collapsed).

    Parameters
    ----------
    state_name : str
        The State/UT name in any casing, e.g. ``"JAMMU AND KASHMIR"``.

    Returns
    -------
    str
        The reformatted name, e.g. ``"Jammu and Kashmir"``. A value that is
        not a string (such as ``None`` or a float NaN standing in for a
        missing cell) is returned unchanged.
    """
    # Missing cells reach this function as None / float NaN rather than str;
    # pass them through instead of failing on .lower().
    if not isinstance(state_name, str):
        return state_name

    return ' '.join(
        word if word == 'and' else word.capitalize()
        for word in state_name.lower().split()
    )

In [14]:
# Normalise the casing of every State/UT value, one element at a time
census_data['State/UT'] = census_data['State/UT'].map(capitalize_state_name)

In [15]:
# Preview the frame after the State/UT casing change: print its first five rows
print(census_data.head(n=5))

   District code           State/UT     District  Population      Male  \
0              1  Jammu and Kashmir      Kupwara    870354.0  474190.0   
1              2  Jammu and Kashmir       Badgam    753745.0       NaN   
2              3  Jammu and Kashmir  Leh(Ladakh)    133487.0   78971.0   
3              4  Jammu and Kashmir       Kargil    140802.0       NaN   
4              5  Jammu and Kashmir        Punch         NaN  251899.0   

     Female  Literate  Literate_Male  Literate_Female      SC  ...  \
0  396164.0  439654.0       282823.0         156831.0  1048.0  ...   
1  355704.0  335649.0       207741.0         127908.0     NaN  ...   
2   54516.0   93770.0        62834.0          30936.0   488.0  ...   
3   63017.0       NaN        56301.0          29935.0    18.0  ...   
4  224936.0  261724.0       163333.0          98391.0   556.0  ...   

   Power_Parity_Rs_90000_150000  Power_Parity_Rs_45000_150000  \
0                          94.0                         588.0   
1   

In [27]:
# Optional export: write the frame with re-cased State/UT names to CSV in the
# working directory, leaving out the DataFrame index.
census_data.to_csv(path_or_buf='census2011_updated1.csv', index=False)

# Problem Statement 4

In [31]:
# Read the districts included in Telangana from the text file (one name per line).
# Surrounding whitespace is stripped and blank lines are dropped, because the
# names are later matched exactly against the District column and a padded or
# empty entry would silently fail to match.
# NOTE(review): absolute, machine-specific path; this cell only runs where that file exists.
with open(r"D:\loudes\Telangana.txt") as file:
    telangana_districts = [
        name.strip() for name in file.read().splitlines() if name.strip()
    ]

In [23]:
# Set State/UT to "Telangana" on every row whose District appears in
# telangana_districts; all other rows keep their current State/UT.
# NOTE(review): the row's existing State/UT is not checked, so a same-named
# district in another state would be relabelled too. Confirm names are unique.
census_data['State/UT'] = census_data['State/UT'].mask(
    census_data['District'].isin(telangana_districts), 'Telangana'
)

In [24]:
# Preview the frame after the Telangana relabelling: print its first five rows
print(census_data.head(n=5))

   District code           State/UT     District  Population      Male  \
0              1  Jammu and Kashmir      Kupwara    870354.0  474190.0   
1              2  Jammu and Kashmir       Badgam    753745.0       NaN   
2              3  Jammu and Kashmir  Leh(Ladakh)    133487.0   78971.0   
3              4  Jammu and Kashmir       Kargil    140802.0       NaN   
4              5  Jammu and Kashmir        Punch         NaN  251899.0   

     Female  Literate  Literate_Male  Literate_Female      SC  ...  \
0  396164.0  439654.0       282823.0         156831.0  1048.0  ...   
1  355704.0  335649.0       207741.0         127908.0     NaN  ...   
2   54516.0   93770.0        62834.0          30936.0   488.0  ...   
3   63017.0       NaN        56301.0          29935.0    18.0  ...   
4  224936.0  261724.0       163333.0          98391.0   556.0  ...   

   Power_Parity_Rs_90000_150000  Power_Parity_Rs_45000_150000  \
0                          94.0                         588.0   
1   

In [28]:
# Optional export: write the frame with the Telangana relabelling to CSV in the
# working directory, leaving out the DataFrame index.
census_data.to_csv(path_or_buf='census2011_updated2.csv', index=False)