In [None]:
# pandas is a popular, open-source Python library for data analysis and manipulation.
# It provides a vast array of methods for manipulating, analyzing, and exploring data.
# These methods can be broadly categorized by purpose.

In [None]:
# 1. Data Creation and Initialization - create a DataFrame from a dictionary, list, or other data structure.
# 2. Input/Output Operations - read and save data.
# 3. Data Inspection - inspect the given data, e.g. its rows, columns, and so on.
# 4. Data Selection - select particular rows and columns.
# 5. Data Cleaning - handle missing data, rename columns, etc.
# 6. Data Transformation - create new columns, apply functions to them, sort, ...
# 7. Aggregation and Grouping - group the data and carry out operations on the groups.
# 8. Reshaping Data - reshape the given data into the layout we need.
# 9. Statistical Methods - metrics and calculations.
# 10. Visualization - e.g. bar and scatter plots.
# ...and many more.

In [None]:
# Creating a simplified, pandas-like DataFrame class from scratch:

In [None]:
class SimpleDataFrame:
    """A minimal, dictionary-backed table that imitates a few pandas DataFrame features.

    Attributes:
        data: dict mapping each column name to the sequence of that column's values.
        columns: list of column names, in insertion order.
        rows: number of rows, i.e. the common length of every column.
    """

    def __init__(self, data):
        """Build a frame from a mapping of column name -> column values.

        Args:
            data: dict whose keys are column names and whose values are
                equal-length sequences holding each column's values.
                An empty dict produces a frame with no columns and zero rows.

        Raises:
            ValueError: if the columns do not all have the same length.
        """
        # Shallow-copy the mapping so add_column() never mutates the caller's
        # dict. The column sequences themselves are shared, not copied.
        self.data = dict(data)
        self.columns = list(self.data.keys())

        lengths = {len(values) for values in self.data.values()}
        if len(lengths) > 1:
            raise ValueError("All columns must have the same length.")
        # No columns at all means no rows either (avoids StopIteration on {}).
        self.rows = lengths.pop() if lengths else 0

    def display(self):
        """Print the frame as plain text: header line, dashed separator, one line per row."""
        print(" | ".join(self.columns))
        # Separator width: total length of the column names plus 3 characters
        # per column. Kept as-is so previously saved output stays identical.
        print("-" * (sum(len(col) for col in self.columns) + 3 * len(self.columns)))
        for i in range(self.rows):
            row_data = [str(self.data[col][i]) for col in self.columns]
            print(" | ".join(row_data))

    def select_columns(self, *cols):
        """Return a new SimpleDataFrame holding only the requested columns.

        Args:
            *cols: names of the columns to keep, in the desired order.

        Returns:
            A new SimpleDataFrame. Its column sequences are the same objects
            as in this frame (they are not copied).

        Raises:
            ValueError: if any requested column is not in this frame.
        """
        for col in cols:
            if col not in self.columns:
                raise ValueError(f"Column '{col}' does not exist in the DataFrame.")

        selected_data = {col: self.data[col] for col in cols}
        return SimpleDataFrame(selected_data)

    def filter_rows(self, condition):
        """Return a new SimpleDataFrame with only the rows accepted by ``condition``.

        Args:
            condition: callable that receives one row as a dict
                ``{column_name: value}`` and returns a truthy value to keep it.

        Returns:
            A new SimpleDataFrame with the same columns and the kept rows,
            in their original order.
        """
        filtered_indices = [
            i
            for i in range(self.rows)
            if condition({col: self.data[col][i] for col in self.columns})
        ]
        filtered_data = {
            col: [self.data[col][i] for i in filtered_indices]
            for col in self.columns
        }
        return SimpleDataFrame(filtered_data)

    def add_column(self, column_name, values):
        """Add a column to this frame in place, or replace an existing one.

        Args:
            column_name: name of the column to add or overwrite.
            values: sequence of values, one per row. If the frame has no
                columns yet, this first column defines the row count.

        Raises:
            ValueError: if ``len(values)`` does not match the number of rows.
        """
        if not self.columns:
            # A frame without columns has no fixed row count yet.
            self.rows = len(values)
        elif len(values) != self.rows:
            raise ValueError("Length of new column must match number of rows.")

        # Only register the name once; re-adding an existing column replaces
        # its values instead of producing a duplicate header entry.
        if column_name not in self.data:
            self.columns.append(column_name)
        self.data[column_name] = values

    def summarize(self):
        """Print min, max and average for every fully numeric column.

        A column is summarized only when it is non-empty and every value is an
        ``int`` or ``float``; other columns are skipped silently.
        """
        print("Summary Statistics:")
        for col in self.columns:
            col_data = self.data[col]
            # Empty columns have nothing to aggregate (and would divide by zero).
            if len(col_data) == 0:
                continue
            # Checking every value (not just the first) keeps min/max/sum from
            # failing on columns that mix numbers with None or strings.
            if not all(isinstance(value, (int, float)) for value in col_data):
                continue
            print(f"{col} - Min: {min(col_data)}, Max: {max(col_data)}, Avg: {sum(col_data) / len(col_data)}")
        print()


In [None]:
# Example dataset: every key is a column name, every list holds that column's values
data = dict(
    Name=["Alice", "Bob", "Charlie", "Diana", "Eve"],
    Age=[24, 27, 22, 32, 29],
    Salary=[50000, 60000, 55000, 65000, 70000],
)

# Wrap the dictionary in the custom table class
df = SimpleDataFrame(data)

# Show the full table as loaded
print("Initial DataFrame:")
df.display()

# Keep only the Name and Salary columns (returns a new frame)
selected_df = df.select_columns("Name", "Salary")
print("Selected Columns:")
selected_df.display()

# Keep only the rows whose Salary exceeds 55000 (returns a new frame)
filtered_df = df.filter_rows(lambda record: record["Salary"] > 55000)
print("Filtered Rows (Salary > 55000):")
filtered_df.display()

# Attach a Bonus column worth 10% of each Salary (modifies df in place)
df.add_column(
    "Bonus",
    [salary * 0.10 for salary in df.data["Salary"]],
)
print("DataFrame After Adding 'Bonus' Column:")
df.display()

# Print min / max / average for the numeric columns
df.summarize()


Initial DataFrame:
Name | Age | Salary
----------------------
Alice | 24 | 50000
Bob | 27 | 60000
Charlie | 22 | 55000
Diana | 32 | 65000
Eve | 29 | 70000
Selected Columns:
Name | Salary
----------------
Alice | 50000
Bob | 60000
Charlie | 55000
Diana | 65000
Eve | 70000
Filtered Rows (Salary > 55000):
Name | Age | Salary
----------------------
Bob | 27 | 60000
Diana | 32 | 65000
Eve | 29 | 70000
DataFrame After Adding 'Bonus' Column:
Name | Age | Salary | Bonus
------------------------------
Alice | 24 | 50000 | 5000.0
Bob | 27 | 60000 | 6000.0
Charlie | 22 | 55000 | 5500.0
Diana | 32 | 65000 | 6500.0
Eve | 29 | 70000 | 7000.0
Summary Statistics:
Age - Min: 22, Max: 32, Avg: 26.8
Salary - Min: 50000, Max: 70000, Avg: 60000.0
Bonus - Min: 5000.0, Max: 7000.0, Avg: 6000.0

