In [None]:
import pandas as pd


In [None]:
def compare_dataframes(df1, df2, n_rows=1000000000):
    """Report cell-level differences between the first rows of two DataFrames.

    Only columns present in both frames are compared, and only the first
    ``n_rows`` rows (by position) of each frame are considered.

    Args:
        df1: First DataFrame.
        df2: Second DataFrame.
        n_rows: Number of leading rows to compare.

    Returns:
        A DataFrame with one row per differing row (the original row labels
        are kept as the index) and, for every common column that differs
        somewhere, a ``<column>_df1`` / ``<column>_df2`` pair.  Cells that are
        equal in both inputs are NaN; cells that differ hold the value from
        the respective input.  The result is empty when nothing differs.

    Raises:
        ValueError: Propagated from pandas when the two row slices do not
            carry identical index labels, because the element-wise ``!=``
            requires identically-labelled frames.
    """
    # Restrict the comparison to the columns both frames share.
    common_columns = df1.columns.intersection(df2.columns)

    subset_df1 = df1[common_columns].iloc[:n_rows]
    subset_df2 = df2[common_columns].iloc[:n_rows]

    # Element-wise NaN != NaN evaluates to True, so a value missing on both
    # sides has to be excluded explicitly or it would count as a difference.
    both_missing = subset_df1.isna() & subset_df2.isna()
    difference_mask = (subset_df1 != subset_df2) & ~both_missing

    # Select rows/columns from the mask itself rather than from NaN-ness of
    # the values: a cell that is NaN in df1 but populated in df2 is a real
    # difference and must not be filtered out.
    changed_rows = difference_mask.any(axis=1).to_numpy()
    changed_columns = [
        column for column in common_columns if difference_mask[column].any()
    ]

    diff_dict = {}
    for column in changed_columns:
        cell_mask = difference_mask[column]
        # f-strings keep this working for non-string column labels too.
        diff_dict[f"{column}_df1"] = subset_df1[column].where(cell_mask)
        diff_dict[f"{column}_df2"] = subset_df2[column].where(cell_mask)

    if diff_dict:
        differences = pd.concat(diff_dict, axis=1).loc[changed_rows]
    else:
        # pd.concat rejects an empty mapping; return an empty frame instead.
        differences = pd.DataFrame(index=subset_df1.index[:0])

    if differences.empty:
        print(f"The first {n_rows} rows are identical in the common columns.")
    else:
        print(f"Differences found in the first {n_rows} rows in the common columns")
        # `display` is only a global inside IPython/Jupyter; fall back to
        # print so the function also works in a plain interpreter.
        try:
            from IPython.display import display as show
        except ImportError:
            show = print
        show(differences)

    # Return the differences DataFrame (index will contain row labels)
    return differences


# Example usage:
# main_directory = r"D:\KISHORE\Binance-Data-Downloader\data\extracted_data\BTCUSDT-4h"
#
# df1 = pd.read_csv(rf"{main_directory}\BTCUSDT-4h_old_logic.csv")
# df2 = pd.read_csv(rf"{main_directory}\BTCUSDT-4h.csv")
# differences_df = compare_dataframes(df1, df2, 500)
