## Loading the pretrained model

In [21]:
from transformers import pipeline

# Build a Hugging Face text-generation pipeline backed by the GPT-2 checkpoint.
# Any other text-generation model name can be substituted for 'gpt2'.
model = pipeline(task='text-generation', model='gpt2')

## Correct the spellings

from textblob import TextBlob

# Spelling correction backed by TextBlob's `correct()` method.
# NOTE: a later cell defines another `correct_spelling` (SpellChecker-based);
# whichever definition is executed last is the one in effect.
def correct_spelling(column_name):
    """Return the TextBlob-corrected form of `column_name` as a plain string."""
    blob = TextBlob(column_name)
    return str(blob.correct())

In [40]:
from spellchecker import SpellChecker

# Create one shared SpellChecker instance (constructed with its default arguments);
# the correct_spelling function below looks words up through this `spell` object.
spell = SpellChecker()

# Function to correct spelling using SpellChecker
def correct_spelling(column_name):
    """Spell-correct each whitespace-separated word of a column name.

    Args:
        column_name: The raw column label. Non-string labels (for example a
            numeric header read from a spreadsheet) are converted with
            ``str()`` before being split.

    Returns:
        str: The words joined by single spaces, each replaced by the result of
        ``spell.correction(word)``. A word is kept unchanged when
        ``spell.correction`` returns ``None`` for it.
    """
    corrected_words = []
    for word in str(column_name).split():
        # Look the word up once and reuse the result for both the
        # None check and the replacement value.
        suggestion = spell.correction(word)
        corrected_words.append(word if suggestion is None else suggestion)

    # Join the corrected words back into a single string
    return ' '.join(corrected_words)


## Standardize column names

In [29]:
import re

# Standardize a column name: spell-check it, strip special characters,
# capitalize each word and join the words with underscores.
def standardize_column_name(column_name):
    """Turn a raw column label into a ``Capitalized_Words`` identifier.

    Steps, in order:
      1. Run the label through ``correct_spelling``.
      2. Delete every character that is not an ASCII letter, a digit or
         whitespace (so underscores, hyphens and punctuation are removed,
         not turned into separators).
      3. Split on whitespace, ``capitalize()`` each word and join the
         words with ``_``.

    Args:
        column_name: The raw column label.

    Returns:
        str: The standardized name; an empty string if no words remain.
    """
    spell_checked = correct_spelling(column_name)
    alnum_only = re.sub(r'[^a-zA-Z0-9\s]', '', spell_checked)
    words = alnum_only.split()
    return '_'.join(word.capitalize() for word in words)

## Loading the excel sheet

In [30]:
import pandas as pd
import json

# Load an Excel sheet whose first column holds the field names and return it
# as a DataFrame with standardized column names.
def load_and_standardize_excel(file_path, sheet_name=0):
    """Read a sheet, pivot its first column into headers and standardize them.

    The sheet is read without a header row. Its first column (label ``0``)
    is used as the index and the frame is transposed, so the values of that
    first column become the column names. Each column name is then passed
    through ``standardize_column_name``.

    Args:
        file_path: Path of the Excel file, forwarded to ``pd.read_excel``.
        sheet_name: Sheet selector forwarded to ``pd.read_excel``
            (defaults to ``0``).

    Returns:
        pandas.DataFrame: The transposed data with renamed columns.
    """
    raw = pd.read_excel(file_path, sheet_name=sheet_name, header=None)

    # First column -> index, then transpose so the index becomes the header
    table = raw.set_index(0).transpose()

    # Map every original column label to its standardized form
    renames = {label: standardize_column_name(label) for label in table.columns}

    return table.rename(columns=renames)

## Converting dataframe to a JSON file

In [31]:
# Helpers and entry point for writing a DataFrame out as a JSON file.

def _null_if_missing(value):
    """Map missing-value markers (NaN, NaT, NA, None) to ``None``."""
    # pd.isna() on a list-like returns an array, so only scalars are tested.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    return value


def _json_fallback(value):
    """Encode a value that the ``json`` module cannot serialize on its own."""
    if hasattr(value, 'isoformat'):
        # datetime/date/time objects, including pandas Timestamps
        return value.isoformat()
    if hasattr(value, 'item'):
        # NumPy scalars expose .item() to obtain the native Python value
        return value.item()
    return str(value)


def save_dataframe_to_json(df, output_file):
    """Write a DataFrame to ``output_file`` as a JSON array of row objects.

    Each row becomes one JSON object keyed by column name. Missing cells
    are written as ``null`` rather than the non-standard ``NaN`` token, and
    date-like cells are written as ISO-8601 strings instead of raising
    ``TypeError``.

    Args:
        df: The DataFrame to export.
        output_file: Destination path; the file is (over)written as UTF-8.

    Returns:
        None
    """
    # Convert the dataframe to a list of dictionaries (JSON objects),
    # replacing missing markers so the output is valid JSON.
    records = [
        {column: _null_if_missing(cell) for column, cell in row.items()}
        for row in df.to_dict(orient='records')
    ]

    # Save the JSON data; ensure_ascii=False keeps non-ASCII text readable.
    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(
            records,
            json_file,
            indent=4,
            ensure_ascii=False,
            default=_json_fallback,
        )

## Driver Programme

In [42]:
# Input workbook and output JSON paths (relative to the notebook's working directory)
file_path = 'sample_correct_format.xlsx' # path of the Excel file to convert
output_file = 'test_result.json' # destination of the generated JSON

# Read the workbook and standardize its column names
df = load_and_standardize_excel(file_path)

# Write the standardized data out as JSON
save_dataframe_to_json(df, output_file)

# Confirm where the output was written
print(f"JSON data has been saved to {output_file}")

JSON data has been saved to test_result.json
