In [1]:
import pandas as pd
import os

def split_csv(input_file, output_dir, chunk_size=500):

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    try:
        df = pd.read_csv(input_file)
        num_chunks = (len(df) + chunk_size - 1) // chunk_size

        for i in range(num_chunks):
            start = i * chunk_size
            end = min((i + 1) * chunk_size, len(df))
            chunk = df.iloc[start:end]
            output_file = os.path.join(output_dir, f"output_{i+1}.csv")
            chunk.to_csv(output_file, index=False)
        print(f"Successfully split {input_file} into {num_chunks} files in {output_dir}")

    except FileNotFoundError:
        print(f"Error: Input file '{input_file}' not found.")
    except Exception as e:
        print(f"An error occurred: {e}")


input_file = "/content/my_dataset1.csv"
output_dir = "/content/input csvs"
split_csv(input_file, output_dir)


Successfully split /content/my_dataset1.csv into 10 files in /content/input csvs


In [2]:
!pip install deep-translator==1.9.2

import pandas as pd
import time
import os
from deep_translator import GoogleTranslator

#translation function
def translate_text(text, target_lang='si', max_retries=3):
    if not isinstance(text, str) or text.strip() == '':
        return text
    for attempt in range(max_retries):
        try:
            translated = GoogleTranslator(source='en', target=target_lang).translate(text)
            time.sleep(0.5)
            return translated
        except Exception as e:
            if attempt == max_retries - 1:
                print(f"Failed to translate: {text}. Error: {e}")
                return text
            time.sleep(1)
    return text

# Input and output directories
input_dir = '/content/input csvs'
output_dir = '/content/output csvs'

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

for filename in os.listdir(input_dir):
    if filename.endswith('.csv'):
        input_file = os.path.join(input_dir, filename)
        output_file = os.path.join(output_dir, filename)  # Use same filename for output

        try:
            df = pd.read_csv(input_file)
            df['question_si'] = df['question'].apply(lambda x: translate_text(x, 'si'))
            df['reference_answer_si'] = df['reference_answer'].apply(lambda x: translate_text(x, 'si'))
            df['student_answer_si'] = df['student_answer'].apply(lambda x: translate_text(x, 'si'))
            df.to_csv(output_file, index=False, encoding='utf-8')
            print(f"Finished processing: {filename}")
        except Exception as e:
            print(f"Error processing {filename}: {e}")

print("All files processed.")

Collecting deep-translator==1.9.2
  Downloading deep_translator-1.9.2-py3-none-any.whl.metadata (25 kB)
Downloading deep_translator-1.9.2-py3-none-any.whl (30 kB)
Installing collected packages: deep-translator
Successfully installed deep-translator-1.9.2
Finished processing: output_4.csv
Finished processing: output_2.csv
Finished processing: output_1.csv
Finished processing: output_3.csv
Finished processing: output_6.csv
Finished processing: output_10.csv
Finished processing: output_5.csv
Finished processing: output_7.csv
Finished processing: output_9.csv
Finished processing: output_8.csv
All files processed.
