## Import Libraries

In [1]:
import json
import pandas as pd
import numpy as np

## Function for traversing through folder-files and creating dataframe and then generating csv out of it

In [60]:
def create_dataframe_from_json(path):
    dfs = []  
    
    folders = os.listdir(path)
    
    for folder in folders:
        new_path = os.path.join(path, folder)
        files_in_folder = os.listdir(new_path)
        
        for index, file in enumerate(files_in_folder):
            file_name = file.split(".")
            file_path = os.path.join(new_path, file)
            
            with open(file_path, 'r', encoding='utf-8') as file:
                data = json.load(file)
                
            dict_json = {f"{file_name[0]}.{k}.{key}": string for k, v in data.items() for key, string in v.items()}
            
            df = pd.DataFrame.from_dict(dict_json, orient='index').reset_index()
            df.rename(columns={"index": "languagekey", 0: "en_value (current)"}, inplace=True)

            additional_columns = ["hi_translated", "hi_transliterated", "hi_value(curated)",
                                  "ta_translated", "ta_transliterated", "ta_value (curated)"]
            
            df[additional_columns] = pd.DataFrame([[pd.NA] * len(additional_columns)], index=df.index)
            dfs.append(df)
    

    result_df = pd.concat(dfs, ignore_index=True)
    result_df.to_csv("Sample.csv", index=False)
    
#     return result_df
    return None

In [None]:
path = r"C:\Users\Palash Ashok Bhosale\Jupy\Projects\bhashini_integration\Json_tocsv\json_files"
result_df = create_dataframe_from_json(path)
# print(result_df.head())

# Bhashini Integration

## Get active api keys

In [61]:
import requests


In [62]:
userID='3b9427542f494332b6431cb28dd1ddb1'
ulcaApiKey='52194d3e6f-1d2c-4247-a665-bce41ef2eb4c'

In [63]:
def getactive_api(taskType, userID, ulcaApiKey):
        
    url = "https://meity-auth.ulcacontrib.org/ulca/apis/v0/model/getModelsPipeline"

    payload = json.dumps({
      "pipelineTasks": [
        {
          "taskType": taskType,
          "config": {
            "language": {
              "sourceLanguage": "en"
            }
          }
        }
      ],
      "pipelineRequestConfig": {
        "pipelineId": "64392f96daac500b55c543cd"
      }
    })
    headers = {
      'userID': userID,
      'ulcaApiKey': ulcaApiKey,
      'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)


    config_translation=response.text
    config_translation_data = json.loads(config_translation)
#     print(config_translation_data)
    target_languages = config_translation_data['languages'][0]['targetLanguageList']
    api_translation = config_translation_data['pipelineResponseConfig'][0]['config'][0]['serviceId']
#     target_lang=str(input(f"select language from this list {target_languages}: "))

    return api_translation
    

## Store active api for translation and transliteration in active _api list

In [64]:
active_api=[]

In [65]:
active_api.append(getactive_api("transliteration", userID, ulcaApiKey))

In [66]:
active_api.append(getactive_api("translation", userID, ulcaApiKey))

In [67]:
active_api

['ai4bharat/indicxlit--cpu-fsv2', 'ai4bharat/indictrans-v2-all-gpu--t4']

## Function to get translation and transliteration

In [68]:
def bhashini_api_call(task, target_lang, active_api, string):
    if task=="translation":
        api=active_api[1]
    else:
        api= active_api[0]


    url = "https://dhruva-api.bhashini.gov.in/services/inference/pipeline"
    # source="Hi my name is Palash"
    payload = json.dumps({
      "pipelineTasks": [
        {
          "taskType": task,
          "config": {
            "language": {
              "sourceLanguage": "en",
              "targetLanguage": target_lang
            },
            "serviceId": api
          }
        }
      ],
      "inputData": {
        "input": [
          {
            "source": string
    #         "source": "मेरा नाम विहिर है और मैं भाषाावर्ष यूज कर रहा हूँ"
          }
        ]
      }
    })
    headers = {
      'Accept': '*/*',
      'Authorization': '9uAUqhCxaept0FGxeOUkyJ1XQSZtp9GWHy5XLriwyBsS-sovl9RkTe2Gkthwrx2F',
      'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)

    translation_json= response.text

    translated_data = json.loads(translation_json)
#     print(api)
    if task=="translation":
        return translated_data['pipelineResponse'][0]['output'][0]['target']
    else:
        return translated_data['pipelineResponse'][0]['output'][0]['target'][0]

## Load CSV

In [74]:
df=pd.read_csv("Sample.csv")

In [75]:
for index,row in df.iterrows():

    row["hi_translated"]=bhashini_api_call("translation", "hi", active_api, row["en_value (current)"])

    
    break

In [76]:
df["hi_translated"] = df["en_value (current)"].progress_apply(lambda x: bhashini_api_call("translation", "hi", active_api, x))


100%|██████████████████████████████████████████████████████████████████████████████| 1047/1047 [07:47<00:00,  2.24it/s]


In [83]:
df.sample(5)

Unnamed: 0,languagekey,en_value (current),hi_translated,hi_transliterated,hi_value(curated),ta_translated,ta_transliterated,ta_value (curated)
60,en.appnavbar.searchforLearningContents,Search for learning contents,सीखने की सामग्री खोजें,सर्च फोर लर्निंग कंटेंट्स,,கற்றல் உள்ளடக்கங்களைத் தேடுங்கள்,,
618,en.home.btnDiscuss,Discuss,चर्चा करें।,डिस्कस,,விவாதிக்கவும்.,,
10,en.common.cbpPortal,CBP Portal,सीबीपी पोर्टल,सीबीपी पोर्टल,,சிபிபி இணையதளம்,,
87,en.cardshublist.quicklinks,Quick links,त्वरित लिंक,क्विक लिंक्स,,விரைவான இணைப்புகள்,,
633,en.home.navigationDetails,Details,विवरण,डिटेल्स,,விவரங்கள்,,


In [78]:
df["ta_translated"] = df["en_value (current)"].progress_apply(lambda x: bhashini_api_call("translation", "ta", active_api, x))


100%|██████████████████████████████████████████████████████████████████████████████| 1047/1047 [08:04<00:00,  2.16it/s]


In [82]:
df["hi_transliterated"] = df["en_value (current)"].progress_apply(lambda x: bhashini_api_call("transliteration", "hi", active_api, x))

100%|██████████████████████████████████████████████████████████████████████████████| 1047/1047 [12:56<00:00,  1.35it/s]


In [84]:
df["ta_transliterated"] = df["en_value (current)"].progress_apply(lambda x: bhashini_api_call("transliteration", "ta", active_api, x))

100%|██████████████████████████████████████████████████████████████████████████████| 1047/1047 [14:08<00:00,  1.23it/s]


In [85]:
df.to_csv("Updated_05_12.csv")

In [88]:
df=pd.read_csv("Updated_05_12.csv")

In [101]:

    df.head()

Unnamed: 0.1,Unnamed: 0,languagekey,en_value (current),hi_translated,hi_transliterated,hi_value(curated),ta_translated,ta_transliterated,ta_value (curated)
0,0,en.common.karmayogiBharat,Karmayogi Bharat,कर्मयोगी भारत,कर्मयोगी भारत,,கர்மயோகி பாரத்,கர்மயோகி பாரத்,
1,1,en.common.hubs,Hubs,हब,हब्स,,மையங்கள்,ஹப்ஸ்,
2,2,en.common.learn,Learn,सीखें।,लर्न,,கற்றுக் கொள்ளுங்கள்.,லர்ன்,
3,3,en.common.discuss,Discuss,चर्चा करें।,डिस्कस,,விவாதிக்கவும்.,டிஸ்கஸ்,
4,4,en.common.network,Network,नेटवर्क,नेटवर्क,,நெட்வொர்க்,நெட்வொர்க்,


In [102]:
def create_Json(u_in):
    final_dict = {}

    for index, row in df.iterrows():
        separated_labels = row["languagekey"].split(".")
        label = f"{separated_labels[0]}.{separated_labels[1]}"

        if label not in final_dict:
            final_dict[label] = {}

        if row[u_in] is not np.nan:
            final_dict[label][separated_labels[2]] = row[u_in]
        else:
            final_dict[label][separated_labels[2]] = row["en_value (current)"]

    file_path = 'output.json'

    with open(file_path, 'w', encoding='utf-8') as json_file:
        json.dump(final_dict, json_file, indent=2, ensure_ascii=False)

    return final_dict




In [103]:
result_dict = create_Json("hi_translated")