### Import Libraries

In [1]:
# for reading data
import numpy as np
import pandas as pd

# for api request
import requests
import json

### Load Dataset CSV

In [2]:
# load the id/dialect table from the CSV on disk
df = pd.read_csv(filepath_or_buffer='data/dialect_dataset.csv')
df

Unnamed: 0,id,dialect
0,1175358310087892992,IQ
1,1175416117793349632,IQ
2,1175450108898565888,IQ
3,1175471073770573824,IQ
4,1175496913145217024,IQ
...,...,...
458192,1019484980282580992,BH
458193,1021083283709407232,BH
458194,1017477537889431552,BH
458195,1022430374696239232,BH


In [3]:
# count missing values in the id column
df['id'].isna().sum()

0

In [4]:
# count rows whose id repeats an earlier row's id
df.duplicated(subset='id').sum()

0

- The dataset ids have no NaNs or duplicates, so we can start collecting the data directly.

### Fetching Data

In [5]:
# turn the id column into a plain Python list of strings
ids_list = df['id'].astype(str).to_list()

# preview the first 10 entries
ids_list[:10]

['1175358310087892992',
 '1175416117793349632',
 '1175450108898565888',
 '1175471073770573824',
 '1175496913145217024',
 '1175668034146643968',
 '1175670153884983296',
 '1175671762580856832',
 '1175715664398561280',
 '1176019816072777728']

In [6]:
# slice boundaries: every 1000th offset, with the total id count as the final edge
steps = [*range(0, len(ids_list), 1000), len(ids_list)]

# preview the last 5 boundaries
steps[-5:]

[455000, 456000, 457000, 458000, 458197]

In [7]:
# empty mapping that the fetch loop fills with the API responses
requested_data = dict()

In [8]:
# iterate over the ids in batches (consecutive pairs of `steps` boundaries),
# request each batch from the API and merge the response into the dictionary
api_url = 'https://recruitment.aimtechnologies.co/ai-tasks'

# one Session for all batches so the HTTP connection is reused between requests
with requests.Session() as session:
    for start, stop in zip(steps[:-1], steps[1:]):
        sliced_ids = ids_list[start:stop]
        # the request body is the JSON-encoded list of id strings
        list_to_json = json.dumps(sliced_ids)
        # timeout (seconds) keeps a stalled connection from hanging the notebook
        response = session.post(api_url, data=list_to_json, timeout=60)
        # stop on an HTTP error status instead of merging an error payload
        response.raise_for_status()
        returned_data = response.json()
        requested_data.update(returned_data)

In [9]:
# how many entries were collected from the API in total
len(requested_data)

458197

In [10]:
# look up each row's stringified id in requested_data and store the result
# as a new 'text' column on the original dataframe
df['text'] = (
    df['id']
    .astype(str)
    .map(requested_data)
)

In [11]:
# display the dataframe with the newly added text column
df

Unnamed: 0,id,dialect,text
0,1175358310087892992,IQ,@Nw8ieJUwaCAAreT لكن بالنهاية .. ينتفض .. يغير .
1,1175416117793349632,IQ,@7zNqXP0yrODdRjK يعني هذا محسوب على البشر .. ح...
2,1175450108898565888,IQ,@KanaanRema مبين من كلامه خليجي
3,1175471073770573824,IQ,@HAIDER76128900 يسلملي مرورك وروحك الحلوه💐
4,1175496913145217024,IQ,@hmo2406 وين هل الغيبه اخ محمد 🌸🌺
...,...,...,...
458192,1019484980282580992,BH,@Al_mhbaa_7 مبسوطين منك اللي باسطانا😅
458193,1021083283709407232,BH,@Zzainabali @P_ameerah والله ماينده ابش يختي
458194,1017477537889431552,BH,@Al_mhbaa_7 شو عملنا لك حنا تهربي مننا احنا مس...
458195,1022430374696239232,BH,@haneenalmwla الله يبارك فيها وبالعافيه 😋😋😋


In [12]:
# drop the id column; rebind to a new frame instead of mutating in place, and
# ignore a missing column so re-running this cell does not raise KeyError
df = df.drop(columns='id', errors='ignore')

In [13]:
# write the final dataframe to disk as UTF-8 CSV, without the row index
df.to_csv(
    path_or_buf='data/requested_dataset.csv',
    index=False,
    encoding='utf-8',
)

### Done!