# Mastering Applied Skills in Management, Analytics and Entrepreneurship

## DATA COLLECTION TECHNIQUES
## Part VII. Introduction to API services

### 1. Libraries

In [None]:
import json
import base64
import requests
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
def access_data(file_path):
    '''
    Reads credentials from a JSON file.

    Parameters:
        file_path: path to the JSON file with the credentials.

    Returns:
        The parsed JSON content exactly as `json.load` produces it.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.

    '''
    # JSON text is UTF-8 by specification, so decode it explicitly instead of
    # relying on the platform's locale encoding.
    with open(file_path, encoding='utf-8') as file:
        credentials = json.load(file)
    return credentials


def encode_file(file_path):
    '''
    Read a file as raw bytes and return them as a Base64 text string,
    the format used when passing file content to the API.
    See Yandex OCR manual here:
    https://yandex.cloud/en-ru/docs/vision/operations/ocr/text-detection-pdf

    '''
    with open(file_path, mode='rb') as source:
        encoded = base64.b64encode(source.read())
    # b64encode returns bytes; convert them to str for the caller.
    return str(encoded, 'utf-8')

### 2. Yandex OCR example

Yandex OCR [documentation](https://yandex.cloud/en-ru/docs/vision/quickstart).

In [None]:
# Load the API credentials from a local JSON file and print only their keys,
# so the secret values themselves do not end up in the notebook output.
creds = access_data(file_path='data/access_ya_api.json')
print(creds.keys())

In [None]:
# Path to the image that is sent to the OCR service.
# NOTE(review): the double dot before the extension looks like a typo —
# confirm against the actual file name in the data directory.
file_path = 'data/example_ocr..jpg'
print(file_path)

In [None]:
# IPython shell escape: list the file (with `$file_path` expanded from the
# Python variable) to check that it exists.
!ls -la $file_path

In [None]:
# Open the image with Pillow and show it with matplotlib as a visual check.
img = Image.open(file_path)
plt.imshow(img)
plt.show()

In [None]:
# Base64-encode the image file so it can be placed in the JSON request body,
# and report the length of the encoded string.
content = encode_file(file_path)
print('encoded to', len(content), 'symbols')

In [None]:
# Show the type of the encoded content.
type(content)

In [None]:
# Request body for the OCR call: file format, recognition languages and the
# Base64-encoded file content.
data = {
    'mimeType': 'JPEG',
    'languageCodes': ['en'],
    'content': content
}

In [None]:
# HTTP headers for the OCR call: JSON content type, API-key authorization,
# the folder ID and the `x-data-logging-enabled` flag.
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Api-Key {creds["secret_key"]}',
    'x-folder-id': creds['folder_id'],
    'x-data-logging-enabled': 'true'
}

In [None]:
# Show the types of the request body and the headers.
type(data), type(headers)

In [None]:
# Send the image to the OCR endpoint. The body is serialized to a JSON string
# explicitly with `json.dumps` and passed through the `data` argument.
URL_OCR_SERVICE = 'https://ocr.api.cloud.yandex.net/ocr/v1/recognizeText'
r = requests.post(
    url=URL_OCR_SERVICE,
    headers=headers,
    data=json.dumps(data)
)

In [None]:
# Show the type of the object returned by `requests.post`.
type(r)

In [None]:
# Parse the response body as JSON and display it.
r.json()

In [None]:
# Top-level keys of the parsed response.
r.json().keys()

In [None]:
# Keys nested under 'result'.
r.json()['result'].keys()

In [None]:
# The 'textAnnotation' part of the result.
r.json()['result']['textAnnotation']

In [None]:
# Take the 'fullText' field of the text annotation; it is reused as input in
# the language detection and translation cells of this notebook.
text = r.json()['result']['textAnnotation']['fullText']
print(text)

### 3. Yandex Translate API example

Yandex Translate [documentation](https://yandex.cloud/en-ru/docs/translate/quickstart).

#### 3.1. Languages available

In [None]:
# Request body for the languages call: only the folder ID.
data = {
    'folderId': creds['folder_id']
}

In [None]:
# HTTP headers for the Translate API calls: JSON content type and API-key
# authorization.
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Api-Key {creds["secret_key"]}'
}

In [None]:
# Ask the Translate API for the list of languages.
URL_LANGS = 'https://translate.api.cloud.yandex.net/translate/v2/languages'
# Send the `data` body (it carries the folder ID) with the request, the same
# way the detect and translate requests in this notebook pass it.
r = requests.post(
    url=URL_LANGS,
    headers=headers,
    json=data
)

In [None]:
# Display the response object (its repr shows the HTTP status code).
r

In [None]:
# Parse the response body as JSON and display it.
r.json()

#### 3.2. Language detection

In [None]:
# Use the first 100 characters of the recognized text as the sample for
# language detection, and build the request body around it.
string = text[:100]
print('string:', string)
data = {
    'text': string,
    'folderId': creds['folder_id']
}

In [None]:
# Call the detect endpoint; passing the dict via `json=` lets requests
# serialize the body to JSON itself.
URL_DET = 'https://translate.api.cloud.yandex.net/translate/v2/detect'
r = requests.post(
    url=URL_DET,
    headers=headers,
    json=data
)

In [None]:
# Display the response object (its repr shows the HTTP status code).
r

In [None]:
# Parse the response body as JSON and display it.
r.json()

#### 3.3. Translate

In [None]:
# Request body for the translate call: target language, the list of texts to
# translate (here the whole recognized text as a single item) and the folder ID.
target_language = 'ru'
texts = [text]
data = {
    'targetLanguageCode': target_language,
    'texts': texts,
    'folderId': creds['folder_id']
}

In [None]:
# Headers for the translate call (identical to the ones defined in
# section 3.1).
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Api-Key {creds["secret_key"]}'
}

In [None]:
# Send the texts to the translate endpoint.
URL_TRANSLATE = 'https://translate.api.cloud.yandex.net/translate/v2/translate'
r = requests.post(
    url=URL_TRANSLATE,
    headers=headers,
    json=data  # NOTE: we may use `data=json.dumps(data)`
)

In [None]:
# Display the response object (its repr shows the HTTP status code).
r

In [None]:
# Parse the response body as JSON and display it.
r.json()

In [None]:
# For every item in 'translations', print the detected source language and
# the translated text; newlines are replaced with spaces in the printed text.
for t in r.json()['translations']:
    print('language detected:', t['detectedLanguageCode'])
    print('translation result:', t['text'].replace('\n', ' '))