# Azure OpenAI 이용 방법

In [None]:
# Install the notebook's dependencies with pinned versions (shell escapes).
# openai is pinned to 0.28; the cells below use the module-level
# `openai.ChatCompletion` / `openai.error` API of that release line.
!pip install openai==0.28
!pip install pdf2image==1.17.0
# System package providing the poppler command-line tools that pdf2image relies on.
!apt-get install poppler-utils
!pip install PyPDF2==3.0.1

In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable
# from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
# Change the working directory to the course folder on the mounted Drive;
# later cells open files using paths relative to the working directory.
%cd /content/drive/MyDrive/패캠_강의자료/Part3
# Print the working directory to confirm the change took effect.
print(os.getcwd())

In [None]:
import openai
import os
import yaml

def get_auth():
    """Load credentials from ``ms_azure/auth.yml`` under the working directory.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's contents.

    Raises:
        FileNotFoundError: If the file does not exist relative to the
            current working directory.
    """
    auth_path = os.path.join(os.getcwd(), 'ms_azure/auth.yml')
    # Keys are kept in a YAML file; the context manager guarantees the handle
    # is closed even if parsing raises.
    with open(auth_path, encoding='utf-8') as auth_file:
        return yaml.safe_load(auth_file)

# Configure the module-level openai client from the 'Azure_OpenAI' section
# of the loaded credentials.
auth = get_auth()
azure_settings = auth['Azure_OpenAI']

openai.api_type = "azure"
# The endpoint host is built from the resource name stored under 'name'.
openai.api_base = f"https://{azure_settings['name']}.openai.azure.com/"
openai.api_version = "2023-07-01-preview"
openai.api_key = azure_settings['key']


# Smoke test: send one system + user message pair and print the reply text.
system_message = "You are an AI assistant that helps people find information."
query = "안녕 넌 이름이 뭐야"

messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": query},
]

# Bind `response` up front so it is always defined, even when the request
# fails and the except branch runs.
response = None
try:
    completion = openai.ChatCompletion.create(
        engine='gpt-4o',
        messages=messages,
        temperature=0.7,
    )
except (openai.error.RateLimitError, openai.error.Timeout) as e:
    # Report which of the two handled errors actually happened.
    print(f'OpenAI API {type(e).__name__} occurred: {e}')
else:
    # Only the text of the first choice is kept.
    response = completion['choices'][0]['message']['content']
    print(response)

# Generate Dataset With Text

In [None]:
from PyPDF2 import PdfReader

# Read the PDF and collect the extracted text of each page, in page order,
# so that pdf_lists[i] holds the text of the page at index i.
reader = PdfReader("./패스트캠퍼스 기업교육 소개서.pdf")

pdf_lists = [page.extract_text() for page in reader.pages]

In [None]:
# Display the extracted text of the page at index 28 (0-based).
pdf_lists[28]

In [None]:
from ms_azure.gpt_rag import RAG

# Run the project's RAG helper with the text of page index 28 as context.
# NOTE(review): `_generate` is defined in ms_azure/gpt_rag.py, which is not
# visible here; the result is later passed to json.loads, so it is presumably
# a JSON string — confirm against that module.
rag = RAG()
response = rag._generate(
    context = pdf_lists[28]
)

In [None]:
import json
# Parse the generated output as JSON and display the resulting object.
# json.loads raises json.JSONDecodeError if `response` is not valid JSON.
json.loads(response)

# Generate Dataset With Image


In [None]:
from pathlib import Path

from pdf2image import convert_from_path

# Path to the source PDF
pdf_path = '한국의안전보고서2023(펼침면).pdf'

# Convert the PDF pages to images
images = convert_from_path(pdf_path)

# Make sure the output directory exists before saving into it.
output_dir = Path('pdf_image')
output_dir.mkdir(parents=True, exist_ok=True)

# Derive the file-name prefix from the PDF's own name so the saved pages are
# named consistently with the source document.
page_prefix = Path(pdf_path).stem

# Save each page as '<prefix>_page_<index>.png' (index is 0-based)
for i, image in enumerate(images):
    image.save(str(output_dir / f'{page_prefix}_page_{i}.png'), 'PNG')

In [None]:
# Encode image to data URL

import base64
from mimetypes import guess_type

# Function to encode a local image into data URL
def local_image_to_data_url(image_path):
    """Return the file at ``image_path`` encoded as a base64 ``data:`` URL.

    The MIME type is guessed from the file name; when nothing can be guessed,
    ``application/octet-stream`` is used instead.
    """
    guessed_type = guess_type(image_path)[0]
    mime_type = 'application/octet-stream' if guessed_type is None else guessed_type

    # Read the raw bytes first, then base64-encode them into ASCII text.
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    payload = base64.b64encode(raw_bytes).decode('utf-8')

    return f"data:{mime_type};base64,{payload}"

# Example usage: encode the saved image for page index 39 as a data URL.
image_path = 'pdf_image/한국의안전보고서2023(펼침면)_page_39.png'
data_url = local_image_to_data_url(image_path)
# Prints the entire data URL, including the full base64 payload of the image.
print("Data URL:", data_url)

In [None]:
from ms_azure.extract_context import Extract_Context

# Pass the data-URL-encoded page image to the project's Extract_Context helper.
# NOTE(review): `_generate` lives in ms_azure/extract_context.py, not visible
# here; its result is later used as the RAG `context`, so it presumably returns
# text describing the image — confirm against that module.
ec = Extract_Context()
response = ec._generate(
    img_url = data_url
)

In [None]:
# Display the value currently bound to `response`.
response

In [None]:
from ms_azure.gpt_rag import RAG

# Use the current `response` value as the RAG context. `response` is rebound
# to the generation result, so the previous value is no longer held under
# that name after this cell runs.
rag = RAG()
response = rag._generate(
    context = response
)

In [None]:
import json
# Parse the generated output as JSON and display the resulting object.
# json.loads raises json.JSONDecodeError if `response` is not valid JSON.
json.loads(response)