In [8]:
import os

from config import config

# Read the OpenAI API key and the model name from the project config object.
api_key = config.OPENAI_API_KEY
model = config.GPT_MODEL

# Export the key into this process's environment.
os.environ["OPENAI_API_KEY"] = api_key

In [1]:
import re

# Read the raw statute text; the context manager closes the file handle
# as soon as the content has been read.
with open("../data/basic_law.txt", "r", encoding="utf-8") as f:
    text = f.read()

# First split on chapter headings ("제 N장") and on "부칙". The pattern is a
# zero-width lookahead, so each heading stays attached to the chunk it opens
# and text is never cut in the middle.
chapters = re.split(r"(?=   제\s*\d+장|부칙)", text)

# Within each chunk, split on article headings and prefix every article with
# the heading text of the chunk it belongs to.
articles = []

for chapter in chapters:
    # Matches both the "제 N조(" and the "제 N조의M(" article heading forms.
    chapter_texts = re.split(r"(?=제\s*\d+조(?:의\d+)?\()", chapter)
    # Everything before the first article heading is the chunk's own heading;
    # strip <...> markup from it before using it as a prefix.
    chapter_title = re.sub(r'<.*?>', '', chapter_texts[0]).strip() + " "
    chapter_texts[1:] = [(chapter_title + body).strip() for body in chapter_texts[1:]]

    if len(chapter_texts) > 1:
        # The chunk contains article headings: keep the article pieces only
        # (the heading text is already prefixed to each of them).
        articles.extend(chapter_texts[1:])
    else:
        # No article heading in this chunk: keep the whole chunk as a single
        # entry, but skip it when it is empty (e.g. an empty file, or the
        # empty leading piece produced when the text starts with a heading).
        standalone = chapter_texts[0].strip()
        if standalone:
            articles.append(standalone)


In [2]:
from preprocess import embed, get_act_name

# Spot-check embed() and get_act_name() on a single parsed article.
text_input = articles[6]

sample_vector = embed(text_input)
print("**embedding**:\n", sample_vector)

sample_act_name = get_act_name(text_input)
print("**act_name**:\n", sample_act_name)

print("**text_input**:\n", text_input)

**embedding**:
 [-0.009680714458227158, 0.029881633818149567, 0.08377908915281296, -0.008586187846958637, -0.006211171858012676, -0.014792046509683132, -0.024398375302553177, -0.0023484500125050545, -0.018776973709464073, -0.007948599755764008, 0.02877648174762726, -0.008501175791025162, -0.01945706643164158, 0.013963181525468826, 0.017491169273853302, -0.0308380164206028, -0.030179176479578018, -0.009032499976456165, -0.0015169286634773016, -0.00657778512686491, -0.005371680948883295, 0.010573337785899639, -0.007188807241618633, 0.009606328792870045, -0.07238751649856567, -0.028627710416913033, 0.009574449621140957, 0.032346975058317184, 0.020402822643518448, -0.01859632320702076, 0.054705068469047546, -0.0045959483832120895, -0.06635168194770813, 0.0032145073637366295, 0.042994700372219086, -0.02280440554022789, -0.0015594344586133957, 0.026502417400479317, 0.0408906564116478, 0.027735088020563126, -0.00553639093413949, -0.033303357660770416, -0.0339621976017952, -0.03547115623950958

In [7]:
from schema import Act

def create_act_objects(articles):
    """Build one ``Act`` per article string.

    Args:
        articles: Iterable of article texts.

    Returns:
        list: ``Act`` instances in input order, each constructed with
        ``act_name=get_act_name(article)``, ``act_content=article`` and
        ``act_vector=embed(article)``.
    """
    built = []
    for article_text in articles:
        # Resolve the name before the embedding, matching the keyword order
        # of the Act(...) call.
        name = get_act_name(article_text)
        vector = embed(article_text)
        built.append(Act(act_name=name, act_content=article_text, act_vector=vector))
    return built

# Build Act objects for every parsed article (create_act_objects calls embed() once per article).
act_list = create_act_objects(articles)

In [None]:
import getpass
import os

import psycopg2

# Connection settings are read from the PG* environment variables. The
# non-secret values fall back to the development defaults; the password is
# never stored in the notebook and is prompted for when PGPASSWORD is unset.
conn = psycopg2.connect(
    dbname=os.environ.get("PGDATABASE", "test"),
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD") or getpass.getpass("PostgreSQL password: "),
    host=os.environ.get("PGHOST", "192.168.0.47"),
    port=os.environ.get("PGPORT", "55432"),
)

try:
    # Create the cursor inside the try block so the connection is still closed
    # if cursor creation fails; the context manager closes the cursor.
    with conn.cursor() as cur:
        # Install the `vector` extension if it is not present yet.
        cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
    conn.commit()
except Exception as e:
    # Best-effort cell: report the failure instead of stopping the notebook.
    print(f"오류 발생: {e}")
finally:
    # Always release the connection.
    conn.close()

In [26]:
import getpass
import os
from urllib.parse import quote_plus

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base
from models import Base
from models import ActTable

# Build the database URL from the PG* environment variables. Non-secret parts
# fall back to the development defaults; the password is never stored in the
# notebook and is URL-escaped so special characters cannot break the URL.
_db_password = os.environ.get("PGPASSWORD") or getpass.getpass("PostgreSQL password: ")
URL_DATABASE = (
    f"postgresql://{os.environ.get('PGUSER', 'postgres')}:{quote_plus(_db_password)}"
    f"@{os.environ.get('PGHOST', '192.168.0.47')}:{os.environ.get('PGPORT', '55432')}"
    f"/{os.environ.get('PGDATABASE', 'test')}"
)

engine = create_engine(URL_DATABASE)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Create the tables registered on the Base imported from models.py.
Base.metadata.create_all(engine)
session = SessionLocal()

try:
    # Stage one ActTable row per Act object, then commit them together.
    session.add_all([
        ActTable(act_name=act.act_name,
                 act_content=act.act_content,
                 act_vector=act.act_vector)
        for act in act_list
    ])
    session.commit()
except Exception:
    # Roll back the failed transaction, then re-raise so the failure is visible.
    session.rollback()
    raise
finally:
    session.close()

In [None]:
import getpass
import os
from urllib.parse import quote_plus

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base
from models import Base
from utils import ChatModule
from config import config

# Build the database URL from the PG* environment variables. Non-secret parts
# fall back to the development defaults; the password is never stored in the
# notebook and is URL-escaped so special characters cannot break the URL.
_db_password = os.environ.get("PGPASSWORD") or getpass.getpass("PostgreSQL password: ")
URL_DATABASE = (
    f"postgresql://{os.environ.get('PGUSER', 'postgres')}:{quote_plus(_db_password)}"
    f"@{os.environ.get('PGHOST', '192.168.0.47')}:{os.environ.get('PGPORT', '55432')}"
    f"/{os.environ.get('PGDATABASE', 'test')}"
)
engine = create_engine(URL_DATABASE)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
session = SessionLocal()

# Retrieval test.
# Read the key and model straight from `config` (imported in this cell) so the
# cell does not depend on `api_key` / `model` variables left by another cell.
chat_module = ChatModule(config.OPENAI_API_KEY, config.GPT_MODEL)
chat_module.retrieve_docs("건설산업기본법의 기본 취지는 무엇인가", session)

In [None]:
# Chatbot test: generate an answer for a sample question.
chat_module.generate_answer("건설산업기본법의 기본 취지는 무엇인가")