In [1]:
import warnings
# Install a blanket "ignore" filter so Python warnings do not clutter cell outputs.
# NOTE(review): this also hides deprecation warnings from every library used below.
warnings.filterwarnings('ignore')

## Make datasets

In [2]:
%%capture
! pip install datasets

In [3]:
import torch
from datasets import load_dataset

def make_prompt(ddl, question, query=''):
  """Build the text-to-SQL prompt for one example.

  The prompt is a fixed Korean instruction (asking the model to act as a SQL
  bot and write a query answering the Question using the tables in the DDL),
  followed by three sections headed ``### DDL:``, ``### Question:`` and
  ``### SQL:``. Sections are separated by one blank line.

  Args:
    ddl: Table definition(s) placed under the ``### DDL:`` header.
    question: Natural-language question placed under ``### Question:``.
    query: Text placed under ``### SQL:``. The default empty string leaves
      the section open so the prompt ends right after ``### SQL:`` and a newline.

  Returns:
    The assembled prompt string.
  """
  instruction = "당신은 SQL을 생성하는 SQL 봇입니다. DDL의 테이블을 활용한 Question을 해결할 수 있는 SQL 쿼리를 생성하세요."
  sections = [
    instruction,
    f"### DDL:\n{ddl}",
    f"### Question:\n{question}",
    f"### SQL:\n{query}",
  ]
  # A blank line between consecutive sections.
  return "\n\n".join(sections)

# Load the 'test' split of the "origin" configuration and convert it to a
# pandas DataFrame; later cells index it with dataset.loc[...].
dataset = load_dataset("shangrilar/ko_text2sql", "origin")['test'].to_pandas()

# Build every prompt in a single column assignment instead of writing one cell
# at a time inside an iterrows() loop. The query argument is left at its
# default, so each prompt ends with an open "### SQL:" section.
dataset['prompt'] = [
  make_prompt(ddl, question)
  for ddl, question in zip(dataset['context'], dataset['question'])
]

dataset.info()

README.md:   0%|          | 0.00/281 [00:00<?, ?B/s]

train.csv:   0%|          | 0.00/25.6M [00:00<?, ?B/s]

test.csv:   0%|          | 0.00/61.1k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/38246 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/112 [00:00<?, ? examples/s]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112 entries, 0 to 111
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   db_id     112 non-null    int64 
 1   context   112 non-null    object
 2   question  112 non-null    object
 3   answer    112 non-null    object
 4   prompt    112 non-null    object
dtypes: int64(1), object(4)
memory usage: 4.5+ KB


In [4]:
# Show the GPU attached to this runtime and its current memory usage.
! nvidia-smi

Fri Jan 31 22:08:42 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   33C    P0             49W /  400W |       0MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

## Online

In [5]:
%%capture
# Install vLLM, which provides the API server launched below via `python -m vllm...`.
# %%capture keeps pip's output out of the notebook.
! pip install vllm

In [6]:
# Disabled variant of the server launch: no nohup / `&`, so it would run in the
# foreground of this cell. Note it uses --max-model-len 1024, whereas the launch
# command that is actually executed in the next cell uses 512.
# !python -m vllm.entrypoints.openai.api_server \
# --model shangrilar/yi-ko-6b-text2sql --host 127.0.0.1 --port 8888 --max-model-len 1024

In [7]:
# Start vLLM's OpenAI-compatible API server as a background job (trailing `&`)
# so the notebook stays usable; nohup appends the server output to ./nohup.out.
# The server binds to 127.0.0.1:8888 and is started with --max-model-len 512.
!nohup python -m vllm.entrypoints.openai.api_server \
--model shangrilar/yi-ko-6b-text2sql --host 127.0.0.1 --port 8888 --max-model-len 512 &

nohup: appending output to 'nohup.out'


In [16]:
# Print the server log written by nohup to check startup progress or errors.
! cat ./nohup.out

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.
0it [00:00, ?it/s]0it [00:00, ?it/s]
2025-01-31 22:10:36.577357: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-31 22:10:36.593703: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738361436.615734    1174 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738361436.622424    1174 cuda_blas.cc

In [17]:
! curl http://localhost:8888/v1/models

{"object":"list","data":[{"id":"shangrilar/yi-ko-6b-text2sql","object":"model","created":1738361638,"owned_by":"vllm","root":"shangrilar/yi-ko-6b-text2sql","parent":null,"max_model_len":512,"permission":[{"id":"modelperm-42a099e710a7471592efb7327f001a7e","object":"model_permission","created":1738361638,"allow_create_engine":false,"allow_sampling":true,"allow_logprobs":true,"allow_search_indices":false,"allow_view":true,"allow_fine_tuning":false,"organization":"*","group":null,"is_blocking":false}]}]}

## API 요청

In [23]:
import json

# Request body for a completion on the first prompt of the dataset.
request_body = {
    "model": "shangrilar/yi-ko-6b-text2sql",
    "prompt": dataset.loc[0, "prompt"],
    "max_tokens": 128,
    "temperature": 1,
}

# ensure_ascii=False keeps the Korean text as-is instead of \uXXXX escapes.
json_data = json.dumps(request_body, ensure_ascii=False)

print(json_data)

{"model": "shangrilar/yi-ko-6b-text2sql", "prompt": "당신은 SQL을 생성하는 SQL 봇입니다. DDL의 테이블을 활용한 Question을 해결할 수 있는 SQL 쿼리를 생성하세요.\n\n### DDL:\nCREATE TABLE quests (\n  quest_id INT PRIMARY KEY AUTO_INCREMENT,\n  name VARCHAR(255) NOT NULL,\n  description TEXT,\n  reward_experience INT NOT NULL,\n  reward_items VARCHAR(255)\n);\n\n### Question:\n각 보상 아이템별로 보상 경험치의 합을 구해줘\n\n### SQL:\n", "max_tokens": 128, "temperature": 1}


In [24]:
!curl http://localhost:8888/v1/completions \
    -H "Content-Type: application/json" \
    -d '{json_data}'

{"id":"cmpl-79f2e3877e994b3fb2740f49212ca321","object":"text_completion","created":1738362312,"model":"shangrilar/yi-ko-6b-text2sql","choices":[{"index":0,"text":"SELECT reward_items, SUM(reward_experience) FROM quests GROUP BY reward_items;","logprobs":null,"finish_reason":"stop","stop_reason":null,"prompt_logprobs":null}],"usage":{"prompt_tokens":127,"total_tokens":149,"completion_tokens":22,"prompt_tokens_details":null}}

In [25]:
from openai import OpenAI

# Point the OpenAI SDK at the local server; the API key is only a placeholder value.
openai_api_base = "http://localhost:8888/v1"
openai_api_key = "EMPTY"
client = OpenAI(base_url=openai_api_base, api_key=openai_api_key)

# Request a completion for the first prompt of the dataset through the SDK.
completion = client.completions.create(
    model="shangrilar/yi-ko-6b-text2sql",
    prompt=dataset.loc[0, 'prompt'],
    max_tokens=128,
)
print("생성 결과:", completion.choices[0].text)

생성 결과: SELECT reward_items, SUM(reward_experience) AS total_reward FROM quests GROUP BY reward_items;
