In [None]:
#Whisper 학습 코드

In [None]:
import json
import os
import torch
import torchaudio
import numpy as np
from datasets import Dataset, Audio
from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperConfig
from transformers import Trainer, TrainingArguments
import matplotlib.pyplot as plt
from transformers import TrainerCallback

# Trainer callback that keeps a history of the logged loss values.
class LossLoggerCallback(TrainerCallback):
    """Collect the training and evaluation losses reported through ``on_log``.

    Attributes:
        training_losses: values found under the ``"loss"`` key, in log order.
        eval_losses: values found under the ``"eval_loss"`` key, in log order.
    """

    def __init__(self):
        self.training_losses = []
        self.eval_losses = []

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Append any loss values present in ``logs`` to the matching history."""
        # Nothing to record when no log payload (None or empty) was supplied.
        if not logs:
            return
        histories = (
            ("loss", self.training_losses),
            ("eval_loss", self.eval_losses),
        )
        for key, history in histories:
            if key in logs:
                history.append(logs[key])

# Load the annotation export (a JSON list of annotated items).
data_path = ''
with open(data_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Build parallel lists of audio file paths and their text labels.
audio_paths, labels = [], []
label_set = set()
audio_base_path = ""
# Sorted so that the file picked when several names share a suffix does not
# depend on the arbitrary order returned by os.listdir().
all_audio_files = sorted(os.listdir(audio_base_path))

for item in data:
    file_upload_name = item['file_upload']
    # Drop everything up to the first '-' (presumably an upload-id prefix added
    # by the annotation tool — TODO confirm) to recover the original file name.
    target_name = '-'.join(file_upload_name.split('-')[1:])
    # An empty suffix would match every file through str.endswith(''), silently
    # pairing this item with an unrelated recording, so treat it as "no match".
    closest_match = (
        [file for file in all_audio_files if file.endswith(target_name)]
        if target_name
        else []
    )
    if closest_match:
        audio_paths.append(os.path.join(audio_base_path, closest_match[0]))
    else:
        raise FileNotFoundError(f"No matching audio file found for {file_upload_name}")
    # The label is the first choice of the first result of the first annotation.
    try:
        label = item['annotations'][0]['result'][0]['value']['choices'][0]
    except (KeyError, IndexError) as exc:
        # Name the offending item instead of surfacing a bare IndexError/KeyError.
        raise ValueError(f"No label annotation found for {file_upload_name}") from exc
    labels.append(label)
    label_set.add(label)

# Build the Dataset from the collected paths/labels and cast the 'audio'
# column to an Audio feature with a 16 kHz sampling rate.
dataset = Dataset.from_dict({'audio': audio_paths, 'text': labels}).cast_column(
    'audio', Audio(sampling_rate=16000)
)

# Load the Whisper processor and model.
model_name = "openai/whisper-small"
processor = WhisperProcessor.from_pretrained(model_name, language="ko", task="transcribe")

# Start from the pretrained configuration and raise both dropout rates to 0.3.
config = WhisperConfig.from_pretrained(model_name)
for dropout_field in ("dropout", "attention_dropout"):
    setattr(config, dropout_field, 0.3)

model = WhisperForConditionalGeneration.from_pretrained(model_name, config=config)
model = model.to('cuda')

def prepare_dataset(batch):
    """Turn one example into model inputs.

    Reads ``batch["audio"]`` (expects an ``"array"`` ndarray and a
    ``"sampling_rate"`` entry) and ``batch["text"]``, then stores the
    processor's input features under ``"input_features"`` and the tokenised
    text under ``"labels"``.

    Raises:
        ValueError: if the audio array is not a numpy array or the sampling
            rate entry is missing.
    """
    audio = batch["audio"]
    well_formed = isinstance(audio["array"], np.ndarray) and "sampling_rate" in audio
    if not well_formed:
        raise ValueError("오디오 데이터 형식이 올바르지 않습니다.")

    # Audio -> input features; the processor returns a batch, keep its only entry.
    extracted = processor(audio["array"], sampling_rate=audio["sampling_rate"])
    batch["input_features"] = extracted.input_features[0]

    # Text -> token ids; squeeze away the leading batch dimension.
    tokenized = processor(
        text=batch["text"], return_tensors="pt", padding=True, truncation=True
    )
    batch["labels"] = tokenized.input_ids.squeeze(0)

    return batch

# Apply prepare_dataset to every example and drop the columns that existed
# before the mapping.
raw_columns = dataset.column_names
dataset = dataset.map(prepare_dataset, remove_columns=raw_columns)

def verify_labels(batch):
    """Return ``batch`` unchanged, raising ValueError when its labels are empty."""
    if not len(batch["labels"]):
        raise ValueError("빈 레이블이 발견되었습니다.")
    return batch

# Fail fast if any example ended up with an empty label sequence.
dataset = dataset.map(verify_labels)

# Hold out 10% of the examples for evaluation. The seed is fixed so that the
# same examples land in the evaluation split on every run; without it the
# split (and therefore the reported eval loss) changes between runs.
split_dataset = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = split_dataset["train"]
eval_dataset = split_dataset["test"]

# Training configuration
training_args = TrainingArguments(
    output_dir="./whisper-child",
    # Batch sizing: 16 examples per device, accumulated over 4 steps.
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    # Optimisation schedule
    optim="adamw_hf",  # AdamW optimizer
    learning_rate=1e-5,
    weight_decay=0.01,
    warmup_steps=500,
    max_steps=3000,
    # Memory / precision
    gradient_checkpointing=True,
    fp16=True,
    # Evaluation, checkpointing and logging cadence (in steps)
    eval_strategy="steps",
    eval_steps=500,
    save_steps=500,
    logging_steps=10,
    report_to=["tensorboard"],
    load_best_model_at_end=False,
    push_to_hub=False,
)

class _SpeechSeq2SeqCollator:
    """Pad a list of prepared examples into one training batch.

    The audio features and the label token ids need different padding, so they
    are padded separately. Padded label positions are replaced with -100 so the
    loss ignores them. Without a collator like this, examples whose labels have
    different lengths cannot be stacked into a batch.

    Args:
        processor: the WhisperProcessor providing ``feature_extractor`` and
            ``tokenizer``.
        decoder_start_token_id: token the model prepends to the decoder input;
            stripped from the labels when every sequence starts with it.
    """

    def __init__(self, processor, decoder_start_token_id):
        self.processor = processor
        self.decoder_start_token_id = decoder_start_token_id

    def __call__(self, features):
        audio_inputs = [{"input_features": f["input_features"]} for f in features]
        batch = self.processor.feature_extractor.pad(audio_inputs, return_tensors="pt")

        label_inputs = [{"input_ids": f["labels"]} for f in features]
        padded_labels = self.processor.tokenizer.pad(label_inputs, return_tensors="pt")
        # Mask padding so it does not contribute to the loss.
        labels = padded_labels["input_ids"].masked_fill(
            padded_labels["attention_mask"].ne(1), -100
        )
        # The model shifts the labels right and prepends the start token itself,
        # so drop it here to avoid feeding it twice.
        if (labels[:, 0] == self.decoder_start_token_id).all().cpu().item():
            labels = labels[:, 1:]

        batch["labels"] = labels
        return batch


loss_logger = LossLoggerCallback()

data_collator = _SpeechSeq2SeqCollator(
    processor=processor,
    decoder_start_token_id=model.config.decoder_start_token_id,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
    tokenizer=processor.feature_extractor,
    callbacks=[loss_logger]
)

# Run the fine-tuning loop configured above.
trainer.train()

# Persist the fine-tuned model and its processor.
# NOTE(review): both target paths are empty strings here (presumably redacted);
# set them to real output directories before running.
model.save_pretrained('')
processor.save_pretrained('')

# Loss visualisation
# Training loss is logged every `logging_steps` and evaluation loss every
# `eval_steps`, so the two histories have different lengths. Plotting them
# against their list index would squeeze the validation curve into the first
# few ticks of the "Steps" axis; place each value at the optimisation step it
# was recorded at instead.
# NOTE(review): assumes every log entry falls on an exact multiple of the
# configured interval (true for the step-based settings used above).
train_x = [
    training_args.logging_steps * (i + 1)
    for i in range(len(loss_logger.training_losses))
]
eval_x = [
    training_args.eval_steps * (i + 1)
    for i in range(len(loss_logger.eval_losses))
]

plt.figure(figsize=(10, 6))
plt.plot(train_x, loss_logger.training_losses, label="Training Loss")
plt.plot(eval_x, loss_logger.eval_losses, label="Validation Loss", marker="o")
plt.xlabel("Steps")
plt.ylabel("Loss")
plt.title("Training Loss & Validation Loss Over Time")
plt.legend()
plt.grid()
# Save before show(): show() may clear the current figure.
plt.savefig("training_loss_plot.png")
plt.show()


In [None]:
#whisper mapping test

In [None]:
import os
import torch
import torchaudio
import logging
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from jiwer import wer

# Timestamped INFO-level logging for the evaluation run.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Locations of the fine-tuned model and its processor.
model_path = './Telos_LLM_4'
processor_path = './Telos_LLM_Processor_4'

processor = WhisperProcessor.from_pretrained(processor_path)
model = WhisperForConditionalGeneration.from_pretrained(model_path)
model = model.to('cuda')


# Collect the .wav files to evaluate.
# The evaluation loop pairs file i with ground-truth entry i, so the order must
# be stable: os.listdir() returns entries in arbitrary order, hence the sort.
eval_audio_dir = './mapping_test/'
eval_audio_paths = [
    os.path.join(eval_audio_dir, f)
    for f in sorted(os.listdir(eval_audio_dir))
    if f.endswith('.wav')
]


# Breed names that map a transcription to the "dog" label.
dog_breeds = {
    "비숑", "말티즈", "닥스훈트", "불독", "푸들",
    "시츄", "웰시코기", "치와와", "보더콜리", "리트리버",
}
# Breed names that map a transcription to the "cat" label.
cat_breeds = {
    "스핑크스", "랙돌", "러시안블루", "먼치킨", "뱅갈",
    "샴", "아비시니안", "페르시안", "스코티시폴드", "터키시앙고라",
}

# Ground truth as (reference text, expected label), matched to the evaluation
# files by position.
true_transcriptions = [
    ("고양이", "cat"),
    ("강아지", "dog"),
]

def transcribe_audio(audio_path):
    """
    Transcribe audio using the Whisper model.

    The file is down-mixed to mono and resampled to the rate the feature
    extractor expects before feature extraction, so recordings that are not
    already mono at that rate are no longer mislabelled or treated as a batch
    of separate channels.

    Args:
        audio_path: path of the audio file to transcribe.

    Returns:
        The decoded transcription string (special tokens removed).
    """
    waveform, sample_rate = torchaudio.load(audio_path)

    # torchaudio returns (channels, samples); average the channels so a stereo
    # file is not interpreted as two independent utterances.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Resample to the extractor's rate instead of merely claiming it.
    target_rate = processor.feature_extractor.sampling_rate
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)

    input_features = processor(
        waveform.squeeze(0).numpy(),
        sampling_rate=target_rate,
        return_tensors="pt",
    ).input_features
    # Follow the model's device rather than hard-coding 'cuda'.
    input_features = input_features.to(model.device)

    with torch.no_grad():
        predicted_ids = model.generate(input_features)

    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription

def classify_breed(transcription):
    """Map a transcription to "dog", "cat" or "unknown".

    The transcription is stripped and lower-cased; it is labelled "dog" when it
    contains any name from ``dog_breeds``, otherwise "cat" when it contains any
    name from ``cat_breeds``, otherwise "unknown".
    """
    text = transcription.strip().lower()

    # Dogs are checked first, so a text naming both kinds is labelled "dog".
    for species, breed_names in (("dog", dog_breeds), ("cat", cat_breeds)):
        for breed in breed_names:
            if breed.lower() in text:
                return species
    return "unknown"


# Classification accuracy over the evaluation files.
correct = 0
total = len(eval_audio_paths)
# Keep every transcription so the WER step below can reuse it instead of
# running the model over all files a second time.
predicted_texts = []

for i, path in enumerate(eval_audio_paths):
    transcription = transcribe_audio(path)
    predicted_texts.append(transcription)
    label = classify_breed(transcription)

    # Files beyond the end of the ground-truth list are expected to be "unknown".
    if i < len(true_transcriptions):
        true_text, true_label = true_transcriptions[i]
    else:
        true_text, true_label = "", "unknown"

    logging.info(f"Audio file: {path}")
    logging.info(f"Transcription: {transcription}")
    logging.info(f"Normalized transcription: {transcription.strip().lower()}")
    logging.info(f"Predicted label: {label}")
    logging.info(f"Ground truth label: {true_label}")

    if label == true_label:
        correct += 1

# Guard against an empty evaluation directory (would divide by zero).
accuracy = correct / total if total else 0.0
logging.info(f"Classification Accuracy: {accuracy:.4f}")

# Word error rate of all predictions against all reference texts.
all_true_text = " ".join([t[0] for t in true_transcriptions])
all_predicted_text = " ".join(predicted_texts)
if all_true_text.strip():
    overall_wer = wer(all_true_text, all_predicted_text)
    logging.info(f"Overall WER: {overall_wer:.4f}")
else:
    # WER is undefined without reference words; skip instead of crashing.
    overall_wer = None
    logging.warning("No reference transcriptions available; skipping WER.")


In [None]:
#llama3 학습 코드

In [None]:
import os
import torch
import logging
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
from datasets import load_dataset
from peft import LoraConfig, get_peft_model

# Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# CUDA settings: synchronous kernel launches and device-side assertions (both
# debugging aids) and restrict the process to GPU 0.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TORCH_USE_CUDA_DSA"] = '1'

# Hugging Face API token. Read from the HF_TOKEN environment variable so the
# secret never has to be written into the notebook; falls back to the previous
# empty-string value when the variable is not set.
hf_token = os.environ.get("HF_TOKEN", "")

# Load the tokenizer of the base model.
model_name = "meta-llama/Llama-3.1-8b-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)

# Add a pad token when the tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# 8-bit quantization settings
quantization_config = BitsAndBytesConfig(
    llm_int8_enable_fp32_cpu_offload=True,
    llm_int8_has_fp16_weight=False,
    llm_int8_threshold=6.0,
    load_in_8bit=True,
)

# Device placement: put the whole model on the current CUDA device.
device_map = {"": torch.cuda.current_device()}
logger.info(f"Using device map: {device_map}")


def _load_quantized_model(target_device_map):
    """Load the quantized base model onto ``target_device_map``."""
    return AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map=target_device_map,
        quantization_config=quantization_config,
        token=hf_token,
        trust_remote_code=True,
    )


try:
    model = _load_quantized_model(device_map)
except torch.cuda.OutOfMemoryError as e:
    # Not enough GPU memory: log it and retry with the model placed on the CPU.
    logger.error(f"OutOfMemoryError encountered: {e}")
    logger.info("Attempting to load model with CPU offload to save memory.")
    model = _load_quantized_model("cpu")

# Resize the embedding matrix to the tokenizer's vocabulary size (the tokenizer
# may have gained a '[PAD]' token above). Must run before the PEFT wrapping below.
model.resize_token_embeddings(len(tokenizer))

# LoRA settings: rank 16, alpha 32, dropout 0.1, for causal language modelling.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type="CAUSAL_LM",
)
# Wrap the base model with the LoRA configuration.
model = get_peft_model(model, lora_config)

# Load the dataset
dataset = load_dataset("json", data_files="script.json", split="train")

def preprocess_function(examples):
    """Tokenise ``examples["text"]`` and build causal-LM labels.

    Every sequence is padded/truncated to 512 tokens. Labels equal the input
    ids except at padded positions, which are set to -100 so the loss ignores
    them; copying the ids verbatim would make the model train mostly on
    predicting the pad token.

    Args:
        examples: a single example or a batch (as passed by ``Dataset.map``)
            with a ``"text"`` field.

    Returns:
        The tokenizer output with an added ``"labels"`` entry.
    """
    tokenized_inputs = tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=512,
    )
    input_ids = tokenized_inputs["input_ids"]
    attention_mask = tokenized_inputs["attention_mask"]

    def _mask_padding(ids, mask):
        # Keep real tokens, ignore padding (attention mask == 0).
        return [tok if keep == 1 else -100 for tok, keep in zip(ids, mask)]

    if input_ids and isinstance(input_ids[0], int):
        # Single, un-batched example: one flat list of ids.
        tokenized_inputs["labels"] = _mask_padding(input_ids, attention_mask)
    else:
        tokenized_inputs["labels"] = [
            _mask_padding(ids, mask) for ids, mask in zip(input_ids, attention_mask)
        ]
    return tokenized_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True)

# Training configuration
training_args = TrainingArguments(
    output_dir="./results",
    logging_dir="./logs",
    # Batch sizing: 2 examples per device, accumulated over 32 steps.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=32,
    # Schedule
    num_train_epochs=30,
    learning_rate=5e-5,
    # Mixed precision: fp16 on, bf16 off.
    fp16=True,
    bf16=False,
    # Logging and checkpointing; keep at most two checkpoints.
    logging_steps=100,
    save_steps=2000,
    save_total_limit=2,
    report_to="none",
)

# Trainer over the tokenised dataset (no evaluation set is configured).
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)

try:
    logger.info("Starting training...")
    trainer.train()
    logger.info("Training completed.")
except RuntimeError as e:
    logger.error(f"RuntimeError encountered: {e}")
    # Release cached GPU memory before propagating the failure.
    torch.cuda.empty_cache()
    # Bare raise re-raises the active exception with its original traceback.
    raise


# Save the fine-tuned (PEFT-wrapped) model and the tokenizer side by side.
model.save_pretrained("./fine_tuned_llama7")
tokenizer.save_pretrained("./fine_tuned_llama7")


In [None]:
#학습한 llama3 모델 로딩 후 챗봇 프롬프트 실행

In [None]:
import os
import torch
import logging
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import safetensors.torch

# Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# CUDA / allocator settings applied through the process environment.
os.environ.update({
    'CUDA_LAUNCH_BLOCKING': '1',
    "CUDA_VISIBLE_DEVICES": "0",
    "TORCH_USE_CUDA_DSA": '1',
    "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
})

# Release any cached GPU memory before loading the model.
torch.cuda.empty_cache()

# Directory holding the fine-tuned tokenizer/adapter (left empty in this copy).
model_path = ""
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(model_path)
# Add a pad token when the tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# 8-bit base model, placed automatically across the available devices.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8b-Instruct",
    device_map="auto",
    quantization_config=quantization_config,
    trust_remote_code=True,
)

# Grow the embedding matrix when the tokenizer has more entries than the model.
required_vocab_size = len(tokenizer)
if required_vocab_size > model.config.vocab_size:
    model.resize_token_embeddings(required_vocab_size)


# Attach the fine-tuned LoRA adapter stored in model_path.
# Loading adapter_model.safetensors straight into the base model with
# load_state_dict(strict=False) silently skips every key that does not match a
# base-model parameter name, so the adapter weights were never applied. PEFT's
# loader rebuilds the adapter layers and then loads the weights into them.
from peft import PeftModel

model = PeftModel.from_pretrained(model, model_path)
model.eval()


chatbot = pipeline("text-generation", model=model, tokenizer=tokenizer)


def chat_with_bot():
    """Run an interactive console chat loop until the user types 'exit'.

    Each non-empty line is sent to the ``chatbot`` pipeline and only the newly
    generated text is printed (the prompt is not echoed back). The reply budget
    is 150 new tokens, independent of the prompt length. End-of-input or
    Ctrl-C ends the conversation cleanly instead of raising.
    """
    print("Chatbot is ready! Type 'exit' to end the conversation.")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupt is treated like typing 'exit'.
            print()
            print("Ending conversation. Goodbye!")
            break
        if user_input.lower() == 'exit':
            print("Ending conversation. Goodbye!")
            break
        if not user_input:
            # Nothing to answer; prompt again.
            continue
        response = chatbot(
            user_input,
            # max_length would count the prompt tokens as well, leaving little
            # or no room for the answer on long prompts.
            max_new_tokens=150,
            num_return_sequences=1,
            truncation=True,
            # Return only the continuation, not prompt + continuation.
            return_full_text=False,
            batch_size=1,
        )
        print(f"Bot: {response[0]['generated_text'].strip()}")


# Start the interactive chat loop when executed as the main module.
if __name__ == "__main__":
    chat_with_bot()


In [None]:
#whisper & llama3 통신 서버 코드

In [None]:
import logging
from flask import Flask, request, jsonify
from flask_cors import CORS 
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import bitsandbytes as bnb
from waitress import serve  # Waitress를 사용한 타임아웃 설정

# Logging setup
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Flask app with CORS enabled
app = Flask(__name__)
CORS(app)


@app.after_request
def add_header(response):
    """Set the ``Connection: keep-alive`` header on every outgoing response."""
    response.headers['Connection'] = 'keep-alive'
    return response


# Check whether a GPU is available
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
logger.info(f"Using device: {device}")

# Load the tokenizer saved with the fine-tuned model and the 8-bit base model.
model_path = "./fine_tuned_llama7"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8b-Instruct",
    device_map="auto",  # let the library place the model on the available devices
    load_in_8bit=True   # 8-bit quantization
)

# The saved tokenizer may contain tokens added during fine-tuning; make sure
# the embedding matrix has a row for each of them.
if len(tokenizer) > model.config.vocab_size:
    model.resize_token_embeddings(len(tokenizer))

# Apply the fine-tuned LoRA adapter. Previously only the tokenizer came from
# model_path, so the server answered with the plain base model.
# NOTE(review): assumes model_path holds a PEFT adapter, as written by the
# training cell's model.save_pretrained — confirm.
from peft import PeftModel

model = PeftModel.from_pretrained(model, model_path)
model.eval()

# Text-generation pipeline (no device argument: placement is handled by device_map)
chatbot = pipeline("text-generation", model=model, tokenizer=tokenizer)

@app.route('/chat', methods=['POST'])
def chat():
    """Generate a reply for the ``message`` field of a JSON POST body.

    Returns:
        200 with ``{"response": <generated text>}`` on success,
        400 with ``{"error": "No message received"}`` when the body is not a
        JSON object or has no non-empty string ``message``,
        500 with ``{"error": "Internal server error"}`` when generation fails.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so malformed requests get the 400 below rather than an unhandled
    # error.
    payload = request.get_json(silent=True)
    logger.info(f"Received request: {payload}")

    user_input = payload.get('message') if isinstance(payload, dict) else None

    if isinstance(user_input, str) and user_input.strip():
        try:
            # Generate text.
            # NOTE(review): max_length counts the prompt tokens too, and the
            # returned text includes the prompt; left unchanged so existing
            # clients see the same response shape.
            response = chatbot(user_input, max_length=100, num_return_sequences=1)
            generated_text = response[0]['generated_text']

            # Log the response payload
            logger.info(f"Sending response: {generated_text}")
            return jsonify({"response": generated_text})
        except Exception as e:
            # logger.exception records the traceback alongside the message.
            logger.exception(f"Error during chatbot response generation: {e}")
            return jsonify({"error": "Internal server error"}), 500

    logger.warning("No message received in the request.")
    return jsonify({"error": "No message received"}), 400

# Serve the app with Waitress (imported at the top of this cell for this
# purpose) instead of Flask's built-in development server, which is not meant
# to be exposed on 0.0.0.0.
if __name__ == '__main__':
    serve(app, host='0.0.0.0', port=5000)


In [None]:
#naver TTS (public으로 공개하기 애매한 부분들이 많아 수정된 버전)

In [None]:
import os
import sys
import urllib
import urllib2
# NOTE: this cell is Python 2 code (reload, unicode, urllib2).
# Force UTF-8 as the default string encoding.
reload(sys)
sys.setdefaultencoding('utf-8')

# API credentials. client_secret was referenced below without ever being
# defined (NameError); it is read from the environment so the secret does not
# have to be written into the notebook, with a placeholder fallback that
# mirrors client_id.
client_id = "클라이언트 id"
client_secret = os.environ.get("NCP_CLIENT_SECRET", "클라이언트 secret")

# Synthesis parameters.
text = unicode("#llama script 인자")
speaker = "스피커"
speed = "0"
volume = "0"
pitch = "0"
fmt = "mp3"
val = {
    "speaker": speaker,
    "volume": volume,
    "speed":speed,
    "pitch": pitch,
    "text":text,
    "format": fmt
}
data = urllib.urlencode(val)
url = "tts 관리 주소"
headers = {
    "X-NCP-APIGW-API-KEY-ID" : client_id,
    "X-NCP-APIGW-API-KEY" : client_secret
}

# Passing a data payload makes this a POST request.
request = urllib2.Request(url, data, headers)
response = urllib2.urlopen(request)
try:
    rescode = response.getcode()
    if(rescode==200):
        print("TTS mp3 save")
        response_body = response.read()
        with open('.mp3', 'wb') as f:
            f.write(response_body)
    else:
        # rescode is an int; concatenating it directly raised TypeError.
        print("Error Code:" + str(rescode))
finally:
    # Always release the HTTP connection.
    response.close()