In [2]:
from AutoFineTuner.tool import codeLauncher
from AutoFineTuner.tool import codeMaker
from AutoFineTuner.tool import codeReader
from AutoFineTuner.tool.paths import get_output_paths, get_proj_paths
from AutoFineTuner.tool import json_tools
from AutoFineTuner.tool import llms
from AutoFineTuner.workflows.codeRepair.repair import repair_code
from AutoFineTuner.workflows.codeRefactor.refactor import start_refactor
from AutoFineTuner.workflows.finetuningManager.tuning_manager import start_finetuning

import os

In [3]:
# --- Run configuration ---------------------------------------------------------
# Project root: the location where params.txt and the refactored script are generated.
proj_path = "/home/jeongyuseong/바탕화면/오픈소스경진대회/AutoFineTuner/"
# Training results are stored in "outputs/" under the project root. The path is
# derived from proj_path (instead of repeating the literal) so both stay in sync
# when the project is moved; the resulting string is identical to the old literal.
save_dir = os.path.join(proj_path, "outputs/")
target_name = "main.py"             # source script whose contents are read below
refactored_name = "test_output.py"  # file name under which the refactored code is written
conda_env_name = "AItxt"            # conda environment name passed to the repair / fine-tuning calls
max_steps = 10                      # NOTE(review): only referenced by commented-out code in the visible cells
userPrompt = "이틀 내에 작업을 완료하고싶어. 데이터는 참고로 대략 10~20만개의 문장이 준비돼있어."

# Resolve the output-side and project-side path bundles from the settings above.
output_paths = get_output_paths(save_dir=save_dir)
proj_paths = get_proj_paths(
    proj_root=proj_path,
    target=target_name,
    refactored=refactored_name,
)

## Required data
# Contents of the target script; consumed by the refactor and repair cells.
source_file_contents = codeReader.read_text_strict(str(proj_paths.target))
# refactored_code = codeReader.read_text_strict(str(proj_paths.refactored))

FileNotFoundError: 파일이 존재하지 않습니다: /home/jeongyuseong/바탕화면/오픈소스경진대회/AutoFineTuner/main.py

In [7]:
# Hand the original source text and the user's request to the refactoring
# workflow, then print the text it returns.
# NOTE(review): the recorded output below begins with a Markdown analysis rather
# than Python code - confirm the returned string is valid source before it is
# written to the refactored script.
refactored_code = start_refactor(
    user_prompt=userPrompt,
    source_code_contents=source_file_contents,
)
print(refactored_code)

### 1. **AI 라이브러리/모델 목적/훈련 흐름 분석**

#### **라이브러리 및 모델**
- **라이브러리**: 
  - `transformers` (HuggingFace): `RobertaForSequenceClassification`, `AutoTokenizer` 사용
  - `torch`: 딥러닝 연산 및 모델 학습
  - `sklearn`: 데이터 분할 (`train_test_split`)
  - `pandas`, `numpy`: 데이터 처리
- **모델**: 
  - **`klue/roberta-base`** (KoLRUE 기반 한국어 RoBERTa) + 2-class 분류 헤드 (`num_labels=2`).  
  - 목적: 텍스트 분류 (이진 분류, 예: "AI 작성 여부 판단").

#### **훈련 흐름**
1. **데이터 로드**: 
   - `from_csv` 함수로 CSV에서 텍스트/레이블 로드 (`train.csv`).
   - `BertDataset` 클래스로 토큰화 및 배치 생성.
2. **모델 초기화**: 
   - 사전 학습된 RoBERTa 모델에 분류 레이어 추가.
   - `AdamW` 옵티마이저 + 학습률 스케줄러 (`linear with warmup`) 적용.
3. **학습 루프**:
   - **훈련**: 
     - 배치 단위 학습, 크로스엔트로피 손실(`loss`) 계산 및 역전파.
     - `flat_accuracy`로 정확도 평가.
   - **검증**: 
     - 소프트맥스 적용 후 클래스별 확률 비교.
4. **추론**: 
   - 테스트 데이터(`test.csv`)에 대해 `generated` 확률 예측 후 CSV 출력.

---

### 2. **튜닝 후보 하이퍼파라미터**

#### **(1) 학습률 (`lr`)**
- **현재 값**: `5e-5` (라인 142).
- **제안 범위**: `[2e-5, 3e-5, 5e-5, 1e-4]`.  
  **근거**: BERT 계열 모델의 일

In [8]:
# Write the refactored text to the configured refactored-script path; the call's
# return value is left as the cell output.
# NOTE(review): other calls wrap proj_paths.refactored in str(), while this one
# passes it unchanged to a parameter named `path_str` - confirm which form
# write_text_atomic expects.
codeMaker.write_text_atomic(
    content=refactored_code,
    path_str=proj_paths.refactored,
)

PosixPath('/home/jeongyuseong/바탕화면/오픈소스경진대회/AutoFineTuner/test_output.py')

In [9]:
# Run the repair workflow for the written script in the configured conda
# environment. The recorded log below shows the script being launched through
# `conda run ... python <pyfile> --healthcheck`.
repaired = repair_code(
    pyfile=str(proj_paths.refactored),
    conda_env_name=conda_env_name,
    source_code_content=source_file_contents,
    refactored_code_str=refactored_code,
)
# Display the returned state (the recorded output is a dict that includes a 'sourceCode' key).
repaired

실행 명령어 상태 :  ['/home/jeongyuseong/anaconda3/bin/conda', 'run', '--no-capture-output', '-n', 'AItxt', 'python', '/home/jeongyuseong/바탕화면/오픈소스경진대회/AutoFineTuner/test_output.py', '--healthcheck']
ion 5.28.3 is exactly one major version older than the runtime version 6.31.1 at tensorflow/core/framework/op_def.proto. Please update the gencode to avoid compatibility violations in the next runtime release.
2025-08-20 15:57:31.944362: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-s

{'sourceCode': 'from transformers import AutoModel, AutoTokenizer\n\nimport torch\nimport torch.nn as nn\nfrom torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\nfrom keras.utils import pad_sequences\nfrom sklearn.model_selection import train_test_split\nfrom transformers import BertTokenizer, BertConfig\nfrom transformers import BertForSequenceClassification, get_linear_schedule_with_warmup\nfrom torch.optim import AdamW\nfrom tqdm import tqdm, trange  #for progress bars\nimport pandas as pd\nimport io\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom IPython.display import Image #for image rendering\n\n\nfrom dataset import BertDataset\nfrom dataset import from_csv\n\n\nfrom transformers import RobertaForSequenceClassification\n\nmodel = RobertaForSequenceClassification.from_pretrained(\n    "klue/roberta-base",\n    num_labels=2\n)\ntokenizer = AutoTokenizer.from_pretrained("klue/roberta-base")\n\ntrain_texts, train_labels = from_csv("/home

In [10]:
# Start the fine-tuning manager with the project/output roots, the script names,
# the conda environment and the user's request. The recorded output below shows
# a metadata.json path being reported during the run.
fineTuned = start_finetuning(
    user_prompt=userPrompt,
    cur_conda_env=conda_env_name,
    proj_path=proj_paths.proj_root,
    save_dir=output_paths.save_root,
    target_name=target_name,
    refactored_name=refactored_name,
)
# Display the returned state (the recorded output is a dict with 'userPrompt' and 'refactoredCode' keys).
fineTuned

```json
{
    "param_info": {
        "epochs": {
            "is_tuned": false,
            "best_param": 1,
            "cur_count": 0,
            "limit_count": 5
        },
        "batch_size": {
            "is_tuned": false,
            "best_param": 1,
            "cur_count": 0,
            "limit_count": 5
        },
        "train_path": {
            "is_tuned": false,
            "best_param": "/home/jeongyuseong/바탕화면/오픈소스경진대회/AutoFineTuner/datas/train.csv",
            "cur_count": 0,
            "limit_count": 5
        }
    },
    "time_limit": 3600,
    "cur_exec_time": 0
}
```
메타데이터 저장경로 :  /home/jeongyuseong/바탕화면/오픈소스경진대회/AutoFineTuner/outputs/metadata.json
{'time_limit': 3600, 'cur_exec_time': 0, 'param_info': {'epochs': {'is_tuned': False, 'best_param': 1, 'cur_count': 0, 'limit_count': 5}, 'batch_size': {'is_tuned': False, 'best_param': 1, 'cur_count': 0, 'limit_count': 5}, 'train_path': {'is_tuned': False, 'best_param': '/home/jeongyuseong/바탕화면/오픈소스경진대회/AutoFin

{'userPrompt': '이틀 내에 작업을 완료하고싶어. 데이터는 참고로 대략 10~20만개의 문장이 준비돼있어.',
 'refactoredCode': 'import sys, json, time, pickle\nfrom pathlib import Path\n\ndef _save_model_generic(model, path: Path) -> str:\n    """Try torch save, else pickle; always write to *.pt"""\n    path.parent.mkdir(parents=True, exist_ok=True)\n    try:\n        import torch  # type: ignore\n        try:\n            torch.save(getattr(model, "state_dict", lambda: model)(), path)\n        except Exception:\n            torch.save(model, path)\n    except Exception:\n        with open(path, "wb") as f:\n            pickle.dump(model, f)\n    return str(path)\n\ndef _emit_json_line(payload: dict) -> None:\n    sys.stdout.write(json.dumps(payload, ensure_ascii=False) + "\\n")\n    sys.stdout.flush()\n\ndef _save_json_line(payload: dict) -> None:\n    # Alias to keep compatibility with pseudocode\n    _emit_json_line(payload)\n\ndef autofinetuner_result() -> dict:\n    """\n    Returns:\n      {\n        "model_pt_path": s

In [11]:
# NOTE(review): disabled cell. It drives the refactor workflow by hand and refers
# to a `refactor` module that the visible import cell does not import; it looks
# like an earlier form of the start_refactor(...) call - confirm before re-enabling
# or removing.
# ## Declare the code-refactoring state
# refactor_state: refactor.CodeRefactorState = {
#     "sourceCode": source_file_contents,
#     "userPrompt": userPrompt,
# }
# refactor_workflow = refactor.getAnalyerWorkflow()
# final_state = refactor_workflow.invoke(refactor_state)

# # Extract only the refactored_code string
# refactored_code_str = final_state["refactoredCode"]
# codeMaker.write_text_atomic(path_str=str(proj_paths.refactored), content=refactored_code_str)


In [12]:
# NOTE(review): disabled cell. It refers to a `repair` module that the visible
# import cell does not import, and to `refactored_code_str`, which is only
# assigned in other commented-out code.
# NOTE(review): if re-enabled as written, the trailing comma on the next line
# would make `args` a one-element tuple containing the list, not the list itself.
# args=["--healthcheck"],
# code_args = {
#     "pyfile": str(proj_paths.refactored),
#     "conda_env": conda_env_name,
#     "args" : args,
#     "timeout": 300,
#     "raise_on_error": False,
# }

# ## Define the code-repair state
# codeState: repair.CodeState = {
#     "sourceCode": source_file_contents,
#     "refactoredCode": refactored_code_str,
#     "log_content": "",
#     "result": {},
#     "exc_args": code_args,
#     "count": 0,
# }

# repair_workflow = repair.getCodeRepairWorkflow()
# repaired_code = repair_workflow.invoke(codeState)

In [13]:
# NOTE(review): disabled cell. It would load proj_paths.target (the original
# script path) into `repaired_code` - confirm that is the intended file before
# re-enabling.
# repaired_code = codeReader.read_text_strict(path_str=str(proj_paths.target))
# repaired_code

In [14]:

# NOTE(review): disabled cell. `manager` and `ftPaths` are not imported or defined
# in the visible cells, and this is the only place `max_steps` is referenced.
# # 2) Create the initial state
# workState: manager.WorkState = {}
# workState["userPrompt"] = userPrompt
# workState["refactoredCode"] = repaired_code
# workState["count"] = 0
# workState["cur_conda_env"] = "AItxt"

# # 3) Run
# final_state = manager.run_finetuning(workState, ftPaths, max_steps=max_steps)