In [7]:
# 기본 경로 설정
# ===============================================================
import os
import requests
import subprocess

PROJECT_NAME = "rag"

# Start from the current working directory; the local branch below moves
# ROOT_DIR one level up.
ROOT_DIR = os.getcwd()

# Decide the runtime by probing for the Colab-only package.
try:
    from google.colab import drive, userdata
except ModuleNotFoundError:
    IS_COLAB_MODE = False
    # Local runs use the parent of the working directory as ROOT_DIR.
    ROOT_DIR = os.path.abspath(os.path.join(ROOT_DIR, os.pardir))
    # Environment flags that are only set for local runs.
    os.environ.update(
        {
            "KMP_DUPLICATE_LIB_OK": "True",
            "TOKENIZERS_PARALLELISM": "false",
        }
    )
    print("로컬 모드")
else:
    IS_COLAB_MODE = True
    print("코랩 모드")


# Data directories derived from ROOT_DIR; only DATA_DIR is created here.
DATA_DIR = os.path.join(ROOT_DIR, "data")
RAW_DIR = os.path.join(DATA_DIR, "raw")
os.makedirs(DATA_DIR, exist_ok=True)


# Secret / environment-variable lookup
def get_secret(key_name: str):
    """Return the secret value stored under ``key_name``.

    In Colab mode the value is read with ``userdata.get``. Otherwise the
    ``.env`` file under ``ROOT_DIR`` is loaded with ``load_dotenv`` and the
    value is read from the process environment.

    Args:
        key_name: Name of the secret / environment variable to look up.

    Returns:
        The stored value. In local mode this is ``None`` when the variable
        is not set.
    """
    if not IS_COLAB_MODE:
        # Imported lazily so the package is only needed for local runs.
        from dotenv import load_dotenv

        env_file = os.path.join(ROOT_DIR, ".env")
        load_dotenv(dotenv_path=env_file)
        return os.environ.get(key_name)

    return userdata.get(key_name)


# Download the dataset archive from a GitHub release (Colab only), extract it
# into DATA_DIR, and prepare the run-output directory under the mounted drive.
if IS_COLAB_MODE:

    drive.mount('/content/drive')

    # Fetch the archive unless it is already in the working directory.
    if not os.path.exists("raw.tar.gz"):

        headers = {
            "Authorization": f"token {get_secret('GITHUB_PAT')}",
            "Accept": "application/vnd.github.v3+json"
        }

        release_url = f"https://api.github.com/repos/wonbywondev/ML-DL-data/releases/tags/data-{PROJECT_NAME}"

        # Look up the release by tag. Surface HTTP errors (bad token, missing
        # release) here instead of failing later on an unexpected payload.
        response = requests.get(release_url, headers=headers, timeout=30)
        response.raise_for_status()

        assets = response.json().get('assets', [])
        if not assets:
            raise RuntimeError(
                f"Release 'data-{PROJECT_NAME}' has no downloadable assets"
            )
        asset_id = assets[0]["id"]

        download_url = f"https://api.github.com/repos/wonbywondev/ML-DL-data/releases/assets/{asset_id}"
        download_headers = headers.copy()
        # Ask the API for the asset's binary content rather than its metadata.
        download_headers["Accept"] = "application/octet-stream"

        # Stream into a temporary file and rename only after the download
        # finished, so an interrupted transfer is never mistaken for a
        # complete archive on the next run.
        partial_path = "raw.tar.gz.part"
        with requests.get(download_url, headers=download_headers, stream=True, timeout=30) as r:
            r.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_path, "raw.tar.gz")

    print("· 압축 파일 있음")


    # Extract the archive into DATA_DIR unless RAW_DIR already exists.
    if not os.path.exists(RAW_DIR):
        # check=True: a failed extraction must raise instead of being
        # reported as complete by the message below.
        subprocess.run(["tar", "-xzvf", "raw.tar.gz", "-C", DATA_DIR], check=True)

    print("· 압축 해제 완료")
    print("· 환경 세팅 완료")


    # Output directory for this project's runs, located under ROOT_DIR/drive.
    DRIVE_DIR = os.path.join(ROOT_DIR, "drive", "Shareddrives", "codeit-part3-team4")
    SAVE_DIR = os.path.join(DRIVE_DIR, "runs", PROJECT_NAME)

    os.makedirs(SAVE_DIR, exist_ok=True)


# Delete every regular file named .DS_Store below DATA_DIR.
# Done in pure Python instead of shelling out to `find`, so the cell also
# runs on systems where a POSIX `find` binary is not available.
for _dirpath, _dirnames, _filenames in os.walk(DATA_DIR):
    _ds_store = os.path.join(_dirpath, ".DS_Store")
    # Regular files only; symlinks are left alone (matches `find -type f`).
    if os.path.isfile(_ds_store) and not os.path.islink(_ds_store):
        os.remove(_ds_store)

로컬 모드


In [8]:
# Show the entries of RAW_DIR (the cell output lists what is in the raw data directory).
os.listdir(RAW_DIR)

['data_list.csv', 'data_list.xlsx', 'files']