In [1]:
from huggingface_hub import HfApi

# API 인스턴스 생성
api = HfApi()

# 모델 검색 (예: 'deepseek' 관련 모델 찾기)
models = api.list_models(search="deepseek", limit=10)

# 결과 출력
for model in models:
    print(model.modelId)

  from .autonotebook import tqdm as notebook_tqdm


deepseek-ai/DeepSeek-R1
deepseek-ai/Janus-Pro-7B
deepseek-ai/DeepSeek-V3
unsloth/DeepSeek-R1-GGUF
deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
deepseek-ai/DeepSeek-R1-Distill-Llama-8B
huihui-ai/DeepSeek-R1-Distill-Qwen-32B-abliterated
deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
deepseek-ai/deepseek-vl2


In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# 모델과 토크나이저 로드
# # model_name = "deepseek-ai/deepseek-llm-7b-base"
# model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)


# 모델 다운로드
# $ huggingface-cli download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --local-dir ./models/DeepSeek-R1-Distill-Qwen-1.5B

# 로컬 모델 경로 지정
local_model_path = "../models/"
model_name = "DeepSeek-R1-Distill-Qwen-1.5B"

# 로컬에서 토크나이저와 모델 로드
tokenizer = AutoTokenizer.from_pretrained(local_model_path + model_name)
model = AutoModelForCausalLM.from_pretrained(local_model_path + model_name)



In [3]:
# GPU 사용 가능 여부 확인
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"사용 중인 디바이스: {device}")

# 모델을 GPU로 이동
model = model.to(device)

사용 중인 디바이스: cuda


In [19]:
# 입력 텍스트 준비
prompt = "Today's weather is"
inputs = tokenizer(prompt, return_tensors="pt").to(device)  # 입력 데이터도 GPU로 이동

# 모델을 이용한 텍스트 생성
with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        max_length=50,  # 생성할 최대 토큰 길이
        num_return_sequences=1,  # 생성할 문장 수
        do_sample=True,  # 샘플링 여부
        top_p=0.95,  # nucleus sampling
        top_k=50     # top-k 샘플링
    )

# 결과 디코딩 및 출력
result = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(result)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Today's weather is going to be tricky. You have a weather app that you need to check. You have a weather app that you need to check. You have a weather app that you need to check. You have a weather app that you


In [4]:
# 사용자 입력 예시
user_input = "Tell me a joke about AI."

# 입력을 토크나이징
inputs = tokenizer(user_input, return_tensors="pt").to(device)

# 모델로 응답 생성
with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        max_length=100,  # 답변 길이 제한
        temperature=0.8,  # 다양성 조절
        top_p=0.95,
        top_k=50
    )

# 응답 출력
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("🤖 Chatbot:", response)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


🤖 Chatbot: Tell me a joke about AI. It needs to be surprising and have something to do with AI. Maybe it's about how AI is changing jobs? Or something like that.

Okay, I'm thinking about something funny. Maybe a joke that plays on the idea that AI is moving jobs around, but the punchline is that AI is so good at it that it doesn't need the jobs anymore. Or maybe it's about how AI is just as human as humans and is constantly evolving. Or


In [5]:
# 긴 리뷰 입력
review_text = """
This phone has a great camera and an amazing battery life. 
However, the screen sometimes flickers, which is quite annoying. 
Overall, it is a good purchase for the price.
"""

# 입력을 토크나이징
inputs = tokenizer(review_text, return_tensors="pt").to(device)

# 모델로 요약 생성
with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        max_length=50,
        temperature=0.7,
        top_p=0.9
    )

# 요약된 텍스트 출력
summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("📢 Review Summary:", summary)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


📢 Review Summary: 
This phone has a great camera and an amazing battery life. 
However, the screen sometimes flickers, which is quite annoying. 
Overall, it is a good purchase for the price.
But the problem is that the battery life is not


In [6]:
# 영어 문장 입력
english_text = "The restaurant serves delicious pasta with fresh ingredients."

# 입력을 토크나이징
inputs = tokenizer("Translate this to Korean: " + english_text, return_tensors="pt").to(device)

# 모델로 번역 생성
with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        max_length=100,
        temperature=0.7,
        top_p=0.9
    )

# 번역된 결과 출력
translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("🇰🇷 Translated:", translated_text)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


🇰🇷 Translated: Translate this to Korean: The restaurant serves delicious pasta with fresh ingredients. The chef is really talented. The food is prepared with traditional techniques.

Okay, I have a problem with this. The problem is that the restaurant is so busy, and I can't get a seat. I need to figure out a way to get a table, but I don't know the restaurant's seating arrangements.

The manager is a bit unprofessional. I don't think they pay attention to the customer's needs.
