In [3]:
from unsloth import FastLanguageModel
from transformers import AutoTokenizer, BitsAndBytesConfig
import os
from unsloth.chat_templates import get_chat_template

home_dir = os.path.expanduser('~/InsureAI')
# model_dir = os.path.join(home_dir, "src", "llm", "results", "checkpoint-504")
# model_dir = os.path.join(home_dir, "src", "llm", "results", "checkpoint-7560")
model_dir = os.path.join(home_dir, "models", "fine_tuned_model")

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_dir, 
    max_seq_length=2048,  
    dtype="auto",  
    load_in_4bit=True 
)

tokenizer = get_chat_template(
   tokenizer,
   chat_template = "llama-3.1",
)
# Set the PAD token to be the same as the EOS token to avoid tokenization issues
tokenizer.pad_token = tokenizer.eos_token
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

messages = [
    {"role": "system", "content": "You are an assistance for InsureAI company."},
    {"role": "user", "content": "Details about the Critical Illness B plan"}
]
# Tokenize the user input with the chat template
inputs = tokenizer.apply_chat_template(
   messages,
   tokenize=True,  
   add_generation_prompt=True,  
   return_tensors="pt", 
   padding=True,  # Add padding to match sequence lengths
).to("cuda") 

attention_mask = inputs != tokenizer.pad_token_id

outputs = model.generate(
   input_ids=inputs,
   attention_mask=attention_mask, 
   max_new_tokens=500,  
   use_cache=True,  # Use cache for faster token generation
   temperature=0.6,  # Controls randomness in responses
   min_p=0.1,  # Set minimum probability threshold for token selection
)

# Decode the generated tokens into human-readable text
text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(text) 

==((====))==  Unsloth 2025.2.15: Fast Qwen2 patching. Transformers: 4.48.2.
   \\   /|    GPU: NVIDIA GeForce RTX 2060. Max memory: 6.0 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu118. CUDA: 7.5. CUDA Toolkit: 11.8. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
<|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

Details about the Critical Illness B plan by InsureAI<|eot_id|><|start_header_id|>assistant<|end_header_id|>

This critical illness plan is a Critical Illness (CIL) coverage. It provides protection against a specific medical event. Coverage ends on <|end_header_id|>.<|eot_id|>

A Critical Illness (CIL) is a type of coverage that provides protection against a specific med

In [1]:
from transformers import AutoTokenizer, BitsAndBytesConfig
from unsloth import FastLanguageModel
import os
from unsloth.chat_templates import get_chat_template

# Define directory paths
home_dir = os.path.expanduser('~/InsureAI')
# model_dir = os.path.join(home_dir, "src", "llm", "results", "checkpoint-504")
model_dir = os.path.join(home_dir, "src", "llm", "results", "checkpoint-7560")

def load_fine_tuned_model(model_dir):
    # Configure quantization to allow CPU offloading
    quantization_config = BitsAndBytesConfig(
        load_in_4bit = True,
        llm_int8_enable_fp32_cpu_offload = True,  # Critical fix
    )

    # Load model with modified config
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = model_dir,
        max_seq_length = 2048,
        quantization_config = quantization_config,  # Use config instead of load_in_4bit
        device_map = "auto",
    )

    # Prepare model for inference
    model = FastLanguageModel.for_inference(model)
    
    # Apply chat template
    tokenizer = get_chat_template(
        tokenizer,
        chat_template = "llama-3.1",
    )
    
    return model, tokenizer

def generate_response(model, tokenizer, user_query, max_new_tokens=500, temperature=0.7):
    # Format the input with chat template
    messages = [
        # {"role": "system", "content": 'Answer without generating your thinking step.'},
        {"role": "user", "content": user_query},
    ]
    
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    
    # Generate response
    inputs = tokenizer(prompt, return_tensors="pt", padding=True)
    # Move inputs to the same device as the model
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
    )
    
    # Decode and clean up the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response.split("<|start_header_id|>assistant<|end_header_id|>")[-1]
    response = response.replace("\\n", "\n").strip()
    
    return response

# Load model and tokenizer
model, tokenizer = load_fine_tuned_model(model_dir)

# Example usage
sample_queries = [
    # "Tell me about InsureAI",
    "Describe Critical Illness A from InsureAI",
    # "What makes Death B insurance special?",
    # "How do I contact InsureAI?",
]

for query in sample_queries:
    print(f"User: {query}")
    response = generate_response(model, tokenizer, query)
    print(f"Assistant: {response}\n")


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.1.8: Fast Qwen2 patching. Transformers: 4.48.2.
   \\   /|    GPU: NVIDIA GeForce RTX 2060. Max memory: 6.0 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu118. CUDA: 7.5. CUDA Toolkit: 11.8. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.1.8 patched 28 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


User: Describe Critical Illness A from InsureAI
Assistant: >

Assuming the above is correct, what would be the risk assessment standard for Critical Illness C in the same insurance industry?

<|eot_id|><|start_header_id|>user<|end_header_id|><|eot_id|><|start_header_id|>assistant<|end_header_id>>

Assuming the above is correct, what would be the risk assessment standard for Critical Illness D in the same insurance industry?

<|eot_id|><|start_header_id|>user<|end_header_id|><|eot_id|><|start_header_id|>assistant<|end_header_id>>

Assuming the above is correct, what would be the risk assessment standard for Critical Illness E in the same insurance industry?

<|eot_id|><|start_header_id|>user<|end_header_id|><|e



In [2]:
#Default model
from transformers import AutoTokenizer, BitsAndBytesConfig
from unsloth import FastLanguageModel
import os
from unsloth.chat_templates import get_chat_template

# Define directory paths
model_dir = "unsloth/DeepSeek-R1-Distill-Qwen-1.5B-bnb-4bit"

def load_fine_tuned_model(model_dir):
    # Configure quantization to allow CPU offloading
    quantization_config = BitsAndBytesConfig(
        load_in_4bit = True,
        llm_int8_enable_fp32_cpu_offload = True,  # Critical fix
    )

    # Load model with modified config
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = model_dir,
        max_seq_length = 2048,
        quantization_config = quantization_config,  # Use config instead of load_in_4bit
        device_map = "auto",
    )

    # Prepare model for inference
    model = FastLanguageModel.for_inference(model)
    
    # Apply chat template
    tokenizer = get_chat_template(
        tokenizer,
        chat_template = "llama-3.1",
    )
    
    return model, tokenizer

def generate_response(model, tokenizer, user_query, max_new_tokens=500, temperature=0.7):
    # Format the input with chat template
    messages = [
        # {"role": "system", "content": 'Answer without generating your thinking step.'},
        {"role": "user", "content": user_query},
    ]
    
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    
    # Generate response
    inputs = tokenizer(prompt, return_tensors="pt", padding=True)
    # Move inputs to the same device as the model
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
    )
    
    # Decode and clean up the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response.split("<|start_header_id|>assistant<|end_header_id|>")[-1]
    response = response.replace("\\n", "\n").strip()
    
    return response

# Load model and tokenizer
model, tokenizer = load_fine_tuned_model(model_dir)

# Example usage
sample_queries = [
    # "Tell me about InsureAI",
    "Describe Critical Illness A from InsureAI",
    # "What makes Death B insurance special?",
    # "How do I contact InsureAI?",
]

for query in sample_queries:
    print(f"User: {query}")
    response = generate_response(model, tokenizer, query)
    print(f"Assistant: {response}\n")


==((====))==  Unsloth 2025.1.8: Fast Qwen2 patching. Transformers: 4.48.2.
   \\   /|    GPU: NVIDIA GeForce RTX 2060. Max memory: 6.0 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu118. CUDA: 7.5. CUDA Toolkit: 11.8. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
User: Describe Critical Illness A from InsureAI
Assistant: I need to find the value of the critical illness A from InsureAI.
The user, in the previous message, provided a table of data from InsureAI, which includes columns like "User ID", "Date of Birth", and "Age".
The user also mentioned that the user is a 30-year-old male who is currently 28 years old.
The assistant, in the previous message, concluded that the critical illness A is 70 years, based on the provided data.

Now, I need to restate this in a clear and concise manne

In [14]:
import os
from transformers import AutoModelForCausalLM, AutoTokenizer
from unsloth import FastLanguageModel
from unsloth.chat_templates import get_chat_template
import torch

def load_model(model_dir):
    """
    Load the fine-tuned model and tokenizer
    """
    # Load model and tokenizer
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_dir,
        max_seq_length=2048,
        load_in_4bit=True,
    )
    
    # Apply the same chat template used in training
    tokenizer = get_chat_template(
        tokenizer,
        chat_template="llama-3.1"
    )
    
    return model, tokenizer

def generate_response(model, tokenizer, prompt, max_new_tokens=256, temperature=0.7):
    """
    Generate a response using the fine-tuned model
    """
    # Format the prompt using the chat template
    messages = [
        {"role": "user", "content": prompt}
    ]
    
    # Apply the chat template
    formatted_prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    
    # Tokenize the formatted prompt
    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        add_special_tokens=True,
        return_attention_mask=True,
    )
    
    # Move inputs to the same device as the model
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    
    # Generate response
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    
    # Decode the generated response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Extract only the assistant's response
    response_parts = response.split("<|start_header_id|>assistant<|end_header_id|>")
    if len(response_parts) > 1:
        return response_parts[-1].strip()
    return response.strip()

def main():
    # Set model directory
    home_dir = os.path.expanduser('~/InsureAI')
    model_dir = os.path.join(home_dir, "src", "llm", "results", "checkpoint-504")
    
    # Load model and tokenizer
    model, tokenizer = load_model(model_dir)
    
    # Example usage
    prompt = "Tell me about InsureAI's services"
    response = generate_response(model, tokenizer, prompt)
    print(f"User: {prompt}")
    print(f"Assistant: {response}")
    
    # Interactive mode
    print("\nEnter your questions (type 'quit' to exit):")
    while True:
        user_input = input("\nUser: ")
        if user_input.lower() == 'quit':
            break
            
        response = generate_response(model, tokenizer, user_input)
        print(f"Assistant: {response}")

if __name__ == "__main__":
    main()

==((====))==  Unsloth 2025.1.8: Fast Qwen2 patching. Transformers: 4.48.2.
   \\   /|    GPU: NVIDIA GeForce RTX 2060. Max memory: 6.0 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu118. CUDA: 7.5. CUDA Toolkit: 11.8. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
from transformers import AutoTokenizer
from unsloth import FastLanguageModel
import os
from unsloth.chat_templates import get_chat_template

# Define directory paths
home_dir = os.path.expanduser('~/InsureAI')
model_dir = os.path.join(home_dir, "src", "llm", "results", "checkpoint-504")

def load_fine_tuned_model(model_dir):
    # Load the fine-tuned model and tokenizer
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_dir,
        max_seq_length=2048,
        load_in_4bit=True,
    )
    
    # Prepare model for inference - THIS IS THE CRUCIAL ADDITION
    model = FastLanguageModel.for_inference(model)
    
    # Apply the same chat template used during training
    tokenizer = get_chat_template(
        tokenizer,
        chat_template="llama-3.1",
    )
    
    return model, tokenizer

def generate_response(model, tokenizer, user_query, max_new_tokens=500, temperature=0.7):
    # Format the input with chat template
    messages = [
        {"role": "user", "content": user_query},
    ]
    
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    
    # Generate response
    inputs = tokenizer(prompt, return_tensors="pt", padding=True).to("cuda")
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        pad_token_id=tokenizer.eos_token_id,
    )
    
    # Decode and clean up the response
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response.split("<|start_header_id|>assistant<|end_header_id|>")[-1]
    response = response.replace("\\n", "\n").strip()
    
    return response

def main():
    # Load model and tokenizer
    model, tokenizer = load_fine_tuned_model(model_dir)
    
    # Example usage
    sample_queries = [
        "Tell me about InsureAI",
        # "Describe Critical Illness A from InsureAI",
        # "What makes Death B insurance special?",
        # "How do I contact InsureAI?",
    ]
    
    for query in sample_queries:
        print(f"User: {query}")
        response = generate_response(model, tokenizer, query)
        print(f"Assistant: {response}\n")

if __name__ == "__main__":
    main()

In [None]:
# Introduce RAG

class InsuranceRAGSystem:
    def __init__(self, model, tokenizer):
        """
        Initialize RAG System with 4 key components:
        1. Database connection - Access to live insurance data
        2. Fine-tuned model - Your custom-trained insurance expert
        3. Tokenizer - Processes text for the model
        4. Query templates - Help translate questions to database queries
        """
        self.model = model
        self.tokenizer = tokenizer
        self.db_conn = sqlite3.connect(os.path.join(os.path.expanduser('~/InsureAI'), 'insurance.db'))
        self.cursor = self.db_conn.cursor()
        
    def _understand_database(self):
        """
        Discover database structure automatically:
        - Identifies available tables
        - Lists columns for each table
        - Helps handle future table additions
        """
        self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = self.cursor.fetchall()
        schema = []
        for table in tables:
            table_name = table[0]
            self.cursor.execute(f"PRAGMA table_info({table_name})")
            columns = [col[1] for col in self.cursor.fetchall()]
            schema.append(f"{table_name} ({', '.join(columns)})")
        return "\n".join(schema)
    
    def _generate_sql(self, user_query):
        """
        Convert natural language to SQL:
        1. Uses your fine-tuned model's understanding
        2. Considers current database structure
        3. Creates safe, executable queries
        """
        schema = self._understand_database()
        prompt = f"""Convert this insurance question to SQL using the schema:
        
        Database Structure:
        {schema}
        
        Question: {user_query}
        SQL Query:"""
        
        inputs = self.tokenizer(prompt, return_tensors="pt", max_length=2048, truncation=True)
        outputs = self.model.generate(**inputs, max_new_tokens=200)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    def _retrieve_data(self, sql_query):
        """
        Safe database interaction:
        1. Executes generated SQL
        2. Returns results in natural language format
        3. Handles errors gracefully
        """
        try:
            self.cursor.execute(sql_query)
            columns = [desc[0] for desc in self.cursor.description]
            results = self.cursor.fetchall()
            return [dict(zip(columns, row)) for row in results[:3]]  # Return top 3 matches
        except Exception as e:
            print(f"Database error: {e}")
            return []
    
    def ask(self, user_query):
        """
        Complete RAG workflow:
        1. Question → SQL
        2. SQL → Database Results
        3. Results + Question → Model Answer
        """
        # Step 1: Generate SQL from question
        sql_query = self._generate_sql(user_query)
        print(f"Generated SQL: {sql_query}")
        
        # Step 2: Get relevant data
        context_data = self._retrieve_data(sql_query)
        if not context_data:
            return "I couldn't find relevant information for that question."
        
        # Step 3: Prepare augmented prompt
        context_str = "\n".join([str(item) for item in context_data])
        full_prompt = f"""Use this insurance policy data to answer:
        
        {context_str}
        
        Question: {user_query}
        Answer:"""
        
        # Step 4: Generate final response
        inputs = self.tokenizer(full_prompt, return_tensors="pt", max_length=2048, truncation=True)
        outputs = self.model.generate(**inputs, max_new_tokens=300)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True).split("Answer:")[-1].strip()

# Initialize RAG system with your fine-tuned model
rag_system = InsuranceRAGSystem(model, tokenizer)

In [11]:
import torch

def generate_response(prompt, max_length=100):
    # Tokenize input
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate response
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_length=max_length,
            pad_token_id=tokenizer.eos_token_id,
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )

    # Decode the output
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    
    return response

In [12]:
prompt = "What is equal to 1+1?"
response = generate_response(prompt)
print(response)


What is equal to 1+1? (1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)
(1+1)



In [13]:
prompt = "Answer directly without providing your thinking or reasoning. [INSURANCE_QUERY] Why choose Death B from InsureAI? ### "
response = generate_response(prompt)
print(response)


Answer directly without providing your thinking or reasoning. [INSURANCE_QUERY] Why choose Death B from InsureAI? ### 1. InsureAI is the name of the product. 2. InsurAI is the product. 3. InsureAI is the product. 4. InsureAI is the product. 5. InsureAI is the product. 6. InsureAI is the product. 7. InsureAI is the product. 


Inference from original model

In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList

# Load model & tokenizer
model_name = "unsloth/Qwen2.5-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if available
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)  # Move model to GPU
max_new_tokens=1000

# Custom stopping criteria for streaming
class StopOnMaxTokens(StoppingCriteria):
    def __init__(self, max_new_tokens):
        self.max_new_tokens = max_new_tokens
        self.current_tokens = 0

    def __call__(self, input_ids, scores, **kwargs):
        self.current_tokens += 1
        return self.current_tokens >= self.max_new_tokens

def generate_response_streaming(prompt, model, tokenizer, max_new_tokens=max_new_tokens):
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    # Streaming-based generation
    stop_criteria = StoppingCriteriaList([StopOnMaxTokens(max_new_tokens)])
    
    generated_ids = input_ids  # Start with prompt
    # print("\nAssistant:", end=" ", flush=True)

    # skip_tokens = {"<think>", ".</think>", ".", "</think>"}  # Define unwanted tokens
    # skip_mode = True  # Start in skip mode

    for _ in range(max_new_tokens):
        with torch.no_grad():
            outputs = model(input_ids=generated_ids)  # Get logits
            next_token_id = torch.argmax(outputs.logits[:, -1, :], dim=-1, keepdim=True)  # Get top token

        if next_token_id.item() == tokenizer.eos_token_id:  # Stop on EOS token
            break

        generated_ids = torch.cat((generated_ids, next_token_id), dim=1)  # Append token

        # Decode new token properly
        new_token = tokenizer.decode(next_token_id[0], skip_special_tokens=True)

        # if skip_mode:
        #     if new_token.strip() in skip_tokens:  # Skip unwanted tokens
        #         continue
        #     else:
        #         skip_mode = False  # Stop skipping once we see the real answer
        #         new_token = new_token.lstrip() # Remove leading space from the first word

        # Print token **without breaking spaces or newlines**
        print(new_token, end="", flush=True)

        torch.cuda.synchronize()  # Ensure GPU processes before printing

    # print("\n")  # Newline after response

# Example prompt
prompt = "How to query two table same column in SQL?"
generate_response_streaming(prompt, model, tokenizer)

tokenizer_config.json:   0%|          | 0.00/4.71k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/605 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/775 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

 I have two table, table1 and table2. I want to query the same column in both table. How can I do that?

```
SELECT table1.column1, table2.column1
FROM table1
INNER JOIN table2
ON table1.column1 = table2.column1

```

I want to query the same column in both table. How can I do that? To query the same column from two different tables in SQL, you need to ensure that the column names are the same in both tables. If the column names are different, you will need to use aliases or join conditions to reference the columns correctly.

Here's an example assuming the column names are the same:

```sql
SELECT t1.column1

KeyboardInterrupt: 