# Core Dietary Guidelines Summarization using Ollama
This notebook extracts text from a file in the processed data folder, summarizes it using Ollama's Llama model, and saves the summary


In [2]:
# imports
import ollama
import os
from bs4 import BeautifulSoup
import requests

# Constants
MODEL = "llama3.2:1b"  # Using the smaller 1B parameter model

In [3]:
# Constants
MODEL = "llama3.2:1b"  # Using the smaller 1B parameter model

In [6]:
!ollama pull llama3.2:1b

[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest 
pulling 74701a8c35f6... 100% ▕████████████████▏ 1.3 GB                         
pulling 966de95ca8a6... 100% ▕████████████████▏ 1.4 KB                         
pulling fcc5a6bec9da... 100% ▕████████████████▏ 7.7 KB                         
pulling a70ff7e570d9... 100% ▕████████████████▏ 6.0 KB                         
pulling 4f659a1e86d7... 100% ▕████████████████▏  485 B                         
verifying sha256 digest 
writing manifest 
success [?25h


In [7]:
# Load the text file from processed data
with open('processed_data/cleaned_dietary_guidelines.txt', 'r', encoding='utf-8') as file:
    text = file.read()

# Split text into chunks to handle long sequences
chunk_size = 8000  
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

print(f"Total characters: {len(chunks)*chunk_size}")

Total characters: 96000


In [8]:
# System prompt for the model
system_prompt = """You will be given a set of dietary guideline texts. Based on these, generate similar content that follows the same structure and tone, with an emphasis on practical, food- and recipe-based recommendations. Follow these instructions:
Omit any explicit mention of the target population.
Do not include research basis or references to levels of evidence.
Focus on clear, actionable dietary recommendations with food examples (e.g., "Use olive oil instead of butter" or "Include legumes like lentils and chickpeas in stews").
Emphasize preparation methods, ingredient swaps, and daily food practices. Write in an instructional tone.
Prioritize realistic advice, especially related to meals and cooking habits.
Keep each guideline section concise and clear.
Do not use overly technical terms or abstract nutritional concepts.
You will receive the base text in sections. For each section, generate a rewritten version following the above rules"""

In [9]:
def messages_for(chunk):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Please summarize the following text, limiting your answer to 300 words with key points:\n\n{chunk}"}
    ]
    return messages

In [11]:
# Process each chunk and collect summaries
summaries = []
for i, chunk in enumerate(chunks):
    print(f"Processing chunk {i+1}/{len(chunks)}...")
    
    response = ollama.chat(
        model=MODEL,
        messages=messages_for(chunk)
    )
    
    summary = response['message']['content']
    summaries.append(summary)

# Combine all summaries
combined_summary = ' '.join(summaries)

Processing chunk 1/12...
Processing chunk 2/12...
Processing chunk 3/12...
Processing chunk 4/12...
Processing chunk 5/12...
Processing chunk 6/12...
Processing chunk 7/12...
Processing chunk 8/12...
Processing chunk 9/12...
Processing chunk 10/12...
Processing chunk 11/12...
Processing chunk 12/12...


In [12]:
# Save the summary to processed data
with open('processed_data/core_dietary_guidelines_ollama.txt', 'w', encoding='utf-8') as file:
    file.write(combined_summary)