In [1]:
# Cell 1: Setup and Table Extraction from PDF

import os
import pathlib
import google.generativeai as genai
from dotenv import load_dotenv
import pandas as pd
from IPython.display import display, Markdown
import io

# --- 1. Configuration and Setup ---
print("INFO: Configuring the agent...")

# Secrets are read from the project-root .env file. This notebook lives in
# /notebooks, so that file is one directory above the working directory.
dotenv_path = pathlib.Path('..', '.env')
load_dotenv(dotenv_path=dotenv_path)

# Hand the key to the Gemini client, or stop immediately when it is missing.
api_key = os.getenv("GOOGLE_API_KEY")
if api_key:
    genai.configure(api_key=api_key)
else:
    raise ValueError("ERROR: GOOGLE_API_KEY not found. Please set it in the .env file.")

# --- 2. File Upload ---
pdf_file_path = pathlib.Path('../data/raw/2022_AMOIH_Seguela.pdf')
print(f"INFO: Checking for PDF file at '{pdf_file_path}'...")

if pdf_file_path.exists():
    print("INFO: Uploading file to Gemini...")
    # The returned handle is what gets passed to the model next to the prompt,
    # and what is deleted from the server at the end of this cell.
    uploaded_file = genai.upload_file(
        display_name="Seguela Geology Thesis",
        path=pdf_file_path,
    )
    print(f"SUCCESS: Uploaded file '{uploaded_file.display_name}' as {uploaded_file.name}")
else:
    # Nothing to upload: fail before any network call is made.
    raise FileNotFoundError(f"ERROR: The file was not found at {pdf_file_path}. Please make sure it's in the data/raw/ directory.")

# --- 3. Prompt Engineering ---
# Instruction text sent to the model together with the uploaded PDF.
# It identifies the target table by its exact title as quoted below and asks
# for the whole table, headers included.
# The reply is requested as raw CSV with no prose and no markdown fences,
# because it is parsed with pd.read_csv further down in this cell.
prompt = """
You are a specialist in geological data extraction.
From the provided PDF document, please perform the following task:
1.  Locate the table titled "Tableau I: Collars des trous de forage sélectionnés pour l'étude Lithostratigraphique".
2.  Extract the entire content of this table, including the headers.
3.  Return the extracted data strictly in CSV (Comma-Separated Values) format.
4.  Do not include any introductory text, explanations, or markdown formatting like ```csv. Only return the raw CSV content.
"""

# --- 4. Agent Invocation ---
def _strip_code_fences(text):
    """Return ``text`` without a surrounding Markdown code fence.

    The prompt asks for raw CSV, but a model can still wrap its answer in a
    ```csv ... ``` fence. Left in place, the opening fence line would be
    read by pandas as the header row.

    Args:
        text: Raw response text from the model.

    Returns:
        The text with leading/trailing whitespace removed. When the first
        line starts with ```, that line is dropped, and so is a final line
        consisting only of ```. Unfenced text is returned stripped only.
    """
    lines = text.strip().splitlines()
    if lines and lines[0].lstrip().startswith("```"):
        lines = lines[1:]
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
    return "\n".join(lines)


# Everything that happens after the upload runs inside try/finally so the
# uploaded file is removed from the server even when the request fails.
try:
    print("INFO: Initializing the generative model...")
    model = genai.GenerativeModel('gemini-1.5-pro-latest')

    print("INFO: Sending request to the agent. This may take a moment...")
    response = model.generate_content([prompt, uploaded_file])

    # --- 5. Response Parsing and Processing ---
    # Best effort: a parsing or saving problem is reported together with the
    # raw reply instead of aborting the cell.
    try:
        print("INFO: Parsing the agent's response...")
        csv_data = _strip_code_fences(response.text)

        # Use io.StringIO to treat the CSV string as a file for Pandas
        data_io = io.StringIO(csv_data)
        df = pd.read_csv(data_io)

        # --- 6. Display and Save Results ---
        print("SUCCESS: Table extracted successfully. Displaying DataFrame:")
        display(df)

        # Save the DataFrame to the processed data folder
        output_path = pathlib.Path('../data/processed/table_1_drill_collars.csv')
        output_path.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(output_path, index=False)

        print(f"\nSUCCESS: DataFrame saved to '{output_path}'")

    except Exception as e:
        print(f"ERROR: Failed to parse the response or process the data. Error: {e}")
        print("\n--- Raw Agent Response ---")
        # Reading response.text may itself be what failed (for example when
        # the reply carries no text part), so guard the diagnostic output too.
        try:
            display(Markdown(response.text))
        except Exception as text_error:
            print(f"ERROR: The raw response text is unavailable. Error: {text_error}")

finally:
    # --- 7. Clean up the uploaded file ---
    # Always delete the file from the server, whether or not the steps above succeeded.
    print(f"INFO: Deleting uploaded file '{uploaded_file.name}' from the server.")
    genai.delete_file(uploaded_file.name)
    print("INFO: Cleanup complete.")

ModuleNotFoundError: No module named 'google.generativeai'

In [2]:
# Cell 2: Multimodal Analysis of a Geological Map

import os
import pathlib
import google.generativeai as genai
from dotenv import load_dotenv
from IPython.display import display, Markdown, Image

# --- 1. Configuration ---
# Nothing is configured here: the genai client set up by the first cell is
# expected to still be active in this kernel. Run that configuration step
# first when executing this cell on its own.
print("INFO: Re-using agent configuration from the previous cell.")

# --- 2. Load the Image ---
image_path = pathlib.Path('../data/raw/figure_4_geological_map.png')
print(f"INFO: Loading image from '{image_path}'...")

if image_path.exists():
    # Render the map inline first so it can be checked by eye before it is sent.
    print("INFO: Displaying the map that will be sent for analysis:")
    display(Image(filename=image_path, width=600))

    # Upload the image; the returned handle is what the API call receives.
    image_file = genai.upload_file(
        display_name="Seguela Geological Map",
        path=image_path,
    )
else:
    # Without the screenshot there is nothing to analyse.
    raise FileNotFoundError(f"ERROR: Image not found at {image_path}. Please ensure you've saved the screenshot correctly.")


# --- 3. Prompt Engineering for Visual Analysis ---
# Instruction text sent to the model together with the uploaded map image.
# It asks the model to answer, in the role of a geologist, four numbered
# questions: the map's purpose, the lithological units in the legend, the
# structural features and their orientation, and the starred prospects.
# The names given in parentheses are examples supplied to the model.
prompt = """
Act as an expert geologist. Based on the provided geological map ("Figure 4: Carte géologique du projet Séguéla"), please perform the following analysis:

1.  **Main Purpose**: What is the primary purpose of this map?
2.  **Key Lithological Units**: Based on the legend on the right, list the main rock packages (e.g., "Eastern Schists", "Eastern Andesite Package", "Eastern Granites", "Western Granites"). For the "Eastern Andesite Package", list at least three specific rock types mentioned.
3.  **Structural Features**: Identify the key structural features shown in the legend (e.g., faults, shear zones, folds). What is the general orientation of the main geological units and structures?
4.  **Prospect Locations**: Locate and name at least three of the "Roxgold Prospects" that are marked with a star symbol on the map (e.g., Antenna, Boulder, Agouti).
"""

# --- 4. Agent Invocation with Multimodal Input ---
# Everything after the upload runs inside try/finally so the uploaded image is
# removed from the server even when the request or the save step fails.
try:
    print("\nINFO: Initializing model and sending multimodal request...")
    # The model can take a list of inputs, including text prompts and image objects
    model = genai.GenerativeModel('gemini-1.5-pro-latest')
    response = model.generate_content([prompt, image_file])

    # --- 5. Display and Save Results ---
    # Read the text before announcing success, so a reply without usable text
    # is not reported as a successful analysis.
    analysis_text = response.text
    print("SUCCESS: Received analysis from the agent.")

    # Use Markdown to render the response for better readability
    display(Markdown(analysis_text))

    # Save the analysis to a markdown file in the processed data folder.
    # The folder is created on demand, so this also works on a fresh checkout
    # where data/processed does not exist yet.
    output_path = pathlib.Path('../data/processed/figure_4_analysis.md')
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(analysis_text, encoding='utf-8')

    print(f"\nSUCCESS: Analysis saved to '{output_path}'")

finally:
    # --- 6. Cleanup ---
    # Always delete the image from the server, whether or not the steps above succeeded.
    print(f"INFO: Deleting uploaded image file '{image_file.name}' from the server.")
    genai.delete_file(image_file.name)
    print("INFO: Cleanup complete.")

ModuleNotFoundError: No module named 'google.generativeai'