In [1]:
# Imports de base
import sqlite3
from sqlalchemy import create_engine
from datetime import datetime
import sklearn
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
import re


# NLP
import nltk
from keybert import KeyBERT
nltk.download('punkt')

# Initialisation
db_path = '/Users/victorcarre/Code/Projects/llm-memorization/datas/conversations.db'  # Chemin vers la base de données SQLite, à adapter à votre path
conn = sqlite3.connect(db_path)
cur = conn.cursor()

# Pour usage SQLAlchemy plus tard
engine = create_engine(f'sqlite:///{db_path}')

# Initialisation KeyBERT
kw_model = KeyBERT()


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/victorcarre/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
# Création des tables
cur.execute('''
    CREATE TABLE IF NOT EXISTS conversations (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        user_input TEXT,
        llm_output TEXT,
        timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
    )
''')

cur.execute('''
    CREATE TABLE IF NOT EXISTS keywords (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        conversation_id INTEGER,
        keyword TEXT,
        FOREIGN KEY(conversation_id) REFERENCES conversations(id)
    )
''')

conn.commit()


In [3]:
def insert_conversation(user_input, llm_output):
    """
    Insère une nouvelle conversation + mots-clés associés dans la base de données.
    """
    combined_text = user_input + " " + llm_output
    keywords = extract_keywords(combined_text)

    cur.execute("INSERT INTO conversations (user_input, llm_output) VALUES (?, ?)", (user_input, llm_output))
    conversation_id = cur.lastrowid

    for kw in keywords:
        cur.execute("INSERT INTO keywords (conversation_id, keyword) VALUES (?, ?)", (conversation_id, kw))
    
    conn.commit()


In [4]:
def extract_keywords(text, top_n=10):
    """
    Utilise KeyBERT pour extraire des mots-clés pertinents.
    """
    keywords = kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), stop_words='english', top_n=top_n)
    return [kw for kw, _ in keywords]

    # Nettoyage : suppression des doublons et des stop words restants
    unique_keywords = []
    seen = set()
    for kw, _ in keywords:
        kw_clean = kw.lower().strip()
        # Filtrage des mots très courts, déjà vus, ou présents dans les stopwords
        if (
            kw_clean not in seen and
            kw_clean not in ENGLISH_STOP_WORDS and
            len(kw_clean) > 2 and  # facultatif : éviter les mots très courts
            re.match(r'^[a-zA-Z\-]+$', kw_clean)
        ):
            seen.add(kw_clean)
            unique_keywords.append(kw_clean)
        if len(unique_keywords) >= top_n:
            break

    return unique_keywords

def insert_conversation(user_input, llm_output):
    """
    Insère une conversation dans la base et ses mots-clés associés.
    """
    combined_text = f"{user_input} {llm_output}"
    keywords = extract_keywords(combined_text)

    cur.execute(
        "INSERT INTO conversations (user_input, llm_output) VALUES (?, ?)",
        (user_input, llm_output)
    )
    conversation_id = cur.lastrowid

    for kw in keywords:
        cur.execute(
            "INSERT INTO keywords (conversation_id, keyword) VALUES (?, ?)",
            (conversation_id, kw)
        )
    conn.commit()


In [5]:
def search_conversations_by_keyword(search_term):
    """
    Recherche toutes les conversations associées à un mot-clé donné.
    """
    cur.execute('''
        SELECT c.user_input, c.llm_output, c.timestamp
        FROM conversations c
        JOIN keywords k ON c.id = k.conversation_id
        WHERE k.keyword LIKE ?
        ORDER BY c.timestamp DESC
    ''', (f'%{search_term}%',))

    return cur.fetchall()


In [6]:
# Exemple : insertion
user_input = "Comment fonctionnent les panneaux solaires ?"
llm_output = "Les panneaux solaires transforment la lumière en électricité via l'effet photovoltaïque."

insert_conversation(user_input, llm_output)

# Recherche
results = search_conversations_by_keyword("solaire")
for r in results:
    print("Q:", r[0])
    print("A:", r[1])
    print("⏱️", r[2])
    print("-" * 50)


Q: Comment fonctionnent les panneaux solaires ?
A: Les panneaux solaires transforment la lumière en électricité via l'effet photovoltaïque.
⏱️ 2025-06-07 22:52:39
--------------------------------------------------
Q: Comment fonctionnent les panneaux solaires ?
A: Les panneaux solaires transforment la lumière en électricité via l'effet photovoltaïque.
⏱️ 2025-06-07 22:52:39
--------------------------------------------------
Q: Comment fonctionnent les panneaux solaires ?
A: Les panneaux solaires transforment la lumière en électricité via l'effet photovoltaïque.
⏱️ 2025-06-07 22:52:39
--------------------------------------------------


In [8]:
import pandas as pd

query = '''
SELECT c.id, c.timestamp, c.user_input, c.llm_output, GROUP_CONCAT(k.keyword, ', ') AS keywords
FROM conversations c
LEFT JOIN keywords k ON c.id = k.conversation_id
GROUP BY c.id
ORDER BY c.timestamp DESC
LIMIT 50
'''

df = pd.read_sql_query(query, conn)
df.head(60)


Unnamed: 0,id,timestamp,user_input,llm_output,keywords
0,58,2025-06-08 14:25:11,Je veux que ce soit en local,"D'accord, pour exécuter le code localement san...","create, cursor, execute, firebase, insert, nos..."
1,46,2025-06-08 14:25:10,Donc qu'est-ce que ca donnerait avec ce thème ...,Le thème GitHub-Wikipedia (ou Wikipedia) est u...,"href, html, markdown, obsidian, page, styleshe..."
2,47,2025-06-08 14:25:10,Ca n'a pas l'air possible de faire ça. Et si j...,Vous pouvez certainement créer un fichier CSS ...,"certainement, ceux, css, cssclass, custom, fic..."
3,48,2025-06-08 14:25:10,Est-ce possible directement avec ce fichier css ?,"Oui, il est possible de modifier les styles CS...","css, cssclass, fichier, fichiers, font, fonts,..."
4,49,2025-06-08 14:25:10,I want to build a memorizing system for my loc...,"You've got a good start with your project, and...","category, cursor, database, insert, memorizing..."
5,50,2025-06-08 14:25:10,"Yes, give me an updated version of the code wi...","To achieve this, you will need to install `lms...","conversation, conversations, cursor, database,..."
6,51,2025-06-08 14:25:10,(Jupyter) (3.11.3) victorcarre@Mac-mini-de-Vic...,I apologize for the confusion earlier. There i...,"hyphens, import, jupyter, keywords, lmstudio, ..."
7,52,2025-06-08 14:25:10,Jupyter) (3.11.3) victorcarre@Mac-mini-de-Vict...,It seems there's an issue with the scipy packa...,"bin, build, dependencies, gensim, jupyter, mes..."
8,53,2025-06-08 14:25:10,(Jupyter) (3.11.3) victorcarre@Mac-mini-de-Vic...,Apologies for the confusion earlier. It seems ...,"brew, cask, casks, formulae, gfortan, homebrew..."
9,54,2025-06-08 14:25:10,(Jupyter) (3.11.3) victorcarre@Mac-mini-de-Vic...,It seems like there's a problem with the OpenB...,"build, cmake, dependencies, gensim, jupyter, m..."


In [None]:
# Exemple de requête SQL

```sql
SELECT c.user_input, c.llm_output, GROUP_CONCAT(k.keyword, ', ') as keywords
FROM conversations c
LEFT JOIN keywords k ON c.id = k.conversation_id
GROUP BY c.id
ORDER BY c.timestamp DESC
LIMIT 10;