In [None]:
# %pip install python-docx tabulate supabase

In [130]:
import os
import docx
from tabulate import tabulate
from docx import Document
import json
import yaml
from supabase import create_client, Client

In [126]:
def docx_to_json(file_path):
    doc = Document(file_path)
    
    protocol = {
        "title": None,
        "author": None,
        "keywords": None,
        "materials": [],
        "procedure": [],
        "comment": None
    }
    
    current_section = None
    
    for para in doc.paragraphs:
        text = para.text.strip()
        if not text:
            continue
        
        # определяем заголовки
        if text.lower().startswith("title"):
            current_section = "title"
            protocol["title"] = text.split(":", 1)[-1].strip()
        elif text.lower().startswith("author"):
            current_section = "author"
            protocol["author"] = text.split(":", 1)[-1].strip()
        elif text.lower().startswith("keywords"):
            current_section = "keywords"
            protocol["keywords"] = text.split(":", 1)[-1].strip()
        elif text.lower().startswith("materials"):
            current_section = "materials"
        elif text.lower().startswith("procedure"):
            current_section = "procedure"
        elif text.lower().startswith("comment"):
            current_section = "comment"
            protocol["comment"] = text.split(":", 1)[-1].strip()
        else:
            # собираем данные секции
            if current_section in ["materials", "procedure"]:
                # нумеруем пункты и подпункты
                step_number = str(len(protocol[current_section]) + 1)
                protocol[current_section].append({
                    "step_number": step_number,
                    "text": text,
                    "parent_step": None  # пока без вложенности
                })
    
    return protocol

def process_folder_to_json(folder_path, output_file="/Users/lizakozlikina/Documents/ProtocolsProject/cell culture/protocols.json"):
    all_protocols = []
    
    for filename in os.listdir(folder_path):
        if filename.endswith(".docx"):
            file_path = os.path.join(folder_path, filename)
            protocol_json = docx_to_json(file_path)
            protocol_json["filename"] = filename  # можно для отслеживания
            all_protocols.append(protocol_json)
    
    # сохраняем в один JSON
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_protocols, f, ensure_ascii=False, indent=2)
    
    return all_protocols

# пример использования
folder_path = "/Users/lizakozlikina/Documents/ProtocolsProject/cell culture/"
all_protocols_json = process_folder_to_json(folder_path)
print(f"Сконвертировано {len(all_protocols_json)} протоколов.")


Сконвертировано 19 протоколов.


In [128]:
SUPABASE_URL = "https://kplgemcfwgjszrwepitq.supabase.co" 
SUPABASE_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImtwbGdlbWNmd2dqc3pyd2VwaXRxIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTc1NTM0Nzg3OCwiZXhwIjoyMDcwOTIzODc4fQ.7QJ99pPkPDIYBEeOLGt0VuOZIU1vHwOjWZ7qOwybmxQ"

from supabase import create_client, Client
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

# Загружаем протоколы
with open("/Users/lizakozlikina/Documents/ProtocolsProject/cell culture/protocols.json", "r", encoding="utf-8") as f:
    protocols = json.load(f)

for protocol in protocols:
    record = {
        "title": protocol.get("title") or "Без названия",
        "author": protocol.get("author"),
        "keywords": protocol.get("keywords"),
        "materials": protocol.get("materials"),
        "procedure": protocol.get("procedure"),
        "comment": protocol.get("comment")
    }

    # Вставляем протокол и получаем id
    response = supabase.table("protocols").insert(record).execute()
    if response.data is None:
        print(f"Failed to upload {protocol.get('title')}: {response}")
        continue

    protocol_id = response.data[0]["id"]

    # Обрабатываем тэги
    keywords = protocol.get("keywords")
    if keywords:
        if isinstance(keywords, str):
            tags_list = [k.strip() for k in keywords.split(",") if k.strip()]
        elif isinstance(keywords, list):
            tags_list = [str(k).strip() for k in keywords if str(k).strip()]
        else:
            tags_list = []
    else:
        tags_list = []

    for tag_name in tags_list:
        # Проверяем наличие тэга
        existing_tag = supabase.table("tags").select("*").eq("name", tag_name).execute()
        if existing_tag.data:
            tag_id = existing_tag.data[0]["id"]
        else:
            tag_response = supabase.table("tags").insert({"name": tag_name}).execute()
            tag_id = tag_response.data[0]["id"]

        # Вставляем связь protocol_id → tag_id
        supabase.table("protocol_tags").insert({"protocol_id": protocol_id, "tag_id": tag_id}).execute()

    print(f"Uploaded '{protocol.get('title')}' with tags {tags_list} successfully")

Uploaded 'ICC – cell culture–cell line staining (35mm dish)' with tags ['immunocytochemistry', 'ICC', 'staining', 'cells', 'cellculture'] successfully
Uploaded 'Adenovirus production Protocol' with tags ['adenovirus'] successfully
Uploaded 'C2C12 culture preparation protocol' with tags ['C2C12; myoblasts; cellculture', 'cells'] successfully
Uploaded 'Quantification of AChR Aggregation in cultured cells' with tags ['AChR', 'aggregation', 'cells', 'cellculture', 'quantification'] successfully
Uploaded 'Preparation of Ringer’s solution for rat' with tags ['Ringer', 'solution'] successfully
Uploaded 'Transfection of high density primary hippocampal neuronal culture Protocol' with tags ['hippocampal', 'neurons', 'cells', 'cellculture', 'transfection'] successfully
Uploaded 'Transfection with FuGENE 6 Protocol' with tags ['transfection', 'FuGENE6', 'cells'] successfully
Uploaded 'Electroporation of C2C12 Myoblasts' with tags ['C2C12', 'myoblasts', 'electroporation', 'cells'] successfully
Upl