In [6]:
import os 
import pandas as pd 

In [7]:
df = pd.read_excel("/Users/vince/Salk/PaperGeneration/data/Conditions.xlsx")
df

Unnamed: 0,Circadian,Condition,Disease type,Priority,References
0,,Mosquito Bites,Bites,,
1,,Tick Bites,Bites,,
2,,Spider Bites,Bites,,
3,,Bedbug Bites,Bites,,
4,,Flea Bites,Bites,,
...,...,...,...,...,...
266,,Strabismus,Vision,,
267,,Amblyopia,Vision,,
268,,Visual Migraine (Ocular Migraine),Vision,,
269,,Photophobia,Vision,,


In [19]:
import os
from datetime import datetime
import logging 
import json
import shutil
import string

def sanitize_filename(filename):
  valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
  return ''.join(c for c in filename if c in valid_chars)

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

output_dir = "/Users/vince/Salk/PaperGeneration/data/output"
filtered_dir = "/Users/vince/Salk/PaperGeneration/data/filtered_json"
os.makedirs(filtered_dir, exist_ok=True)

command_dir = "/home/vrothenberg_salk_edu/wagtail/data/filtered_json"

# This will store the commands
commands = []

for i, row in df.iterrows():
    category = row['Disease type']
    condition = row['Condition']
    logger.info(f"[{i}] {condition} - {category}")

    # Create the directory path
    dir_path = os.path.join(output_dir, condition.replace(' ', '_'))
    # Check if the directory exists
    if not os.path.exists(dir_path):
        # If it doesn't exist, skip 
        logger.info(f"Skipping. Does not exist: {dir_path}")
        continue

    # If directory exists, find most recent JSON files
    final_jsons = [f for f in os.listdir(dir_path) if "_final_" in f and f.endswith(".json")]
    uptodate_jsons = [f for f in os.listdir(dir_path) if "_uptodate_" in f and f.endswith(".json")]

    # Find the latest final JSON
    latest_final_json = None
    latest_final_time = None

    for f in final_jsons:
        try:
            dt = datetime.strptime(f.split('_')[-2] + ' ' + f.split('_')[-1].split('.json')[0], '%Y%m%d %H%M%S')
            if latest_final_time is None or dt > latest_final_time:
                latest_final_time = dt
                latest_final_json = f
        except ValueError as e:
            logger.error(f"Error parsing filename {f}: {e}")
            continue

    # Find the latest papers JSON
    latest_uptodate_json = None
    latest_uptodate_time = None

    for f in uptodate_jsons:
        try:
            dt = datetime.strptime(f.split('_')[-2] + ' ' + f.split('_')[-1].split('.json')[0], '%Y%m%d %H%M%S')
            if latest_uptodate_time is None or dt > latest_uptodate_time:
                latest_uptodate_time = dt
                latest_uptodate_json = f
        except ValueError as e:
            logger.error(f"Error parsing filename {f}: {e}")
            continue

    # Check if latest_final_json is valid, if so store in filtered_dir
    if latest_final_json:
        try:
            source_path = os.path.join(dir_path, latest_final_json)
            
            # Validate JSON by loading it
            with open(source_path, 'r') as f:
                json.load(f)  # This will raise an exception if the JSON is invalid

            # Sanitize the filename
            safe_filename = sanitize_filename(latest_final_json)
            destination_path = os.path.join(filtered_dir, safe_filename) 
            shutil.copy(source_path, destination_path)

            command_path = os.path.join(command_dir, safe_filename) 

            # Add the command for this file
            command = f"python manage.py import_articles --category-name \"{category}\" --json-path \"{command_path}\""
            commands.append(command)

        except json.JSONDecodeError as e:
            logger.error(f"Invalid JSON format in {latest_final_json}: {e}")
        except Exception as e:
            logger.error(f"Error on {latest_final_json}")
            logger.error(e)
            continue

    # Check if latest_uptodate_json is valid, if so store in filtered_dir
    elif latest_uptodate_json:
        try:
            source_path = os.path.join(dir_path, latest_uptodate_json)
            
            # Validate JSON by loading it
            with open(source_path, 'r') as f:
                json.load(f)  # This will raise an exception if the JSON is invalid

            # Sanitize the filename
            safe_filename = sanitize_filename(latest_uptodate_json)
            destination_path = os.path.join(filtered_dir, safe_filename)
            shutil.copy(source_path, destination_path)

            command_path = os.path.join(command_dir, safe_filename)

            # Add the command for this file (if you want to upload these files too)
            command = f"python manage.py import_articles --category-name \"{category}\" --json-path \"{command_path}\""
            commands.append(command) 

        except json.JSONDecodeError as e:
            logger.error(f"Invalid JSON format in {latest_uptodate_json}: {e}")
        except Exception as e:
            logger.error(f"Error on {latest_uptodate_json}")
            logger.error(e)
            continue

    else:
        print(f"No valid JSON found for {condition}")
        continue

# Print or save the commands
for command in commands:
    print(command)

# You can save the commands to a file if you want to execute them later
with open('import_commands.sh', 'w') as f:
    for command in commands:
        f.write(command + '\n')

2024-12-18 15:14:48,717 - INFO - [0] Mosquito Bites - Bites
2024-12-18 15:14:48,733 - INFO - [1] Tick Bites - Bites
2024-12-18 15:14:48,734 - INFO - [2] Spider Bites - Bites
2024-12-18 15:14:48,735 - INFO - [3] Bedbug Bites - Bites
2024-12-18 15:14:48,736 - INFO - [4] Flea Bites - Bites
2024-12-18 15:14:48,738 - INFO - [5] Horsefly Bites - Bites
2024-12-18 15:14:48,739 - INFO - [6] Ant Bites - Bites
2024-12-18 15:14:48,741 - INFO - [7] Anemia - Blood
2024-12-18 15:14:48,742 - INFO - [8] Stroke - Brain
2024-12-18 15:14:48,743 - INFO - [9] Anxiety - Brain Health
2024-12-18 15:14:48,744 - INFO - [10] Bipolar syndrome - Brain Health
2024-12-18 15:14:48,746 - INFO - [11] PTSD - Brain Health
2024-12-18 15:14:48,746 - ERROR - Invalid JSON format in PTSD_final_20241218_112817.json: Expecting value: line 1 column 1 (char 0)
2024-12-18 15:14:48,747 - INFO - [12] Postpartum depression - Brain Health
2024-12-18 15:14:48,748 - INFO - [13] Autism - Brain Health
2024-12-18 15:14:48,749 - ERROR - Inva

No valid JSON found for HIV/AIDS


2024-12-18 15:14:49,057 - INFO - [181] Peripheral neuropathy (e.g., diabetic neuropathy) - Pain
2024-12-18 15:14:49,058 - INFO - [182] Postherpetic neuralgia (shingles pain) - Pain
2024-12-18 15:14:49,059 - INFO - [183] Sciatica (nerve root compression) - Pain
2024-12-18 15:14:49,060 - INFO - [184] Cervical radiculopathy (nerve pain in the neck) - Pain
2024-12-18 15:14:49,060 - ERROR - Invalid JSON format in Cervical_radiculopathy_(nerve_pain_in_the_neck)_final_20241218_134323.json: Extra data: line 1 column 8484 (char 8483)
2024-12-18 15:14:49,061 - INFO - [185] Lumbar radiculopathy (nerve pain in the lower back) - Pain
2024-12-18 15:14:49,062 - INFO - [186] Pelvic pain (e.g., sacroiliac joint dysfunction) - Pain
2024-12-18 15:14:49,063 - INFO - [187] Muscle cramps - Pain
2024-12-18 15:14:49,064 - INFO - [188] Carpal tunnel syndrome - Pain
2024-12-18 15:14:49,065 - INFO - [189] Scoliosis - Pain
2024-12-18 15:14:49,066 - INFO - [190] Vertigo - Pain
2024-12-18 15:14:49,068 - INFO - [191

python manage.py import_articles --category-name "Bites" --json-path "/home/vrothenberg_salk_edu/wagtail/data/filtered_json/Mosquito_Bites_final_20241218_112124.json"
python manage.py import_articles --category-name "Bites" --json-path "/home/vrothenberg_salk_edu/wagtail/data/filtered_json/Tick_Bites_final_20241218_112323.json"
python manage.py import_articles --category-name "Bites" --json-path "/home/vrothenberg_salk_edu/wagtail/data/filtered_json/Spider_Bites_final_20241218_112425.json"
python manage.py import_articles --category-name "Bites" --json-path "/home/vrothenberg_salk_edu/wagtail/data/filtered_json/Bedbug_Bites_final_20241218_112157.json"
python manage.py import_articles --category-name "Bites" --json-path "/home/vrothenberg_salk_edu/wagtail/data/filtered_json/Flea_Bites_final_20241218_112140.json"
python manage.py import_articles --category-name "Bites" --json-path "/home/vrothenberg_salk_edu/wagtail/data/filtered_json/Horsefly_Bites_final_20241218_111951.json"
python man