In [9]:
import pandas as pd
import os

# Resolve the project root relative to the notebook's working directory.
# The notebook is expected to sit one level below the root; adjust if it moves.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Sampling configuration: how many rows to draw from each class, and the seed
# that makes both the draw and the final shuffle reproducible.
N_UNSUCCESSFUL = 40
N_SUCCESSFUL = 10
RANDOM_SEED = 42

# Load the datasets
unsuccessful_path = os.path.join(project_root, 'data', 'Merged_Unsuccessful_V2.csv')
successful_path = os.path.join(project_root, 'data', 'Merged_Successful_V2.csv')

unsuccessful_df = pd.read_csv(unsuccessful_path)
successful_df = pd.read_csv(successful_path)

# Add a 'success' label to each dataset (0 = unsuccessful file, 1 = successful file)
unsuccessful_df['success'] = 0
successful_df['success'] = 1

# Randomly sample the configured number of rows from each class.
# DataFrame.sample raises ValueError if a file holds fewer rows than requested.
unsuccessful_sample = unsuccessful_df.sample(n=N_UNSUCCESSFUL, random_state=RANDOM_SEED)
successful_sample = successful_df.sample(n=N_SUCCESSFUL, random_state=RANDOM_SEED)

# Concatenate the samples
merged_sample = pd.concat([unsuccessful_sample, successful_sample], ignore_index=True)

# Shuffle the merged dataset so the two classes are interleaved
merged_sample = merged_sample.sample(frac=1, random_state=RANDOM_SEED).reset_index(drop=True)

# Save the merged sample
output_path = os.path.join(project_root, 'data', 'Experiment_Dataset.csv')
merged_sample.to_csv(output_path, index=False)

# Report counts derived from the data itself, so the summary stays correct
# when the sampling configuration above is changed.
n_successful = int(merged_sample['success'].sum())
print(f"Merged dataset with {len(merged_sample)} samples saved to {output_path}")
print(f"Dataset shape: {merged_sample.shape}")
print(f"Successful companies: {n_successful}")
print(f"Unsuccessful companies: {len(merged_sample) - n_successful}")

Merged dataset with 50 samples saved to /Users/wangxiang/Desktop/Startup-Success-Forecasting-Framework/data/Experiment_Dataset.csv
Dataset shape: (50, 18)
Successful companies: 10
Unsuccessful companies: 40


In [10]:
# Print the first row of the merged sample to inspect which columns it carries.
print(merged_sample.head(1))

   Unnamed: 0 org_name                              org_uuid  \
0        3080  Mytower  076d86c0-0ddb-414e-a7d1-4021d82e4f95   

                                founder_linkedin_url  \
0  https://www.linkedin.com/in/meiri-shemesh-b673...   

                                         json_string  \
0  {"version": 1, "hits": 1, "results": 1, "kgver...   

                                     structured_info  \
0  {'name': 'Meiri Shemesh', 'gender': '', 'birth...   

                                           paragraph          domain  \
0  Meiri Shemesh is known for their contribution ...  mytowerapp.com   

      status founded_on                                      category_list  \
0  operating     1/1/16  Internet of Things,Property Development,Proper...   

                     category_groups_list country_code      city  \
0  Internet Services,Real Estate,Software          ISR  Tel Aviv   

                                   short_description  \
0  A Unified All-in-One Innovative Pr

In [12]:
# All imports for this cell are grouped up front so the cell does not depend on
# names imported by an earlier cell (e.g. `os`) and can run on a fresh kernel.
import json
import os
import sys
from pathlib import Path

import pandas as pd
import toml

# Since we're in a notebook, we need to set the project root manually.
# Adjust this path according to your notebook's location relative to the project root.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Make the project's modules importable. The membership check keeps sys.path
# from accumulating duplicate entries when the cell is re-run.
if project_root not in sys.path:
    sys.path.append(project_root)

# Adjust this path to point to your secrets.toml file
secrets_path = Path(os.getcwd()).parent / '.streamlit' / 'secrets.toml'

# Load secrets; fall back to an empty mapping when the file is absent
if secrets_path.exists():
    with open(secrets_path, 'r') as f:
        secrets = toml.load(f)
    print(f"Secrets loaded from {secrets_path}")
else:
    print(f"No secrets file found at {secrets_path}")
    secrets = {}

# Export each top-level secrets entry as an environment variable.
# Values are passed through str(), so a nested TOML table is stored as the
# string form of a dict rather than as individual variables.
for key, value in secrets.items():
    os.environ[key] = str(value)

# NOTE(review): the saved output of this cell ends in a Streamlit
# "FileNotFoundError: No secrets found" that lists ~/.streamlit/secrets.toml and
# <cwd>/.streamlit/secrets.toml as the only valid locations. Exporting
# environment variables presumably does not satisfy that lookup - confirm how
# ssff_framework reads its secrets before relying on this cell.

# The framework is imported only after sys.path and the environment are prepared.
from ssff_framework import StartupFramework

# Load the experiment dataset
input_path = os.path.join(project_root, 'data', 'Experiment_Dataset.csv')
df = pd.read_csv(input_path)

# Initialize the StartupFramework
framework = StartupFramework()

# Function to flatten nested dictionaries
def flatten_dict(d, parent_key='', sep='_'):
    """Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their ancestors' keys with
    ``sep``. Only ``dict`` values are descended into; every other value
    (lists included) is stored unchanged. A nested dictionary that is empty
    contributes no entries.

    Args:
        d: Dictionary to flatten.
        parent_key: Prefix for every key produced; when falsy, top-level
            keys are kept as they are.
        sep: Separator placed between a prefix and a key.

    Returns:
        A new flat dictionary in traversal order. If two paths produce the
        same flattened key, the later value wins.
    """
    flattened = {}
    for name, value in d.items():
        full_name = name if not parent_key else f"{parent_key}{sep}{name}"
        if not isinstance(value, dict):
            flattened[full_name] = value
            continue
        # Recurse with the accumulated key as the new prefix
        flattened.update(flatten_dict(value, full_name, sep=sep))
    return flattened

# One flattened record per analysed company is collected here
results = []
total_companies = len(df)

# Run the framework on every row of the experiment dataset
for index, row in df.iterrows():
    print(f"Processing company {index + 1}/{total_companies}")

    description = row['long_description']
    founder_background = row['paragraph']

    # Text handed to the framework: the description followed by the founder background
    startup_info_str = f"""
    {description}
    Founder background: {founder_background}
    """

    analysis_result = framework.analyze_startup(startup_info_str)

    # Flatten the nested result, then attach the inputs and the 'success'
    # label so each output row can be traced back to its source row
    flat_result = {
        **flatten_dict(analysis_result),
        'input_description': description,
        'input_founder_background': founder_background,
        'input_success': row['success'],
    }
    results.append(flat_result)

# Build the results table in one step from the collected records
results_df = pd.DataFrame(results)

# Persist the results next to the input data
output_path = os.path.join(project_root, 'data', 'Experiment_Results.csv')
results_df.to_csv(output_path, index=False)

n_rows, n_columns = results_df.shape
print(f"Analysis complete. Results saved to {output_path}")
print(f"Total rows processed: {n_rows}")
print(f"Number of columns in result: {n_columns}")

2024-10-25 16:27:43.565 
  command:

    streamlit run /Users/wangxiang/Desktop/Startup-Success-Forecasting-Framework/myenv/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]


FileNotFoundError: No secrets found. Valid paths for a secrets.toml file or secret directories are: /Users/wangxiang/.streamlit/secrets.toml, /Users/wangxiang/Desktop/Startup-Success-Forecasting-Framework/experiments/.streamlit/secrets.toml