In [8]:
!pip install -q streamlit pyngrok pandas numpy openpyxl xlsxwriter plotly

import streamlit as st
import pandas as pd
import numpy as np
from io import BytesIO
import base64
import os
import sys
import subprocess
from pyngrok import ngrok
import time
import threading
import inspect
import plotly.express as px


# The ngrok auth token is a credential and must never be committed to source
# control. It is read from the NGROK_AUTH_TOKEN environment variable instead;
# any token that was previously hard-coded here should be revoked and rotated.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "")
if NGROK_AUTH_TOKEN:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
else:
    # Without an explicit token pyngrok falls back to whatever is already
    # configured for the local ngrok installation (if anything).
    print("NGROK_AUTH_TOKEN is not set; relying on the existing ngrok configuration")

# Sample data
# Embedded CSV used as the company reference table. Columns, per the header
# row: Company, Technology Stack, Annual Revenue, Employee Count.
# Some rows deliberately leave fields empty (e.g. EcomEngine has no revenue,
# QuantumLeap AI has neither technology stack nor revenue).
COMPANIES_DATA = """Company,Technology Stack,Annual Revenue,Employee Count
InnovateHub,SaaS,15000000.0,75
DataWeave Solutions,FinTech,50000000.0,250
CloudSphere Inc.,Cloud Computing,120000000.0,500
NextGen Robotics,Hardware,5000000.0,30
Healthify,HealthTech,25000000.0,120
EcomEngine,E-commerce Platform,,200
QuantumLeap AI,,,15
SecureKey Systems,Cybersecurity,75000000.0,300
GreenLeaf Organics,Retail,1000000.0,10
ConnectSphere,Social Media,8000000.0,45
LogiTrack,Logistics,200000000.0,600
FinPal,FinTech,3500000.0,25
MobileFirst Devs,Mobile Development,750000.0,5
PixelPerfect,Design Agency,1200000.0,12
SustainFarms,AgriTech,6000000.0,50
SmartHome Solutions,IoT,18000000.0,80
VirtualHealth,Telehealth,40000000.0,200
TechWave Innovations,Software Development,30000000.0,150
BioSync Health,HealthTech,9000000.0,40
EduTech Minds,EdTech,22000000.0,85
FinGuard Analytics,FinTech,31000000.0,120
AutoNav Systems,Automotive,15000000.0,60
GreenEnergy Solutions,CleanTech,85000000.0,200
NeuroLink Devices,MedTech,27000000.0,90
TraceAI Analytics,AI & ML,5100000.0,35
SolarWorks Tech,Clean Energy,7200000.0,25
MarketPulse Commerce,Social Commerce,13000000.0,55
ChipCore Systems,Semiconductor,95000000.0,220"""

# Embedded CSV of sample leads. Columns, per the header row: Company,
# Industry, Owner Name, Owner Email, Revenue, Employees Count.
# Note the column names differ from COMPANIES_DATA ("Revenue" vs
# "Annual Revenue", "Employees Count" vs "Employee Count"), revenue is stored
# as text such as "$10M", and many fields are intentionally blank.
LEADS_DATA = """Company,Industry,Owner Name,Owner Email,Revenue,Employees Count
Company 1,,Bob Smith,bob@example.com,,0.0
Company 2,HealthTech,Bob Smith,,$10M,0.0
Company 3,Cloud Computing,Alice Johnson,,$10M,0.0
Company 4,,Diana Lee,,$10M,
Company 5,Retail,Alice Johnson,,,40.0
Company 6,Cloud Computing,Diana Lee,,$10M,300.0
Company 7,Cloud Computing,Bob Smith,bob@example.com,$1.2M,0.0
Company 8,,Alice Johnson,bob@example.com,,150.0
Company 9,FinTech,,contact@company.com,$1.2M,0.0
Company 10,Retail,Charlie Kim,,,150.0
Company 11,,Diana Lee,bob@example.com,$10M,150.0
Company 12,Retail,Charlie Kim,contact@company.com,$1.2M,0.0
Company 13,Retail,Charlie Kim,,,
Company 14,Cloud Computing,Alice Johnson,alice@example.com,$45M,150.0
Company 15,HealthTech,Charlie Kim,contact@company.com,$10M,150.0
Company 16,EdTech,Grace Lee,grace.lee@company16.com,$5M,25.0
Company 17,FinTech,Raj Patel,raj.patel@company17.com,$20M,60.0
Company 18,CleanTech,Maria Gonzalez,maria.g@company18.com,$8M,45.0
Company 19,AI & ML,Anita Desai,anita.d@company19.com,$6M,30.0
Company 20,HealthTech,Tom Harris,tom.harris@company20.com,$12M,80.0
Company 21,Logistics,Peter Jones,peter.jones@company21.com,$30M,150.0
Company 22,Retail,Jessica Wu,jessica.wu@company22.com,$14M,70.0"""

# Load sample data
@st.cache_data
def load_data():
    """Parse the embedded sample CSV text into DataFrames.

    Returns:
        A ``(companies, leads)`` tuple of DataFrames read from
        ``COMPANIES_DATA`` and ``LEADS_DATA`` respectively.
    """
    raw_tables = (COMPANIES_DATA, LEADS_DATA)
    companies, leads = (
        pd.read_csv(BytesIO(csv_text.encode())) for csv_text in raw_tables
    )
    return companies, leads

# Clean revenue data
def clean_revenue(value):
    """Convert a revenue value such as ``"$1.2M"`` into a plain number.

    Args:
        value: A number, a string (optionally with ``$``, thousands commas
            and a trailing ``K``/``M``/``B`` magnitude suffix, in either
            case), or a missing value.

    Returns:
        The numeric revenue. Numbers are returned unchanged; missing, empty
        or unparseable input yields ``np.nan`` instead of raising.
    """
    if pd.isna(value) or value == '':
        return np.nan
    if isinstance(value, (int, float)):
        return value

    multipliers = {'K': 1_000, 'M': 1_000_000, 'B': 1_000_000_000}
    text = str(value).replace('$', '').replace(',', '').strip()

    # Only a *trailing* letter is treated as a magnitude suffix, so ordinary
    # words that merely contain "M" or "K" are not mis-parsed.
    scale = multipliers.get(text[-1:].upper())
    if scale is None:
        scale = 1
    else:
        text = text[:-1]

    # Every conversion happens inside the try so malformed text maps to NaN.
    try:
        return float(text) * scale
    except ValueError:
        return np.nan

# Calculate priority score (1-5)
def calculate_priority_score(row):
    """Rate a lead from 1 to 5 based on its revenue and headcount.

    Args:
        row: A mapping-like object (DataFrame row or dict) providing
            ``'Annual Revenue'`` and ``'Employee Count'`` through ``.get``.

    Returns:
        An integer from 1 to 5, or ``np.nan`` when either input is missing.
    """
    revenue = row.get('Annual Revenue')
    employees = row.get('Employee Count')
    if pd.isna(revenue) or pd.isna(employees):
        return np.nan

    # Normalize each input to the 0-1 range. The lower clamp keeps negative
    # (bad) data from pushing the final score below the 1-star minimum.
    revenue_score = min(max(revenue / 200000000, 0), 1)  # Cap at $200M
    employee_score = min(max(employees / 1000, 0), 1)  # Cap at 1000 employees

    # Weighted average (60% revenue, 40% employees)
    combined_score = (revenue_score * 0.6) + (employee_score * 0.4)

    # Map 0-1 onto 1-5 stars
    return round(combined_score * 4) + 1

# Initialize session state
def init_session_state():
    """Populate ``st.session_state`` with defaults for any key not yet set.

    Keys handled: ``saved_leads`` and ``scraped_data`` (empty DataFrames),
    ``enterprise_mode`` (``False``) and ``email_template`` (the default
    outreach message). Keys that already exist are left untouched.
    """
    default_template = (
        "Subject: Partnership Opportunity with {Company}\n"
        "\n"
        "Hi {Owner Name},\n"
        "\n"
        "I noticed your work at {Company} and think we could collaborate on...\n"
        "\n"
        "Best regards,\n"
        "Your Name\n"
    )

    # Factories rather than values, so each DataFrame default is a new object.
    default_factories = {
        'saved_leads': pd.DataFrame,
        'scraped_data': pd.DataFrame,
        'enterprise_mode': lambda: False,
        'email_template': lambda: default_template,
    }

    for key, make_default in default_factories.items():
        if key not in st.session_state:
            setattr(st.session_state, key, make_default())

# Scraping function (simulated)
def scrape_leads(keywords, industry, min_employees, max_employees):
    """Simulate lead scraping by filtering the embedded company table.

    Args:
        keywords: Text to look for in ``'Technology Stack'``. It is matched
            literally and case-insensitively; falsy values skip the filter.
        industry: Industry name matched the same way; ``'All'`` or a falsy
            value skips the filter.
        min_employees: Inclusive lower bound on ``'Employee Count'``.
        max_employees: Inclusive upper bound on ``'Employee Count'``.

    Returns:
        A DataFrame of matching companies with placeholder contact columns
        (``Owner Name``, ``Owner Email``, ``Phone``, ``LinkedIn``) and a
        ``Priority Score`` column.
    """
    # In a real implementation, this would connect to scraping APIs
    companies, _ = load_data()

    # regex=False: the search terms come straight from user input, so
    # characters such as "+" or "(" must not be interpreted as a pattern.
    filtered = companies
    if keywords:
        filtered = filtered[
            filtered['Technology Stack'].str.contains(keywords, case=False, na=False, regex=False)
        ]
    if industry and industry != 'All':
        filtered = filtered[
            filtered['Technology Stack'].str.contains(industry, case=False, na=False, regex=False)
        ]

    # Copy after filtering so the column assignments below write to an
    # independent frame rather than to a slice of the source table.
    filtered = filtered[
        (filtered['Employee Count'] >= min_employees) &
        (filtered['Employee Count'] <= max_employees)
    ].copy()

    # Simulate adding contact info
    company_slug = filtered['Company'].str.lower().str.replace(' ', '')
    filtered['Owner Name'] = "Contact " + filtered['Company'].str[:3]
    filtered['Owner Email'] = company_slug + "@example.com"
    filtered['Phone'] = "555-" + (filtered.index + 1000).astype(str)
    filtered['LinkedIn'] = "linkedin.com/in/" + company_slug

    # Add priority scores
    filtered['Priority Score'] = filtered.apply(calculate_priority_score, axis=1)

    return filtered[['Company', 'Technology Stack', 'Annual Revenue',
                     'Employee Count', 'Owner Name', 'Owner Email',
                     'Phone', 'LinkedIn', 'Priority Score']]

# Enrichment function
def enrich_leads(leads_df):
    """Augment uploaded leads with data from the company reference table.

    Args:
        leads_df: DataFrame of leads; it is left-joined to the company table
            on its ``'Company'`` column.

    Returns:
        The joined DataFrame with gaps in ``Technology Stack``,
        ``Annual Revenue`` and ``Employee Count`` filled from the reference
        table, revenue normalized via ``clean_revenue``, a ``Priority Score``
        column added and the temporary ``*_enriched`` columns removed.
    """
    reference, _sample_leads = load_data()
    merged = leads_df.merge(
        reference,
        how='left',
        left_on='Company',
        right_on='Company',
        suffixes=('', '_enriched'),
    )

    # Prefer the lead's own value; fall back to the reference copy, which only
    # exists as "<column>_enriched" when both inputs carried the column.
    for column in ('Technology Stack', 'Annual Revenue', 'Employee Count'):
        fallback = merged.get(column + '_enriched', pd.Series(np.nan))
        merged[column] = merged[column].fillna(fallback)

    # Normalize revenue, then score every row
    merged['Annual Revenue'] = merged['Annual Revenue'].apply(clean_revenue)
    merged['Priority Score'] = merged.apply(calculate_priority_score, axis=1)

    helper_columns = [name for name in merged.columns if '_enriched' in name]
    return merged.drop(columns=helper_columns, errors='ignore')

# Export to Excel
def to_excel(df):
    """Render *df* as an in-memory Excel workbook and return its raw bytes.

    The data is written without the index to a single sheet named ``Leads``
    using the ``xlsxwriter`` engine.
    """
    buffer = BytesIO()
    with pd.ExcelWriter(buffer, engine='xlsxwriter') as workbook:
        df.to_excel(workbook, sheet_name='Leads', index=False)
    workbook_bytes = buffer.getvalue()
    return workbook_bytes

# Visualization functions
def plot_priority_distribution(df):
    """Draw a histogram of the ``Priority Score`` column in Streamlit.

    Shows a warning instead when the column is absent or entirely null.
    """
    has_scores = 'Priority Score' in df and not df['Priority Score'].isnull().all()
    if not has_scores:
        st.warning("No priority scores available for visualization")
        return

    histogram = px.histogram(
        df,
        x='Priority Score',
        color='Priority Score',
        color_discrete_sequence=px.colors.sequential.RdBu_r,
        labels={'Priority Score': 'Priority Score (1-5)'},
        title='Lead Priority Distribution',
    )
    histogram.update_layout(bargap=0.1, xaxis=dict(tickmode='linear'))
    st.plotly_chart(histogram, use_container_width=True)

def plot_revenue_vs_employees(df):
    """Draw a revenue-versus-headcount scatter plot in Streamlit.

    Rows missing ``Annual Revenue`` or ``Employee Count`` are excluded.
    Points are colored by ``Priority Score`` and labelled by ``Company`` only
    when those columns are present, so a frame without them still plots.
    A warning is shown instead when there is nothing to plot.
    """
    if 'Annual Revenue' not in df or 'Employee Count' not in df:
        st.warning("Missing revenue or employee data")
        return

    valid_df = df.dropna(subset=['Annual Revenue', 'Employee Count'])
    if valid_df.empty:
        st.warning("No valid data for visualization")
        return

    # Plotly raises if asked to use a column that is not in the frame, so the
    # optional encodings are added only when their columns exist.
    optional_args = {}
    if 'Priority Score' in valid_df:
        optional_args['color'] = 'Priority Score'
        optional_args['color_continuous_scale'] = 'viridis'
    if 'Company' in valid_df:
        optional_args['hover_name'] = 'Company'

    fig = px.scatter(
        valid_df,
        x='Employee Count',
        y='Annual Revenue',
        size='Annual Revenue',
        title='Revenue vs Employee Count',
        log_x=True,
        size_max=30,
        **optional_args
    )
    fig.update_layout(yaxis_tickprefix='$', yaxis_tickformat=',.0f')
    st.plotly_chart(fig, use_container_width=True)

def plot_industry_analysis(df):
    """Draw a bar chart of lead counts per industry in Streamlit.

    The industry is taken as the text before the first comma of
    ``Technology Stack``; a warning is shown when that column is absent.
    """
    if 'Technology Stack' not in df:
        st.warning("No industry data available")
        return

    # Work on a copy so the caller's frame does not gain a helper column
    grouped = df.copy()
    grouped['Industry Group'] = grouped['Technology Stack'].str.split(',').str[0]

    tally = grouped['Industry Group'].value_counts().reset_index()
    tally.columns = ['Industry', 'Count']

    bar_chart = px.bar(
        tally,
        x='Industry',
        y='Count',
        color='Count',
        color_continuous_scale='tealrose',
        title='Leads by Industry',
    )
    bar_chart.update_layout(xaxis_title="", yaxis_title="Number of Leads")
    st.plotly_chart(bar_chart, use_container_width=True)

# Main app interface
def main():
    """Render the Streamlit lead-management app.

    Configures the page, initializes session state, draws the sidebar and
    then renders whichever page is selected there: Dashboard, Scrape Leads,
    Enrich Leads, Manage Leads, Lead Analysis or Enterprise Outreach.
    Helpers are nested so the function stays self-contained when its source
    is copied into a standalone script.
    """

    def priority_stars(score):
        # One star per priority point; 'N/A' when the score is missing.
        return '⭐' * int(score) if not pd.isna(score) else 'N/A'

    def lead_label(company):
        # Label a company with its first owner name, tolerating frames that
        # have no 'Owner Name' column or no matching row.
        leads = st.session_state.saved_leads
        if 'Owner Name' not in leads:
            return str(company)
        owners = leads.loc[leads['Company'] == company, 'Owner Name']
        owner = owners.iloc[0] if not owners.empty else 'N/A'
        return f"{company} ({owner})"

    st.set_page_config(
        page_title="Lead Management Pro",
        page_icon="📊",
        layout="wide"
    )

    # Initialize session state
    init_session_state()

    st.sidebar.title("Navigation")
    app_mode = st.sidebar.radio("Go to", [
        "Dashboard",
        "Scrape Leads",
        "Enrich Leads",
        "Manage Leads",
        "Lead Analysis",
        "Enterprise Outreach"
    ])

    st.sidebar.header("Account Settings")
    st.session_state.enterprise_mode = st.sidebar.checkbox(
        "Enable Enterprise Features",
        value=st.session_state.enterprise_mode
    )

    # Dashboard
    if app_mode == "Dashboard":
        st.title("📊 Lead Management Dashboard")
        companies, leads = load_data()

        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Company Database")
            st.dataframe(companies, height=300)

        with col2:
            st.subheader("Sample Leads")
            st.dataframe(leads, height=300)

        st.subheader("Saved Leads Overview")
        if not st.session_state.saved_leads.empty:
            st.write(f"Total Saved Leads: {len(st.session_state.saved_leads)}")

            # Basic stats
            col1, col2, col3 = st.columns(3)
            col1.metric("Companies", len(st.session_state.saved_leads['Company'].unique()))
            if 'Annual Revenue' in st.session_state.saved_leads:
                avg_rev = st.session_state.saved_leads['Annual Revenue'].mean()
                col2.metric("Avg. Revenue", f"${avg_rev/1000000:.2f}M" if not pd.isna(avg_rev) else "N/A")
            if 'Employee Count' in st.session_state.saved_leads:
                avg_emp = st.session_state.saved_leads['Employee Count'].mean()
                col3.metric("Avg. Employees", f"{avg_emp:.0f}" if not pd.isna(avg_emp) else "N/A")

            # Priority distribution
            if 'Priority Score' in st.session_state.saved_leads:
                avg_priority = st.session_state.saved_leads['Priority Score'].mean()
                # Guard against an all-missing column, which would show "nan".
                st.metric(
                    "Average Priority Score",
                    f"{avg_priority:.1f} ⭐" if not pd.isna(avg_priority) else "N/A"
                )

            # Industry distribution
            if 'Technology Stack' in st.session_state.saved_leads:
                st.bar_chart(st.session_state.saved_leads['Technology Stack'].value_counts().head(5))
        else:
            st.info("No saved leads yet. Scrape or import leads to get started.")

    # Scraping functionality
    elif app_mode == "Scrape Leads":
        st.title("🔍 Scrape Leads")
        st.info("Discover new leads from public sources")

        with st.expander("Scraping Parameters", expanded=True):
            col1, col2 = st.columns(2)
            with col1:
                keywords = st.text_input("Technology Keywords")
                min_emp = st.number_input("Min Employees", 0, 10000, 10)
            with col2:
                industry = st.selectbox("Industry", [
                    "All", "SaaS", "FinTech", "Cloud Computing", "HealthTech", "Retail"
                ])
                max_emp = st.number_input("Max Employees", 0, 10000, 500)

        if st.button("Scrape Leads"):
            with st.spinner("Searching public sources..."):
                scraped = scrape_leads(keywords, industry, min_emp, max_emp)
                st.session_state.scraped_data = scraped

        if not st.session_state.scraped_data.empty:
            st.subheader("Scraping Results")
            st.dataframe(st.session_state.scraped_data)

            if st.button("Save All to Dashboard"):
                # ignore_index: the inputs come from different sources, so
                # their original index labels can collide after concatenation.
                st.session_state.saved_leads = pd.concat([
                    st.session_state.saved_leads,
                    st.session_state.scraped_data
                ], ignore_index=True).drop_duplicates()
                st.success(f"Added {len(st.session_state.scraped_data)} leads to dashboard!")

    # Enrichment functionality
    elif app_mode == "Enrich Leads":
        st.title("✨ Enrich Lead Details")
        st.info("Enhance your leads with additional data points")

        uploaded_file = st.file_uploader(
            "Upload CSV with Leads",
            type=["csv"]
        )

        if uploaded_file is not None:
            leads_df = pd.read_csv(uploaded_file)
            st.subheader("Uploaded Leads")
            st.dataframe(leads_df)

            if st.button("Enrich Leads"):
                with st.spinner("Enriching lead data..."):
                    enriched = enrich_leads(leads_df)
                    st.session_state.enriched_data = enriched

        if 'enriched_data' in st.session_state:
            st.subheader("Enriched Leads")
            st.dataframe(st.session_state.enriched_data)

            if st.button("Save Enriched Leads"):
                st.session_state.saved_leads = pd.concat([
                    st.session_state.saved_leads,
                    st.session_state.enriched_data
                ], ignore_index=True).drop_duplicates()
                st.success("Saved enriched leads to dashboard!")

    # Lead management
    elif app_mode == "Manage Leads":
        st.title("💾 Save & Export Leads")

        if not st.session_state.saved_leads.empty:
            st.subheader("Your Saved Leads")

            # Add priority stars to display
            display_df = st.session_state.saved_leads.copy()
            if 'Priority Score' in display_df:
                display_df['Priority'] = display_df['Priority Score'].apply(priority_stars)

            edited_df = st.data_editor(
                display_df,
                num_rows="dynamic"
            )

            # Update session state with edited data
            st.session_state.saved_leads = edited_df.drop(columns=['Priority'], errors='ignore')

            col1, col2 = st.columns(2)
            with col1:
                st.download_button(
                    label="Export to CSV",
                    data=edited_df.to_csv(index=False).encode('utf-8'),
                    file_name="saved_leads.csv",
                    mime="text/csv"
                )
            with col2:
                excel_data = to_excel(edited_df)
                st.download_button(
                    label="Export to Excel",
                    data=excel_data,
                    file_name="saved_leads.xlsx",
                    # .xlsx is the OOXML format; "application/vnd.ms-excel"
                    # is the MIME type of the legacy .xls format.
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
                )
        else:
            st.info("No saved leads. Scrape or import leads first.")

    # Lead Analysis
    elif app_mode == "Lead Analysis":
        st.title("📈 Lead Analysis")
        st.info("Analyze and prioritize your leads")

        if st.session_state.saved_leads.empty:
            st.info("No saved leads to analyze. Add leads first.")
            return

        st.subheader("Lead Priority Analysis")
        col1, col2 = st.columns(2)
        with col1:
            plot_priority_distribution(st.session_state.saved_leads)
        with col2:
            plot_revenue_vs_employees(st.session_state.saved_leads)

        st.subheader("Industry Distribution")
        plot_industry_analysis(st.session_state.saved_leads)

        st.subheader("Top Priority Leads")
        if 'Priority Score' in st.session_state.saved_leads:
            top_leads = st.session_state.saved_leads.sort_values('Priority Score', ascending=False)
            top_leads['Priority Stars'] = top_leads['Priority Score'].apply(priority_stars)
            # Show only the columns that exist; saved leads may come from an
            # uploaded file that lacks some of them.
            wanted_columns = ['Company', 'Technology Stack', 'Annual Revenue',
                              'Employee Count', 'Priority Stars']
            shown_columns = [c for c in wanted_columns if c in top_leads]
            st.dataframe(
                top_leads[shown_columns].head(10),
                height=400
            )
        else:
            st.warning("Priority scores not available. Enrich leads first.")

    # Outreach functionality
    elif app_mode == "Enterprise Outreach":
        st.title("✉️ Enterprise Outreach")

        if not st.session_state.enterprise_mode:
            st.warning("Enterprise features are disabled. Enable in Account Settings")
            return

        if st.session_state.saved_leads.empty:
            st.info("No saved leads. Add leads to your dashboard first.")
            return

        # Select leads for outreach
        st.subheader("Select Leads for Outreach")
        outreach_selection = st.multiselect(
            "Choose leads",
            st.session_state.saved_leads['Company'],
            format_func=lead_label
        )

        if outreach_selection:
            selected_leads = st.session_state.saved_leads[
                st.session_state.saved_leads['Company'].isin(outreach_selection)
            ]

            st.subheader("Compose Message")
            email_template = st.text_area(
                "Email Template",
                height=300,
                value=st.session_state.email_template
            )
            st.session_state.email_template = email_template

            # Preview and send
            if st.button("Preview First Email"):
                sample_lead = selected_leads.iloc[0].to_dict()
                # The template is user-edited: an unknown placeholder raises
                # KeyError, "{}" raises IndexError and stray braces raise
                # ValueError. Report these instead of crashing the page.
                try:
                    preview = email_template.format(**sample_lead)
                except (KeyError, IndexError, ValueError) as exc:
                    st.error(f"Could not render the email template: {exc!r}")
                else:
                    st.subheader("Email Preview")
                    st.markdown(f"```\n{preview}\n```")

            if st.button("Send to All Selected Leads"):
                progress_bar = st.progress(0)
                status_text = st.empty()

                for i, (_, lead) in enumerate(selected_leads.iterrows()):
                    # In a real implementation, this would actually send emails
                    status_text.text(f"Sending to {lead['Company']} ({lead.get('Owner Email', 'N/A')})...")
                    progress_bar.progress((i + 1) / len(selected_leads))

                st.success(f"Successfully sent {len(selected_leads)} emails!")

# Function to run Streamlit in a separate thread
def run_streamlit():
    """Generate a standalone ``app.py`` and serve it with Streamlit.

    The script is assembled from a fixed import header, the two embedded CSV
    constants and the source text of the app's functions (obtained with
    ``inspect.getsource``), followed by a ``main()`` entry point. Streamlit
    is then started headless on port 8501; the call blocks until the server
    process exits, which is why it is meant to run on a background thread.
    """
    header = (
        "import streamlit as st\n"
        "import pandas as pd\n"
        "import numpy as np\n"
        "from io import BytesIO\n"
        "import plotly.express as px\n\n"
    )

    # Write function definitions
    functions = [load_data, clean_revenue, calculate_priority_score, init_session_state, scrape_leads,
                 enrich_leads, to_excel, plot_priority_distribution, plot_revenue_vs_employees,
                 plot_industry_analysis, main]

    # Explicit UTF-8: the copied sources contain emoji, which the platform's
    # default encoding may not be able to represent.
    with open("app.py", "w", encoding="utf-8") as f:
        f.write(header)

        # repr() yields a valid Python literal whatever quotes or backslashes
        # the data contains, unlike wrapping the text in ''' by hand.
        f.write(f"COMPANIES_DATA = {COMPANIES_DATA!r}\n")
        f.write(f"LEADS_DATA = {LEADS_DATA!r}\n\n")

        for func in functions:
            f.write(inspect.getsource(func) + "\n\n")

        # Add the main call
        f.write("if __name__ == '__main__':\n")
        f.write("    main()\n")

    # Run Streamlit with an argument list (no shell involved) and with the
    # current interpreter, so the packages installed for it are the ones used.
    subprocess.run(
        [sys.executable, "-m", "streamlit", "run", "app.py",
         "--server.port", "8501", "--server.headless", "true"],
        check=False,
    )

# Launch Streamlit on a background daemon thread so this cell can continue
thread = threading.Thread(target=run_streamlit, daemon=True)
thread.start()

# Give the Streamlit server a few seconds to come up before tunnelling to it
time.sleep(5)

# Open an ngrok HTTP tunnel to the local Streamlit port and report it
public_url = ngrok.connect(8501, "http")
print("Streamlit app running at:", public_url)
print("If the app doesn't load immediately, wait 10-20 seconds and refresh")



Streamlit app running at: NgrokTunnel: "https://39c4-35-231-184-230.ngrok-free.app" -> "http://localhost:8501"
If the app doesn't load immediately, wait 10-20 seconds and refresh
