# Data Dictionary CSV Inspection

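This notebook loads the data dictionary CSV file and inspects its shape, column names, missing values, data types, and non-ASCII characters to verify that the Excel-to-CSV conversion preserved the data.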

In [1]:
import pandas as pd
import numpy as np

# Notebook-wide pandas display settings: show every column, up to 100 rows,
# no fixed display width, and at most 50 characters of text per cell.
display_settings = {
    'display.max_columns': None,
    'display.max_rows': 100,
    'display.width': None,
    'display.max_colwidth': 50,
}
for option_name, option_value in display_settings.items():
    pd.set_option(option_name, option_value)

ModuleNotFoundError: No module named 'pandas'

In [None]:
# Read the exported data dictionary into `df`; every later cell works on this frame.
csv_file = "StemCellRegistry Data Dictionary September 2025 version(Sheet1).csv"
df = pd.read_csv(csv_file)

# Report the overall size, then the column and row counts individually.
row_count, column_count = df.shape
print(f"Data shape: {df.shape}")
print(f"Columns: {column_count}")
print(f"Rows: {row_count}")

In [None]:
# Print pandas' built-in summary of the frame (df.info writes its report itself).
section_title = "=== DATAFRAME INFO ==="
print(section_title)
df.info()

In [None]:
# List every column header with a 1-based position, right-aligned to two digits.
print("=== COLUMN NAMES ===")
for position, column_name in enumerate(df.columns, start=1):
    print(f"{position:2d}. {column_name}")

In [None]:
# Preview the top of the table; the slice is the cell's last expression,
# so the notebook renders it as a table.
print("=== FIRST 10 ROWS ===")
df.iloc[:10]

In [None]:
# Count missing (NaN/None) entries per column and report only the columns
# that have at least one.
print("=== MISSING VALUES ===")
missing_summary = df.isnull().sum()

# Print either the per-column counts or the all-clear message, never both:
# filtering a summary with no hits would otherwise print an empty Series
# ahead of the "No missing values" line.
if missing_summary.sum() == 0:
    print("No missing values found!")
else:
    print(missing_summary[missing_summary > 0])

In [None]:
# Show the dtype pandas assigned to each column; the Series is the cell's
# last expression, so the notebook renders it.
print("=== DATA TYPES ===")
column_dtypes = df.dtypes
column_dtypes

In [None]:
# Scan each text column for characters outside the 7-bit ASCII range and
# print a per-column count plus a few sample values for manual review.
print("=== POTENTIAL ENCODING ISSUES ===")
for col in df.columns:
    # Test the dtype with is_string_dtype instead of comparing it to 'object':
    # this accepts object columns as before and also pandas' dedicated string
    # dtypes, which the equality check would skip silently.
    if not pd.api.types.is_string_dtype(df[col].dtype):
        continue

    # Flag rows holding at least one character beyond \x7F; na=False keeps
    # any value that is still missing after the cast out of the matches.
    non_ascii = df[col].astype(str).str.contains(r'[^\x00-\x7F]', na=False)
    if not non_ascii.any():
        continue

    print(f"Column '{col}' contains non-ASCII characters in {non_ascii.sum()} rows")
    # Show up to three offending values; repr() makes the raw characters visible.
    for idx, val in df.loc[non_ascii, col].head(3).items():
        print(f"  Row {idx}: {repr(val)}")

print("\nEncoding check complete.")

In [None]:
# Display every row of the DataFrame for manual inspection.
print("=== FULL DATAFRAME ===")
# The notebook-wide 'display.max_rows' setting truncates long tables, so lift
# the row cap for this one output only. display() is the notebook's built-in
# rich renderer; a bare expression inside a `with` block would not be shown.
with pd.option_context('display.max_rows', None):
    display(df)