# DataVisuals : Data Handling and Preparations

**This notebook demonstrates the data handling functions**

- 📁 **import_data()**: Find and list the data files in a directory
- 📊 **load_file()**: Load data files into pandas DataFrames
- 📋 **get_tables()**: Select which data files to work with
- 🔍 **projection_columns()**: Select which columns to include in the analysis

## 1. Setup and Imports

In [None]:
# Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

import seaborn as sns
import os
import numpy as np
from scipy import stats

# Seaborn appearance: white-grid style, then the "talk" plotting context
sns.set(style="whitegrid")
sns.set_context("talk")

# Lift pandas' display limits so DataFrames are printed without row/column truncation
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Matplotlib defaults applied to every figure: size and resolution (dots per inch)
plt.rcParams.update({
    'figure.figsize': [10, 6],
    'figure.dpi': 100,
})


# Data Import Function
**Imports data files from a user-specified directory.**

Returns a list of valid file paths for the supported file types.

In [None]:
def import_data(directory_path=None):
    """
    Find the supported data files in a directory.

    If ``directory_path`` is None, or does not point to a directory that
    contains at least one supported file, the user is prompted (via
    ``input``) for a path until a usable directory is given or they type
    'exit', 'quit' or 'q'.

    Parameters
    ----------
    directory_path : str or None, optional
        Directory to search. When None the path is requested interactively.

    Returns
    -------
    list of str or None
        Paths (``directory_path`` joined with each file name) of the regular
        files whose extension is one of .csv, .xlsx, .xls, .json or .ods.
        Extensions are matched case-insensitively and the list is sorted by
        file name so the result is reproducible. Returns None if the user
        cancels at the prompt.
    """

    # Supported file formats (a tuple so it can be passed straight to str.endswith)
    supported_extensions = ('.csv', '.xlsx', '.xls', '.json', '.ods')

    # Keep prompting until we get valid files or the user quits
    while True:
        # Get directory path if not provided
        if directory_path is None:
            # Strip stray whitespace so a trailing space/newline neither
            # breaks the path nor hides an exit keyword
            directory_path = input("\n📁 Enter the path to your data files >>> ").strip()

            # Allow user to exit
            if directory_path.lower() in ('exit', 'quit', 'q'):
                print("❌ Operation cancelled.")
                return None

            # Let the user type paths such as ~/data
            directory_path = os.path.expanduser(directory_path)

        # Validate directory; on failure fall back to prompting
        if not os.path.isdir(directory_path):
            print(f"⚠️ The directory '{directory_path}' does not exist.")
            directory_path = None
            continue

        print(f"🔍 Searching for data files in: {directory_path}")

        # Only the directory listing can fail (e.g. permission denied), so
        # keep the try body to that single call. Sorting makes the order of
        # the returned files deterministic across platforms.
        try:
            entries = sorted(os.listdir(directory_path))
        except OSError as e:
            print(f"❌ Error accessing directory: {e}")
            directory_path = None
            continue

        # Keep regular files only (a sub-directory named 'x.csv' is skipped);
        # compare the lower-cased name so 'DATA.CSV' is accepted too
        valid_files = []
        for entry in entries:
            file_path = os.path.join(directory_path, entry)
            if entry.lower().endswith(supported_extensions) and os.path.isfile(file_path):
                valid_files.append(file_path)

        # Check if we found any files
        if not valid_files:
            print(f"⚠️ No supported data files found in '{directory_path}'")
            print(f"   Supported formats: {', '.join(supported_extensions)}")
            directory_path = None
            continue

        # Success! Return the list of files
        print(f"✅ Found {len(valid_files)} data file(s)")
        return valid_files