# SciTeX OS Tutorial

This notebook demonstrates how to use the `scitex.os` module for operating system utilities.

## Features Covered

* Safe file and directory moving operations
* Automatic directory creation
* Error handling for file operations
* Cross-platform compatibility
* Integration with scientific workflows

## 1. Basic Setup and Imports

In [None]:
import os
import tempfile
import shutil
from pathlib import Path
from scitex import os as stx_os

# Show the tutorial banner, the names exposed by the imported module,
# and the directory the notebook kernel is running in.
title = "SciTeX OS Tutorial"
print(title)
exported_names = dir(stx_os)
print("Available functions:", exported_names)
print("Working directory: " + os.getcwd())

## 2. Creating Test Environment

Let's create a temporary directory structure for our demonstrations.

In [None]:
# Create an isolated scratch directory. Every later cell works underneath
# `temp_dir`, and the final cell of the notebook removes it again.
temp_dir = tempfile.mkdtemp(prefix="scitex_os_test_")
print(f"Created temporary directory: {temp_dir}")

# Create source directory structure
source_dir = Path(temp_dir) / "source"
source_dir.mkdir(exist_ok=True)

# Create test files
test_files = [
    "experiment_data.csv",
    "analysis_results.json",
    "research_notes.txt",
    "model_weights.pkl"
]

for filename in test_files:
    file_path = source_dir / filename
    # Explicit encoding so the files do not depend on the platform's
    # default locale encoding.
    with open(file_path, 'w', encoding="utf-8") as f:
        f.write(f"This is test content for {filename}\n")
        f.write("Created for SciTeX OS module demonstration\n")
        f.write(f"File size: {len(filename)} characters in name\n")

print(f"\nCreated test files in {source_dir}:")
# iterdir() yields entries in arbitrary order; sort so the printed listing
# is the same on every run and every file system.
for file_path in sorted(source_dir.iterdir()):
    print(f"  - {file_path.name} ({file_path.stat().st_size} bytes)")

## 3. Basic File Moving Operations

### Moving Individual Files

In [None]:
# Destination directory for the move. It is only named here, not created;
# creating it is left to stx_os.mv.
dest_dir = Path(temp_dir) / "processed_data"
print(f"Target directory: {dest_dir}")

# Pick the single file to move and record its state before the move
source_file = source_dir / "experiment_data.csv"
print(f"\nMoving file: {source_file}")
existed_before = source_file.exists()
print(f"File exists before move: {existed_before}")

# Use scitex.os.mv to move the file (paths are passed as plain strings)
success = stx_os.mv(str(source_file), str(dest_dir))
print(f"Move operation successful: {success}")

# Compare source and destination after the move
moved_file = dest_dir / source_file.name
print(f"\nFile exists at source after move: {source_file.exists()}")
print(f"File exists at destination: {moved_file.exists()}")

if moved_file.exists():
    print(f"Moved file size: {moved_file.stat().st_size} bytes")
    content = moved_file.read_text()
    # Everything before the first newline is the first line
    first_line = content.partition("\n")[0]
    print(f"First line of moved file: {first_line}")

### Moving Multiple Files

In [None]:
# One destination directory per kind of artifact
results_dir = Path(temp_dir) / "analysis_results"
models_dir = Path(temp_dir) / "model_artifacts"
notes_dir = Path(temp_dir) / "documentation"

# The files still waiting in the source directory
analysis_file = source_dir / "analysis_results.json"
model_file = source_dir / "model_weights.pkl"
notes_file = source_dir / "research_notes.txt"

print("Moving files to organized directories...")

# (label used in the messages, file to move, directory to move it into)
move_plan = [
    ("analysis results", analysis_file, results_dir),
    ("model weights", model_file, models_dir),
    ("research notes", notes_file, notes_dir),
]

for label, src_path, dst_dir in move_plan:
    # Files that are already gone are skipped silently
    if not src_path.exists():
        continue
    print(f"\nMoving {label}...")
    success = stx_os.mv(str(src_path), str(dst_dir))
    print(f"{label.capitalize()} move successful: {success}")

# Print the resulting tree, indenting two spaces per directory level
print("\nFinal directory structure:")
for root, dirs, files in os.walk(temp_dir):
    level = len(Path(root).relative_to(temp_dir).parts)
    indent = ' ' * (2 * level)
    print(indent + os.path.basename(root) + "/")
    subindent = ' ' * (2 * (level + 1))
    for name in files:
        print(subindent + name)

## 4. Error Handling and Edge Cases

### Handling Non-existent Files

In [None]:
# Point at a file that was never created and try to move it
nonexistent_file = source_dir / "nonexistent_file.txt"
destination = Path(temp_dir) / "error_test"

source_present = nonexistent_file.exists()
print("Attempting to move non-existent file: {}".format(nonexistent_file))
print("File exists: {}".format(source_present))

# The return value of stx_os.mv is shown as-is
success = stx_os.mv(str(nonexistent_file), str(destination))
print("Move operation result: {}".format(success))
print("Notice: The function handles the error gracefully and returns False")

### Overwriting Existing Files

In [None]:
# Create a new file in source
new_file = source_dir / "test_overwrite.txt"
with open(new_file, 'w') as f:
    f.write("Original content - version 1")

# Create a file with same name in destination
overwrite_dest = Path(temp_dir) / "overwrite_test"
overwrite_dest.mkdir(exist_ok=True)
existing_file = overwrite_dest / "test_overwrite.txt"
with open(existing_file, 'w') as f:
    f.write("Existing content - version 2")

# Path.read_text() opens, reads and closes the file in one call, so no
# handle is left open on the file that is about to be moved.
print(f"Original file content: {new_file.read_text()}")
print(f"Existing file content: {existing_file.read_text()}")

# Move onto a location that already holds a file with the same name.
# NOTE(review): whether stx_os.mv replaces the existing file or reports a
# failure is not visible from this notebook; the prints below show what
# actually happened rather than assuming an overwrite.
print("\nMoving file to location with existing file...")
success = stx_os.mv(str(new_file), str(overwrite_dest))
print(f"Move operation successful: {success}")

# Check result
if existing_file.exists():
    final_content = existing_file.read_text()
    print(f"Final file content: {final_content}")
    print(f"Source file still exists: {new_file.exists()}")

## 5. Practical Scientific Workflow Examples

### Experiment Data Organization

In [None]:
# Simulated experiment: nine files are written into one flat directory and
# then sorted into per-type sub-directories with scitex.os.mv.
experiment_dir = Path(temp_dir) / "experiment_2024"
experiment_dir.mkdir(exist_ok=True)

# File names per category (three measurement days plus three analysis outputs)
days = (1, 2, 3)
raw_data_files = [f"raw_data_day{day}.csv" for day in days]
processed_files = [f"processed_data_day{day}.json" for day in days]
analysis_files = [
    "statistical_analysis.xlsx",
    "visualization_plots.png",
    "final_report.pdf",
]

# Write a small four-line text body into every file
all_files = raw_data_files + processed_files + analysis_files
for filename in all_files:
    extension = filename.split('.')[-1]
    body = (
        f"Scientific data for {filename}\n"
        "Experiment date: 2024-07-02\n"
        "Researcher: SciTeX User\n"
        f"File type: {extension}\n"
    )
    with open(experiment_dir / filename, 'w') as out:
        out.write(body)

print(f"Created {len(all_files)} experimental files")

# Target folder name -> files that belong in it
organization_structure = {
    "raw_data": raw_data_files,
    "processed_data": processed_files,
    "analysis_results": analysis_files
}
organized_dir = Path(temp_dir) / "organized_experiment"

print("\nOrganizing files by type...")
for folder_name, file_list in organization_structure.items():
    target_dir = organized_dir / folder_name
    print(f"\nCreating {folder_name} directory...")

    for filename in file_list:
        source_file = experiment_dir / filename
        if not source_file.exists():
            continue
        print(f"  Moving {filename}...")
        success = stx_os.mv(str(source_file), str(target_dir))
        outcome = (
            f"    ✓ Successfully moved {filename}"
            if success
            else f"    ✗ Failed to move {filename}"
        )
        print(outcome)

# Print the organized tree, indenting two spaces per directory level
print("\nFinal organized structure:")
if organized_dir.exists():
    for root, dirs, files in os.walk(organized_dir):
        level = len(Path(root).relative_to(organized_dir).parts)
        indent = '  ' * level
        print(indent + os.path.basename(root) + "/")
        subindent = '  ' * (level + 1)
        for name in files:
            print(subindent + name)

### Batch Processing Pipeline

In [None]:
# Simulate a batch processing pipeline: for every stage, each file from the
# previous stage is copied under a stage-tagged name, a "Processed by" line
# is appended, and the tagged copy is moved into that stage's directory.
print("Simulating batch processing pipeline...")

# Create input directory with batch files
input_dir = Path(temp_dir) / "batch_input"
input_dir.mkdir(exist_ok=True)

# Processing stages, iterated in insertion order
stages = {
    "stage1_preprocessing": Path(temp_dir) / "stage1",
    "stage2_analysis": Path(temp_dir) / "stage2",
    "stage3_results": Path(temp_dir) / "stage3"
}

# Create batch files
batch_files = [f"batch_file_{i:03d}.data" for i in range(1, 6)]
for filename in batch_files:
    file_path = input_dir / filename
    with open(file_path, 'w') as f:
        f.write(f"Batch data for {filename}\n")
        f.write("Processing stage: input\n")
        f.write("Created: 2024-07-02\n")

print(f"Created {len(batch_files)} batch files")

# (path, name) pairs that feed the next stage; starts with the raw inputs
current_files = [(input_dir / name, name) for name in batch_files]

for stage_name, stage_dir in stages.items():
    print(f"\nProcessing {stage_name}...")
    next_files = []

    for file_path, filename in current_files:
        if file_path.exists():
            # Simulate processing by tagging the file name with the stage
            processed_filename = filename.replace('.data', f'_{stage_name}.data')

            print(f"  Processing {filename} -> {processed_filename}")

            # Write the tagged copy next to its input, then move it
            temp_processed = file_path.parent / processed_filename
            with open(temp_processed, 'w') as f:
                with open(file_path, 'r') as original:
                    f.write(original.read())
                f.write(f"Processed by: {stage_name}\n")

            # Move to stage directory
            success = stx_os.mv(str(temp_processed), str(stage_dir))
            if success:
                next_files.append((stage_dir / processed_filename, processed_filename))
                print(f"    ✓ Moved to {stage_name}")
            else:
                # A file whose move failed is dropped from later stages
                print(f"    ✗ Failed to move to {stage_name}")

    current_files = next_files

# Show final pipeline results
print("\nPipeline processing complete!")
print("Final stage results:")
final_stage_dir = stages["stage3_results"]
if final_stage_dir.exists():
    # iterdir() order is arbitrary; sort for a reproducible report
    for file_path in sorted(final_stage_dir.iterdir()):
        print(f"  - {file_path.name}")
        # The "Processed by:" lines accumulated across stages form the history
        with open(file_path, 'r') as f:
            processing_lines = [line.strip() for line in f if 'Processed by:' in line]
        print(f"    Processing history: {', '.join(processing_lines)}")

## 6. Integration with Path-Based Project Workflows

### Combining with Path Management

In [None]:
# Create a more complex workflow combining file operations with path management
from pathlib import Path
import datetime

print("Demonstrating integration with path management...")

# Create timestamp-based directories (the name changes on every run)
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
base_dir = Path(temp_dir) / f"timestamped_workflow_{timestamp}"
base_dir.mkdir(exist_ok=True)

# Declared project layout: top-level folder -> allowed sub-folders
project_structure = {
    "data": ["input", "intermediate", "output"],
    "results": ["plots", "tables", "reports"],
    "logs": ["processing", "errors", "debug"]
}

# Create initial files in a staging area
staging_dir = base_dir / "staging"
staging_dir.mkdir(exist_ok=True)

# File name -> target location (relative to base_dir)
test_files = {
    "experiment_001_raw.csv": "data/input",
    "experiment_001_processed.json": "data/intermediate",
    "experiment_001_final.xlsx": "data/output",
    "plot_distribution.png": "results/plots",
    "summary_table.csv": "results/tables",
    "final_report.pdf": "results/reports",
    "processing.log": "logs/processing",
    "error.log": "logs/errors"
}

# Every target must be a location declared in project_structure; this keeps
# the declared layout and the file mapping from drifting apart.
declared_locations = {
    f"{top_level}/{sub_dir}"
    for top_level, sub_dirs in project_structure.items()
    for sub_dir in sub_dirs
}
undeclared = sorted(set(test_files.values()) - declared_locations)
assert not undeclared, f"Targets missing from project_structure: {undeclared}"

# Create files in staging
for filename, target_path in test_files.items():
    file_path = staging_dir / filename
    with open(file_path, 'w') as f:
        f.write(f"Content for {filename}\n")
        f.write(f"Target location: {target_path}\n")
        f.write(f"Created: {timestamp}\n")

print(f"Created {len(test_files)} files in staging area")

# Organize files using scitex.os.mv; target directories are not created
# here, that is left to the move itself
print("\nOrganizing files into project structure...")
for filename, target_path in test_files.items():
    source_file = staging_dir / filename
    target_dir = base_dir / target_path

    if source_file.exists():
        print(f"Moving {filename} to {target_path}...")
        success = stx_os.mv(str(source_file), str(target_dir))
        if success:
            print(f"  ✓ Successfully organized {filename}")
        else:
            print(f"  ✗ Failed to organize {filename}")

# Display final project structure, indenting two spaces per directory level
print("\nFinal project structure:")
for root, dirs, files in os.walk(base_dir):
    level = root.replace(str(base_dir), '').count(os.sep)
    indent = '  ' * level
    print(f"{indent}{os.path.basename(root)}/")
    subindent = '  ' * (level + 1)
    for file in files:
        print(f"{subindent}{file}")

# Calculate organization statistics (counts everything below base_dir,
# including the now-empty staging directory)
total_files = sum(len(files) for _, _, files in os.walk(base_dir))
total_dirs = sum(len(dirs) for _, dirs, _ in os.walk(base_dir))
print(f"\nOrganization complete: {total_files} files in {total_dirs} directories")

## 7. Best Practices and Tips

### Safe File Operations

In [None]:
# Section banner: heading followed by a 40-character rule
heading = "Best Practices for Safe File Operations:"
print(heading, "=" * 40, sep="\n")

# Best practice 1: Always check if source exists
def safe_move_with_check(source, destination):
    """Move ``source`` to ``destination`` with ``stx_os.mv`` after pre-flight checks.

    The checks run in this order, and the first failing one aborts the move:

    1. ``source`` must exist.
    2. ``source`` must be readable by the current process.
    3. The destination directory, or its nearest existing ancestor when the
       directory has not been created yet, must be writable.

    Args:
        source: Path of the file to move (``str`` or path-like).
        destination: Target path. A path with a suffix is treated as a file
            path and its parent is checked; otherwise the path itself is
            treated as the destination directory.

    Returns:
        The value returned by ``stx_os.mv`` when the move is attempted, or
        ``False`` when a check fails or the move raises an exception.
    """
    source_path = Path(source)
    dest_path = Path(destination)

    print(f"\nAttempting to move: {source_path.name}")

    # Check if source exists
    if not source_path.exists():
        print(f"  ✗ Source file does not exist: {source}")
        return False

    # Check if source is readable
    if not os.access(source, os.R_OK):
        print(f"  ✗ Source file is not readable: {source}")
        return False

    # Heuristic: a destination with a suffix names a file, so its parent is
    # the directory that has to accept the new entry.
    dest_parent = dest_path.parent if dest_path.suffix else dest_path

    # The destination may not exist yet. In that case the directory that
    # actually has to be writable is the closest ancestor that does exist,
    # so walk upwards until one is found (or the path root is reached).
    probe = dest_parent
    while not probe.exists() and probe != probe.parent:
        probe = probe.parent
    if probe.exists() and not os.access(probe, os.W_OK):
        print(f"  ✗ Destination directory is not writable: {probe}")
        return False

    # Perform the move; any exception is reported and mapped to False so the
    # caller only has to look at the return value.
    try:
        success = stx_os.mv(str(source), str(destination))
        if success:
            print(f"  ✓ Successfully moved {source_path.name}")
        else:
            print(f"  ✗ Move operation failed for {source_path.name}")
        return success
    except Exception as e:
        print(f"  ✗ Exception during move: {e}")
        return False

# Exercise safe_move_with_check on a freshly written file
test_file = Path(temp_dir) / "safe_move_test.txt"
test_file.write_text("Test content for safe move demonstration")

# The destination directory is only named here, not created
safe_dest = Path(temp_dir) / "safe_destination"
success = safe_move_with_check(str(test_file), str(safe_dest))

print("\nSafe move demonstration result: {}".format(success))

### Backup Strategy

In [None]:
def move_with_backup(source, destination, backup_suffix=".backup"):
    """Move ``source`` with ``stx_os.mv``, backing up a file it would replace.

    If a file already exists at the resolved destination path, it is first
    copied to ``<name><backup_suffix>`` in the same directory. When the move
    reports failure, that backup is moved back into place, even if the
    original destination file no longer exists.

    Args:
        source: Path of the file to move.
        destination: Target directory or file path. An existing directory,
            or a non-existing path without a suffix, is treated as a
            directory and the source file name is appended to it.
        backup_suffix: Text appended to the destination file name to form
            the backup file name.

    Returns:
        The value returned by ``stx_os.mv``.
    """
    source_path = Path(source)
    dest_path = Path(destination)

    # If destination is a directory, construct full file path
    if dest_path.is_dir() or (not dest_path.suffix and not dest_path.exists()):
        dest_path = dest_path / source_path.name

    print(f"Moving {source_path.name} to {dest_path}")

    # Remember the backup made by THIS call; None means nothing was backed
    # up, so a leftover backup from an earlier run is never restored.
    backup_path = None
    if dest_path.exists():
        # Appending to the whole name works for any suffix text and for
        # file names without an extension.
        backup_path = dest_path.with_name(dest_path.name + backup_suffix)
        print(f"  Creating backup: {backup_path.name}")
        shutil.copy2(dest_path, backup_path)

    # Perform the move
    success = stx_os.mv(str(source), str(destination))

    if success:
        print("  ✓ Move completed successfully")
    else:
        print("  ✗ Move failed")
        # Restore regardless of whether the destination still exists: a
        # failed move may have removed or damaged it.
        if backup_path is not None and backup_path.exists():
            print("  Restoring backup...")
            shutil.move(str(backup_path), str(dest_path))

    return success

# Test backup strategy
print("\nTesting backup strategy:")

# Create original file
original_file = Path(temp_dir) / "important_data.txt"
with open(original_file, 'w') as f:
    f.write("Important original data that should be backed up")

# Create new file to move
new_file = Path(temp_dir) / "updated_data.txt"
with open(new_file, 'w') as f:
    f.write("Updated data that will replace the original")

# Move with backup
backup_dest = Path(temp_dir) / "backup_demo"
backup_dest.mkdir(exist_ok=True)
# Seed the destination with the original data under the incoming file's
# name. move_with_backup only backs up a file at <destination>/<source name>,
# so the names must collide for the backup step to run.
shutil.copy2(original_file, backup_dest / new_file.name)

# NOTE(review): how stx_os.mv treats an existing same-named file is not
# visible here; on failure move_with_backup restores the backup instead.
success = move_with_backup(str(new_file), str(backup_dest))

# Check results (sorted, because iterdir() order is arbitrary)
print("\nBackup demonstration results:")
for file_path in sorted(backup_dest.iterdir()):
    print(f"  {file_path.name}: {file_path.stat().st_size} bytes")

## 8. Performance Considerations

In [None]:
import time

print("Performance testing scitex.os.mv vs standard shutil.move:")
print("=" * 55)

# Create test files for performance comparison
perf_test_dir = Path(temp_dir) / "performance_test"
perf_test_dir.mkdir(exist_ok=True)

# Create multiple test files; the first half is moved with scitex.os.mv,
# the second half with shutil.move
n_files = 10
test_files = []
for i in range(n_files):
    filename = f"perf_test_file_{i:03d}.txt"
    file_path = perf_test_dir / filename
    with open(file_path, 'w') as f:
        # Create some content
        f.write(f"Performance test file {i}\n" * 100)
    test_files.append(file_path)

print(f"Created {n_files} test files")

# Test scitex.os.mv performance.
# perf_counter() is a high-resolution clock meant for measuring short
# intervals; time.time() can report a zero difference for work this small.
# NOTE(review): the destination directory is not pre-created for this half,
# unlike the shutil half below, so the two timings are not like-for-like.
scitex_dest = Path(temp_dir) / "scitex_perf_dest"
start_time = time.perf_counter()
scitex_success_count = 0

for file_path in test_files[:n_files//2]:
    success = stx_os.mv(str(file_path), str(scitex_dest))
    if success:
        scitex_success_count += 1

scitex_time = time.perf_counter() - start_time

# Test standard shutil.move performance
shutil_dest = Path(temp_dir) / "shutil_perf_dest"
shutil_dest.mkdir(exist_ok=True)
start_time = time.perf_counter()
shutil_success_count = 0

for file_path in test_files[n_files//2:]:
    try:
        shutil.move(str(file_path), str(shutil_dest))
        shutil_success_count += 1
    except Exception as e:
        # Keep going, but report the failure instead of hiding it
        print(f"  shutil.move failed for {file_path.name}: {e}")

shutil_time = time.perf_counter() - start_time

# Guard the rate computation against a zero-length interval
scitex_rate = scitex_success_count / scitex_time if scitex_time > 0 else float("inf")
shutil_rate = shutil_success_count / shutil_time if shutil_time > 0 else float("inf")

# Report results
print("\nPerformance Results:")
print("SciTeX os.mv:")
print(f"  Time: {scitex_time:.4f} seconds")
print(f"  Success rate: {scitex_success_count}/{n_files//2} ({100*scitex_success_count/(n_files//2):.1f}%)")
print(f"  Files per second: {scitex_rate:.2f}")

print("\nStandard shutil.move:")
print(f"  Time: {shutil_time:.4f} seconds")
print(f"  Success rate: {shutil_success_count}/{n_files//2} ({100*shutil_success_count/(n_files//2):.1f}%)")
print(f"  Files per second: {shutil_rate:.2f}")

print("\nKey Differences:")
print("- SciTeX os.mv includes automatic directory creation")
print("- SciTeX os.mv provides consistent error handling")
print("- SciTeX os.mv returns success/failure status")
print("- Both use the same underlying file system operations")

## 9. Cleanup and Summary

In [None]:
# Remove the scratch directory created at the start of the notebook.
# A failure is reported rather than raised so the summary is still printed.
print("Cleaning up temporary files...")
try:
    shutil.rmtree(temp_dir)
except Exception as e:
    print(f"✗ Cleanup failed: {e}")
else:
    print(f"✓ Successfully cleaned up {temp_dir}")

# Summary banner: title framed by two 50-character rules
rule = "=" * 50
print("\n" + rule)
print("SciTeX OS Module Summary")
print(rule)

summary = """
The scitex.os module provides enhanced file operations with the following features:

📁 Core Function: mv(source, destination)
   • Moves files and directories safely
   • Automatically creates destination directories
   • Returns success/failure status
   • Handles errors gracefully

✅ Key Advantages:
   • Automatic directory creation (no need for os.makedirs)
   • Consistent error handling and reporting
   • Print statements for operation feedback
   • Simple boolean return values
   • Cross-platform compatibility

🔧 Best Practices:
   • Always check return values for success/failure
   • Verify source file existence before operations
   • Use absolute paths when possible
   • Implement backup strategies for important data
   • Test with small files before batch operations

🚀 Use Cases:
   • Organizing experimental data
   • Batch processing pipelines
   • Project structure management
   • Automated file workflows
   • Research data organization

The module is designed for scientific workflows where reliable file operations
and clear feedback are essential for reproducible research.
"""

print(summary)