# LMS Component Verification
## EM626 Project 2 - Midterm

This notebook verifies the components extracted from ChatGPT and provides summary statistics.

In [1]:
import json
import pandas as pd
from typing import Dict, List

print("‚úì Libraries imported successfully")

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


‚úì Libraries imported successfully


In [2]:
with open('extracted_components.json', 'r') as f:
    data = json.load(f)

components = data['components']

print(f"‚úÖ Loaded {len(components)} components from JSON")

‚úÖ Loaded 22 components from JSON


In [3]:
print("="*60)
print("EXTRACTION RESULTS SUMMARY")
print("="*60)
print(f"\nTotal Components: {len(components)}")

# Count by type
types = {}
for comp in components:
    comp_type = comp['type']
    types[comp_type] = types.get(comp_type, 0) + 1

print("\nüìä Components by Type:")
for comp_type, count in sorted(types.items(), key=lambda x: x[1], reverse=True):
    print(f"  ‚Ä¢ {comp_type.capitalize()}: {count}")

# Count by criticality
criticality = {}
for comp in components:
    crit = comp['criticality']
    criticality[crit] = criticality.get(crit, 0) + 1

print("\n‚ö†Ô∏è  Components by Criticality:")
for crit in ['Critical', 'High', 'Medium', 'Low']:
    count = criticality.get(crit, 0)
    if count > 0:
        print(f"  ‚Ä¢ {crit}: {count}")

EXTRACTION RESULTS SUMMARY

Total Components: 22

üìä Components by Type:
  ‚Ä¢ Application: 10
  ‚Ä¢ Infrastructure: 4
  ‚Ä¢ Network: 3
  ‚Ä¢ Storage: 3
  ‚Ä¢ Security: 2

‚ö†Ô∏è  Components by Criticality:
  ‚Ä¢ Critical: 8
  ‚Ä¢ High: 9
  ‚Ä¢ Medium: 5


In [4]:
# Out-degree: Number of dependencies each component has
dep_counts = {}
for comp in components:
    dep_counts[comp['id']] = len(comp.get('dependencies', []))

print("\nüì§ Top 5 Components with Most Dependencies (Out-Degree):")
sorted_deps = sorted(dep_counts.items(), key=lambda x: x[1], reverse=True)
for i, (comp_id, count) in enumerate(sorted_deps[:5], 1):
    comp_name = next(c['name'] for c in components if c['id'] == comp_id)
    print(f"  {i}. {comp_name}: {count} dependencies")

# In-degree: Number of components that depend on this one
in_degree = {comp['id']: 0 for comp in components}
for comp in components:
    for dep in comp.get('dependencies', []):
        if dep in in_degree:
            in_degree[dep] += 1

print("\nüì• Top 5 Most Depended-Upon Components (In-Degree):")
sorted_in = sorted(in_degree.items(), key=lambda x: x[1], reverse=True)
for i, (comp_id, count) in enumerate(sorted_in[:5], 1):
    comp_name = next(c['name'] for c in components if c['id'] == comp_id)
    print(f"  {i}. {comp_name}: {count} components depend on it")


üì§ Top 5 Components with Most Dependencies (Out-Degree):
  1. API Gateway Service: 7 dependencies
  2. Application Load Balancer: 6 dependencies
  3. Content Delivery Service: 3 dependencies
  4. Content Delivery Network: 3 dependencies
  5. User Management Service: 2 dependencies

üì• Top 5 Most Depended-Upon Components (In-Degree):
  1. Primary Database Cluster: 10 components depend on it
  2. Cache Layer: 6 components depend on it
  3. Object Storage: 5 components depend on it
  4. User Management Service: 2 components depend on it
  5. Course Management Service: 2 components depend on it


In [5]:
print("\n" + "="*60)
print("VALIDATION CHECKS")
print("="*60)

all_ids = {comp['id'] for comp in components}
errors = []
warnings = []

# Check 1: All dependencies exist
for comp in components:
    for dep in comp.get('dependencies', []):
        if dep not in all_ids:
            errors.append(f"Component '{comp['id']}' has invalid dependency '{dep}'")

# Check 2: No self-dependencies
for comp in components:
    if comp['id'] in comp.get('dependencies', []):
        warnings.append(f"Component '{comp['id']}' depends on itself")

# Check 3: Duplicate IDs
id_counts = {}
for comp in components:
    comp_id = comp['id']
    id_counts[comp_id] = id_counts.get(comp_id, 0) + 1

for comp_id, count in id_counts.items():
    if count > 1:
        errors.append(f"Duplicate component ID: '{comp_id}' appears {count} times")

# Check 4: Required fields
required_fields = ['id', 'name', 'type', 'technology', 'purpose', 'criticality', 'dependencies']
for i, comp in enumerate(components):
    for field in required_fields:
        if field not in comp:
            errors.append(f"Component {i} missing required field: '{field}'")

# Report results
if errors:
    print("\n‚ùå ERRORS FOUND:")
    for error in errors:
        print(f"  - {error}")
else:
    print("\n‚úÖ No errors found!")

if warnings:
    print("\n‚ö†Ô∏è  WARNINGS:")
    for warning in warnings:
        print(f"  - {warning}")

if not errors and not warnings:
    print("‚úÖ All validations passed! Data is clean.")

print("="*60)


VALIDATION CHECKS

‚úÖ No errors found!
‚úÖ All validations passed! Data is clean.


In [8]:
# Create a DataFrame
df = pd.DataFrame([
    {
        'ID': comp['id'],
        'Name': comp['name'],
        'Type': comp['type'],
        'Technology': comp['technology'],
        'Criticality': comp['criticality'],
        'Dependencies': len(comp.get('dependencies', [])),
        'Depended Upon': in_degree[comp['id']]
    }
    for comp in components
])

# Sort by "Depended Upon" (in-degree)
df = df.sort_values('Depended Upon', ascending=False)

print("\nüìã All Components (sorted by In-Degree):")
print(df.to_string(index=False))      


üìã All Components (sorted by In-Degree):
                                  ID                                   Name           Type                                    Technology Criticality  Dependencies  Depended Upon
            primary_database_cluster               Primary Database Cluster        storage                   AWS RDS PostgreSQL Multi-AZ    Critical             0             10
                         cache_layer                            Cache Layer        storage                       AWS ElastiCache (Redis)        High             0              6
                      object_storage                         Object Storage        storage                AWS S3 with lifecycle policies    Critical             0              5
            discussion_forum_service               Discussion Forum Service    application                            Node.js on AWS ECS      Medium             2              2
             user_management_service                User Managemen

In [10]:
summary = {
    'total_components': len(components),
    'by_type': types,
    'by_criticality': criticality,
    'top_dependencies': [
        {
            'id': comp_id,
            'name': next(c['name'] for c in components if c['id'] == comp_id),
            'count': count
        }
        for comp_id, count in sorted_deps[:5]
    ],
    'top_depended_upon': [
        {
            'id': comp_id,
            'name': next(c['name'] for c in components if c['id'] == comp_id),
            'count': count
        }
        for comp_id, count in sorted_in[:5]
    ]
}

with open('extraction_summary.json', 'w') as f:
    json.dump(summary, f, indent=2)

print("‚úÖ Summary statistics saved to '../outputs/extraction_summary.json'")

‚úÖ Summary statistics saved to '../outputs/extraction_summary.json'
