In [1]:
from datetime import datetime, timedelta
import pandas as pd

In [3]:
# Load the three pipe-delimited exports (instances, volumes, snapshots).
instance_df = pd.read_csv('INSTANCE_DETAILS.txt', delimiter='|', low_memory=False)
volume_df = pd.read_csv('VOLUME_DETAILS.txt', delimiter='|', low_memory=False)
snapshot_df = pd.read_csv('SNAPSHOT_DETAILS.txt', delimiter='|', low_memory=False)

# Reference timestamps: "now" and the point two days before it.
current_time = datetime.now()
threshold = current_time - timedelta(days=2)

# Keep only the instance columns used downstream and give the ambiguous ones
# instance-specific names. rename() returns a new frame, which avoids the
# SettingWithCopyWarning that an in-place rename on a column slice triggers.
instance_df = instance_df[['InstanceId', 'Name', 'AvailabilityZone', 'State', 'acc_id',
                           'Launch_date_Time', 'BackupID']].rename(columns={
    'AvailabilityZone': 'AvailabilityZoneInstance',
    'acc_id': 'account_id_instance',
    'Name': 'Instance Name'
})

# Keep only instances whose BackupID starts with 'EBS'; rows with a missing
# BackupID are excluded (na=False).
instance_df = instance_df[instance_df['BackupID'].str.startswith('EBS', na=False)]

volume_df = volume_df[['VolumeId', 'InstanceId', 'CreateTime', 'Name']].rename(
    columns={'Name': 'Volume Name'}
)

# Fix: the original renamed 'Name' on volume_df a second time (a no-op, since
# that column had already been renamed), leaving the snapshot column untouched.
snapshot_df = snapshot_df[['SnapshotId', 'VolumeId', 'Launch_Time', 'Name']].rename(
    columns={'Name': 'Snapshot Name'}
)

print(f"Length of instance df : {len(instance_df)}")
print(f"Length of volume df : {len(volume_df)}")
print(f"Length of snapshot df : {len(snapshot_df)}")


Length of instance df : 1204
Length of volume df : 5318
Length of snapshot df : 420809


In [19]:
# Build the list of days from `threshold` up to `current_time`, one entry per
# day, formatted as 'YYYY-MM-DD' strings.
span_days = (current_time - threshold).days
date_range = []
for offset in range(span_days + 1):
    day = threshold + timedelta(days=offset)
    date_range.append(day.strftime('%Y-%m-%d'))
date_range


['2025-01-11', '2025-01-12', '2025-01-13']

In [28]:
# For every selected instance, look at each attached volume and report the
# volumes that have fewer than MIN_SNAPSHOTS snapshots.
MIN_SNAPSHOTS = 3  # volumes with fewer snapshots than this are reported

missing_data = []
for _, row in instance_df.iterrows():
    instance_id = row['InstanceId']

    # Volumes attached to the current instance.
    related_volumes = volume_df[volume_df['InstanceId'] == instance_id]
    volume_ids = related_volumes['VolumeId'].tolist()

    # VolumeId -> snapshot count and details, rebuilt for each instance.
    volumes_with_less_snapshots = dict()
    for volume_id in volume_ids:
        # Snapshots of this volume, newest first (Launch_Time descending).
        related_snapshots = (
            snapshot_df[snapshot_df['VolumeId'] == volume_id]
            .sort_values(by='Launch_Time', ascending=False)
        )

        # Fix: the count is checked once per volume. The original ran this
        # check inside a loop over the snapshot rows, so a volume with zero
        # snapshots never reached it and was silently left out, while volumes
        # with many snapshots iterated over every row for nothing.
        if len(related_snapshots) >= MIN_SNAPSHOTS:
            continue

        snap_detail_date = related_snapshots[['SnapshotId', 'Launch_Time']].to_dict(orient='records')
        # Prints an empty list for a volume that has no snapshots at all.
        print(snap_detail_date)

        volumes_with_less_snapshots[volume_id] = {
            'Snapshots Available': len(related_snapshots),
            'Snapshots Details': snap_detail_date,
        }

[{'SnapshotId': 'snap-078ca87976b290a49', 'Launch_Time': '2025-01-13 05:42:18'}]
[{'SnapshotId': 'snap-087b0e32c886333bd', 'Launch_Time': '2025-01-13 05:41:44'}]
[{'SnapshotId': 'snap-0aab8b9fe0c2f36ba', 'Launch_Time': '2025-01-13 05:43:25'}]
[{'SnapshotId': 'snap-011f0887d525245bc', 'Launch_Time': '2025-01-13 05:42:51'}]


In [40]:
# For every volume attached to a selected instance, record which days of
# date_range have no snapshot, together with the owning instance's details.

# Build VolumeId -> {snapshot dates} once instead of filtering and sorting the
# whole snapshot frame for every volume. The date is the first
# whitespace-separated token of Launch_Time. Rows with a missing Launch_Time
# are dropped first; the original called .split() on them and raised.
snapshot_dates_by_volume = (
    snapshot_df[['VolumeId', 'Launch_Time']]
    .dropna(subset=['Launch_Time'])
    .assign(snapshot_date=lambda frame: frame['Launch_Time'].astype(str).str.split().str[0])
    .groupby('VolumeId')['snapshot_date']
    .agg(set)
    .to_dict()
)
expected_dates = set(date_range)

missing_data = []
for _, row in instance_df.iterrows():
    instance_id = row['InstanceId']

    # Volumes attached to the current instance.
    related_volumes = volume_df[volume_df['InstanceId'] == instance_id]
    volume_ids = related_volumes['VolumeId'].tolist()

    for volume_id in volume_ids:
        # Dates in date_range for which this volume has no snapshot. A volume
        # with no snapshots at all is missing every date.
        missing_dates = expected_dates - snapshot_dates_by_volume.get(volume_id, set())
        if not missing_dates:
            continue

        missing_data.append({
            'VolumeId': volume_id,
            'InstanceId': instance_id,
            'Instance Name': row['Instance Name'],
            'Launch_date_Time': row['Launch_date_Time'],
            'AccountId_Instance': row['account_id_instance'],
            # CreateTime of the first volume row matching this VolumeId.
            'CreateTime': related_volumes[related_volumes['VolumeId'] == volume_id]['CreateTime'].values[0],
            'AvailabilityZoneInstance': row['AvailabilityZoneInstance'],
            # Sorted so the report is deterministic; set order is not.
            'Missing Dates': sorted(missing_dates)
        })

In [43]:
from rich.table import Table
from rich.console import Console

# Column headers in display order. Each header is also the key read from the
# missing_data records; the last one holds a list of dates.
column_names = (
    "VolumeId",
    "InstanceId",
    "Instance Name",
    "Launch_date_Time",
    "AccountId_Instance",
    "CreateTime",
    "AvailabilityZoneInstance",
    "Missing Dates",
)
*scalar_columns, dates_column = column_names

# Console used to render the table
console = Console()

# Table with one dimmed column per header
table = Table(show_header=True, header_style="bold magenta")
for column_name in column_names:
    table.add_column(column_name, style="dim")

# One row per record: scalar fields are stringified, and the missing dates are
# joined into a single comma-separated cell
for record in missing_data:
    cells = [str(record[name]) for name in scalar_columns]
    cells.append(", ".join(map(str, record[dates_column])))
    table.add_row(*cells)

# Render the table
console.print(table)