Existence Assertions

In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Existence check: no row may be missing its 'Crash ID'
missing_crash_id = df['Crash ID'].isnull()
assert not missing_crash_id.any(), "Not every crash has a Crash ID specified"
print('All assertions passed successfully!')


All assertions passed successfully!


Limit Assertions

In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Limit check: parse every 'Crash Month' value with the '%m' format.
# errors='coerce' turns anything that does not parse (including missing
# values) into NaT, so a NaT anywhere means an invalid month.
parsed_months = pd.to_datetime(df['Crash Month'], format='%m', errors='coerce')
assert not parsed_months.isnull().any(), "Not every crash has a valid month number"


AssertionError: Not every crash has a valid month number

Resolving Limit Violation

In [None]:
import pandas as pd

# Location of the crash data
# NOTE(review): this cell reads 'Crashes_1.csv' while the other cells read
# 'Hwy26Crashes2019_S23.csv' — confirm this is the intended input.
csv_file_path = 'Crashes_1.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Restrict the check to rows whose 'Record Type' is 1
df_filtered = df.loc[df['Record Type'] == 1]

# Limit check on the filtered rows: every 'Crash Month' must parse with '%m';
# unparseable or missing values are coerced to NaT and fail the assertion.
parsed_months = pd.to_datetime(df_filtered['Crash Month'], format='%m', errors='coerce')
assert not parsed_months.isnull().any(), "Not every crash with record type 1 has a valid month number"
print('All assertions passed successfully!')


All assertions passed successfully!


Intra Record Assertions

In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Intra-record check: wherever 'City Section ID' is present,
# 'Urban Area Code' must be present on the same row.
has_city_section = df['City Section ID'].notnull()
urban_area_present = df.loc[has_city_section, 'Urban Area Code'].notnull()
assert urban_area_present.all(), "Not every crash with a City Code has an associated Urban Area Code"
print('All assertions passed successfully!')


All assertions passed successfully!


In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Rows whose 'Road Character' equals the literal string 'Intersection'
# NOTE(review): if 'Road Character' is stored as a numeric code, this mask is
# empty and the assertion below passes vacuously — confirm the encoding.
is_intersection = df['Road Character'] == 'Intersection'

# Intra-record check: 'Number of Turning Legs' must be numeric on intersection rows.
# pd.to_numeric(errors='coerce') maps every value that is not a number
# (including missing values) to NaN, so a NaN marks a non-numeric entry.
# The source column is left untouched rather than being overwritten with strings.
turning_legs = pd.to_numeric(df.loc[is_intersection, 'Number of Turning Legs'], errors='coerce')
has_non_numeric_turning_legs = turning_legs.isnull().any()
assert not has_non_numeric_turning_legs, "Number of turning legs must be numeric when Road Character is Intersection"
print('All assertions passed successfully!')


All assertions passed successfully!


Inter-Record Assertions

In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# The three participant error columns and the three participant cause columns
error_columns = [f'Participant Error {n} Code' for n in range(1, 4)]
cause_columns = [f'Participant Cause {n} Code' for n in range(1, 4)]

# Row-level flags: at least one error code present / at least one cause code missing
error_not_none = df[error_columns].notnull().any(axis=1)
cause_none = ~df[cause_columns].notnull().all(axis=1)

# A row violates the rule when it has some error code while some cause code is missing
violating_rows = error_not_none & cause_none
assert not violating_rows.any(), "If any participant error code is not 'None', the corresponding cause code should not be 'None'"


AssertionError: If any participant error code is not 'None', the corresponding cause code should not be 'None'

Resolving the above violated assertion

In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Inter-record check, evaluated per slot i in 1..3: when 'Participant Error i Code'
# is present, 'Participant Cause i Code' must be present on the same row.
#
# Missing CSV cells are loaded by pandas as NaN, never as the Python object None,
# so identity tests against None can never detect them; notna()/isna() are used instead.
violations = pd.DataFrame({
    i: df[f'Participant Error {i} Code'].notna() & df[f'Participant Cause {i} Code'].isna()
    for i in range(1, 4)
})

violating_rows = violations.any(axis=1)
if violating_rows.any():
    # Report the first violating row and, within it, the lowest violating slot,
    # which is the order a row-by-row, slot-by-slot scan would find them in.
    index = violating_rows.idxmax()
    i = violations.loc[index].idxmax()
    raise AssertionError(f"Participant Error {i} Code is not 'None', but Participant Cause {i} Code is 'None' in row {index}")
print('All assertions passed successfully!')


All assertions passed successfully!


In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Row-level flags for vehicle and participant hit & run
# (participant_hit_run is not used by the assertion below)
vehicle_hit_run = df['Vehicle Hit & Run Flag'].eq(1)
participant_hit_run = df['Participant Hit & Run Flag'].eq(1)

# For every row, the largest 'Participant Hit & Run Flag' among rows sharing its 'Crash ID'
crash_max_participant_flag = df.groupby('Crash ID')['Participant Hit & Run Flag'].transform('max')
crash_has_participant_hit_run = crash_max_participant_flag == 1

# Violation: a row flagged as vehicle hit & run whose crash has no participant maximum equal to 1
violations = vehicle_hit_run & ~crash_has_participant_hit_run
assert violations.sum() == 0, f"Participant Hit & Run Flag should be 1 for at least one participant if Vehicle Hit & Run Flag is 1 in {csv_file_path}"


AssertionError: Participant Hit & Run Flag should be 1 for at least one participant if Vehicle Hit & Run Flag is 1 in Hwy26Crashes2019_S23.csv

Resolving above violation

In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# NOTE(review): this cell applies the same condition to the same file as the
# preceding hit & run cell; nothing is filtered or relaxed here — confirm what
# the intended resolution was.
vehicle_hit_run = df['Vehicle Hit & Run Flag'] == 1
participant_hit_run = df['Participant Hit & Run Flag'] == 1  # not used by the assertion

# Per-row maximum of the participant flag within the row's 'Crash ID' group
participant_flag_max = df['Participant Hit & Run Flag'].groupby(df['Crash ID']).transform('max')

# Count rows with a vehicle hit & run flag whose crash-level participant maximum is not 1
unmatched_vehicle_rows = vehicle_hit_run & (participant_flag_max != 1)
assert not unmatched_vehicle_rows.any(), f"Participant Hit & Run Flag should be 1 for at least one participant if Vehicle Hit & Run Flag is 1 in {csv_file_path}"


Summary Assertions


In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Summary check: every row's 'Highway Number' must equal the string '26'
on_highway_26 = df['Highway Number'].eq('26')
assert on_highway_26.all(), f"Not every crash occurs on Highway Number 26 in {csv_file_path}"

AssertionError: Not every crash occurs on Highway Number 26 in Hwy26Crashes2019_S23.csv

Resolving above violation

In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Summary check restricted to rows that actually carry a 'Highway Number'.
# dropna() removes the missing entries so they cannot fail the comparison
# (replacing them with '' would still compare unequal to the expected value).
highway_numbers = df['Highway Number'].dropna()

# Compare numerically so the check gives the same answer whether pandas parsed
# the column as integers, floats, or strings; unparseable values become NaN
# and therefore fail the equality.
on_highway_26 = pd.to_numeric(highway_numbers, errors='coerce') == 26
assert on_highway_26.all(), f"Not every non-null crash occurs on Highway Number 26 in {csv_file_path}"
print("All assertions passsed successfully")


All assertions passsed successfully


In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Summary check: every 'Speed Involved Flag' must be one of the literals 'US' or 'OR'
allowed_speed_flags = ['US', 'OR']
speed_flag_ok = df['Speed Involved Flag'].isin(allowed_speed_flags)
assert speed_flag_ok.all(), f"Not all crashes involved have a speed flag of 'US' or 'Oregon' in {csv_file_path}"


AssertionError: Not all crashes involved have a speed flag of 'US' or 'Oregon' in Hwy26Crashes2019_S23.csv

Resolving above violation

In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Keep only the rows that have a 'Speed Involved Flag' value
df_filtered = df.dropna(subset=['Speed Involved Flag'])

# Summary check on the remaining rows: the flag must be one of the literals 'US' or 'OR'
valid_speed_flags = df_filtered['Speed Involved Flag'].isin(['US', 'OR'])
all_flags_valid = valid_speed_flags.all()
assert all_flags_valid, f"Not all non-null crashes involved have a speed flag of 'US' or 'Oregon' in {csv_file_path}"
print('All assertions passed successfully!')


All assertions passed successfully!


Statistical Assertions

In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Only rows that actually carry an 'Alcohol-Involved Flag' take part in the
# comparison. Using len(df) as the total would count every row with a missing
# flag as "no alcohol involved" and inflate that side of the comparison.
alcohol_flags = df['Alcohol-Involved Flag'].dropna()

# Sum of the flag values versus the remaining flagged rows
# NOTE(review): the sum equals a row count only if the flag is coded 0/1 — confirm.
alcohol_involved_count = alcohol_flags.sum()
non_alcohol_involved_count = len(alcohol_flags) - alcohol_involved_count

# Statistical check: alcohol-involved crashes are fewer than crashes without alcohol involvement
assert alcohol_involved_count < non_alcohol_involved_count, f"Alcohol-involved crashes are not less compared to crashes without alcohol involvement in {csv_file_path}"


In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# Number of distinct 'Vehicle Coded Seq#' values recorded under each 'Crash ID'
vehicles_per_crash = df.groupby('Crash ID')['Vehicle Coded Seq#'].nunique()

# Crashes with exactly one distinct vehicle sequence number, and the number of distinct crashes
single_vehicle_count = (vehicles_per_crash == 1).sum()
total_crashes = df['Crash ID'].nunique()

# Statistical check: more than half of the crashes involve exactly one vehicle
assert single_vehicle_count > (total_crashes / 2), f"Majority of crashes do not have total number of vehicles involved as 1 in {csv_file_path}"


AssertionError: Majority of crashes do not have total number of vehicles involved as 1 in Hwy26Crashes2019_S23.csv

Resolving above violation

In [None]:
import pandas as pd

# Location of the crash data export
csv_file_path = 'Hwy26Crashes2019_S23.csv'

# Load the whole file into a DataFrame
df = pd.read_csv(csv_file_path)

# NOTE(review): the condition and input file here are the same as in the
# preceding single-vehicle cell; only the success message is new — confirm
# what the intended resolution was.

# Distinct 'Vehicle Coded Seq#' values per 'Crash ID'
distinct_vehicles = df['Vehicle Coded Seq#'].groupby(df['Crash ID']).nunique()
single_vehicle_count = distinct_vehicles.eq(1).sum()
total_crashes = df['Crash ID'].nunique()

# Statistical check: single-vehicle crashes must make up more than half of all crashes
half_of_crashes = total_crashes / 2
assert single_vehicle_count > half_of_crashes, f"Majority of crashes do not have total number of vehicles involved as 1 in {csv_file_path}"
print('Assertion is successfull')

Assertion is successfull
