In [1]:
import re

import pandas as pd

# Step 1: Read both files into DataFrames.
# Both files are read without a header row, using " ; " as the field separator
# (the python engine is required for a multi-character separator).
loginlogs_df = pd.read_csv("loginlogs.txt", sep=" ; ", header=None, names=["date", "time", "PC name", "user name"], engine='python')
school_pcs_df = pd.read_csv("School_PCs.txt", sep=" ; ", header=None, names=["Inventory number", "Asset name", "Asset type", "Location", "Responsible", "Position", "Year", "Comment"], engine='python')

# Step 2: Extract inventory numbers from School_PCs.txt
inventory_numbers = school_pcs_df["Inventory number"]

# Step 3: Filter loginlogs_df to include only rows with PC names containing inventory numbers.
# Each inventory number is escaped so regex metacharacters in it are matched
# literally, and missing values are dropped so they do not turn into the
# literal pattern "nan".
inventory_patterns = [re.escape(str(number)) for number in inventory_numbers.dropna()]
inventory_patterns = [pattern for pattern in inventory_patterns if pattern]

if inventory_patterns:
    # na=False: a missing PC name counts as "no match" instead of putting NaN
    # into the boolean mask, which would make the indexing below raise.
    pc_is_inventoried = loginlogs_df["PC name"].str.contains(
        '|'.join(inventory_patterns), case=False, na=False
    )
else:
    # An empty alternation would match every row; with no inventory numbers
    # nothing should match.
    pc_is_inventoried = pd.Series(False, index=loginlogs_df.index)

df = loginlogs_df[pc_is_inventoried]

# Display the filtered DataFrame
print(df.head())

              date         time      PC name            user name
6   Fri 09/01/2023  10:31:20.62  011108499NU          abdul.wahab
10  Fri 09/01/2023  10:30:41.34  000553011NU     nazym.alipbayeva
12  Fri 09/01/2023  10:28:11.77  000580520NU  eldar.sharafutdinov
25  Fri 09/01/2023  10:20:22.48  000580519NU       aibek.shokayev
26  Fri 09/01/2023  10:19:15.87  NLT069054NU     kassym.talgatuly


In [None]:
# Keep an independent copy of df as it is at this point, so later cells can
# restore the working frame from it.
Original_df = df.copy()

In [None]:
# Reset df: rebind it to a fresh copy of the saved snapshot, discarding any
# changes made to df since the snapshot was taken.
df = Original_df.copy()

In [None]:
# Create a new column to store the original date values.
# Guarded so that re-running this cell does not overwrite the raw values with
# already-parsed timestamps.
if 'Original_Date' not in df.columns:
    df['Original_Date'] = df['date'].copy()

# Converting all data in "date" column into Timestamp.
# Candidate layouts, tried in this order; for each value the first layout
# that parses it wins.
date_format_1 = '%a %m/%d/%Y'
date_format_2 = '%d.%m.%Y'
date_format_3 = '%d/%m/%Y'
date_format_4 = '%d-%b-%y'
date_format_5 = '%Y-%m-%d'
date_format_6 = '%m/%d/%Y'
date_formats = (
    date_format_1,
    date_format_2,
    date_format_3,
    date_format_4,
    date_format_5,
    date_format_6,
)
# NOTE(review): date_format_3 (day first) is tried before date_format_6 (month
# first), so an ambiguous value such as 09/01/2023 is read as 9 January, while
# date_format_1 is month first -- confirm which reading is intended.
# NOTE(review): %y in date_format_4 maps two-digit years 00-68 to 2000-2068,
# so a year written as "65" becomes 2065 -- confirm this is acceptable.

# Applying different date formats: start from the first layout and let each
# later layout fill only the values that are still NaT.
raw_dates = df['date']
parsed_dates = pd.to_datetime(raw_dates, errors='coerce', format=date_formats[0])
for date_format in date_formats[1:]:
    parsed_dates = parsed_dates.combine_first(
        pd.to_datetime(raw_dates, errors='coerce', format=date_format)
    )

# Filling errors with previous non-error value; only rows before the first
# parsed date can remain NaT afterwards.
# The column is replaced as a whole (rather than via df.loc[:, 'date'] = ...)
# so that it takes the datetime dtype instead of staying an object column.
df['date'] = parsed_dates.ffill()

df

In [None]:
# Rows whose "date" value is missing (NaT)
date_is_missing = df['date'].isna()
errors_only = df.loc[date_is_missing]
errors_only

In [None]:
# Count how often each raw date value occurs among the rows with a missing
# parsed date (missing raw values are counted too), and show it as a dict.
date_counts = errors_only['Original_Date'].value_counts(dropna=False)
dict(date_counts)

In [None]:
# Collect every row with a missing date together with its immediate
# neighbours (previous row, the row itself, next row), so the surrounding
# dates can be compared. No comparison is performed here; this only builds
# the frame to inspect.

# Use a 0..n-1 index so that index labels equal row positions, which the
# idx - 1 / idx + 1 arithmetic below relies on.
df = df.reset_index(drop=True)

# Find the indices of rows containing NaT values in the 'date' column
nan_indices = df.index[df['date'].isna()]

# Positions of each NaT row and its neighbours, skipping positions that fall
# outside the frame (before the first row or after the last row). A row that
# neighbours several NaT rows is listed once per NaT row.
last_position = len(df) - 1
neighbour_positions = [
    position
    for idx in nan_indices
    for position in (idx - 1, idx, idx + 1)
    if 0 <= position <= last_position
]

# Select all rows in one positional lookup: this keeps the column dtypes and
# still yields a frame with the expected columns when there are no NaT rows.
result_df = df.iloc[neighbour_positions].reset_index(drop=True)

In [None]:
# Gap between each row's date and the date of the row before it
df['time_diff'] = df['date'].diff()

# Largest gap, and every row at which that largest gap occurs
max_time_diff = df['time_diff'].max()
is_largest_gap = df['time_diff'].eq(max_time_diff)
max_time_diff_rows = df.loc[is_largest_gap]

# Report the gap followed by the matching rows
print("Maximum Time Difference:", max_time_diff)
print("Rows with Maximum Time Difference:", max_time_diff_rows, sep="\n")

In [None]:
# Show the time differences for row positions 2000 up to (not including) 3000
dict(df['time_diff'].iloc[2000:3000])

In [None]:
# Share of rows whose date is missing, as a percentage rounded to two decimals
count = df['date'].size
na_count = df['date'].isna().sum()
nat_percentage = round(na_count / count * 100, 2)
print(f'Percentage of NaT is: {nat_percentage}%')

In [None]:
# Boolean mask: True for every row whose "date" value is missing
df['date'].isna()

In [None]:
# Show the raw date string of the first row whose parsed date is missing.
date = pd.NaT
# NaT never compares equal to anything (including NaT), so an equality test
# against `date` would select no rows; missing values are found with isna().
unparsed_original_dates = df.loc[df["date"].isna(), "Original_Date"]
# Evaluates to None (no output) when every date was parsed, instead of
# raising IndexError on the empty selection.
unparsed_original_dates.iloc[0] if not unparsed_original_dates.empty else None

In [None]:
# Date of the third row (position 2) recorded for one specific PC name
inv_num = "NLT061083NU"
dates_for_pc = df.loc[df["PC name"].eq(inv_num), "date"]
dates_for_pc.iloc[2]

In [None]:
# All rows for one user. The filtered login frame is bound to `df` in this
# notebook; no `filtered_loginlogs_df` is defined, so using that name raises
# NameError on a fresh kernel.
df[df["user name"] == "abay.kasken"]

In [None]:
# Look up the inventory record whose inventory number equals inv_num exactly
inv_num = "NLT060945"
is_requested_asset = school_pcs_df["Inventory number"].eq(inv_num)
school_pcs_df.loc[is_requested_asset]

In [None]:
# All rows whose parsed date equals one specific timestamp
date = pd.Timestamp('2065-02-23 00:00:00')
is_requested_date = df["date"].eq(date)
df.loc[is_requested_date]

In [None]:
# Count how many rows fall on each parsed date (missing dates are counted
# too) and show the counts as a dict. Rebinds the name `date_counts`.
date_counts = df['date'].value_counts(dropna=False)
dict(date_counts)