In [3]:
import pandas as pd
import re
import datetime
from tqdm import tqdm
import os
import csv

In [4]:
# Canonical weekday order; also the only keys convert_hour ever returns.
_WEEKDAYS = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')
# "<day text>, <time text>", e.g. "Monday (Labor Day), 9 AM - 5 PM".
_DAY_ENTRY_RE = re.compile(r"(?P<day>[A-Za-z\s\(\)]+),\s*(?P<time>.+)")
# Case-sensitive AM/PM markers.
_PERIOD_RE = re.compile(r'\b(?:AM|PM)\b')


def _all_day_range():
    """Return a fresh ``[open, type, close, type]`` list used for "Open 24 hours"."""
    return ['00:01', 'AM', '23:59', 'PM']


def _leading_day(entry):
    """Return the first word of the day prefix of ``entry``, or None if it has none."""
    match = _DAY_ENTRY_RE.match(entry)
    if match is None:
        return None
    return match.group("day").strip().split(' ')[0]


def _to_twelve_hour(time_text, period):
    """Convert "H", "H:MM" or "HH:MM" into a zero-padded 12-hour "HH:MM" string.

    ``period`` is the AM/PM marker found in the text, or None; when None it is
    derived from the hour read as 24-hour time (>= 12 means PM).
    Raises ValueError when the hour or minute is out of range.
    """
    hour_text, _, minute_text = time_text.partition(':')
    clock = datetime.time(int(hour_text), int(minute_text or 0))
    if period is None:
        period = 'PM' if clock.hour >= 12 else 'AM'
    return clock.strftime('%I:%M'), period


def _parse_time_range(range_text, time_pattern):
    """Parse one "<open> - <close>" fragment into ``[open, type, close, type]``.

    Returns None unless exactly two times matching ``time_pattern`` are found.
    """
    times = re.findall(time_pattern, range_text)
    if len(times) != 2:
        return None

    periods = _PERIOD_RE.findall(range_text)
    if periods:
        opening_period = periods[0]
        # A single marker ("1 - 5 PM") applies to both ends of the range.
        closing_period = periods[1] if len(periods) > 1 else periods[0]
    else:
        opening_period = closing_period = None

    opening_hour, opening_period = _to_twelve_hour(times[0], opening_period)
    closing_hour, closing_period = _to_twelve_hour(times[1], closing_period)
    return [opening_hour, opening_period, closing_hour, closing_period]


def convert_hour(hour):
    """Parse an opening-hours string into a per-weekday dictionary.

    Two input shapes are recognised:

    * General hours - a single entry without a weekday prefix, e.g.
      "9:00 AM - 5:00 PM" or "Open 24 hours". A parsed time range is applied
      to Monday through Saturday; "Open 24 hours" is applied to all seven days.
    * Day-specific hours - entries separated by "; ", each of the form
      "<day>, <ranges>", where ranges are separated by ", ", e.g.
      "Monday, 9 AM - 12 PM, 1 PM - 5 PM; Tuesday, Open 24 hours".

    Times without an AM/PM marker are read as 24-hour values. When a range
    carries only one marker it is used for both the opening and closing time.

    Args:
        hour: The raw opening-hours text. Non-string values (such as NaN)
            produce an empty result.

    Returns:
        dict mapping weekday name to a list of
        ``[opening "HH:MM", opening "AM"/"PM", closing "HH:MM", closing "AM"/"PM"]``
        lists, ordered Monday to Sunday. Days that are absent from the input,
        or whose name is not a weekday, are omitted. A listed day with no
        parsable range maps to an empty list.

    Raises:
        ValueError: if a matched time has an hour above 23 or a minute above 59.
    """
    if not isinstance(hour, str):
        return {}

    hours_dict = {}
    entries = hour.split("; ")
    sole_entry = entries[0] if len(entries) == 1 else None
    # A lone "Monday, ..." entry describes that day only, not general hours.
    has_weekday_prefix = sole_entry is not None and _leading_day(sole_entry) in _WEEKDAYS

    if sole_entry is not None and '-' in sole_entry and not has_weekday_prefix:
        if "Open 24 hours" in sole_entry:
            for day in _WEEKDAYS:
                hours_dict[day] = [_all_day_range()]
        else:
            parsed = _parse_time_range(sole_entry, r'\d{1,2}:\d{2}')
            if parsed is not None:
                # General hours are assigned to Monday through Saturday only.
                for day in _WEEKDAYS[:6]:
                    hours_dict[day] = [list(parsed)]
    elif (sole_entry is not None
          and "Open 24 hours" in sole_entry
          and _DAY_ENTRY_RE.match(sole_entry) is None):
        # Bare "Open 24 hours" (no hyphen, no day prefix): open all week.
        for day in _WEEKDAYS:
            hours_dict[day] = [_all_day_range()]
    else:
        # Day-specific entries, each possibly holding several ranges.
        for entry in entries:
            match = _DAY_ENTRY_RE.match(entry)
            if match is None:
                continue
            # Keep only the first word so "Monday (Holiday)" maps to "Monday".
            day = match.group("day").strip().split(' ')[0]
            time_string = match.group("time").strip()

            if "Open 24 hours" in time_string:
                hours_dict[day] = [_all_day_range()]
                continue

            day_ranges = []
            for time_range in time_string.split(", "):
                parsed = _parse_time_range(time_range, r'\d{1,2}(?::\d{2})?')
                if parsed is not None:
                    day_ranges.append(parsed)
            hours_dict[day] = day_ranges

    # Drop non-weekday keys and return the days in calendar order.
    return {day: hours_dict[day] for day in _WEEKDAYS if day in hours_dict}

In [5]:

# Input CSV that is parsed below, and the CSV the parsed rows are written to.
# NOTE(review): both are absolute, machine-specific paths - adjust before running elsewhere.
source_path = r"D:\DATA\2024\Oct\Output\Thu cong\11_10_2024_main_hour_(success).csv"
success_path = r"D:\DATA\2024\Oct\Output\Thu cong\11_10_2024_hour.csv"

# Load the source rows with pandas' default dtype inference.
df = pd.read_csv(filepath_or_buffer=source_path)

In [6]:
# Report how many distinct codes exist before and after discarding the rows
# whose 'hour' value is missing (those rows cannot be parsed).
codes_before = df['code'].nunique()
print(codes_before)

df = df.dropna(subset=['hour'])

codes_after = df['code'].nunique()
print(codes_after)

301
263


In [7]:
def process_row(row):
    """Expand one source row into one record per (weekday, time range).

    Args:
        row: A mapping-like row (e.g. a pandas Series) providing the keys
            'hour', 'code' and 'orignalname'.

    Returns:
        list of dicts with the keys 'code', 'day', 'openingHour',
        'openingType', 'closingHour', 'closingType' and 'orignalname',
        ordered Monday to Sunday. Empty when no hours could be parsed.
    """
    # Explicit calendar order. Parsing the day name with strptime('%A') yields
    # the same datetime for every weekday (so it cannot order anything) and
    # depends on the process locale.
    weekday_rank = {
        'Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 'Thursday': 3,
        'Friday': 4, 'Saturday': 5, 'Sunday': 6,
    }

    hours_dict = convert_hour(row['hour'])
    # Unknown day names, if any, are placed after Sunday; the sort is stable.
    ordered_days = sorted(hours_dict, key=lambda day: weekday_rank.get(day, len(weekday_rank)))

    results = []
    for day in ordered_days:
        for time_range in hours_dict[day]:
            if time_range is None:
                continue
            results.append({
                'code': row['code'],
                'day': day,
                'openingHour': time_range[0],
                'openingType': time_range[1],
                'closingHour': time_range[2],
                'closingType': time_range[3],
                'orignalname': row['orignalname'],
            })

    return results

In [8]:
# Register tqdm's progress_apply on pandas objects, then parse every row.
tqdm.pandas()
df['hours_results'] = df.progress_apply(process_row, axis=1)

# Flatten the per-row record lists into one table and write it out.
hours_result = pd.DataFrame(
    [record for row_records in df['hours_results'] for record in row_records]
)
hours_result.to_csv(success_path, index=False)

100%|██████████| 263/263 [00:00<00:00, 5157.19it/s]
