# In [None]:
import pandas as pd
import sys
from datetime import datetime

# Path of the CSV file that main() loads with pandas.read_csv.
INPUT_CSV = 'data_test0.csv'
# Path of the CSV file that main() writes the resulting DataFrame to.
OUTPUT_CSV = 'output.csv'

def _merge_row_into(intervals, row, start, end):
  """Fold one input row into ``intervals``, the merged-interval list of one ID.

  ``intervals`` is mutated in place. Every stored interval that overlaps the
  closed range [start, end] is collapsed, together with the row, into a single
  interval; if nothing overlaps, a new interval is appended.
  """
  # Intervals are closed, so a shared endpoint counts as an overlap.
  hits = [iv for iv in intervals
          if not (end < iv['start'] or start > iv['end'])]

  if not hits:
    intervals.append({'ID': row['ID'], 'start': start, 'end': end,
                      'source_ticker': row['source_ticker'],
                      'target_ticker': row['target_ticker'],
                      'source_cusip': row['source_cusip'],
                      'target_cusip': row['target_cusip']})
    return

  # The earliest-created overlapping interval survives and keeps its cusips;
  # any further overlapping intervals are absorbed into it. Absorbing all of
  # them matters when the row bridges two previously disjoint intervals.
  keeper = hits[0]
  absorbed = hits[1:]
  keeper['start'] = min([start] + [iv['start'] for iv in hits])
  keeper['end'] = max([end] + [iv['end'] for iv in hits])
  keeper['source_ticker'] = ';'.join(
      [iv['source_ticker'] for iv in hits] + [row['source_ticker']])
  keeper['target_ticker'] = ';'.join(
      [iv['target_ticker'] for iv in hits] + [row['target_ticker']])

  if absorbed:
    # Compare by identity so only the absorbed objects themselves are dropped.
    intervals[:] = [iv for iv in intervals
                    if not any(iv is gone for gone in absorbed)]


def main(input_csv=None, output_csv=None):
  """Merge overlapping date intervals per ID and write the result as CSV.

  The input CSV must provide the columns ``ID``, ``start``, ``end``,
  ``source_ticker``, ``target_ticker``, ``source_cusip`` and ``target_cusip``.
  ``start`` and ``end`` are parsed as ``YYYY-MM-DD`` strings. Rows sharing an
  ID whose closed [start, end] ranges overlap are collapsed into one interval
  covering all of them; their tickers are joined with ``';'`` and the cusips
  of the interval that was created first are kept. The output lists IDs in
  order of first appearance, with each ID's intervals sorted by start.

  Args:
    input_csv: Path of the CSV to read. Defaults to ``INPUT_CSV``.
    output_csv: Path of the CSV to write. Defaults to ``OUTPUT_CSV``.

  Raises:
    ValueError: If a row's start date is later than its end date.
  """
  # Resolve the defaults at call time so the module constants are only
  # consulted when the caller did not supply a path.
  if input_csv is None:
    input_csv = INPUT_CSV
  if output_csv is None:
    output_csv = OUTPUT_CSV

  df = pd.read_csv(input_csv)

  id_dict = dict()
  for index, row in df.iterrows():
    curr_start = datetime.strptime(row['start'], '%Y-%m-%d')
    curr_end = datetime.strptime(row['end'], '%Y-%m-%d')
    # Explicit check instead of assert: asserts are stripped under -O.
    if curr_start > curr_end:
      raise ValueError('row {}: start {} is after end {}'.format(
          index, row['start'], row['end']))
    _merge_row_into(id_dict.setdefault(row['ID'], []), row,
                    curr_start, curr_end)

  # Echo the merged intervals of each ID to stdout (before sorting).
  for key in id_dict:
    print(id_dict[key])

  # Output as DataFrame: one record per merged interval, sorted by start
  # time within each ID.
  columns = ['ID', 'start', 'end', 'source_ticker', 'target_ticker',
             'source_cusip', 'target_cusip']
  records = [item
             for key in id_dict
             for item in sorted(id_dict[key], key=lambda x: x['start'])]
  out_df = pd.DataFrame(records, columns=columns)
  out_df.to_csv(output_csv)

# Script entry point: run main() and hand its return value to sys.exit as the
# process exit status.
if __name__ == "__main__":
  exit_status = main()
  sys.exit(exit_status)