In [151]:
import json
from datetime import datetime
from functools import reduce
from collections import defaultdict
from pyspark import SparkContext
from pyspark.sql import functions as F
from pyspark.sql.window import Window
from awsglue.context import GlueContext

gc = GlueContext(SparkContext.getOrCreate())

ddf = gc.create_dynamic_frame_from_options("s3"\
                                          , {"paths": ["s3://wfercosta-spark/logs/DAILY_20220117.csv"]}\
                                          ,"csv"\
                                          ,{'withHeader':True})
        
df = ddf.toDF()

df = df.withColumn('date', F.to_date(df.date))
df = df.withColumn('timestamp', F.to_timestamp(df.timestamp))
df = df.withColumn('response_time', df.response_time.cast('int'))

df.show()

df.printSchema()

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+----------+------------+--------------------+-------+--------------------+------+-------------+-------------------+
|      date|     context|              family|version|            resource|status|response_time|          timestamp|
+----------+------------+--------------------+-------+--------------------+------+-------------+-------------------+
|2021-05-11|open-banking|            accounts|     v1|/accounts/{id}/ba...|   200|          112|2022-01-17 13:50:00|
|2021-05-11|open-banking|            accounts|     v1|/accounts/{id}/ba...|   200|          112|2022-01-17 13:51:00|
|2021-05-11|open-banking|            accounts|     v1|/accounts/{id}/ba...|   200|          112|2022-01-17 13:52:00|
|2021-05-11|open-banking|            accounts|     v1|/accounts/{id}/ba...|   200|          112|2022-01-17 13:53:00|
|2021-05-11|open-banking|            accounts|     v1|/accounts/{id}/ba...|   500|          112|2022-01-17 13:54:00|
|2021-05-11|open-banking|               loans|     v1|          

In [152]:

df_latest = df

column_list = ['context', 'family', 'version', 'resource']

window = Window().partitionBy([F.col(x) for x in column_list]).orderBy([\
                                                                          F.col('family')
                                                                        , F.col('version')
                                                                        , F.col('resource')\
                                                                        , F.col('timestamp').desc()])

df_latest = df_latest.withColumn('row', F.row_number().over(window))\
    .filter(F.col('row') == 1)

df_latest.show()



FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

+----------+--------------+--------------------+-------+--------------------+------+-------------+-------------------+---+
|      date|       context|              family|version|            resource|status|response_time|          timestamp|row|
+----------+--------------+--------------------+-------+--------------------+------+-------------+-------------------+---+
|2021-05-11|  open-banking|               loans|     v1|          /contracts|   500|          112|2022-01-17 13:54:00|  1|
|2021-05-11|open-insurance|credit-cards-acco...|     v1|      /accounts/{id}|   200|          112|2022-01-17 13:54:00|  1|
|2021-05-11|  open-finance|            accounts|     v1|/accounts/{id}/ba...|   500|          112|2022-01-17 13:54:00|  1|
|2021-05-11|open-insurance|            accounts|     v1|/accounts/{id}/ba...|   200|          112|2022-01-17 13:54:00|  1|
|2021-05-11|  open-finance|credit-cards-acco...|     v1|      /accounts/{id}|   500|          112|2022-01-17 13:54:00|  1|
|2021-05-11|  op

In [153]:


df_status = df_latest

is_unavailable = lambda status: (status >= 500) & (status < 600)

df_status = df_status.withColumn('unavailable', is_unavailable(df_status.status))
df_status = df_status.withColumn('resource', F.concat(F.lit('/'), df_status.context\
                                                      , F.lit('/'), df_status.family\
                                                      , F.lit('/'), df_status.version\
                                                      , df_status.resource))

df_status = df_status.select(['context', 'resource', 'unavailable'])

elements = df_status.toJSON().map(lambda j: json.loads(j)).collect()

print(json.dumps(elements, indent=2))

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

[
  {
    "context": "open-banking",
    "resource": "/open-banking/loans/v1/contracts",
    "unavailable": true
  },
  {
    "context": "open-insurance",
    "resource": "/open-insurance/credit-cards-accounts/v1/accounts/{id}",
    "unavailable": false
  },
  {
    "context": "open-finance",
    "resource": "/open-finance/accounts/v1/accounts/{id}/balances",
    "unavailable": true
  },
  {
    "context": "open-insurance",
    "resource": "/open-insurance/accounts/v1/accounts/{id}/balances",
    "unavailable": false
  },
  {
    "context": "open-finance",
    "resource": "/open-finance/credit-cards-accounts/v1/accounts/{id}",
    "unavailable": true
  },
  {
    "context": "open-finance",
    "resource": "/open-finance/loans/v1/contracts",
    "unavailable": false
  },
  {
    "context": "open-banking",
    "resource": "/open-banking/accounts/v1/accounts/{id}/balances",
    "unavailable": true
  },
  {
    "context": "open-insurance",
    "resource": "/open-insurance/credit-cards-acco

In [154]:

def group_by_context(acc, el):
    context = el['context']
    del el['context']
    acc[context].append(el)
    return acc

grouped = dict(reduce(group_by_context, elements, defaultdict(list)))
grouped


FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

{'open-banking': [{'resource': '/open-banking/loans/v1/contracts', 'unavailable': True}, {'resource': '/open-banking/accounts/v1/accounts/{id}/balances', 'unavailable': True}, {'resource': '/open-banking/credit-cards-accounts/v1/accounts/{id}', 'unavailable': True}, {'resource': '/open-banking/credit-cards-accounts/v1/accounts', 'unavailable': True}], 'open-insurance': [{'resource': '/open-insurance/credit-cards-accounts/v1/accounts/{id}', 'unavailable': False}, {'resource': '/open-insurance/accounts/v1/accounts/{id}/balances', 'unavailable': False}, {'resource': '/open-insurance/credit-cards-accounts/v1/accounts', 'unavailable': False}, {'resource': '/open-insurance/loans/v1/contracts', 'unavailable': False}], 'open-finance': [{'resource': '/open-finance/accounts/v1/accounts/{id}/balances', 'unavailable': True}, {'resource': '/open-finance/credit-cards-accounts/v1/accounts/{id}', 'unavailable': True}, {'resource': '/open-finance/loans/v1/contracts', 'unavailable': False}, {'resource':

In [155]:
def get_status(total, unavailables):
    diff =  total - unavailables
    
    if unavailables > 0:
        if diff == 0:
            return 'UNAVAILABLE'
        else:
            return 'PARTIAL_FAILURE'
    else:
        return 'OK'


statuses = list()

for key, values in grouped.items():
    count_unavailables = reduce(lambda acc, el: acc + (1 if el['unavailable'] else 0), values, 0)
    status = get_status(len(elements), count_unavailables)
    unavailables = list(map(lambda el: el['resource'], filter(lambda el: el['unavailable'], values)))
    document = {'context': key, 'status': status, 'unavailableEndpoints': unavailables, 'detectionTime': datetime.now().isoformat()}
    statuses.append(document)
    

print(json.dumps(statuses, indent=2))

FloatProgress(value=0.0, bar_style='info', description='Progress:', layout=Layout(height='25px', width='50%'),…

[
  {
    "context": "open-banking",
    "status": "PARTIAL_FAILURE",
    "unavailableEndpoints": [
      "/open-banking/loans/v1/contracts",
      "/open-banking/accounts/v1/accounts/{id}/balances",
      "/open-banking/credit-cards-accounts/v1/accounts/{id}",
      "/open-banking/credit-cards-accounts/v1/accounts"
    ],
    "detectionTime": "2022-01-17T21:44:17.927572"
  },
  {
    "context": "open-insurance",
    "status": "OK",
    "unavailableEndpoints": [],
    "detectionTime": "2022-01-17T21:44:17.927634"
  },
  {
    "context": "open-finance",
    "status": "PARTIAL_FAILURE",
    "unavailableEndpoints": [
      "/open-finance/accounts/v1/accounts/{id}/balances",
      "/open-finance/credit-cards-accounts/v1/accounts/{id}"
    ],
    "detectionTime": "2022-01-17T21:44:17.927672"
  }
]