<a href="https://colab.research.google.com/github/xdderekchen/ML/blob/master/Test_API_IXINBUY.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Demonstation of Web API of ttData

ttdata provides webservices for users to access the powerful block-chain based services.

This post has 2 purposes:
 1. Samples for API calls
 2. Testing of API and the webservice

## 1.1. Version 1

In this version, we will test the following Web API functions
 * **(POST)** http://URL:port/register
 * **(POST)** http://URL:port/uploaddata
 * **(POST)** http://URL:port/uploadbatchdata
 * **(GET)**  http://URL:port/getshareddata

### Setup Server URL

In [0]:
baseURL = "http://108.48.52.183:7061/"   #production server

#baseURL = "http://ixinbuy.com:7061/"    #Azure Server

DEVICE_ID = "XD_COVID19_V1"

### register  (endpoint: /register)

In [19]:
import requests
import json

URL = baseURL +"register"

payload = {"device_id":DEVICE_ID}

#Passing payload as dict
responce = requests.post(URL, data = payload)
print(responce.status_code)
print(responce.text)


ConnectionError: ignored

### uploaddata (single data) (endpoint: /uploaddata)

In [14]:
import requests
import json

URL = baseURL + "uploaddata"

# data_type": 2 for COVID19 data
payload = {
   "data_type": 2, 
   "device_id": DEVICE_ID,
   "device_data": {
          "date": "2019-01-05",
          "province_state": "Shanghai00",
          "country_region": "Mainland China",
          "last_update": "2019-01-07T10:43:02",
          "confirmed":100,
          "deaths": 7,
          "recovered": 1237,
          "latitude":  23.3417,
          "longitude": 113.4244,
          "key": "country_region|province_state|date"
   }
}


#Passing payload as dict
responce = requests.post(URL, json = payload)
print(responce.status_code)
print(responce.text)


200
data uploaded successfully


### uploadbatchdata (multiple data) (endpoint:/uploadbatchdata)

In [15]:
import requests
import json

URL = baseURL + "uploadbatchdata"

# data_type": 2 for COVID19 data
payload ={'data_type': 2, 
          'device_id': DEVICE_ID,
          'device_data': {
              'data': 
              [{"date": "2020-03-26", 
                "fips": "45001", 
                "county": "Abbeville", 
                "province_state": "South Carolina", 
                "country_region": "US", 
                "last_update": "2020-03-26 23:48:35", 
                "latitude": 34.22333378, 
                "longitude": -82.46170658, 
                "confirmed": 3, "deaths": 0, "recovered": 0, "active": 0
                }, 
               {"date": "2020-03-26", 
                "fips": "22001", 
                "county": "Acadia", 
                "province_state": "Louisiana", 
                "country_region": "US", 
                "last_update": "2020-03-26 23:48:35", 
                "latitude": 30.2950649, 
                "longitude": -92.41419698, 
                "confirmed": 3, "deaths": 0, "recovered": 0, "active": 0}
               ], 
               'key': 'country_region|province_state|county|date'}
          }


#Passing payload as dict
responce = requests.post(URL, json = payload)
print(responce.elapsed.total_seconds())
print(responce.status_code)
print(responce.text)


1.228218
200
COVID19 data uploaded successfully


### function to standarize names.

In [16]:
import numpy as np
import pandas as pd
import os 

def transform_names(df):
    '''
       need to have standard names
    '''
    df.columns = [x.lower() for x in df.columns]
    df.rename(columns={'province/state': 'province_state',
                   'admin2'            : 'county',
                   'country/region'    : 'country_region',
                   'last update'       : 'last_update',
                   "long_":          "longitude",
                   "long":          "longitude",
                   "lat":           "latitude",
                   "lat_":           "latitude",
                  }, inplace=True)
    
    if ("combined_key" in df.columns):
       df.drop(columns=['combined_key'], inplace=True)
       
    keycolumn = "country_region|province_state|date"

    if ("county" in df.columns):
        keycolumn = "country_region|province_state|county|date"
  
    return (df, keycolumn)

#######################################################################
##### Testing
def run_test(date= "03-20-2020"):
    github_root = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports"
    file = os.path.join(github_root, date + ".csv")
    df = pd.read_csv(file ,dtype = {'FIPS': str, 'ZIP': str, "ZIP_CODE": str})
    df, key = transform_names(df)
    print(key)
    print(df.columns)

    row = df.iloc[1]
    print(json.loads(row.to_json()))

run_test("03-20-2020")  #Old format
run_test("03-26-2020")  #new format


country_region|province_state|date
Index(['province_state', 'country_region', 'last_update', 'confirmed',
       'deaths', 'recovered', 'latitude', 'longitude'],
      dtype='object')
{'province_state': None, 'country_region': 'Italy', 'last_update': '2020-03-20T17:43:03', 'confirmed': 47021, 'deaths': 4032, 'recovered': 4440, 'latitude': 41.8719, 'longitude': 12.5674}
country_region|province_state|county|date
Index(['fips', 'county', 'province_state', 'country_region', 'last_update',
       'latitude', 'longitude', 'confirmed', 'deaths', 'recovered', 'active'],
      dtype='object')
{'fips': '22001', 'county': 'Acadia', 'province_state': 'Louisiana', 'country_region': 'US', 'last_update': '2020-03-26 23:48:35', 'latitude': 30.2950649, 'longitude': -92.41419698, 'confirmed': 3, 'deaths': 0, 'recovered': 0, 'active': 0}


### Covid19data_to_ttdata

We will pull real data from John Hopkins and invoke the webservice call.

The source data is located at "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports"
    

In [17]:
from pprint import pprint 
import pandas as pd
import numpy as np
import os
import requests
import json

def covid19data_to_ttdata(source_file, URL, data_date, blocksize=100):
    print("processing: ", source_file)
    df = pd.read_csv(source_file, dtype = {'FIPS': str, 'ZIP': str, "ZIP_CODE": str})
    df, key = transform_names(df)
    errorCount = 0
    
    totalsize = df.shape[0]
    new=[list(range(i, min(i+blocksize, totalsize))) for i in range(0,totalsize,blocksize) ]

    for grp in new:
        json_data_list = []
        for i in grp:
            row = df.iloc[i]
            #row_as_dic = row.to_dict()
            device_data = {"date":data_date }
            device_data.update(json.loads(row.to_json()))
            json_data_list.append(device_data)
        
        print("\n", grp[0])
        payload = {
             "data_type": 2, 
             "device_id": DEVICE_ID,
             "device_data": {
                "data": json_data_list,
                "key" : key
                          }
              }
        #print(payload)
        responce = requests.post(URL, json = payload)
        print(responce.elapsed.total_seconds())
        print(responce.status_code)
        print(responce.text)

    #if (responce.status_code == 200) and ( responce.text == "true"):
    #    print(f"\nSuccess")
    #else:
    #    print(f"\nStatus_code: {responce.status_code} , Text: {responce.text}", flush=True)
    
      
       

github_root = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports"


file = os.path.join(github_root, "03-27-2020" + ".csv")

import time
ts = time.time()
source_file, URL, data_date, blocksize

d = covid19data_to_ttdata( source_file= file, 
                           URL = baseURL + "uploadbatchdata", 
                           data_date = "2020-03-27", 
                           blocksize = 100)
te = time.time()
print('%s  %2.2f sec' % ("uploaddata", (te - ts) ))
 


processing:  https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-27-2020.csv

 0
1.294735
200
COVID19 data uploaded successfully

 100
0.971246
500
COVID19 data upload failed

 200
1.291372
500
COVID19 data upload failed

 300
1.156452
500
COVID19 data upload failed

 400
1.079583
500
COVID19 data upload failed

 500
1.084234
500
COVID19 data upload failed

 600
0.997396
500
COVID19 data upload failed

 700
1.07808
500
COVID19 data upload failed

 800
1.140626
500
COVID19 data upload failed

 900
1.038625
500
COVID19 data upload failed

 1000
1.020187
500
COVID19 data upload failed

 1100
1.000899
500
COVID19 data upload failed

 1200
1.17559
500
COVID19 data upload failed

 1300
1.1638
500
COVID19 data upload failed

 1400
0.965958
500
COVID19 data upload failed

 1500
1.086838
500
COVID19 data upload failed

 1600
1.046624
500
COVID19 data upload failed

 1700
1.064857
500
COVID19 data upload failed

 1800
1.230886
500
CO

### getshareddata (GET)

This is the method to retrieve data from server



In [8]:
import pandas as pd
import os
import requests
import json

query = {"data_type": 2, "device_id":DEVICE_ID , "from_date": "2020-02-27"}

req = requests.get(baseURL + "getshareddata", params=query)
print(req.url)
print(req.status_code)
print(req)
print(req.text)
#data = req.json()

#df = pd.DataFrame(data)
#df
#df = df[~df['country_region'].isin(["China000", "China111"])]
#df = df.drop_duplicates(subset=["country_region", "province_state", "date" ], keep='last')
#df = df.query('country_region=="Italy"')

#df_agg=df.groupby(["country_region", "date"]).agg({'confirmed': ['sum'], 'deaths': ['sum'], 'recovered': ['sum']})
#df_agg.columns = ['confirmed', 'deaths', 'recovered']
#df_agg.reset_index(inplace=True)

#df_agg["fatality_rate"] = df_agg["deaths"] / df_agg["confirmed"]

#df_confirm = df_agg.sort_values(by='confirmed', ascending=False).head(120)

#df_confirm

#df[ (df["country_region"]=="US") & (df["province_state"] == "New York")]

http://108.48.52.183:7061/getshareddata?data_type=2&device_id=XD_COVID19_V1&from_date=2020-02-27
200
<Response [200]>
You are not authorized to view the data


### visualization

In [0]:
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="whitegrid")

df_fatal_rate = df_confirm.sort_values(by='fatality_rate', ascending=False)

# Initialize the matplotlib figure
f, (ax1, ax2) = plt.subplots(figsize=(16, 14), ncols=2)


sns.barplot(x="confirmed", y="country_region", data=df_confirm,
            label="confirmed", color="b",  ax=ax1)
ax1.set_title("Top 20 countries by the number of confirmed cases")

sns.barplot(x="fatality_rate", y="country_region", data=df_fatal_rate,
            label="fatality rate", color="y",  ax=ax2)
ax2.set_title("Fatality Rate, for top 20 countries by the number of confirmed cases")

In [0]:
import plotly.express as px

df['province_state'] = df['province_state'].fillna(df['country_region'])
fig = px.scatter_mapbox(data_frame=df, lat="latitude", lon="longitude", hover_data=["confirmed", "deaths"],  
                        size="confirmed", zoom=2, height=1000, hover_name="province_state" )
fig.update_layout(mapbox_style="open-street-map")
fig.show()

In [0]:
#!pip install bert-extractive_summarizer

In [0]:
from summarizer import Summarizer

body = 'Such a scandal as the prosecution of a brother for forgery—with a verdict of guilty—is a most truly horrible, deplorable, fatal thing. It takes the respectability out of a family perhaps at a critical moment, when the family is just assuming the robes of respectability: […] it is a black spot which all the soaps ever advertised could never wash off.'
body2 = 'Something else you want to summarize with BERT'
model = Summarizer()
print(model(body))
print(model(body2))


str ='''COVID-19 is a disease caused by a respiratory virus first identified in Wuhan, Hubei Province, China in December 2019. COVID-19 is a new virus that hasn't caused illness in humans before. Worldwide, COVID-19 has resulted in thousands of infections, 
causing illness and in some cases death. Cases have spread to countries throughout the world, with more cases reported daily.
'''

model=Summarizer()
abstract1_summary = model(str)

full_abstract = ''.join(abstract1_summary)
print(full_abstract)


Such a scandal as the prosecution of a brother for forgery—with a verdict of guilty—is a most truly horrible, deplorable, fatal thing.
Something else you want to summarize with BERT
COVID-19 is a disease caused by a respiratory virus first identified in Wuhan, Hubei Province, China in December 2019. COVID-19 is a new virus that hasn't caused illness in humans before.


# Combined Code

The following code can be copied to a single python file. Then run it from a command line.

Please remember to set *interactive=False* in the code [
**exit(main(interactive=False))** ]

In [0]:
import requests
import json
import numpy as np
import pandas as pd
import os 
import time
from datetime import datetime
import logging
import argparse


def showtime(tstart):
    """
    Show runtime duration since tstart

    parameters
    ----------
    tstart: datetime, since this time, the duration is calculated
    Returns
    -------
    out : duration in ms
    """
    te = time.time()
    return f"{int((te - tstart) * 1000)} ms"

def decorator_time(method):
    """
    Decorator function. Show runtime duration for the wrapped function.
    """
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        te = time.time()
        if 'log_time' in kw:
            name = kw.get('log_name', method.__name__.upper())
            kw['log_time'][name] = f"{int((te - ts) * 1000)} ms"
        else:
            print('%r  %2.2f ms' % \
                  (method.__name__, (te - ts) * 1000)
                  )
        return result
    return timed

class ttDataBase(object):
      def __init__(self, server_url="http://ixinbuy.com/", log_file="ttData_log.csv"):
        self.Server_URL = server_url
        self._Device_ID  = ""
        self._Datatype   = 0
        self.logger = logging.getLogger("ttDataBase_Test")
        formatter   = logging.Formatter('%(asctime)s|%(name)s|%(message)s')
    
        fileHandler = logging.FileHandler(log_file, mode='w')
        fileHandler.setFormatter(formatter)
        streamHandler = logging.StreamHandler()
        streamHandler.setFormatter(formatter)
        level=logging.INFO
        self.logger.setLevel(level)
        self.logger.addHandler(fileHandler)
        self.logger.addHandler(streamHandler)  

      @property
      def Device_ID(self):
          return self._Device_ID

      @Device_ID.setter
      def Device_ID(self, x):
          self._Device_ID = x

      @property
      def Datatype(self):
          return self._Datatype

      @Datatype.setter
      def Datatype(self, x):
          self._Datatype = x

      def set_client_data(self, **kwargs):  
          for key, value in kwargs.items(): 
              if (str.lower(key) == "datatype"):
                 self.Datatype = value
              elif (str.lower(key) == "device_id"):
                 self.Device_ID = value
              else:
                 self.logger.info("un-handled client data:", key, value)

      def testlog(self):
          self.logger.info("Oh, no! Something went wrong!")

      def register(self, devide_id =None):
          if devide_id is not None:
             self.Device_ID = devide_id
                  
          end_point = self.Server_URL + "register"
          payload = {"device_id": self.Device_ID}
          
          responce = requests.post(end_point, data = payload)
          #self.logger.info(responce.status_code)
          self.logger.info(f"register|{responce.text}")
          return (responce.status_code)

      
      @staticmethod
      def read_csse_covid_19_daily_reports(Y_M_Ddate):
          '''
          read daily COVID 19 data from github into a pandas dataframe with needed format.
          '''
      
          github_root = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports"
          datetime_object = datetime.strptime(Y_M_Ddate, "%Y-%m-%d")
          date = datetime_object.strftime("%m-%d-%Y")
          ####file = os.path.join(github_root, date + ".csv")
          file = github_root+ "/" + date + ".csv"

          df = pd.read_csv(file, dtype = {'FIPS': str, 'ZIP': str, "ZIP_CODE": str})
        
          df.columns = [x.lower() for x in df.columns]
          df.rename(columns={'province/state': 'province_state',
                   'admin2'            : 'county',
                   'country/region'    : 'country_region',
                   'last update'       : 'last_update',
                   "long_":          "longitude",
                   "long":          "longitude",
                   "lat":           "latitude",
                   "lat_":           "latitude",
                  }, inplace=True)
    
          df["key"] =  "country_region|province_state|date"
          return (df)

      def upload_data(self, data_date):
          end_point = self.Server_URL + "uploaddata"
          
          self.logger.info(f"upload_data|starting....")
          start_time = time.time()

          df = ttDataBase.read_csse_covid_19_daily_reports(data_date)
          print(df.shape)
          errorCount = 0
          i = 0
          for i in range(0, 3): #df.shape[0])  :####df.itertuples(index=False):
              row = df.iloc[i]
              #row_as_dic = row.to_dict()
  
              device_data = {"date":data_date }
              device_data.update(json.loads(row.to_json()))
       
              payload = {
                 "data_type": self.Datatype, 
                 "device_id": self.Device_ID,
                 "device_data": device_data
              }
              self.logger.debug(payload)
              responce = requests.post(end_point, json = payload)
              if (responce.status_code == 200) and ( responce.text == "true"):
                 if (i % 10==0):
                    print(i, "", end='', flush=True)
                 else:
                    errorCount += 1
                    print("\nError ", i , " with ", responce.text, flush=True)

          print(f"\nProcessed {i+3} of records")
          self.logger.info(f"upload_data_duration|{showtime(start_time)}")
          return (errorCount)

      def get_data(self, **kwargs):
          end_point = self.Server_URL + "getshareddata"
          self.logger.info(end_point)
          query = { "data_type": self.Datatype, 
                    "device_id": self.Device_ID }
          for key, value in kwargs.items(): 
              query[key] = value

          self.logger.info(query)
          req = requests.get(end_point, params=query)

          if (req.status_code == 200):
             data = req.json()
             df = pd.DataFrame(data)
             return (df)
          else:
             self.logger.info(req.status_code)

def init_argparse() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser(
        usage="%(prog)s [OPTION] [FILE]...",
        description="load COVID19 daily data to ttData via web API.",
        epilog='Enjoy the program! :)'
    )
    parser.add_argument(
        "-s", "--s", action='store', 
        help="ttData Server URL",
        required = True)
    parser.add_argument(
        "-d", "--d", action='store', 
        help="device_id, required",
        required = True)
    parser.add_argument(
        "-t", "--t", 
        action='store', 
        type=str,
        help="date in the format of YYYY-MM-DD",
        required = True)
    parser.add_argument(
        "-l", "--l", 
        action='store', 
        type=str,
        help="runtime logfile")
    
    return parser

def main(interactive=False) -> None:
    server_url="http://108.48.52.183:7061/"
    device = "DXDX111"
    log_file="ttDataBaseTest00.txt"
    time    = "2020-03-26"
    if (interactive == False):
       parser = init_argparse()
       args = parser.parse_args()
       server_url = args.s
       device = args.d
       time = args.t
       log_file = args.l
    
    my_ttData = ttDataBase(server_url=server_url, log_file=log_file)
    my_ttData.set_client_data(Device_ID = device, Datatype = 2)
    my_ttData.register()
    status = my_ttData.upload_data(time)

if __name__ == '__main__':
    try:
      exit(main(interactive=True))
    except Exception:
      logging.exception("Exception in main()")
      exit(1)
          

2020-04-21 02:39:07,928|ttDataBase_Test|register|device: DXDX111 is already registered
2020-04-21 02:39:07,932|ttDataBase_Test|upload_data|starting....


(3421, 13)


2020-04-21 02:39:09,071|ttDataBase_Test|upload_data_duration|1138 ms



Processed 5 of records
