# Data Query

In [None]:
import csv
import io
import os, json
import pprint as pp

import pandas as pd
import requests
from dotenv import load_dotenv
load_dotenv()

In [None]:
# Connection settings come from the process environment (load_dotenv() is
# called in the import cell). A variable that is not set resolves to None.
BASE_URL = os.environ.get("MYSHYN_URL")
TENANT_ID = os.environ.get("MYSHYN_TENANT_ID")

# Credentials passed as the `auth` pair on every request below.
USERNAME = os.environ.get("MYSHYN_USERNAME")
PASSWORD = os.environ.get("MYSHYN_PASSWORD")

## Search for high-risk services by Risk Scores

In [None]:
# The request body is sent as JSON.
HEADER = {"Content-Type": "application/json"}

# Risk scores queried as "high risk": 7, 8 and 9.
riskscores = list(range(7, 10))

# Echo both values as the cell output.
HEADER, riskscores

In [None]:
# Last path segment of the reporting URL built in the next cell.
endpointServiceName = "queryCloudServiceProviderByRisk"

In [None]:
# POST the JSON-encoded list of risk scores to the reporting endpoint named in
# `endpointServiceName`, authenticating with the USERNAME/PASSWORD pair.
url = BASE_URL + '/shnapi/rest/reporting/csv/' + str(endpointServiceName)
print("URL  : " + url)
r = requests.post(
    url,
    headers=HEADER,
    data=json.dumps(riskscores).encode("utf-8"),
    auth=(USERNAME, PASSWORD),
    # (connect, read) timeout in seconds. Without a timeout requests waits
    # indefinitely on an unresponsive server and the cell never returns.
    # Raise the read value if a report legitimately takes longer.
    timeout=(10, 300),
)
# Cell output: the HTTP status code of the response.
r.status_code

In [None]:
# Decode the JSON response body. Later cells call .keys() on the result and
# index it by CSP id, so it is expected to be a dict.
highRiskServices = r.json()
# Cell output: number of entries returned.
len(highRiskServices)

In [None]:
# Cell output: the full decoded response.
highRiskServices

In [None]:
# Collect the keys of the response (used below as CSP ids) into a list.
cspIds = [*highRiskServices.keys()]

In [None]:
# Cell output: the list of ids collected from the high-risk query.
cspIds

## Search for users who access or attempt to access these high-risk services by CspId

Use the CspIds returned by the "Search for high-risk services by Risk Scores" query above.

In [None]:
# Request settings for the unique-users query of a single cloud service.
HEADER = {
    'Content-Type': 'application/json',
}

# No query-string parameters are sent with this request.
PARAM = {
}

# Select the service to inspect. list.index() raises ValueError when the id is
# not among the ids collected in `cspIds`.
index = cspIds.index('34827')
# Bind the selected id to `cspId` so the summary line below can display it.
cspId = cspIds[index]
BODY = {
    "cspId": cspId
}

# Cell output: the headers, the selected id and its entry in the high-risk
# response.
HEADER, cspId, highRiskServices[cspId]

In [None]:
# Last path segment of the reporting URL built in the next cell.
endpointServiceName = "queryUniqueUsers"

In [None]:
# POST the JSON-encoded BODY to the reporting endpoint named in
# `endpointServiceName`, authenticating with the USERNAME/PASSWORD pair.
url = BASE_URL + '/shnapi/rest/reporting/csv/' + str(endpointServiceName)
print("URL  : " + url)
r = requests.post(
    url,
    headers=HEADER,
    params=PARAM,
    data=json.dumps(BODY).encode("utf-8"),
    auth=(USERNAME, PASSWORD),
    # (connect, read) timeout in seconds. Without a timeout requests waits
    # indefinitely on an unresponsive server and the cell never returns.
    # Raise the read value if a report legitimately takes longer.
    timeout=(10, 300),
)
# Cell output: the HTTP status code of the response.
r.status_code

In [None]:
# Parse the CSV response with the csv module. Unlike a plain split(","), it
# keeps quoted fields that contain commas intact and copes with CRLF line
# endings. Blank lines (such as the one after a final newline) are skipped.
res = [row for row in csv.reader(io.StringIO(r.text, newline="")) if row]
if not res:
    raise ValueError("response body contains no CSV rows")

# The first row is the header. Rows that end up with missing values are
# removed by dropna().
res_df = pd.DataFrame(data=res[1:], columns=res[0]).dropna()

# Convert the counter/volume columns from text to numbers; values that cannot
# be parsed become NaN (errors='coerce').
columnToInt = ['usageCount', 'outbound', 'inbound', 'Upload count', 'Upload data', 'Allowed Data', 'Denied Data', 'Allowed Request', 'Denied Request']
res_df[columnToInt] = res_df[columnToInt].apply(pd.to_numeric, errors='coerce')

# Convert the timestamp column; values that cannot be parsed become NaT.
columnToDatetime = ['Last Activity']
res_df[columnToDatetime] = res_df[columnToDatetime].apply(pd.to_datetime, errors='coerce')

# Uncomment to inspect the result: res_df.dtypes, res_df.columns

In [None]:
# Cell output: the ten rows with the highest usageCount.
(
    res_df
    .sort_values(by="usageCount", ascending=False)
    .head(10)
)

## The Allow/Deny Statistics Report returns information about the traffic decisioning for a specified Cloud Service Provider.

In [None]:
# Request settings for the allow/deny statistics query of a single cloud
# service.
HEADER = {
    'Content-Type': 'application/json',
}

# No query-string parameters are sent with this request.
PARAM = {
}

# Select the service to inspect. list.index() raises ValueError when the id is
# not among the ids collected in `cspIds`.
index = cspIds.index('34827')
# Bind the selected id to `cspId` so the summary line below can display it.
cspId = cspIds[index]
BODY = {
    "cspId": cspId
}

# Cell output: the headers, the selected id and its entry in the high-risk
# response.
HEADER, cspId, highRiskServices[cspId]

In [None]:
# Last path segment of the reporting URL built in the next cell.
endpointServiceName = "queryAllowDenyStatistics"

In [None]:
# POST the JSON-encoded BODY to the reporting endpoint named in
# `endpointServiceName`, authenticating with the USERNAME/PASSWORD pair.
url = BASE_URL + '/shnapi/rest/reporting/csv/' + str(endpointServiceName)
print("URL  : " + url)
r = requests.post(
    url,
    headers=HEADER,
    params=PARAM,
    data=json.dumps(BODY).encode("utf-8"),
    auth=(USERNAME, PASSWORD),
    # (connect, read) timeout in seconds. Without a timeout requests waits
    # indefinitely on an unresponsive server and the cell never returns.
    # Raise the read value if a report legitimately takes longer.
    timeout=(10, 300),
)
# Cell output: the HTTP status code of the response.
r.status_code

In [None]:
# Parse the CSV response with the csv module. Unlike a plain split(","), it
# keeps quoted fields that contain commas intact and copes with CRLF line
# endings. Blank lines (such as the one after a final newline) are skipped.
res = [row for row in csv.reader(io.StringIO(r.text, newline="")) if row]
if not res:
    raise ValueError("response body contains no CSV rows")

# The first row is the header. Rows that end up with missing values are
# removed by dropna().
res_df = pd.DataFrame(data=res[1:], columns=res[0]).dropna()

# Convert the statistic columns from text to numbers; values that cannot be
# parsed become NaN (errors='coerce'). Despite the variable name, to_numeric
# yields floats wherever a value is fractional.
columnToInt = ['Allowed%', 'Allowed vs Denied (0..1)', 'Denied%', 'OutboundVolume (bytes)', 'InboundVolume (bytes)', 'Total access to service']
res_df[columnToInt] = res_df[columnToInt].apply(pd.to_numeric, errors='coerce')

# Uncomment to inspect the result: res_df.dtypes, res_df.columns

In [None]:
# Cell output: the ten rows with the most accesses, ties ordered by outbound
# volume (both descending).
(
    res_df
    .sort_values(
        by=["Total access to service", "OutboundVolume (bytes)"],
        ascending=False,
    )
    .head(10)
)

## The service identifiers for the cloud services

The Per Service Counts report returns a list of the Cloud Services detected by your deployment. This is equivalent to the CSV report generated on the Services page in Skyhigh CASB.

In [None]:
# The request body is sent as JSON.
HEADER = {"Content-Type": "application/json"}

# The tenant id is passed as a query-string parameter.
PARAM = {"tenantId": TENANT_ID}

BODY = {
    # Left empty here. Column toggles that can be enabled by adding them:
    #     "Service Name" : True,
    #     "Category" : True,
    #     "Risk" : True,
    #     "Access Count" : True,
    #     "Outbound" : True,
    #     "Inbound" : True,
    #     "Upload Count" : True,
    #     "Upload Data" : True,
    #     "Users" : True,
    #     "Allowed" : True,
    #     "Denied" : True
    "tableVisibilityMap": {},
    # No event filter is applied.
    "eventFilter": {},
}

# Echo the request settings as the cell output.
HEADER, PARAM, BODY

In [None]:
# Last path segment of the reporting URL built in the next cell.
endpointServiceName = "queryPerServiceCounts"

In [None]:
# POST the JSON-encoded BODY (with PARAM as the query string) to the reporting
# endpoint named in `endpointServiceName`, authenticating with the
# USERNAME/PASSWORD pair.
url = BASE_URL + '/shnapi/rest/reporting/csv/' + str(endpointServiceName)
print("URL  : " + url)
r = requests.post(
    url,
    headers=HEADER,
    params=PARAM,
    data=json.dumps(BODY).encode("utf-8"),
    auth=(USERNAME, PASSWORD),
    # (connect, read) timeout in seconds. Without a timeout requests waits
    # indefinitely on an unresponsive server and the cell never returns.
    # Raise the read value if a report legitimately takes longer.
    timeout=(10, 300),
)
# Cell output: the HTTP status code of the response.
r.status_code

In [None]:
# Parse the CSV response with the csv module. Unlike a plain split(","), it
# keeps quoted fields that contain commas intact and copes with CRLF line
# endings. Blank lines (such as the one after a final newline) are skipped.
res = [row for row in csv.reader(io.StringIO(r.text, newline="")) if row]
if not res:
    raise ValueError("response body contains no CSV rows")

# The first row is the header. Rows that end up with missing values are
# removed by dropna().
res_df = pd.DataFrame(data=res[1:], columns=res[0]).dropna()

# Convert the counter/volume columns from text to numbers; values that cannot
# be parsed become NaN (errors='coerce').
columnToInt = ['Risk', 'Access Count', 'Outbound', 'Inbound', 'Upload Count', 'Upload Data', 'Users', 'Allowed', 'Denied', 'Allowed Data', 'Denied Data', 'Total Data']
res_df[columnToInt] = res_df[columnToInt].apply(pd.to_numeric, errors='coerce')

# Convert the timestamp columns; values that cannot be parsed become NaT.
columnToDatetime = ['ServiceFirstUsed', 'ServiceLastUsed']
res_df[columnToDatetime] = res_df[columnToDatetime].apply(pd.to_datetime, errors='coerce')

# Uncomment to inspect the result: res_df.dtypes, res_df

In [None]:
# Cell output: the ten most-accessed services whose Risk value is above 6.
(
    res_df
    .loc[res_df["Risk"] > 6]
    .sort_values(by="Access Count", ascending=False)
    .head(10)
)

## Get Cloud Service Provider ID in Category

In [None]:
# GET the cspCategoryInfo resource under the 'active/true' path,
# authenticating with the USERNAME/PASSWORD pair.
url = BASE_URL + '/shnapi/rest/cspCategoryInfo/active/true'
print("URL  : " + url)
r = requests.get(
    url,
    headers=HEADER,
    params={},
    data={},
    auth=(USERNAME, PASSWORD),
    # (connect, read) timeout in seconds. Without a timeout requests waits
    # indefinitely on an unresponsive server and the cell never returns.
    timeout=(10, 300),
)
# Cell output: the HTTP status code of the response.
r.status_code

In [None]:
# Cell output: the response body decoded as JSON.
r.json()

Filter by category:

In [None]:
# GET the cspCategoryInfo resource for the category name 'Cloud Storage',
# authenticating with the USERNAME/PASSWORD pair.
url = BASE_URL + '/shnapi/rest/cspCategoryInfo/catName/Cloud Storage'
print("URL  : " + url)
r = requests.get(
    url,
    headers=HEADER,
    params={},
    data={},
    auth=(USERNAME, PASSWORD),
    # (connect, read) timeout in seconds. Without a timeout requests waits
    # indefinitely on an unresponsive server and the cell never returns.
    timeout=(10, 300),
)
# Cell output: the HTTP status code of the response.
r.status_code

In [None]:
# Cell output: the response body decoded as JSON.
r.json()

## Anomalies

In [None]:
# Request settings for the anomalies query.
HEADER = {
    'Content-Type': 'application/json',
}

# The anomalies request sends no query-string parameters. Define PARAM here so
# the summary below does not show a value left over from an earlier cell (or
# fail with NameError when that cell has not been run).
PARAM = {
}

BODY = {
    "templateId": 10,
    "serviceidentifiers": ["sharepoint","onedrive","exchange online"]
}

# Echo the request settings as the cell output.
HEADER, PARAM, BODY

In [None]:
# POST the JSON-encoded BODY to the queryAnomalies reporting endpoint,
# authenticating with the USERNAME/PASSWORD pair.
url = BASE_URL + '/shnapi/rest/reporting/csv/queryAnomalies'
print("URL  : " + url)
r = requests.post(
    url,
    headers=HEADER,
    params={},
    data=json.dumps(BODY).encode("utf-8"),
    auth=(USERNAME, PASSWORD),
    # (connect, read) timeout in seconds. Without a timeout requests waits
    # indefinitely on an unresponsive server and the cell never returns.
    # Raise the read value if a report legitimately takes longer.
    timeout=(10, 300),
)
# Cell output: the HTTP status code of the response.
r.status_code

In [None]:
# Parse the CSV response with the csv module. Unlike a plain split(","), it
# keeps quoted fields that contain commas intact and copes with CRLF line
# endings. Blank lines are skipped, so the final newline no longer produces a
# junk trailing row (this cell does not call dropna()).
res = [row for row in csv.reader(io.StringIO(r.text, newline="")) if row]
if not res:
    raise ValueError("response body contains no CSV rows")

# The first row is the header; the remaining rows are kept as text.
res_df = pd.DataFrame(data=res[1:], columns=res[0])

In [None]:
# Cell output: only the rows whose Severity column equals "High".
res_df.loc[res_df["Severity"] == "High"]