In [2]:
import os
import sys
import json
import time
import datetime
from importlib import reload

import requests

import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN

import external as ext

In [3]:
# Load one risk grid per map. The frame names are the telemetry map names
# (see risklevel); the CSV files are named after the corresponding maps.
desert_main, baltic_main, savage_main, summerland_main = (
    pd.read_csv(grid_csv)
    for grid_csv in ('miramar.csv', 'erangel.csv', 'sanhok.csv', 'karakin.csv')
)

In [4]:
# Discard the first column of every grid so that positional lookups start at
# the first remaining column.
# NOTE: this cell is not idempotent - running it again drops one more column.
desert_main, baltic_main, savage_main, summerland_main = (
    grid.iloc[:, 1:]
    for grid in (desert_main, baltic_main, savage_main, summerland_main)
)

In [257]:
def risklevel(x, y, mapname):
    """Look up the risk value of one grid cell and append it to ``lst``.

    Parameters
    ----------
    x, y : int
        Column (``x``) and row (``y``) position in the map's risk grid.
    mapname : str
        Telemetry map name: ``'Desert_Main'``, ``'Baltic_Main'``,
        ``'Savage_Main'`` or ``'Summerland_Main'``.

    Returns
    -------
    The value that was appended to the notebook-level list ``lst``:
    the grid value, ``0`` when the position lies outside the grid, or
    ``np.nan`` for an unknown map name.
    """
    if mapname == 'Desert_Main':
        grid = desert_main
    elif mapname == 'Baltic_Main':
        grid = baltic_main
    elif mapname == 'Savage_Main':
        grid = savage_main
    elif mapname == 'Summerland_Main':
        grid = summerland_main
    else:
        grid = None

    if grid is None:
        risk = np.nan
    else:
        if x < 0 or y < 0:
            # iloc treats negative positions as "count from the end", which
            # would return the risk of the opposite map edge; such a position
            # is outside the grid.
            risk = 0
        else:
            try:
                risk = grid.iloc[y, x]
            except IndexError:
                # position beyond the last row/column of the grid
                risk = 0
        if mapname == 'Summerland_Main':
            # correction due to lower max number of players
            risk = risk * 1.5625

    lst.append(risk)
    return risk

# <font color=red> Preparing DataFrame </font>

In [251]:
import os
import glob

# json_normalize moved to the pandas top level in pandas 1.0; fall back to the
# historical location for older installations.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize


def isGame(x):
    """Return the ``isGame`` entry of one telemetry ``common`` record."""
    return x['isGame']


path = 'telemetry_data/2020-04-30/pc'
for filename in glob.glob(os.path.join(path, '*.json')):  # only process .json files in the folder
    # the with-block closes the file on exit, no explicit close() is needed
    with open(filename, mode='r') as file:
        json_data = file.read()

    # parse the telemetry once and reuse the result below
    events = json.loads(json_data)
    position_events = [e for e in events
                       if e["_T"] in ("LogPlayerPosition", "LogParachuteLanding")]
    # plain equality: ("LogMatchStart") is just a string, so `in` was a substring test
    start_events = [e for e in events if e["_T"] == "LogMatchStart"]

    if not position_events or not start_events:
        # nothing to locate, or no map name available: report and move on
        print(f'!!! {filename}: no position or match-start events, skipping')
        continue

    # get events data into pandas dataframes
    data = pd.read_json(json.dumps(position_events))
    head = pd.read_json(json.dumps(start_events))

    # get map name
    mn = head.loc[0, 'mapName']

    # copy the match id (taken from the first event of the file) to all rows;
    # it is used later to name the exported file
    matchid = events[0]["MatchId"]
    data['MatchId'] = matchid

    # reset indexes so the normalized frames below align row by row
    data = data.reset_index(drop=True)

    # expand the nested `character` documents into flat columns
    data = pd.concat([data, json_normalize(data['character'].tolist())], axis=1).drop(['character', 'zone'], axis=1)

    data['isGame'] = data['common'].apply(isGame)
    data = data.drop('common', axis=1)

    # the vehicle type is used to check which players are still in the airplane
    data['vehicle'] = data['vehicle'].apply(lambda x: {} if pd.isna(x) else x)
    vehicle_info = json_normalize(data['vehicle'].tolist())
    if 'vehicleType' in vehicle_info.columns:
        data['vehicle'] = vehicle_info['vehicleType']
    else:
        # no row carries vehicle information in this match
        data['vehicle'] = np.nan

    # Keep only what is needed to interpolate locations: sort by player and
    # time, drop exact duplicate rows, renumber the index and discard rows
    # without a timestamp.
    data_new_coords = (
        data[['elapsedTime', 'name', 'location.x', 'location.y', 'vehicle']]
        .sort_values(['name', 'elapsedTime'])
        .drop_duplicates()
        .reset_index(drop=True)
        .dropna(subset=['elapsedTime'])
    )

    # Round coordinates for ease of calculation; a difference in location
    # smaller than a cm is negligible.
    for coord_col in ('location.x', 'location.y'):
        data_new_coords[coord_col] = data_new_coords[coord_col].apply(round)
    
    # Telemetry does not report every player at every timestamp, so positions are
    # interpolated linearly: a player at (x, y) at second 10 and at (x+10, y+10)
    # at second 20 is expected at (x+1, y+1) at second 11, and so on.
    def getNewCoord(t, n, coords=None):
        """Estimate where player ``n`` is at time ``t``.

        Parameters
        ----------
        t : number
            Elapsed time to estimate the position for.
        n : str
            Player name.
        coords : DataFrame, optional
            Observations with columns ``elapsedTime``, ``name``, ``location.x``,
            ``location.y`` and ``vehicle``, ordered by time within each player.
            Defaults to the notebook-level ``data_new_coords``.

        Returns
        -------
        tuple
            ``(x, y, vehicle)``. An observation exactly at ``t`` is returned as is.
            Between two observations the position is interpolated and the vehicle
            of the following observation is reported. Before the first observation
            the first observation is used. With no later observation the player is
            assumed dead and ``('dead', 'dead', 'dead')`` is returned.
        """
        if coords is None:
            coords = data_new_coords

        track = coords[coords['name'] == n]
        exact = track[track['elapsedTime'] == t]
        before = track[track['elapsedTime'] < t]  # previous observations
        after = track[track['elapsedTime'] > t]   # following observations

        if not exact.empty:
            seen = exact.iloc[0]
            return seen['location.x'], seen['location.y'], seen['vehicle']

        if after.empty:
            # no following observation (or no observation at all): we assume
            # the player has died
            return 'dead', 'dead', 'dead'

        nxt = after.iloc[0]  # first following observation
        if before.empty:
            # no previous observation: use the first following location
            return nxt['location.x'], nxt['location.y'], nxt['vehicle']

        prev = before.iloc[-1]  # last previous observation
        span = nxt['elapsedTime'] - prev['elapsedTime']
        elapsed = t - prev['elapsedTime']
        # speed along each axis multiplied by the time since the previous observation
        new_coord_x = prev['location.x'] + ((nxt['location.x'] - prev['location.x']) / span) * elapsed
        new_coord_y = prev['location.y'] + ((nxt['location.y'] - prev['location.y']) / span) * elapsed
        # the vehicle tells whether the player has jumped from the airplane or not
        return new_coord_x, new_coord_y, nxt['vehicle']

    # Locations are sampled every 10 seconds starting at second 1
    # (1, 11, 21, ...), plus the last second regardless of the interval.
    times_list = data_new_coords.elapsedTime.unique().astype(int)
    last_second = max(times_list)
    unique_times_10s = np.arange(1, last_second, 10).tolist()
    unique_times_10s.append(last_second)

    player_names = data_new_coords.name.unique()
    location_rows = []
    for t in unique_times_10s:
        # the filter on the timestamp is the same for every player
        observed_at_t = data_new_coords[data_new_coords['elapsedTime'] == t]
        for n in player_names:
            a = observed_at_t[observed_at_t['name'] == n]
            if not a.empty:
                # the player was observed at exactly this time: take the observation.
                # Emptiness is tested instead of the coordinate value so that a
                # coordinate of 0 still counts, and the scalar vehicle value is
                # stored (not the whole column) so the aircraft test below can match.
                observed = a.iloc[0]
                location_rows.append(
                    (t, n, observed['location.x'], observed['location.y'], observed['vehicle']))
            else:
                # otherwise interpolate it
                x_coord, y_coord, in_aircraft = getNewCoord(t, n)
                location_rows.append((t, n, x_coord, y_coord, in_aircraft))

    # we create the dataframe from these locations
    players_location = pd.DataFrame(location_rows,
                 columns=['time','name','location.x','location.y', 'inAircraft'])
    # binary flag: 1 while the player is still in the transport aircraft, else 0
    players_location['inAircraft'] = players_location['inAircraft'].apply(lambda x: 1 if str(x) == 'TransportAircraft' else 0)


    # Roster with one row per (name, accountId, teamId), ordered by team; it is
    # merged in to bring accountId and teamId into the location frame.
    players = (
        data[['name', 'accountId', 'teamId']]
        .drop_duplicates()
        .sort_values('teamId')
        .reset_index(drop=True)
    )
    players_location = pd.merge(players_location, players, on='name', how='left')

    # Players are later assigned a number inside their team (player1, player2, ...),
    # so the ordering has to be consistent for all teams: sort by team, time and name.
    has_valid_time = players_location['time'] >= 0
    players_location = (
        players_location[has_valid_time]
        .sort_values(['teamId', 'time', 'name'])
        .reset_index(drop=True)
        .replace('dead', 0)
    )

    # convert from cm to units of 10 meters
    coord_cols = ['location.x', 'location.y']
    players_location[coord_cols] = players_location[coord_cols].apply(lambda col: col.astype(int))
    players_location[coord_cols] = players_location[coord_cols].apply(lambda col: col // 1000)

    data = players_location[coord_cols]

    # convert df to list of tuples
    records = data.to_records(index=False)
    result = list(records)

    cols = ['risk']
    lst = []

    try:
        for i in result:
            risklevel(*i, mapname=mn)

        riskordered = pd.DataFrame(lst, columns=cols)
    
        # export match telemetry data into .csv format
        riskordered.to_csv(path_or_buf=f'{matchid}.csv')
    except:
        continue

KeyboardInterrupt: 

In [273]:
# Print the entries of the telemetry root folder, one per line.
sys.stdout.writelines(f'{entry}\n' for entry in os.listdir('D:/PUBG/telemetry_data'))

2020-04-09
2020-04-10
2020-04-11
2020-04-12
2020-04-13
2020-04-15
2020-04-16
2020-05-01
2020-05-03
2020-05-04
2020-05-05
2020-05-06
2020-05-07
2020-05-08
2020-05-09
2020-05-10
2020-05-11
2020-05-12
2020-05-13
2020-05-14
2020-05-16


In [275]:
import os
import glob

for direct in os.listdir('../telemetry_data'):
    for d2 in os.listdir(f'../telemetry_data/{direct}'):
        for filename in os.listdir(f'../telemetry_data/{direct}/{d2}'):
        #for filename in glob.glob(os.path.join(path, '*.json')): #only process .JSON files in folder.  
            try:
                with open(f'../telemetry_data/{direct}/{d2}/{filename}', mode='r') as file:
                    json_data = file.read()
                    file.close()
                data = []
                head = []
                for i in json.loads(json_data):
                    if i["_T"] in ("LogPlayerPosition", "LogParachuteLanding"):
                        data.append(i)
                    if i["_T"] in ("LogMatchStart"):
                        head.append(i)


                # get events data into pandas dataframe
                data = pd.read_json(json.dumps(data))
                head = pd.read_json(json.dumps(head))

                # get map name
                mn = head.loc[0, 'mapName']

                # copy match id to all rows and drop LogMatchDefinition event
                # we will use this later to add use it for future merging tasks
                matchid = json.loads(json_data)[0]["MatchId"]
                matchid = matchid.split('.')[-1]
                data['MatchId'] = matchid

                # reset indexes so the normalized frames below align row by row
                data.reset_index(drop=True, inplace=True)

                # json_normalize moved to the pandas top level in pandas 1.0;
                # fall back to the historical location for older installations
                try:
                    from pandas import json_normalize
                except ImportError:
                    from pandas.io.json import json_normalize

                # expand the nested `character` documents into flat columns
                data = pd.concat([data, json_normalize(data['character'].tolist())], axis=1).drop(['character', 'zone'], axis=1)

                def isGame(x):
                    """Return the ``isGame`` entry of one telemetry ``common`` record."""
                    return x['isGame']

                data['isGame'] = data['common'].apply(isGame)
                data = data.drop('common', axis=1)

                # the vehicle type is used to check which players are still in the airplane
                data.vehicle = data.vehicle.apply(lambda x: {} if pd.isna(x) else x)
                # The key has to be looked for in the normalized vehicle frame:
                # `data` itself has no 'vehicleType' column at this point
                # (NOTE(review): confirm against the telemetry schema), so checking
                # `data.columns` left the vehicle empty for every row.
                vehicle_info = json_normalize(data.vehicle.tolist())
                if 'vehicleType' in vehicle_info.columns:
                    data['vehicle'] = vehicle_info['vehicleType']
                else:
                    # no row carries vehicle information in this match
                    data['vehicle'] = np.nan

                # Observation columns kept for every event, ordered by player and
                # time, without exact duplicate rows and with a fresh index.
                tracked_cols = ['_T', 'elapsedTime', 'name', 'location.x', 'location.y', 'vehicle']
                data_new_coords = (
                    data[tracked_cols]
                    .sort_values(['name', 'elapsedTime'])
                    .drop_duplicates()
                    .reset_index(drop=True)
                )

                # Round coordinates for ease of calculation; a difference in
                # location smaller than a cm is negligible.
                for coord_col in ('location.x', 'location.y'):
                    data_new_coords[coord_col] = data_new_coords[coord_col].apply(round)

                # Rebuild the observations as a frame with the column names used
                # from here on.
                players_location = pd.DataFrame(
                    list(zip(*(data_new_coords[col] for col in tracked_cols))),
                    columns=['event', 'time', 'name', 'location.x', 'location.y', 'inAircraft'])

                # binary flag: 1 while the player is in the transport aircraft, else 0
                players_location['inAircraft'] = players_location['inAircraft'].apply(
                    lambda vehicle: int(str(vehicle) == 'TransportAircraft'))


                # Roster with one row per (name, accountId, teamId), ordered by team;
                # it is merged in to bring accountId and teamId into the location frame.
                players = (
                    data[['name', 'accountId', 'teamId']]
                    .drop_duplicates()
                    .sort_values('teamId')
                    .reset_index(drop=True)
                )
                players_location = pd.merge(players_location, players, on='name', how='left')

                # Players are later assigned a number inside their team (player1,
                # player2, ...), so the ordering has to be consistent for all teams:
                # sort by team, time and name.
                players_location = (
                    players_location
                    .sort_values(['teamId', 'time', 'name'])
                    .reset_index(drop=True)
                    .replace('dead', 0)
                )

                # convert from cm to units of 10 meters
                coord_cols = ['location.x', 'location.y']
                players_location[coord_cols] = players_location[coord_cols].apply(lambda col: col.astype(int))
                players_location[coord_cols] = players_location[coord_cols].apply(lambda col: col // 1000)

                data = players_location[coord_cols]

                # convert df to list of tuples
                records = data.to_records(index=False)
                result = list(records)

                cols = ['risk']
                lst = []

                try:
                    for i in result:
                        risklevel(*i, mapname=mn)

                    riskordered = pd.DataFrame(lst, columns=cols)

                    # export match telemetry data into .csv format
                    #riskordered.to_csv(path_or_buf=f'{matchid}.csv')
                    abc = pd.concat([players_location, riskordered], axis=1, sort=False)
                    abc_mean = abc[abc.event != 'LogParachuteLanding'].groupby('name').mean().reset_index()
                    risk_std = abc[abc.event != 'LogParachuteLanding'].groupby('name').std()['risk'].to_list()
                    abc_mean['risk_std'] = risk_std
                    landing_risk = abc[abc.event == 'LogParachuteLanding'][['name','risk']]
                    landing_risk.columns = ['name','landing_risk']
                    abc_final = pd.merge(abc_mean, landing_risk, how='left', on='name')
                    abc_final.insert(0, 'matchId', matchid)
                    abc_final = abc_final.sort_values(['teamId','name'])
                    for team in abc_final.teamId.unique():
                        df = abc_final[abc_final.teamId == team]
                        if len(df) > 4:
                            ghost_players = df[df.time == 0.0]['name'].to_list()
                            df = df[~df.name.isin(ghost_players)]
                            abc_final = abc_final[~abc_final.name.isin(ghost_players)]
                        player_dict = {}
                        for i, player in enumerate(df.name.unique()):
                            player_dict[player] = f'player{i+1}'
                        abc_final = abc_final.replace(player_dict)
                    z = abc_final.pivot_table(values = ['risk','risk_std','landing_risk'], index = ['matchId','teamId'], columns = ['name'])
                    z = z.reset_index()
                    z.columns = z.columns.droplevel(1)
                    if len(z.columns) == 14:
                        z.columns = ['matchId','teamId', 'landing_risk1', 'landing_risk2', 'landing_risk3', 'landing_risk4',
                                    'risk1', 'risk2', 'risk3', 'risk4', 'risk_std1', 'risk_std2', 'risk_std3', 'risk_std4']
                    elif len(z.columns) == 11:
                        z.columns = ['matchId','teamId', 'landing_risk1', 'landing_risk2', 'landing_risk3',
                                    'risk1', 'risk2', 'risk3', 'risk_std1', 'risk_std2', 'risk_std3']
                    elif len(z.columns) == 8:
                        z.columns = ['matchId','teamId', 'landing_risk1', 'landing_risk2',
                                    'risk1', 'risk2', 'risk_std1', 'risk_std2']
                    elif len(z.columns) == 5:
                        z.columns = ['matchId','teamId', 'landing_risk1',
                                    'risk1', 'risk_std1']
                    z.to_csv(f'D:/PUBG/risk_data/{matchid}.csv')
                except (KeyboardInterrupt, SystemExit):
                    raise
            except Exception as e:
                print(f'!!! {filename}: {e}')

!!! 684ca49a-c3e5-49e5-98d4-57d671266288.json: 'character'
!!! b38bd5f5-eca8-4f91-a44c-cfeea9005172.json: 'character'
!!! 2ed63e56-7c7f-4b45-9813-97decefebab0.json: 'character'
!!! da2dbdcb-2c44-4778-b085-149330c26f09.json: 'character'
!!! 2c90b98f-a032-4d93-84b9-4c46fa68f2ef.json: 'character'
!!! 205dc2eb-1368-4b96-8765-95ff72baa676.json: 'character'


NotADirectoryError: [WinError 267] El nombre del directorio no es válido: '../telemetry_data/clean/0005067a-838c-4037-8109-1e3852c23166.csv'

In [277]:
# Full path of every per-match file in the risk output folder.
risk_dir = 'D:/PUBG/risk_data'
all_filenames = [f'{risk_dir}/{entry}' for entry in os.listdir(risk_dir)]

In [278]:
# Combine all per-match files in the list. The files do not all share the same
# columns; sort=True keeps the alphabetical column order this pandas version
# applies by default (see the FutureWarning), which the positional column
# slices in the following cells rely on.
risk_csv = pd.concat([pd.read_csv(f, index_col=0) for f in all_filenames], sort=True)
# export to csv
risk_csv.to_csv("D:/PUBG/risk_csv.csv", index=False)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


In [284]:
# The concatenated frame still carries the row labels of the individual files
# (they repeat across files); replace them with one continuous 0..n-1 index.
risk_csv = risk_csv.reset_index(drop=True)

In [285]:
# Positional slice of the first eight columns (in the recorded run these are
# the unnumbered `landing_risk`, `landing_risk.1` ... `landing_risk.7` columns).
a = risk_csv.iloc[:,0:8]
# Display only the rows that have a value in at least one of these columns;
# the result is shown, `a` itself is not modified.
a.dropna(how='all')

Unnamed: 0,landing_risk,landing_risk.1,landing_risk.2,landing_risk.3,landing_risk.4,landing_risk.5,landing_risk.6,landing_risk.7
5760,1.0,1.0,0.0,19.0,,,,
5761,1.0,,,,,,,
5762,2.0,1.0,6.0,0.0,,,,
5763,0.0,,,,,,,
5764,5.0,0.0,4.0,9.0,,,,
...,...,...,...,...,...,...,...,...
663241,0.0,0.0,0.0,0.0,,,,
663242,0.0,0.0,2.0,2.0,,,,
663243,0.0,0.0,0.0,1.0,,,,
663244,0.0,0.0,0.0,0.0,,,,


In [289]:
# Positional slice of columns 13-20 (in the recorded run these are the
# unnumbered `risk`, `risk.1` ... `risk.7` columns).
b = risk_csv.iloc[:,13:21]
# Summarize the rows that have a value in at least one of these columns.
b.dropna(how='all').info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 7187 entries, 5760 to 663245
Data columns (total 8 columns):
risk      7187 non-null float64
risk.1    4867 non-null float64
risk.2    2917 non-null float64
risk.3    2362 non-null float64
risk.4    139 non-null float64
risk.5    41 non-null float64
risk.6    26 non-null float64
risk.7    7 non-null float64
dtypes: float64(8)
memory usage: 505.3 KB


In [280]:
# Overview of the combined frame: column names, non-null counts and dtypes.
risk_csv.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 666005 entries, 0 to 10
Data columns (total 38 columns):
landing_risk      6602 non-null float64
landing_risk.1    4514 non-null float64
landing_risk.2    2707 non-null float64
landing_risk.3    2218 non-null float64
landing_risk.4    139 non-null float64
landing_risk.5    41 non-null float64
landing_risk.6    26 non-null float64
landing_risk.7    7 non-null float64
landing_risk1     630396 non-null float64
landing_risk2     514061 non-null float64
landing_risk3     440193 non-null float64
landing_risk4     350991 non-null float64
matchId           666005 non-null object
risk              7187 non-null float64
risk.1            4867 non-null float64
risk.2            2917 non-null float64
risk.3            2362 non-null float64
risk.4            139 non-null float64
risk.5            41 non-null float64
risk.6            26 non-null float64
risk.7            7 non-null float64
risk1             658810 non-null float64
risk2             

## Export to .csv

In [268]:
# Show the file name currently held in `filename`; the value is whatever an
# earlier loop over the telemetry files left in the kernel.
filename

'eb83b1e3-7ca6-46cd-aa45-a45cf6ab9ba0.json'

In [30]:
# Export the player locations of the match currently held in memory to
# `<matchid>.csv` in the working directory. Both `players_location` and
# `matchid` come from an earlier cell.
players_location.to_csv(path_or_buf=f'{matchid}.csv')

In [271]:
# Find where one telemetry file is stored: print its two parent folder names
# and its position in the folder listing.
wanted_file = 'eb83b1e3-7ca6-46cd-aa45-a45cf6ab9ba0.json'
for direct in os.listdir('D:/PUBG/telemetry_data'):
    for d2 in os.listdir(f'D:/PUBG/telemetry_data/{direct}'):
        for i, filename in enumerate(os.listdir(f'D:/PUBG/telemetry_data/{direct}/{d2}')):
            if filename != wanted_file:
                continue
            print(direct, d2, i)

2020-04-10 console 532
