In [110]:
import os
import sys
import re
import pandas as pd
import numpy as np
from enum import Enum
from typing import Optional, Union, List, Callable
from dataclasses import dataclass, make_dataclass, field

# Parent directory of the (symlink-resolved) current working directory;
# appended to sys.path once so modules located there can be imported.
PKG_ROOT = os.path.dirname(os.path.realpath(os.getcwd()))
if PKG_ROOT not in sys.path:
    sys.path.append(PKG_ROOT)

In [111]:
def make_names_old(name:str, df:pd.DataFrame):
    '''
    Build a dataclass type called `name` from the rows of `df`.

    Two-column frame: every row becomes a `str` field whose name is taken
    from the second column and whose default is taken from the first column.
    Any other width: the frame is grouped on its first column and each group
    becomes a field (named after the group key) whose default is the
    dataclass built recursively from the remaining columns.
    '''
    if df.shape[1] != 2:
        fields = []
        for key, sub in df.groupby(df.columns[0]):
            # Recurse on the group with its leading (grouping) column removed
            child = make_names_old(key, sub.drop(sub.columns[0], axis=1))
            fields.append((key, dataclass, field(default=child)))
        return make_dataclass(name, fields)

    # Leaf level: (field name, type, default) per row
    fields = [(row[1], str, field(default=row[0])) for row in df.values]
    return make_dataclass(name, fields)

def make_names_(name:str, df:pd.DataFrame):
    '''
    Build a dataclass type called `name` from the rows of `df`.

    Two-column frame: every row becomes a `str` field whose name is taken
    from the second column and whose default is taken from the first column.
    Wider frames are grouped on their first column; a level holding a single
    group is skipped (its column is dropped and the same `name` is reused),
    otherwise each group becomes a field whose default is the dataclass
    built recursively from the remaining columns.
    '''
    if df.shape[1] == 2:
        leaf_fields = [(row[1], str, field(default=row[0])) for row in df.values]
        return make_dataclass(name, leaf_fields)

    grouped = df.groupby(df.columns[0])
    if grouped.ngroups == 1:
        # Only one key at this level: collapse it instead of nesting
        return make_names_(name, df.drop(df.columns[0], axis=1))

    nested_fields = []
    for key, sub in grouped:
        child = make_names_(key, sub.drop(sub.columns[0], axis=1))
        nested_fields.append((key, dataclass, field(default=child)))
    return make_dataclass(name, nested_fields)
    
def make_names(name:str, df:pd.DataFrame, ignore_cols:Optional[List]=None):
    '''
    Build a (possibly nested) dataclass from `df` after dropping some columns.

    name: name given to the top-level dataclass.
    df: source frame; column order is preserved for the columns that remain.
    ignore_cols: column labels to leave out; None (the default) keeps all.

    Returns whatever make_names_ returns for the filtered frame.
    '''
    # None instead of a shared mutable [] default
    ignored = () if ignore_cols is None else ignore_cols
    kept = [col for col in df.columns if col not in ignored]
    return make_names_(name=name, df=df[kept])

In [112]:
def clean_duplicates(df:pd.DataFrame, groupby:Optional[Union[List, str]]=None):
    '''
    Make repeated values in the last column of `df` distinct by suffixing them.

    groupby is None:
        Rows are grouped by their value in the last column. Inside each group
        the value has runs of underscores collapsed to a single "_", and every
        row after the first gets "_<n>" appended, where n is the row's
        position within its group (1, 2, ...).
    groupby given:
        `df` is split with df.groupby(groupby) and the procedure above is
        applied to each piece; the pieces are returned with a fresh index.

    NOTE(review): the last-column values are fed to re.sub, so they are
    presumably strings - confirm against callers.
    NOTE(review): row order of the result follows the groupby output rather
    than the input order - confirm whether callers rely on ordering.
    '''
    if groupby is None:
        col = df.columns[-1]
        # reset_index() per group: the previous index moves into an 'index'
        # column and each group gets a new 0..k-1 index (position in group)
        df = df.groupby(col).apply(lambda x: x.reset_index())
        # Drop the outer (group key) level so only the within-group position remains
        if df.index.nlevels > 1:
            df = df.droplevel(0)
        # x.name is the row's index label here; only rows with label > 0 get a suffix
        df[col] = df.apply(lambda x: f'{re.sub("_+", "_", x[col])}{("_" + str(x.name)) if x.name > 0 else ""}', axis=1)
        # Discard the within-group positions and the 'index' column added by reset_index()
        return df.reset_index(drop=True).drop('index', axis=1)
    else:
        # Recurse per group; clean_duplicates is called with groupby=None on each piece
        return df.groupby(groupby).apply(clean_duplicates).reset_index(drop=True)

In [113]:
class BlockBase:
    '''
    Conversion helpers shared by the block classes that derive from this one.
    '''

    @staticmethod
    def bytes2Num(barray:bytes, signed:bool=False, order:str='little'):
        '''
        Decode `barray` as an integer using the given signedness and byte order.
        '''
        return int.from_bytes(barray, signed=signed, byteorder=order)

    @staticmethod
    def num2Bytes(num:int, length:int, signed=False, order='little'):
        '''
        Encode `num` into exactly `length` bytes.
        int.to_bytes raises OverflowError when `num` does not fit.
        '''
        return num.to_bytes(length, signed=signed, byteorder=order)

    @staticmethod
    def to_int(num:float):
        '''
        Coerce a dataframe cell to int, or to None when the cell is missing.

        Integer columns are coerced to float when np.nan exists in the same
        column, so floats are converted back with int().
        Strings are parsed as hexadecimal (e.g. "0x0000").
        None, np.nan and pd.NA all yield None.
        '''
        if isinstance(num, str): #0x0000 in dataframe
            return int(num, 16)
        # Identity check first; pd.isnull covers np.nan and pd.NA without
        # relying on an ambiguous truth value
        if num is None or pd.isnull(num):
            return None
        return int(num)


In [114]:
class ValueBlock(BlockBase):
    '''
    A fixed-size field located inside a parent buffer.

    The field is never copied out: reads slice `pbuffer` on demand and
    writes assign back into the same slice, so `pbuffer` must be mutable
    (e.g. bytearray) for set_value / inc_value to work.

    The bytes actually used are
        pbuffer[valueLoc + skiphead : valueLoc + size + skiphead - skiptail]
    '''

    def __init__(self
                 ,pbuffer: bytearray  #contents of parent segment
                 ,valueLoc: int
                 ,size: int
                 ,signed: bool = False
                 ,order: str = 'little'
                 ,optimize: Optional[int] = None
                 ,skiphead: int = 0
                 ,skiptail: int = 0):
        self.pbuffer = pbuffer
        # Locations/sizes may arrive as float, hex string or NaN from a dataframe
        self.valueLoc = self.to_int(valueLoc)
        self.size = self.to_int(size)
        self.signed = signed
        self.order = order
        self.optimize = self.to_int(optimize)  # stored only; not used by this class
        self.skiphead = skiphead
        self.skiptail = skiptail
        self.notVoid = self.not_void()

    def _span(self):
        '''Return (start, stop) of the slice of pbuffer this block covers.'''
        start = self.valueLoc + self.skiphead
        stop = self.valueLoc + self.size + self.skiphead - self.skiptail
        return start, stop

    def not_void(self):
        '''
        Return True when valueLoc/size are set and [valueLoc, valueLoc + size)
        lies inside a non-empty pbuffer.
        '''
        if pd.isnull(self.valueLoc) or pd.isnull(self.size):
            return False
        # Short-circuit so len() is never called on a missing buffer
        if self.pbuffer is None or len(self.pbuffer) == 0:
            return False
        if self.valueLoc < 0 or self.size <= 0:
            return False
        return self.valueLoc + self.size <= len(self.pbuffer)

    @property
    def bytes(self):
        '''The raw bytes of the field (a slice of pbuffer).'''
        start, stop = self._span()
        return self.pbuffer[start:stop]

    @property
    def value(self):
        '''The field decoded as an integer using `signed` and `order`.'''
        return self.bytes2Num(self.bytes, self.signed, self.order)

    @property
    def string(self):
        '''
        The field decoded as latin text, cut at the first byte outside 32..127.
        The whole field is returned when no such byte is present.
        '''
        raw = self.bytes
        # A byte cannot be both > 127 and < 32, so the cut-off test must be "or";
        # the len(raw) fallback distinguishes "no terminator" from "terminator at 0"
        cut = next((i for i, b in enumerate(raw) if b < 32 or b > 127), len(raw))
        return str(raw[:cut], encoding='latin')

    def set_value(self, value:Union[int, Enum]):
        '''
        Overwrite the field in pbuffer with `value` (an int, or any object
        exposing the int through a `.value` attribute such as an Enum member).
        '''
        if hasattr(value, 'value'):
            value = value.value
        # Write exactly the slice that `bytes`/`value` read, with a payload of
        # the same length, so the parent buffer never grows or shifts
        start, stop = self._span()
        self.pbuffer[start:stop] = self.num2Bytes(value, stop - start, self.signed, self.order)

    def inc_value(self, inc_value:int=1):
        '''Add `inc_value` to the stored integer and write it back.'''
        self.set_value(self.value + inc_value)
    

In [115]:
class TableBlock(BlockBase):
    '''
    Create ValueBlock that using a table of values to fill
    refTable includes Kit, Race, Class, Gender, Alignment and RacialEnemy defined in CRETABLES.csv file

    pbuffer: parent buffer the field lives in
    valueLoc, size: location and length of the field (coerced with to_int)
    signed, order: integer decoding options
    refTableName: base name of the reference table (the caller loads "<name>.csv")
    optimize: coerced with to_int and stored; not used by this class
    refTable: the loaded reference table
    '''

    def __init__(self
                ,pbuffer: bytearray
                ,valueLoc: int
                ,size: int
                ,signed: bool
                ,order: str
                ,refTableName: str
                ,optimize: Optional[int]
                ,refTable: pd.DataFrame):
        self.pbuffer = pbuffer
        self.valueLoc = self.to_int(valueLoc)
        self.size = self.to_int(size)
        self.signed = signed
        self.order = order
        # Keep the table's name next to its contents instead of discarding it
        self.refTableName = refTableName
        self.refTable = refTable
        self.optimize = self.to_int(optimize)
    

In [116]:
class SegBlock(BlockBase):
    '''
    Parse subStructures as defined in GAMSEGS.csv and CRESEGS.csv

    A segment is the slice pbuffer[offsetValue : offsetValue + sizeValue].
    Its count/offset/size fields inside pbuffer are exposed as 4-byte
    ValueBlocks. Segments named PARTY or NPC are additionally parsed into
    one GamCreBase per entry (see parseParty).
    '''

    def __init__(self
                ,resourceDir: str
                ,pbuffer: bytearray
                ,CREIndexLength: int
                ,CREOffsetLoc: int
                ,CRESizeLoc: int
                ,name: str
                ,countLoc: int  #count pointer to buffer
                ,offsetLoc: int  #offset pointer to buffer
                ,sizeLoc: int
                ,sizeValue: Optional[int] = None
                ,countValue: Optional[int] = None
                ,offsetValue: Optional[int] = None):  #only the header segment has no offset value
        self.resourceDir = resourceDir
        self.name = name
        self.pbuffer = pbuffer
        self.CREIndexLength = self.to_int(CREIndexLength)
        self.CREOffsetLoc = self.to_int(CREOffsetLoc)
        self.CRESizeLoc = self.to_int(CRESizeLoc)
        self.countLoc = self.to_int(countLoc)
        self.offsetLoc = self.to_int(offsetLoc)
        self.sizeLoc = self.to_int(sizeLoc)
        self.sizeValue = self.to_int(sizeValue)
        self.countValue = self.to_int(countValue)
        self.offsetValue = self.to_int(offsetValue)
        self.offsetBlock = ValueBlock(self.pbuffer, self.offsetLoc, 4)
        self.countBlock = ValueBlock(self.pbuffer, self.countLoc, 4)
        self.sizeBlock = ValueBlock(self.pbuffer, self.sizeLoc, 4)
        # A missing offset means "start of pbuffer"; a missing size means
        # "through the end of pbuffer". Without this the None defaults raise
        # TypeError on None + None.
        start = self.offsetValue if self.offsetValue is not None else 0
        stop = None if self.sizeValue is None else start + self.sizeValue
        self.buffer = self.pbuffer[start:stop]
        if name.upper() in ('PARTY', 'NPC'):
            self.parseParty()

    def parseParty(self):
        '''
        Split a PARTY/NPC segment into its creature entries.

        The segment starts with countBlock.value index records of
        CREIndexLength bytes each. Record i holds a 4-byte offset at
        CREOffsetLoc and a 4-byte size at CRESizeLoc (both relative to the
        record start); they address the creature data inside pbuffer.
        Sets indexHeader, CREOffsets, CRESizes and CRES.
        '''
        count = self.countBlock.value  # decode once instead of per use
        stride = self.CREIndexLength
        self.indexHeader = self.buffer[: stride * count]
        self.CREOffsets = [ValueBlock(self.buffer, self.CREOffsetLoc + stride * i, 4)
                           for i in range(count)]
        self.CRESizes = [ValueBlock(self.buffer, self.CRESizeLoc + stride * i, 4)
                         for i in range(count)]
        vRecords = pd.read_csv(os.path.join(self.resourceDir, 'CREVALUES.csv'), index_col=0)
        tRecords = pd.read_csv(os.path.join(self.resourceDir, 'CRETABLES.csv'), index_col=0)
        sRecords = pd.read_csv(os.path.join(self.resourceDir, 'CRESEGS.csv'), index_col=0)
        # Each creature gets its slice of pbuffer plus the two index blocks
        # that reference it
        self.CRES = [GamCreBase(self.resourceDir
                                ,self.pbuffer[o.value: o.value + s.value]
                                ,vRecords
                                ,tRecords
                                ,sRecords
                                ,o
                                ,s) for o, s in zip(self.CREOffsets, self.CRESizes)]
    

In [121]:
class GamCreBase(BlockBase):
    '''
    A binary structure described by three record tables.

    valueRecords -> VALUES: dataclass with one ValueBlock default per row
    tableRecords -> TABLES: dataclass with one TableBlock default per row
    segRecords   -> SEGS:   pd.Series of SegBlock, in record order

    All blocks read from `buffer`. CRELOC.csv in `resourceDir` is read on
    construction, and one "<RefTable>.csv" per tableRecords row.
    '''

    def __init__(self
                ,resourceDir:str
                ,buffer: bytes = b''
                ,valueRecords: pd.DataFrame = pd.DataFrame([], columns=pd.Index(['Name', 'ValueLoc', 'Size', 'Signed', 'Order', 'Optimize']))
                ,tableRecords: pd.DataFrame = pd.DataFrame([], columns=pd.Index(['Name', 'ValueLoc', 'Size', 'Signed', 'Order', 'RefTable', 'Optimize']))
                ,segRecords: pd.DataFrame = pd.DataFrame([], columns=pd.Index(['SubSegs', 'CountLoc', 'OffsetLoc', 'SizeLoc', 'SizeValue', 'CountValue']))
                ,offsetBlock: Optional[ValueBlock] = None #CRE has reference in the Party/NPC segment
                ,sizeBlokc: Optional[ValueBlock] = None #CRE has reference in the Party/NPC segment
                ):
        self.resourceDir = resourceDir
        self.buffer = buffer
        self.bufferSize = len(self.buffer)
        # Work on private copies: initiate_seg_size() and bind_tables() write
        # into these frames, and the same frame (or the shared default) may be
        # handed to many instances
        self.valueRecords = valueRecords.copy()
        self.tableRecords = tableRecords.copy()
        self.segRecords = segRecords.copy()
        # Keep the index blocks that reference this structure (may be None)
        self.offsetBlock = offsetBlock
        self.sizeBlock = sizeBlokc
        # Empty placeholders, replaced by the init_* calls when records exist
        self.VALUES = make_dataclass('VALUES', [])
        self.TABLES = make_dataclass('TABLES', [])
        self.SEGS = pd.Series([], dtype=object)
        self.CRELocs = self.get_CRE_locs()
        self.initiate_seg_size()
        self.init_values()
        self.init_tables()
        self.init_segs()

    def initiate_seg_size(self):
        '''
        Retrieve offsets of each segment from segRecords to determine the size of each seg,
        Write back into the SizeValue column
        And reset_index to ensure they are aligned in the right packing sequence

        Adds an OffsetValue column to segRecords (0 where no offset was read).
        '''
        # Read the 4-byte offset stored in the buffer at each row's OffsetLoc
        offsets = self.segRecords[self.segRecords['OffsetLoc'].notnull()]
        offsets = offsets['OffsetLoc'].apply(lambda x: self.bytes2Num(self.buffer[self.to_int(x)
                                                                                 :self.to_int(x) + 4])).sort_values()
        # Prepend a 0 start (index label 0) and clamp offsets to the buffer size.
        # NOTE(review): label 0 presumably lines up with the header row of
        # segRecords - confirm against the CSV layout.
        nonZeroOffsets = pd.concat([pd.Series([0]), offsets[offsets!=0]])
        nonZeroOffsets[nonZeroOffsets > self.bufferSize] = self.bufferSize
        # Length = distance to the next larger offset (buffer end for the last one);
        # rows whose stored offset is 0 contribute the value 0
        lengths = pd.concat([offsets[offsets==0], nonZeroOffsets.shift(-1, fill_value=self.bufferSize) - nonZeroOffsets])
        self.segRecords.update(lengths.sort_index().rename('SizeValue').to_frame())
        self.segRecords = self.segRecords.join(offsets.rename('OffsetValue').to_frame()).fillna(value={'OffsetValue': 0})

    def bind_tables(self):
        '''
        Covert the RefTable column in the tableRecords df into real dataframe
        (each name is loaded from "<resourceDir>/<name>.csv").
        '''
        if not self.tableRecords.empty:
            self.tableRecords['RefTable'] = self.tableRecords['RefTable'].apply(
                lambda x: pd.read_csv(os.path.join(self.resourceDir, x + '.csv')))

    def get_CRE_locs(self):
        '''
        Read CRELOC.csv and return (CREIndexLen, CREOffsetLoc, CRESizeLoc):
        SizeValue of the first 'Header' row, and OffsetLoc / SizeLoc of the
        first 'CRE' row (both passed through to_int).
        '''
        loc_df = pd.read_csv(os.path.join(self.resourceDir, 'CRELOC.csv'), index_col=0)
        CREIndexLen = loc_df[loc_df['SubSegs']=='Header']['SizeValue'].iloc[0]
        CREOffsetLoc, CRESizeLoc = loc_df[loc_df['SubSegs']=='CRE'][['OffsetLoc', 'SizeLoc']].iloc[0].apply(self.to_int)
        return CREIndexLen, CREOffsetLoc, CRESizeLoc

    def init_values(self, buffer:Optional[bytearray]=None):
        '''
        Build VALUES: one field per valueRecords row, named by the row's first
        column, defaulting to a ValueBlock built from the remaining columns
        (passed positionally). `buffer` defaults to self.buffer.
        '''
        if not self.valueRecords.empty:
            if buffer is None:
                buffer = self.buffer
            # .iloc keeps access positional regardless of the column labels
            self.VALUES = make_dataclass('VALUES', self.valueRecords.apply(
                lambda x: (x.iloc[0], ValueBlock, ValueBlock(buffer, *x.iloc[1:]))
                ,axis=1))

    def init_tables(self, buffer:Optional[bytearray]=None):
        '''
        Build TABLES: one field per tableRecords row, named by the row's first
        column, defaulting to a TableBlock built from the remaining columns
        plus the reference table loaded from "<RefTable>.csv".
        `buffer` defaults to self.buffer.
        '''
        if not self.tableRecords.empty:
            if buffer is None:
                buffer = self.buffer
            self.TABLES = make_dataclass('TABLES', self.tableRecords.apply(
                lambda x: (x.iloc[0]
                           ,TableBlock
                           ,TableBlock(buffer
                                       ,*x.iloc[1:]
                                       ,pd.read_csv(os.path.join(self.resourceDir
                                                                 ,x['RefTable']+'.csv')
                                                    ,index_col=0)))
                ,axis=1))

    def init_segs(self, buffer:Optional[bytearray]=None):
        '''
        Need to keep sequence order
        So use pd.Series instead of dataclass

        Each segRecords row becomes a SegBlock; the first column supplies the
        segment name and the remaining columns are passed positionally.
        `buffer` defaults to self.buffer.
        '''
        if not self.segRecords.empty:
            if buffer is None:
                buffer = self.buffer
            self.SEGS = (self.segRecords
                         .set_index(self.segRecords.columns[0])
                         .apply(lambda x: SegBlock(self.resourceDir, buffer, *self.CRELocs, x.name, *x), axis=1))
    