In [1]:
import os
import sys
import re
import pandas as pd
import numpy as np
from enum import Enum
import zipfile
from typing import Optional, Union, List, Callable, Dict, Tuple
from dataclasses import dataclass, make_dataclass, field
from abc import ABC

PKG_ROOT = os.path.dirname(os.path.realpath(os.getcwd()))
if not PKG_ROOT in sys.path:
    sys.path.append(PKG_ROOT)

In [2]:
class IDS:
    
    @classmethod
    def make_names_old(cls, name:str, df:pd.DataFrame):
        '''
        Construct dataclass or nested dataclass from a dataframe
        '''
        if df.shape[1] == 2:
            data = [(_[1], str, field(default=_[0])) for _ in df.values]
            return make_dataclass(name, data)
        else:
            group = df.groupby(df.columns[0])
            data = [(k, dataclass, field(default=cls.make_names_old(k, v.drop(v.columns[0], axis=1)))) for k, v in group]
            return make_dataclass(name, data)

    @classmethod
    def make_names_(cls, name:str, df:pd.DataFrame):
        '''
        Construct dataclass or nested dataclass from a dataframe
        '''
        if df.shape[1] == 2:
            data = [(_[1], str, field(default=_[0])) for _ in df.values]
            return make_dataclass(name, data)
        else:
            group = df.groupby(df.columns[0])
            if group.ngroups == 1:
                return cls.make_names_(name, df.drop(df.columns[0], axis=1))
            data = [(k, dataclass, field(default=cls.make_names_(k, v.drop(v.columns[0], axis=1)))) for k, v in group]
            return make_dataclass(name, data)
        
    @classmethod
    def make_names(cls, name:str, df:pd.DataFrame, ignoreCols:List=[]):
        return cls.make_names_(name=name, df=df[[_ for _ in df.columns if not _ in ignoreCols]])

    @staticmethod
    def make_enum(name:str, df:pd.DataFrame, nameCol:int=1, valueCol:Optional[Union[int, list]]=None):
        if df.shape[1] == 1:
            df = df.reset_index()
        nameCol = df.columns[nameCol]
        if valueCol is None:
            valueCol = df.columns.difference([nameCol])
        else:
            valueCol = df.columns[valueCol]
        if len(valueCol) == 1:
            valueCol = valueCol[0]
        if isinstance(valueCol, (list, tuple, pd.Index)):
            return Enum(name, df.set_index(nameCol)[valueCol].apply(tuple, axis=1).to_dict())
        else:
            return Enum(name, df.set_index(nameCol)[valueCol].to_dict())
        
    @staticmethod
    def make_map(df, cols:List):
        d1 = df[df.columns[cols[:2]]].set_index(df.columns[cols[0]]).drop_duplicates().to_dict()
        d2 = df[df.columns[cols[:2]]].set_index(df.columns[cols[1]]).drop_duplicates().to_dict()
        d1.update(d2)
        return d1
    
    @classmethod
    def clean_duplicates(cls, df:pd.DataFrame, valueCol:int=-1, groupby:Optional[Union[List, str]]=None):
        if groupby is None:
            col = df.columns[valueCol]
            df = df.groupby(col).apply(lambda x: x.reset_index())
            if df.index.nlevels > 1:
                df = df.droplevel(0)
            df[col] = df.apply(lambda x: f'{re.sub("_+", "_", x[col])}{("_" + str(x.name)) if x.name > 0 else ""}', axis=1)
            return df.reset_index(drop=True).drop('index', axis=1)
        else:
            return df.groupby(groupby).apply(lambda x:cls.clean_duplicates(x, valueCol=valueCol)).reset_index(drop=True)

In [3]:
class BlockBase:
    
    @staticmethod
    def bytes2Num(barray:bytes, signed:bool=False, order:str='little'):
        return int.from_bytes(barray, signed=signed, byteorder=order)
    
    @staticmethod
    def num2Bytes(num:int, length:int, signed=False, order='little'):
        return num.to_bytes(length, signed=signed, byteorder=order)
    
    @staticmethod
    def bytes2Str(barray:bytes):
        ba = np.array(bytearray(barray))
        p = np.argmax(np.logical_or(ba > 127
                                   ,ba < 32))
        if p == 0: ## All strings will have >1 length so when p == 0, it only means the string is the whole length of the bytearray
            p = len(barray)
        return barray[:p].decode('latin')
    
    @staticmethod
    def to_int(num:float):
        '''
        Integer will be coerced to float if np.nan exists in the same column
        Force the value to int, if np.nan then return np.nan
        '''
        if isinstance(num, str): #0x0000 in dataframe
            return int(num, 16)
        if num == None:
            return None
        if np.isnan(num):
            return None
        return int(num)


In [4]:
class ValueBlock(BlockBase):
    
    def __init__(self
                 ,pbuffer: bytearray  #contents of parent segment
                 ,name: str
                 ,valueLoc: int
                 ,size: int
                 ,signed: bool = False
                 ,order: str = 'little'
                 ,optimize: Optional[int] = None
                 ,skiphead: int = 0
                 ,skiptail: int = 0):
        self.pbuffer = pbuffer
        self.name = name
        self.valueLoc = self.to_int(valueLoc)
        self.size = self.to_int(size)
        self.signed = signed
        self.order = order
        self.optimize = self.to_int(optimize)
        self.skiphead = skiphead
        self.skiptail = skiptail
        self.notVoid = self.not_void()
    
    def not_void(self):
        if any([pd.isnull(self.valueLoc), pd.isnull(self.size)]):
            return False
        if any([self.pbuffer is None
                ,len(self.pbuffer)==0
                ,self.valueLoc < 0
                ,self.size <= 0
                ,self.valueLoc + self.size > len(self.pbuffer)]):
            return False
        return True
    
    def bind_buffer(self, buffer:bytearray):
        self.pbuffer = buffer
        return self
     
    @property
    def bytes(self):
        if self.not_void:
            return self.pbuffer[self.valueLoc+self.skiphead
                               :self.valueLoc+self.size+self.skiphead-self.skiptail]
        return None
        
    @property
    def value(self):
        if self.not_void:
            return self.bytes2Num(self.bytes, self.signed, self.order)
        return None
    
    @property
    def string(self):
        if self.not_void:
            return self.bytes2Str(self.bytes)
        return None
    
    def set_value(self, value:Union[int, Enum]):            
        if hasattr(value, 'value'):
                value = value.value
        value = self.to_int(value)
        if self.notVoid and value is not None:
            self.pbuffer[self.valueLoc+self.skiphead
                         :self.valueLoc+self.size+self.skiphead-self.skiptail] = self.num2Bytes(value, self.size, self.signed, self.order) 
    
            
    def inc_value(self, inc_value:int=1):
        if self.notVoid:
            self.set_value(self.value + inc_value)
    

In [5]:
class RecordBase(BlockBase):
     
    def __init__(self, segList:List, nameMap: Optional[Dict]=None, template:Optional[bytearray]=None):
        '''
        SegList has the format of [(name, length, datatype)]
        datatype includes: name, which will be mapped to codes in nameMap
                           int
                           str
                           None
        '''
        self.pattern = re.compile(b''.join([f'(?P<{n}>.{{{s}}})'.encode() for (n, s, t) in segList]))
        self.names, self.sizes, self.dtypes = zip(*segList)
        self.sizes = np.array(self.sizes)
        self.sizeMap = dict(zip(self.names, self.sizes))
        self.rangeMap = dict([(n, r) for (n, r) in zip(self.names, [range(*_) for _ in zip(np.concatenate([[0], self.sizes.cumsum()])
                                                                                           ,self.sizes.cumsum())])])
        self.typeMap = dict(zip(self.names, self.dtypes))
        self.size = self.sizes.sum()
        self.nameMap = nameMap
        if template is None:
            self.template = bytearray(b'0' * self.size)
        else:
            self.template = template
            
            
    def parse(self, buffer:bytearray):
        return pd.DataFrame(_.groupdict() for _ in self.pattern.finditer(buffer))
    
    def infer_col(self, col:pd.Series):
        dtype = self.typeMap.get(col.name, None)
        if dtype == None:
            return col
        if dtype == int:
            return col.apply(self.bytes2Num)
        if dtype == str:
            return col.apply(self.bytes2Str)
        if dtype.upper() == 'INTNAME':
            return col.apply(lambda x: self.nameMap(self.bytes2Num(x)).name)
        if dtype.upper() == 'STRNAME':
            return col.apply(lambda x: self.nameMap(self.bytes2Str(x)).name)
        
        
    def inverse_infer_col(self, col:pd.Series):
        col = col.apply(lambda x: x.name if hasattr(x, 'name') else x)
        dtype = self.typeMap.get(col.name, None)
        size = self.sizeMap.get(col.name)
        if dtype is None:
            return col
        if dtype == int:
            return col.apply(lambda x: self.num2Bytes(x, size))
        if dtype == str:
            return col.apply(lambda x: x.encode().ljust(size, b'\x00'))
        if dtype.upper() == 'INTNAME':
            return col.apply(lambda x: self.num2Bytes(getattr(self.nameMap, x).value, size))
        if dtype.upper() == 'STRNAME':
            return col.apply(lambda x: getattr(self.nameMap, x).value.encode().ljust(size, b'\x00'))
            
    
    def infer(self, df:pd.DataFrame):
        return pd.concat([self.infer_col(col) for name, col in df.iteritems()], axis=1)
    
    
    def make_records(self, values:pd.DataFrame, repeatCol:int=-1):
        '''
        @params values is a dataframe with the last column as the repeats of each row
        ColNames has to match the segment names in the pattern
        '''
        if isinstance(repeatCol, int):
            df = values.groupby(values.columns[repeatCol]).apply(lambda x: pd.concat([x] * x.name)).reset_index(drop=True).drop(values.columns[repeatCol], axis=1)
        df = self.infer(df)
        temp_df = pd.DataFrame([self.pattern.search(self.template).groupdict()] * df.shape[0])
        temp_df.update(df)
        return temp_df

    def make_recordIndex_from_dict(self, values:List[pd.Series], repeats:Optional[Union[int, Tuple, List, np.ndarray]]=1):
        values = [pd.Series(v, name=k) for k, v in values.items()]
        repeats = np.array(repeats).ravel()
        if repeats.size == 1:
            repeats = np.tile(repeats, values[0].size)
        df = pd.DataFrame(values).T
        df = df[[_ for _ in self.names if _ in df.columns]] # make sure the sequence is aligned with pattern segment sequence
        df.insert(df.shape[1], 'REPEATS', repeats)
        return df

In [6]:
class TableBlock(ValueBlock):
    '''
    Create ValueBlock that using a table of values to fill
    refTable includes Kit, Race, Class, Gender, Alignment and RacialEnemy defined in CRETABLE.csv file
    '''
    
    def __init__(self
                ,pbuffer: bytearray
                ,name: str
                ,valueLoc: int
                ,size: int
                ,signed: bool
                ,order: str
                ,refTableName: str
                ,optimize: Optional[int]
                ,refTable: Enum):
        super().__init__(pbuffer, name, valueLoc, size, signed, order, optimize)
        self.refTable = refTable
        
        
    def inc_value(self, inc_value:int=1):
        pass
    
    @property
    def value(self):
        v = super().value
        if v is not None:
            return self.refTable(v).name
        return None
    
    def set_value(self, value:Union[int, Enum, str]):
        if isinstance(value, str):
            for k, v in self.refTable.__members__.items():
                if value.upper() == k:
                    value = v.value
                    break
            else:
                value = None
        super().set_value(value) 

In [7]:
class SegBlock(BlockBase):
    '''
    Base class of subStructures as defined in GAMSEGS.csv and CRESEGS.csv
    Used for dummy subStructures that will not be modified
    '''
    
    def __init__(self
                ,savRef: object
                ,parentRef: object
                ,pbuffer: bytearray
                ,name: str
                ,countLoc: int  #offset pointer to buffer
                ,offsetLoc: int  #count pointer to buffer
                ,sizeLoc: int
                ,sizeValue: Optional[int] = None
                ,countValue: Optional[int] = None
                ,offsetValue: Optional[int] = None):  #only the header segment has no offset value
        self.savRef = savRef
        self.parentRef = parentRef
        self.resourceDir = savRef.resourceDir
        self.name = name
        self.pbuffer = pbuffer
        self.sizeValue = self.to_int(sizeValue)
        self.countValue = self.to_int(countValue)
        self.offsetValue = self.to_int(offsetValue)
        self.offsetBlock = ValueBlock(self.pbuffer, 'OFFSET', self.to_int(offsetLoc), 4)
        self.countBlock = ValueBlock(self.pbuffer, 'COUNT', self.to_int(countLoc), 4)
        self.sizeBlock = ValueBlock(self.pbuffer, 'SIZE', self.to_int(sizeLoc), 4)
        self.buffer = self.pbuffer[self.offsetValue: self.offsetValue + self.sizeValue]
        self.previous = None

            
    def pack(self):
        self.sizeValue = len(self.buffer)
        if self.previous is not None:
            self.offsetValue = self.previous.offsetValue + self.previous.sizeValue
        return self.buffer
    

In [8]:
class RecordsBlock(SegBlock):
    '''
    Parse subStructures in CRE files that's composed of a list of records
    including spell, item, effect, and item
    '''
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.countValue = self.countBlock.value
        self.df = self.Pattern.parse(self.buffer)
        
    @property
    def display(self):
        return self.Pattern.infer(self.df)
        
    def __repr__(self):
        display(self.display)
        return f'{self.__class__} {self.name}'
    
    def add_from_df(self, values:pd.DataFrame, repeatCol=-1, deduplicate_by:List=[]):
        df = self.Pattern.make_records(values, repeatCol=repeatCol)
        if deduplicate_by:
            self.remove({k:v for k, v in df.drop_duplicates().items() if k in deduplicate_by})
        self.df = pd.concat([self.df, df]).reset_index(drop=True)
        self.post_op()
        
    
    def add(self, values:Dict, repeats:Union[int, Tuple, List, np.ndarray]=1, deduplicate_by:List=[]):
        '''
        If duplicated records are not allowed - such as in the case of knownspells, use the deduplicate_by parameter to find existing records having same values in these columns
        remove those record before adding new ones
        '''
        values = self.Pattern.make_recordIndex_from_dict(values, repeats)
        self.add_from_df(values, deduplicate_by=deduplicate_by)
        
        
    def remove(self, values:Dict, remove_n:Optional[Union[int, Tuple, List, np.ndarray]]=None):
        '''
        Remove the first n records that matchs the values, if set to None, remove all
        '''
        iMap = self.Pattern.make_recordIndex_from_dict(values, remove_n)  # sequnces of cols are ensured to be aligned with Pattern segments in the called function
        group = self.df.groupby(iMap.columns[:-1].tolist())
        iMap = iMap.set_index([_ for _ in iMap.columns if _ != 'REPEATS']).to_dict()['REPEATS']
        self.df = group.apply(lambda x: x.head(0 if iMap.get(x.name, 0) is None else max(0, x.shape[0] - iMap.get(x.name, 0)))).reset_index(drop=True)
        self.post_op()
        
        
    def assign(self, values:pd.DataFrame, repeatCol=-1):
        self.df = self.Pattern.make_records(values, repeatCol=repeatCol)
        self.post_op()
        
    
    def post_op(self):
        '''
        Defines the actions after record df is modified, such as reset_index
        '''
        pass
        

    def pack(self):
        self.countValue = self.df.shape[0]
        return super().pack()

In [9]:
class Party(SegBlock):
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.CREIndexLength, self.CREOffsetLoc, self.CRESizeLoc = self.get_CRE_locs()
        self.parse_party()
        
    def get_CRE_locs(self):
        loc_df = self.savRef.creLocs
        CREIndexLen = self.to_int(loc_df[loc_df['SubSegs']=='Header']['SizeValue'].iloc[0])
        CREOffsetLoc, CRESizeLoc =  loc_df[loc_df['SubSegs']=='CRE'][['OffsetLoc', 'SizeLoc']].iloc[0].apply(self.to_int)
        return CREIndexLen, CREOffsetLoc, CRESizeLoc
    
    def parse_party(self):
        self.indexHeader = self.buffer[: self.CREIndexLength * self.countBlock.value]
        self.CREOffsetBlocks = [ValueBlock(self.buffer, 'CREOFFSET', self.CREOffsetLoc + self.CREIndexLength * i, 4) for i in range(self.countBlock.value)]
        self.CRESizeBlocks = [ValueBlock(self.buffer, 'CRESIZE', self.CRESizeLoc + self.CREIndexLength * i, 4) for i in range(self.countBlock.value)]
        vRecords = self.savRef.creValues
        tRecords = self.savRef.creTables
        sRecords = self.savRef.creSegs
        self.CRES = [CRE(self.savRef
                        ,self
                        ,self.pbuffer[o.value: o.value + s.value]
                        ,vRecords
                        ,tRecords
                        ,sRecords
                        ) for o, s in zip(self.CREOffsetBlocks, self.CRESizeBlocks)]
        
    def pack(self):
        [_.pack() for _ in self.CRES]
        self.buffer = bytearray(b''.join([self.indexHeader, *[_.buffer for _ in self.CRES]]))
        self.sizeValue = len(self.buffer)
        self.offsetValue = self.previous.offsetValue + self.previous.sizeValue
        creSizes = np.array([_.size for _ in self.CRES])
        creOffsets = np.concatenate([[0], creSizes]).cumsum()[:-1] + len(self.indexHeader) + self.offsetValue
        for o, s, ob, sb in zip(creOffsets, creSizes, self.CREOffsetBlocks, self.CRESizeBlocks):
            ob.bind_buffer(self.buffer).set_value(int(o))
            sb.bind_buffer(self.buffer).set_value(int(s))
        return self.buffer

In [10]:
class Globals(RecordsBlock):
    
    Pattern = RecordBase(segList=[('NAME', 32, str)
                                 ,('TYPE', 2, int)
                                 ,('REF', 2, int)
                                 ,('DWORD', 4, int)
                                 ,('INT', 4, int)
                                 ,('DOUBEL', 8, int)
                                 ,('SCRIPT', 32, str)])
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)


In [11]:
class KnownSpells(RecordsBlock):
    
    Pattern = RecordBase(segList=[('SPELL', 8, 'STRNAME')
                                 ,('LEVEL', 2, int)
                                 ,('TYPE', 2, int)])
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.Pattern.nameMap = self.savRef.SPELL


In [12]:
class SpellMemorization(RecordsBlock):
    
    Pattern = RecordBase(segList=[('LEVEL', 2, int)
                                 ,('BASECOUNT', 2, int)
                                 ,('EFFCOUNT', 2, int)
                                 ,('TYPE', 2, int)
                                 ,('INDEX', 4, int)
                                 ,('COUNTMEM', 4, int)])
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        
            
    def post_op(self):
        self.df.sort_values(['TYPE', 'LEVEL'], inplace=True)
        
    def update_mem(self, idf:pd.DataFrame):
        '''
        Update SpellMeorization info using he memorized spells. All memorization slot numbers matching the number of memorized spells (c.a. assuming all slots memorized)
        '''
        idf = pd.concat([self.inverse_infer_col(col) for name, col in idf.iteritems()], axis=1)
        self.df = pd.merge(self.df.drop('COUNTMEM', axis=1), idf, how='left', left_on=['LEVEL', 'TYPE'], right_on=['LEVEL', 'TYPE'])
        self.df.fillna(value={'INDEX': self.df['INDEX'].ffill().bfill()
                              ,'COUNTMEM': b'\x00\x00\x00\x00'}, inplace=True)
        self.df['BASECOUNT'] = self.df['COUNTMEM']
        self.df['EFFCOUNT'] = self.df['COUNTMEM']


In [13]:
class MemorizedSpells(RecordsBlock):
    
    TypeMap = {b'SPWI': 1
              ,b'SPCL': 3
              ,b'SPIN': 2
              ,b'SPPR': 0
              ,b'SPSD': 4}
    
    Pattern = RecordBase(segList=[('SPELL', 8,'STRNAME')
                                 ,('COUNTMEM', 4, int)])
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.Pattern.nameMap = self.savRef.SPELL
        
    def post_op(self):
        self.df.insert(0, 'TYPE', self.df['SPELL'].apply(lambda x: self.TypeMap.get(x[:4])))
        self.df.insert(0, 'LEVEL', self.df['SPELL'].apply(lambda x: int(x[4:5])))
        self.df = self.df.sort_values(['TYPE', 'LEVEL'])
        idf = self.df.groupby(['TYPE', 'LEVEL'])['COUNTMEM'].count().reset_index()
        idf.insert(idf.shape[1] - 1, 'INDEX', idf['COUNTMEM'].shift(fill_value=0).cumsum())
        self.df = self.df.drop(['TYPE', 'LEVEL'], axis=1)
        self.parentRef.SpellMemorization.update_mem(idf)


In [14]:
class Effects(RecordsBlock):
    Pattern = RecordBase(segList=[('NULL', 8, int)
                                 ,('OPCODE', 4, 'INTNAME')
                                 ,('TARGET', 4, int)
                                 ,('POWER', 4, int)
                                 ,('PARAM1', 4, int)
                                 ,('PARAM2', 4, int)
                                 ,('TIME', 4, int)
                                 ,('OTHER', 232, None)]
                         ,template=bytearray([0, 0, 0, 0, 0, 0, 0, 0, 233, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 95, 
                                              0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 255, 
                                              255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 
                                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 
                                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                                              0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
                                              0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.Pattern.nameMap = self.savRef.EFFECT


In [15]:
class Items(RecordsBlock):
    
    Pattern = RecordBase(segList=[('NAME', 8, 'STRNAME')
                                 ,('EXPIRE', 1, int)
                                 ,('ELAPSED', 1, int)
                                 ,('QUALITY1', 2, int)
                                 ,('QUALITY2', 2, int)
                                 ,('QUALITY3', 2, int)
                                 ,('IDENTIFIED', 1, int)
                                 ,('UNSTEALABLE', 1, int)
                                 ,('STOLEN', 1, int)
                                 ,('UNDROPPABLE', 1, int)]
                         ,template=b'CLCK30\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x01\x00\x00\x00')
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.Pattern.nameMap = self.savRef.ITEM
        
    def post_op(self):
        self.parentRef.ItemSlots.place(self.Pattern.infer_col(self.df['NAME']))

In [16]:
class ItemSlots(SegBlock):
    
    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.slots = {k: v.value.copy() for k, v in getattr(self.savRef, self.parentRef.parentRef.name.upper() + 'SLOTS').__members__.items()}
        
    def place(self, itms:pd.Series):
        fill = [-1] * (max([max(_) for _ in self.slots.values()]) + 1)
        bins = [_ for _ in self.slots.keys() if _ != 'NONE']
        for i, itm in enumerate(itms):
            slot = getattr(self.savRef.ITEMCONTAINER, itm)
            for cname in [slot, 'MISC', *bins]:
                c = self.slots.get(cname)
                if len(c) > 0:
                    fill[c.pop()] = i
                    break
        self.buffer = bytearray(b''.join([self.num2Bytes(_, length=2, signed=True) for _ in fill]))
        
            

In [17]:
class GamCreBase(ABC, BlockBase):
    
    SEGMAP = {"KnownSpells": KnownSpells
             ,"SpellMemorization": SpellMemorization
             ,"MemorizedSpells": MemorizedSpells
             ,"Effects": Effects
             ,"Items": Items
             ,"ItemSlots": ItemSlots
             ,"Party": Party
             ,"NPC": Party
             ,"Global": Globals}
        
        
    def init_seg_size(self):
        df = self.segRecords.copy()
        df.insert(df.shape[1]
                 ,'OffsetValue'
                 ,self.segRecords['OffsetLoc'].apply(lambda x: np.nan if pd.isnull(x) else self.bytes2Num(self.buffer[self.to_int(x)
                                                                                                                     :self.to_int(x)+4])))
        df1 = df[df['OffsetValue']!=0].fillna(value={'OffsetValue': 0})
        df2 = df[df['OffsetValue']==0]
        df1['SizeValue'] = df1['OffsetValue'].shift(-1, fill_value=self.size) - df1['OffsetValue']
        self.segRecords = pd.concat([df1, df2]).fillna(value={'SizeValue': 0}).sort_index()
        
    
    def init_values(self, buffer:Optional[bytearray]=None): 
        self.VALUES = pd.Series([], dtype=object)
        if not self.valueRecords.empty:
            if buffer == None:
                buffer = self.SEGS[0].buffer
            self.VALUES = self.valueRecords.apply(lambda x: ValueBlock(buffer, *x), axis=1)
            for i in range(self.VALUES.size):
                setattr(self, self.VALUES[i].name, self.VALUES[i])
            
            # self.valueRecords.apply(lambda x: setattr(self, x[0], ValueBlock(buffer, *x[1:])),axis=1)
            # self.VALUES = make_dataclass('VALUES', self.valueRecords.apply(lambda x: (x[0]
            #                                                                          ,ValueBlock
            #                                                                          ,ValueBlock(buffer, *x[1:]))
            #                                                                ,axis=1))
            
            
    def init_tables(self, buffer:Optional[bytearray]=None):
        self.TABLEVALUES = pd.Series([], dtype=object)
        if not self.tableRecords.empty:
            if buffer == None:
                buffer = self.buffer
            self.TABLEVALUES = self.tableRecords.apply(lambda x: TableBlock(buffer, *x, getattr(self.savRef, x['RefTable'])) ,axis=1)
            for i in range(self.TABLEVALUES.size):
                setattr(self, self.TABLEVALUES[i].name, self.TABLEVALUES[i])
            # self.TABLES = make_dataclass('TABLES', self.tableRecords.apply(lambda x: (x[0]
            #                                                                          ,TableBlock
            #                                                                          ,TableBlock(buffer
            #                                                                                      ,*x[1:]
            #                                                                                      ,getattr(self.savRef, x['RefTable'])))
            #                                                                ,axis=1))
                                                                           
            
    def init_segs(self, buffer:Optional[bytearray]=None):
        '''
        Need to keep sequence order
        So use pd.Series instead of dataclass
        '''
        self.SEGS = pd.Series([], dtype=object)
        if not self.segRecords.empty:
            if buffer == None:
                buffer = self.buffer
            self.SEGS = (self.segRecords
                         .set_index(self.segRecords.columns[0])
                         .apply(lambda x: self.SEGMAP.get(x.name, SegBlock)(self.savRef, self, buffer, x.name, *x), axis=1))
            for i in range(1, self.SEGS.size):
                self.SEGS[i].previous = self.SEGS[i-1]
                setattr(self, self.SEGS[i].name, self.SEGS[i])
            
            
    def pack(self):
        self.buffer = bytearray(b''.join([_.pack() for _ in self.SEGS]))
        self.size = len(self.buffer)
        for seg in self.SEGS:
            seg.offsetBlock.bind_buffer(self.buffer).set_value(seg.offsetValue)
            seg.countBlock.bind_buffer(self.buffer).set_value(seg.countValue)
            seg.sizeBlock.bind_buffer(self.buffer).set_value(seg.sizeValue)
        return self.buffer
    

In [18]:
class GAM(GamCreBase):
    
    def __init__(self
                 ,savRef: object
                 ,buffer: bytearray):
        self.savRef = savRef
        self.resourceDir = savRef.resourceDir
        self.buffer = buffer
        self.size = len(self.buffer)
        self.valueRecords = savRef.gamValues
        self.segRecords = savRef.gamSegs
        self.VALUES = make_dataclass('SEGS', [])
        self.SEGS = pd.Series([], dtype=object)
        self.init_seg_size()
        self.init_segs()
        self.init_values()
        


In [19]:
class CRE(GamCreBase):
    
    def __init__(self
                ,savRef:object
                ,parentRef:object
                ,buffer: bytearray
                ,valueRecords: Optional[pd.DataFrame] = None
                ,tableRecords: Optional[pd.DataFrame] = None
                ,segRecords: Optional[pd.DataFrame] = None
                ):
        self.savRef = savRef
        self.parentRef = parentRef
        self.buffer = buffer
        self.size = len(self.buffer)
        if valueRecords is None:
            self.valueRecords = pd.read_csv(os.path.join(self.savRef.resourceDir, 'CREVALUES.csv'), index_col=0)
        else:
            self.valueRecords = valueRecords.copy()
        if tableRecords is None:
            self.tableRecords = pd.read_csv(os.path.join(self.savRef.resourceDir, 'CRETABLES.csv'), index_col=0)
        else:
            self.tableRecords = tableRecords.copy()
        if segRecords is None:
            self.segRecords = pd.read_csv(os.path.join(self.savRef.resourceDir, 'CRESEGS.csv'), index_col=0)
        else:
            self.segRecords = segRecords.copy()
        self.VALUES = make_dataclass('SEGS', [])
        self.TABLES = make_dataclass('SEGS', [])
        self.SEGS = pd.Series([], dtype=object)
        self.init_seg_size()
        self.init_segs() 
        self.init_values()
        self.init_tables()
             
        

In [20]:
class Sav(GamCreBase):
    
    def __init__(self, savefile:str, game:str='BGEE'):   # for other games, RACE, KIT, STATES, CLASS may need to copy from BGEE since the list is more comprehensive
        self.savefile = os.path.join(PKG_ROOT, 'saves', 'original', savefile)
        self.modified = os.path.join(PKG_ROOT, 'saves', 'modified', 'Edited_'+savefile)
        self.resourceDir = os.path.join(PKG_ROOT, 'resources', game)
        self.zipfile = zipfile.ZipFile(self.savefile)
        self.filelist = self.zipfile.namelist()
        self.files = [self.zipfile.read(_) for _ in self.filelist]
        self.gamStr = self.files[1].decode('latin')
        self.gamBuffer = bytearray(self.files[1])
        self.gamVersion = re.findall(r'GAME\s*V\d+\.\d+', self.gamStr)[0]  #always 'GAMEV2.0'
        self.creVersion = re.findall(r'CRE\s*V\d+\.\d+', self.gamStr)[0]  #always 'CRE V1.0'
        self.load_dfs()
        self.make_names()
        self.GAM = GAM(savRef=self, buffer=self.gamBuffer)
        self.Party = self.GAM.SEGS.Party.CRES
        self.NPC = self.GAM.SEGS.NPC.CRES
        
        
    def load_dfs(self):
        self.gamValues = pd.read_csv(os.path.join(self.resourceDir, 'GAMVALUES.csv'), index_col=0)
        self.gamSegs = pd.read_csv(os.path.join(self.resourceDir, 'GAMSEGS.csv'), index_col=0)
        self.creValues = pd.read_csv(os.path.join(self.resourceDir, 'CREVALUES.csv'), index_col=0)
        self.creTables = pd.read_csv(os.path.join(self.resourceDir, 'CRETABLES.csv'), index_col=0)
        self.creSegs = pd.read_csv(os.path.join(self.resourceDir, 'CRESEGS.csv'), index_col=0)
        self.creLocs = pd.read_csv(os.path.join(self.resourceDir, 'CRELOC.csv'), index_col=0)
    
    def make_names(self):
        for itm in ['ITEM', 'SPELL', 'EFFECT']:
            f = pd.read_csv(os.path.join(self.resourceDir, itm + '.csv'), index_col=0)
            setattr(self, itm+'CODES', IDS.make_names(itm+'CODES', f))
            setattr(self, itm, IDS.make_enum(itm, f, nameCol=-1, valueCol=-2))
            if itm == 'ITEM':
                setattr(self, itm+'CONTAINER', IDS.make_enum(itm+'CONTAINER', f[['ItemSlot', 'Item']]))
            #setattr(self, itm, IDS.make_enum(itm, IDS.clean_duplicates(f), nameCol=-1, valueCol=-2))
        for itm in ['WEAPON', *self.creTables['RefTable'].drop_duplicates()]: #Race, Kit, Class, Gender, Alignment
            setattr(self, itm, IDS.make_enum(itm, pd.read_csv(os.path.join(self.resourceDir, itm + '.csv'), index_col=0)))
        for itm in ['PARTYSLOTS', 'NPCSLOTS']:
            f = pd.read_csv(os.path.join(self.resourceDir, itm + '.csv'), index_col=0)
            f = f.groupby(f.columns[0]).apply(lambda x: x.index.to_list()).to_frame().reset_index()
            setattr(self, itm, IDS.make_enum(itm, f, nameCol=0))
               
            
    def pack(self, filename:str=None):
        if filename is None:
            filename = self.modified
        self.zipfile.close()
        with zipfile.ZipFile(filename, 'w') as target:
            target.writestr(self.filelist[0], self.files[0], compress_type=zipfile.ZIP_STORED)
            target.writestr(self.filelist[1], self.GAM.pack(), compress_type=zipfile.ZIP_STORED)
            for _name, _f in zip(self.filelist[2:], self.files[2:]):
                target.writestr(_name, _f, compress_type=zipfile.ZIP_STORED)
                
                
    def optimize(self):
        [_.set_value(_.optimize) for _ in self.GAM.VALUES]
        [_.set_value(_.optimize) for c in self.Party for _ in c.VALUES]
            
        

In [21]:
s2 = Sav('save2.bg2save', 'BGEE')
s1 = Sav('Testsave.bg1save', 'BGEE')

In [29]:
def getspell(b):
    s = re.search(r'[A-Z]{4}\d{3}', b.decode('latin'))
    if s:
        return s1.SPELL(s.group()).name
    else:
        return b

In [53]:
df = pd.concat([_.Effects.display for _ in s2.Party[1:]])

In [54]:
df['SPELL'] = df["OTHER"].apply(getspell)

In [68]:
df = df[df.SPELL.apply(lambda x:isinstance(x, str))].drop_duplicates()

In [63]:
df0 = pd.read_csv('/Users/CIDER/工作/Python程序/游戏/博德之门/New/resources/BGEE/OPTIM_SPELLMEM.csv', index_col=0)

In [69]:
df1 = df[df.SPELL.isin(df0.SPELL)]
df2 = df[~df.SPELL.isin(df0.SPELL)]

In [74]:
df3 = df1[df1.SPELL.apply(lambda x: x.split('_')[0] in ['CLERIC', 'WIZARD'])]
df4 = df1[df1.SPELL.apply(lambda x: x.split('_')[0] not in ['CLERIC', 'WIZARD'])]

In [76]:
df4

Unnamed: 0,NULL,OPCODE,TARGET,POWER,PARAM1,PARAM2,TIME,OTHER,SPELL
10,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL901\x00\x00...,WARRIOR_GREATER_WHIRLWIND
12,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL903\x00\x00...,WARRIOR_GREATER_DEATHBLOW
13,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL906\x00\x00...,WARRIOR_POWER_ATTACK
14,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL905\x00\x00...,WARRIOR_CRITICAL_STRIKE
15,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL909\x00\x00...,WARRIOR_SMITE
17,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL922\x00\x00...,RANGER_TRACKING
28,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL904\x00\x00...,WARRIOR_RESIST_MAGIC
15,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL910\x00\x00...,ROGUE_SET_SPIKE_TRAP
16,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL911\x00\x00...,ROGUE_SET_EXPLODING_TRAP
17,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL912\x00\x00...,ROGUE_SET_TIME_TRAP


In [102]:
df5 = df2[df2.OPCODE.isin(['High_Level_Ability_Denotation', 'Item_Can_Use_Any_Item'])].reset_index(drop=True)
df5 = pd.concat([df5.drop([0, 1,2,10,11,17]), df3])
df5.drop([13,14],inplace=True)

In [103]:
df5

Unnamed: 0,NULL,OPCODE,TARGET,POWER,PARAM1,PARAM2,TIME,OTHER,SPELL
3,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPPR721\x00\x00...,CLERIC_ENERGY_BLADES
4,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPPR723\x00\x00...,CLERIC_ELEMENTAL_SWARM
5,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPPR725\x00\x00...,CLERIC_GLOBE_OF_BLADES
6,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPPR726\x00\x00...,CLERIC_SUMMON_DEVA
7,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPPR730\x00\x00...,CLERIC_AURA_OF_FLAMING_DEATH
8,0,Item_Can_Use_Any_Item,1,0,0,1,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,THIEF_USE_ANY_ITEM
9,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL915\x00\x00...,THIEF_USE_ANY_ITEM
12,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL920\x00\x00...,BARD_ENHANCED_BARD_SONG
15,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPWI920\x00\x00...,WIZARD_ENERGY_BLADES
16,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPWI922\x00\x00...,WIZARD_DRAGONS_BREATH


In [104]:
df4['REPEAT']=20
df5['REPEAT']=1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df4['REPEAT']=20


In [109]:
df6.insert(0, 'CLASS', df6.SPELL.apply(lambda x: 'ALL' if x.split('_')[0]=='THIEF' else 'THIEF' if x.split('_')[0]=='ROGUE' else x.split('_')[0]))

In [112]:
df6.sort_values(['CLASS', 'OPCODE']).reset_index(drop=True).to_csv('/Users/CIDER/工作/Python程序/游戏/博德之门/New/resources/BGEE/OPTIM_EFFECT.csv')

In [115]:
df6.sort_values(['CLASS', 'OPCODE']).reset_index(drop=True)

Unnamed: 0,CLASS,NULL,OPCODE,TARGET,POWER,PARAM1,PARAM2,TIME,OTHER,SPELL,REPEAT
0,ALL,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL918\x00\x00...,THIEF_ALCHEMY,20
1,ALL,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL919\x00\x00...,THIEF_SCRIBE_SCROLLS,20
2,ALL,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL915\x00\x00...,THIEF_USE_ANY_ITEM,1
3,ALL,0,Item_Can_Use_Any_Item,1,0,0,1,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,THIEF_USE_ANY_ITEM,1
4,BARD,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL921\x00\x00...,BARD_MAGIC_FLUTE,20
5,BARD,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL920\x00\x00...,BARD_ENHANCED_BARD_SONG,1
6,CLERIC,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPPR721\x00\x00...,CLERIC_ENERGY_BLADES,1
7,CLERIC,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPPR723\x00\x00...,CLERIC_ELEMENTAL_SWARM,1
8,CLERIC,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPPR725\x00\x00...,CLERIC_GLOBE_OF_BLADES,1
9,CLERIC,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPPR726\x00\x00...,CLERIC_SUMMON_DEVA,1


In [128]:
df7 = s1.Party[0].Effects

In [142]:
df7

Unnamed: 0,CLASS,NULL,OPCODE,TARGET,POWER,PARAM1,PARAM2,TIME,OTHER,REPEAT
0,ALL,0,Stat_Proficiency_Modifier,0,0,2,113,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
1,ALL,0,Stat_Proficiency_Modifier,0,0,2,112,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
2,ALL,0,Stat_Proficiency_Modifier,0,0,2,111,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
3,ALL,0,Stat_Proficiency_Modifier,0,0,5,107,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
4,ALL,0,Stat_Proficiency_Modifier,0,0,5,106,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
5,ALL,0,Stat_Proficiency_Modifier,0,0,5,105,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
6,ALL,0,Stat_Proficiency_Modifier,0,0,5,104,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
7,ALL,0,Stat_Proficiency_Modifier,0,0,5,103,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
8,ALL,0,Stat_Proficiency_Modifier,0,0,5,102,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
9,ALL,0,Stat_Proficiency_Modifier,0,0,5,99,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1


In [145]:
pd.concat([df6, df7]).sort_values(['CLASS', 'OPCODE', 'PARAM2']).reset_index(drop=True).to_csv('/Users/CIDER/工作/Python程序/游戏/博德之门/New/resources/BGEE/OPTIM_EFFECT.csv')

In [149]:
df8 = pd.concat([df6, df7]).sort_values(['CLASS', 'OPCODE', 'PARAM2']).drop('SPELL', axis=1).reset_index(drop=True)

In [155]:
df8[df8['CLASS'].isin(['ALL', 'THIEF'])].drop('CLASS', axis=1).reset_index(drop=True)

Unnamed: 0,NULL,OPCODE,TARGET,POWER,PARAM1,PARAM2,TIME,OTHER,REPEAT
0,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL918\x00\x00...,20
1,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL919\x00\x00...,20
2,0,High_Level_Ability_Denotation,0,0,0,0,9,b'\x00\x00\x00\x00d\x00\x00\x00SPCL915\x00\x00...,1
3,0,Item_Can_Use_Any_Item,1,0,0,1,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
4,0,Stat_Proficiency_Modifier,0,0,5,89,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
5,0,Stat_Proficiency_Modifier,0,0,5,90,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
6,0,Stat_Proficiency_Modifier,0,0,5,91,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
7,0,Stat_Proficiency_Modifier,0,0,5,92,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
8,0,Stat_Proficiency_Modifier,0,0,5,93,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
9,0,Stat_Proficiency_Modifier,0,0,5,94,9,b'\x00\x00\x00\x00d\x00\x00\x00\x00\x00\x00\x0...,1
