# lindworm storage folder on videos

Customization of `ldmStorageFolder` that extracts video meta data and stores the
information in files (json/excel).

configuration

+ `sSrcDN` folder to walk
+ `sBldDN` folder for output files
+ `sBldFN` json output file, `ldmStorageFolder` result enriched by meta data
+ `sBldFlatFN` json output file, flat result of `sBldFN` 
+ `sXlsFN` excel output file similar to flat result with additional video sheet
+ `sCfgFN` configuration file for the file status calculation of `ldmStorageFolder`
+ `iShaMB` amount of file data used to calculate the sha fingerprint 
  + `-1` all data
  + `0` no fingerprint
  + `>0` MB of data to use for sha fingerprint calculation  
+ `iVerbose` verbose level, higher values more details (10,50 used here)



In [162]:
import lindworm
import lindworm.logUtil as ldmLg
import lindworm.ldmStorageFolder as ldmSrgFdr
from lindworm.ldmOS import getSha

In [163]:
# input: folder to walk
sSrcDN = "/wrk/dat/tmp/toEncode/20230710/"
# output: folder and file names (json, flat json, excel)
sBldDN = "./x_out/"
sBldFN = "64l_0-stp-os.json"
sBldFlatFN = "64l_0-stp-os-0.json"
sXlsFN = "64l_0-stp-os.xlsx"
# configuration file handed to loadCfg
sCfgFN = "./x_cfg/64l_0-stp-os.json"
# sha fingerprint size in MB (0: no fingerprint) and verbose level
iShaMB, iVerbose = 0, 50

In [164]:
import os
import traceback
import ffmpeg
import pandas


In [165]:
import lindworm.logUtil as logUtil
# initialise logging to a file below ./x_log/
# NOTE(review): meaning of iLevel=0 and sLogger=None is defined by
# logUtil.logInit — presumably lowest level and default logger, confirm
logUtil.logInit("./x_log/64l_0-stp-os.log",iLevel=0,sLogger=None)

check [log](./x_log/64l_0-stp-os.log)

In [166]:
# smoke test: write a debug entry through the module alias used
# everywhere else in this notebook
ldmLg.logDbg('test')

In [167]:
# smoke test: debug entry with a %r placeholder and one argument
ldmLg.logDbg('test %r',[30,40])

In [168]:
if False:
    # disabled on purpose: single call variant provided by the
    # storage folder module, kept for reference
    ldmSrgFdr.execMain(
        sSrcDN=sSrcDN,
        sBldDN=sBldDN,
        sBldFN=sBldFN,
        iShaMB=iShaMB,
        sCfgFN=sCfgFN,
        iVerbose=iVerbose,
    )

get meta data from video file  
return as dictionary 

In [169]:
def getMetaDataMP4(sFN,sDN=None,oLog=None,iVerbose=0):
    """get meta data of the first video stream of a file by ffmpeg probe
    
    Args:
        sFN (str): file name
        sDN (str , optional): directory name, joined with sFN if given
        oLog (ldmUtilLog , optional): logging object,
                    nothing is logged without it
        iVerbose (int , optional): verbose level
                    - >0  log file name checked
                    - >10 log probe result, streams, tags and result
    Returns:
        dict: meta data, empty if the file does not exist or
            no video stream is found
            - codec_name, width, height, frame_rate : values of the
              video stream, '' if missing
            - duration : float if convertible, raw value otherwise
            - creation_time : tag of the video stream, '' if missing
            - lbl : creation date, creation time (hh:mm) and
              file name without extension joined by blank
            - _exception : '???!!!' added if processing failed
    """
    # defined before try, the exception handler relies on it
    dMetaData={}
    try:
        if sDN is not None:
            sFullFN=os.path.join(sDN,sFN)
        else:
            sFullFN=sFN
        if oLog is None:
            # no logger, nothing can be reported
            iVerbose=0
        if os.path.exists(sFullFN):
            if iVerbose>0:
                oLog.debug('sFullFN:%s check with ffmpeg',sFullFN)
            oMetaData=ffmpeg.probe(sFullFN)
            if iVerbose>10:
                oLog.debug('meta data:%r',oMetaData)
            # +++++ beg:find first video stream
            # streams of other type (audio, data) are skipped,
            # they must not stop the search
            dVideo=None
            for dStream in oMetaData['streams']:
                if iVerbose>10:
                    oLog.debug('stream:%r',dStream)
                if dStream.get('codec_type')=='video':
                    dVideo=dStream
                    break
            # ----- end:find first video stream
            if dVideo is not None:
                for sK,sKey in [
                            ('codec_name','codec_name'),
                            ('width','width'),
                            ('height','height'),
                            ('duration','duration'),
                            ('frame_rate','r_frame_rate')
                            ]:
                    sVal=dVideo.get(sKey,'')
                    if sK=='duration':
                        try:
                            dMetaData[sK]=float(sVal)
                        except (TypeError,ValueError):
                            # keep raw value if it is not a number
                            if oLog is not None:
                                oLog.error(traceback.format_exc())
                            dMetaData[sK]=sVal
                    else:
                        dMetaData[sK]=sVal
                # tags of the video stream itself, may be missing
                dTags=dVideo.get('tags') or {}
                if iVerbose>10:
                    oLog.debug('dTags:%r',dTags)
                sCreated=dTags.get('creation_time','')
                dMetaData['creation_time']=sCreated
                # label: date, hh:mm, file name without extension
                dMetaData['lbl']=' '.join([
                                sCreated[:10],
                                sCreated[11:16],
                                os.path.splitext(sFN)[0]
                            ])
        if iVerbose>10:
            oLog.debug('dMetaData:%r',dMetaData)
        return dMetaData
    except Exception:
        # best effort: report problem and flag result
        if oLog is not None:
            oLog.error(traceback.format_exc())
        dMetaData['_exception']='???!!!'
        return dMetaData


In [170]:
#ffmpeg.probe('/wrk/dat/tmp/toEncode/20230710//./100_PANA/P1001341.MP4')

customize lindworm storage folder class

In [171]:
class ldmFdrPhoto(ldmSrgFdr.ldmStorageFolder):
    """storage folder adding sha fingerprint and video meta data

    prcPost fills two flat lists
        lFilStat ... one dictionary per file (FN, DN and file status)
        lStatMP4 ... entries of lFilStat with video meta data
    """
    def clrEnd(self):
        """clear stored file names to process and flat file lists

        Returns:
            return code of ldmStorageFolder.clrEnd
        """
        iRet=ldmSrgFdr.ldmStorageFolder.clrEnd(self)
        self.lFilStat=None
        self.lStatMP4=None
        return iRet
    def prcPost(self,**kwargs):
        """processing end, add sha fingerprint and video meta data
        to file status and build flat file lists

        Args:
            **kwargs ... flexible keyword argument
                iShaMB  ... data to calculate sha in MB 
                iMetaDataFlat ... add meta data flat
                        - <=0 : stored in file status at key 'meta-data'
                        - >0  : merged into file status
                oNty ... notify object, optional, progress is reported
                        by SetMax, SetStatus, IncStatus and SetVal
        Returns:
            return code
                - >0 : okay processing done
                - =0 : okay nop
                - <0 : error
        """
        try:
            # +++++ beg:initialize
            iRet=0
            sOrg='ldmFdrPhoto::prcPost'
            iShaMB=kwargs.get('iShaMB',0)
            iMetaDataFlat=kwargs.get('iMetaDataFlat',0)
            # ----- end:initialize
            if self.sDefEnd is None:
                self.logDbg('skp:%s iRet:%d empty definition,'
                                'prcEnd has to be call before',sOrg,iRet)
                return iRet
            # +++++ beg:folder post processing
            iCntFN=0
            self.logDbg('beg:%s sDefEnd:%s len(lFdr):%d kwargs:%r',sOrg,
                                self.sDefEnd,len(self.lFdr),kwargs)
            # +++++ beg:flat file list
            self.lFilStat=[]
            self.lStatMP4=[]
            # ----- end:flat file list
            oNty=kwargs.get('oNty',None)
            if oNty is not None:
                self.logDbg('    dCntEnd:%r',self.dCntEnd)
                oNty.SetMax(self.dCntEnd['iFN'])
            for sRelDN in self.lFdr:
                if sRelDN not in self.dFolder:
                    continue
                if oNty is not None:
                    oNty.SetStatus(sRelDN)
                    oNty.IncStatus()
                sDN='/'.join([self.sDefEnd,sRelDN])
                dDN=self.dFolder[sRelDN]
                if self.iVerbose>5:
                    self.logDbg('   :%s sRelDN:%s len(dDN):%d',sOrg,
                                    sRelDN,len(dDN))
                if self.iVerbose>9:
                    self.logDbg('    dDN:%r',dDN)
                # files of a folder are stored at key '.'
                if '.' not in dDN:
                    continue
                for sFN,dStat in dDN['.'].items():
                    iCntFN+=1
                    if oNty is not None:
                        oNty.SetVal(iCntFN)
                    # +++++ beg:calc sha fingerprint
                    sSha=getSha(sFN,iMB=iShaMB,
                                sDN=sDN,
                                oLog=self.oLog)
                    if self.iVerbose>5:
                        self.logDbg('      sFN:%s sha:%s',sFN,
                                        sSha)
                    dStat['sha']=sSha
                    # ----- end:calc sha fingerprint
                    # +++++ beg:calc mp4 meta data
                    isVideo=0
                    # extension check ignores case, .mp4 and .MP4 match
                    if sFN.lower().endswith('.mp4'):
                        dMetaData=getMetaDataMP4(sFN,
                                    sDN=sDN,
                                    oLog=self.oLog,
                                    iVerbose=self.iVerbose)
                        if self.iVerbose>5:
                            self.logDbg('      sFN:%s dMetaData:%s',sFN,
                                            dMetaData)
                        # more than one key required, a result holding
                        # only '_exception' is not a video
                        if len(dMetaData)>1:
                            if iMetaDataFlat<=0:
                                dStat['meta-data']=dMetaData
                            else:
                                dStat.update(dMetaData)
                            isVideo=1
                    # ----- end:calc mp4 meta data
                    # +++++ beg:flat file status
                    dFilStat={
                        'FN':sFN,
                        'DN':sDN,
                        }
                    dFilStat.update(dStat)
                    # ----- end:flat file status
                    self.lFilStat.append(dFilStat)
                    if isVideo>0:
                        self.lStatMP4.append(dFilStat)
            # ----- end:folder post processing
            self.logDbg('end:%s iRet:%d',sOrg,iRet)
            return iRet
        except Exception:
            self.logTB()
            return -1


In [172]:
#oFld=ldmSrgFdr.ldmStorageFolder(iVerbose=iVerbose)
# use the customized class ldmFdrPhoto instead of the plain storage folder
oFld=ldmFdrPhoto(iVerbose=iVerbose)

load configuration

In [173]:
# NOTE(review): iRet is overwritten by later cells without being checked,
# confirm a failed configuration load may be ignored
iRet=oFld.loadCfg(sCfgFN)

read folder

In [174]:
# process source folder: begin, execute, end
oFld.prcBeg(sSrcDN,oRef=None)
oFld.prcExc()#(oGtrMD=self)
# iShaMB and iMetaDataFlat are the keyword arguments ldmFdrPhoto.prcPost reads;
# presumably prcEnd forwards them to prcPost — TODO confirm in ldmStorageFolder
iRet=oFld.prcEnd(iShaMB=iShaMB,iMetaDataFlat=1)
ldmLg.logInf('sSrcDN %r ret:%d',sSrcDN,iRet)

save result

In [175]:
# save attribute 'dDat' restricted to key oFld.sDefEnd
# to file sBldFN in folder sBldDN
iRet = oFld.saveDat(
    sBldFN,
    sDN=sBldDN,
    lKey=[oFld.sDefEnd],
    sAtr='dDat',
)
ldmLg.logInf(
    'sSrcDN %r ret:%d sBldFN %r sBldDN %r',
    sSrcDN, iRet, sBldFN, sBldDN,
)

In [176]:
# save flat file list (attribute 'lFilStat') to file sBldFlatFN
# in folder sBldDN
iRet=oFld.saveDat(sBldFlatFN,sDN=sBldDN,
            lKey=None,
            sAtr='lFilStat')
# label matches the variable reported, it is the flat file name here
ldmLg.logInf('sSrcDN %r ret:%d sBldFlatFN %r sBldDN %r',sSrcDN,iRet,sBldFlatFN,sBldDN)

show / log internal content

In [177]:
# dCntEnd holds counters of the processed folder,
# prcPost reads its key 'iFN' as maximum for progress notification
ldmLg.logDbg("dCntEnd %r",oFld.dCntEnd)

write data to excel file using pandas

In [178]:
sFN=os.path.join(sBldDN,sXlsFN)
ldmLg.logInf('write excel of sXlsFN %r sBldDN %r  sSrcDN %r',sXlsFN,sBldDN,sSrcDN)
with pandas.ExcelWriter(sFN) as oWrk:
    # one sheet per flat list: all files and video files only
    for sSheet,lStat in [
                ('content',oFld.lFilStat),
                ('mp4',oFld.lStatMP4),
                ]:
        dfFilStat=pandas.DataFrame(lStat)
        # an empty list (e.g. no video found) gives a frame without
        # columns, sorting by 'FN' would raise KeyError
        if 'FN' in dfFilStat.columns:
            dfFilStat.sort_values(['FN'],inplace=True)
        dfFilStat.to_excel(oWrk,sheet_name=sSheet,index=False)

finished

+ check [out](./x_out/64l_0-stp-os.json) `sBldFN` `sBldDN`
+ check [flat](./x_out/64l_0-stp-os-0.json) `sBldFlatFN` `sBldDN`
+ check [xls](./x_out/64l_0-stp-os.xlsx) `sXlsFN` `sBldDN`
+ check [log](./x_log/64l_0-stp-os.log)

In [179]:
# final log entry marks the end of the notebook run
ldmLg.logInf('sSrcDN %r finished',sSrcDN)