In [1]:
# import Audio and Spectrogram classes from OpenSoundscape
# These classes help segment audio into displayable chunks for faster plotting
from opensoundscape.audio import Audio
from opensoundscape.spectrogram import Spectrogram

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import glob
import os

In [2]:
# Build the path to the text file holding the directory listing for one recording session.
# The listing was produced by piping the Linux ls command's output to a file
# (ls -lh > output.txt, run inside the Recordings folder).
# The file read here is named 'output<session>.txt'
# (presumably the listing was renamed per session -- confirm).
session = '20220703_unit2'
filename = f'output{session}.txt'

# The listing is looked up relative to the current working directory.
file_path = filename

In [3]:
# Read the saved `ls -lh` listing into a DataFrame, splitting fields on runs of whitespace.
# - header=None: the listing has no header row, so columns are numbered 0..N.
# - skiprows=1: the first line of the raw output is 'total <size of all files>'.
# - sep is a raw string so the regex `\s+` is passed through verbatim; the
#   non-raw '\s+' is an invalid string escape that newer Python versions warn about.
# NOTE: because fields are split on whitespace, a file name containing spaces
# would be spread over extra fields instead of landing in a single column.

data = pd.read_csv(file_path, sep=r'\s+', skiprows=1, header=None)
print(data)

              0  1           2           3     4    5   6      7  \
0    -rwxr-xr-x  1  adkris1002  adkris1002  1.5M  Jul   2  14:34   
1    -rwxr-xr-x  1  adkris1002  adkris1002  656M  Jul   2  14:59   
2    -rwxr-xr-x  1  adkris1002  adkris1002  856M  Jul   2  15:29   
3    -rwxr-xr-x  1  adkris1002  adkris1002  856M  Jul   2  15:59   
4    -rwxr-xr-x  1  adkris1002  adkris1002  856M  Jul   2  16:29   
..          ... ..         ...         ...   ...  ...  ..    ...   
847  -rwxr-xr-x  1  adkris1002  adkris1002  856M  Jul   5  10:29   
848  -rwxr-xr-x  1  adkris1002  adkris1002  856M  Jul   5  10:59   
849  -rwxr-xr-x  1  adkris1002  adkris1002  856M  Jul   5  11:29   
850  -rwxr-xr-x  1  adkris1002  adkris1002     0  Dec  31   1979   
851  -rw-rw-r--  1  adkris1002  adkris1002     0  Jul  13  23:57   

                            8  
0         20220702_213428.WAV  
1         20220702_213700.WAV  
2         20220702_220000.WAV  
3         20220702_223000.WAV  
4         20220702_2300

In [4]:
# Keep only the rows and columns needed downstream.
# Columns 0-3 of the listing (the first four) are dropped, leaving
# size, month, day, time and file name.
# The final row is dropped too; per the original notes it is the entry for the
# output.txt listing file itself rather than a recording.

raw_values = data.to_numpy()
list_np = raw_values[:-1, 4:]
print(list_np)

[['1.5M' 'Jul' 2 '14:34' '20220702_213428.WAV']
 ['656M' 'Jul' 2 '14:59' '20220702_213700.WAV']
 ['856M' 'Jul' 2 '15:29' '20220702_220000.WAV']
 ...
 ['856M' 'Jul' 5 '10:59' '20220705_173000.WAV']
 ['856M' 'Jul' 5 '11:29' '20220705_180000.WAV']
 ['0' 'Dec' 31 '1979' '20220705_183000.WAV']]


In [5]:
# Wrap the trimmed numpy array in a DataFrame with readable column labels.
# Notes on the values:
# - 'Time' normally holds HH:MM, but ls prints a year in that position for some
#   rows (seen in this data on an empty file dated 1979).
# - 'File Size' is suffixed with M for megabytes (MB) and K for kilobytes (kB);
#   a size with no suffix is in bytes.

column_labels = ['File Size', 'Month', 'Date', 'Time', 'File Name']
df = pd.DataFrame(data=list_np, columns=column_labels)

In [6]:
# Show the labelled table as plain text to check the columns line up as expected.
print(df)

    File Size Month Date   Time            File Name
0        1.5M   Jul    2  14:34  20220702_213428.WAV
1        656M   Jul    2  14:59  20220702_213700.WAV
2        856M   Jul    2  15:29  20220702_220000.WAV
3        856M   Jul    2  15:59  20220702_223000.WAV
4        856M   Jul    2  16:29  20220702_230000.WAV
..        ...   ...  ...    ...                  ...
846      856M   Jul    5  09:59  20220705_163000.WAV
847      856M   Jul    5  10:29  20220705_170000.WAV
848      856M   Jul    5  10:59  20220705_173000.WAV
849      856M   Jul    5  11:29  20220705_180000.WAV
850         0   Dec   31   1979  20220705_183000.WAV

[851 rows x 5 columns]


In [7]:
# Save the dataframe to 'file_sizes<session>.csv' in the current working directory.
# No `sep` argument is passed, so pandas writes with its default separator (a comma).
# NOTE(review): this comment previously said '\t' was the separator -- confirm which
# is intended before changing the call, since downstream readers may rely on the format.
# `index` is also left at its default, so the row index is written as the first column.

df.to_csv('file_sizes'+session+'.csv')