# Parsing iOS decrypted Signal DB

### Imports

In [2]:
import tempfile
import sqlite3
import pandas as pd
import  plistlib
import json
import os
from PIL import Image
from io import BytesIO
import zipfile
import tarfile
from chat_rendering import chat_HTML, render_chat, includes, CSS
from base64 import b64encode, b64decode
from Crypto.Cipher import AES
from struct import unpack

#### Here goes the paths of necessary files:

In [3]:
# iOS file system extraction - only Zip archive currently supported
#archive = '../mnt/test_snapchat/2020_03/dumps/7761_EX_INL_249_20_PALP_ZY_UN_01_files_full.zip'
#archive = '/home/mat/mnt2/test_snapchat/FullFileSystem.1.dar'
#archive = '../mnt/expertises/2020_03/dumps/7761_EX_INL_249_20_PALPATION_ON_UN_01_mnt2.tar'
archive = '/home/mat/mnt2/images/2021_04/BA_DEUX/UFED Apple iPhone 6s Plus (A1687) 2021_03_29 (001)/AdvancedLogical Full File System (checkm8) 01/FullFileSystem.1.dar'

# Path where to write results
#out_path = '/home/mat/mnt2/analyses/2020_03/PALP_ZY_UN/snapchat'
out_path = '/home/mat/mnt2/analyses/2021_04/BA_DEUX/signal'
os.makedirs(out_path, exist_ok=True)

# Path of encrypted archive in the archive
signal_enc_db_name = 'signal.sqlite'

# Path of decrypted keychain plist of the phone (on your FS)
#keychain_plist_path = '../mnt/expertises/2020_03/dumps/7761_EX_INL_249_20_PALP_ZY_UN_01_keychain.plist'
keychain_plist_path = '/home/mat/mnt2/images/2021_04/BA_DEUX/UFED Apple iPhone 6s Plus (A1687) 2021_03_29 (001)/AdvancedLogical Full File System (checkm8) 01/KeychainDump/keychain_decrypted.plist'

#constants
TAR = "tar"

ZIP = "zip"

DAR = "dar"

os.makedirs(out_path, exist_ok = True)

### Decrypting DB

#### 1. Extracting key

In [5]:
with open(keychain_plist_path,'rb') as f :
    plist = plistlib.load(f)
    
def getSignalKeyFromKeychain():
    if type(plist) == list:
        for dd in plist:
            if type(dd) == dict:        
                if 'acct' in dd:
                    if 'GRDBDatabaseCipherKeySpec' in str(dd['acct']) or str(b64encode(b'GRDBDatabaseCipherKeySpec')) in str(dd['acct']):
                        return dd['v_Data']
    else:
        for d in plist:
            for dd in plist[d]:
                if type(dd) == dict:        
                    if 'acct' in dd:
                        if 'GRDBDatabaseCipherKeySpec' in str(dd['acct']) or str(b64encode(b'GRDBDatabaseCipherKeySpec')) in str(dd['acct']):
                            return dd['v_Data']
    return None

key = getSignalKeyFromKeychain()
key


b'\xe0\x83\xd2d\x9f\xb7\xadm\xd6\n\x9eb\xaeE\xee\xb1\x19Hi\xd4\xc7\xa468<\x07\xaf(\x93\xf9\xa6\x1f\xcc\x80M\xc9J\xfc\x87\x00,\xd6Z\xd0X\xdb\xd1-'

#### 2. Loading archive file

In [6]:
if archive.endswith('zip'):
    archive_type = ZIP
    arc_handle = zipfile.ZipFile(archive)
    files = arc_handle.namelist()
elif archive.endswith('tar') or archive.endswith('gz'):
    archive_type = TAR
    arc_handle = tarfile.TarFile(archive)
    files = arc_handle.getnames()
    
elif archive.endswith('dar'):
    archive_type = DAR
    import subprocess
    list_dar = subprocess.run([
                "dar",
                "-l",
                archive[:-6]
            ],
            shell=False,
            capture_output=True,
            )
    
    file_list = BytesIO(list_dar.stdout).readlines()
    files = []
    for l in file_list:
        if l.startswith(b'['):
            files.append(l.split(b'\t')[-1].decode('utf8').strip())
    
else:
    print("ERROR : file not supported")

In [8]:
if archive_type == DAR:
    encrypteddb = [f for f in files if signal_enc_db_name in f][0]
    extract_file = subprocess.run([
                    "dar",
                    "-x",
                    archive[:-6],
                    "-g",
                    encrypteddb,
                    "-R",
                    out_path,
                    "-Oignore-owner"
                ],
                shell=False,
                capture_output=True,
                )
    enc_db_size = os.path.getsize(os.path.join(out_path,encrypteddb))
    tmp = open(os.path.join(out_path,encrypteddb), 'rb')

In [14]:
db_path = [f for f in files if f.endswith(signal_enc_db_name) and not '_' in os.path.basename(f)][0]

#### 3. Extracting DB to temp file

In [9]:
if archive_type == ZIP:
    db_path = [f for f in files if f.endswith(signal_enc_db_name) and not '_' in os.path.basename(f)][0]

    tmp = tempfile.NamedTemporaryFile()
    tmp.write(arc_handle.open(db_path).read())
    enc_db_size = arc_handle.getinfo(db_path).file_size
    
elif archive_type == TAR:
    db_path = [f for f in files if f.endswith(signal_enc_db_name) and not '_' in os.path.basename(f)][0]

    tmp = tempfile.NamedTemporaryFile()
    tmp.write(arc_handle.extractfile(db_path).read())
    enc_db_size = arc_handle.getmember(db_path).size

#### 4. Decrypting DB

In [11]:
tmp.seek(0)
signal_header_size = 0x20
default_page_size=0x1000
page_size = default_page_size
header_size = signal_header_size
header = tmp.read(header_size)
salt_sz = 0x10
hmac_sz = 0x40
reserved_sz = salt_sz + hmac_sz
max_page = int(enc_db_size / default_page_size)

def decrypt_page(page_offset):
    if page_offset == 0:
        page_data = tmp.read(page_size - header_size)
    else:
        page_data = tmp.read(page_size)
    
    iv = page_data[-reserved_sz:-reserved_sz+salt_sz]

    decryption_suite = AES.new(key[:32], AES.MODE_CBC, iv)
    plain_text = decryption_suite.decrypt(page_data[:-reserved_sz])
    
    return plain_text

with open(os.path.join(out_path,'signal-decrypted.sqlite'),'wb') as decrypted:
    decrypted.write(header)
    
    for page in range(0,max_page):
        decrypted.write(decrypt_page(page))
        decrypted.write(b'\x00'*reserved_sz)   

#### 5. Decrypting WAL

In [46]:
wal_signal_enc_db_name = signal_enc_db_name+'-'+'wal'

if archive_type == DAR:
    filepath = [f for f in files if wal_signal_enc_db_name in f][0]
    enc_db_size = os.path.getsize(filepath)
    tmp = open(filepath, 'rb')
    
elif archive_type == ZIP:
    db_path = [f for f in files if f.endswith(wal_signal_enc_db_name) and not '_' in os.path.basename(f)][0]

    tmp = tempfile.NamedTemporaryFile()
    tmp.write(arc_handle.open(db_path).read())
    enc_db_size = arc_handle.getinfo(db_path).file_size
    
elif archive_type == TAR:
    db_path = [f for f in files if f.endswith(wal_signal_enc_db_name) and not '_' in os.path.basename(f)][0]

    tmp = tempfile.NamedTemporaryFile()
    tmp.write(arc_handle.extractfile(db_path).read())
    enc_db_size = arc_handle.getmember(db_path).size

In [47]:
tmp.seek(0)
wal_header_size = 32
wal_page_header_size = 24
signal_header_size = 0x20
wal_header = tmp.read(wal_header_size)
page_size = unpack('>I',wal_header[8:12])[0]
salt_sz = 0x10
hmac_sz = 0x40
reserved_sz = salt_sz + hmac_sz
max_page = int((enc_db_size - wal_header_size) / (page_size + wal_page_header_size))

def decrypt_page(page_offset):
    page_header = tmp.read(wal_page_header_size)
    page_number = unpack('>I',page_header[:4])[0]
    plain_text = b''
    if page_number == 0:
        plain_text += tmp.read(signal_header_size)
        page_data = tmp.read(page_size - signal_header_size)
    else:
        page_data = tmp.read(page_size)
    
    iv = page_data[-reserved_sz:-reserved_sz+salt_sz]

    decryption_suite = AES.new(key[:32], AES.MODE_CBC, iv)
    plain_text += decryption_suite.decrypt(page_data[:-reserved_sz])
    
    return plain_text

with open(os.path.join(out_path,'signal-decrypted-wal.sqlite'),'wb') as decrypted:
    decrypted.write(header)
    
    for page in range(0,max_page):
        decrypted.write(decrypt_page(page))
        decrypted.write(b'\x00'*reserved_sz)   

Use SQL Carver to extract data from WAL file

#TODO : implement SQL Carver :)

### Parsing Data
#### Constants

In [15]:
recordTypes = {56: 'baseModel',
 55: 'experienceUpgrade',
 63: 'incomingGroupsV2MessageJob',
 24: 'installedSticker',
 29: 'knownStickerPack',
 40: '_100RemoveTSRecipientsMigration',
 43: '_101ExistingUsersBlockOnIdentityChange',
 47: '_102MoveLoggingPreferenceToUserDefaults',
 42: '_103EnableVideoCalling',
 45: '_104CreateRecipientIdentities',
 44: '_105AttachmentFilePaths',
 50: '_107LegacySounds',
 48: '_108CallLoggingPreference',
 51: '_109OutgoingMessageState',
 25: 'addToContactsOfferMessage',
 7: 'addToProfileWhitelistOfferMessage',
 32: 'backupFragment',
 58: 'broadcastMediaMessageJobRecord',
 22: 'contactOffersInteraction',
 57: 'contactQuery',
 46: 'databaseMigration',
 33: 'device',
 28: 'disappearingConfigurationUpdateInfoMessage',
 39: 'disappearingMessagesConfiguration',
 61: 'incomingContactSyncJobRecord',
 60: 'incomingGroupSyncJobRecord',
 36: 'linkedDeviceReadReceipt',
 15: 'messageContentJob',
 8: 'messageDecryptJob',
 62: 'reaction',
 38: 'recipientIdentity',
 49: 'resaveCollectionDBMigration',
 52: 'sessionResetJobRecord',
 5: 'unknownContactBlockOfferMessage',
 37: 'unknownDBObject',
 54: 'unknownProtocolVersionMessage',
 41: 'userProfile',
 13: 'verificationStateChangeMessage',
 34: 'jobRecord',
 53: 'messageDecryptJobRecord',
 35: 'messageSenderJobRecord',
 30: 'signalAccount',
 31: 'signalRecipient',
 14: 'stickerPack',
 6: 'attachment',
 3: 'attachmentPointer',
 18: 'attachmentStream',
 20: 'call',
 27: 'contactThread',
 9: 'errorMessage',
 26: 'groupThread',
 19: 'incomingMessage',
 10: 'infoMessage',
 16: 'interaction',
 17: 'invalidIdentityKeyErrorMessage',
 1: 'invalidIdentityKeyReceivingErrorMessage',
 23: 'invalidIdentityKeySendingErrorMessage',
 64: 'mention',
 11: 'message',
 21: 'outgoingMessage',
 12: 'recipientReadReceipt',
 2: 'thread',
 4: 'unreadIndicatorInteraction',
 59: 'testModel'
}

mimeTypeIcon = {
    "image":"📷",
    "audio":"🎧",
    "video":"🎥",
    "animated":"🎡",
    "other":"📎",
}

In [16]:
#signal_root = '/private/var/mobile/Containers/Shared/AppGroup/DA91D9C6-9E47-4B14-8468-D347C98D599C/Attachments'
signal_root = '/'.join([db_path.split('/grdb')[0],'Attachments'])

### Starting to extract

In [17]:
db = sqlite3.connect(os.path.join(out_path,'signal-decrypted.sqlite'))

In [18]:
threads = pd.read_sql_query("""
SELECT 
    t.contactPhoneNumber, 
    t.uniqueId as uniqueId_T, 
    t.lastInteractionRowId, 
    t.contactUUID,
    i.uniqueThreadId,
    i.attachmentIds,
    i.timestamp,
    i.receivedAtTimestamp,
    i.body,
    i.id,
    i.recordType,
    i.authorUUID,
    i.sender,
    i.configurationDurationSeconds,
    u.avatarUrlPath,
    u.profileName
    
FROM 
    model_TSInteraction i
    LEFT OUTER JOIN model_TSThread t
        ON i.uniqueThreadId = t.uniqueId
    LEFT OUTER JOIN model_OWSUserProfile u
        on t.contactPhoneNumber = u.recipientPhoneNumber
    
""", db)

attachments = pd.read_sql_query("""
SELECT
    uniqueId,
    caption,
    contentType,
    sourceFilename,
    localRelativeFilePath,
    isValidImageCached OR isValidVideoCached as cached
FROM
    model_TSAttachment
""", db)
db.close()

In [22]:
threads

Unnamed: 0,contactPhoneNumber,uniqueId_T,lastInteractionRowId,contactUUID,uniqueThreadId,attachmentIds,timestamp,receivedAtTimestamp,body,id,recordType,authorUUID,sender,configurationDurationSeconds,avatarUrlPath,profileName,parsedAttachmentIds


parsedAttachmentIds field contains the attachements of a message in a binary plist format.
Currrently only supporting one attachment per message

In [19]:
def parsePlist(record):
    try:
        d = plistlib.loads(record["attachmentIds"], fmt = plistlib.FMT_BINARY)
        if d:
            if d['$objects'][1]['NS.objects']:
                return d['$objects'][2]
        return None
    except:
        return None


#### Formatting the DF and extracting attachment to pass to chat_render function

In [20]:
threads["parsedAttachmentIds"] = threads.apply(lambda record: parsePlist(record),axis=1)
threads_Att = threads.merge(attachments, left_on="parsedAttachmentIds", right_on="uniqueId", how="left")

threads_Att["recordType"] = threads.apply(lambda record: recordTypes[record["recordType"]],axis=1)
threads_Att["data-name"] = threads_Att.apply(lambda row: row["profileName"] if row["profileName"] is not None else row["contactPhoneNumber"], axis=1)
threads_Att["from_me"] = threads_Att.apply(lambda row: 1 if row["recordType"] == "outgoingMessage" else 0, axis=1)
threads_Att["body"] = threads_Att.apply(lambda row: row["recordType"] if row["recordType"] not in ["incomingMessage","outgoingMessage"] else row["body"], axis=1)
threads_Att["data-time"] = pd.to_datetime(threads_Att["timestamp"], unit='ms')
threads_Att = threads_Att.rename(columns={"body": "message","contentType":"content-type"})

#export df to excel
threads_Att.to_excel(os.path.join(out_path,'signal.xlsx'), sheet_name="signal", columns=["data-time","profileName","contactPhoneNumber","recordType","message","content-type"], index=False)


ValueError: You are trying to merge on float64 and object columns. If you wish to proceed you should use pd.concat

In [30]:
os.makedirs(os.path.join(out_path,'att'), exist_ok=True)

def copyAttachments(rec):
    rec['file-path'] = None
    if type(rec["localRelativeFilePath"]) == str:
        outfilename = os.path.join(out_path,'att',rec["localRelativeFilePath"].replace('/','_'))
        file_in_archive = [f for f in files if rec["localRelativeFilePath"] in f]
        if len(file_in_archive) > 0:
                if archive_type == DAR:
                    extract_file = subprocess.run([
                            "dar",
                            "-x",
                            archive[:-6],
                            "-g",
                            file_in_archive[0]
                        ],
                        shell=False,
                        capture_output=True,
                        )
                    buf = open(file_in_archive[0], 'rb').read()
                elif archive_type == ZIP:
                    buf = arc_handle.open(file_in_archive[0],'r').read()
                elif archive_type == TAR:
                    buf = arc_handle.extractfile(file_in_archive[0]).read()
                with open(outfilename,"wb") as out:
                    out.write(buf)
                rec['file-path'] = "/".join(outfilename.split('/')[3:])
    return rec

threads_Att = threads_Att.apply(lambda rec: copyAttachments(rec), axis=1)

In [31]:
with open(os.path.join(out_path,'signal.html'),'w') as out:
    out.write('<html><head>')
    out.write(CSS)
    out.write('</head>')
    out.write(chat_HTML)
    out.write(includes)
    out.write(render_chat(threads_Att))
    out.write('</body></html>')