Skip to content

Commit

Permalink
Temporary fix for extraction of encrypted objects per #7, minor loggi…
Browse files Browse the repository at this point in the history
…ng changes, adding --debug flag
  • Loading branch information
Tom Lancaster committed Jul 20, 2023
1 parent e71a67f commit 42dc4f4
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 12 deletions.
2 changes: 1 addition & 1 deletion tools/one-extract/onenoteextractor/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.3"
__version__ = "0.0.4"
9 changes: 9 additions & 0 deletions tools/one-extract/onenoteextractor/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
"""Example script showing use of OneNoteExtractor."""

import argparse
import logging
import os
import textwrap

from . import OneNoteExtractor
from ._version import __version__

logger = logging.getLogger(__name__)

def run():
"""CLI entry point."""
Expand All @@ -24,6 +26,8 @@ def run():
formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument("target_file", type=str, help="Input file to parse")
parser.add_argument("--debug", help="If enabled, sets log level to debug",
action="store_true")
parser.add_argument("--extract-meta", help="If set, extracts metadata from .one file",
action="store_true")
parser.add_argument("--extract-files", help="If set, extracts files from .one file",
Expand All @@ -39,6 +43,11 @@ def run():
if not args.extract_meta and not args.extract_files:
exit("Must either attempt to extract metadata or files.")

if args.debug:
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(name)s %(levelname)-8s %(message)s',
handlers=[logging.StreamHandler()])
logger.debug("Debug logging enabled.")
with open(args.target_file, 'rb') as infile:
data = infile.read()

Expand Down
66 changes: 66 additions & 0 deletions tools/one-extract/onenoteextractor/enc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""This is a temporary inclusion in this project to address an unknown issue when using msoffcrypto.
https://github.com/volexity/threat-intel/issues/7
Much of the code in this file was borrowed from:
https://github.com/nolze/msoffcrypto-tool/blob/master/msoffcrypto/method/ecma376_agile.py
Changes relative to the upstream implementation are marked with "# !NOTE" comments.
"""

# builtins
import io
import functools
import logging
from hashlib import sha1, sha256, sha384, sha512
from struct import unpack, pack

# installables
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

# Lookup table from hash algorithm name (as passed to _get_hash_func) to the
# matching hashlib constructor.
ALGORITHM_HASH = {
"SHA1": sha1,
"SHA256": sha256,
"SHA384": sha384,
"SHA512": sha512,
}

logger = logging.getLogger(__name__)


def _get_hash_func(algorithm):
    """Return the hashlib constructor registered for ``algorithm``.

    Names that are not present in ``ALGORITHM_HASH`` fall back to ``sha1``.
    """
    try:
        return ALGORITHM_HASH[algorithm]
    except KeyError:
        # Unknown algorithm name: default to SHA-1.
        return sha1


def decrypt(key, keyDataSalt, hashAlgorithm, ibuf):
    r"""
    Decrypt the AES-CBC encrypted stream in ``ibuf`` and return the plaintext bytes.

    The first 4 bytes of ``ibuf`` are read as a little-endian unsigned integer
    giving the total plaintext size; encrypted data is then read starting at
    offset 8. For each chunk read, the IV is the first 16 bytes of
    ``hash(keyDataSalt + <little-endian chunk index>)`` using the hash selected
    by ``hashAlgorithm``.

    >>> key = b'@ f\t\xd9\xfa\xad\xf2K\x07j\xeb\xf2\xc45\xb7B\x92\xc8\xb8\xa7\xaa\x81\xbcg\x9b\xe8\x97\x11\xb0*\xc2'
    >>> keyDataSalt = b'\x8f\xc7x"+P\x8d\xdcL\xe6\x8c\xdd\x15<\x16\xb4'
    >>> hashAlgorithm = 'SHA512'
    """
    digest_factory = _get_hash_func(hashAlgorithm)

    plaintext = io.BytesIO()
    (totalSize,) = unpack("<I", ibuf.read(4))
    logger.debug("totalSize: {}".format(totalSize))
    bytes_left = totalSize
    # Skip the 8-byte header (4 size bytes plus 4 further bytes) before the data.
    ibuf.seek(8)
    # !NOTE - the key change made is that instead of iterating over 4KB segments,
    # we read the data in a single buffer of totalSize bytes; this resolves the
    # issue referenced in the module docstring of this file.
    chunk_index = 0
    while True:
        chunk = ibuf.read(totalSize)
        if chunk == b"":
            # Nothing left to read from the input stream.
            break
        block_salt = keyDataSalt + pack("<I", chunk_index)
        # AES-CBC needs a 16-byte IV; keep only the leading bytes of the digest.
        iv = digest_factory(block_salt).digest()[:16]
        cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
        decryptor = cipher.decryptor()
        decrypted = decryptor.update(chunk) + decryptor.finalize()
        if bytes_left < len(chunk):
            # Drop bytes beyond the declared plaintext size.
            decrypted = decrypted[:bytes_left]
        plaintext.write(decrypted)
        bytes_left -= len(chunk)
        chunk_index += 1
    return plaintext.getvalue()
27 changes: 17 additions & 10 deletions tools/one-extract/onenoteextractor/one.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,16 @@
import json
import re
import struct
import traceback
from typing import Iterator
from xml.dom.minidom import parseString

# installables
from msoffcrypto.method.ecma376_agile import ECMA376Agile

# locals
from .enc import decrypt

EMBEDDED_FILE_MAGIC = b"\xe7\x16\xe3\xbd\x65\x26\x11\x45\xa4\xc4\x8d\x4d\x0b\x7a\x9e\xac" # noqa E501
TITLE_MAGIC = b"\xf3\x1c\x00\x1c\x30\x1c\x00\x1c\xff\x1d\x00\x14\x82\x1d\x00\x14" # noqa E501
HEADER = b"\xe4\x52\x5c\x7b\x8c\xd8\xa7\x4d\xae\xb1\x53\x78\xd0\x29\x96\xd3"
Expand All @@ -42,7 +46,6 @@

logger = logging.getLogger(__name__)


class OneNoteExtractorException(Exception):
"""Custom exception handler for OneNoteExtractor."""

Expand Down Expand Up @@ -141,10 +144,10 @@ def _decrypt_embedded_object(self, blob: bytes):
if not self.enc_info:
raise OneNoteExtractorException("Unreachable code reached")
buf = BytesIO(blob)
obuf = ECMA376Agile.decrypt(key=self.enc_info['secret_key'],
keyDataSalt=self.enc_info["keyDataSalt"],
hashAlgorithm=self.enc_info["keyDataHashAlgorithm"],
ibuf=buf)
obuf = decrypt(key=self.enc_info['secret_key'],
keyDataSalt=self.enc_info["keyDataSalt"],
hashAlgorithm=self.enc_info["keyDataHashAlgorithm"],
ibuf=buf)
return obuf[8:]

def derive_enc_info(self,
Expand Down Expand Up @@ -219,7 +222,8 @@ def extract_files(self) -> Iterator[bytes]:
if match:
try:
counter = 0
for counter, m in enumerate(match):
for m in match:
counter += 1
size_offset = m.start() + 16
size = self.data[size_offset:size_offset + 4]
size_bytes = bytearray(size)
Expand All @@ -230,13 +234,16 @@ def extract_files(self) -> Iterator[bytes]:
# [4 bytes of size] [4 unknown bytes] [ data]
# but the format in .one files is different, so we artificially
# create a similar structure here.
logger.debug("Decrypting embedded object")
yield self._decrypt_embedded_object(size_bytes + b"\x00\x00\x00\x00" + blob)
else:
yield blob
logger.debug(f"{counter} files extracted.")
return
except Exception as e:
logger.error(f"Error while parsing the file: {e}.")
if logger.getEffectiveLevel() == logging.DEBUG:
traceback.print_exc()
return
else:
logger.debug("No embedded files found.")
Expand All @@ -261,8 +268,8 @@ def extract_meta(self) -> Iterator[OneNoteMetadataObject]:
size_offset = offset + 4 + (4 * i_adjustment)
size = self.data[size_offset:size_offset + 4]
i_size = struct.unpack("<I", bytearray(size))[0]
str = self.data[size_offset + 4:size_offset + 4 + i_size]\
.decode()
logger.debug(f"title offset: {size_offset}")
title_str = self.data[size_offset + 4:size_offset + 4 + i_size].decode()
creatDate_offset = size_offset + 4 + i_size + 32
creatDate = self.data[creatDate_offset:creatDate_offset + 8]
h_createDate = self._get_time(creatDate)
Expand All @@ -287,11 +294,11 @@ def extract_meta(self) -> Iterator[OneNoteMetadataObject]:
yield OneNoteMetadataObject(object_id=index,
offset=offset,
title_size=i_size,
title=str.replace("\x00", ""),
title=title_str.replace("\x00", ""),
creation_date=h_createDate,
last_modification_date=h_LastDate)

except Exception as e:
logger.error(f"Error while parsing object {cpt}")
logger.error(f"Error while parsing object {cpt}")
logger.error(f"Error: {e}.")
return ret
4 changes: 3 additions & 1 deletion tools/one-extract/tox.ini
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
[flake8]
max-line-length=100
max-line-length=100
per-file-ignores =
onenoteextractor/one.py:D107

0 comments on commit 42dc4f4

Please sign in to comment.