# Fuzzing Certificates

In this notebook, I will try to decipher and fuzz digital certificates, also known as X.509 certificates.

In [None]:
import bookutils

We'll start by _parsing_ certificates.

## Human-Readable Certificates

This is my certificate, coming in PEM format.

In [None]:
CERT_PEM = '''
-----BEGIN CERTIFICATE-----
MIIGGDCCBQCgAwIBAgIMJGWlX/uTZaODs3uIMA0GCSqGSIb3DQEBCwUAMIGNMQsw
CQYDVQQGEwJERTFFMEMGA1UECgw8VmVyZWluIHp1ciBGb2VyZGVydW5nIGVpbmVz
IERldXRzY2hlbiBGb3JzY2h1bmdzbmV0emVzIGUuIFYuMRAwDgYDVQQLDAdERk4t
UEtJMSUwIwYDVQQDDBxERk4tVmVyZWluIEdsb2JhbCBJc3N1aW5nIENBMB4XDTIx
MDMwODEzMzQwOVoXDTI0MDMwNzEzMzQwOVowgbkxCzAJBgNVBAYTAkRFMREwDwYD
VQQIDAhTYWFybGFuZDEVMBMGA1UEBwwMU2FhcmJydWVja2VuMUQwQgYDVQQKDDtD
SVNQQSAtIEhlbG1ob2x0ei1aZW50cnVtIGZ1ZXIgSW5mb3JtYXRpb25zc2ljaGVy
aGVpdCBnR21iSDEQMA4GA1UEBAwHQW5kcmVhczEPMA0GA1UEKgwGWmVsbGVyMRcw
FQYDVQQDDA5aZWxsZXIgQW5kcmVhczCCASIwDQYJKoZIhvcNAQEBBQADggEPADCC
AQoCggEBALo6h1hJw92V8MnN38ry/Spc2G6v+YTTWsWODY2/7pPBizYIefCjDXCL
mkxu7oDUwm7Mbeg+gASeI1wJYpiKc8FknPkMDEAHYmZFpLbyfWJsCRTsu1WEO5So
2nOvedPTjpy7IwHHG7p7H9l5LCzWcA0XBqaGNj0yHwpOD67CA8jcbZ5I41dG3xUW
ApM51M+UqOAzhh0SlRkEgBnRE06jUj+zTVIKlLb9Ho9Bw3CEPdKRpBs6yjouKxwf
apwpaBru/NnEW7gwAwKNIoRlZmiKQniHoE4uMM7e5zWPxaV/co+cn3u8SwnqJjG0
jR07EF+l9Fb3cWGwSQrhp0lFjudp0aUCAwEAAaOCAkgwggJEMD4GA1UdIAQ3MDUw
DwYNKwYBBAGBrSGCLAEBBDAQBg4rBgEEAYGtIYIsAQEECDAQBg4rBgEEAYGtIYIs
AgEECDAJBgNVHRMEAjAAMA4GA1UdDwEB/wQEAwIF4DAdBgNVHSUEFjAUBggrBgEF
BQcDAgYIKwYBBQUHAwQwHQYDVR0OBBYEFJlDyZ5yzYQjHgDxf31rCqDITfLkMB8G
A1UdIwQYMBaAFGs6mIv58lOJ2uCtsjIeCR/oqjt0MBoGA1UdEQQTMBGBD3plbGxl
ckBjaXNwYS5kZTCBjQYDVR0fBIGFMIGCMD+gPaA7hjlodHRwOi8vY2RwMS5wY2Eu
ZGZuLmRlL2Rmbi1jYS1nbG9iYWwtZzIvcHViL2NybC9jYWNybC5jcmwwP6A9oDuG
OWh0dHA6Ly9jZHAyLnBjYS5kZm4uZGUvZGZuLWNhLWdsb2JhbC1nMi9wdWIvY3Js
L2NhY3JsLmNybDCB2wYIKwYBBQUHAQEEgc4wgcswMwYIKwYBBQUHMAGGJ2h0dHA6
Ly9vY3NwLnBjYS5kZm4uZGUvT0NTUC1TZXJ2ZXIvT0NTUDBJBggrBgEFBQcwAoY9
aHR0cDovL2NkcDEucGNhLmRmbi5kZS9kZm4tY2EtZ2xvYmFsLWcyL3B1Yi9jYWNl
cnQvY2FjZXJ0LmNydDBJBggrBgEFBQcwAoY9aHR0cDovL2NkcDIucGNhLmRmbi5k
ZS9kZm4tY2EtZ2xvYmFsLWcyL3B1Yi9jYWNlcnQvY2FjZXJ0LmNydDANBgkqhkiG
9w0BAQsFAAOCAQEAS7Ok9N8qAVgG6t5fa6rMEY4xU2DYIh1Xx8rXgHUa25ULktde
z+hEL2/3GRpA9fiQBccjJ3YVTuE0HuZ0hixbZie4L2aetQMrAO2wTzak42PGww5l
ERbtacNuW7t64s/LmLROKsWeeLDYChyJW1Ql5Wl7kkI9NV1BRPcGgtHqqhQ3CN/J
V4wK0JWPpD1lIQo/IaN/4RXq6unMZ/u1ZbXosXc8NlphAee1W2ZHI4ObWbpvpdBR
sj6PGMKESyLzODcuRMjib+qryiTp1e3PGmunmqS+kjNDsd3iohGQlej/Dsxx9gHT
UbbHFCEoTsnxEte5FcC1djFLrpQxklinOh/xnA==
-----END CERTIFICATE-----
'''

We can view the contents of this certificate using `openssl`:

In [None]:
CERT_PEM_FILE = 'certificate.pem'

In [None]:
with open(CERT_PEM_FILE, 'w') as fp:
    fp.write(CERT_PEM)

In [None]:
!openssl x509 -in {CERT_PEM_FILE} -text -noout 

## Analyzing Binary Certificates

Let us convert this certificate into DER (binary) form. This is what we will work with.

In [None]:
CERT_DER_FILE = 'certificate.der'

In [None]:
!openssl x509 -outform der -in {CERT_PEM_FILE} -out {CERT_DER_FILE}

We can analyze file contents again using `openssl`:

In [None]:
!openssl asn1parse -i -in {CERT_DER_FILE} -inform DER

To decode ASN.1, we make use of the [Python ASN.1 module](https://github.com/andrivet/python-asn1) (Here's the [documentation](https://python-asn1.readthedocs.io/en/latest/)).
We find that our DER file consists of a sequence (0x10 = 16): 

In [None]:
import asn1

In [None]:
# Read the DER-encoded certificate as raw bytes; the context manager
# closes the file handle as soon as the contents are loaded.
with open(CERT_DER_FILE, 'rb') as der_file:
    CERT_DER = der_file.read()
CERT_DER[:10]

In [None]:
decoder_1 = asn1.Decoder()
decoder_1.start(CERT_DER)

In [None]:
tag_1, value_1 = decoder_1.read()

In [None]:
tag_1

In [None]:
assert tag_1.nr == asn1.Numbers.Sequence
assert tag_1.typ == asn1.Types.Constructed

In [None]:
value_1[:10]

This starts with another sequence:

In [None]:
decoder_2 = asn1.Decoder()
decoder_2.start(value_1)

In [None]:
tag_2, value_2 = decoder_2.read()

In [None]:
tag_2

In [None]:
# Check the *second* tag: it must be a constructed sequence as well.
assert tag_2.nr == asn1.Numbers.Sequence
assert tag_2.typ == asn1.Types.Constructed

In [None]:
value_2[:10]

This starts with a context-specific constructed element:

In [None]:
decoder_3 = asn1.Decoder()
decoder_3.start(value_2)

In [None]:
tag_3, value_3 = decoder_3.read()

In [None]:
tag_3

In [None]:
assert tag_3.typ == asn1.Types.Constructed
assert tag_3.cls == asn1.Classes.Context

In [None]:
value_3

This is an encoding for an integer 2:

In [None]:
decoder_4 = asn1.Decoder()
decoder_4.start(value_3)

In [None]:
tag_4, value_4 = decoder_4.read()

In [None]:
tag_4

In [None]:
value_4

In [None]:
assert tag_4.nr == asn1.Numbers.Integer
assert tag_4.typ == asn1.Types.Primitive

## Constructing a Grammar

See [A Layman's Guide to a Subset of ASN.1, BER, and DER](http://luca.ntop.org/Teaching/Appunti/asn1.html) for details.

In [None]:
from Grammars import Grammar, crange, is_valid_grammar, convert_ebnf_grammar
from GrammarFuzzer import display_tree

We start with simple values.

In [None]:
ASN1_EBNF_GRAMMAR: Grammar = {
    '<start>': ['<value>'],
    '<value>': [],  # will be updated later
}

In [None]:
ASN1_CONSTRAINTS = {}

### Simple Types

Every tag identifier can have bits 5-7 set (`0x20` for the type, `0x40` and `0x80` for the class), identifying types and classes. For efficient parsing, we enumerate the variations explicitly in the grammar (rather than identifying via constraints).

In [None]:
def tag_variations(tag):
    """Return the grammar expansions for `tag` in all type/class variants.

    Each expansion consists of the single character
    `chr(tag | typ | cls)`, followed by the nonterminal marking the type
    and the nonterminal marking the class. All constructed variants are
    listed first, then all primitive ones; within each group, classes
    appear in the order universal, application, context, private.
    """
    type_markers = (
        ('<constructed>', asn1.Types.Constructed),    # 0x20
        ('<primitive>', asn1.Types.Primitive),        # 0x00
    )
    class_markers = (
        ('<universal>', asn1.Classes.Universal),      # 0x00
        ('<application>', asn1.Classes.Application),  # 0x40
        ('<context>', asn1.Classes.Context),          # 0x80
        ('<private>', asn1.Classes.Private),          # 0xc0
    )

    return [
        chr(tag | typ | cls) + typ_tag + cls_tag
        for typ_tag, typ in type_markers
        for cls_tag, cls in class_markers
    ]

In [None]:
ASN1_EBNF_GRAMMAR.update({
      '<constructed>': [''],
      '<primitive>': [''],

      '<universal>': [''],
      '<application>': [''],
      '<context>': [''],
      '<private>': [''],
})

For each tag, we define length and value expansions.

In [None]:
USED_TAGS = set()

In [None]:
from pprint import pprint

In [None]:
def add_tag(name, tag, expansions=None, length=None,
            log=False, test=False):
    """Register the element `<name>` in the ASN.1 grammar and constraints.

    Adds the rules `<name>`, `<name>-tag`, `<name>-length`, and
    `<name>-value` to `ASN1_EBNF_GRAMMAR`, appends `<name>` as an
    alternative of `<value>`, stores a constraint relating the length
    field to the value in `ASN1_CONSTRAINTS`, and records `chr(tag)` in
    `USED_TAGS`.

    :param name: element name, without angle brackets
    :param tag: tag number; expanded via `tag_variations()`
    :param expansions: expansions for `<name>-value`
        (default: `['<any-value>']`)
    :param length: expansions for `<name>-length`
        (default: `['<length>']`)
    :param log: if true, print the new rules and constraints
    :param test: if true, solve for `<name>` via `test_solve()`, print the
        result, and return its `display_tree()` rendering
    :return: the result of `display_tree()` if `test` is set, else None
    :raises AssertionError: if `<name>` is already registered, or if the
        grammar is no longer valid after the update
    """
    # FIXME: Make this a class
    # The module-level tables are only mutated here, never rebound,
    # so no `global` declarations are required.
    value_expansions = ['<any-value>'] if expansions is None else expansions
    length_expansions = ['<length>'] if length is None else length

    # Every element may be registered only once
    assert f'<{name}>' not in ASN1_EBNF_GRAMMAR
    assert f'<{name}>' not in ASN1_EBNF_GRAMMAR['<value>']

    new_rules = {
        f'<{name}>': [f'<{name}-tag><{name}-length><{name}-value>'],
        f'<{name}-tag>': tag_variations(tag),
        f'<{name}-length>': length_expansions,
        f'<{name}-value>': value_expansions,
    }
    # The length field (as character code) must equal the value's length
    new_constraints = f'''
forall <{name}>:
    str.to_code(<{name}>.<{name}-length>) = 
        str.len(<{name}>.<{name}-value>)
    '''

    ASN1_EBNF_GRAMMAR['<value>'].append(f'<{name}>')
    ASN1_EBNF_GRAMMAR.update(new_rules)
    ASN1_CONSTRAINTS[f'<{name}>'] = new_constraints
    USED_TAGS.add(chr(tag))

    if log:
        print(f"New tag: <{name}>")
        print("New rules:")
        pprint(new_rules)
        print("\nNew constraints:", end="")
        print(new_constraints)

    # Sanity checks: the element is registered and the grammar still valid
    assert f'<{name}>' in ASN1_EBNF_GRAMMAR
    assert f'<{name}>' in ASN1_EBNF_GRAMMAR['<value>']
    assert is_valid_grammar(ASN1_EBNF_GRAMMAR)

    if not test:
        return None

    tree = test_solve(name)
    print(f"Test solving <{name}>: {repr(str(tree))} {len(str(tree))}")
    return display_tree(tree)

In [None]:
from isla.solver import ISLaSolver

In [None]:
from Grammars import unreachable_nonterminals

In [None]:
def test_solve(name):
    """Solve the grammar and constraints restricted to the element `<name>`.

    Works on copies of `ASN1_EBNF_GRAMMAR` and `ASN1_CONSTRAINTS`: the
    start symbol is set to `<name>`, all nonterminals that become
    unreachable are dropped together with their constraints, and the
    remainder is handed to an `ISLaSolver`.

    :param name: element name, without angle brackets
    :return: the result of `ISLaSolver.solve()`
    """
    # FEATURE-REQUEST: allow to specify a start symbol for solve()
    sub_grammar = dict(ASN1_EBNF_GRAMMAR)
    sub_constraints = dict(ASN1_CONSTRAINTS)

    sub_grammar['<start>'] = [f'<{name}>']
    for symbol in unreachable_nonterminals(sub_grammar):
        del sub_grammar[symbol]
        # Not every nonterminal has a constraint attached
        sub_constraints.pop(symbol, None)

    formula = "".join(sub_constraints.values())
    return ISLaSolver(convert_ebnf_grammar(sub_grammar), formula).solve()

#### Booleans

Booleans are simply `0x00` (False) or `0xff` (True).

In [None]:
add_tag(
    name='boolean',
    tag=asn1.Numbers.Boolean,
    length=['\x01'],
    expansions=['\x00', '\xff'],
    log=True,
    test=True
)

#### Bytes and Lengths

Let us now introduce bytes and lengths.

In [None]:
ASN1_EBNF_GRAMMAR.update({
    '<byte>': crange('\x00', '\xff'),
    '<byte0-127>': crange('\x00', '\x7f'),
    '<byte128-255>': crange('\x80', '\xff'),

    '<length>': ['<short-length>', 
                 '<long-length>'],
    '<short-length>': ['<byte0-127>'],
    '<long-length>': ['<byte128-255><length-value>'],
    '<length-value>': ['<byte>', '<byte><length-value>'],
    
    '<any-value>': ['<byte>*'],
})

#### Integers

In [None]:
add_tag(
    name='integer',
    tag=asn1.Numbers.Integer,
    log=True,
    test=True
)                            

In [None]:
tree = test_solve('integer')

In [None]:
str(tree), len(str(tree))

In [None]:
display_tree(tree)

FIXME: Add constraints re: integers

#### Null

In [None]:
add_tag(
    name='null',
    tag=asn1.Numbers.Null,
    length=['\x00'],
    expansions=['']
)                            

#### More Simple Types

In [None]:
add_tag(
    name='bit-string',
    tag=asn1.Numbers.BitString,
)                            

In [None]:
add_tag(
    name='octet-string',
    tag=asn1.Numbers.OctetString,
)                            

In [None]:
add_tag(
    name='object-identifier',
    tag=asn1.Numbers.ObjectIdentifier,
)                            

In [None]:
add_tag(
    name='enumerated',
    tag=asn1.Numbers.Enumerated,
)                            

In [None]:
add_tag(
    name='utf8-string',
    tag=asn1.Numbers.UTF8String,
)                            

In [None]:
add_tag(
    name='printable-string',
    tag=asn1.Numbers.PrintableString,
)                            

In [None]:
add_tag(
    name='ia5-string',
    tag=asn1.Numbers.IA5String,
)                            

In [None]:
add_tag(
    name='utc-time',
    tag=asn1.Numbers.UTCTime,
)                            

In [None]:
add_tag(
    name='generalized-time',
    tag=asn1.Numbers.GeneralizedTime,
)                            

In [None]:
add_tag(
    name='unicode-string',
    tag=asn1.Numbers.UnicodeString,
)                            

### Structured Types

In [None]:
add_tag(
    name='sequence',
    tag=asn1.Numbers.Sequence,
    expansions=['<value>+']
)                            

In [None]:
add_tag(
    name='set',
    tag=asn1.Numbers.Set,
    expansions=['<value>*']
)                            

### Other Types

We define a generic means to read in values whose tags we haven't seen before.

In [None]:
# All low tag numbers (0..0x1e) that no `add_tag()` call has claimed.
# 0x1f is excluded: it is handled separately as the high-tag marker.
# A set expression avoids leaking a loop variable into the notebook namespace.
UNUSED_TAGS = {chr(tag_nr) for tag_nr in range(0, 0x1f)} - USED_TAGS

In [None]:
# Rules for elements whose tags are not covered by a dedicated rule.
ASN1_EBNF_GRAMMAR.update({
    '<other>': ['<other-tag><other-length><other-value>'],
    '<other-tag>': 
                ['<other-low-tag>', 
                 '<other-high-tag>'],
    # Sort the tags: the iteration order of a set of strings can differ
    # between kernel sessions (hash randomization), which would make the
    # order of alternatives in the grammar non-reproducible.
    # NOTE(review): unlike the tags added via add_tag(), these low tags are
    # listed without their type/class variations - confirm this is intended.
    '<other-low-tag>': sorted(UNUSED_TAGS),
    '<other-high-tag>': ['<high-tag><byte>+'],
    '<high-tag>': tag_variations(0x1f),

    '<other-length>': ['<length>'],
    '<other-value>': ['<byte>*'],
})

# Guard against duplicate alternatives when this cell is run again
if '<other>' not in ASN1_EBNF_GRAMMAR['<value>']:
    ASN1_EBNF_GRAMMAR['<value>'].append('<other>')

## Producing

In [None]:
# Show the constraint texts; iterating the dict itself would only yield
# its keys (the nonterminal names).
print("".join(ASN1_CONSTRAINTS.values()))

In [None]:
ASN1_EBNF_GRAMMAR['<start>'] = ['<sequence>']

In [None]:
assert is_valid_grammar(ASN1_EBNF_GRAMMAR)

In [None]:
ASN1_GRAMMAR = convert_ebnf_grammar(ASN1_EBNF_GRAMMAR)

In [None]:
assert is_valid_grammar(ASN1_GRAMMAR)

In [None]:
solver = ISLaSolver(ASN1_GRAMMAR, "".join(ASN1_CONSTRAINTS.values()))

## Parsing

In [None]:
ASN1_EBNF_GRAMMAR['<start>'] = ['<value>']

In [None]:
ASN1_GRAMMAR = convert_ebnf_grammar(ASN1_EBNF_GRAMMAR)

In [None]:
# Pass the constraints as one joined string, as in the other
# ISLaSolver constructions in this notebook, not as the dict itself.
solver = ISLaSolver(ASN1_GRAMMAR, "".join(ASN1_CONSTRAINTS.values()))

In [None]:
tree = solver.parse('\x02\x01\x02')

In [None]:
display_tree(tree)