### Misp to elk
Python script that uploads a MISP database to Elasticsearch.

#### How to : 
The easiest way to test is to use Docker.  
[elastic docker page](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html)  
[basic elk visualisation](https://github.com/mobz/elasticsearch-head)  
```bash
sudo docker run -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" docker.elastic.co/elasticsearch/elasticsearch:6.3.2
```


- __mispUrlLocal__ : misp url
- __mispKeyLocal__ : misp api key. Pay attention to the rights associated with the API key:
an admin api key will export the whole database.
- __elkUrlLocal__ : elastic url
- __esIndexName__ : elastic index name
***

In [None]:
import json, requests, time

# --- misp source instance ---
mispUrlLocal = 'http://misp.local/'  # base url of the misp instance (ends with "/")
mispKeyLocal = 'xxxxxxxx'  # api key sent in the Authorization header

# --- elk target instance ---
elkUrlLocal = 'http://127.0.0.1:9200/'  # base url of the elk instance (ends with "/")
esIndexName = 'misptest'  # name of the elk index receiving the documents

# Local file used to store the misp json export.
INPUT_FILENAME = 'data.json'

# Shared HTTP session: every request made through it carries these headers.
session = requests.Session()
session.headers.update({
    'Authorization': mispKeyLocal,
    'Accept': 'application/json',
    'content-type': 'application/json',
})

# Text buffer holding the bulk payload that has not been sent yet.
pushToElk_accumulator = ""
# Url of the index itself.
uri = "{}{}".format(elkUrlLocal, esIndexName)
# Bulk endpoint of the index for the "attributes" doc_type.
pushToElk_uri = "{}/attributes/_bulk".format(uri)

### elastic mapping
In order to optimize the indexing in the elastic database, it is necessary to apply a specific mapping.

In [None]:
# Index settings and explicit field mapping for the "attributes" doc_type.
sch = {
    "settings": {
        "analysis": {
            "analyzer": {
                # Splits on whitespace only and lowercases each token.
                "lowercase_whitespace_break": {
                    "type": "custom",
                    "tokenizer": "whitespace",
                    "filter": ["lowercase"]
                }
            }
        }
    },
    "mappings": {
        "attributes": {
            "properties": {
                # Kept for backward compatibility; the documents built by
                # loadMispEvent carry the field "category" (mapped below).
                "attributecategory": {
                    "type": "keyword"
                },
                "category": {
                    "type": "keyword"
                },
                "comment": {
                    "type": "text"
                },
                "event.creator.email": {
                    "type": "keyword"
                },
                "event.date": {
                    "type": "date"
                },
                "event.galaxy": {
                    "type": "keyword"
                },
                "event.id": {
                    "type": "integer"
                },
                "event.info": {
                    "type": "text",
                    "analyzer": "lowercase_whitespace_break"
                },
                "event.org.name": {
                    "type": "keyword"
                },
                "event.org.uuid": {
                    "type": "keyword"
                },
                "event.orgc.name": {
                    "type": "keyword"
                },
                "event.orgc.uuid": {
                    "type": "keyword"
                },
                "event.tag": {
                    "type": "keyword"
                },
                # Timestamps are sent with "000" appended to the misp value
                # (seconds -> milliseconds), hence epoch_millis.
                "event.timestamp": {
                    "type": "date",
                    "format": "epoch_millis"
                },
                "event.uuid": {
                    "type": "keyword"
                },
                "id": {
                    "type": "integer"
                },
                "object.comment": {
                    "type": "text",
                    "analyzer": "lowercase_whitespace_break"
                },
                "object.id": {
                    "type": "integer"
                },
                "object.name": {
                    "type": "keyword"
                },
                "object.relation": {
                    "type": "keyword"
                },
                "object.template.uuid": {
                    "type": "keyword"
                },
                "object.template.version": {
                    "type": "integer"
                },
                "object.timestamp": {
                    "type": "date",
                    "format": "epoch_millis"
                },
                "object.uuid": {
                    "type": "keyword"
                },
                "tag": {
                    "type": "keyword"
                },
                "timestamp": {
                    "type": "date",
                    "format": "epoch_millis"
                },
                "type": {
                    "type": "keyword"
                },
                "uuid": {
                    "type": "keyword"
                },
                "value": {
                    "type": "text",
                    "analyzer": "lowercase_whitespace_break"
                },
                "galaxies": {
                    "type": "keyword"
                }
            }
        }
    }
}

#### add mapping
Push mapping to the elastic database for optimisation

In [None]:
def addMappingElk():
    """Recreate the elk index with the mapping stored in ``sch``.

    The index at ``uri`` is deleted first, then created again with a PUT
    request whose JSON body is ``sch``. Connection problems and unexpected
    status codes are reported on stdout; they are not raised.
    """
    # Drop the previous index (best effort) so the mapping applies to a fresh one.
    try:
        requests.delete(uri)
    except requests.exceptions.RequestException as e:
        print("could not reach server : ")
        print(e)

    try:
        response = requests.put(uri, json=sch)
    except requests.exceptions.RequestException as e:
        print("could not reach server : ")
        print(e)
        # Without a response there is no status code to inspect.
        return

    if response.status_code not in (200, 201):
        print("server did not respond with status code 200 or 201")
        print(response.text)


#### Flush
Make a put request to elk

In [None]:
def flushToElk():
    """Send the accumulated bulk payload to elk and empty the accumulator.

    Does nothing when the accumulator is empty. The accumulator is only
    cleared after a successful import.

    Raises:
        Exception: when the request is rejected (HTTP status >= 400) or when
            the bulk response reports errors on some of its items.
    """
    global pushToElk_accumulator
    # Nothing pending: do not send a bulk request with an empty body.
    if not pushToElk_accumulator:
        return

    response = requests.put(
        pushToElk_uri,
        data=pushToElk_accumulator,
        headers={"Content-Type": "application/x-ndjson"},
    )
    # A rejected request is not guaranteed to carry an "errors" key, so the
    # HTTP status is checked before looking at the per-item result.
    if response.status_code >= 400:
        print(response.text)
        raise Exception("Error during importation")

    responseJSON = response.json()
    if responseJSON.get("errors"):
        print(response.text)
        raise Exception("Error during importation")
    pushToElk_accumulator = ""

#### accumulator
This function concatenates JSON documents into a single bulk payload.  
When the payload length exceeds 819200 characters, the JSON elements held in pushToElk_accumulator are persisted to the elastic database.

In [None]:
# send bulk data
def pushToElk(doc):
    """Queue ``doc`` for bulk indexing, flushing once the buffer is large.

    Two lines are appended to ``pushToElk_accumulator``: an "index" action
    whose ``_id`` is ``doc["uuid"]``, then the document itself. When the
    buffer holds more than 819200 characters, ``flushToElk`` is called.
    """
    global pushToElk_accumulator
    # Action line: the document id is the uuid found in the doc.
    action = {"index": {"_id": doc["uuid"]}}
    pushToElk_accumulator = "{}{}\n".format(pushToElk_accumulator, json.dumps(action))
    # Source line: the document, which still contains its "uuid" field.
    pushToElk_accumulator = "{}{}\n".format(pushToElk_accumulator, json.dumps(doc))
    if len(pushToElk_accumulator) <= 819200:
        return
    flushToElk()

#### load misp event
This function loads a misp event and creates one elk doc per attribute.  
The primary doc key is the attribute uuid  
**basic overview :**
+ loop on each attribute in the 'Event' key
+ loop on each attribute in 'Event'.'Object'
+ loop on each objectReference in 'Event'.'Object' and save it as an attribute

In [None]:
def _toMillis(timestamp):
    """Return ``timestamp`` (epoch seconds) as an epoch-milliseconds string.

    misp timestamps are in seconds while the documents carry milliseconds,
    so "000" is appended.
    """
    return str(timestamp) + "000"


def _collectTagNames(item):
    """Return the tag names of ``item`` followed by its galaxy cluster tag names."""
    names = [t["name"] for t in item.get("Tag") or []]
    for g in item.get("Galaxy") or []:
        names.extend(Gc["tag_name"] for Gc in g["GalaxyCluster"])
    return names


def _eventFields(event):
    """Return the ``event.*`` fields shared by every attribute doc of ``event``."""
    fields = {
        "event.date": event["date"],
        "event.uuid": event["uuid"],
        "event.timestamp": _toMillis(event["timestamp"]),
        "event.info": event["info"],
        "event.org.name": event["Org"]["name"],
        "event.org.uuid": event["Org"]["uuid"],
        "event.orgc.name": event["Orgc"]["name"],
        "event.orgc.uuid": event["Orgc"]["uuid"],
    }
    if event.get("event_creator_email") is not None:
        fields["event.creator.email"] = event["event_creator_email"]
    return fields


def _attributeDoc(a, event):
    """Build the elk doc of attribute ``a`` belonging to ``event``."""
    doc = {
        "uuid": a["uuid"],
        "value": a["value"],
        "event.id": a["event_id"],
        "type": a["type"],
        "id": a["id"],
        "comment": a["comment"],
        "category": a["category"],
        "timestamp": _toMillis(a["timestamp"]),
    }
    doc.update(_eventFields(event))
    # Galaxies are stored as tags, for the attribute as well as for the event.
    doc["tag"] = _collectTagNames(a)
    doc["event.tag"] = _collectTagNames(event)
    return doc


def _objectAttributeDoc(a, o, event):
    """Build the elk doc of attribute ``a`` of object ``o``, with ``object.*`` fields."""
    doc = _attributeDoc(a, event)
    doc["object.id"] = a["object_id"]
    doc["object.relation"] = a["object_relation"]
    doc["object.uuid"] = o["uuid"]
    doc["object.timestamp"] = _toMillis(o["timestamp"])
    doc["object.name"] = o["name"]
    doc["object.comment"] = o["comment"]
    doc["object.template.uuid"] = o["template_uuid"]
    doc["object.template.version"] = o["template_version"]
    # Object tags are only added when the object carries a "Tag" key.
    if "Tag" in o:
        doc["object.tag"] = [t["name"] for t in o["Tag"]]
    return doc


def _referenceDoc(ref):
    """Build the elk doc that stores object reference ``ref`` as an attribute."""
    return {
        "uuid": ref["uuid"],
        "timestamp": _toMillis(ref["timestamp"]),
        "comment": ref["comment"],
        "object.uuid": ref["object_uuid"],
        "object.referenced.uuid": ref["referenced_uuid"],
        "object.referenced.type": ref["referenced_type"],
        "object.relationship.type": ref["relationship_type"],
    }


def loadMispEvent(evt):
    """Convert one misp event into elk docs and hand them to ``pushToElk``.

    One doc is pushed per event attribute, per object reference and per
    object attribute; each doc is keyed by its own "uuid". Objects without
    attributes are skipped, together with their references. A dict without
    an "Event" key is ignored, and missing "Attribute", "Object" or
    "ObjectReference" keys are treated as empty lists.

    Args:
        evt: dict holding the event under the "Event" key.
    """
    if "Event" not in evt:
        return
    event = evt["Event"]

    for a in event.get("Attribute") or []:
        pushToElk(_attributeDoc(a, event))

    for o in event.get("Object") or []:
        # Do not save anything for an object that has no attribute.
        if not o.get("Attribute"):
            continue

        # Save each object reference as an attribute.
        for ref in o.get("ObjectReference") or []:
            pushToElk(_referenceDoc(ref))

        for a in o["Attribute"]:
            pushToElk(_objectAttributeDoc(a, o, event))

#### upstream feature
1. Query misp to restart workers.
2. regenerate json export
3. Check if job is 100%
4. save the json file

In [None]:
# Restart the workers to make sure they are active.
# The call answers with a 500 but that is ok, so the response is ignored.
# You need to have an admin api key.
session.post(mispUrlLocal+'servers/restartWorkers/')

# Regenerate the json export.
# Take care of the api key: with an admin key the whole misp database is dumped.
r = session.get(mispUrlLocal+"jobs/cache/json")
print(r.json())

# Poll the job until it reports that the file is ready.
while True:
    r = session.post(mispUrlLocal+"jobs/getProgress/cache_json").json()
    if r.get('progress') == "100%":
        break
    # Wait before every new poll, including when no progress value is
    # reported, so that the server is never queried in a tight loop.
    time.sleep(2)
print("Json generation done!")

In [None]:
# mispUrlLocal already ends with "/", so the path is appended without a
# leading slash (same convention as the other misp calls).
export_response = session.get(mispUrlLocal+"events/downloadExport/json")
# Fail here rather than saving an error payload as if it were the export.
export_response.raise_for_status()
data = export_response.json()
# Save the export to the local file
with open(INPUT_FILENAME, 'w') as f:
    json.dump(data, f)
print("Json filename : {}".format(INPUT_FILENAME))

#### main function
1. add mapping to elk
2. read the events from the saved file: this is better than looping on each event with a GET query :)
3. loop on each event in json file

In [None]:
# Recreate the elk index with the specific mapping before importing
addMappingElk()

# Read the saved json export back from disk
with open(INPUT_FILENAME) as f:
    data = json.loads(f.read())

# Turn every exported event into elk docs
for evt in data['response']:
    loadMispEvent(evt)

# Send whatever is still waiting in the accumulator
flushToElk()

print("All done")