Skip to content

Commit

Permalink
Merge pull request #38 from DefCon42/master
Browse files (browse the repository at this point in the history)
Significantly improve Catalog1 matching speed.
  • Loading branch information
xabiugarte committed Dec 10, 2019
2 parents: bc114d4 + b842828; commit f453564
Showing 1 changed file with 14 additions and 14 deletions.
28 changes: 14 additions & 14 deletions server/first_core/engines/catalog1.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,7 +40,7 @@
from django.core.exceptions import ObjectDoesNotExist

NUM_PERMS = 64

MATCH_THRESHOLD = 80

class Catalog1(models.Model):
sha256 = models.CharField(max_length=64)
Expand Down Expand Up @@ -128,8 +128,7 @@ def _scan(self, opcodes, architecture, apis, disassembly):

# Step 0: Let's try to see if the same catalog1_sha256 exists:
try:
db_obj = Catalog1.objects.get(sha256=catalog1_sha256,
architecture=architecture)
db_obj = Catalog1.objects.get(sha256=catalog1_sha256, architecture=architecture)
if db_obj:
for f in db_obj.functions.all():
similarity = 100.0
Expand All @@ -146,20 +145,21 @@ def _scan(self, opcodes, architecture, apis, disassembly):
pass

# Step 1: Let's search all the matching catalog1 hashes
matching_function_ids = list()
for ch in catalog1hashes:
db_result = Catalog1.objects.filter(catalog1hashes__catalog_hash=ch,
architecture=architecture)
for db_obj in db_result:
for f in db_obj.functions.all():
matching_function_ids.append(f.func)
matching_catalog_functions = Catalog1.objects.filter(architecture=architecture, catalog1hashes__catalog_hash__in=catalog1hashes).values_list('functions', flat=True)
matching_function_columns = Catalog1Functions.objects.filter(id__in=matching_catalog_functions)

cc = Counter(matching_function_ids)
for function_id, counter in cc.most_common(10):
matching_function_ids = {}
for func_column in matching_function_columns:
matching_function_ids[func_column.id] = func_column.func

cc = Counter(matching_catalog_functions)
for func_m_id, counter in cc.most_common(10):
if counter > 0:
function_id = matching_function_ids[func_m_id]
similarity = counter * 100 / NUM_PERMS
print("Catalog1 log: %d %f" % (function_id, similarity))
result.append(FunctionResult(str(function_id), similarity))

if similarity > MATCH_THRESHOLD:
result.append(FunctionResult(str(function_id), similarity))

return result

Expand Down

0 comments on commit f453564

Please sign in to comment.