## Notebook to run Detect It Easy (DIE) and YARA rules on the list of malware samples provided at /file/path/location, and to parse the results for analysis

In [23]:
import os
import subprocess
import json
import yara

In [24]:
def die(
    samplefile: str,
    subdir: str,
    diec_path: str = r"C:\Users\ricewater\Downloads\die_win64_portable_3.09_x64\diec.exe",
) -> None:
    """Run the DIE console tool on a sample and record its JSON report.

    The parsed report is stored in ``<subdir>/die_result.json`` under the
    sample's base name. Entries already present in that file are kept; an
    entry with the same base name is overwritten.

    Args:
        samplefile (str): Path to the sample file to analyze.
        subdir (str): Path to the directory where the analysis results will be stored.
        diec_path (str): Path to the ``diec`` executable. Defaults to the
            location used on the original analysis machine.

    Raises:
        OSError: If the executable cannot be launched or the result file
            cannot be read or written.
        json.JSONDecodeError: If the tool's stdout (or an existing result
            file) is not valid JSON. For tool output, the message includes
            the exit code and stderr to make the failure diagnosable.
    """
    path_to_file = os.path.join(subdir, 'die_result.json')

    # Run the tool first so a failed scan never touches the result file.
    # stdout is parsed as JSON below, so '-j' is presumably the JSON-output
    # switch; the meaning of '-d' and '-e' should be checked against the
    # DIE documentation.
    result = subprocess.run(
        [diec_path, '-d', '-e', '-j', samplefile],
        capture_output=True,
        text=True,
    )
    try:
        analysis_result = json.loads(result.stdout)
    except json.JSONDecodeError as err:
        # Same exception type as before, but with enough context to see why
        # the tool produced nothing parseable.
        raise json.JSONDecodeError(
            f"DIE returned no valid JSON for {samplefile!r} "
            f"(exit code {result.returncode}, "
            f"stderr: {result.stderr.strip()!r}): {err.msg}",
            err.doc,
            err.pos,
        ) from err

    # Start from the existing results, if any, so earlier entries survive.
    if os.path.exists(path_to_file):
        with open(path_to_file, 'r') as infile:
            data = json.load(infile)
    else:
        data = {}

    data[os.path.basename(samplefile)] = analysis_result

    with open(path_to_file, 'w') as outfile:
        json.dump(data, outfile, indent=4)


In [27]:
def detect_yara_rule(
    samplefile: str,
    subdir: str,
    yara_rule: str = r"C:\Users\ricewater\Downloads\packer.yar",
) -> str:
    """Run YARA rule detection on the given sample file.

    The names of all matching rules are joined with ``', '`` and stored in
    ``<subdir>/packer_yara_rule.json`` under the sample's base name. Entries
    already present in that file are kept; an entry with the same base name
    is overwritten.

    Args:
        samplefile (str): Path to the sample file.
        subdir (str): Path to the directory where the analysis results will be stored.
        yara_rule (str): Path to the YARA rule file to compile. Defaults to
            the location used on the original analysis machine.

    Returns:
        str: Comma-separated names of the matched rules, or an empty string
        when no rule matched.
    """
    json_file_path = os.path.join(subdir, 'packer_yara_rule.json')

    # Start from the existing results, if any, so earlier entries survive.
    if os.path.exists(json_file_path):
        with open(json_file_path, 'r') as infile:
            data = json.load(infile)
    else:
        data = {}

    # Compile the rule file and match it against the sample.
    rules = yara.compile(filepath=yara_rule)
    matches = rules.match(samplefile)

    # Collapse the matched rule names into one string for storage.
    detected_rule = ', '.join(match.rule for match in matches)
    data[os.path.basename(samplefile)] = detected_rule

    with open(json_file_path, 'w') as outfile:
        json.dump(data, outfile, indent=4)

    return detected_rule

In [28]:
# Alternative dataset root, kept for reference:
#root_dir = r'C:\Users\ricewater\Documents\TempCrowdStrike'
root_dir = r'C:\Users\ricewater\Documents\AV_2022_Dataset\new_flattenDataset'

# Only a file whose name equals the name of its containing folder is treated
# as a sample; anything else in the folder (e.g. result JSON files) is skipped.
for subdir, dirs, files in os.walk(root_dir):
    file_hash = os.path.basename(os.path.normpath(subdir))
    if file_hash in files:
        sample_path = os.path.join(subdir, file_hash)
        # The DIE pass is currently disabled:
        #die(sample_path, subdir)
        detect_yara_rule(sample_path, subdir)

  matches = rules.match(samplefile)


In [31]:
import os
import json

def find_status_in_directory(directory_path: str) -> int:
    """
    Count the entries marked as packed in every 'die_result.json' file
    found under a directory tree.

    Each 'die_result.json' is read as a JSON object; every value in it whose
    'status' field equals 'packed' adds one to the count.

    Args:
        directory_path (str): The path to the directory to search in.

    Returns:
        int: The total number of entries with status 'packed' across all
        'die_result.json' files.
    """
    packed_count = 0

    # Iterate through the directory and its subdirectories
    for root, _dirs, files in os.walk(directory_path):
        if 'die_result.json' not in files:
            continue
        with open(os.path.join(root, 'die_result.json'), 'r', encoding='utf-8') as f:
            data = json.load(f)
        # A result file may hold several entries; each one is counted.
        packed_count += sum(
            1 for value in data.values() if value.get('status') == 'packed'
        )

    return packed_count

# Report how many entries are marked 'packed' under the dataset root.
root_dir = r'C:\Users\ricewater\Documents\AV_2022_Dataset\new_flattenDataset'
packed_count = find_status_in_directory(directory_path=root_dir)
print(packed_count)


2680


In [38]:
import os
import json

def find_unique_rules_in_directory(directory_path: str) -> tuple:
    """
    Collect the YARA rule names recorded in every 'packer_yara_rule.json'
    file found under a directory tree.

    Each value in a result file is a string of rule names separated by
    ', '. Empty values (no rule matched) are ignored.

    Args:
        directory_path (str): The path to the directory to search in.

    Returns:
        tuple: ``(unique_rules, file_count_with_rules)`` where
        ``unique_rules`` is the set of distinct rule names seen and
        ``file_count_with_rules`` is the number of entries that had at
        least one rule.
    """
    unique_rules = set()
    file_count_with_rules = 0

    # Iterate through the directory and its subdirectories
    for root, _dirs, files in os.walk(directory_path):
        if 'packer_yara_rule.json' not in files:
            continue
        with open(os.path.join(root, 'packer_yara_rule.json'), 'r', encoding='utf-8') as f:
            data = json.load(f)
        for value in data.values():
            if not value:
                continue
            # A non-empty string always yields at least one rule name.
            unique_rules.update(value.split(', '))
            file_count_with_rules += 1

    return unique_rules, file_count_with_rules

# List every distinct rule name recorded under the dataset root, followed by
# the number of entries that had at least one rule.
root_dir = r'C:\Users\ricewater\Documents\AV_2022_Dataset\new_flattenDataset'
unique_rules, file_count_with_rules = find_unique_rules_in_directory(directory_path=root_dir)

print("Unique rules triggered across all files:")
for rule in unique_rules:
    print(rule)

print(file_count_with_rules)


Unique rules triggered across all files:
UPXv20MarkusLaszloReiser
ASPackv212AlexeySolodovnikov
UPXV200V290MarkusOberhumerLaszloMolnarJohnReiser
Armadillov1xxv2xx
Borland
UPX20030XMarkusOberhumerLaszloMolnarJohnReiser
mpress_2_xx_x64
upx_3
PECompactv2xx
DevCv5
ASProtectV2XDLLAlexeySolodovnikov
UPX290LZMAMarkusOberhumerLaszloMolnarJohnReiser
PureBasicDLLNeilHodgson
BobSoftMiniDelphiBoBBobSoft
mpress_2_xx_x86
pecompact2
PureBasic4xNeilHodgson
PellesC28x45xPelleOrinius
UPXProtectorv10x2
EnigmaProtector11X13XSukhovVladimirSergeNMarkin
NETDLLMicrosoft
NETexecutableMicrosoft
MinGWGCC3x
FSGv20
D1S1Gv11betaD1N
PECompactV2XBitsumTechnologies
winrar_sfx
PECompact2xxBitSumTechnologies
PellesC300400450EXEX86CRTLIB
623


In [41]:
import os
import json

def count_files_with_packed_status_and_non_empty_yara(directory_path: str) -> int:
    """
    Count the directories whose 'die_result.json' reports a 'packed' status
    and whose 'packer_yara_rule.json' holds a non-empty value.

    A directory adds at most one to the count. It qualifies when at least one
    entry in 'die_result.json' has status 'packed' and at least one value in
    'packer_yara_rule.json' is non-empty. The two conditions are not matched
    up per sample, so the result equals the number of samples only when each
    directory holds results for a single sample.

    Args:
        directory_path (str): The path to the directory to search in.

    Returns:
        int: The count of directories meeting the criteria.
    """
    count = 0

    # Iterate through the directory and its subdirectories
    for root, _dirs, files in os.walk(directory_path):
        if 'die_result.json' not in files:
            continue

        packer_yara_path = os.path.join(root, 'packer_yara_rule.json')
        if not os.path.exists(packer_yara_path):
            continue

        with open(os.path.join(root, 'die_result.json'), 'r', encoding='utf-8') as die_file:
            die_result_data = json.load(die_file)
        if not any(value.get('status') == 'packed' for value in die_result_data.values()):
            continue

        # Read the YARA results once, and only when a packed entry exists.
        with open(packer_yara_path, 'r', encoding='utf-8') as yara_file:
            packer_yara_data = json.load(yara_file)
        if any(packer_yara_data.values()):
            count += 1

    return count

# Report how many result directories have both a 'packed' DIE status and a
# non-empty YARA result.
root_dir = r'C:\Users\ricewater\Documents\AV_2022_Dataset\new_flattenDataset'
count = count_files_with_packed_status_and_non_empty_yara(directory_path=root_dir)
print(
    "Number of files where 'die_result.json' indicates 'packed' status and 'packer_yara.json' has a non-empty value:",
    count,
)


Number of files where 'die_result.json' indicates 'packed' status and 'packer_yara.json' has a non-empty value: 222
