In [1]:
import os
import re
import logging
from parsing import CodeParser
from cache_utils import FunctionStateCache
from utils import load_cfile

logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("debug.log", 'w', encoding='utf-8'),
        logging.StreamHandler(),
    ]
)
logger = logging.getLogger(__name__)

pwd = os.getcwd()
fp = os.path.join(pwd, "data", "test4.c")
src_code = load_cfile(fp)

function_pattern = re.compile(r"\b(\w+)\s+(\w+)\s*\((.*?)\)\s*{")
variable_pattern = re.compile(r"\b(int|float|double|char)\s+(\w+)\s*=\s*([^;]+);")
argument_pattern = re.compile(r"\b(\w+)\s+(\w+)(?:\s*,\s*)?")
function_body_pattern = re.compile(r'\b\w+\s+\w+\s*\([^)]*\)\s*{([^{}]*(?:{[^{}]*}[^{}]*)*)}')

caches = FunctionStateCache()
parser = CodeParser(src_code, caches)
print()
caches._output_()

2024-11-12 10:13:54,762 - DEBUG - 
Function Name: functionA 
Return Type: void 
Arguments: {} 
Content: 
printf("In functionA\n");

2024-11-12 10:13:54,763 - DEBUG - 
Function Name: functionB 
Return Type: void 
Arguments: {} 
Content: 
printf("In functionB\n");

2024-11-12 10:13:54,763 - DEBUG - 
Function Name: functionC 
Return Type: void 
Arguments: {} 
Content: 
printf("In functionC\n");

2024-11-12 10:13:54,765 - DEBUG - 
Function Name: main 
Return Type: int 
Arguments: {} 
Content: 
int condition1; int condition2; scanf("%d", &condition1); scanf("%d", &condition2); if (condition1==1) { functionA(); if (condition2==0) { functionB(); } else { functionC(); } } return 0;

2024-11-12 10:13:54,765 - DEBUG - DEBUG MATCH: <re.Match object; span=(0, 15), match='int condition1;'>
2024-11-12 10:13:54,766 - DEBUG - statement: int condition1;
2024-11-12 10:13:54,767 - DEBUG - DEBUG MATCH: <re.Match object; span=(16, 31), match='int condition2;'>
2024-11-12 10:13:54,767 - DEBUG - statement: i

RecursionError: maximum recursion depth exceeded

In [3]:
from utils import shrink_code
def extract_if_block(code):
    # Pattern for if block content
    if_pattern = r'if\s*\([^)]*\)\s*{([^{}]*(?:{[^{}]*}[^{}]*)*)}'
    match = re.search(if_pattern, code)
    return match.group(1).strip() if match else ''

def extract_else_block(code):
    # Pattern for else block content
    else_pattern = r'else\s*{([^{}]*(?:{[^{}]*}[^{}]*)*)}'
    match = re.search(else_pattern, code)
    return match.group(1).strip() if match else ''

# Example usage
code = '''
if (condition1==1) {
    functionA();
    if (condition2==0) {
        functionB();
    } else {
        functionC();
    }
} else {
    functionD();
    functionE();
}
'''

if_content = extract_if_block(shrink_code(code))
else_content = extract_else_block(shrink_code(code))

print("If block content:")
print(if_content)
print("\nElse block content:")
print(else_content)

If block content:
functionA(); if (condition2==0) { functionB(); } else { functionC(); }

Else block content:
functionC();


In [None]:
import re
contents_pattern = re.compile(r'(?s)(?<=\{)(.*?)(?=\}(?:\s*\n*[^\}]|$))')
match = re.finditer(contents_pattern, """
#include <stdio.h>

void functionA() {
    printf("In functionA\n");
}

void functionB() {
    printf("In functionB\n");
}

void functionC() {
    printf("In functionC\n");
}

int main() {
    int condition1;
    int condition2;
    scanf("%d", &condition1);
    scanf("%d", &condition2);
    if (condition1==1) {
        functionA();
        if (condition2==0) {
            functionB();
        } else {
            functionC();
        }
    }
    return 0;
}
""")
[match.group(1).strip() for match in match]

In [None]:
from utils import shrink_code
def extract_all_function_bodies(code):
    # Pattern that handles balanced braces
    pattern = r'(?s)(\w+\s*\([^)]*\)\s*{)((?:[^{}]|{(?:[^{}]|{[^{}]*})*})*})'
    matches = re.finditer(pattern, code)
    bodies = []
    
    for match in matches:
        # Extract just the body part between braces
        full_match = match.group(2)
        # Remove the outer braces and strip whitespace
        body = full_match[1:-1].strip()
        bodies.append(body)
    
    return bodies

# Example code
code = '''
#include <stdio.h>

void functionA() {
    printf("In functionA\n");
}

void functionB() {
    printf("In functionB\n");
}

void functionC() {
    printf("In functionC\n");
}

int main() {
    int condition1;
    int condition2;
    scanf("%d", &condition1);
    scanf("%d", &condition2);
    if (condition1==1) {
        functionA();
        if (condition2==0) {
            functionB();
        } else {
            functionC();
        }
    }
    return 0;
}
'''

bodies = extract_all_function_bodies(shrink_code(code))
for i, body in enumerate(bodies, 1):
    print(f"Function {i} body:")
    print(body)
    print("-" * 50)

In [None]:
list(set(['8', '3', '6', '3']))

In [None]:
condition_pattern = re.compile(r'if\s*\(([^)]*)\)')
code = """
void example() {
    if (x > 0 && y < 10) {
        doSomething();
    } else {
        doOther();
    }
}
"""

match = condition_pattern.search(code)
if match:
    condition = match.group(1)  # Gets "x > 0 && y < 10"
print(condition)

In [None]:
import re

def extract_function_calls(c_statement):
    # Pattern matches function_name( with optional whitespace
    pattern = r'(\w+)\s*\('
    
    # Find all matches in the statement
    function_calls = re.findall(pattern, c_statement)
    
    return function_calls

# Example usage
statement = "result = calculate_sum(x, y);"
functions = extract_function_calls(statement)
print(f"Found function calls: {functions}")

In [None]:
"1 -> 2".split(" -> ")