In [1]:
import os
import re
import logging
from parsing import CodeParser
from cache_utils import FunctionStateCache
from utils import load_cfile

logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("debug.log", 'w', encoding='utf-8'),
        logging.StreamHandler(),
    ]
)
logger = logging.getLogger(__name__)

pwd = os.getcwd()
fp = os.path.join(pwd, "data", "test5.c")
src_code = load_cfile(fp)

function_pattern = re.compile(r"\b(\w+)\s+(\w+)\s*\((.*?)\)\s*{")
variable_pattern = re.compile(r"\b(int|float|double|char)\s+(\w+)\s*=\s*([^;]+);")
argument_pattern = re.compile(r"\b(\w+)\s+(\w+)(?:\s*,\s*)?")
function_body_pattern = re.compile(r'\b\w+\s+\w+\s*\([^)]*\)\s*{([^{}]*(?:{[^{}]*}[^{}]*)*)}')

caches = FunctionStateCache()
parser = CodeParser(src_code, caches)
print()
caches._output_()

2024-11-12 12:05:52,474 - DEBUG - 
Function Name: functionB 
Return Type: int 
Arguments: {'x': 'int'} 
Content: 
printf("In functionB with x = %d\n", x); return x * 2;

2024-11-12 12:05:52,475 - DEBUG - 
Function Name: functionA 
Return Type: void 
Arguments: {'y': 'int'} 
Content: 
if (y > 5) { int result = functionB(y); printf("Result from functionB: %d\n", result); } else { printf("y is too small for functionB\n"); }

2024-11-12 12:05:52,475 - DEBUG - 
Function Name: main 
Return Type: int 
Arguments: {} 
Content: 
int value ; scanf("%d", &value); if (value > 0) { functionA(value); } else { printf("Value is non-positive\n"); } return 0;

2024-11-12 12:05:52,476 - DEBUG - statement: int value ;
2024-11-12 12:05:52,476 - DEBUG - call func: scanf("%d", &value)
2024-11-12 12:05:52,476 - DEBUG - call function: scanf
2024-11-12 12:05:52,479 - DEBUG - if block: if (value > 0) { functionA(value); }
2024-11-12 12:05:52,479 - DEBUG - if body: functionA(value);
2024-11-12 12:05:52,479 - DEBUG


Function Name: functionB
Return Type: int
Arguments: {'x': 'int'}
Conditions: None
Content: printf("In functionB with x = %d\n", x); return x * 2;

Function Name: functionA
Return Type: void
Arguments: {'y': 'int'}
Conditions: None
Content: if (value > 5) { int result = functionB(value); printf("Result from functionB: %d\n", result); } else { printf("y is too small for functionB\n"); }

Function Name: main
Return Type: int
Arguments: {}
Conditions: None
Content: int value ; scanf("%d", &value); if (value > 0) { functionA(value); } else { printf("Value is non-positive\n"); } return 0;



In [5]:
blabla = "if (y > 5) { int result = functionB(y); } else { }"
var = 'value'
arg = 'y'
blabla.replace(f"({arg}", f"({var}").replace(f"{arg})", f"{var})")


'if (value > 5) { int result = functionB(value); } else { }'

In [None]:
from utils import shrink_code
def extract_if_block(code):
    # Pattern for complete if block including condition and braces
    if_pattern = r'if\s*\([^)]*\)\s*{[^{}]*(?:{[^{}]*}[^{}]*)*}'
    match = re.search(if_pattern, code)
    return match.group() if match else ''

# Example code
code = '''
int main() {
    if (condition1==1) {
        functionA();
        if (condition2==0) {
            functionB();
        } else {
            functionC();
        }
    }
    return 0;
}
'''

if_block = extract_if_block(shrink_code(code))
print("Complete if block:")
print(if_block)

In [None]:
from utils import shrink_code
def extract_if_block(code):
    # Pattern for if block content
    if_pattern = r'if\s*\([^)]*\)\s*{([^{}]*(?:{[^{}]*}[^{}]*)*)}'
    match = re.search(if_pattern, code)
    return match.group(1).strip() if match else ''

def extract_else_block(code):
    # Pattern for else block content
    else_pattern = r'else\s*{([^{}]*(?:{[^{}]*}[^{}]*)*)}'
    match = re.search(else_pattern, code)
    return match.group(1).strip() if match else ''

# Example usage
code = '''
if (condition1==1) {
    functionA();
    if (condition2==0) {
        functionB();
    } else {
        functionC();
    }
} else {
    functionD();
    functionE();
}
'''

if_content = extract_if_block(shrink_code(code))
else_content = extract_else_block(shrink_code(code))

print("If block content:")
print(if_content)
print("\nElse block content:")
print(else_content)

In [None]:
import re
contents_pattern = re.compile(r'(?s)(?<=\{)(.*?)(?=\}(?:\s*\n*[^\}]|$))')
match = re.finditer(contents_pattern, """
#include <stdio.h>

void functionA() {
    printf("In functionA\n");
}

void functionB() {
    printf("In functionB\n");
}

void functionC() {
    printf("In functionC\n");
}

int main() {
    int condition1;
    int condition2;
    scanf("%d", &condition1);
    scanf("%d", &condition2);
    if (condition1==1) {
        functionA();
        if (condition2==0) {
            functionB();
        } else {
            functionC();
        }
    }
    return 0;
}
""")
[match.group(1).strip() for match in match]

In [9]:
import re

def extract_function_arguments(function_call):
    # Pattern to match arguments inside parentheses
    pattern = r'\w+\s*\((.*?)\)'
    match = re.search(pattern, function_call)
    return match.group(1) if match else ''

# Example usage
calls = [
    "functionA(x, y)",
    "calculate(123, 'test', true)",
    "process()",
    "sum(a+b, c*d, (x+y))",
    "print('Hello, World!')"
]

for call in calls:
    args = extract_function_arguments(call)
    print(f"Function call: {call}")
    print(f"Arguments: {args}")
    print("-" * 30)

Function call: functionA(x, y)
Arguments: x, y
------------------------------
Function call: calculate(123, 'test', true)
Arguments: 123, 'test', true
------------------------------
Function call: process()
Arguments: 
------------------------------
Function call: sum(a+b, c*d, (x+y))
Arguments: a+b, c*d, (x+y
------------------------------
Function call: print('Hello, World!')
Arguments: 'Hello, World!'
------------------------------


In [None]:
from utils import shrink_code
def extract_all_function_bodies(code):
    # Pattern that handles balanced braces
    pattern = r'(?s)(\w+\s*\([^)]*\)\s*{)((?:[^{}]|{(?:[^{}]|{[^{}]*})*})*})'
    matches = re.finditer(pattern, code)
    bodies = []
    
    for match in matches:
        # Extract just the body part between braces
        full_match = match.group(2)
        # Remove the outer braces and strip whitespace
        body = full_match[1:-1].strip()
        bodies.append(body)
    
    return bodies

# Example code
code = '''
#include <stdio.h>

void functionA() {
    printf("In functionA\n");
}

void functionB() {
    printf("In functionB\n");
}

void functionC() {
    printf("In functionC\n");
}

int main() {
    int condition1;
    int condition2;
    scanf("%d", &condition1);
    scanf("%d", &condition2);
    if (condition1==1) {
        functionA();
        if (condition2==0) {
            functionB();
        } else {
            functionC();
        }
    }
    return 0;
}
'''

bodies = extract_all_function_bodies(shrink_code(code))
for i, body in enumerate(bodies, 1):
    print(f"Function {i} body:")
    print(body)
    print("-" * 50)

In [None]:
list(set(['8', '3', '6', '3']))

In [None]:
condition_pattern = re.compile(r'if\s*\(([^)]*)\)')
code = """
void example() {
    if (x > 0 && y < 10) {
        doSomething();
    } else {
        doOther();
    }
}
"""

match = condition_pattern.search(code)
if match:
    condition = match.group(1)  # Gets "x > 0 && y < 10"
print(type(condition))

In [None]:
import re

def extract_function_calls(c_statement):
    # Pattern matches function_name( with optional whitespace
    pattern = r'(\w+)\s*\('
    
    # Find all matches in the statement
    function_calls = re.findall(pattern, c_statement)
    
    return function_calls

# Example usage
statement = "result = calculate_sum(x, y);"
functions = extract_function_calls(statement)
print(f"Found function calls: {functions}")

In [None]:
"1 -> 2".split(" -> ")