Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python bindings & package #349

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,43 @@
# IDE/Filesystem
.idea
.vscode
.DS_Store

*.swp
*.swo
*.gcno
*.gcda
*.kdev4
/.kdev4

# Python
__pycache__
*.egg-info
.venv
build/

# Build outputs
bytes/*.cpp
*.so
*.o
*.a

# Test artifacts
tests/*.tok.*
tests/*.src.*
tests/*.err
tests/tests

# CMake/Ninja artifacts
*.cmake
cmake-build-debug/
CMakeFiles/
Testing/
CMakeCache.txt
build.ninja
.ninja_deps
.ninja_log

# Executables
pycdc
pycdas
percevalw marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 3 additions & 3 deletions ASTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3007,7 +3007,7 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
} else {
pyc_output << "\n";
start_line(cur_indent, pyc_output);
if (code_src->flags() & PycCode::CO_COROUTINE)
if (code_src->flags() & PycCode::CO_COROUTINE_)
pyc_output << "async ";
pyc_output << "def ";
print_src(dest, mod, pyc_output);
Expand Down Expand Up @@ -3039,12 +3039,12 @@ void print_src(PycRef<ASTNode> node, PycModule* mod, std::ostream& pyc_output)
}
}
}
if (code_src->flags() & PycCode::CO_VARARGS) {
if (code_src->flags() & PycCode::CO_VARARGS_) {
if (narg)
pyc_output << ", ";
pyc_output << "*" << code_src->getLocal(narg++)->value();
}
if (code_src->flags() & PycCode::CO_VARKEYWORDS) {
if (code_src->flags() & PycCode::CO_VARKEYWORDS_) {
if (narg)
pyc_output << ", ";
pyc_output << "**" << code_src->getLocal(narg++)->value();
Expand Down
21 changes: 17 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
project(pycdc)
cmake_minimum_required(VERSION 3.1)
cmake_minimum_required(VERSION 3.12)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Debug options.
option(ENABLE_BLOCK_DEBUG "Enable block debugging" OFF)
option(ENABLE_STACK_DEBUG "Enable stack debugging" OFF)
option(ENABLE_BINDINGS "Enable Python bindings" OFF)

# Turn debug defs on if they're enabled.
if (ENABLE_BLOCK_DEBUG)
Expand All @@ -16,8 +17,20 @@ if (ENABLE_STACK_DEBUG)
add_definitions(-DSTACK_DEBUG)
endif()

# For generating the bytes tables
find_package(PythonInterp REQUIRED)
# For generating the bytes tables and bindings
set(PYTHON_VENV_PATH "${CMAKE_SOURCE_DIR}/.venv")
if (EXISTS "${PYTHON_VENV_PATH}")
message("Using existing Python venv at ${PYTHON_VENV_PATH}")
set(Python_ROOT_DIR "${PYTHON_VENV_PATH}")
endif()

if (ENABLE_BINDINGS)
# -fPIC since bindings are built as a shared lib
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_subdirectory(bindings)
else()
find_package(Python REQUIRED Interpreter)
endif()

if(CMAKE_COMPILER_IS_GNUCXX OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wshadow -Werror ${CMAKE_CXX_FLAGS}")
Expand All @@ -38,7 +51,7 @@ foreach(ver ${PYTHON_VERSIONS})
endforeach()

add_custom_command(OUTPUT ${MAP_SOURCES}
COMMAND ${PYTHON_EXECUTABLE}
COMMAND ${Python_EXECUTABLE}
${CMAKE_CURRENT_SOURCE_DIR}/bytes/comp_map.py
${CMAKE_CURRENT_SOURCE_DIR}/bytes
${CMAKE_CURRENT_BINARY_DIR}/bytes
Expand Down
31 changes: 31 additions & 0 deletions README.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ https://github.com/zrax/pycdc
* For makefiles, just run `make`
* To run tests (on \*nix or MSYS), run `make check`

## Building and installing the Python package

This step does not require building the executables of the previous sections.

* Ensure `CMake >= 3.12` is installed
* Create a virtual environment in the source directory and activate it: `python3 -m venv .venv` (CMake uses `.venv` automatically when it exists)
* Run `pip install .`

## Usage
**To run pycdas**, the PYC Disassembler:
`./pycdas [PATH TO PYC FILE]`
Expand All @@ -43,6 +51,29 @@ Both tools support Python marshalled code objects, as output from `marshal.dumps

To use this feature, specify `-c -v <version>` on the command line - the version must be specified as the objects themselves do not contain version metadata.

**To use the Python bindings**, run the following Python script:
```python
import marshal
from pycdc import decompyle

async def test():
a = 5
data = foobar(a)
return data

print(decompyle(marshal.dumps(test.__code__)))
```

or from a `.pyc` file:

```python
from pycdc import decompyle

with open('test.pyc', 'rb') as f:
# pass version=None to infer from the file, or specify a version tuple
print(decompyle(f.read(), version=None))
```

## Authors, Licence, Credits
Decompyle++ is the work of Michael Hansen and Darryl Pogue.

Expand Down
28 changes: 28 additions & 0 deletions bindings/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Build script for the `bindings` Python extension module.

# Find the interpreter as well for byte files generation.
# The Development.Module component only exists in CMake >= 3.18; fall back to
# the full Development component on older CMake versions.
if (CMAKE_VERSION VERSION_LESS 3.18)
    find_package(Python COMPONENTS Interpreter Development REQUIRED)
else ()
    find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)
endif ()

# Find pybind11: ask the pybind11 Python package where its CMake config lives.
execute_process(
    COMMAND ${Python_EXECUTABLE} -c "import pybind11; print(pybind11.get_cmake_dir(), end='')"
    OUTPUT_VARIABLE pybind11_cmake_dir
    RESULT_VARIABLE pybind11_query_result
)
# Only override pybind11_DIR when the query succeeded, so a failed import does
# not replace a user-supplied pybind11_DIR with an empty string.
if (pybind11_query_result EQUAL 0 AND pybind11_cmake_dir)
    set(pybind11_DIR "${pybind11_cmake_dir}")
endif ()
find_package(pybind11 CONFIG REQUIRED)

# Create C library
pybind11_add_module(bindings
    bindings.cpp
    ../pycdc.cpp
    ../ASTree.cpp
    ../ASTNode.cpp
)

# pybind11::headers is a target, not a directory, so it is linked rather than
# listed as an include path.
target_include_directories(bindings PRIVATE ${Python_INCLUDE_DIRS} ${CMAKE_SOURCE_DIR})
target_link_libraries(bindings PRIVATE pybind11::headers pycxx)

# NOTE(review): CMAKE_LIBRARY_OUTPUT_DIRECTORY initialises the output directory
# of targets created after it is set; `bindings` already exists at this point,
# so this default may not affect it -- confirm the intended behaviour.
if (NOT DEFINED CMAKE_LIBRARY_OUTPUT_DIRECTORY)
    set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/build/lib")
endif ()

# NOTE(review): EXAMPLE_VERSION_INFO is not set anywhere in this file;
# presumably it is passed in by the packaging script -- verify.
target_compile_definitions(
    bindings
    PRIVATE VERSION_INFO=${EXAMPLE_VERSION_INFO})
24 changes: 24 additions & 0 deletions bindings/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import sys

from .bindings import decompyle as _decompyle

__version__ = '0.0.1'


def decompyle(code, version=(sys.version_info.major, sys.version_info.minor)):
    """
    Decompile marshalled bytecode by delegating to the native binding.

    Parameters
    ----------
    code : bytes
        The bytes to decompile.
    version : tuple, optional
        The Python version to decompile for, as ``(major, minor)``. Defaults to
        the version of the running interpreter. Use None or (0, 0) to infer the
        Python version from the data itself. This will not work for marshalled
        code objects.

    Returns
    -------
    Whatever the native ``decompyle`` binding returns for the given input.
    """
    # None is shorthand for (0, 0), which the native side reads as "infer".
    major, minor = (0, 0) if version is None else (version[0], version[1])
    return _decompyle(code, major, minor)
49 changes: 49 additions & 0 deletions bindings/bindings.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#pragma clang diagnostic push
#pragma ide diagnostic ignored "cppcoreguidelines-narrowing-conversions"

#include <Python.h>
#include <pybind11/pybind11.h>
#include <pybind11/pytypes.h>
#include <vector>
#include <cstring>
#include <ostream>
#include <sstream>
#include <optional>
#include "ASTree.h"

namespace py = pybind11;


#ifdef WIN32
# define PATHSEP '\\'
#else
# define PATHSEP '/'
#endif

/**
 * Decompile an in-memory byte string and return the generated source text.
 *
 * When both version numbers are 0 the bytes are handed to
 * PycModule::loadFromStream, which reads the version from the data itself.
 * Otherwise they are handed to PycModule::loadFromMarshalledStream together
 * with the given version.
 *
 * @param data           Bytes to decompile.
 * @param major_version  Python major version of the data, or 0.
 * @param minor_version  Python minor version of the data, or 0.
 * @return The text written by decompyle() for the loaded module.
 * @throws py::value_error if the module is not valid after loading (the
 *         loaders return early on a bad magic number or an unsupported
 *         version, leaving no code object to decompile).
 */
py::str decompyle_binding(py::bytes &data, int major_version, int minor_version) {
    PycModule mod;

    // The buffer is given a pointer into `payload`, so `payload` must stay
    // alive for as long as `buffer` is read from.
    auto payload = data.cast<std::string>();
    PycBuffer buffer(
        reinterpret_cast<const unsigned char*>(payload.c_str()),
        payload.size()
    );

    const bool infer_version = (major_version == 0 && minor_version == 0);
    if (infer_version) {
        mod.loadFromStream(buffer);
    }
    else {
        mod.loadFromMarshalledStream(
            buffer,
            major_version,
            minor_version
        );
    }

    // Both loaders only report failure on stderr and return early. Without
    // this check decompyle() would be called on a module without a code
    // object. NOTE(review): assumes a default-constructed PycModule reports
    // !isValid() until a version has been set -- confirm.
    if (!mod.isValid()) {
        std::ostringstream message;
        if (infer_version) {
            message << "Could not load bytecode: bad magic number";
        }
        else {
            message << "Could not load bytecode: unsupported Python version "
                    << major_version << "." << minor_version;
        }
        throw py::value_error(message.str());
    }

    std::ostringstream pyc_output;
    decompyle(mod.code(), &mod, pyc_output);
    return pyc_output.str();
}

// Definition of the `bindings` extension module: sets the module docstring
// and exposes decompyle_binding to Python under the name "decompyle".
PYBIND11_MODULE(bindings, ext) {
    ext.doc() = "pycdcpy bindings";
    ext.def(
        "decompyle",
        &decompyle_binding,
        "Decompile a marshalled python file"
    );
}
2 changes: 2 additions & 0 deletions data.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "data.h"
#include <cstring>
#include <cstdarg>
#include <ostream>
#include <vector>

/* PycData */
Expand Down Expand Up @@ -80,6 +81,7 @@ int PycBuffer::getBuffer(int bytes, void* buffer)
bytes = m_size - m_pos;
if (bytes != 0)
memcpy(buffer, (m_buffer + m_pos), bytes);
m_pos += bytes;
return bytes;
}

Expand Down
34 changes: 17 additions & 17 deletions pyc_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,23 @@ class PycCode : public PycObject {
public:
typedef std::vector<PycRef<PycString>> globals_t;
enum CodeFlags {
CO_OPTIMIZED = 0x1,
CO_NEWLOCALS = 0x2,
CO_VARARGS = 0x4,
CO_VARKEYWORDS = 0x8,
CO_NESTED = 0x10,
CO_GENERATOR = 0x20,
CO_NOFREE = 0x40,
CO_COROUTINE = 0x80,
CO_ITERABLE_COROUTINE = 0x100,
CO_GENERATOR_ALLOWED = 0x1000,
CO_FUTURE_DIVISION = 0x2000,
CO_FUTURE_ABSOLUTE_IMPORT = 0x4000,
CO_FUTURE_WITH_STATEMENT = 0x8000,
CO_FUTURE_PRINT_FUNCTION = 0x10000,
CO_FUTURE_UNICODE_LITERALS = 0x20000,
CO_FUTURE_BARRY_AS_BDFL = 0x40000,
CO_FUTURE_GENERATOR_STOP = 0x80000,
CO_OPTIMIZED_ = 0x1,
CO_NEWLOCALS_ = 0x2,
CO_VARARGS_ = 0x4,
CO_VARKEYWORDS_ = 0x8,
CO_NESTED_ = 0x10,
CO_GENERATOR_ = 0x20,
CO_NOFREE_ = 0x40,
CO_COROUTINE_ = 0x80,
CO_ITERABLE_COROUTINE_ = 0x100,
CO_GENERATOR_ALLOWED_ = 0x1000,
CO_FUTURE_DIVISION_ = 0x2000,
CO_FUTURE_ABSOLUTE_IMPORT_ = 0x4000,
CO_FUTURE_WITH_STATEMENT_ = 0x8000,
CO_FUTURE_PRINT_FUNCTION_ = 0x10000,
CO_FUTURE_UNICODE_LITERALS_ = 0x20000,
CO_FUTURE_BARRY_AS_BDFL_ = 0x40000,
CO_FUTURE_GENERATOR_STOP_ = 0x80000,
};

PycCode(int type = TYPE_CODE)
Expand Down
38 changes: 38 additions & 0 deletions pyc_module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,44 @@ void PycModule::loadFromMarshalledFile(const char* filename, int major, int mino
m_code = LoadObject(&in, this).cast<PycCode>();
}

/* Load a module from a stream that starts with the magic number and header,
 * followed by the code object. */
void PycModule::loadFromStream(PycData& stream)
{
    // The first 32-bit word is the magic number that selects the version.
    setVersion(stream.get32());
    if (!isValid()) {
        fputs("Bad MAGIC!\n", stderr);
        return;
    }

    // From Python 3.7 on, a flags word follows the magic number.
    const int flags = (verCompare(3, 7) >= 0) ? stream.get32() : 0;

    // Work out how many more 32-bit header words precede the code object.
    int headerWords;
    if (flags & 0x1) {
        headerWords = 2;        // Optional checksum added in Python 3.7
    } else {
        headerWords = 1;        // Timestamp -- who cares?
        if (verCompare(3, 3) >= 0)
            ++headerWords;      // Size parameter added in Python 3.3
    }

    // Their values are not needed; just consume them.
    for (int word = 0; word < headerWords; ++word)
        stream.get32();

    m_code = LoadObject(&stream, this).cast<PycCode>();
}

/* Load a code object from a stream that carries no version header; the
 * Python version is supplied by the caller instead. */
void PycModule::loadFromMarshalledStream(PycData& stream, int major, int minor)
{
    if (isSupportedVersion(major, minor)) {
        // Record the caller-provided version before reading the object.
        m_maj = major;
        m_min = minor;
        m_unicode = major >= 3;
        m_code = LoadObject(&stream, this).cast<PycCode>();
    } else {
        // Nothing is loaded for versions the module does not support.
        fprintf(stderr, "Unsupported version %d.%d\n", major, minor);
    }
}
percevalw marked this conversation as resolved.
Show resolved Hide resolved

PycRef<PycString> PycModule::getIntern(int ref) const
{
if (ref < 0 || (size_t)ref >= m_interns.size())
Expand Down
4 changes: 3 additions & 1 deletion pyc_module.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ class PycModule {

void loadFromFile(const char* filename);
void loadFromMarshalledFile(const char *filename, int major, int minor);
void loadFromStream(PycData& stream);
void loadFromMarshalledStream(PycData& stream, int major, int minor);
bool isValid() const { return (m_maj >= 0) && (m_min >= 0); }

int majorVer() const { return m_maj; }
Expand All @@ -60,7 +62,7 @@ class PycModule {

bool strIsUnicode() const
{
return (m_maj >= 3) || (m_code->flags() & PycCode::CO_FUTURE_UNICODE_LITERALS) != 0;
return (m_maj >= 3) || (m_code->flags() & PycCode::CO_FUTURE_UNICODE_LITERALS_) != 0;
}

PycRef<PycCode> code() const { return m_code; }
Expand Down
Loading
Loading