In [1]:
%load_ext ipybind

import sys

# cpython API: operate Python from C

1. Use cpython API with pybind11
2. `PyObject` reference counting
3. Built-in types
   1. Cached value
   2. Attribute access
   3. Function call
   4. Tuple
   5. Dictionary
   6. List
4. Useful operations
   1. Import
   2. Exception
5. Python memory management
   1. PyMem interface
   2. Small memory optimization
   3. Tracemalloc

# Use cpython API with pybind11

In [2]:
%%pybind11 -c="-O3"

#include "pybind11/pybind11.h"
#include "Python.h" // Unnecessary

using namespace pybind11;

PYBIND11_MODULE(ex_long, m)
{
    // PyLong_FromLong() returns a new reference, or nullptr with a Python
    // exception set when the integer object cannot be created.
    PyObject * v = PyLong_FromLong(2000000);
    if (nullptr == v) { throw error_already_set(); }
    // Assigning the raw pointer makes the module hold its own reference ...
    m.attr("integer_value") = v;
    // ... so the reference obtained from PyLong_FromLong() is released here.
    Py_DECREF(v);
}

In [3]:
# Show the type and value of the attribute that the C++ module body assigned.
print(type(integer_value), integer_value)

<class 'int'> 2000000


We can use any Python C API with pybind11.

When we include `pybind11/pybind11.h`, we don't need to include `Python.h`, because the former does it for us.  But please note that `pybind11/pybind11.h` or `Python.h` should be included before every other inclusion.

# `PyObject` reference counting

In [4]:
%%pybind11 -c="-O3"

#include "pybind11/pybind11.h"

using namespace pybind11;

// File-scope pointer to the string created when the module is initialized.
// After the Py_DECREF() in the module body this pointer no longer accounts
// for a reference of its own.
// NOTE(review): if every other reference to the string went away (e.g. the
// module attribute were deleted), this pointer would presumably dangle --
// confirm that is acceptable for the demonstration.
static PyObject * s;

PYBIND11_MODULE(ex_str, m)
{
    // Create the Python string from a C string.
    s = PyUnicode_FromString("string_from_c");
    // Publish it as the module attribute `string_value`.
    m.attr("string_value") = s;
    // Release the reference obtained from PyUnicode_FromString().
    Py_DECREF(s);
    m
        .def
        (
            "show_ref_count_with_handle"
            // Reference count as seen when the argument arrives as a handle.
          , [](handle const & h)
            {
                return Py_REFCNT(h.ptr());
            }
        )
        .def
        (
            "show_ref_count_with_object"
            // Reference count as seen when the argument arrives as an object.
          , [](object const & o)
            {
                return Py_REFCNT(o.ptr());
            }
        )
        .def
        (
            "show_string_value_ref_count"
            // Reference count read through the file-scope pointer `s`; no
            // argument is passed in from Python.
          , [&]()
            {
                return Py_REFCNT(s);
            }
        )
    ;
}

In [5]:
def check_string_value():
    # Print the reference count of string_value through four different
    # probes, first as-is and then again after one more local name is bound
    # to the same object.
    print(type(string_value), string_value)
    print('before aliasing')
    print(show_ref_count_with_object(string_value), 'refcnt by object')
    print(show_ref_count_with_handle(string_value), 'refcnt by handle')
    print(sys.getrefcount(string_value), 'refcnt by sys')
    print(show_string_value_ref_count(), 'refcnt from c++')
    # The alias is never read; it exists only to bind one more name to the
    # same object before the counts are printed again.
    string_value_aliasing = string_value
    print('after aliasing')
    print(show_ref_count_with_object(string_value), 'refcnt by object')
    print(show_ref_count_with_handle(string_value), 'refcnt by handle')
    print(sys.getrefcount(string_value), 'refcnt by sys')
    print(show_string_value_ref_count(), 'refcnt from c++')
check_string_value()

<class 'str'> string_from_c
before aliasing
7 refcnt by object
6 refcnt by handle
5 refcnt by sys
4 refcnt from c++
after aliasing
8 refcnt by object
7 refcnt by handle
6 refcnt by sys
5 refcnt from c++


Pybind11 offers two low-level shorthands for reference counting: `handle::inc_ref()` and `handle::dec_ref()`.  If we don't want to go so low-level, it provides `reinterpret_borrow` and `reinterpret_steal` function templates.

# Built-in types

## Cached value

Python caches small (-5 to 256) integers: https://github.com/python/cpython/blob/4830f581af57dd305c02c1fd72299ecb5b090eca/Objects/longobject.c#L40 .  Don't be surprised when you see a large reference count for some of these integers:

In [6]:
# Compare the reference count of 0 with that of 257.
print('ref counts of 0:', sys.getrefcount(0))
print('ref counts of 257:', sys.getrefcount(257))

ref counts of 0: 5783
ref counts of 257: 3


Floating-point numbers don't have such a cache:

In [7]:
# Reference count of the float literal 0.0, for comparison with the integer 0.
print(sys.getrefcount(0.0))

3


Python interns strings consisting of alphanumerical and underscore characters.

In [8]:
# `is` is used on purpose: the point is object identity, not equality.
# NOTE(review): newer Python versions emit a SyntaxWarning for `is` with a
# literal -- confirm the warning is acceptable in the rendered notebook.
print('' is '')
# Reference count of the empty string object.
print(sys.getrefcount(''))

True
3177


In [9]:
def check_string_intern():
    # Check whether equal string literals and a concatenation of literals
    # end up as the same object, and print the reference count of
    # 'numerical' before and after s2 is bound.
    s1 = 'numerical'
    print(sys.getrefcount('numerical'))
    # Identity comparison against a literal is deliberate here.
    print(s1 is 'numerical')
    # Build the same text from two literal pieces.
    s2 = 'num' + 'erical'
    print(s1 is s2)
    print(sys.getrefcount('numerical'))
check_string_intern()

4
True
True
5


## Attribute access

The Python object protocol defines a set of APIs for accessing object attributes.  Here is a simple example that sets and gets an attribute of an object using the API:

```cpp
int PyObject_SetAttr(PyObject *o, PyObject *attr_name, PyObject *v)
PyObject* PyObject_GetAttr(PyObject *o, PyObject *attr_name)
```

In [10]:
%%pybind11 -c="-O3"

#include "pybind11/pybind11.h"

using namespace pybind11;

void attach_attr(PyObject * o, PyObject * name, PyObject * attr)
{
    /*int ret =*/
    PyObject_SetAttr(o, name, attr);
}

PyObject * retrieve_attr(PyObject * o, PyObject * name)
{
    PyObject * ret = PyObject_GetAttr(o, name);
    return ret;
}

PYBIND11_MODULE(ex_attr, m)
{
    m
        .def
        (
            "attach_attr"
          , [](object & o, object & name, object & attr)
            {
                attach_attr(o.ptr(), name.ptr(), attr.ptr());
            }
        )
        .def
        (
            "retrieve_attr"
          , [](object & o, object & name)
            {
                handle(retrieve_attr(o.ptr(), name.ptr()));
            }
        )
    ;
}

In [11]:
# Empty class whose instance receives the attribute.
class Cls():
    pass
obj = Cls()
val = 'attached value'
# Baseline reference count of the value before it is attached.
print(sys.getrefcount(val))

# Attach val to obj under the attribute 'name' through the C API wrapper.
attach_attr(obj, 'name', val)
print(sys.getrefcount(val))

# The attribute is the very same object as val.
print(obj.name is val)
print(sys.getrefcount(val))

# Call the retrieval wrapper and print the count once more.
val2 = retrieve_attr(obj, 'name')
print(sys.getrefcount(val))

3
4
True
4
5


There are shorthand versions of the API that take a C string for the attribute name:

In [12]:
%%pybind11 -c="-O3"

#include "pybind11/pybind11.h"

#include <string>

using namespace pybind11;

void attach_attr(PyObject * o, char const * name, PyObject * attr)
{
    /*int ret =*/
    PyObject_SetAttrString(o, name, attr);
}

PyObject * retrieve_attr(PyObject * o, char const * name)
{
    PyObject * ret = PyObject_GetAttrString(o, name);
    return ret;
}

PYBIND11_MODULE(ex_attr_by_string, m)
{
    m
        .def
        (
            "attach_attr_by_string"
          , [](object & o, object & name, object & attr)
            {
                std::string name_str = cast<std::string>(name);
                attach_attr(o.ptr(), name_str.c_str(), attr.ptr());
            }
        )
        .def
        (
            "retrieve_attr_by_string"
          , [](object & o, object & name)
            {
                std::string name_str = cast<std::string>(name);
                handle(retrieve_attr(o.ptr(), name_str.c_str()));
            }
        )
    ;
}

In [13]:
# Empty class whose instance receives the attribute.
class Cls():
    pass
obj = Cls()
val = 'attached value'
# Baseline reference count of the value before it is attached.
print(sys.getrefcount(val))

# Attach val to obj under the attribute 'name' through the C-string wrapper.
attach_attr_by_string(obj, 'name', val)
print(sys.getrefcount(val))

# The attribute is the very same object as val.
print(obj.name is val)
print(sys.getrefcount(val))

# Call the retrieval wrapper and print the count once more.
val2 = retrieve_attr_by_string(obj, 'name')
print(sys.getrefcount(val))

3
4
True
4
5


See also the C API documentation for object protocol: https://docs.python.org/3/c-api/object.html .

## Function call

This section shows how to make a Python function call from C.

In [14]:
%%pybind11 -c="-O3"

#include "pybind11/pybind11.h"

#include <string>

using namespace pybind11;

// Invoke `callable` with the positional arguments in `args` and the keyword
// arguments in `kw`, forwarding whatever PyObject_Call() returns.
PyObject * function_call(PyObject * callable, PyObject * args, PyObject * kw)
{
    return PyObject_Call(callable, args, kw);
}

PYBIND11_MODULE(ex_call, m)
{
    m
        .def
        (
            "function_call"
          , [](object & o, tuple & t, dict & kw)
            {
                PyObject * ret = function_call(o.ptr(), t.ptr(), kw.ptr());
                // A null result means the callee raised: propagate that
                // exception to the Python caller unchanged.
                if (nullptr == ret) { throw error_already_set(); }
                // The result is a new reference.  Returning a plain handle
                // would add another reference and leak this one, so transfer
                // ownership instead.
                return reinterpret_steal<object>(ret);
            }
        )
    ;
}

In [15]:
def my_func(arg1, kw1='default'):
    """Return a one-line summary of the positional and keyword argument."""
    template = 'results: {}, {}'
    return template.format(arg1, kw1)

# Run each argument combination through both call paths; the two lines of a
# pair are expected to match.
call_args = ('first argument',)
for call_kwargs in ({}, {'kw1': 'non default'}):
    print('(direct call)  ', my_func(*call_args, **call_kwargs))
    print('(function_call)', function_call(my_func, call_args, call_kwargs))

(direct call)   results: first argument, default
(function_call) results: first argument, default
(direct call)   results: first argument, non default
(function_call) results: first argument, non default


See the object protocol (https://docs.python.org/3/c-api/object.html) for other variants of the API.

## Tuple

In [16]:
%%pybind11 -c="-O3"

#include "pybind11/pybind11.h"

#include <string>

using namespace pybind11;

// Build a new tuple holding the items of `tup` in reverse order.
// Returns a new reference, or nullptr with a Python exception set when `tup`
// is not a tuple or the result cannot be allocated.
PyObject * reverse_tuple(PyObject * tup)
{
    Py_ssize_t const size = PyTuple_Size(tup);
    if (size < 0) { return nullptr; }

    PyObject * ret = PyTuple_New(size);
    if (nullptr == ret) { return nullptr; }

    for (Py_ssize_t i = 0 ; i < size ; ++i)
    {
        // PyTuple_GetItem() returns a borrowed reference while
        // PyTuple_SetItem() steals one, hence the explicit Py_INCREF().
        PyObject * item = PyTuple_GetItem(tup, i);
        Py_INCREF(item);
        // Mirror the index so that the result is actually reversed.
        // This only works when 1 == Py_REFCNT(ret)
        PyTuple_SetItem(ret, size - 1 - i, item);
    }

    return ret;
}

PYBIND11_MODULE(ex_tuple, m)
{
    m
        .def
        (
            "reverse_tuple"
          , [](tuple & t)
            {
                PyObject * ret = reverse_tuple(t.ptr());
                // Propagate a pending Python exception if no tuple came back.
                if (nullptr == ret) { throw error_already_set(); }
                // The new tuple is a new reference.  Returning a plain handle
                // would add another reference and leak this one, so transfer
                // ownership instead.
                return reinterpret_steal<object>(ret);
            }
        )
    ;
}

In [17]:
tv0 = "value0"
tv1 = object()
tup = (tv0, tv1)
# Reference count of tv1 before the C++ function builds a second tuple.
print(sys.getrefcount(tv1))
rtup = reverse_tuple(tup)
# The tuple bound to rtup refers to tv1 as well, so the count is expected to
# be one higher than before.
print(sys.getrefcount(tv1))

3
4


See https://docs.python.org/3/c-api/tuple.html for tuple C API.  `PyTuple_SetItem` implementation: https://github.com/python/cpython/blob/v3.8.0/Objects/tupleobject.c#L167 .

## Dictionary



In [18]:
%%pybind11 -c="-O3"

#include "pybind11/pybind11.h"

#include <string>

using namespace pybind11;

PyObject * make_dict()
{
    PyObject * ret = PyDict_New();
    return ret;
}

void add_dict_item(PyObject * d, PyObject * k, PyObject * v)
{
    /*int ret =*/
    PyDict_SetItem(d, k, v);
}

PYBIND11_MODULE(ex_dict, m)
{
    m
        .def
        (
            "make_dict"
          , []()
            {
                return handle(make_dict());
            }
        )
        .def
        (
            "add_dict_item"
          , [](dict & d, object & k, object & v)
            {
                add_dict_item(d.ptr(), k.ptr(), v.ptr());
            }
        )
    ;
}

In [19]:
# Build one dictionary natively and one through the C API, then compare them.
d0 = dict()
d1 = make_dict()
print(d0 is d1)
print(d0 == d1)
# Insert the same key/value pair into each dictionary by its own route.
d0.update({'k1': 'value1'})
print(d0)
add_dict_item(d1, 'k1', 'value1')
print(d1)
print(d0 == d1)

False
True
{'k1': 'value1'}
{'k1': 'value1'}
True


Dictionary API: https://docs.python.org/3/c-api/dict.html .

## List

In [20]:
%%pybind11 -c="-O3"

#include "pybind11/pybind11.h"

#include <string>

using namespace pybind11;

// Collect every item produced by iterating over `o` into a new list.
// Returns a new reference, or nullptr with a Python exception set when `o`
// is not iterable, when the iteration itself raises, or when the list cannot
// be created or extended.
PyObject * make_list_from_iterator(PyObject * o)
{
    PyObject * iter = PyObject_GetIter(o);
    if (nullptr == iter) { return nullptr; }

    PyObject * ret = PyList_New(0);
    if (nullptr == ret)
    {
        Py_DECREF(iter);
        return nullptr;
    }

    PyObject * item = nullptr;
    while (nullptr != (item = PyIter_Next(iter)))
    {
        // PyList_Append() takes its own reference, so ours is always dropped.
        int const status = PyList_Append(ret, item);
        Py_DECREF(item);
        if (0 != status) { break; }
    }
    Py_DECREF(iter);

    // PyIter_Next() returns nullptr both at exhaustion and on error; only the
    // error case (or a failed append) leaves an exception pending.
    if (nullptr != PyErr_Occurred())
    {
        Py_DECREF(ret);
        return nullptr;
    }
    return ret;
}

PYBIND11_MODULE(ex_list, m)
{
    m
        .def
        (
            "make_list_from_iterator"
          , [](object & o)
            {
                PyObject * ret = make_list_from_iterator(o.ptr());
                // Propagate a pending Python exception if no list came back.
                if (nullptr == ret) { throw error_already_set(); }
                // The list is a new reference.  Returning a plain handle
                // would add another reference and leak this one, so transfer
                // ownership instead.
                return reinterpret_steal<object>(ret);
            }
        )
    ;
}

In [21]:
v0 = 'first value'
v1 = 'second value'
tup = (v0, v1)
# Reference count of v1 before the list is built from the tuple.
print(sys.getrefcount(v1))
lst = make_list_from_iterator(tup)
# The new list refers to v1 as well, so the count is expected to be one
# higher than before.
print(sys.getrefcount(v1))
print(lst)

4
5
['first value', 'second value']


See more information at https://docs.python.org/3/c-api/list.html and https://docs.python.org/3/c-api/iter.html .

# Useful operations

## Import

In [22]:
%%pybind11 -c="-O3"

#include "pybind11/pybind11.h"

#include <string>

using namespace pybind11;

// Fetch `sys.modules` through the import and attribute APIs.
// Returns a new reference, or nullptr with a Python exception set when the
// import or the attribute lookup fails.
PyObject * get_modules()
{
    PyObject * sysmod = PyImport_ImportModule("sys");
    if (nullptr == sysmod) { return nullptr; }
    PyObject * modules = PyObject_GetAttrString(sysmod, "modules");
    // The module object is no longer needed once the attribute is fetched.
    Py_DECREF(sysmod);
    return modules;
}

PYBIND11_MODULE(ex_import, m)
{
    m
        .def
        (
            "get_modules"
          , []()
            {
                PyObject * ret = get_modules();
                // Propagate a pending Python exception if the lookup failed.
                if (nullptr == ret) { throw error_already_set(); }
                // The result is a new reference.  Returning a plain handle
                // would add another reference and leak this one, so transfer
                // ownership instead.
                return reinterpret_steal<object>(ret);
            }
        )
    ;
}

In [23]:
# sys.modules as fetched through the C API: show its type and entry count.
modules = get_modules()
print(type(modules), len(modules))

<class 'dict'> 877


## Exception

In [24]:
%%pybind11 -c="-O3"

#include "pybind11/pybind11.h"

#include <string>

using namespace pybind11;

// Demonstrate C API error handling: consume the iterable `o` into a
// temporary list, then deliberately raise RuntimeError.
// Always returns nullptr with a Python exception set.  Every reference
// acquired along the way is released on every exit path.
PyObject * function_with_exception(PyObject * o)
{
    // Normal processing code.
    PyObject * iter = PyObject_GetIter(o);
    if (nullptr == iter) { return nullptr; }
    PyObject * ret = PyList_New(0);
    if (nullptr == ret)
    {
        Py_DECREF(iter);
        return nullptr;
    }
    PyObject * item = nullptr;
    while (nullptr != (item = PyIter_Next(iter)))
    {
        int status = PyList_Append(ret, item);
        Py_DECREF(item);
        if (0 != status)
        {
            Py_DECREF(ret);
            Py_DECREF(iter);
            return nullptr;
        }
    }
    Py_DECREF(iter);

    // PyIter_Next() also returns nullptr when the iteration itself raised;
    // keep that exception rather than overwriting it below.
    if (nullptr != PyErr_Occurred())
    {
        Py_DECREF(ret);
        return nullptr;
    }

    // Exception.
    PyErr_SetString(PyExc_RuntimeError, "intentional exception");
    Py_DECREF(ret); // Don't forget to clean up.
    return nullptr;
}

PYBIND11_MODULE(ex_except, m)
{
    m
        .def
        (
            "function_with_exception"
          , [](object & o)
            {
                PyObject * ret = function_with_exception(o.ptr());
                // Translate the pending Python exception into the C++
                // exception that pybind11 re-raises in Python.
                if (nullptr == ret) { throw error_already_set(); }
                // A non-null result would be a new reference: transfer its
                // ownership instead of wrapping it in a handle, which would
                // leak it.
                return reinterpret_steal<object>(ret);
            }
        )
    ;
}

In [25]:
# The failure inside the C++ code arrives as an ordinary Python exception.
# Catch and display it so that the notebook keeps running under "Run All".
try:
    function_with_exception(1)
except TypeError as exc:
    print('{}: {}'.format(type(exc).__name__, exc))

TypeError: 'int' object is not iterable

In [26]:
tup = ('first value', 'second value')
# A valid iterable gets past the normal processing and reaches the
# intentional RuntimeError.  Catch and display it so that the notebook keeps
# running under "Run All".
try:
    function_with_exception(tup)
except RuntimeError as exc:
    print('{}: {}'.format(type(exc).__name__, exc))

RuntimeError: intentional exception

# Python memory management

Python has its own memory manager.  When writing a Python extension, it should be used for `PyObject`.  The memory managing system has three levels:

1. Raw memory interface: wrapper to the C standard memory managers.  It returns a distinct address even when 0 bytes are requested.  GIL is not involved.
2. Normal memory interface: 'pymalloc' with small memory optimization.  GIL is required when calling.
3. Object memory interface: allocate for `PyObject`.  GIL is required when calling.

The public API are:

```c
void * PyMem_RawMalloc(size_t size);
void * PyMem_RawCalloc(size_t nelem, size_t elsize);
void * PyMem_RawRealloc(void *ptr, size_t new_size);
void   PyMem_RawFree(void *ptr);

void * PyMem_Malloc(size_t size);
void * PyMem_Calloc(size_t nelem, size_t elsize);
void * PyMem_Realloc(void *ptr, size_t new_size);
void   PyMem_Free(void *ptr);

void * PyObject_Malloc(size_t size);
void * PyObject_Calloc(size_t nelem, size_t elsize);
void * PyObject_Realloc(void *ptr, size_t new_size);
void   PyObject_Free(void *ptr);
```


In [`Include/cpython/pymem.h`](https://github.com/python/cpython/blob/v3.8.0/Include/cpython/pymem.h#L53), Python provides a struct and a set of API to switch to custom memory managers:

```c
typedef struct {
    /* user context passed as the first argument to the 4 functions */
    void *ctx;

    /* allocate a memory block */
    void* (*malloc) (void *ctx, size_t size);

    /* allocate a memory block initialized by zeros */
    void* (*calloc) (void *ctx, size_t nelem, size_t elsize);

    /* allocate or resize a memory block */
    void* (*realloc) (void *ctx, void *ptr, size_t new_size);

    /* release a memory block */
    void (*free) (void *ctx, void *ptr);
} PyMemAllocatorEx;

/* Get the memory block allocator of the specified domain. */
PyAPI_FUNC(void) PyMem_GetAllocator(PyMemAllocatorDomain domain,
                                    PyMemAllocatorEx *allocator);

/* Set the memory block allocator of the specified domain.

   The new allocator must return a distinct non-NULL pointer when requesting
   zero bytes.

   For the PYMEM_DOMAIN_RAW domain, the allocator must be thread-safe: the GIL
   is not held when the allocator is called.

   If the new allocator is not a hook (don't call the previous allocator), the
   PyMem_SetupDebugHooks() function must be called to reinstall the debug hooks
   on top on the new allocator. */
PyAPI_FUNC(void) PyMem_SetAllocator(PyMemAllocatorDomain domain,
                                    PyMemAllocatorEx *allocator);
```

The documentation is at https://docs.python.org/3/c-api/memory.html#customize-memory-allocators .  The public API functions are wrappers around the functions populated in the struct, e.g.:

```c
void *
PyMem_RawMalloc(size_t size)
{
    /*
     * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
     * Most python internals blindly use a signed Py_ssize_t to track
     * things without checking for overflows or negatives.
     * As size_t is unsigned, checking for size < 0 is not required.
     */
    if (size > (size_t)PY_SSIZE_T_MAX)
        return NULL;
    return _PyMem_Raw.malloc(_PyMem_Raw.ctx, size);
}
```

See https://github.com/python/cpython/blob/v3.8.0/Objects/obmalloc.c#L562 for the code.

## Small memory optimization

Take a look at the documentation: https://github.com/python/cpython/blob/v3.8.0/Objects/obmalloc.c#L766 .  This is the 'pymalloc', and it uses 256 KB arenas for allocations not greater than 512 bytes.

## tracemalloc

`tracemalloc` is a standard library module that uses the custom memory manager to profile and debug Python memory use: https://docs.python.org/3/library/tracemalloc.html .  We can follow the implementation to create more specific analysis.