Home > Net >  Access violation in PyArray_SimpleNew
Access violation in PyArray_SimpleNew

Time:12-23

I have a CMake-based C++ project (library) which is wrapped for Python using SWIG. A method of the library returns a std::vector<int64_t>, which is copied into a NumPy array via SWIG's %extend directive (see foo.i below).

foo.i

// numpy.i checks SWIG_FILE_WITH_INIT to decide whether this wrapper file is
// the one that initialises the NumPy C API, so it is defined before numpy.i
// is included.
%{
#define SWIG_FILE_WITH_INIT
%}
%include "numpy.i"

// Code placed in %init runs when the Python module is imported:
// import_array() initialises the NumPy C API for this extension.
%init %{
import_array();
%}

// Let SWIG parse the declarations in foo.hpp and generate wrappers for them ...
%include "foo.hpp"
// ... and include the same header in the generated C++ wrapper source.
%{
#include <foo.hpp>
%}

// Adds asNpArray() to the wrapped std::vector<int64_t>. The method returns a
// new one-dimensional NumPy array of dtype int64 that holds a copy of the
// vector's elements, or NULL (with the Python error set by NumPy) if the
// array could not be created.
//
// NOTE: this method calls the Python/NumPy C API and therefore has to run
// with the GIL held. When SWIG is invoked with -threads, the GIL is released
// around wrapped calls unless the method is excluded (e.g. via %nothread).
%extend std::vector<int64_t> {
  PyObject* asNpArray() {
    size_t nRows = self->size();
    npy_intp dims[1] = { (npy_intp)nRows };

    PyArrayObject* vec_array = (PyArrayObject *) PyArray_SimpleNew(1, dims, NPY_INT64);
    if (vec_array == NULL) {
      // Creation failed: propagate the pending Python exception instead of
      // dereferencing a null array below.
      return NULL;
    }
    int64_t *vec_array_pointer = (int64_t*) PyArray_DATA(vec_array);

    // Element-wise copy from the vector into the array's data buffer.
    std::copy(self->begin(), self->end(), vec_array_pointer);
    return (PyObject*)vec_array;
  }
}

// Instantiate the wrapper class for std::vector<int64_t> under the Python
// name "vectorint64"; the %extend above is applied to this instantiation.
%template(vectorint64) std::vector<int64_t>;

The wrapping works fine up to numpy 1.21.6, but as soon as I update numpy to 1.22.0 or newer, it crashes when calling the asNpArray() method in Python. (I tried Python 3.8 and 3.11, both with the same result.)

When I started debugging the wrapper, I saw this exception:

Exception thrown at 0x00007FFDC58DDA5D (python38.dll) in python.exe: 0xC0000005: Access violation reading location 0x00000000000000E8.

at the C++ source line PyArrayObject* vec_array = (PyArrayObject *) PyArray_SimpleNew(1, dims, NPY_INT64);

This is shown when running the script with a debugger attached; the script is terminated afterwards.

Curiously, when I step through the C++ code in the debugger and single-step over this line, no exception is thrown and the correct numpy array is returned to my Python script.

Does anyone know what is wrong with the extension? I wasn't able to find anything in the numpy changelog that addresses this topic.

Or is there perhaps a better way to convert a std::vector to a numpy array?

Below I have attached a few source and Python files that form a minimal project to reproduce the whole issue.

foo.hpp

/**
 * \file     foo.hpp
 * \brief    Declares foo::Foo, the small class wrapped for Python in this example.
 */
#ifndef FOO
#define FOO

#include <stdint.h>
#include <vector>

namespace foo {
/// Example class with a single accessor that returns integer data.
class Foo {  
public:
  /// Constructor; defined in foo.cpp.
  Foo();
  /// Destructor; defined in foo.cpp.
  ~Foo();
  /// Returns a vector of signed 64-bit integers by value.
  std::vector<int64_t> getVector();
};  
} /* namespace */   
#endif

foo.cpp

/*  
 * \file     foo.cpp
 */
#include "foo.hpp"

namespace foo {

// Construction and destruction have nothing to do.
Foo::Foo() {}

Foo::~Foo() {}

// Returns the fixed sample sequence -2, -1, 0, 1, 2 by value.
std::vector<int64_t> Foo::getVector()
{
    return std::vector<int64_t>{-2, -1, 0, 1, 2};
}

} // namespace foo

CMakeLists.txt

cmake_minimum_required(VERSION 3.18)

# Both policies are already NEW with a 3.18 minimum; they are spelled out to
# document what this project relies on.
cmake_policy(SET CMP0048 NEW)   # project() manages the version variables
cmake_policy(SET CMP0094 NEW)   # FindPython uses LOCATION as lookup strategy

# Build everything as C++17, without falling back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

project(Foo VERSION 0.0.1 DESCRIPTION "PyArray_SimpleNew test")

# Default symbol visibility for all targets defined below: hidden.
set(CMAKE_C_VISIBILITY_PRESET hidden)
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN ON)

# Static library: built as position-independent code so it can be linked
# into the shared Python module.
set(STATIC_TARGET ${PROJECT_NAME}Static)
add_library(${STATIC_TARGET} STATIC foo.cpp)
set_target_properties(${STATIC_TARGET} PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  PUBLIC_HEADER "foo.hpp"
)
# Consumers find foo.hpp in this source directory.
target_include_directories(${STATIC_TARGET} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")

# Python wrapping
# NOTE(review): this defines the SWIG macro for the C/C++ compiler as well,
# not only for the swig tool itself - confirm that this is intended.
add_compile_definitions(SWIG)

find_package(SWIG REQUIRED)
include(${SWIG_USE_FILE})

# foo.i is a C++ interface and should see the include directories of the
# libraries the module links against.
set_source_files_properties(foo.i PROPERTIES
  USE_LIBRARY_INCLUDE_DIRECTORIES TRUE
  CPLUSPLUS ON
)

if(MSVC)
  set(CMAKE_SWIG_FLAGS "-D_SWIG_WIN32")

  # Silence MSVC warning C4127 (conditional expression is constant).
  string(APPEND CMAKE_CXX_FLAGS " /wd4127")

  # We don't have Python with debug information installed
  add_definitions(-DSWIG_PYTHON_INTERPRETER_NO_DEBUG)
endif()

# Locate the Python interpreter, the development files and the NumPy headers.
set(Python3_FIND_STRATEGY LOCATION)
find_package(Python3 REQUIRED COMPONENTS Interpreter Development NumPy)

# Report what was found. Each value is expanded inside a quoted string so
# that list-valued variables (e.g. Python3_LIBRARIES) are printed with their
# separators instead of being glued together, and STATUS keeps the report on
# the normal configure log rather than emitting it as a notice on stderr.
message(STATUS "*****************************************")
foreach(py_var IN ITEMS
    Python3_ROOT
    Python3_FOUND
    Python3_Interpreter_FOUND
    Python3_Development_FOUND
    Python3_LIBRARIES
    Python3_LIBRARY_DIRS
    Python3_INCLUDE_DIRS
    Python3_LINK_OPTIONS
    Python3_EXECUTABLE
    Python3_INTERPRETER_ID
    Python3_VERSION
    Python3_VERSION_MAJOR
    Python3_VERSION_MINOR
    Python3_NumPy_FOUND
    Python3_NumPy_INCLUDE_DIRS
    Python3_NumPy_VERSION
    Python3_SOABI
)
  message(STATUS "${py_var} ${${py_var}}")
endforeach()
message(STATUS "*****************************************")

set(PYTHON3_TARGET ${PROJECT_NAME}PY)

if(WIN32)
  # Allow to debug under windows, if debug versions of Python are missing:
  # link the release import library even in debug configurations.
  # Only a "_d" directly in front of the file extension of each list entry is
  # removed, so a "_d" elsewhere in the path (e.g. ".../john_doe/...") is left
  # untouched.
  list(TRANSFORM Python3_LIBRARIES REPLACE "_d(\\.[^/]+)$" "\\1")
endif()

####################### Target #######################
#   Define target library and configure properties   #
######################################################
swig_add_library(${PYTHON3_TARGET}
  TYPE SHARED
  LANGUAGE python
  SOURCES   foo.i
  OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}
)

# Library search path for the Python libraries, scoped to this target only.
# Unlike link_directories(), this does not have to precede the target's
# creation.
target_link_directories(${PYTHON3_TARGET} PRIVATE ${Python3_LIBRARY_DIRS})

# File name suffix of the extension module: "[.<SOABI>].pyd" on Windows and
# "[.<SOABI>].so" elsewhere. The SOABI tag is left out when it is empty so
# that no "..pyd" / "..so" suffix is produced.
if(WIN32)
  set(foo_py_module_suffix ".pyd")
else()
  set(foo_py_module_suffix ".so")
endif()
if(Python3_SOABI)
  string(PREPEND foo_py_module_suffix ".${Python3_SOABI}")
endif()

set_target_properties(${PYTHON3_TARGET} PROPERTIES
  OUTPUT_NAME "${PYTHON3_TARGET}"
  SUFFIX "${foo_py_module_suffix}"
  SWIG_USE_LIBRARY_INCLUDE_DIRECTORIES TRUE
)

# Nothing links against the Python module, so its dependencies are PRIVATE.
target_link_libraries(${PYTHON3_TARGET} PRIVATE
  ${STATIC_TARGET}
  ${Python3_LIBRARIES}
  Python3::NumPy
)
target_include_directories(${PYTHON3_TARGET} PRIVATE ${Python3_INCLUDE_DIRS})

# Options passed to the swig tool for this target.
# -threads makes the generated wrappers release the Python GIL around wrapped
# calls; wrapped code that uses the Python C API itself must be excluded from
# that (e.g. with %nothread in the interface file).
set(foo_swig_options -threads -w362,509)
if(NOT WIN32)
  list(APPEND foo_swig_options -DSWIGWORDSIZE64)
endif()
set_property(TARGET ${PYTHON3_TARGET} PROPERTY SWIG_COMPILE_OPTIONS ${foo_swig_options})

if(WIN32)
  # pyconfig.h is not autogenerated on Windows. To avoid warnings, we
  # add a compiler directive.
  # target_compile_definitions() appends to the target's definitions; the
  # directory-level definitions still apply to the target automatically, so
  # they do not need to be copied into the target property by hand.
  target_compile_definitions(${PYTHON3_TARGET} PRIVATE HAVE_ROUND)
endif()

Additionally, the project uses numpy.i from the official numpy release.

test.py

##
# \file        test.py
# Minimal reproducer: imports the SWIG-generated FooPY module, fetches the
# wrapped vector and converts it to a NumPy array.
import sys
import numpy as np    

# Placeholder: directory that contains the built FooPY module.
libPath = "<path where FooPY is located>"
sys.path.insert(0, libPath)

import FooPY as foo

# Fetch the wrapped vector and print it as returned by the wrapper.
myFoo = foo.Foo()
data = myFoo.getVector()
print(data)

# the script crashes after the following line
data = myFoo.getVector().asNpArray()
print(data)

Edit:

I found that the issue is somehow related to the -threads option of SWIG; the implementation works without this option. Basically, the option releases the Python GIL whenever a library method is called (which is required by the base library). Is it possible that PyArray_SimpleNew may only be called while the Python interpreter is "locked" (i.e. the GIL is held)? And is there a way to temporarily acquire the GIL in C++, or to exclude the asNpArray method from this -threads configuration in SWIG?

CodePudding user response:

Given the following toy example we can see what -threads does in our generated code.

%module test    
void whatever();

Generally when you use -threads it changes the generated code to look like this:

#ifdef __cplusplus
extern "C" {
#endif
SWIGINTERN PyObject *_wrap_whatever(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
  PyObject *resultobj = 0;

  if (!PyArg_ParseTuple(args,(char *)":whatever")) SWIG_fail; <--- parsing arguments
  {
    SWIG_PYTHON_THREAD_BEGIN_ALLOW; <-- drop GIL
    whatever();                     <-- run wrapped function
    SWIG_PYTHON_THREAD_END_ALLOW;   <-- pick up GIL again
  }
  resultobj = SWIG_Py_Void();       <-- handle return value
  return resultobj;
fail:
  return NULL;
}

Here SWIG_PYTHON_THREAD_BEGIN_ALLOW temporarily drops the GIL just before calling the actual C or C++ function from your wrapper, and SWIG_PYTHON_THREAD_END_ALLOW picks it up again immediately after the call.

Normally this is exactly what you want because inside your function you're not touching the python interpreter at all and it can freely go about its business in other threads whilst you work.

With your %extend method however (and other code that uses the python interpreter too) this is a trap - you need the GIL to safely do the work that you want to do, but because you called SWIG with -threads you don't have the lock.

The simplest option is to use %nothread to tell SWIG not to drop the GIL for a specific function. In your case that would be the method implemented via %extend; the syntax is shown here for the toy example:

%module test

%nothread whatever;

void whatever();

You can then verify that this does indeed stop SWIG from inserting SWIG_PYTHON_THREAD_BEGIN_ALLOW into the generated code for that function.

Assuming the function you're calling really is just a thin wrapper around PyArray_SimpleNew as shown, it probably makes sense to exclude just the one or two specific functions using %nothread like that. You can manually acquire and release the GIL as you see fit if you have a more complicated function, but the all-or-nothing approach probably fits most use cases well.

  • Related