Skip to content

Commit

Permalink
Fixed some compilation errors, plugged some memory leaks in the Pytho…
Browse files Browse the repository at this point in the history
…n interface and fixed a bug in the distinguishing_sequences_fill strategy
  • Loading branch information
RobBa committed Nov 6, 2024
1 parent 9c1e385 commit ab86843
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 132 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ void distinguishing_sequence_fill::pre_compute(std::unique_ptr<apta>& aut, std::
* @brief Concatenates prefix and suffix efficiently, returns a new vector with the result.
*/
std::vector<int> distinguishing_sequence_fill::concat_prefsuf(const std::vector<int>& pref, const std::vector<int>& suff) const {
std::vector<int> res(pref.size() + suff.size());
std::vector<int> res;
res.reserve(pref.size() + suff.size());
res.insert(res.end(), pref.begin(), pref.end());
res.insert(res.end(), suff.begin(), suff.end());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class distinguishing_sequence_fill : public ii_base {
inline void add_data_to_tree(std::unique_ptr<apta>& aut, const std::vector<int>& seq, const int reverse_type, const float confidence);

protected:
const int MIN_BATCH_SIZE = 512;
const int MIN_BATCH_SIZE = 256;
const int MAX_LEN = 30;

inline static std::vector< std::vector<int> > m_suffixes;
Expand Down
8 changes: 7 additions & 1 deletion source/active_learning/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,13 @@ file to the python script that you want to execute. The other parameters are as
- aptafile: This is the relative path from the executable to the network. It will be provided to the python script upon loading the network.
- input-file: The last argument of flexfringe by convention. This is the relative path to the python-script.

**Important**: We highly suggest you test and debug your python scripts first if you intend to write your own custom one's, because errors in the Python script will not be passed on to C++ and error messages originating from Python will not appear, and the program might continue running.
**Important**: We highly suggest you test and debug your Python scripts on their own first if you intend to write your own custom ones, because errors are easier to spot and debug directly in the script.

**Pitfall in PyTorch**: When running inference with the model, use both `model.eval()` and `with torch.no_grad():`; otherwise
correct output of the model does not seem to be guaranteed.

# Debugging

- To run valgrind on the Python interface, download the valgrind-python.supp file from the [cpython-github](https://github.com/python/cpython/blob/main/Misc/valgrind-python.supp), and then run it using the flag

--suppressions=\[some path\]/valgrind-python.supp
37 changes: 4 additions & 33 deletions source/active_learning/system_under_learning/nn_sul_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,51 +24,22 @@ using namespace std;

#ifdef __FLEXFRINGE_PYTHON

/**
 * @brief Stores an item at the given index of a Python list via PyList_SetItem.
 *
 * Prints a message to stderr and throws if PyList_SetItem reports failure (-1).
 *
 * @param p_list The Python list to write into.
 * @param p_item The Python object to store.
 * @param idx The index at which the object is stored.
 */
void nn_sul_base::set_list_item(PyObject* p_list, PyObject* p_item, const int idx) const {
    // Success path first: anything other than -1 means the item was stored.
    if (PyList_SetItem(p_list, idx, p_item) != -1) {
        return;
    }

    cerr << "Error when setting items in python-vector." << endl;
    throw bad_alloc();
}

/**
 * @brief Takes a list of C++ strings, converts them into a Python list of strings.
 *
 * Element i of c_list is converted with PyUnicode_FromString and stored at index i
 * of p_list_out. NOTE(review): p_list_out presumably has to be pre-sized to at least
 * c_list.size() entries by the caller - confirm against the call sites.
 *
 * @param p_list_out The Python list to write into.
 * @param c_list The C++ vector of strings.
 * @throws std::bad_alloc if a string cannot be converted to a Python string, or if
 *         storing an item in the list fails.
 */
void nn_sul_base::strings_to_pylist(PyObject* p_list_out, const vector<string>& c_list) const {
    // set_list_item takes an int index, so convert the size once instead of
    // comparing a signed counter against size_t on every iteration.
    const int n_items = static_cast<int>(c_list.size());

    for (int i = 0; i < n_items; ++i) {
        PyObject* p_symbol = PyUnicode_FromString(c_list[i].c_str());
        if (p_symbol == nullptr) {
            // Do not store a NULL slot in the list: Python code touching it would crash.
            cerr << "Error when converting string to python-string." << endl;
            throw bad_alloc();
        }
        set_list_item(p_list_out, p_symbol, i);
    }
}

/**
 * @brief Maps every symbol id in c_list to its string representation, using the
 * internal inputdata structure as a mapping, and writes the result as Python
 * strings into p_list_out (element i goes to index i).
 *
 * Each symbol is converted and stored exactly once, directly via PyList_SET_ITEM.
 *
 * WARNING: If p_list_out already has elements, then we create a memory leak here.
 *
 * @param p_list_out The Python list to write into.
 * @param c_list The C++ vector of symbol ids.
 * @throws std::bad_alloc if a symbol string cannot be converted to a Python string.
 */
void nn_sul_base::input_sequence_to_pylist(PyObject* p_list_out, const vector<int>& c_list) const {
    static inputdata* id = inputdata_locator::get();

    Py_ssize_t pos = 0;
    for (const int symbol : c_list) {
        PyObject* p_symbol = PyUnicode_FromString(id->get_symbol(symbol).c_str());
        if (p_symbol == nullptr) {
            // Do not store a NULL slot in the list: Python code touching it would crash.
            cerr << "Error when converting symbol to python-string." << endl;
            throw bad_alloc();
        }
        PyList_SET_ITEM(p_list_out, pos, p_symbol);
        ++pos;
    }
}

/**
Expand Down
19 changes: 7 additions & 12 deletions source/active_learning/system_under_learning/nn_sul_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,22 @@
#ifndef _NN_SUL_BASE_H_
#define _NN_SUL_BASE_H_

#ifdef __FLEXFRINGE_PYTHON

#define PY_SSIZE_T_CLEAN // recommended, see https://docs.python.org/3/extending/extending.html#a-simple-example
#include <Python.h> // IMPORTANT: Python.h must be first import. See https://docs.python.org/3/extending/extending.html

#include "parameters.h"
#include "sul_base.h"
#include "parameters.h"

#include <cassert>
#include <string>
#include <cassert>
#include <unordered_map>

#ifdef __FLEXFRINGE_PYTHON

#define PY_SSIZE_T_CLEAN // recommended, see https://docs.python.org/3/extending/extending.html#a-simple-example
#include <Python.h> // IMPORTANT: Python.h must be first import. See https://docs.python.org/3/extending/extending.html

class nn_sul_base : public sul_base {
friend class base_teacher;
friend class eq_oracle_base;

private:
void strings_to_pylist(PyObject* p_list_out, const std::vector<std::string>& c_list) const;

protected:
PyObject* p_module;
PyObject* p_model_path;
Expand All @@ -45,8 +42,6 @@ class nn_sul_base : public sul_base {

bool is_member(const std::vector<int>& query_trace) const = 0;
const int query_trace(const std::vector<int>& query_trace, inputdata& id) const = 0;

inline void set_list_item(PyObject* pylist, PyObject* item, const int idx) const;

virtual void
init_types() const = 0; // we need to set the internal types of flexfringe according to the types we expect
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,16 @@ using namespace std;

#ifdef __FLEXFRINGE_PYTHON

/**
* @brief Generic function that prints an error when the output of the python script is not as expected.
*
*/
void nn_weighted_output_sul::print_p_error() const {
std::cerr << "Something went wrong in the Python script, see line below. Terminating program" << std::endl;
PyErr_Print();
exit(EXIT_FAILURE);
}

/**
 * @brief Membership query. This implementation does not inspect the trace and
 * reports every trace as a member.
 *
 * @param query_trace The trace to check (unused).
 * @return Always true.
 */
bool nn_weighted_output_sul::is_member(const std::vector<int>& query_trace) const {
    return true;
}

/**
Expand Down Expand Up @@ -78,7 +88,7 @@ const int nn_weighted_output_sul::query_trace(const std::vector<int>& query_trac
vector<vector<float>> nn_weighted_output_sul::compile_hidden_rep(PyObject* p_result, const int offset) const {

static const int HIDDEN_STATE_SIZE = static_cast<int>(PyLong_AsLong(
PyList_GetItem(p_result, static_cast<Py_ssize_t>(offset)))); // get first list, then return its length
PyList_GET_ITEM(p_result, static_cast<Py_ssize_t>(offset)))); // get first list, then return its length
const int n_sequences = static_cast<int>((static_cast<int>(PyList_Size(p_result)) - 2) / HIDDEN_STATE_SIZE);
vector<vector<float>> representations(n_sequences);
for (int i = 0; i < n_sequences; ++i) {
Expand Down Expand Up @@ -112,12 +122,12 @@ nn_weighted_output_sul::get_type_and_states(const std::vector<int>& query_trace,
input_sequence_to_pylist(p_list, query_trace);

PyObject* p_result = PyObject_CallOneArg(query_func, p_list);
if (!PyList_Check(p_result))
throw std::runtime_error("Something went wrong, the Network did not return a list. What happened?");
if (p_result==NULL || !PyList_Check(p_result))
print_p_error();

// by convention, python script must return a list. list[0]=prediction, list[1]=embedding_dim, rest is
// hidden_representations 1D
PyObject* p_type = PyList_GetItem(p_result, static_cast<Py_ssize_t>(0));
PyObject* p_type = PyList_GET_ITEM(p_result, static_cast<Py_ssize_t>(0));
if (!PyLong_Check(p_type)) {
cerr << "Problem with type as returned by Python script. Is it a proper int?" << endl;
throw exception(); // force the catch block
Expand All @@ -130,9 +140,11 @@ nn_weighted_output_sul::get_type_and_states(const std::vector<int>& query_trace,
if (type > id.get_alphabet_size()) {
id.add_type(PyUnicode_AsUTF8(p_type));
}

vector<vector<float>> representations = compile_hidden_rep(p_result, 1);


Py_DECREF(p_list);
Py_DECREF(p_result);

return make_pair(type, representations);
}

Expand All @@ -148,19 +160,18 @@ nn_weighted_output_sul::get_type_confidence_and_states(const std::vector<int>& q
input_sequence_to_pylist(p_list, query_trace);

PyObject* p_result = PyObject_CallOneArg(query_func, p_list);
if (!PyList_Check(p_result))
throw std::runtime_error("Something went wrong, the Network did not return a list. What happened?");
if (p_result==NULL || !PyList_Check(p_result))
print_p_error();

// by convention, python script must return a list. list[0]=prediction, list[1]=confidence_in_prediction,
// list[2]=embedding_dim, rest is hidden_representations 1D

PyObject* p_type = PyList_GetItem(p_result, static_cast<Py_ssize_t>(0));
PyObject* p_type = PyList_GET_ITEM(p_result, static_cast<Py_ssize_t>(0));
if (!PyUnicode_CheckExact(p_type)) {
cerr << "Problem with type as returned by Python script. Is it a proper int?" << endl;
throw exception(); // force the catch block
}

PyObject* p_confidence = PyList_GetItem(p_result, static_cast<Py_ssize_t>(1));
PyObject* p_confidence = PyList_GET_ITEM(p_result, static_cast<Py_ssize_t>(1));
if (!PyFloat_CheckExact(p_confidence)) {
cerr << "Problem with type as returned by Python script. Is it a proper float?" << endl;
throw exception(); // force the catch block
Expand All @@ -173,92 +184,55 @@ nn_weighted_output_sul::get_type_confidence_and_states(const std::vector<int>& q

float confidence = static_cast<float>(PyFloat_AsDouble(p_confidence));
// vector< vector<float> > representations = compile_hidden_rep(p_result, 2);


Py_DECREF(p_list);
Py_DECREF(p_result);

return make_tuple(type, confidence, vector<vector<float>>());
// return make_tuple(type, confidence, representations);
}

const vector<pair<int, float>>
nn_weighted_output_sul::get_type_confidence_batch(const vector<vector<int>>& query_traces, inputdata& id) const {

vector<pair<int, float>> res;
res.reserve(query_traces.size());
for (int i = 0; i < query_traces.size(); ++i) {
res.emplace(res.end(), 1, 0.8);
PyObject* p_list = PyList_New(query_traces.size());
for(int i=0; i<query_traces.size(); i++){
PyObject* p_tmp = PyList_New(query_traces[i].size());
input_sequence_to_pylist(p_tmp, query_traces[i]);
PyList_SET_ITEM(p_list, i, p_tmp);
}
return res;

/* PyObject* p_list = PyList_New(query_traces.size());
for(int i=0; i<query_traces.size(); i++){
PyObject* p_tmp = PyList_New(query_traces[i].size());
input_sequence_to_pylist(p_tmp, query_traces[i]);
PyList_SetItem(p_list, i, p_tmp);
//Py_DECREF(p_tmp); // making sure p_tmp does not get deleted after loop
} */

/* cout << "refcount of p_list: " << Py_REFCNT(p_list) << endl;
cout << "refcount of query_func: " << Py_REFCNT(query_func) << endl;
cout << "size of p_list: " << PyList_Size(p_list) << endl;
cout << "refcount of p_list[0]: " << Py_REFCNT(PyList_GetItem(p_list, static_cast<Py_ssize_t>(0))) << endl;
cout << "size of p_list[0]: " << PyList_Size(PyList_GetItem(p_list, static_cast<Py_ssize_t>(0))) << endl;
if(query_traces[0].size() > 0)
cout << "refcount of p_list[0][0]: " << Py_REFCNT(PyList_GetItem(PyList_GetItem(p_list,
static_cast<Py_ssize_t>(0)), 0)) << endl;
cout << "Is initialized: " << Py_IsInitialized() << endl; */

/* PyObject* p_result;
try{
p_result = PyObject_CallOneArg(query_func, p_list);
}
catch(...){
cout << "Running gc and trying again" << endl;
PyRun_SimpleString("gc.collect()");
p_result = PyObject_CallOneArg(query_func, p_list);
}
if (!PyList_Check(p_result))
throw std::runtime_error("Something went wrong, the Network did not return a list. What happened?");
vector< pair<int, float> > res;
for(int i=0; i<query_traces.size(); i++){
PyObject* p_type = PyList_GetItem(p_result, static_cast<Py_ssize_t>(i*2));
if(!PyUnicode_CheckExact(p_type)){
cerr << "Problem with type as returned by Python script. Is it a proper int?" << endl;
throw exception(); // force the catch block
}

PyObject* p_confidence = PyList_GetItem(p_result, static_cast<Py_ssize_t>(i*2 + 1));
if(!PyFloat_CheckExact(p_confidence)){
cerr << "Problem with type as returned by Python script. Is it a proper float?" << endl;
throw exception(); // force the catch block
}
PyObject* p_result = PyObject_CallOneArg(query_func, p_list);
if (p_result == NULL || !PyList_Check(p_result))
print_p_error();

int type = id.get_reverse_type(PyUnicode_AsUTF8(p_type));
if(type > id.get_alphabet_size()){
id.add_type(PyUnicode_AsUTF8(p_type));
}
vector< pair<int, float> > res;
for(int i=0; i<query_traces.size(); i++){

res.emplace_back(type, static_cast<float>(PyFloat_AsDouble(p_confidence)));
PyObject* p_type = PyList_GET_ITEM(p_result, static_cast<Py_ssize_t>(i*2));
if(!PyUnicode_CheckExact(p_type)){
cerr << "Problem with type as returned by Python script. Is it a proper int?" << endl;
throw exception(); // force the catch block
}

//cout << "1: " << Py_REFCNT(p_type) << endl;
//cout << "2: " << Py_REFCNT(p_confidence) << endl;
PyObject* p_confidence = PyList_GET_ITEM(p_result, static_cast<Py_ssize_t>(i*2 + 1));
if(!PyFloat_CheckExact(p_confidence)){
cerr << "Problem with type as returned by Python script. Is it a proper float?" << endl;
throw exception(); // force the catch block
}

//Py_DECREF(p_type);
//Py_DECREF(p_confidence);
int type = id.get_reverse_type(PyUnicode_AsUTF8(p_type));
if(type > id.get_alphabet_size()){
id.add_type(PyUnicode_AsUTF8(p_type));
}
res.emplace_back(type, static_cast<float>(PyFloat_AsDouble(p_confidence)));
}

/* for(int i=0; i<query_traces.size(); i++){
for(int j=0; j<query_traces[i].size(); j++){
Py_DECREF(PyList_GetItem(PyList_GetItem(p_list, static_cast<Py_ssize_t>(i)), j));
}
Py_DECREF(PyList_GetItem(p_list, static_cast<Py_ssize_t>(i)));
} */
/* Py_DECREF(p_list);
Py_DECREF(p_list);
Py_DECREF(p_result);

assert(res.size() == query_traces.size());
return res; */
return res;
}

/**
Expand All @@ -281,7 +255,12 @@ const double nn_weighted_output_sul::get_sigmoid_output(const std::vector<int>&
input_sequence_to_pylist(p_list, query_trace);

PyObject* p_query_result = PyObject_CallOneArg(query_func, p_list);
return PyFloat_AsDouble(p_query_result);
double res = PyFloat_AsDouble(p_query_result);

Py_DECREF(p_list);
Py_DECREF(p_query_result);

return res;
}

/**
Expand Down Expand Up @@ -327,8 +306,9 @@ nn_weighted_output_sul::get_weights_and_state(const std::vector<int>& query_trac
input_sequence_to_pylist(p_list, query_trace);

PyObject* p_result = PyObject_CallOneArg(query_func, p_list);
if (!PyTuple_Check(p_result))
throw std::runtime_error("Something went wrong, the Network did not return a tuple. What happened?");
if (p_result==NULL || !PyTuple_Check(p_result))
print_p_error();

assert(static_cast<int>(PyTuple_Size(p_result)) == 2);

PyObject* p_weights = PyTuple_GET_ITEM(p_result, static_cast<Py_ssize_t>(0));
Expand All @@ -349,17 +329,28 @@ nn_weighted_output_sul::get_weights_and_state(const std::vector<int>& query_trac
PyObject* resp = PyList_GET_ITEM(p_weights, static_cast<Py_ssize_t>(i));
res[i] = static_cast<float>(PyFloat_AsDouble(resp));
}

Py_DECREF(p_list);
Py_DECREF(p_result);

return make_pair(res, state);
} else if (PyFloat_Check(p_weights)) {
// binary acceptor model
vector<float> res(1);
res[0] = static_cast<float>(PyFloat_AsDouble(p_weights));

Py_DECREF(p_list);
Py_DECREF(p_result);

return make_pair(res, state);
} else {
Py_DECREF(p_list);
Py_DECREF(p_result);

throw std::runtime_error("Something went wrong, the Network neither returned a float (binary acceptor model\
, nor did it return a list (language model)). What happened?");
}
}
}

/**
* @brief Destroy the nn sigmoid sul::nn sigmoid sul object
Expand Down
Loading

0 comments on commit ab86843

Please sign in to comment.