diff --git a/docs/deep_dive/arch.rst b/docs/deep_dive/arch.rst index a2ef312e..e8eb7b32 100644 --- a/docs/deep_dive/arch.rst +++ b/docs/deep_dive/arch.rst @@ -18,9 +18,13 @@ This document is intended for developers who want to understand the architecture process logic for specific input data with strict mathematical description, guiding the source code. +- The :ref:`operator-and-model-test` describes how operators and models + are tested and records the test results. + .. toctree:: :maxdepth: 2 system infer_flow math_formalization + operator_and_model_test diff --git a/docs/deep_dive/operator_and_model_test.rst b/docs/deep_dive/operator_and_model_test.rst new file mode 100644 index 00000000..559ee7e7 --- /dev/null +++ b/docs/deep_dive/operator_and_model_test.rst @@ -0,0 +1,253 @@ + +.. _operator-and-model-test: + +*********************** +Operator and Model Test +*********************** + +.. contents:: + +Test Method +=========== + +Operator +-------- + +First generate test data with test_ops.py, then verify the +generated data with test_op.cc by running the following commands +(test_op.cc is compiled with the Makefile): + +.. code-block:: + + python tests/test_ops.py + ./test_op 0/1/2 (0 is cpu, 1 is gpu, and 2 is formal) + +Model +----- + +First dump the CVM model with main.py and the corresponding yaml file, then +verify the generated model with test_model.cc by running the following +commands (test_model.cc is compiled with the Makefile): + +.. 
code-block:: + + python main.py /tests/mrt/model_zoo/yaml_file + ./test_model + +Test Results +============ + +Operator +-------- + +The following are the test results of the operator on different devices: + ++-------------------+-------------------------+ +| operator | results | ++===================+=========================+ +| broadcast_sub | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| broadcast_add | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| broadcast_mul | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| broadcast_max | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| broadcast_div | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| broadcast_greater | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| max_pool2d | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| dense | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| sum | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| max | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| slice_like | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| tile | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| repeat | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| concatenate | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| transpose | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| take | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| get_valid_counts | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| strided_slice | cpu & formal & gpu pass | ++-------------------+-------------------------+ +| conv2d | cpu & formal & gpu pass 
| ++-------------------+-------------------------+ +| upsampling | cpu & formal & gpu pass | ++-------------------+-------------------------+ + + +Model +----- + +The following are the test results of the model on different devices: + ++---------------------------+-------------------------+ +| model | results | ++===========================+=========================+ +| alexnet | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| cifar_resnet110_v1 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| cifar_resnet110_v2 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| cifar_resnet20_v1 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| cifar_resnet20_v2 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| cifar_resnet56_v1 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| cifar_resnet56_v2 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| cifar_resnext29_16x64d | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| cifar_wideresnet16_10 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| cifar_wideresnet28_10 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| cifar_wideresnet40_8 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| densenet121 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| densenet161 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| densenet169 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| densenet201 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| inceptionv1_kinetics400 | cpu & formal & gpu pass | 
++---------------------------+-------------------------+ +| mobilenet0.25 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| mobilenet0.5 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| mobilenet0.75 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| mobilenet1.0 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| mobilenetv2_0.25 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| mobilenetv2_0.5 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| mobilenetv2_0.75 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| mobilenetv2_1.0 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| mobilenetv3_large | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet101_v1b | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet101_v1b_kinetics400 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet101_v1c | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet101_v1 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet101_v1d_0.73 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet101_v1d_0.76 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet101_v1d | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet101_v1s | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet101_v2 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet152_v1b | cpu & formal & gpu pass | 
++---------------------------+-------------------------+ +| resnet152_v1b_kinetics400 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet152_v1c | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet152_v1 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet152_v1d | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet152_v1s | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet152_v2 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet18_v1b_0.89 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet18_v1b | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet18_v1b_kinetics400 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet18_v1 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet18_v2 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet34_v1b | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet34_v1b_kinetics400 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet34_v1 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet34_v2 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet50_v1b | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet50_v1b_hmdb51 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet50_v1b_kinetics400 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet50_v1b_sthsthv2 | cpu & formal & gpu pass | 
++---------------------------+-------------------------+ +| resnet50_v1c | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet50_v1 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet50_v1d_0.11 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet50_v1d_0.37 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet50_v1d_0.48 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet50_v1d_0.86 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet50_v1d | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet50_v1s | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| resnet50_v2 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| squeezenet1.0 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| squeezenet1.1 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| ssd_512_mobilenet1.0_voc | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| ssd_512_resnet50_v1_voc | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| vgg11_bn | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| vgg11 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| vgg13_bn | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| vgg13 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| vgg16_bn | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| vgg16 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| 
vgg16_ucf101 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| vgg19_bn | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| vgg19 | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| yolo3_darknet53_voc | cpu & formal & gpu pass | ++---------------------------+-------------------------+ +| yolo3_mobilenet1.0_voc | cpu & formal & gpu pass | ++---------------------------+-------------------------+ diff --git a/tests/Makefile b/tests/Makefile index c23b5b80..6bb6824e 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,17 +1,11 @@ -all: test_model_cpu test_model_gpu test_model_formal test_op +all: test_model test_op -test_model_cpu: test_model.cc - g++ -o test_model_cpu test_model.cc -I../include -L../build/cpu/ -lcvm_runtime_cpu --std=c++11 -pthread -fopenmp -I../ -ldl -g -DUSE_GPU=0 -DCVM_PROFILING -fsigned-char +test_model: test_model.cc + g++ -o test_model test_model.cc -I../include -L../build/ -lcvm_runtime --std=c++11 -pthread -fopenmp -I../ -ldl -g -fsigned-char -test_model_formal: test_model.cc - g++ -o test_model_formal test_model.cc -I../include -L../build/formal/ -lcvm_runtime_formal --std=c++11 -pthread -fopenmp -I../ -ldl -g -DUSE_GPU=0 -fsigned-char - -test_model_gpu: test_model.cc - g++ -o test_model_gpu test_model.cc -I../include -L../build/cpu/ -L../build/gpu/ -lcvm_runtime_cuda -lcudart -lcuda --std=c++11 -pthread -fopenmp -I./ -ldl -g -DUSE_GPU=1 -DCVM_PROFILING -fsigned-char - -test_op: - g++ -o test_op test_op.cc -I../include -L../build/gpu -lcvm_runtime_cuda -lcuda -lcudart --std=c++11 -pthread -fopenmp -I../ -ldl -DCVM_PROFILING -fsigned-char -DUSE_GPU +test_op : test_op.cc + g++ -o test_op test_op.cc -I../include -L../build/ -lcvm_runtime --std=c++11 -pthread -fopenmp -I../ -ldl -g -fsigned-char clean: rm -f test_model_cpu test_model_formal test_model_gpu test_op diff --git a/tests/deprecated/test_ops.py b/tests/deprecated/test_ops.py 
index 3bec01eb..56ae965a 100644 --- a/tests/deprecated/test_ops.py +++ b/tests/deprecated/test_ops.py @@ -1,20 +1,20 @@ from mxnet import nd import mxnet as mx import numpy as np -import topi.testing +import tvm.topi.testing import tvm import os import math import random -import ops_generator as opg -from ops_generator import std_int_constraint, iter_constraint, \ +import mrt.ops_generator as opg +from mrt.ops_generator import std_int_constraint, iter_constraint, \ list_constraint, gen_non_constraint, range_constraint, \ rand_constraint, shape_constraint -from ops_generator import IntIter, NoneIter, ConstantIter, ConcatIter, \ +from mrt.ops_generator import IntIter, NoneIter, ConstantIter, ConcatIter, \ VectorIter, PermutationIter, ShapeIter, AllOverIter, BoolIter, \ RepeatIter, RandomBoolIter, RandomVectorIter, RandomIter -import utils +import mrt.utils INT32 = "int32" @@ -189,7 +189,7 @@ def strided_slice(data, begin, end, strides): % (begin[i], b, end[i], e, s)) data_npy = np.array(data) - out_npy = topi.testing.strided_slice_python(data_npy, begin, end, strides) + out_npy = tvm.topi.testing.strided_slice_python(data_npy, begin, end, strides) return [out_npy] op_units = opg.OpUnitIter([data, begin, end, strides], 1, [cstr_func]) @@ -379,8 +379,8 @@ def verify_conv2d(): strides, dilation, padding, num_filter, groups, no_bias=(not use_bias)) - dw_np = topi.testing.dilate_python(w_np, (1, 1, *dilation)) - c_np = topi.testing.conv2d_nchw_python(a_np, dw_np, strides, padding) + dw_np = tvm.topi.testing.dilate_python(w_np, (1, 1, *dilation)) + c_np = tvm.topi.testing.conv2d_nchw_python(a_np, dw_np, strides, padding, groups) if use_bias: c_np += b_np.reshape(num_filter, 1, 1) outs = [c_np] @@ -541,7 +541,8 @@ def upsampling(data, scale): if scale == 0: raise ValueError("scale must > 0 vs. 
" + str(scale)) a_np = np.array(data) - b_np = topi.testing.upsampling_python(a_np, scale, "NCHW") + #b_np = tvm.topi.testing.upsampling_python(a_np, scale, "NCHW") + b_np = tvm.topi.testing.resize2d_python(a_np, (scale, scale), "NCHW") return [b_np] op_units = opg.OpUnitIter([data, scale], 1) @@ -649,7 +650,7 @@ def verify_get_valid_counts(): def get_valid_counts(data, score_threshold): np_data = np.array(data) dshp = np_data.shape - if len(dshp) != 3 or (dshp[2] <= 2): + if len(dshp) != 3 or (dshp[2] < 2): raise ValueError("data shape error: " + str(dshp)) batch_size, num_anchor, elem_length = dshp np_out1 = np.zeros(shape=(batch_size,)) @@ -712,13 +713,13 @@ def verify_non_max_suppression(): dshp = opg.ExtendIter(batch, n, k) datas = [] for i in range(len(dshp)): - shp = dshp[i] - data = [] + shp = dshp[i] # (B, N, K) + data = [] # (N, K) for n in range(shp[1]): elem = rand_constraint(-20, 20, 6)() elem[0] = random.randint(-1, 10) data.append(elem) - datas.append([[data]]) + datas.append([[data]]) # (1, 1, N, K) data = ConcatIter(*datas) valid_count = RandomVectorIter(1, 32, 1, 10) iou = ConcatIter( @@ -755,22 +756,24 @@ def non_max_suppression(data, valid_count, iou, force_suppress, top_k, id_index, score_index, coord_start, max_output_size, return_indices, invalid_to_bottom): device = 'llvm' - ctx = tvm.context(device, 0) - data_np, valid_count_np = np.array(data, dtype="float32"), np.array(valid_count, dtype="int32") + ctx = tvm.device(device, 0) + data_np, valid_count_np = np.array(data, dtype="int32"), np.array(valid_count, dtype="int32") + indices_np = np.array([np.arange(data_np.shape[1])], dtype="int32") data_nd, valid_count_nd = tvm.nd.array(data_np, ctx), tvm.nd.array(valid_count_np, ctx) + indices_nd = tvm.nd.array(indices_np, ctx) dshp = data_nd.shape - data_tvm = tvm.placeholder(dshp, name="data", dtype="float32") - valid_count_tvm = tvm.placeholder((dshp[0],), dtype="int32", name="valid_count") - with tvm.target.create(device): - out = 
topi.vision.non_max_suppression(data_tvm, valid_count_tvm, + data_tvm = tvm.te.placeholder(dshp, name="data", dtype="int32") + valid_count_tvm = tvm.te.placeholder((dshp[0],), dtype="int32", name="valid_count") + indices_tvm = tvm.te.placeholder((dshp[0],dshp[1]), dtype="int32", name="indices") + with tvm.target.Target(device): + out = tvm.topi.vision.non_max_suppression(data_tvm, valid_count_tvm, indices_tvm, max_output_size, iou/100, force_suppress, top_k, coord_start, score_index, id_index, return_indices, invalid_to_bottom) - s = topi.generic.schedule_nms(out) - + s = tvm.topi.generic.schedule_nms(out) out_nd = tvm.nd.array(np.zeros(dshp, dtype=data_tvm.dtype), ctx) - f = tvm.build(s, [data_tvm, valid_count_tvm, out], device) - f(data_nd, valid_count_nd, out_nd) + f = tvm.build(s, [data_tvm, valid_count_tvm, indices_tvm, out], device) + f(data_nd, valid_count_nd, indices_nd, out_nd) return [out_nd.asnumpy()] op_units.eval_data("non_max_suppression", non_max_suppression, True) @@ -818,14 +821,14 @@ def test_load(op_name, hsh, datadir="/data/ops_generator"): # 'dilate': attr['dilation'], # 'num_group': attr['groups'], # } - dw_np = topi.testing.dilate_python(w_np, dilation) - c_np = topi.testing.conv2d_nchw_python(a_np, dw_np, stride, padding) + dw_np = tvm.topi.testing.dilate_python(w_np, dilation) + c_np = tvm.topi.testing.conv2d_nchw_python(a_np, dw_np, stride, padding) print (c_np.flatten(), outs[0].flatten()) if __name__ == "__main__": - utils.log_init() + mrt.utils.log_init() # opg.clean_dir() # verify_transpose() # verify_concatenate() @@ -848,10 +851,10 @@ def test_load(op_name, hsh, datadir="/data/ops_generator"): # verify_broadcast('broadcast_sub') # verify_broadcast('broadcast_mul') # verify_broadcast('broadcast_maximum') - verify_broadcast('broadcast_div') + # verify_broadcast('broadcast_div') # verify_broadcast('broadcast_greater') # test_load("conv2d", "ffd9ad6afc62dd7541778a81d6529c9a2735fc0a") # verify_get_valid_counts() - # 
verify_non_max_suppression() + verify_non_max_suppression() diff --git a/tests/test_model.cc b/tests/test_model.cc index 9a565f89..670439e9 100644 --- a/tests/test_model.cc +++ b/tests/test_model.cc @@ -256,53 +256,84 @@ int test_models(int device_type = 0) { std::cout << device_type << " DDDDDD" << std::endl; std::string model_root = "/data1/"; auto model_dirs = { - "std_out/yolo_tfm", - "std_out/null", - "std_out/resnet50_mxg", - "std_out/ssd_512_mobilenet1.0_voc_tfm", - "std_out/resnet18_v1_tfm", - "std_out/resnet50_v2", - "std_out/qd10_resnet20_v2", - "std_out/trec", - // "new_cvm/yolo3_darknet53_voc/data", - "lz_model_storage/dcnet_mnist_v1/data", - "lz_model_storage/mobilenetv1.0_imagenet/data", - "lz_model_storage/resnet50_v1_imagenet/data", - "lz_model_storage/animal10/data", - "lz_model_storage/resnet50_v2/data", - "lz_model_storage/vgg16_gcv/data", - "lz_model_storage/sentiment_trec/data", - "lz_model_storage/vgg19_gcv/data", - "lz_model_storage/squeezenet_gcv1.1/data", - "lz_model_storage/squeezenet_gcv1.0/data", - // invalid has strange attribute in operator elemwise_add. 
- // "lz_model_storage/octconv_resnet26_0.250/data", - "std_out/resnet50_mxg/", - "std_out/resnet50_v2", - "std_out/qd10_resnet20_v2", - "std_out/random_3_0/", - "std_out/random_3_1/", - "std_out/random_3_2/", - "std_out/random_3_3/", - "std_out/random_3_4/", - "std_out/random_3_5/", - "std_out/random_4_0/", - "std_out/random_4_1/", - // "std_out/random_4_2/", - // "std_out/random_4_3/", - // "std_out/random_4_4/", - "std_out/random_4_5/", - "std_out/random_4_6/", - "std_out/random_4_7/", - "std_out/random_4_8/", - "std_out/random_4_9/", - "std_out/log2", - //"./tests/3145ad19228c1cd2d051314e72f26c1ce77b7f02/", - "std_out/lr_attr", - // "std_out/non_in", - "std_out/shufflenet", - "std_out/ssd", - "std_out/ssd_512_mobilenet1.0_coco_tfm/", + "alexnet_cvm", + // "cifar_resnet110_v1_cvm", + // "cifar_resnet110_v2_cvm", + // "cifar_resnet20_v1_cvm", + // "cifar_resnet20_v2_cvm", + // "cifar_resnet56_v1_cvm", + // "cifar_resnet56_v2_cvm", + // "cifar_resnext29_16x64d_cvm", + // "cifar_wideresnet16_10_cvm", + // "cifar_wideresnet28_10_cvm", + // "cifar_wideresnet40_8_cvm", + // "densenet121_cvm", + // "densenet161_cvm", + // "densenet169_cvm", + // "densenet201_cvm", + // "inceptionv1_kinetics400_cvm", + // "mobilenet0.25_cvm", + // "mobilenetv2_0.25_cvm", + // "mobilenetv3_large_cvm", + // "mobilenet0.5_cvm", + // "mobilenet0.75_cvm", + // "mobilenet1.0_cvm", + // "mobilenetv2_0.5_cvm", + // "mobilenetv2_0.75_cvm", + // "mobilenetv2_1.0_cvm", + // "resnet101_v1b_cvm", + // "resnet101_v1b_kinetics400_cvm", + // "resnet101_v1c_cvm", + // "resnet101_v1_cvm", + // "resnet101_v1d_0.73_cvm", + // "resnet101_v1d_0.76_cvm", + // "resnet101_v1d_cvm", + // "resnet101_v1s_cvm", + //"resnet101_v2_cvm", + // "resnet152_v1b_cvm", + // "resnet152_v1b_kinetics400_cvm", + // "resnet152_v1c_cvm", + // "resnet152_v1_cvm", + // "resnet152_v1d_cvm", + // "resnet152_v1s_cvm", + // "resnet152_v2_cvm", + // "resnet18_v1b_0.89_cvm", + // "resnet18_v1b_cvm", + // "resnet18_v1b_kinetics400_cvm", + 
// "resnet18_v1_cvm", + //"resnet18_v2_cvm", + // "resnet34_v1b_cvm", + // "resnet34_v1b_kinetics400_cvm", + // "resnet34_v1_cvm", + // "resnet34_v2_cvm", + // "resnet50_v1b_cvm", + // "resnet50_v1b_hmdb51_cvm", + // "resnet50_v1b_kinetics400_cvm", + // "resnet50_v1b_sthsthv2_cvm", + // "resnet50_v1c_cvm", + // "resnet50_v1_cvm", + // "resnet50_v1d_0.11_cvm", + // "resnet50_v1d_0.37_cvm", + // "resnet50_v1d_0.48_cvm", + // "resnet50_v1d_0.86_cvm", + // "resnet50_v1d_cvm", + // "resnet50_v1s_cvm", + // "resnet50_v2_cvm", + // "squeezenet1.0_cvm", + // "squeezenet1.1_cvm", + // "ssd_512_mobilenet1.0_voc_cvm", + // "ssd_512_resnet50_v1_voc_cvm", + // "vgg11_bn_cvm", + // "vgg11_cvm", + // "vgg13_bn_cvm", + // "vgg13_cvm", + // "vgg16_bn_cvm", + // "vgg16_cvm", + // "vgg16_ucf101_cvm", + // "vgg19_bn_cvm", + // "vgg19_cvm", + // "yolo3_darknet53_voc_cvm", + // "yolo3_mobilenet1.0_voc_cvm" }; for (auto dir : model_dirs) { auto ret = run_LIF(model_root + dir, device_type); diff --git a/tests/test_op.cc b/tests/test_op.cc index b85e7432..4bc851e1 100644 --- a/tests/test_op.cc +++ b/tests/test_op.cc @@ -50,11 +50,11 @@ struct CVMOpParam { std::string attrs; }; -#ifndef DEVICE -#define DEVICE 0 -#endif +// #ifndef DEVICE +// #define DEVICE 1 +// #endif -int ctx = APIDevTypeMap.at(DEVICE); +int ctx; int device_id = 0; /* @@ -128,6 +128,9 @@ std::function get_func( arg_ptr->arg_tcodes.push_back(kHandle); + // cvm.runtime.{version}.{op_name} + // cvm.runtime.cpu.conv2d + // cvm.runtime.cuda.dense auto op = param.func_name; int device_type = static_cast(ctx); std::string module_name = "cvm.runtime."; @@ -659,36 +662,40 @@ void test_op(string op_name) { // } } } -int main() { - test_op("max_pool2d"); // formal & cpu pass - test_op("upsampling"); // formal & cpu pass - test_op("dense"); // formal & cpu pass - test_op("conv2d"); // formal & cpu pass - test_op("sum"); // formal & cpu pass - test_op("max"); // formal & cpu pass - test_op("slice_like"); // formal & cpu pass - 
test_op("tile"); // formal & cpu pass - test_op("repeat"); // formal & cpu pass - test_op("get_valid_counts"); // formal & CPU pass - - test_op("strided_slice"); // formal & cpu pass - test_op("concatenate");// formal & cpu pass - test_op("transpose");// formal & cpu pass - test_op("take"); // formal & cpu pass - //test_op("clip"); // no test case - //test_op("cvm_clip"); // no test case - //test_op("cvm_right_shift"); // no test case - test_op("elemwise_add"); // formal & cpu pass +int main(int argc, char* argv[]) { + ctx = APIDevTypeMap.at(stoi(argv[1])); + // argv[1]=0 is cpu, argv[1]=1 is gpu, argv[1]=2 is formal + + test_op("max_pool2d"); // formal & cpu & gpu pass + //test_op("upsampling"); // formal & cpu pass + // test_op("dense"); // formal & cpu & gpu pass + //test_op("conv2d"); // formal & cpu pass + // test_op("sum"); // formal & cpu & gpu pass + // test_op("max"); // formal & cpu & gpu pass + // test_op("slice_like"); // formal & cpu & gpu pass + // test_op("tile"); // formal & cpu & gpu pass + // test_op("repeat"); // formal & cpu & gpu pass + // test_op("get_valid_counts"); // formal & CPU & gpu pass + + // test_op("strided_slice"); // formal & cpu & gpu pass + // test_op("concatenate");// formal & cpu & gpu pass + // test_op("transpose");// formal & cpu & gpu pass + // test_op("take"); // formal & cpu & gpu pass + // test_op("clip"); // no test case + // test_op("cvm_clip"); // no test case + // test_op("cvm_right_shift"); // no test case + //test_op("elemwise_add"); // no test case //test_op("elemwise_sub"); // no test case //test_op("where"); // no test case - test_op("non_max_suppression"); // formal & cpu pass - test_op("broadcast_sub"); // formal & cpu pass - test_op("broadcast_add"); // formal & cpu pass - test_op("broadcast_mul"); // formal & cpu pass - test_op("broadcast_max"); // formal & cpu pass - test_op("broadcast_div"); // formal & cpu pass - test_op("broadcast_greater"); // formal & cpu pass + //test_op("non_max_suppression"); // 
formal & cpu pass + // test_op("broadcast_sub"); // formal & cpu & gpu pass + // test_op("broadcast_add"); // formal & cpu & gpu pass + // test_op("broadcast_mul"); // formal & cpu & gpu pass + // test_op("broadcast_max"); // formal & cpu & gpu pass + // test_op("broadcast_div"); // formal & cpu & gpu pass + // test_op("broadcast_greater"); // formal & cpu & gpu pass + cout << "test device: "<< DeviceName(static_cast(ctx)) << endl; cout << "all tests finished" << endl; return 0; } diff --git a/tests/test_op2.cc b/tests/test_op2.cc new file mode 100644 index 00000000..31156cbb --- /dev/null +++ b/tests/test_op2.cc @@ -0,0 +1,2768 @@
+#include "test_op.cc"
+
+// Raw int32 buffer paired with the shape that describes it.
+struct data_and_shape
+{
+    int32_t *data;
+    vector<int64_t> shape;
+};
+
+// Runs one operator on the device selected by the global `ctx` and returns
+// its outputs.
+//
+// tshape : shape of every input, one entry per input. Extra trailing entries
+//          (callers pre-size the vector to inputs + outputs) are overwritten
+//          with the shapes inferred by the operator's FInferShape attribute.
+// tdata  : flattened int32 contents of every input, same order as tshape.
+// params : operator name plus the number of inputs and outputs.
+// attr   : parsed operator attributes.
+//
+// Returns one NDArray per operator output, filled with the values copied back
+// from the device.
+vector<NDArray> result(vector<vector<int64_t>> tshape, vector<vector<int32_t>> tdata, CVMOpParam params, NodeAttrs attr)
+{
+    const cvm::Op *op = cvm::Op::Get(params.func_name);
+    static auto &fextra_space =
+        Op::GetAttr<cvm::FOpExtraSpace>("FOpExtraSpace");
+    auto fextra = fextra_space.get(op, nullptr);
+    static auto &finfer_shape =
+        Op::GetAttr<cvm::FInferNodeEntryAttr<TShape>>("FInferShape");
+    auto finfer = finfer_shape.get(op, nullptr);
+    int64_t es[1]{0};
+    vector<TShape> ishape(params.num_inputs), oshape(params.num_outputs);
+    vector<DLTensor> args(params.num_inputs + params.num_outputs);
+
+    // Upload every input: stage it in a CPU tensor, then copy to the device.
+    for (int i = 0; i < ishape.size(); i++)
+    {
+        TShape shp(tshape[i].size());
+        for (int ti = 0; ti < shp.ndim(); ti++)
+            shp[ti] = tshape[i][ti];
+        ishape[i] = shp;
+        // NOTE(review): only a by-value copy of *dl is kept in args; the
+        // handle itself is never released here - confirm this is intended.
+        DLTensor *dl;
+        CVMArrayAlloc(tshape[i].data(), tshape[i].size(), dtype_code, dtype_bits, dtype_lanes, ctx, device_id, &dl);
+        args[i] = *dl;
+        DLTensor *cpu_tensor;
+        CVMArrayAlloc(tshape[i].data(), tshape[i].size(), dtype_code, dtype_bits, dtype_lanes, kDLCPU, 0, &cpu_tensor);
+        memcpy(cpu_tensor->data, tdata[i].data(), sizeof(int32_t) * tdata[i].size());
+        CVMArrayCopyFromTo(cpu_tensor, dl, nullptr);
+        CVMArrayFree(cpu_tensor);
+    }
+
+    finfer(attr, &ishape, &oshape);
+
+    // Store the inferred output shapes in the slots that follow the inputs.
+    // Callers pre-size tshape to inputs + outputs, so appending would leave
+    // the slots that are read below empty; resize and assign instead.
+    tshape.resize(params.num_inputs + params.num_outputs);
+    for (int i = 0; i < oshape.size(); i++)
+    {
+        vector<int64_t> out_shape;
+        // The bound must use the i-th output shape (it was indexed with j).
+        for (int j = 0; j < oshape[i].ndim(); j++)
+        {
+            out_shape.push_back(oshape[i][j]);
+        }
+        tshape[params.num_inputs + i] = out_shape;
+    }
+
+    // Allocate the device tensors that receive the outputs.
+    for (int i = 0; i < params.num_outputs; i++)
+    {
+        DLTensor *dl;
+        CVMArrayAlloc(tshape[params.num_inputs + i].data(), tshape[params.num_inputs + i].size(), dtype_code, dtype_bits, dtype_lanes, ctx, device_id, &dl);
+        args[params.num_inputs + i] = *dl;
+    }
+
+    // Ask the operator how much scratch space it needs, if it declares any.
+    vector<int> iprecs;
+    if (fextra != nullptr)
+    {
+        es[0] = fextra(attr, &ishape, &iprecs,
+                       DLContext{DLDeviceType(ctx), 0});
+    }
+    DLTensor *extra_space;
+    CVMArrayAlloc(es, 1,
+                  dtype_code, dtype_bits, dtype_lanes, ctx, device_id, &extra_space);
+
+    auto op_slice = get_func(params, &attr, args, params.num_inputs, extra_space);
+    op_slice();
+
+    // Copy every output back to the host and wrap it in an NDArray.
+    vector<NDArray> res(params.num_outputs);
+    for (int out_no = 0; out_no < params.num_outputs; out_no++)
+    {
+        const vector<int64_t> &out_shape = tshape[params.num_inputs + out_no];
+        int out_size = 1;
+        for (int i = 0; i < out_shape.size(); i++)
+        {
+            out_size *= out_shape[i];
+        }
+        vector<int32_t> cpu_output_tensor(out_size);
+        {
+            DLTensor *cpu_tensor;
+            CVMArrayAlloc(out_shape.data(), out_shape.size(), dtype_code,
+                          dtype_bits, dtype_lanes, kDLCPU, 0, &cpu_tensor);
+            CVMArrayCopyFromTo(&args[params.num_inputs + out_no], cpu_tensor, nullptr);
+            memcpy(cpu_output_tensor.data(), cpu_tensor->data,
+                   sizeof(int32_t) * out_size);
+            CVMArrayFree(cpu_tensor);
+        }
+        DLDataType dtype = {dtype_code, dtype_bits, dtype_lanes};
+        // NOTE(review): ctx is already the result of APIDevTypeMap.at() in
+        // main(); mapping it again, and then writing the NDArray through a
+        // host pointer, looks wrong for non-CPU devices - confirm.
+        DLContext dlctx = {APIDevTypeMap.at(ctx), 0};
+        NDArray ret = NDArray::Empty(out_shape, dtype, dlctx);
+        for (int i = 0; i < out_size; i++)
+        {
+            static_cast<int32_t *>(const_cast<DLTensor *>(ret.operator->())->data)[i] = cpu_output_tensor[i];
+        }
+        res[out_no] = ret;
+    }
+    // Release the scratch buffer exactly once, after all outputs are read
+    // (it used to be freed once per output inside the loop above).
+    CVMArrayFree(extra_space);
+    return res;
+}
+
+// NDArray
+// Runs the "conv2d" operator.
+//
+// data     : input tensor, NCHW layout.
+// weight   : kernel tensor, OIHW layout.
+// padding, strides, dilation : two-element (height, width) settings.
+// groups   : number of convolution groups.
+// bias     : optional bias; pass nullptr to run without bias.
+//
+// Returns the single output tensor of the operator.
+NDArray conv2d(NDArray const &data, NDArray const &weight, TShape padding, TShape strides, TShape dilation, int groups, NDArray *bias = nullptr)
+{
+    CVMOpParam params;
+    params.func_name = "conv2d";
+    if (bias)
+        params.num_inputs = 3;
+    else
+        params.num_inputs = 2;
+    params.num_outputs = 1;
+    params.flatten_data = false;
+
+    vector<vector<int64_t>> tshape(params.num_inputs + params.num_outputs);
+    vector<vector<int32_t>> tdata(params.num_inputs + params.num_outputs);
+    // data
+    int32_t *ddata = static_cast<int32_t *>(data->data);
+    vector<int64_t> dshape = vector<int64_t>(data->shape, data->shape + data->ndim);
+    tshape[0] = dshape;
+    int dsize = 1;
+    for (int i = 0; i < data->ndim; i++)
+    {
+        dsize *= dshape[i];
+    }
+    tdata[0] = vector<int32_t>(ddata, ddata + dsize);
+    // weight
+    vector<int64_t> wshape = vector<int64_t>(weight->shape, weight->shape + weight->ndim);
+    tshape[1] = wshape;
+    int32_t *wdata = static_cast<int32_t *>(weight->data);
+    int wsize = 1;
+    for (int i = 0; i < weight->ndim; i++)
+    {
+        wsize *= wshape[i];
+    }
+    tdata[1] = vector<int32_t>(wdata, wdata + wsize);
+    // bias
+    if (bias != nullptr)
+    {
+        DLTensor *bptr = bias->operator->();
+        vector<int64_t> bshape = vector<int64_t>(bptr->shape, bptr->shape + bptr->ndim);
+        tshape[2] = bshape;
+        int32_t *bdata = static_cast<int32_t *>(bptr->data);
+        int bsize = bshape[0];
+        tdata[2] = vector<int32_t>(bdata, bdata + bsize);
+    }
+
+    string use_bias = bias == nullptr ? "False" : "True";
+    // The kernel layout is OIHW, so the output channel count and the kernel
+    // size come from the weight shape (they were read from the data shape).
+    // Every value is emitted as a properly quoted JSON string.
+    string attr_str = "{\"channels\":\"" + to_string(wshape[0]) +
+                      "\", \"kernel_size\":\"[" + to_string(wshape[2]) + ", " + to_string(wshape[3]) +
+                      "]\", \"strides\":\"[" + to_string(strides[0]) + ", " + to_string(strides[1]) +
+                      "]\", \"padding\":\"[" + to_string(padding[0]) + ", " + to_string(padding[1]) +
+                      "]\", \"dilation\":\"(" + to_string(dilation[0]) + ", " + to_string(dilation[1]) +
+                      ")\", \"groups\":\"" + to_string(groups) +
+                      "\", \"layout\":\"NCHW\", \"kernel_layout\":\"OIHW\", \"use_bias\":\"" + use_bias + "\"}";
+    NodeAttrs attr;
+    LoadOp("conv2d", attr);
+    LoadOpAttr(attr_str, attr);
+    return result(tshape, tdata, params, attr)[0];
+}
+
+// Runs the "dense" operator.
+//
+// data   : input tensor.
+// weight : weight tensor; its first dimension is passed as "units".
+// bias   : optional bias; pass nullptr to run without bias.
+//
+// Returns the single output tensor of the operator.
+NDArray dense(NDArray const &data, NDArray const &weight, NDArray *bias = nullptr)
+{
+    CVMOpParam params;
+    params.func_name = "dense";
+    if (bias)
+        params.num_inputs = 3;
+    else
+        params.num_inputs = 2;
+    params.num_outputs = 1;
+    params.flatten_data = false;
+
+    vector<vector<int64_t>> tshape(params.num_inputs + params.num_outputs);
+    vector<vector<int32_t>> tdata(params.num_inputs + params.num_outputs);
+    // data
+    int32_t *ddata = static_cast<int32_t *>(data->data);
+    vector<int64_t> dshape = vector<int64_t>(data->shape, data->shape + data->ndim);
+    tshape[0] = dshape;
+    int dsize = 1;
+    for (int i = 0; i < data->ndim; i++)
+    {
+        dsize *= dshape[i];
+    }
+    tdata[0] = vector<int32_t>(ddata, ddata + dsize);
+    // weight
+    vector<int64_t> wshape = vector<int64_t>(weight->shape, weight->shape + weight->ndim);
+    tshape[1] = wshape;
+    int32_t *wdata = static_cast<int32_t *>(weight->data);
+    int wsize = 1;
+    for (int i = 0; i < weight->ndim; i++)
+    {
+        wsize *= wshape[i];
+    }
+    tdata[1] = vector<int32_t>(wdata, wdata + wsize);
+    // bias
+    if (bias != nullptr)
+    {
+        DLTensor *bptr = bias->operator->();
+        vector<int64_t> bshape = vector<int64_t>(bptr->shape, bptr->shape + bptr->ndim);
+        tshape[2] = bshape;
+        int32_t *bdata = static_cast<int32_t *>(bptr->data);
+        int bsize = bshape[0];
+        tdata[2] = vector<int32_t>(bdata, bdata + bsize);
+    }
+
+    string use_bias = bias == nullptr ? "False" : "True";
+    // The opening brace of the JSON object was missing.
+    string attr_str = "{\"units\":\"" + to_string(wshape[0]) + "\", \"use_bias\":\"" + use_bias + "\"}";
+    NodeAttrs attr;
+    LoadOp("dense", attr);
+    LoadOpAttr(attr_str, attr);
+    return result(tshape, tdata, params, attr)[0];
+}
+
+// Runs the "max_pool2d" operator.
+//
+// x         : input tensor.
+// pool_size, strides, padding : two-element (height, width) settings.
+// ceil_mode : forwarded as the "ceil_mode" attribute ("True"/"False").
+//
+// Returns the single output tensor of the operator.
+NDArray max_pool2d(NDArray const &x, TShape pool_size, TShape strides, TShape padding, bool ceil_mode)
+{
+    CVMOpParam params;
+    params.func_name = "max_pool2d";
+    params.num_inputs = 1;
+    params.num_outputs = 1;
+    params.flatten_data = false;
+
+    vector<vector<int64_t>> tshape(params.num_inputs + params.num_outputs);
+    vector<vector<int32_t>> tdata(params.num_inputs + params.num_outputs);
+    // x
+    int32_t *xdata = static_cast<int32_t *>(x->data);
+    vector<int64_t> xshape = vector<int64_t>(x->shape, x->shape + x->ndim);
+    tshape[0] = xshape;
+    int xsize = 1;
+    for (int i = 0; i < x->ndim; i++)
+    {
+        xsize *= xshape[i];
+    }
+    tdata[0] = vector<int32_t>(xdata, xdata + xsize);
+
+    string ceil = ceil_mode ? "True" : "False";
+    // pool_size was missing its opening bracket, "strides" was filled from
+    // the padding argument, and "padding" was never emitted.
+    string attr_str = "{\"pool_size\":\"[" + to_string(pool_size[0]) + ", " + to_string(pool_size[1]) +
+                      "]\", \"strides\":\"[" + to_string(strides[0]) + ", " + to_string(strides[1]) +
+                      "]\", \"padding\":\"[" + to_string(padding[0]) + ", " + to_string(padding[1]) +
+                      "]\", \"ceil_mode\":\"" + ceil + "\"}";
+    NodeAttrs attr;
+    LoadOp("max_pool2d", attr);
+    LoadOpAttr(attr_str, attr);
+    return result(tshape, tdata, params, attr)[0];
+}
+
+// Runs the "upsampling" operator.
+//
+// x     : input tensor.
+// scale : forwarded as the "scale" attribute.
+//
+// Returns the single output tensor of the operator.
+NDArray upsampling(NDArray const &x, int scale)
+{
+    CVMOpParam params;
+    params.func_name = "upsampling";
+    params.num_inputs = 1;
+    params.num_outputs = 1;
+    params.flatten_data = false;
+
+    vector<vector<int64_t>> tshape(params.num_inputs + params.num_outputs);
+    vector<vector<int32_t>> tdata(params.num_inputs + params.num_outputs);
+    // x
+    int32_t *xdata = static_cast<int32_t *>(x->data);
+    vector<int64_t> xshape = vector<int64_t>(x->shape, x->shape + x->ndim);
+    tshape[0] = xshape;
+    int xsize = 1;
+    for (int i = 0; i < x->ndim; i++)
+    {
+        xsize *= xshape[i];
+    }
+    tdata[0] = vector<int32_t>(xdata, xdata + xsize);
+
+    // The value was missing its opening quote.
+    string attr_str = "{\"scale\":\"" + to_string(scale) + "\"}";
+    NodeAttrs attr;
+    LoadOp("upsampling", attr);
+    LoadOpAttr(attr_str, attr);
+    return result(tshape, tdata, params, attr)[0];
+}
+
+vector get_valid_counts(NDArray const &x, int score_threshold) +{ + CVMOpParam params; + params.func_name = "get_valid_counts"; + params.num_inputs = 1; + params.num_outputs = 2; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = xshape[0] * xshape[1] * xshape[2]; + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"score_threshold\":\"" + to_string(score_threshold) + "\"}"; + NodeAttrs attr; + LoadOp("get_valid_counts", attr); + LoadOpAttr(attr_str, attr); + return 
result(tshape, tdata, params, attr); +} + +NDArray sum(NDArray const &x, TShape axis, bool keepdims, bool exclude) +{ + CVMOpParam params; + params.func_name = "sum"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string kdims = keepdims == true ? "True" : "False"; + string exc = exclude == true ? "True" : "False"; + string attr_str = "{\"axis\":\"[" + to_string(axis[0]) + ", " + to_string(axis[1]) + ", " + + to_string(axis[2]) + "]\", \"keepdims\":\"" + kdims + "\", \"exclude\":\"" + exc + "\"}"; + NodeAttrs attr; + LoadOp("sum", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray max(NDArray const &x, TShape axis, bool keepdims, bool exclude) +{ + CVMOpParam params; + params.func_name = "max"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string kdims = keepdims == true ? "True" : "False"; + string exc = exclude == true ? 
"True" : "False"; + string attr_str = "{\"axis\":\"[" + to_string(axis[0]) + ", " + to_string(axis[1]) + ", " + + to_string(axis[2]) + "]\", \"keepdims\":\"" + kdims + "\", \"exclude\":\"" + exc + "\"}"; + NodeAttrs attr; + LoadOp("max", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray broadcast_add(NDArray const &x, NDArray const &y) +{ + CVMOpParam params; + params.func_name = "broadcast_add"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_add", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray broadcast_div(NDArray const &x, NDArray const &y) +{ + CVMOpParam params; + params.func_name = "broadcast_div"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector 
x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_div", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray broadcast_greater(NDArray const &x, NDArray const &y) +{ + CVMOpParam params; + params.func_name = "broadcast_greater"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_greater", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray broadcast_max(NDArray const &x, NDArray const &y) +{ + CVMOpParam params; + params.func_name = "broadcast_max"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> 
tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_max", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray broadcast_mul(NDArray const &x, NDArray const &y) +{ + CVMOpParam params; + params.func_name = "broadcast_mul"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_mul", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray 
broadcast_sub(NDArray const &x, NDArray const &y) +{ + CVMOpParam params; + params.func_name = "broadcast_sub"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_sub", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray slice_like(NDArray const &x, NDArray const &y, TShape axis) +{ + CVMOpParam params; + params.func_name = "slice_like"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for 
(int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{\"axis\":\"[" + to_string(axis[0]) + ", " + to_string(axis[1]) + ", " + to_string(axis[2]) + ", " + to_string(axis[3]) + "]\""; + NodeAttrs attr; + LoadOp("slice_like", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray tile(NDArray const &x, NDArray const &y, TShape reps) +{ + CVMOpParam params; + params.func_name = "tile"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{\"reps\":\"["; + for (int i = 0; i < reps.ndim() - 1; i++) + { + attr_str += to_string(reps[i]) + ", "; + } + attr_str += to_string(reps[reps.ndim() - 1]) + "]\"}"; + NodeAttrs attr; + LoadOp("tile", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray repeat(NDArray const &x, int repeats, int axis) +{ + CVMOpParam params; + params.func_name = "repeat"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + 
vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"repeats\":\"" + to_string(repeats) + "\", \"axis\":\"" + to_string(axis) + "\"}"; + NodeAttrs attr; + LoadOp("repeat", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray strided_slice(NDArray const &x, TShape begin, TShape end, TShape stride) +{ + CVMOpParam params; + params.func_name = "strided_slice"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"begin\":\"["; + for (int i = 0; i < begin.ndim() - 1; i++) + attr_str += to_string(begin[i]) + ", "; + attr_str += to_string(begin[begin.ndim() - 1]) + "]\", \"end\":\"["; + for (int i = 0; i < end.ndim() - 1; i++) + attr_str += to_string(end[i]) + ", "; + attr_str += to_string(end[end.ndim() - 1]) + "]\", \"stride\":\"["; + for (int i = 0; i < stride.ndim() - 1; i++) + attr_str += to_string(stride[i]) + ", "; + attr_str += to_string(stride[stride.ndim() - 1]) + "]\"}"; + NodeAttrs attr; + LoadOp("strided_slice", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray concatenate(vector const &x, int axis) +{ + CVMOpParam params; + params.func_name = "concatenate"; + params.num_inputs = x.size(); + params.num_outputs = 1; + params.flatten_data = 
false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + for (int in = 0; in < params.num_inputs; in++) + { + int32_t *data = static_cast(x[in]->data); + vector shape = vector(x[in]->shape, x[in]->shape + x[in]->ndim); + tshape[in] = shape; + int in_size = 1; + for (int i = 0; i < x[in]->ndim; i++) + { + in_size *= shape[i]; + } + vector x_data(in_size); + for (int i = 0; i < in_size; i++) + { + x_data[i] = data[i]; + } + tdata[in] = x_data; + } + + string attr_str = "{\"axis\":\"" + to_string(axis) + "\"}"; + NodeAttrs attr; + LoadOp("concatenate", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray transpose(NDArray const &x, TShape axes) +{ + CVMOpParam params; + params.func_name = "transpose"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"axes\":\"["; + for (int i = 0; i < axes.ndim() - 1; i++) + { + attr_str += to_string(axes[i]) + ", "; + } + attr_str += to_string(axes[axes.ndim() - 1]) + "]\"}"; + NodeAttrs attr; + LoadOp("transpose", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +NDArray take(NDArray const &x, NDArray const &y, int *axis) +{ + CVMOpParam params; + params.func_name = "take"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t 
*xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = ""; + if (axis == nullptr) + attr_str += "{}"; + else + attr_str += "{\"axis\":\"" + to_string(*axis) + "\"}"; + NodeAttrs attr; + LoadOp("take", attr); + LoadOpAttr(attr_str, attr); + return result(tshape, tdata, params, attr)[0]; +} + +// DLTensor +DLTensor *conv2d(DLTensor *data, DLTensor *weight, TShape padding, TShape strides, TShape dilation, int groups, DLTensor *bias = nullptr) +{ + CVMOpParam params; + params.func_name = "conv2d"; + if (bias) + params.num_inputs = 3; + else + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // data + int32_t *ddata = static_cast(data->data); + vector dshape = vector(data->shape, data->shape + data->ndim); + tshape[0] = dshape; + int dsize = 1; + for (int i = 0; i < data->ndim; i++) + { + dsize *= dshape[i]; + } + vector d_data(dsize); + for (int i = 0; i < dsize; i++) + { + d_data[i] = ddata[i]; + } + tdata[0] = d_data; + // weight + vector wshape = vector(weight->shape, weight->shape + weight->ndim); + tshape[1] = wshape; + int32_t *wdata = static_cast(weight->data); + int wsize = 1; + for (int i = 0; i < weight->ndim; i++) + { + wsize *= wshape[i]; + } + vector w_data(wsize); + for (int i = 0; i < wsize; i++) + { + w_data[i] = wdata[i]; + } 
+ tdata[1] = w_data; + // bias + if (bias != nullptr) + { + vector bshape = vector(bias->shape, bias->shape + bias->ndim); + tshape[2] = bshape; + int32_t *bdata = static_cast(bias->data); + int bsize = bshape[0]; + vector b_data(bsize); + for (int i = 0; i < bsize; i++) + { + b_data[i] = bdata[i]; + } + tdata[2] = b_data; + } + + string use_bias = bias == nullptr ? "False" : "True"; + string attr_str = "{\"channels\":" + to_string(dshape[0]) + "\", \"kernel_size\":\"[" + to_string(dshape[2]) + ", " + to_string(dshape[3]) + + "], \"strides\":\"[" + to_string(strides[0]) + ", " + to_string(strides[1]) + "]\", \"padding\":\"[" + to_string(padding[0]) + + ", " + to_string(padding[1]) + "]\", \"dilation\":\"(" + to_string(dilation[0]) + ", " + to_string(dilation[1]) + ")\", \"groups\":\"" + + to_string(groups) + "\", \"layout\":\"NCHW\", \"kernel_layout\":\"OIHW\", \"use_bias\":\"" + use_bias + "\"}"; + NodeAttrs attr; + LoadOp("conv2d", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *dense(DLTensor *data, DLTensor *weight, NDArray *bias = nullptr) +{ + CVMOpParam params; + params.func_name = "dense"; + if (bias) + params.num_inputs = 3; + else + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // data + int32_t *ddata = static_cast(data->data); + vector dshape = vector(data->shape, data->shape + data->ndim); + tshape[0] = dshape; + int dsize = 1; + for (int i = 0; i < data->ndim; i++) + { + dsize *= dshape[i]; + } + vector d_data(dsize); + for (int i = 0; i < dsize; i++) + { + d_data[i] = ddata[i]; + } + tdata[0] = d_data; + // weight + vector wshape = vector(weight->shape, weight->shape + weight->ndim); + tshape[1] = wshape; + int32_t *wdata = static_cast(weight->data); + int wsize = 1; + for (int i = 0; i < weight->ndim; i++) + { + 
wsize *= wshape[i]; + } + vector w_data(wsize); + for (int i = 0; i < wsize; i++) + { + w_data[i] = wdata[i]; + } + tdata[1] = w_data; + // bias + if (bias != nullptr) + { + DLTensor *bptr = bias->operator->(); + vector bshape = vector(bptr->shape, bptr->shape + bptr->ndim); + tshape[2] = bshape; + int32_t *bdata = static_cast(bptr->data); + int bsize = bshape[0]; + vector b_data(bsize); + for (int i = 0; i < bsize; i++) + { + b_data[i] = bdata[i]; + } + tdata[2] = b_data; + } + + string use_bias = bias == nullptr ? "False" : "True"; + string attr_str = "\"units\":\"" + to_string(wshape[0]) + "\", \"use_bias\":\"" + use_bias + "\"}"; + NodeAttrs attr; + LoadOp("dense", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *max_pool2d(DLTensor *x, TShape pool_size, TShape strides, TShape padding, bool ceil_mode) +{ + CVMOpParam params; + params.func_name = "max_pool2d"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string ceil = ceil_mode == true ? 
"True" : "False"; + string attr_str = "{\"pool_size\":\"" + to_string(pool_size[0]) + ", " + to_string(pool_size[1]) + + "]\", \"strides\":\"[" + to_string(padding[0]) + ", " + to_string(padding[1]) + "]\", \"ceil_mode\":\"" + + ceil + "\"}"; + NodeAttrs attr; + LoadOp("max_pool2d", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *upsampling(DLTensor *x, int scale) +{ + CVMOpParam params; + params.func_name = "upsampling"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"scale\":" + to_string(scale) + "\"}"; + NodeAttrs attr; + LoadOp("upsampling", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +vector get_valid_counts(DLTensor *x, int score_threshold) +{ + CVMOpParam params; + params.func_name = "get_valid_counts"; + params.num_inputs = 1; + params.num_outputs = 2; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = xshape[0] * xshape[1] * xshape[2]; + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"score_threshold\":\"" + to_string(score_threshold) + "\"}"; + NodeAttrs attr; + 
LoadOp("get_valid_counts", attr); + LoadOpAttr(attr_str, attr); + vector res = result(tshape, tdata, params, attr); + vector out(res.size()); + for (int i = 0; i < res.size(); i++) + { + out[i] = res[i].operator->(); + } + return out; +} + +DLTensor *sum(DLTensor *x, TShape axis, bool keepdims, bool exclude) +{ + CVMOpParam params; + params.func_name = "sum"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string kdims = keepdims == true ? "True" : "False"; + string exc = exclude == true ? "True" : "False"; + string attr_str = "{\"axis\":\"[" + to_string(axis[0]) + ", " + to_string(axis[1]) + ", " + + to_string(axis[2]) + "]\", \"keepdims\":\"" + kdims + "\", \"exclude\":\"" + exc + "\"}"; + NodeAttrs attr; + LoadOp("sum", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *max(DLTensor *x, TShape axis, bool keepdims, bool exclude) +{ + CVMOpParam params; + params.func_name = "max"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string kdims = 
keepdims == true ? "True" : "False"; + string exc = exclude == true ? "True" : "False"; + string attr_str = "{\"axis\":\"[" + to_string(axis[0]) + ", " + to_string(axis[1]) + ", " + + to_string(axis[2]) + "]\", \"keepdims\":\"" + kdims + "\", \"exclude\":\"" + exc + "\"}"; + NodeAttrs attr; + LoadOp("max", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *broadcast_add(DLTensor *x, DLTensor *y) +{ + CVMOpParam params; + params.func_name = "broadcast_add"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_add", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *broadcast_div(DLTensor *x, DLTensor *y) +{ + CVMOpParam params; + params.func_name = "broadcast_div"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + 
tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_div", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *broadcast_greater(DLTensor *x, DLTensor *y) +{ + CVMOpParam params; + params.func_name = "broadcast_greater"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_greater", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *broadcast_max(DLTensor *x, DLTensor *y) +{ + CVMOpParam params; + params.func_name = 
"broadcast_max"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_max", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *broadcast_mul(DLTensor *x, DLTensor *y) +{ + CVMOpParam params; + params.func_name = "broadcast_mul"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = 
y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_mul", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *broadcast_sub(DLTensor *x, DLTensor *y) +{ + CVMOpParam params; + params.func_name = "broadcast_sub"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_sub", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *slice_like(DLTensor *x, DLTensor *y, TShape axis) +{ + CVMOpParam params; + params.func_name = "slice_like"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y 
+ int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{\"axis\":\"[" + to_string(axis[0]) + ", " + to_string(axis[1]) + ", " + to_string(axis[2]) + ", " + to_string(axis[3]) + "]\""; + NodeAttrs attr; + LoadOp("slice_like", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *tile(DLTensor *x, DLTensor *y, TShape reps) +{ + CVMOpParam params; + params.func_name = "tile"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = "{\"reps\":\"["; + for (int i = 0; i < reps.ndim() - 1; i++) + { + attr_str += to_string(reps[i]) + ", "; + } + attr_str += to_string(reps[reps.ndim() - 1]) + "]\"}"; + NodeAttrs attr; + LoadOp("tile", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *repeat(DLTensor *x, int repeats, int axis) +{ + CVMOpParam 
params; + params.func_name = "repeat"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"repeats\":\"" + to_string(repeats) + "\", \"axis\":\"" + to_string(axis) + "\"}"; + NodeAttrs attr; + LoadOp("repeat", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *strided_slice(DLTensor *x, TShape begin, TShape end, TShape stride) +{ + CVMOpParam params; + params.func_name = "strided_slice"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"begin\":\"["; + for (int i = 0; i < begin.ndim() - 1; i++) + attr_str += to_string(begin[i]) + ", "; + attr_str += to_string(begin[begin.ndim() - 1]) + "]\", \"end\":\"["; + for (int i = 0; i < end.ndim() - 1; i++) + attr_str += to_string(end[i]) + ", "; + attr_str += to_string(end[end.ndim() - 1]) + "]\", \"stride\":\"["; + for (int i = 0; i < stride.ndim() - 1; i++) + attr_str += to_string(stride[i]) + ", "; + attr_str += to_string(stride[stride.ndim() - 1]) + "]\"}"; + 
NodeAttrs attr; + LoadOp("strided_slice", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *concatenate(vector x, int axis) +{ + CVMOpParam params; + params.func_name = "concatenate"; + params.num_inputs = x.size(); + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + for (int in = 0; in < params.num_inputs; in++) + { + int32_t *data = static_cast(x[in]->data); + vector shape = vector(x[in]->shape, x[in]->shape + x[in]->ndim); + tshape[in] = shape; + int in_size = 1; + for (int i = 0; i < x[in]->ndim; i++) + { + in_size *= shape[i]; + } + vector x_data(in_size); + for (int i = 0; i < in_size; i++) + { + x_data[i] = data[i]; + } + tdata[in] = x_data; + } + + string attr_str = "{\"axis\":\"" + to_string(axis) + "\"}"; + NodeAttrs attr; + LoadOp("concatenate", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *transpose(DLTensor *x, TShape axes) +{ + CVMOpParam params; + params.func_name = "transpose"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"axes\":\"["; + for (int i = 0; i < axes.ndim() - 1; i++) + { + attr_str += to_string(axes[i]) + ", "; + } + attr_str += to_string(axes[axes.ndim() - 1]) + "]\"}"; + NodeAttrs attr; + LoadOp("transpose", attr); + LoadOpAttr(attr_str, attr); 
+ NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +DLTensor *take(DLTensor *x, DLTensor *y, int *axis) +{ + CVMOpParam params; + params.func_name = "take"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + int32_t *xdata = static_cast(x->data); + vector xshape = vector(x->shape, x->shape + x->ndim); + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < x->ndim; i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = xdata[i]; + } + tdata[0] = x_data; + // y + int32_t *ydata = static_cast(y->data); + vector yshape = vector(y->shape, y->shape + y->ndim); + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < y->ndim; i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = ydata[i]; + } + tdata[1] = y_data; + + string attr_str = ""; + if (axis == nullptr) + attr_str += "{}"; + else + attr_str += "{\"axis\":\"" + to_string(*axis) + "\"}"; + NodeAttrs attr; + LoadOp("take", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return res.operator->(); +} + +// int32_t * +data_and_shape conv2d(int32_t *data, vector dshape, int32_t *weight, vector wshape, TShape padding, TShape strides, TShape dilation, int groups, vector bshape, int32_t *bias = nullptr) +{ + CVMOpParam params; + params.func_name = "conv2d"; + if (bias) + params.num_inputs = 3; + else + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // data + tshape[0] = dshape; + int dsize = 1; + for (int i = 0; i < dshape.size(); i++) + { + dsize *= dshape[i]; + } + vector d_data(dsize); + for (int i = 0; i < dsize; 
i++) + { + d_data[i] = data[i]; + } + tdata[0] = d_data; + // weight + tshape[1] = wshape; + int wsize = 1; + for (int i = 0; i < wshape.size(); i++) + { + wsize *= wshape[i]; + } + vector w_data(wsize); + for (int i = 0; i < wsize; i++) + { + w_data[i] = weight[i]; + } + tdata[1] = w_data; + // bias + if (bias != nullptr) + { + tshape[2] = bshape; + int bsize = bshape[0]; + vector b_data(bsize); + for (int i = 0; i < bsize; i++) + { + b_data[i] = bias[i]; + } + tdata[2] = b_data; + } + + string use_bias = bias == nullptr ? "False" : "True"; + string attr_str = "{\"channels\":" + to_string(dshape[0]) + "\", \"kernel_size\":\"[" + to_string(dshape[2]) + ", " + to_string(dshape[3]) + + "], \"strides\":\"[" + to_string(strides[0]) + ", " + to_string(strides[1]) + "]\", \"padding\":\"[" + to_string(padding[0]) + + ", " + to_string(padding[1]) + "]\", \"dilation\":\"(" + to_string(dilation[0]) + ", " + to_string(dilation[1]) + ")\", \"groups\":\"" + + to_string(groups) + "\", \"layout\":\"NCHW\", \"kernel_layout\":\"OIHW\", \"use_bias\":\"" + use_bias + "\"}"; + NodeAttrs attr; + LoadOp("conv2d", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape dense(int32_t *data, vector dshape, int32_t *weight, vector wshape, vector bshape, int32_t *bias = nullptr) +{ + CVMOpParam params; + params.func_name = "dense"; + if (bias) + params.num_inputs = 3; + else + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // data + tshape[0] = dshape; + int dsize = 1; + for (int i = 0; i < dshape.size(); i++) + { + dsize *= dshape[i]; + } + vector d_data(dsize); + for (int i = 0; i < dsize; i++) + { + d_data[i] = data[i]; + } + tdata[0] = d_data; + // weight + tshape[1] = wshape; + int wsize = 
1; + for (int i = 0; i < wshape.size(); i++) + { + wsize *= wshape[i]; + } + vector w_data(wsize); + for (int i = 0; i < wsize; i++) + { + w_data[i] = weight[i]; + } + tdata[1] = w_data; + // bias + if (bias != nullptr) + { + tshape[2] = bshape; + int bsize = bshape[0]; + vector b_data(bsize); + for (int i = 0; i < bsize; i++) + { + b_data[i] = bias[i]; + } + tdata[2] = b_data; + } + + string use_bias = bias == nullptr ? "False" : "True"; + string attr_str = "\"units\":\"" + to_string(wshape[0]) + "\", \"use_bias\":\"" + use_bias + "\"}"; + NodeAttrs attr; + LoadOp("dense", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape max_pool2d(int32_t *x, vector xshape, TShape pool_size, TShape strides, TShape padding, bool ceil_mode) +{ + CVMOpParam params; + params.func_name = "max_pool2d"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + + string ceil = ceil_mode == true ? 
"True" : "False"; + string attr_str = "{\"pool_size\":\"" + to_string(pool_size[0]) + ", " + to_string(pool_size[1]) + + "]\", \"strides\":\"[" + to_string(padding[0]) + ", " + to_string(padding[1]) + "]\", \"ceil_mode\":\"" + + ceil + "\"}"; + NodeAttrs attr; + LoadOp("max_pool2d", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape upsampling(int32_t *x, vector xshape, int scale) +{ + CVMOpParam params; + params.func_name = "upsampling"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"scale\":" + to_string(scale) + "\"}"; + NodeAttrs attr; + LoadOp("upsampling", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +vector get_valid_counts(int32_t *x, vector xshape, int score_threshold) +{ + CVMOpParam params; + params.func_name = "get_valid_counts"; + params.num_inputs = 1; + params.num_outputs = 2; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = xshape[0] * xshape[1] * xshape[2]; + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"score_threshold\":\"" + to_string(score_threshold) + "\"}"; + NodeAttrs attr; + LoadOp("get_valid_counts", attr); + LoadOpAttr(attr_str, 
attr); + vector res = result(tshape, tdata, params, attr); + vector out(res.size()); + for (int i = 0; i < res.size(); i++) + { + out[i] = data_and_shape{(int32_t *)res[i]->data, vector(res[i]->shape, res[i]->shape + res[i]->ndim)}; + } + return out; +} + +data_and_shape sum(int32_t *x, vector xshape, TShape axis, bool keepdims, bool exclude) +{ + CVMOpParam params; + params.func_name = "sum"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + + string kdims = keepdims == true ? "True" : "False"; + string exc = exclude == true ? "True" : "False"; + string attr_str = "{\"axis\":\"[" + to_string(axis[0]) + ", " + to_string(axis[1]) + ", " + + to_string(axis[2]) + "]\", \"keepdims\":\"" + kdims + "\", \"exclude\":\"" + exc + "\"}"; + NodeAttrs attr; + LoadOp("sum", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape max(int32_t *x, vector xshape, TShape axis, bool keepdims, bool exclude) +{ + CVMOpParam params; + params.func_name = "max"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + + string kdims = keepdims == true ? "True" : "False"; + string exc = exclude == true ? 
"True" : "False"; + string attr_str = "{\"axis\":\"[" + to_string(axis[0]) + ", " + to_string(axis[1]) + ", " + + to_string(axis[2]) + "]\", \"keepdims\":\"" + kdims + "\", \"exclude\":\"" + exc + "\"}"; + NodeAttrs attr; + LoadOp("max", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape broadcast_add(int32_t *x, vector xshape, int32_t *y, vector yshape) +{ + CVMOpParam params; + params.func_name = "broadcast_add"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + // y + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < yshape.size(); i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = y[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_add", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape broadcast_div(int32_t *x, vector xshape, int32_t *y, vector yshape) +{ + CVMOpParam params; + params.func_name = "broadcast_div"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; 
i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + // y + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < yshape.size(); i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = y[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_div", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape broadcast_greater(int32_t *x, vector xshape, int32_t *y, vector yshape) +{ + CVMOpParam params; + params.func_name = "broadcast_greater"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + // y + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < yshape.size(); i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = y[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_greater", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape broadcast_max(int32_t *x, vector xshape, int32_t *y, vector yshape) +{ + CVMOpParam params; + params.func_name = "broadcast_max"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; 
+ for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + // y + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < yshape.size(); i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = y[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_max", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape broadcast_mul(int32_t *x, vector xshape, int32_t *y, vector yshape) +{ + CVMOpParam params; + params.func_name = "broadcast_mul"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + // y + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < yshape.size(); i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = y[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_mul", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape broadcast_sub(int32_t *x, vector xshape, int32_t *y, vector yshape) +{ + CVMOpParam params; + params.func_name = "broadcast_sub"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + 
params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + // y + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < yshape.size(); i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = y[i]; + } + tdata[1] = y_data; + + string attr_str = "{}"; + NodeAttrs attr; + LoadOp("broadcast_sub", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape slice_like(int32_t *x, vector xshape, int32_t *y, vector yshape, TShape axis) +{ + CVMOpParam params; + params.func_name = "slice_like"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + // y + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < yshape.size(); i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = y[i]; + } + tdata[1] = y_data; + + string attr_str = "{\"axis\":\"[" + to_string(axis[0]) + ", " + to_string(axis[1]) + ", " + to_string(axis[2]) + ", " + to_string(axis[3]) + "]\""; + NodeAttrs attr; + LoadOp("slice_like", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape 
tile(int32_t *x, vector xshape, int32_t *y, vector yshape, TShape reps) +{ + CVMOpParam params; + params.func_name = "tile"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + // y + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < yshape.size(); i++) + { + ysize *= yshape[i]; + } + vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = y[i]; + } + tdata[1] = y_data; + + string attr_str = "{\"reps\":\"["; + for (int i = 0; i < reps.ndim() - 1; i++) + { + attr_str += to_string(reps[i]) + ", "; + } + attr_str += to_string(reps[reps.ndim() - 1]) + "]\"}"; + NodeAttrs attr; + LoadOp("tile", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape repeat(int32_t *x, vector xshape, int repeats, int axis) +{ + CVMOpParam params; + params.func_name = "repeat"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"repeats\":\"" + to_string(repeats) + "\", \"axis\":\"" + to_string(axis) + "\"}"; + NodeAttrs attr; + LoadOp("repeat", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return 
data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape strided_slice(int32_t *x, vector xshape, TShape begin, TShape end, TShape stride) +{ + CVMOpParam params; + params.func_name = "strided_slice"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"begin\":\"["; + for (int i = 0; i < begin.ndim() - 1; i++) + attr_str += to_string(begin[i]) + ", "; + attr_str += to_string(begin[begin.ndim() - 1]) + "]\", \"end\":\"["; + for (int i = 0; i < end.ndim() - 1; i++) + attr_str += to_string(end[i]) + ", "; + attr_str += to_string(end[end.ndim() - 1]) + "]\", \"stride\":\"["; + for (int i = 0; i < stride.ndim() - 1; i++) + attr_str += to_string(stride[i]) + ", "; + attr_str += to_string(stride[stride.ndim() - 1]) + "]\"}"; + NodeAttrs attr; + LoadOp("strided_slice", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape concatenate(vector x, vector> xshape, int axis) +{ + CVMOpParam params; + params.func_name = "concatenate"; + params.num_inputs = x.size(); + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + for (int in = 0; in < params.num_inputs; in++) + { + tshape[in] = xshape[in]; + int in_size = 1; + for (int i = 0; i < xshape[in].size(); i++) + { + in_size *= xshape[in][i]; + } + vector x_data(in_size); + for (int i = 0; i < in_size; i++) + { + 
x_data[i] = x[in][i]; + } + tdata[in] = x_data; + } + + string attr_str = "{\"axis\":\"" + to_string(axis) + "\"}"; + NodeAttrs attr; + LoadOp("concatenate", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape transpose(int32_t *x, vector xshape, TShape axes) +{ + CVMOpParam params; + params.func_name = "transpose"; + params.num_inputs = 1; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + + string attr_str = "{\"axes\":\"["; + for (int i = 0; i < axes.ndim() - 1; i++) + { + attr_str += to_string(axes[i]) + ", "; + } + attr_str += to_string(axes[axes.ndim() - 1]) + "]\"}"; + NodeAttrs attr; + LoadOp("transpose", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +} + +data_and_shape take(int32_t *x, vector xshape, int32_t *y, vector yshape, int *axis) +{ + CVMOpParam params; + params.func_name = "take"; + params.num_inputs = 2; + params.num_outputs = 1; + params.flatten_data = false; + + vector> tshape(params.num_inputs + params.num_outputs); + vector> tdata(params.num_inputs + params.num_outputs); + // x + tshape[0] = xshape; + int xsize = 1; + for (int i = 0; i < xshape.size(); i++) + { + xsize *= xshape[i]; + } + vector x_data(xsize); + for (int i = 0; i < xsize; i++) + { + x_data[i] = x[i]; + } + tdata[0] = x_data; + // y + tshape[1] = yshape; + int ysize = 1; + for (int i = 0; i < yshape.size(); i++) + { + ysize *= yshape[i]; + } + 
vector y_data(ysize); + for (int i = 0; i < ysize; i++) + { + y_data[i] = y[i]; + } + tdata[1] = y_data; + + string attr_str = ""; + if (axis == nullptr) + attr_str += "{}"; + else + attr_str += "{\"axis\":\"" + to_string(*axis) + "\"}"; + NodeAttrs attr; + LoadOp("take", attr); + LoadOpAttr(attr_str, attr); + NDArray res = result(tshape, tdata, params, attr)[0]; + return data_and_shape{(int32_t *)res->data, vector(res->shape, res->shape + res->ndim)}; +}