/*******************************************************************************
* Copyright (C) 2018 Intel Corporation
*
* This software and the related documents are Intel copyrighted  materials,  and
* your use of  them is  governed by the  express license  under which  they were
* provided to you (License).  Unless the License provides otherwise, you may not
* use, modify, copy, publish, distribute,  disclose or transmit this software or
* the related documents without Intel's prior written permission.
*
* This software and the related documents  are provided as  is,  with no express
* or implied  warranties,  other  than those  that are  expressly stated  in the
* License.
*******************************************************************************/

/*
 *
 *  Content:
 *            Touch test for all available oneMKL VM OMP offload math functions:
 *
 *            For the HA (High Accuracy), LA (Low Accuracy) and EP (Enhanced
 *            Performance) modes, the different variants of the single, double
 *            and complex precision functions are called: basic, explicit mode,
 *            strided, and strided with explicit mode.
 *
 *******************************************************************************/

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <float.h>
#include <complex.h>
#include <omp.h>
#if (defined __INTEL_COMPILER) || (defined __INTEL_LLVM_COMPILER)
  #include <mathimf.h>
#else
  #include <math.h>
#endif

#include "mkl.h"
#include "mkl_omp_offload.h"

/**
 * Common constants shared by the function launchers and result checks:
 */
/* oneMKL VM vector length - number of inputs to be evaluated.
 * Also the element count of every array section mapped to the device. */
static const int length = 1000;

/* Stride values for strided API tests: passed as both the argument and the
 * result increment; strided calls process length/stride elements. */
static const int stride = 3;

/* Maximum printed number of errors for each function - to reduce log size */
static const int max_printed = 1;

/* Maximum allowed ulp's, three entries per precision (s, c, d, z).
 * The "+ 1.0" terms in the double precision tables account for the rounded
 * reference double precision function.
 * NOTE(review): entries are presumably ordered HA, LA, EP like vm_mode -
 * confirm against the code that reads these tables. */
static const float  s_allowed_ulp[] = { 1.0f, 4.0f, 5000.0f };
static const float  c_allowed_ulp[] = { 1.0f, 4.0f, 9000.0f };
static const double d_allowed_ulp[] = { 1.0 + 1.0,   4.0 + 1.0,  7.0e7 };
static const double z_allowed_ulp[] = { 1.0 + 1.0,   4.0 + 1.0,  1.0e8 };

/* Offload device number used in the device() clauses of the launchers */
static const int devnum = 0;

/* Fixed argument value.
 * NOTE(review): presumably the scalar argument for the fixed-argument layouts
 * (kVXtoV, kVVXtoV) - its use is outside the code visible here. */
static const double fixed = 3.14;

/* Enable errors printout (non-zero enables) */
static const int print_err = 1;

/* Enable warnings printout (non-zero enables) */
static const int print_warn = 0;

/**
 * Mapping to oneMKL VM accuracy mode constants:
 * the launchers index this table with their accuracy argument
 * (vm_mode[acc]); entries are listed in HA, LA, EP order.
 */
static const unsigned int vm_mode[] = {VML_HA, VML_LA, VML_EP};

/* Maximum function full name length.
 * NOTE(review): whether this counts the terminating NUL is not visible here. */
#define NAME_LEN 64

/**
 * API variants exercised for each VM function.
 * The values are used as indices into the per-variant result arrays
 * (for example out->sres1[kFunc]), so they must stay dense and start at 0.
 */
enum VmApi
{
    kFunc   = 0,   /* Basic call */
    kMFunc  = 1,   /* Call with explicit mode argument */
    kFuncI  = 2,   /* Strided call */
    kMFuncI = 3,   /* Strided call with explicit mode argument */
    kApiNum        /* Number of API variants (follows the last variant) */
};

/**
 * Accuracy levels a VM function can be run with.
 * The enumerators are listed in the same HA, LA, EP order as the vm_mode table.
 */
enum VmAccuracy
{
    kHA = 0,   /* High accuracy */
    kLA = 1,   /* Low accuracy */
    kEP = 2,   /* Enhanced performance */
    kAccNum    /* Number of accuracies (follows the last accuracy) */
};

/**
 * Precisions a VM function can be provided in.
 */
enum VmPrecision
{
    kSP = 0,   /* Real single precision */
    kDP = 1,   /* Real double precision */
    kCP = 2,   /* Complex single precision */
    kZP = 3,   /* Complex double precision */
};

/**
 * Argument/result layouts of the VM functions.
 */
enum VmLayout
{
    kVtoV   = 0,   /* 1 vector argument  -> 1 vector result */
    kVVtoV  = 1,   /* 2 vector arguments -> 1 vector result */
    kVtoVV  = 2,   /* 1 vector argument  -> 2 vector results */
    kVXtoV  = 3,   /* 1 vector and 1 fixed arguments -> 1 vector result */
    kVVXtoV = 4,   /* 2 vector and 4 fixed arguments -> 1 vector result */
    kVCtoVR = 5,   /* 1 complex argument -> 1 real result */
    kVRtoVC = 6,   /* 1 real argument -> 1 complex result */
};

/**
 * Input arguments and reference results, grouped by precision
 * (s - single, d - double, c - complex single, z - complex double).
 */
typedef struct
{
    /* Argument arrays */
    float          *sarg1;
    float          *sarg2;
    double         *darg1;
    double         *darg2;
    float complex  *carg1;
    float complex  *carg2;
    double complex *zarg1;
    double complex *zarg2;

    /* Reference arrays, held in double / double complex for every precision */
    double         *sref1;
    double         *sref2;
    double         *dref1;
    double         *dref2;
    double complex *cref1;
    double complex *cref2;
    double complex *zref1;
    double complex *zref2;
} VmInputData;

/**
 * Output results: for every precision, one result pointer per API variant
 * (indexed by enum VmApi) for the first and for the second result.
 */
typedef struct
{
    /* Single precision */
    float          *sres1[kApiNum];
    float          *sres2[kApiNum];

    /* Double precision */
    double         *dres1[kApiNum];
    double         *dres2[kApiNum];

    /* Complex single precision */
    float complex  *cres1[kApiNum];
    float complex  *cres2[kApiNum];

    /* Complex double precision */
    double complex *zres1[kApiNum];
    double complex *zres2[kApiNum];
} VmOutputData;

/**
 * Pointer types for the real reference functions, one per argument layout.
 * Parameter names are placeholders; the 6-argument layout corresponds to
 * kVVXtoV (2 vector and 4 fixed arguments).
 */
typedef double (*RefVtoV)(double x);
typedef double (*RefVVtoV)(double x, double y);
typedef void   (*RefVtoVV)(double x, double *r1, double *r2);
typedef double (*RefVVXtoV)(double a1, double a2, double a3, double a4, double a5, double a6);

/**
 * Pointer types for the complex reference functions:
 * complex -> complex, complex -> real, real -> complex and
 * (complex, complex) -> complex.
 */
typedef double complex (*CRefVtoV)(double complex z);
typedef double         (*CRefCtoR)(double complex z);
typedef double complex (*CRefRtoC)(double x);
typedef double complex (*CRefVVtoV)(double complex z1, double complex z2);

/**
 * Pointer type for a VM function family launcher (the own_vm_* functions):
 * takes the accuracy, the input arrays and the output arrays.
 */
typedef void (*VmFunc)(int acc, VmInputData *in, VmOutputData *out);

/**
 * @brief VM functions family launchers
 *
 * Run all variants of VM function family
 *
 * @param[in] acc          Accuracy
 * @param[in] in           Input and reference result arrays
 * @param[out] out         Output arrays
 *
 */
 /**
 * Abs: launches every API variant of the VM Abs family on the offload device.
 *
 * Real arguments are read from in->sarg1 / in->darg1 and complex arguments
 * from in->carg1 / in->zarg1. Each API variant (kFunc, kFuncI, kMFunc,
 * kMFuncI) writes to its own res1 buffer of the matching precision in out.
 * The complex variants receive the complex result buffers through
 * float* / double* casts.
 *
 * @param[in]  acc   Accuracy, used as index into vm_mode
 * @param[in]  in    Input arrays
 * @param[out] out   Output arrays
 */
static void own_vm_abs (int acc, VmInputData* in, VmOutputData* out)
{
    /* Element count handed to the strided (I-suffixed) variants */
    const int n_strided = length/stride;

    /* Single precision argument and per-variant result buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision argument and per-variant result buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Complex single precision buffers, accessed as MKL_Complex8 */
    MKL_Complex8 *c_in     = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision buffers, accessed as MKL_Complex16 */
    MKL_Complex16 *z_in     = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data \
        map(to:s_in[0:length]) map(to:d_in[0:length]) \
        map(to:c_in[0:length]) map(to:z_in[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        map(tofrom:c_out[0:length]) map(tofrom:c_out_i[0:length]) \
        map(tofrom:c_out_m[0:length]) map(tofrom:c_out_mi[0:length]) \
        map(tofrom:z_out[0:length]) map(tofrom:z_out_i[0:length]) \
        map(tofrom:z_out_m[0:length]) map(tofrom:z_out_mi[0:length]) \
        device(devnum)
    {
        int accuracy_mode = vm_mode[acc];

        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(accuracy_mode);

        /* Basic and strided variants */
        #pragma omp dispatch device(devnum)
        vsAbs(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsAbsI(n_strided, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAbs(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdAbsI(n_strided, d_in, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcAbs(length, c_in, (float*)c_out);
        #pragma omp dispatch device(devnum)
        vcAbsI(n_strided, c_in, stride, (float*)c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzAbs(length, z_in, (double*)z_out);
        #pragma omp dispatch device(devnum)
        vzAbsI(n_strided, z_in, stride, (double*)z_out_i, stride);

        /* Explicit mode variants, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsAbs(length, s_in, s_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmsAbsI(n_strided, s_in, stride, s_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmdAbs(length, d_in, d_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmdAbsI(n_strided, d_in, stride, d_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcAbs(length, c_in, (float*)c_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcAbsI(n_strided, c_in, stride, (float*)c_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzAbs(length, z_in, (double*)z_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzAbsI(n_strided, z_in, stride, (double*)z_out_mi, stride, accuracy_mode);
    }
} /* own_vm_abs */

 /**
 * Arg: launches every API variant of the VM Arg family on the offload device.
 *
 * Only complex arguments are used (in->carg1 / in->zarg1). Each API variant
 * (kFunc, kFuncI, kMFunc, kMFuncI) writes to its own complex res1 buffer in
 * out, which is handed to the function through a float* / double* cast.
 *
 * @param[in]  acc   Accuracy, used as index into vm_mode
 * @param[in]  in    Input arrays
 * @param[out] out   Output arrays
 */
static void own_vm_arg (int acc, VmInputData* in, VmOutputData* out)
{
    /* Element count handed to the strided (I-suffixed) variants */
    const int n_strided = length/stride;

    /* Complex single precision buffers, accessed as MKL_Complex8 */
    MKL_Complex8 *c_in     = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision buffers, accessed as MKL_Complex16 */
    MKL_Complex16 *z_in     = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data \
        map(to:c_in[0:length]) map(to:z_in[0:length]) \
        map(tofrom:c_out[0:length]) map(tofrom:c_out_i[0:length]) \
        map(tofrom:c_out_m[0:length]) map(tofrom:c_out_mi[0:length]) \
        map(tofrom:z_out[0:length]) map(tofrom:z_out_i[0:length]) \
        map(tofrom:z_out_m[0:length]) map(tofrom:z_out_mi[0:length]) \
        device(devnum)
    {
        int accuracy_mode = vm_mode[acc];

        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(accuracy_mode);

        /* Basic and strided variants */
        #pragma omp dispatch device(devnum)
        vcArg(length, c_in, (float*)c_out);
        #pragma omp dispatch device(devnum)
        vcArgI(n_strided, c_in, stride, (float*)c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzArg(length, z_in, (double*)z_out);
        #pragma omp dispatch device(devnum)
        vzArgI(n_strided, z_in, stride, (double*)z_out_i, stride);

        /* Explicit mode variants, basic and strided */
        #pragma omp dispatch device(devnum)
        vmcArg(length, c_in, (float*)c_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcArgI(n_strided, c_in, stride, (float*)c_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzArg(length, z_in, (double*)z_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzArgI(n_strided, z_in, stride, (double*)z_out_mi, stride, accuracy_mode);
    }
} /* own_vm_arg */

 /**
 * CIS: launches every API variant of the VM CIS family on the offload device.
 *
 * Real arguments are read from in->sarg1 / in->darg1; each API variant
 * (kFunc, kFuncI, kMFunc, kMFuncI) writes to its own complex res1 buffer
 * (cres1 for single, zres1 for double precision) in out.
 *
 * @param[in]  acc   Accuracy, used as index into vm_mode
 * @param[in]  in    Input arrays
 * @param[out] out   Output arrays
 */
static void own_vm_cis (int acc, VmInputData* in, VmOutputData* out)
{
    /* Element count handed to the strided (I-suffixed) variants */
    const int n_strided = length/stride;

    /* Real arguments */
    float  *s_in = in->sarg1;
    double *d_in = in->darg1;

    /* Complex single precision results, accessed as MKL_Complex8 */
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision results, accessed as MKL_Complex16 */
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data \
        map(to:s_in[0:length]) map(to:d_in[0:length]) \
        map(tofrom:c_out[0:length]) map(tofrom:c_out_i[0:length]) \
        map(tofrom:c_out_m[0:length]) map(tofrom:c_out_mi[0:length]) \
        map(tofrom:z_out[0:length]) map(tofrom:z_out_i[0:length]) \
        map(tofrom:z_out_m[0:length]) map(tofrom:z_out_mi[0:length]) \
        device(devnum)
    {
        int accuracy_mode = vm_mode[acc];

        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(accuracy_mode);

        /* Basic and strided variants */
        #pragma omp dispatch device(devnum)
        vcCIS(length, s_in, c_out);
        #pragma omp dispatch device(devnum)
        vcCISI(n_strided, s_in, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzCIS(length, d_in, z_out);
        #pragma omp dispatch device(devnum)
        vzCISI(n_strided, d_in, stride, z_out_i, stride);

        /* Explicit mode variants, basic and strided */
        #pragma omp dispatch device(devnum)
        vmcCIS(length, s_in, c_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcCISI(n_strided, s_in, stride, c_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzCIS(length, d_in, z_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzCISI(n_strided, d_in, stride, z_out_mi, stride, accuracy_mode);
    }
} /* own_vm_cis */

/**
 * Acos: launches every API variant of the VM Acos family on the offload device.
 *
 * Real arguments are read from in->sarg1 / in->darg1 and complex arguments
 * from in->carg1 / in->zarg1. Each API variant (kFunc, kFuncI, kMFunc,
 * kMFuncI) writes to its own res1 buffer of the matching precision in out.
 *
 * @param[in]  acc   Accuracy, used as index into vm_mode
 * @param[in]  in    Input arrays
 * @param[out] out   Output arrays
 */
static void own_vm_acos (int acc, VmInputData* in, VmOutputData* out)
{
    /* Element count handed to the strided (I-suffixed) variants */
    const int n_strided = length/stride;

    /* Single precision argument and per-variant result buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision argument and per-variant result buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Complex single precision buffers, accessed as MKL_Complex8 */
    MKL_Complex8 *c_in     = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision buffers, accessed as MKL_Complex16 */
    MKL_Complex16 *z_in     = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data \
        map(to:s_in[0:length]) map(to:d_in[0:length]) \
        map(to:c_in[0:length]) map(to:z_in[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        map(tofrom:c_out[0:length]) map(tofrom:c_out_i[0:length]) \
        map(tofrom:c_out_m[0:length]) map(tofrom:c_out_mi[0:length]) \
        map(tofrom:z_out[0:length]) map(tofrom:z_out_i[0:length]) \
        map(tofrom:z_out_m[0:length]) map(tofrom:z_out_mi[0:length]) \
        device(devnum)
    {
        int accuracy_mode = vm_mode[acc];

        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(accuracy_mode);

        /* Basic and strided variants */
        #pragma omp dispatch device(devnum)
        vsAcos(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsAcosI(n_strided, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAcos(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdAcosI(n_strided, d_in, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcAcos(length, c_in, c_out);
        #pragma omp dispatch device(devnum)
        vcAcosI(n_strided, c_in, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzAcos(length, z_in, z_out);
        #pragma omp dispatch device(devnum)
        vzAcosI(n_strided, z_in, stride, z_out_i, stride);

        /* Explicit mode variants, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsAcos(length, s_in, s_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmsAcosI(n_strided, s_in, stride, s_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmdAcos(length, d_in, d_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmdAcosI(n_strided, d_in, stride, d_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcAcos(length, c_in, c_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcAcosI(n_strided, c_in, stride, c_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzAcos(length, z_in, z_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzAcosI(n_strided, z_in, stride, z_out_mi, stride, accuracy_mode);
    }
} /* own_vm_acos */

/**
 * Atan: launches every API variant of the VM Atan family on the offload device.
 *
 * Real arguments are read from in->sarg1 / in->darg1 and complex arguments
 * from in->carg1 / in->zarg1. Each API variant (kFunc, kFuncI, kMFunc,
 * kMFuncI) writes to its own res1 buffer of the matching precision in out.
 *
 * @param[in]  acc   Accuracy, used as index into vm_mode
 * @param[in]  in    Input arrays
 * @param[out] out   Output arrays
 */
static void own_vm_atan (int acc, VmInputData* in, VmOutputData* out)
{
    /* Element count handed to the strided (I-suffixed) variants */
    const int n_strided = length/stride;

    /* Single precision argument and per-variant result buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision argument and per-variant result buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Complex single precision buffers, accessed as MKL_Complex8 */
    MKL_Complex8 *c_in     = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision buffers, accessed as MKL_Complex16 */
    MKL_Complex16 *z_in     = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data \
        map(to:s_in[0:length]) map(to:d_in[0:length]) \
        map(to:c_in[0:length]) map(to:z_in[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        map(tofrom:c_out[0:length]) map(tofrom:c_out_i[0:length]) \
        map(tofrom:c_out_m[0:length]) map(tofrom:c_out_mi[0:length]) \
        map(tofrom:z_out[0:length]) map(tofrom:z_out_i[0:length]) \
        map(tofrom:z_out_m[0:length]) map(tofrom:z_out_mi[0:length]) \
        device(devnum)
    {
        int accuracy_mode = vm_mode[acc];

        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(accuracy_mode);

        /* Basic and strided variants */
        #pragma omp dispatch device(devnum)
        vsAtan(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsAtanI(n_strided, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAtan(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdAtanI(n_strided, d_in, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcAtan(length, c_in, c_out);
        #pragma omp dispatch device(devnum)
        vcAtanI(n_strided, c_in, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzAtan(length, z_in, z_out);
        #pragma omp dispatch device(devnum)
        vzAtanI(n_strided, z_in, stride, z_out_i, stride);

        /* Explicit mode variants, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsAtan(length, s_in, s_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmsAtanI(n_strided, s_in, stride, s_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmdAtan(length, d_in, d_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmdAtanI(n_strided, d_in, stride, d_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcAtan(length, c_in, c_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcAtanI(n_strided, c_in, stride, c_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzAtan(length, z_in, z_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzAtanI(n_strided, z_in, stride, z_out_mi, stride, accuracy_mode);
    }
} /* own_vm_atan */

/**
 * Asin: launches every API variant of the VM Asin family on the offload device.
 *
 * Real arguments are read from in->sarg1 / in->darg1 and complex arguments
 * from in->carg1 / in->zarg1. Each API variant (kFunc, kFuncI, kMFunc,
 * kMFuncI) writes to its own res1 buffer of the matching precision in out.
 *
 * @param[in]  acc   Accuracy, used as index into vm_mode
 * @param[in]  in    Input arrays
 * @param[out] out   Output arrays
 */
static void own_vm_asin (int acc, VmInputData* in, VmOutputData* out)
{
    /* Element count handed to the strided (I-suffixed) variants */
    const int n_strided = length/stride;

    /* Single precision argument and per-variant result buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision argument and per-variant result buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Complex single precision buffers, accessed as MKL_Complex8 */
    MKL_Complex8 *c_in     = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision buffers, accessed as MKL_Complex16 */
    MKL_Complex16 *z_in     = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data \
        map(to:s_in[0:length]) map(to:d_in[0:length]) \
        map(to:c_in[0:length]) map(to:z_in[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        map(tofrom:c_out[0:length]) map(tofrom:c_out_i[0:length]) \
        map(tofrom:c_out_m[0:length]) map(tofrom:c_out_mi[0:length]) \
        map(tofrom:z_out[0:length]) map(tofrom:z_out_i[0:length]) \
        map(tofrom:z_out_m[0:length]) map(tofrom:z_out_mi[0:length]) \
        device(devnum)
    {
        int accuracy_mode = vm_mode[acc];

        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(accuracy_mode);

        /* Basic and strided variants */
        #pragma omp dispatch device(devnum)
        vsAsin(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsAsinI(n_strided, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAsin(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdAsinI(n_strided, d_in, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcAsin(length, c_in, c_out);
        #pragma omp dispatch device(devnum)
        vcAsinI(n_strided, c_in, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzAsin(length, z_in, z_out);
        #pragma omp dispatch device(devnum)
        vzAsinI(n_strided, z_in, stride, z_out_i, stride);

        /* Explicit mode variants, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsAsin(length, s_in, s_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmsAsinI(n_strided, s_in, stride, s_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmdAsin(length, d_in, d_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmdAsinI(n_strided, d_in, stride, d_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcAsin(length, c_in, c_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcAsinI(n_strided, c_in, stride, c_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzAsin(length, z_in, z_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzAsinI(n_strided, z_in, stride, z_out_mi, stride, accuracy_mode);
    }
} /* own_vm_asin */

/**
 * Acosh: launches every API variant of the VM Acosh family on the offload device.
 *
 * Real arguments are read from in->sarg1 / in->darg1 and complex arguments
 * from in->carg1 / in->zarg1. Each API variant (kFunc, kFuncI, kMFunc,
 * kMFuncI) writes to its own res1 buffer of the matching precision in out.
 *
 * @param[in]  acc   Accuracy, used as index into vm_mode
 * @param[in]  in    Input arrays
 * @param[out] out   Output arrays
 */
static void own_vm_acosh (int acc, VmInputData* in, VmOutputData* out)
{
    /* Element count handed to the strided (I-suffixed) variants */
    const int n_strided = length/stride;

    /* Single precision argument and per-variant result buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision argument and per-variant result buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Complex single precision buffers, accessed as MKL_Complex8 */
    MKL_Complex8 *c_in     = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision buffers, accessed as MKL_Complex16 */
    MKL_Complex16 *z_in     = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data \
        map(to:s_in[0:length]) map(to:d_in[0:length]) \
        map(to:c_in[0:length]) map(to:z_in[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        map(tofrom:c_out[0:length]) map(tofrom:c_out_i[0:length]) \
        map(tofrom:c_out_m[0:length]) map(tofrom:c_out_mi[0:length]) \
        map(tofrom:z_out[0:length]) map(tofrom:z_out_i[0:length]) \
        map(tofrom:z_out_m[0:length]) map(tofrom:z_out_mi[0:length]) \
        device(devnum)
    {
        int accuracy_mode = vm_mode[acc];

        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(accuracy_mode);

        /* Basic and strided variants */
        #pragma omp dispatch device(devnum)
        vsAcosh(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsAcoshI(n_strided, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAcosh(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdAcoshI(n_strided, d_in, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcAcosh(length, c_in, c_out);
        #pragma omp dispatch device(devnum)
        vcAcoshI(n_strided, c_in, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzAcosh(length, z_in, z_out);
        #pragma omp dispatch device(devnum)
        vzAcoshI(n_strided, z_in, stride, z_out_i, stride);

        /* Explicit mode variants, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsAcosh(length, s_in, s_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmsAcoshI(n_strided, s_in, stride, s_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmdAcosh(length, d_in, d_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmdAcoshI(n_strided, d_in, stride, d_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcAcosh(length, c_in, c_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcAcoshI(n_strided, c_in, stride, c_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzAcosh(length, z_in, z_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzAcoshI(n_strided, z_in, stride, z_out_mi, stride, accuracy_mode);
    }
} /* own_vm_acosh */

/**
 * Asinh: launches every API variant of the VM Asinh family on the offload device.
 *
 * Real arguments are read from in->sarg1 / in->darg1 and complex arguments
 * from in->carg1 / in->zarg1. Each API variant (kFunc, kFuncI, kMFunc,
 * kMFuncI) writes to its own res1 buffer of the matching precision in out.
 *
 * @param[in]  acc   Accuracy, used as index into vm_mode
 * @param[in]  in    Input arrays
 * @param[out] out   Output arrays
 */
static void own_vm_asinh (int acc, VmInputData* in, VmOutputData* out)
{
    /* Element count handed to the strided (I-suffixed) variants */
    const int n_strided = length/stride;

    /* Single precision argument and per-variant result buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision argument and per-variant result buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Complex single precision buffers, accessed as MKL_Complex8 */
    MKL_Complex8 *c_in     = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Complex double precision buffers, accessed as MKL_Complex16 */
    MKL_Complex16 *z_in     = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data \
        map(to:s_in[0:length]) map(to:d_in[0:length]) \
        map(to:c_in[0:length]) map(to:z_in[0:length]) \
        map(tofrom:s_out[0:length]) map(tofrom:s_out_i[0:length]) \
        map(tofrom:s_out_m[0:length]) map(tofrom:s_out_mi[0:length]) \
        map(tofrom:d_out[0:length]) map(tofrom:d_out_i[0:length]) \
        map(tofrom:d_out_m[0:length]) map(tofrom:d_out_mi[0:length]) \
        map(tofrom:c_out[0:length]) map(tofrom:c_out_i[0:length]) \
        map(tofrom:c_out_m[0:length]) map(tofrom:c_out_mi[0:length]) \
        map(tofrom:z_out[0:length]) map(tofrom:z_out_i[0:length]) \
        map(tofrom:z_out_m[0:length]) map(tofrom:z_out_mi[0:length]) \
        device(devnum)
    {
        int accuracy_mode = vm_mode[acc];

        /* Set the VM mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(accuracy_mode);

        /* Basic and strided variants */
        #pragma omp dispatch device(devnum)
        vsAsinh(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsAsinhI(n_strided, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAsinh(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdAsinhI(n_strided, d_in, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcAsinh(length, c_in, c_out);
        #pragma omp dispatch device(devnum)
        vcAsinhI(n_strided, c_in, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzAsinh(length, z_in, z_out);
        #pragma omp dispatch device(devnum)
        vzAsinhI(n_strided, z_in, stride, z_out_i, stride);

        /* Explicit mode variants, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsAsinh(length, s_in, s_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmsAsinhI(n_strided, s_in, stride, s_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmdAsinh(length, d_in, d_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmdAsinhI(n_strided, d_in, stride, d_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcAsinh(length, c_in, c_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmcAsinhI(n_strided, c_in, stride, c_out_mi, stride, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzAsinh(length, z_in, z_out_m, accuracy_mode);
        #pragma omp dispatch device(devnum)
        vmzAsinhI(n_strided, z_in, stride, z_out_mi, stride, accuracy_mode);
    }
} /* own_vm_asinh */

/**
 * Atanh
 *
 * Dispatches every Atanh call exercised by this test to device devnum:
 * the s/d/c/z basic calls, their strided (I) forms, and the explicit-mode
 * vms/vmd/vmc/vmz counterparts. Argument arrays are mapped "to" the device,
 * result arrays are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays sarg1, darg1, carg1 and zarg1
 * out - provides the result arrays sres1[], dres1[], cres1[] and zres1[],
 *       each indexed by kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_atanh (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision argument and result buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Single precision complex argument and result buffers */
    MKL_Complex8 *c_in     = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex argument and result buffers */
    MKL_Complex16 *z_in     = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in[0:length], d_in[0:length], c_in[0:length], z_in[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length]) \
            map(tofrom: c_out[0:length], c_out_i[0:length], c_out_m[0:length], c_out_mi[0:length]) \
            map(tofrom: z_out[0:length], z_out_i[0:length], z_out_m[0:length], z_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsAtanh(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsAtanhI(n_strided, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAtanh(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdAtanhI(n_strided, d_in, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcAtanh(length, c_in, c_out);
        #pragma omp dispatch device(devnum)
        vcAtanhI(n_strided, c_in, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzAtanh(length, z_in, z_out);
        #pragma omp dispatch device(devnum)
        vzAtanhI(n_strided, z_in, stride, z_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsAtanh(length, s_in, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsAtanhI(n_strided, s_in, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAtanh(length, d_in, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdAtanhI(n_strided, d_in, stride, d_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcAtanh(length, c_in, c_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmcAtanhI(n_strided, c_in, stride, c_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzAtanh(length, z_in, z_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmzAtanhI(n_strided, z_in, stride, z_out_mi, stride, mode);
    }
} /* own_vm_atanh */

/**
 * Add
 *
 * Dispatches every Add call exercised by this test to device devnum:
 * the s/d/c/z basic calls, their strided (I) forms, and the explicit-mode
 * vms/vmd/vmc/vmz counterparts. Argument arrays are mapped "to" the device,
 * result arrays are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays {s,d,c,z}arg1 and {s,d,c,z}arg2
 * out - provides the result arrays sres1[], dres1[], cres1[] and zres1[],
 *       each indexed by kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_add (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Single precision complex arguments and result buffers */
    MKL_Complex8 *c_in1    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_in2    = (MKL_Complex8*)in->carg2;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex arguments and result buffers */
    MKL_Complex16 *z_in1    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_in2    = (MKL_Complex16*)in->zarg2;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(to:     c_in1[0:length], c_in2[0:length], z_in1[0:length], z_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length]) \
            map(tofrom: c_out[0:length], c_out_i[0:length], c_out_m[0:length], c_out_mi[0:length]) \
            map(tofrom: z_out[0:length], z_out_i[0:length], z_out_m[0:length], z_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsAdd(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsAddI(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAdd(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdAddI(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcAdd(length, c_in1, c_in2, c_out);
        #pragma omp dispatch device(devnum)
        vcAddI(n_strided, c_in1, stride, c_in2, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzAdd(length, z_in1, z_in2, z_out);
        #pragma omp dispatch device(devnum)
        vzAddI(n_strided, z_in1, stride, z_in2, stride, z_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsAdd(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsAddI(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAdd(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdAddI(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcAdd(length, c_in1, c_in2, c_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmcAddI(n_strided, c_in1, stride, c_in2, stride, c_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzAdd(length, z_in1, z_in2, z_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmzAddI(n_strided, z_in1, stride, z_in2, stride, z_out_mi, stride, mode);
    }
} /* own_vm_add */

/**
 * Sub
 *
 * Dispatches every Sub call exercised by this test to device devnum:
 * the s/d/c/z basic calls, their strided (I) forms, and the explicit-mode
 * vms/vmd/vmc/vmz counterparts. Argument arrays are mapped "to" the device,
 * result arrays are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays {s,d,c,z}arg1 and {s,d,c,z}arg2
 * out - provides the result arrays sres1[], dres1[], cres1[] and zres1[],
 *       each indexed by kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_sub (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Single precision complex arguments and result buffers */
    MKL_Complex8 *c_in1    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_in2    = (MKL_Complex8*)in->carg2;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex arguments and result buffers */
    MKL_Complex16 *z_in1    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_in2    = (MKL_Complex16*)in->zarg2;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(to:     c_in1[0:length], c_in2[0:length], z_in1[0:length], z_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length]) \
            map(tofrom: c_out[0:length], c_out_i[0:length], c_out_m[0:length], c_out_mi[0:length]) \
            map(tofrom: z_out[0:length], z_out_i[0:length], z_out_m[0:length], z_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsSub(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsSubI(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdSub(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdSubI(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcSub(length, c_in1, c_in2, c_out);
        #pragma omp dispatch device(devnum)
        vcSubI(n_strided, c_in1, stride, c_in2, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzSub(length, z_in1, z_in2, z_out);
        #pragma omp dispatch device(devnum)
        vzSubI(n_strided, z_in1, stride, z_in2, stride, z_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsSub(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsSubI(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdSub(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdSubI(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcSub(length, c_in1, c_in2, c_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmcSubI(n_strided, c_in1, stride, c_in2, stride, c_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzSub(length, z_in1, z_in2, z_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmzSubI(n_strided, z_in1, stride, z_in2, stride, z_out_mi, stride, mode);
    }
} /* own_vm_sub */

/**
 * Mul
 *
 * Dispatches every Mul call exercised by this test to device devnum:
 * the s/d/c/z basic calls, their strided (I) forms, and the explicit-mode
 * vms/vmd/vmc/vmz counterparts. Argument arrays are mapped "to" the device,
 * result arrays are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays {s,d,c,z}arg1 and {s,d,c,z}arg2
 * out - provides the result arrays sres1[], dres1[], cres1[] and zres1[],
 *       each indexed by kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_mul (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Single precision complex arguments and result buffers */
    MKL_Complex8 *c_in1    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_in2    = (MKL_Complex8*)in->carg2;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex arguments and result buffers */
    MKL_Complex16 *z_in1    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_in2    = (MKL_Complex16*)in->zarg2;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(to:     c_in1[0:length], c_in2[0:length], z_in1[0:length], z_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length]) \
            map(tofrom: c_out[0:length], c_out_i[0:length], c_out_m[0:length], c_out_mi[0:length]) \
            map(tofrom: z_out[0:length], z_out_i[0:length], z_out_m[0:length], z_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsMul(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsMulI(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdMul(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdMulI(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcMul(length, c_in1, c_in2, c_out);
        #pragma omp dispatch device(devnum)
        vcMulI(n_strided, c_in1, stride, c_in2, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzMul(length, z_in1, z_in2, z_out);
        #pragma omp dispatch device(devnum)
        vzMulI(n_strided, z_in1, stride, z_in2, stride, z_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsMul(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsMulI(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdMul(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdMulI(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcMul(length, c_in1, c_in2, c_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmcMulI(n_strided, c_in1, stride, c_in2, stride, c_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzMul(length, z_in1, z_in2, z_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmzMulI(n_strided, z_in1, stride, z_in2, stride, z_out_mi, stride, mode);
    }
} /* own_vm_mul */

/**
 * Atan2
 *
 * Dispatches the single and double precision Atan2 calls to device devnum:
 * vsAtan2/vdAtan2, their strided (I) forms, and the explicit-mode vms/vmd
 * counterparts. Argument arrays are mapped "to" the device, result arrays
 * are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays sarg1/sarg2 and darg1/darg2
 * out - provides the result arrays sres1[] and dres1[], each indexed by
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_atan2 (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsAtan2(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsAtan2I(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAtan2(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdAtan2I(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsAtan2(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsAtan2I(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAtan2(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdAtan2I(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
    }
} /* own_vm_atan2 */

/**
 * Atan2pi
 *
 * Dispatches the single and double precision Atan2pi calls to device devnum:
 * vsAtan2pi/vdAtan2pi, their strided (I) forms, and the explicit-mode
 * vms/vmd counterparts. Argument arrays are mapped "to" the device, result
 * arrays are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays sarg1/sarg2 and darg1/darg2
 * out - provides the result arrays sres1[] and dres1[], each indexed by
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_atan2pi (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsAtan2pi(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsAtan2piI(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAtan2pi(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdAtan2piI(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsAtan2pi(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsAtan2piI(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdAtan2pi(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdAtan2piI(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
    }
} /* own_vm_atan2pi */

/**
 * CopySign
 *
 * Dispatches the single and double precision CopySign calls to device
 * devnum: vsCopySign/vdCopySign, their strided (I) forms, and the
 * explicit-mode vms/vmd counterparts. Argument arrays are mapped "to" the
 * device, result arrays are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays sarg1/sarg2 and darg1/darg2
 * out - provides the result arrays sres1[] and dres1[], each indexed by
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_copysign (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsCopySign(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsCopySignI(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdCopySign(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdCopySignI(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsCopySign(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsCopySignI(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdCopySign(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdCopySignI(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
    }
} /* own_vm_copysign */

/**
 * Fdim
 *
 * Dispatches the single and double precision Fdim calls to device devnum:
 * vsFdim/vdFdim, their strided (I) forms, and the explicit-mode vms/vmd
 * counterparts. Argument arrays are mapped "to" the device, result arrays
 * are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays sarg1/sarg2 and darg1/darg2
 * out - provides the result arrays sres1[] and dres1[], each indexed by
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_fdim (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsFdim(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsFdimI(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdFdim(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdFdimI(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsFdim(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsFdimI(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdFdim(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdFdimI(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
    }
} /* own_vm_fdim */

/**
 * Fmax
 *
 * Dispatches the single and double precision Fmax calls to device devnum:
 * vsFmax/vdFmax, their strided (I) forms, and the explicit-mode vms/vmd
 * counterparts. Argument arrays are mapped "to" the device, result arrays
 * are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays sarg1/sarg2 and darg1/darg2
 * out - provides the result arrays sres1[] and dres1[], each indexed by
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_fmax (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsFmax(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsFmaxI(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdFmax(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdFmaxI(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsFmax(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsFmaxI(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdFmax(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdFmaxI(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
    }
} /* own_vm_fmax */

/**
 * Fmin
 *
 * Dispatches the single and double precision Fmin calls to device devnum:
 * vsFmin/vdFmin, their strided (I) forms, and the explicit-mode vms/vmd
 * counterparts. Argument arrays are mapped "to" the device, result arrays
 * are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays sarg1/sarg2 and darg1/darg2
 * out - provides the result arrays sres1[] and dres1[], each indexed by
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_fmin (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsFmin(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsFminI(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdFmin(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdFminI(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsFmin(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsFminI(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdFmin(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdFminI(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
    }
} /* own_vm_fmin */

/**
 * MaxMag
 *
 * Dispatches the single and double precision MaxMag calls to device devnum:
 * vsMaxMag/vdMaxMag, their strided (I) forms, and the explicit-mode vms/vmd
 * counterparts. Argument arrays are mapped "to" the device, result arrays
 * are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays sarg1/sarg2 and darg1/darg2
 * out - provides the result arrays sres1[] and dres1[], each indexed by
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_maxmag (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsMaxMag(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsMaxMagI(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdMaxMag(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdMaxMagI(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsMaxMag(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsMaxMagI(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdMaxMag(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdMaxMagI(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
    }
} /* own_vm_maxmag */

/**
 * MinMag
 *
 * Dispatches the single and double precision MinMag calls to device devnum:
 * vsMinMag/vdMinMag, their strided (I) forms, and the explicit-mode vms/vmd
 * counterparts. Argument arrays are mapped "to" the device, result arrays
 * are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays sarg1/sarg2 and darg1/darg2
 * out - provides the result arrays sres1[] and dres1[], each indexed by
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_minmag (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsMinMag(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsMinMagI(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdMinMag(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdMinMagI(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsMinMag(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsMinMagI(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdMinMag(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdMinMagI(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
    }
} /* own_vm_minmag */

/**
 * Fmod
 *
 * Dispatches the single and double precision Fmod calls to device devnum:
 * vsFmod/vdFmod, their strided (I) forms, and the explicit-mode vms/vmd
 * counterparts. Argument arrays are mapped "to" the device, result arrays
 * are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays sarg1/sarg2 and darg1/darg2
 * out - provides the result arrays sres1[] and dres1[], each indexed by
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_fmod (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsFmod(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsFmodI(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdFmod(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdFmodI(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsFmod(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsFmodI(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdFmod(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdFmodI(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
    }
} /* own_vm_fmod */

/**
 * Hypot
 *
 * Dispatches the single and double precision Hypot calls to device devnum:
 * vsHypot/vdHypot, their strided (I) forms, and the explicit-mode vms/vmd
 * counterparts. Argument arrays are mapped "to" the device, result arrays
 * are mapped "tofrom".
 *
 * acc - index into vm_mode selecting the mode value
 * in  - provides the argument arrays sarg1/sarg2 and darg1/darg2
 * out - provides the result arrays sres1[] and dres1[], each indexed by
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_hypot (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision arguments and result buffers */
    float *s_in1    = in->sarg1;
    float *s_in2    = in->sarg2;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision arguments and result buffers */
    double *d_in1    = in->darg1;
    double *d_in2    = in->darg2;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Mode value for vmlSetMode and for the explicit-mode calls */
    const int mode = vm_mode[acc];
    /* Number of elements processed by the strided calls */
    const int n_strided = length / stride;

    #pragma omp target data device(devnum) \
            map(to:     s_in1[0:length], s_in2[0:length], d_in1[0:length], d_in2[0:length]) \
            map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
            map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        /* Set the mode before the calls that take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls without a mode argument: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vsHypot(length, s_in1, s_in2, s_out);
        #pragma omp dispatch device(devnum)
        vsHypotI(n_strided, s_in1, stride, s_in2, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdHypot(length, d_in1, d_in2, d_out);
        #pragma omp dispatch device(devnum)
        vdHypotI(n_strided, d_in1, stride, d_in2, stride, d_out_i, stride);

        /* Explicit-mode calls: basic, then strided, per precision */
        #pragma omp dispatch device(devnum)
        vmsHypot(length, s_in1, s_in2, s_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmsHypotI(n_strided, s_in1, stride, s_in2, stride, s_out_mi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdHypot(length, d_in1, d_in2, d_out_m, mode);
        #pragma omp dispatch device(devnum)
        vmdHypotI(n_strided, d_in1, stride, d_in2, stride, d_out_mi, stride, mode);
    }
} /* own_vm_hypot */

/**
 * NextAfter
 *
 * Issues every real-valued NextAfter entry point through OpenMP dispatch on
 * device devnum: whole-vector and strided (I) calls, first relying on the
 * mode installed by vmlSetMode, then passing the mode explicitly (vm*).
 *
 * acc - index into vm_mode[] that selects the mode value used below
 * in  - source of the argument arrays (sarg1/sarg2, darg1/darg2)
 * out - result arrays, taken from sres1[]/dres1[] at kFunc, kFuncI,
 *       kMFunc and kMFuncI
 */
static void own_vm_nextafter (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode shared by vmlSetMode and by all explicit-mode calls */
    int mode = vm_mode[acc];
    /* Element count handed to the strided (I) calls */
    int ns   = length/stride;

    /* Single precision arguments and results */
    float *s_a   = in->sarg1;
    float *s_b   = in->sarg2;
    float *s_r   = out->sres1[kFunc];
    float *s_ri  = out->sres1[kFuncI];
    float *s_rm  = out->sres1[kMFunc];
    float *s_rmi = out->sres1[kMFuncI];

    /* Double precision arguments and results */
    double *d_a   = in->darg1;
    double *d_b   = in->darg2;
    double *d_r   = out->dres1[kFunc];
    double *d_ri  = out->dres1[kFuncI];
    double *d_rm  = out->dres1[kMFunc];
    double *d_rmi = out->dres1[kMFuncI];

    #pragma omp target data \
        map(to:s_a[0:length]) map(to:d_a[0:length]) map(to:s_b[0:length]) map(to:d_b[0:length]) \
        map(tofrom:s_r[0:length]) map(tofrom:s_ri[0:length]) map(tofrom:s_rm[0:length]) map(tofrom:s_rmi[0:length]) \
        map(tofrom:d_r[0:length]) map(tofrom:d_ri[0:length]) map(tofrom:d_rm[0:length]) map(tofrom:d_rmi[0:length]) \
        device(devnum)
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls that rely on the mode set above */
        #pragma omp dispatch device(devnum)
        vsNextAfter(length, s_a, s_b, s_r);
        #pragma omp dispatch device(devnum)
        vsNextAfterI(ns, s_a, stride, s_b, stride, s_ri, stride);
        #pragma omp dispatch device(devnum)
        vdNextAfter(length, d_a, d_b, d_r);
        #pragma omp dispatch device(devnum)
        vdNextAfterI(ns, d_a, stride, d_b, stride, d_ri, stride);

        /* Calls that receive the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsNextAfter(length, s_a, s_b, s_rm, mode);
        #pragma omp dispatch device(devnum)
        vmsNextAfterI(ns, s_a, stride, s_b, stride, s_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdNextAfter(length, d_a, d_b, d_rm, mode);
        #pragma omp dispatch device(devnum)
        vmdNextAfterI(ns, d_a, stride, d_b, stride, d_rmi, stride, mode);
    }
} /* own_vm_nextafter */

/**
 * Powr
 *
 * Issues every real-valued Powr entry point through OpenMP dispatch on
 * device devnum: whole-vector and strided (I) calls, first relying on the
 * mode installed by vmlSetMode, then passing the mode explicitly (vm*).
 *
 * acc - index into vm_mode[] that selects the mode value used below
 * in  - source of the argument arrays (sarg1/sarg2, darg1/darg2)
 * out - result arrays, taken from sres1[]/dres1[] at kFunc, kFuncI,
 *       kMFunc and kMFuncI
 */
static void own_vm_powr (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode shared by vmlSetMode and by all explicit-mode calls */
    int mode = vm_mode[acc];
    /* Element count handed to the strided (I) calls */
    int ns   = length/stride;

    /* Single precision arguments and results */
    float *s_a   = in->sarg1;
    float *s_b   = in->sarg2;
    float *s_r   = out->sres1[kFunc];
    float *s_ri  = out->sres1[kFuncI];
    float *s_rm  = out->sres1[kMFunc];
    float *s_rmi = out->sres1[kMFuncI];

    /* Double precision arguments and results */
    double *d_a   = in->darg1;
    double *d_b   = in->darg2;
    double *d_r   = out->dres1[kFunc];
    double *d_ri  = out->dres1[kFuncI];
    double *d_rm  = out->dres1[kMFunc];
    double *d_rmi = out->dres1[kMFuncI];

    #pragma omp target data \
        map(to:s_a[0:length]) map(to:d_a[0:length]) map(to:s_b[0:length]) map(to:d_b[0:length]) \
        map(tofrom:s_r[0:length]) map(tofrom:s_ri[0:length]) map(tofrom:s_rm[0:length]) map(tofrom:s_rmi[0:length]) \
        map(tofrom:d_r[0:length]) map(tofrom:d_ri[0:length]) map(tofrom:d_rm[0:length]) map(tofrom:d_rmi[0:length]) \
        device(devnum)
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls that rely on the mode set above */
        #pragma omp dispatch device(devnum)
        vsPowr(length, s_a, s_b, s_r);
        #pragma omp dispatch device(devnum)
        vsPowrI(ns, s_a, stride, s_b, stride, s_ri, stride);
        #pragma omp dispatch device(devnum)
        vdPowr(length, d_a, d_b, d_r);
        #pragma omp dispatch device(devnum)
        vdPowrI(ns, d_a, stride, d_b, stride, d_ri, stride);

        /* Calls that receive the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsPowr(length, s_a, s_b, s_rm, mode);
        #pragma omp dispatch device(devnum)
        vmsPowrI(ns, s_a, stride, s_b, stride, s_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdPowr(length, d_a, d_b, d_rm, mode);
        #pragma omp dispatch device(devnum)
        vmdPowrI(ns, d_a, stride, d_b, stride, d_rmi, stride, mode);
    }
} /* own_vm_powr */

/**
 * Pow2o3
 *
 * Issues every real-valued Pow2o3 entry point through OpenMP dispatch on
 * device devnum: whole-vector and strided (I) calls, first relying on the
 * mode installed by vmlSetMode, then passing the mode explicitly (vm*).
 *
 * acc - index into vm_mode[] that selects the mode value used below
 * in  - source of the argument arrays (sarg1, darg1)
 * out - result arrays, taken from sres1[]/dres1[] at kFunc, kFuncI,
 *       kMFunc and kMFuncI
 */
static void own_vm_pow2o3 (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode shared by vmlSetMode and by all explicit-mode calls */
    int mode = vm_mode[acc];
    /* Element count handed to the strided (I) calls */
    int ns   = length/stride;

    /* Single precision argument and results */
    float *s_a   = in->sarg1;
    float *s_r   = out->sres1[kFunc];
    float *s_ri  = out->sres1[kFuncI];
    float *s_rm  = out->sres1[kMFunc];
    float *s_rmi = out->sres1[kMFuncI];

    /* Double precision argument and results */
    double *d_a   = in->darg1;
    double *d_r   = out->dres1[kFunc];
    double *d_ri  = out->dres1[kFuncI];
    double *d_rm  = out->dres1[kMFunc];
    double *d_rmi = out->dres1[kMFuncI];

    #pragma omp target data \
        map(to:s_a[0:length]) map(to:d_a[0:length]) \
        map(tofrom:s_r[0:length]) map(tofrom:s_ri[0:length]) map(tofrom:s_rm[0:length]) map(tofrom:s_rmi[0:length]) \
        map(tofrom:d_r[0:length]) map(tofrom:d_ri[0:length]) map(tofrom:d_rm[0:length]) map(tofrom:d_rmi[0:length]) \
        device(devnum)
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls that rely on the mode set above */
        #pragma omp dispatch device(devnum)
        vsPow2o3(length, s_a, s_r);
        #pragma omp dispatch device(devnum)
        vsPow2o3I(ns, s_a, stride, s_ri, stride);
        #pragma omp dispatch device(devnum)
        vdPow2o3(length, d_a, d_r);
        #pragma omp dispatch device(devnum)
        vdPow2o3I(ns, d_a, stride, d_ri, stride);

        /* Calls that receive the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsPow2o3(length, s_a, s_rm, mode);
        #pragma omp dispatch device(devnum)
        vmsPow2o3I(ns, s_a, stride, s_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdPow2o3(length, d_a, d_rm, mode);
        #pragma omp dispatch device(devnum)
        vmdPow2o3I(ns, d_a, stride, d_rmi, stride, mode);
    }
} /* own_vm_pow2o3 */

/**
 * Pow3o2
 *
 * Issues every real-valued Pow3o2 entry point through OpenMP dispatch on
 * device devnum: whole-vector and strided (I) calls, first relying on the
 * mode installed by vmlSetMode, then passing the mode explicitly (vm*).
 *
 * acc - index into vm_mode[] that selects the mode value used below
 * in  - source of the argument arrays (sarg1, darg1)
 * out - result arrays, taken from sres1[]/dres1[] at kFunc, kFuncI,
 *       kMFunc and kMFuncI
 */
static void own_vm_pow3o2 (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode shared by vmlSetMode and by all explicit-mode calls */
    int mode = vm_mode[acc];
    /* Element count handed to the strided (I) calls */
    int ns   = length/stride;

    /* Single precision argument and results */
    float *s_a   = in->sarg1;
    float *s_r   = out->sres1[kFunc];
    float *s_ri  = out->sres1[kFuncI];
    float *s_rm  = out->sres1[kMFunc];
    float *s_rmi = out->sres1[kMFuncI];

    /* Double precision argument and results */
    double *d_a   = in->darg1;
    double *d_r   = out->dres1[kFunc];
    double *d_ri  = out->dres1[kFuncI];
    double *d_rm  = out->dres1[kMFunc];
    double *d_rmi = out->dres1[kMFuncI];

    #pragma omp target data \
        map(to:s_a[0:length]) map(to:d_a[0:length]) \
        map(tofrom:s_r[0:length]) map(tofrom:s_ri[0:length]) map(tofrom:s_rm[0:length]) map(tofrom:s_rmi[0:length]) \
        map(tofrom:d_r[0:length]) map(tofrom:d_ri[0:length]) map(tofrom:d_rm[0:length]) map(tofrom:d_rmi[0:length]) \
        device(devnum)
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls that rely on the mode set above */
        #pragma omp dispatch device(devnum)
        vsPow3o2(length, s_a, s_r);
        #pragma omp dispatch device(devnum)
        vsPow3o2I(ns, s_a, stride, s_ri, stride);
        #pragma omp dispatch device(devnum)
        vdPow3o2(length, d_a, d_r);
        #pragma omp dispatch device(devnum)
        vdPow3o2I(ns, d_a, stride, d_ri, stride);

        /* Calls that receive the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsPow3o2(length, s_a, s_rm, mode);
        #pragma omp dispatch device(devnum)
        vmsPow3o2I(ns, s_a, stride, s_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdPow3o2(length, d_a, d_rm, mode);
        #pragma omp dispatch device(devnum)
        vmdPow3o2I(ns, d_a, stride, d_rmi, stride, mode);
    }
} /* own_vm_pow3o2 */

/**
 * Cbrt
 *
 * Issues every real-valued Cbrt entry point through OpenMP dispatch on
 * device devnum: whole-vector and strided (I) calls, first relying on the
 * mode installed by vmlSetMode, then passing the mode explicitly (vm*).
 *
 * acc - index into vm_mode[] that selects the mode value used below
 * in  - source of the argument arrays (sarg1, darg1)
 * out - result arrays, taken from sres1[]/dres1[] at kFunc, kFuncI,
 *       kMFunc and kMFuncI
 */
static void own_vm_cbrt (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode shared by vmlSetMode and by all explicit-mode calls */
    int mode = vm_mode[acc];
    /* Element count handed to the strided (I) calls */
    int ns   = length/stride;

    /* Single precision argument and results */
    float *s_a   = in->sarg1;
    float *s_r   = out->sres1[kFunc];
    float *s_ri  = out->sres1[kFuncI];
    float *s_rm  = out->sres1[kMFunc];
    float *s_rmi = out->sres1[kMFuncI];

    /* Double precision argument and results */
    double *d_a   = in->darg1;
    double *d_r   = out->dres1[kFunc];
    double *d_ri  = out->dres1[kFuncI];
    double *d_rm  = out->dres1[kMFunc];
    double *d_rmi = out->dres1[kMFuncI];

    #pragma omp target data \
        map(to:s_a[0:length]) map(to:d_a[0:length]) \
        map(tofrom:s_r[0:length]) map(tofrom:s_ri[0:length]) map(tofrom:s_rm[0:length]) map(tofrom:s_rmi[0:length]) \
        map(tofrom:d_r[0:length]) map(tofrom:d_ri[0:length]) map(tofrom:d_rm[0:length]) map(tofrom:d_rmi[0:length]) \
        device(devnum)
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls that rely on the mode set above */
        #pragma omp dispatch device(devnum)
        vsCbrt(length, s_a, s_r);
        #pragma omp dispatch device(devnum)
        vsCbrtI(ns, s_a, stride, s_ri, stride);
        #pragma omp dispatch device(devnum)
        vdCbrt(length, d_a, d_r);
        #pragma omp dispatch device(devnum)
        vdCbrtI(ns, d_a, stride, d_ri, stride);

        /* Calls that receive the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsCbrt(length, s_a, s_rm, mode);
        #pragma omp dispatch device(devnum)
        vmsCbrtI(ns, s_a, stride, s_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdCbrt(length, d_a, d_rm, mode);
        #pragma omp dispatch device(devnum)
        vmdCbrtI(ns, d_a, stride, d_rmi, stride, mode);
    }
} /* own_vm_cbrt */

/**
 * InvCbrt
 *
 * Issues every real-valued InvCbrt entry point through OpenMP dispatch on
 * device devnum: whole-vector and strided (I) calls, first relying on the
 * mode installed by vmlSetMode, then passing the mode explicitly (vm*).
 *
 * acc - index into vm_mode[] that selects the mode value used below
 * in  - source of the argument arrays (sarg1, darg1)
 * out - result arrays, taken from sres1[]/dres1[] at kFunc, kFuncI,
 *       kMFunc and kMFuncI
 */
static void own_vm_invcbrt (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode shared by vmlSetMode and by all explicit-mode calls */
    int mode = vm_mode[acc];
    /* Element count handed to the strided (I) calls */
    int ns   = length/stride;

    /* Single precision argument and results */
    float *s_a   = in->sarg1;
    float *s_r   = out->sres1[kFunc];
    float *s_ri  = out->sres1[kFuncI];
    float *s_rm  = out->sres1[kMFunc];
    float *s_rmi = out->sres1[kMFuncI];

    /* Double precision argument and results */
    double *d_a   = in->darg1;
    double *d_r   = out->dres1[kFunc];
    double *d_ri  = out->dres1[kFuncI];
    double *d_rm  = out->dres1[kMFunc];
    double *d_rmi = out->dres1[kMFuncI];

    #pragma omp target data \
        map(to:s_a[0:length]) map(to:d_a[0:length]) \
        map(tofrom:s_r[0:length]) map(tofrom:s_ri[0:length]) map(tofrom:s_rm[0:length]) map(tofrom:s_rmi[0:length]) \
        map(tofrom:d_r[0:length]) map(tofrom:d_ri[0:length]) map(tofrom:d_rm[0:length]) map(tofrom:d_rmi[0:length]) \
        device(devnum)
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls that rely on the mode set above */
        #pragma omp dispatch device(devnum)
        vsInvCbrt(length, s_a, s_r);
        #pragma omp dispatch device(devnum)
        vsInvCbrtI(ns, s_a, stride, s_ri, stride);
        #pragma omp dispatch device(devnum)
        vdInvCbrt(length, d_a, d_r);
        #pragma omp dispatch device(devnum)
        vdInvCbrtI(ns, d_a, stride, d_ri, stride);

        /* Calls that receive the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsInvCbrt(length, s_a, s_rm, mode);
        #pragma omp dispatch device(devnum)
        vmsInvCbrtI(ns, s_a, stride, s_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdInvCbrt(length, d_a, d_rm, mode);
        #pragma omp dispatch device(devnum)
        vmdInvCbrtI(ns, d_a, stride, d_rmi, stride, mode);
    }
} /* own_vm_invcbrt */

/**
 * InvSqrt
 *
 * Issues every real-valued InvSqrt entry point through OpenMP dispatch on
 * device devnum: whole-vector and strided (I) calls, first relying on the
 * mode installed by vmlSetMode, then passing the mode explicitly (vm*).
 *
 * acc - index into vm_mode[] that selects the mode value used below
 * in  - source of the argument arrays (sarg1, darg1)
 * out - result arrays, taken from sres1[]/dres1[] at kFunc, kFuncI,
 *       kMFunc and kMFuncI
 */
static void own_vm_invsqrt (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode shared by vmlSetMode and by all explicit-mode calls */
    int mode = vm_mode[acc];
    /* Element count handed to the strided (I) calls */
    int ns   = length/stride;

    /* Single precision argument and results */
    float *s_a   = in->sarg1;
    float *s_r   = out->sres1[kFunc];
    float *s_ri  = out->sres1[kFuncI];
    float *s_rm  = out->sres1[kMFunc];
    float *s_rmi = out->sres1[kMFuncI];

    /* Double precision argument and results */
    double *d_a   = in->darg1;
    double *d_r   = out->dres1[kFunc];
    double *d_ri  = out->dres1[kFuncI];
    double *d_rm  = out->dres1[kMFunc];
    double *d_rmi = out->dres1[kMFuncI];

    #pragma omp target data \
        map(to:s_a[0:length]) map(to:d_a[0:length]) \
        map(tofrom:s_r[0:length]) map(tofrom:s_ri[0:length]) map(tofrom:s_rm[0:length]) map(tofrom:s_rmi[0:length]) \
        map(tofrom:d_r[0:length]) map(tofrom:d_ri[0:length]) map(tofrom:d_rm[0:length]) map(tofrom:d_rmi[0:length]) \
        device(devnum)
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls that rely on the mode set above */
        #pragma omp dispatch device(devnum)
        vsInvSqrt(length, s_a, s_r);
        #pragma omp dispatch device(devnum)
        vsInvSqrtI(ns, s_a, stride, s_ri, stride);
        #pragma omp dispatch device(devnum)
        vdInvSqrt(length, d_a, d_r);
        #pragma omp dispatch device(devnum)
        vdInvSqrtI(ns, d_a, stride, d_ri, stride);

        /* Calls that receive the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsInvSqrt(length, s_a, s_rm, mode);
        #pragma omp dispatch device(devnum)
        vmsInvSqrtI(ns, s_a, stride, s_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdInvSqrt(length, d_a, d_rm, mode);
        #pragma omp dispatch device(devnum)
        vmdInvSqrtI(ns, d_a, stride, d_rmi, stride, mode);
    }
} /* own_vm_invsqrt */

/**
 * Remainder
 *
 * Issues every real-valued Remainder entry point through OpenMP dispatch on
 * device devnum: whole-vector and strided (I) calls, first relying on the
 * mode installed by vmlSetMode, then passing the mode explicitly (vm*).
 *
 * acc - index into vm_mode[] that selects the mode value used below
 * in  - source of the argument arrays (sarg1/sarg2, darg1/darg2)
 * out - result arrays, taken from sres1[]/dres1[] at kFunc, kFuncI,
 *       kMFunc and kMFuncI
 */
static void own_vm_remainder (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode shared by vmlSetMode and by all explicit-mode calls */
    int mode = vm_mode[acc];
    /* Element count handed to the strided (I) calls */
    int ns   = length/stride;

    /* Single precision arguments and results */
    float *s_a   = in->sarg1;
    float *s_b   = in->sarg2;
    float *s_r   = out->sres1[kFunc];
    float *s_ri  = out->sres1[kFuncI];
    float *s_rm  = out->sres1[kMFunc];
    float *s_rmi = out->sres1[kMFuncI];

    /* Double precision arguments and results */
    double *d_a   = in->darg1;
    double *d_b   = in->darg2;
    double *d_r   = out->dres1[kFunc];
    double *d_ri  = out->dres1[kFuncI];
    double *d_rm  = out->dres1[kMFunc];
    double *d_rmi = out->dres1[kMFuncI];

    #pragma omp target data \
        map(to:s_a[0:length]) map(to:d_a[0:length]) map(to:s_b[0:length]) map(to:d_b[0:length]) \
        map(tofrom:s_r[0:length]) map(tofrom:s_ri[0:length]) map(tofrom:s_rm[0:length]) map(tofrom:s_rmi[0:length]) \
        map(tofrom:d_r[0:length]) map(tofrom:d_ri[0:length]) map(tofrom:d_rm[0:length]) map(tofrom:d_rmi[0:length]) \
        device(devnum)
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls that rely on the mode set above */
        #pragma omp dispatch device(devnum)
        vsRemainder(length, s_a, s_b, s_r);
        #pragma omp dispatch device(devnum)
        vsRemainderI(ns, s_a, stride, s_b, stride, s_ri, stride);
        #pragma omp dispatch device(devnum)
        vdRemainder(length, d_a, d_b, d_r);
        #pragma omp dispatch device(devnum)
        vdRemainderI(ns, d_a, stride, d_b, stride, d_ri, stride);

        /* Calls that receive the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsRemainder(length, s_a, s_b, s_rm, mode);
        #pragma omp dispatch device(devnum)
        vmsRemainderI(ns, s_a, stride, s_b, stride, s_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdRemainder(length, d_a, d_b, d_rm, mode);
        #pragma omp dispatch device(devnum)
        vmdRemainderI(ns, d_a, stride, d_b, stride, d_rmi, stride, mode);
    }
} /* own_vm_remainder */

/**
 * Div
 *
 * Issues every Div entry point (single, double, single complex, double
 * complex) through OpenMP dispatch on device devnum: whole-vector and
 * strided (I) calls, first relying on the mode installed by vmlSetMode,
 * then passing the mode explicitly (vm*).
 *
 * acc - index into vm_mode[] that selects the mode value used below
 * in  - source of the argument arrays (sarg1/sarg2, darg1/darg2,
 *       carg1/carg2, zarg1/zarg2); complex arrays are cast to the
 *       MKL_Complex8 / MKL_Complex16 pointer types
 * out - result arrays, taken from sres1[]/dres1[]/cres1[]/zres1[] at
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_div (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode shared by vmlSetMode and by all explicit-mode calls */
    int mode = vm_mode[acc];
    /* Element count handed to the strided (I) calls */
    int ns   = length/stride;

    /* Single precision arguments and results */
    float *s_a   = in->sarg1;
    float *s_b   = in->sarg2;
    float *s_r   = out->sres1[kFunc];
    float *s_ri  = out->sres1[kFuncI];
    float *s_rm  = out->sres1[kMFunc];
    float *s_rmi = out->sres1[kMFuncI];

    /* Double precision arguments and results */
    double *d_a   = in->darg1;
    double *d_b   = in->darg2;
    double *d_r   = out->dres1[kFunc];
    double *d_ri  = out->dres1[kFuncI];
    double *d_rm  = out->dres1[kMFunc];
    double *d_rmi = out->dres1[kMFuncI];

    /* Single precision complex arguments and results */
    MKL_Complex8 *c_a   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_b   = (MKL_Complex8*)in->carg2;
    MKL_Complex8 *c_r   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_ri  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_rm  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_rmi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex arguments and results */
    MKL_Complex16 *z_a   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_b   = (MKL_Complex16*)in->zarg2;
    MKL_Complex16 *z_r   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_ri  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_rm  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_rmi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data \
        map(to:s_a[0:length]) map(to:d_a[0:length]) map(to:s_b[0:length]) map(to:d_b[0:length]) \
        map(to:c_a[0:length]) map(to:z_a[0:length]) map(to:c_b[0:length]) map(to:z_b[0:length]) \
        map(tofrom:s_r[0:length]) map(tofrom:s_ri[0:length]) map(tofrom:s_rm[0:length]) map(tofrom:s_rmi[0:length]) \
        map(tofrom:d_r[0:length]) map(tofrom:d_ri[0:length]) map(tofrom:d_rm[0:length]) map(tofrom:d_rmi[0:length]) \
        map(tofrom:c_r[0:length]) map(tofrom:c_ri[0:length]) map(tofrom:c_rm[0:length]) map(tofrom:c_rmi[0:length]) \
        map(tofrom:z_r[0:length]) map(tofrom:z_ri[0:length]) map(tofrom:z_rm[0:length]) map(tofrom:z_rmi[0:length]) \
        device(devnum)
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls that rely on the mode set above */
        #pragma omp dispatch device(devnum)
        vsDiv(length, s_a, s_b, s_r);
        #pragma omp dispatch device(devnum)
        vsDivI(ns, s_a, stride, s_b, stride, s_ri, stride);
        #pragma omp dispatch device(devnum)
        vdDiv(length, d_a, d_b, d_r);
        #pragma omp dispatch device(devnum)
        vdDivI(ns, d_a, stride, d_b, stride, d_ri, stride);
        #pragma omp dispatch device(devnum)
        vcDiv(length, c_a, c_b, c_r);
        #pragma omp dispatch device(devnum)
        vcDivI(ns, c_a, stride, c_b, stride, c_ri, stride);
        #pragma omp dispatch device(devnum)
        vzDiv(length, z_a, z_b, z_r);
        #pragma omp dispatch device(devnum)
        vzDivI(ns, z_a, stride, z_b, stride, z_ri, stride);

        /* Calls that receive the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsDiv(length, s_a, s_b, s_rm, mode);
        #pragma omp dispatch device(devnum)
        vmsDivI(ns, s_a, stride, s_b, stride, s_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdDiv(length, d_a, d_b, d_rm, mode);
        #pragma omp dispatch device(devnum)
        vmdDivI(ns, d_a, stride, d_b, stride, d_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcDiv(length, c_a, c_b, c_rm, mode);
        #pragma omp dispatch device(devnum)
        vmcDivI(ns, c_a, stride, c_b, stride, c_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzDiv(length, z_a, z_b, z_rm, mode);
        #pragma omp dispatch device(devnum)
        vmzDivI(ns, z_a, stride, z_b, stride, z_rmi, stride, mode);
    }
} /* own_vm_div */

/**
 * Pow
 *
 * Issues every Pow entry point (single, double, single complex, double
 * complex) through OpenMP dispatch on device devnum: whole-vector and
 * strided (I) calls, first relying on the mode installed by vmlSetMode,
 * then passing the mode explicitly (vm*).
 *
 * acc - index into vm_mode[] that selects the mode value used below
 * in  - source of the argument arrays (sarg1/sarg2, darg1/darg2,
 *       carg1/carg2, zarg1/zarg2); complex arrays are cast to the
 *       MKL_Complex8 / MKL_Complex16 pointer types
 * out - result arrays, taken from sres1[]/dres1[]/cres1[]/zres1[] at
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_pow (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode shared by vmlSetMode and by all explicit-mode calls */
    int mode = vm_mode[acc];
    /* Element count handed to the strided (I) calls */
    int ns   = length/stride;

    /* Single precision arguments and results */
    float *s_a   = in->sarg1;
    float *s_b   = in->sarg2;
    float *s_r   = out->sres1[kFunc];
    float *s_ri  = out->sres1[kFuncI];
    float *s_rm  = out->sres1[kMFunc];
    float *s_rmi = out->sres1[kMFuncI];

    /* Double precision arguments and results */
    double *d_a   = in->darg1;
    double *d_b   = in->darg2;
    double *d_r   = out->dres1[kFunc];
    double *d_ri  = out->dres1[kFuncI];
    double *d_rm  = out->dres1[kMFunc];
    double *d_rmi = out->dres1[kMFuncI];

    /* Single precision complex arguments and results */
    MKL_Complex8 *c_a   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_b   = (MKL_Complex8*)in->carg2;
    MKL_Complex8 *c_r   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_ri  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_rm  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_rmi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex arguments and results */
    MKL_Complex16 *z_a   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_b   = (MKL_Complex16*)in->zarg2;
    MKL_Complex16 *z_r   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_ri  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_rm  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_rmi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data \
        map(to:s_a[0:length]) map(to:d_a[0:length]) map(to:s_b[0:length]) map(to:d_b[0:length]) \
        map(to:c_a[0:length]) map(to:z_a[0:length]) map(to:c_b[0:length]) map(to:z_b[0:length]) \
        map(tofrom:s_r[0:length]) map(tofrom:s_ri[0:length]) map(tofrom:s_rm[0:length]) map(tofrom:s_rmi[0:length]) \
        map(tofrom:d_r[0:length]) map(tofrom:d_ri[0:length]) map(tofrom:d_rm[0:length]) map(tofrom:d_rmi[0:length]) \
        map(tofrom:c_r[0:length]) map(tofrom:c_ri[0:length]) map(tofrom:c_rm[0:length]) map(tofrom:c_rmi[0:length]) \
        map(tofrom:z_r[0:length]) map(tofrom:z_ri[0:length]) map(tofrom:z_rm[0:length]) map(tofrom:z_rmi[0:length]) \
        device(devnum)
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls that rely on the mode set above */
        #pragma omp dispatch device(devnum)
        vsPow(length, s_a, s_b, s_r);
        #pragma omp dispatch device(devnum)
        vsPowI(ns, s_a, stride, s_b, stride, s_ri, stride);
        #pragma omp dispatch device(devnum)
        vdPow(length, d_a, d_b, d_r);
        #pragma omp dispatch device(devnum)
        vdPowI(ns, d_a, stride, d_b, stride, d_ri, stride);
        #pragma omp dispatch device(devnum)
        vcPow(length, c_a, c_b, c_r);
        #pragma omp dispatch device(devnum)
        vcPowI(ns, c_a, stride, c_b, stride, c_ri, stride);
        #pragma omp dispatch device(devnum)
        vzPow(length, z_a, z_b, z_r);
        #pragma omp dispatch device(devnum)
        vzPowI(ns, z_a, stride, z_b, stride, z_ri, stride);

        /* Calls that receive the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsPow(length, s_a, s_b, s_rm, mode);
        #pragma omp dispatch device(devnum)
        vmsPowI(ns, s_a, stride, s_b, stride, s_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdPow(length, d_a, d_b, d_rm, mode);
        #pragma omp dispatch device(devnum)
        vmdPowI(ns, d_a, stride, d_b, stride, d_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcPow(length, c_a, c_b, c_rm, mode);
        #pragma omp dispatch device(devnum)
        vmcPowI(ns, c_a, stride, c_b, stride, c_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzPow(length, z_a, z_b, z_rm, mode);
        #pragma omp dispatch device(devnum)
        vmzPowI(ns, z_a, stride, z_b, stride, z_rmi, stride, mode);
    }
} /* own_vm_pow */

/**
 * Powx
 *
 * Issues every Powx entry point (single, double, single complex, double
 * complex) through OpenMP dispatch on device devnum: whole-vector and
 * strided (I) calls, first relying on the mode installed by vmlSetMode,
 * then passing the mode explicitly (vm*). The second operand is a scalar
 * passed by value and initialised from the file-level value fixed, so
 * only the first argument arrays are mapped.
 *
 * acc - index into vm_mode[] that selects the mode value used below
 * in  - source of the argument arrays (sarg1, darg1, carg1, zarg1);
 *       complex arrays are cast to the MKL_Complex8 / MKL_Complex16
 *       pointer types
 * out - result arrays, taken from sres1[]/dres1[]/cres1[]/zres1[] at
 *       kFunc, kFuncI, kMFunc and kMFuncI
 */
static void own_vm_powx (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode shared by vmlSetMode and by all explicit-mode calls */
    int mode = vm_mode[acc];
    /* Element count handed to the strided (I) calls */
    int ns   = length/stride;

    /* Single precision: vector argument, scalar operand, results */
    float *s_a   = in->sarg1;
    float  s_b   = fixed;
    float *s_r   = out->sres1[kFunc];
    float *s_ri  = out->sres1[kFuncI];
    float *s_rm  = out->sres1[kMFunc];
    float *s_rmi = out->sres1[kMFuncI];

    /* Double precision: vector argument, scalar operand, results */
    double *d_a   = in->darg1;
    double  d_b   = fixed;
    double *d_r   = out->dres1[kFunc];
    double *d_ri  = out->dres1[kFuncI];
    double *d_rm  = out->dres1[kMFunc];
    double *d_rmi = out->dres1[kMFuncI];

    /* Single precision complex: vector argument, scalar operand, results */
    MKL_Complex8 *c_a   = (MKL_Complex8*)in->carg1;
    MKL_Complex8  c_b   = {fixed, fixed};
    MKL_Complex8 *c_r   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_ri  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_rm  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_rmi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex: vector argument, scalar operand, results */
    MKL_Complex16 *z_a   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16  z_b   = {fixed, fixed};
    MKL_Complex16 *z_r   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_ri  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_rm  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_rmi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data \
        map(to:s_a[0:length]) map(to:d_a[0:length]) \
        map(to:c_a[0:length]) map(to:z_a[0:length]) \
        map(tofrom:s_r[0:length]) map(tofrom:s_ri[0:length]) map(tofrom:s_rm[0:length]) map(tofrom:s_rmi[0:length]) \
        map(tofrom:d_r[0:length]) map(tofrom:d_ri[0:length]) map(tofrom:d_rm[0:length]) map(tofrom:d_rmi[0:length]) \
        map(tofrom:c_r[0:length]) map(tofrom:c_ri[0:length]) map(tofrom:c_rm[0:length]) map(tofrom:c_rmi[0:length]) \
        map(tofrom:z_r[0:length]) map(tofrom:z_ri[0:length]) map(tofrom:z_rm[0:length]) map(tofrom:z_rmi[0:length]) \
        device(devnum)
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls that rely on the mode set above */
        #pragma omp dispatch device(devnum)
        vsPowx(length, s_a, s_b, s_r);
        #pragma omp dispatch device(devnum)
        vsPowxI(ns, s_a, stride, s_b, s_ri, stride);
        #pragma omp dispatch device(devnum)
        vdPowx(length, d_a, d_b, d_r);
        #pragma omp dispatch device(devnum)
        vdPowxI(ns, d_a, stride, d_b, d_ri, stride);
        #pragma omp dispatch device(devnum)
        vcPowx(length, c_a, c_b, c_r);
        #pragma omp dispatch device(devnum)
        vcPowxI(ns, c_a, stride, c_b, c_ri, stride);
        #pragma omp dispatch device(devnum)
        vzPowx(length, z_a, z_b, z_r);
        #pragma omp dispatch device(devnum)
        vzPowxI(ns, z_a, stride, z_b, z_ri, stride);

        /* Calls that receive the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsPowx(length, s_a, s_b, s_rm, mode);
        #pragma omp dispatch device(devnum)
        vmsPowxI(ns, s_a, stride, s_b, s_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdPowx(length, d_a, d_b, d_rm, mode);
        #pragma omp dispatch device(devnum)
        vmdPowxI(ns, d_a, stride, d_b, d_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmcPowx(length, c_a, c_b, c_rm, mode);
        #pragma omp dispatch device(devnum)
        vmcPowxI(ns, c_a, stride, c_b, c_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmzPowx(length, z_a, z_b, z_rm, mode);
        #pragma omp dispatch device(devnum)
        vmzPowxI(ns, z_a, stride, z_b, z_rmi, stride, mode);
    }
} /* own_vm_powx */

/**
 * LinearFrac
 *
 * Issues every real-valued LinearFrac entry point through OpenMP dispatch
 * on device devnum: whole-vector and strided (I) calls, first relying on
 * the mode installed by vmlSetMode, then passing the mode explicitly
 * (vm*). All four scalar parameters of each call receive the file-level
 * value fixed, converted to the call's precision.
 *
 * acc - index into vm_mode[] that selects the mode value used below
 * in  - source of the argument arrays (sarg1/sarg2, darg1/darg2)
 * out - result arrays, taken from sres1[]/dres1[] at kFunc, kFuncI,
 *       kMFunc and kMFuncI
 */
static void own_vm_linearfrac (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode shared by vmlSetMode and by all explicit-mode calls */
    int mode = vm_mode[acc];
    /* Element count handed to the strided (I) calls */
    int ns   = length/stride;

    /* Scalar parameter value in each precision */
    float  s_k = (float)fixed;
    double d_k = (double)fixed;

    /* Single precision arguments and results */
    float *s_a   = in->sarg1;
    float *s_b   = in->sarg2;
    float *s_r   = out->sres1[kFunc];
    float *s_ri  = out->sres1[kFuncI];
    float *s_rm  = out->sres1[kMFunc];
    float *s_rmi = out->sres1[kMFuncI];

    /* Double precision arguments and results */
    double *d_a   = in->darg1;
    double *d_b   = in->darg2;
    double *d_r   = out->dres1[kFunc];
    double *d_ri  = out->dres1[kFuncI];
    double *d_rm  = out->dres1[kMFunc];
    double *d_rmi = out->dres1[kMFuncI];

    #pragma omp target data \
        map(to:s_a[0:length]) map(to:d_a[0:length]) map(to:s_b[0:length]) map(to:d_b[0:length]) \
        map(tofrom:s_r[0:length]) map(tofrom:s_ri[0:length]) map(tofrom:s_rm[0:length]) map(tofrom:s_rmi[0:length]) \
        map(tofrom:d_r[0:length]) map(tofrom:d_ri[0:length]) map(tofrom:d_rm[0:length]) map(tofrom:d_rmi[0:length]) \
        device(devnum)
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Calls that rely on the mode set above */
        #pragma omp dispatch device(devnum)
        vsLinearFrac(length, s_a, s_b, s_k, s_k, s_k, s_k, s_r);
        #pragma omp dispatch device(devnum)
        vsLinearFracI(ns, s_a, stride, s_b, stride, s_k, s_k, s_k, s_k, s_ri, stride);
        #pragma omp dispatch device(devnum)
        vdLinearFrac(length, d_a, d_b, d_k, d_k, d_k, d_k, d_r);
        #pragma omp dispatch device(devnum)
        vdLinearFracI(ns, d_a, stride, d_b, stride, d_k, d_k, d_k, d_k, d_ri, stride);

        /* Calls that receive the mode as their last argument */
        #pragma omp dispatch device(devnum)
        vmsLinearFrac(length, s_a, s_b, s_k, s_k, s_k, s_k, s_rm, mode);
        #pragma omp dispatch device(devnum)
        vmsLinearFracI(ns, s_a, stride, s_b, stride, s_k, s_k, s_k, s_k, s_rmi, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdLinearFrac(length, d_a, d_b, d_k, d_k, d_k, d_k, d_rm, mode);
        #pragma omp dispatch device(devnum)
        vmdLinearFracI(ns, d_a, stride, d_b, stride, d_k, d_k, d_k, d_k, d_rmi, stride, mode);
    }
} /* own_vm_linearfrac */

/**
 * Sqrt
 *
 * Issues every offloaded Sqrt flavour once on device devnum: basic (v?Sqrt),
 * strided (v?SqrtI), explicit mode (vm?Sqrt) and strided with explicit mode
 * (vm?SqrtI), for float, double, MKL_Complex8 and MKL_Complex16 data.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (sarg1, darg1, carg1 and zarg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       sres1, dres1, cres1 and zres1 are passed as outputs
 */
static void own_vm_sqrt (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision real */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Double precision real */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Single precision complex */
    MKL_Complex8 *c_src    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex */
    MKL_Complex16 *z_src    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:s_src[0:length]) map(to:d_src[0:length]) \
        map(to:c_src[0:length]) map(to:z_src[0:length]) \
        map(tofrom:s_dst[0:length]) map(tofrom:s_dst_i[0:length]) map(tofrom:s_dst_m[0:length]) map(tofrom:s_dst_mi[0:length]) \
        map(tofrom:d_dst[0:length]) map(tofrom:d_dst_i[0:length]) map(tofrom:d_dst_m[0:length]) map(tofrom:d_dst_mi[0:length]) \
        map(tofrom:c_dst[0:length]) map(tofrom:c_dst_i[0:length]) map(tofrom:c_dst_m[0:length]) map(tofrom:c_dst_mi[0:length]) \
        map(tofrom:z_dst[0:length]) map(tofrom:z_dst_i[0:length]) map(tofrom:z_dst_m[0:length]) map(tofrom:z_dst_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vsSqrt(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsSqrtI(strided_len, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdSqrt(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdSqrtI(strided_len, d_src, stride, d_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vcSqrt(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcSqrtI(strided_len, c_src, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzSqrt(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzSqrtI(strided_len, z_src, stride, z_dst_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsSqrt(length, s_src, s_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmsSqrtI(strided_len, s_src, stride, s_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdSqrt(length, d_src, d_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdSqrtI(strided_len, d_src, stride, d_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmcSqrt(length, c_src, c_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmcSqrtI(strided_len, c_src, stride, c_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmzSqrt(length, z_src, z_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmzSqrtI(strided_len, z_src, stride, z_dst_mi, stride, cur_mode);
    }
} /* own_vm_sqrt */

/**
 * Ln
 *
 * Issues every offloaded Ln flavour once on device devnum: basic (v?Ln),
 * strided (v?LnI), explicit mode (vm?Ln) and strided with explicit mode
 * (vm?LnI), for float, double, MKL_Complex8 and MKL_Complex16 data.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (sarg1, darg1, carg1 and zarg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       sres1, dres1, cres1 and zres1 are passed as outputs
 */
static void own_vm_ln (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision real */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Double precision real */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Single precision complex */
    MKL_Complex8 *c_src    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex */
    MKL_Complex16 *z_src    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:s_src[0:length]) map(to:d_src[0:length]) \
        map(to:c_src[0:length]) map(to:z_src[0:length]) \
        map(tofrom:s_dst[0:length]) map(tofrom:s_dst_i[0:length]) map(tofrom:s_dst_m[0:length]) map(tofrom:s_dst_mi[0:length]) \
        map(tofrom:d_dst[0:length]) map(tofrom:d_dst_i[0:length]) map(tofrom:d_dst_m[0:length]) map(tofrom:d_dst_mi[0:length]) \
        map(tofrom:c_dst[0:length]) map(tofrom:c_dst_i[0:length]) map(tofrom:c_dst_m[0:length]) map(tofrom:c_dst_mi[0:length]) \
        map(tofrom:z_dst[0:length]) map(tofrom:z_dst_i[0:length]) map(tofrom:z_dst_m[0:length]) map(tofrom:z_dst_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vsLn(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsLnI(strided_len, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdLn(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdLnI(strided_len, d_src, stride, d_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vcLn(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcLnI(strided_len, c_src, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzLn(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzLnI(strided_len, z_src, stride, z_dst_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsLn(length, s_src, s_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmsLnI(strided_len, s_src, stride, s_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdLn(length, d_src, d_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdLnI(strided_len, d_src, stride, d_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmcLn(length, c_src, c_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmcLnI(strided_len, c_src, stride, c_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmzLn(length, z_src, z_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmzLnI(strided_len, z_src, stride, z_dst_mi, stride, cur_mode);
    }
} /* own_vm_ln */

/**
 * Sind
 *
 * Issues every offloaded Sind flavour once on device devnum: basic (v?Sind),
 * strided (v?SindI), explicit mode (vm?Sind) and strided with explicit mode
 * (vm?SindI), for float and double data.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (sarg1 and darg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       sres1 and dres1 are passed as outputs
 */
static void own_vm_sind (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:s_src[0:length]) map(to:d_src[0:length]) \
        map(tofrom:s_dst[0:length]) map(tofrom:s_dst_i[0:length]) map(tofrom:s_dst_m[0:length]) map(tofrom:s_dst_mi[0:length]) \
        map(tofrom:d_dst[0:length]) map(tofrom:d_dst_i[0:length]) map(tofrom:d_dst_m[0:length]) map(tofrom:d_dst_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vsSind(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsSindI(strided_len, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdSind(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdSindI(strided_len, d_src, stride, d_dst_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsSind(length, s_src, s_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmsSindI(strided_len, s_src, stride, s_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdSind(length, d_src, d_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdSindI(strided_len, d_src, stride, d_dst_mi, stride, cur_mode);
    }
} /* own_vm_sind */

/**
 * Cosd
 *
 * Issues every offloaded Cosd flavour once on device devnum: basic (v?Cosd),
 * strided (v?CosdI), explicit mode (vm?Cosd) and strided with explicit mode
 * (vm?CosdI), for float and double data.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (sarg1 and darg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       sres1 and dres1 are passed as outputs
 */
static void own_vm_cosd (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:s_src[0:length]) map(to:d_src[0:length]) \
        map(tofrom:s_dst[0:length]) map(tofrom:s_dst_i[0:length]) map(tofrom:s_dst_m[0:length]) map(tofrom:s_dst_mi[0:length]) \
        map(tofrom:d_dst[0:length]) map(tofrom:d_dst_i[0:length]) map(tofrom:d_dst_m[0:length]) map(tofrom:d_dst_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vsCosd(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsCosdI(strided_len, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdCosd(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdCosdI(strided_len, d_src, stride, d_dst_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsCosd(length, s_src, s_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmsCosdI(strided_len, s_src, stride, s_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdCosd(length, d_src, d_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdCosdI(strided_len, d_src, stride, d_dst_mi, stride, cur_mode);
    }
} /* own_vm_cosd */

/**
 * Tand
 *
 * Issues every offloaded Tand flavour once on device devnum: basic (v?Tand),
 * strided (v?TandI), explicit mode (vm?Tand) and strided with explicit mode
 * (vm?TandI), for float and double data.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (sarg1 and darg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       sres1 and dres1 are passed as outputs
 */
static void own_vm_tand (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:s_src[0:length]) map(to:d_src[0:length]) \
        map(tofrom:s_dst[0:length]) map(tofrom:s_dst_i[0:length]) map(tofrom:s_dst_m[0:length]) map(tofrom:s_dst_mi[0:length]) \
        map(tofrom:d_dst[0:length]) map(tofrom:d_dst_i[0:length]) map(tofrom:d_dst_m[0:length]) map(tofrom:d_dst_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vsTand(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsTandI(strided_len, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdTand(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdTandI(strided_len, d_src, stride, d_dst_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsTand(length, s_src, s_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmsTandI(strided_len, s_src, stride, s_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdTand(length, d_src, d_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdTandI(strided_len, d_src, stride, d_dst_mi, stride, cur_mode);
    }
} /* own_vm_tand */

/**
 * Sinpi
 *
 * Issues every offloaded Sinpi flavour once on device devnum: basic
 * (v?Sinpi), strided (v?SinpiI), explicit mode (vm?Sinpi) and strided with
 * explicit mode (vm?SinpiI), for float and double data.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (sarg1 and darg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       sres1 and dres1 are passed as outputs
 */
static void own_vm_sinpi (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:s_src[0:length]) map(to:d_src[0:length]) \
        map(tofrom:s_dst[0:length]) map(tofrom:s_dst_i[0:length]) map(tofrom:s_dst_m[0:length]) map(tofrom:s_dst_mi[0:length]) \
        map(tofrom:d_dst[0:length]) map(tofrom:d_dst_i[0:length]) map(tofrom:d_dst_m[0:length]) map(tofrom:d_dst_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vsSinpi(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsSinpiI(strided_len, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdSinpi(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdSinpiI(strided_len, d_src, stride, d_dst_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsSinpi(length, s_src, s_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmsSinpiI(strided_len, s_src, stride, s_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdSinpi(length, d_src, d_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdSinpiI(strided_len, d_src, stride, d_dst_mi, stride, cur_mode);
    }
} /* own_vm_sinpi */

/**
 * Cospi
 *
 * Issues every offloaded Cospi flavour once on device devnum: basic
 * (v?Cospi), strided (v?CospiI), explicit mode (vm?Cospi) and strided with
 * explicit mode (vm?CospiI), for float and double data.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (sarg1 and darg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       sres1 and dres1 are passed as outputs
 */
static void own_vm_cospi (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:s_src[0:length]) map(to:d_src[0:length]) \
        map(tofrom:s_dst[0:length]) map(tofrom:s_dst_i[0:length]) map(tofrom:s_dst_m[0:length]) map(tofrom:s_dst_mi[0:length]) \
        map(tofrom:d_dst[0:length]) map(tofrom:d_dst_i[0:length]) map(tofrom:d_dst_m[0:length]) map(tofrom:d_dst_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vsCospi(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsCospiI(strided_len, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdCospi(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdCospiI(strided_len, d_src, stride, d_dst_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsCospi(length, s_src, s_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmsCospiI(strided_len, s_src, stride, s_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdCospi(length, d_src, d_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdCospiI(strided_len, d_src, stride, d_dst_mi, stride, cur_mode);
    }
} /* own_vm_cospi */

/**
 * Tanpi
 *
 * Issues every offloaded Tanpi flavour once on device devnum: basic
 * (v?Tanpi), strided (v?TanpiI), explicit mode (vm?Tanpi) and strided with
 * explicit mode (vm?TanpiI), for float and double data.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (sarg1 and darg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       sres1 and dres1 are passed as outputs
 */
static void own_vm_tanpi (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Double precision */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:s_src[0:length]) map(to:d_src[0:length]) \
        map(tofrom:s_dst[0:length]) map(tofrom:s_dst_i[0:length]) map(tofrom:s_dst_m[0:length]) map(tofrom:s_dst_mi[0:length]) \
        map(tofrom:d_dst[0:length]) map(tofrom:d_dst_i[0:length]) map(tofrom:d_dst_m[0:length]) map(tofrom:d_dst_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vsTanpi(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsTanpiI(strided_len, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdTanpi(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdTanpiI(strided_len, d_src, stride, d_dst_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsTanpi(length, s_src, s_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmsTanpiI(strided_len, s_src, stride, s_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdTanpi(length, d_src, d_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdTanpiI(strided_len, d_src, stride, d_dst_mi, stride, cur_mode);
    }
} /* own_vm_tanpi */

/**
 * SinCos
 *
 * Issues every offloaded SinCos flavour once on device devnum: basic
 * (v?SinCos), strided (v?SinCosI), explicit mode (vm?SinCos) and strided
 * with explicit mode (vm?SinCosI), for float and double data. Every call
 * takes one source vector and two result vectors.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (sarg1 and darg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       sres1/sres2 and dres1/dres2 are passed as first/second outputs
 */
static void own_vm_sincos (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision: source, first and second result sets */
    float *s_src     = in->sarg1;
    float *s_dst1    = out->sres1[kFunc];
    float *s_dst1_i  = out->sres1[kFuncI];
    float *s_dst1_m  = out->sres1[kMFunc];
    float *s_dst1_mi = out->sres1[kMFuncI];
    float *s_dst2    = out->sres2[kFunc];
    float *s_dst2_i  = out->sres2[kFuncI];
    float *s_dst2_m  = out->sres2[kMFunc];
    float *s_dst2_mi = out->sres2[kMFuncI];

    /* Double precision: source, first and second result sets */
    double *d_src     = in->darg1;
    double *d_dst1    = out->dres1[kFunc];
    double *d_dst1_i  = out->dres1[kFuncI];
    double *d_dst1_m  = out->dres1[kMFunc];
    double *d_dst1_mi = out->dres1[kMFuncI];
    double *d_dst2    = out->dres2[kFunc];
    double *d_dst2_i  = out->dres2[kFuncI];
    double *d_dst2_m  = out->dres2[kMFunc];
    double *d_dst2_mi = out->dres2[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:s_src[0:length]) map(to:d_src[0:length]) \
        map(tofrom:s_dst1[0:length]) map(tofrom:s_dst1_i[0:length]) map(tofrom:s_dst1_m[0:length]) map(tofrom:s_dst1_mi[0:length]) \
        map(tofrom:d_dst1[0:length]) map(tofrom:d_dst1_i[0:length]) map(tofrom:d_dst1_m[0:length]) map(tofrom:d_dst1_mi[0:length]) \
        map(tofrom:s_dst2[0:length]) map(tofrom:s_dst2_i[0:length]) map(tofrom:s_dst2_m[0:length]) map(tofrom:s_dst2_mi[0:length]) \
        map(tofrom:d_dst2[0:length]) map(tofrom:d_dst2_i[0:length]) map(tofrom:d_dst2_m[0:length]) map(tofrom:d_dst2_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vsSinCos(length, s_src, s_dst1, s_dst2);
        #pragma omp dispatch device(devnum)
        vsSinCosI(strided_len, s_src, stride, s_dst1_i, stride, s_dst2_i, stride);
        #pragma omp dispatch device(devnum)
        vdSinCos(length, d_src, d_dst1, d_dst2);
        #pragma omp dispatch device(devnum)
        vdSinCosI(strided_len, d_src, stride, d_dst1_i, stride, d_dst2_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsSinCos(length, s_src, s_dst1_m, s_dst2_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmsSinCosI(strided_len, s_src, stride, s_dst1_mi, stride, s_dst2_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdSinCos(length, d_src, d_dst1_m, d_dst2_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdSinCosI(strided_len, d_src, stride, d_dst1_mi, stride, d_dst2_mi, stride, cur_mode);
    }
} /* own_vm_sincos */

/**
 * Modf
 *
 * Issues every offloaded Modf flavour once on device devnum: basic
 * (v?Modf), strided (v?ModfI), explicit mode (vm?Modf) and strided with
 * explicit mode (vm?ModfI), for float and double data. Every call takes one
 * source vector and two result vectors.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (sarg1 and darg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       sres1/sres2 and dres1/dres2 are passed as first/second outputs
 */
static void own_vm_modf (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision: source, first and second result sets */
    float *s_src     = in->sarg1;
    float *s_dst1    = out->sres1[kFunc];
    float *s_dst1_i  = out->sres1[kFuncI];
    float *s_dst1_m  = out->sres1[kMFunc];
    float *s_dst1_mi = out->sres1[kMFuncI];
    float *s_dst2    = out->sres2[kFunc];
    float *s_dst2_i  = out->sres2[kFuncI];
    float *s_dst2_m  = out->sres2[kMFunc];
    float *s_dst2_mi = out->sres2[kMFuncI];

    /* Double precision: source, first and second result sets */
    double *d_src     = in->darg1;
    double *d_dst1    = out->dres1[kFunc];
    double *d_dst1_i  = out->dres1[kFuncI];
    double *d_dst1_m  = out->dres1[kMFunc];
    double *d_dst1_mi = out->dres1[kMFuncI];
    double *d_dst2    = out->dres2[kFunc];
    double *d_dst2_i  = out->dres2[kFuncI];
    double *d_dst2_m  = out->dres2[kMFunc];
    double *d_dst2_mi = out->dres2[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:s_src[0:length]) map(to:d_src[0:length]) \
        map(tofrom:s_dst1[0:length]) map(tofrom:s_dst1_i[0:length]) map(tofrom:s_dst1_m[0:length]) map(tofrom:s_dst1_mi[0:length]) \
        map(tofrom:d_dst1[0:length]) map(tofrom:d_dst1_i[0:length]) map(tofrom:d_dst1_m[0:length]) map(tofrom:d_dst1_mi[0:length]) \
        map(tofrom:s_dst2[0:length]) map(tofrom:s_dst2_i[0:length]) map(tofrom:s_dst2_m[0:length]) map(tofrom:s_dst2_mi[0:length]) \
        map(tofrom:d_dst2[0:length]) map(tofrom:d_dst2_i[0:length]) map(tofrom:d_dst2_m[0:length]) map(tofrom:d_dst2_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vsModf(length, s_src, s_dst1, s_dst2);
        #pragma omp dispatch device(devnum)
        vsModfI(strided_len, s_src, stride, s_dst1_i, stride, s_dst2_i, stride);
        #pragma omp dispatch device(devnum)
        vdModf(length, d_src, d_dst1, d_dst2);
        #pragma omp dispatch device(devnum)
        vdModfI(strided_len, d_src, stride, d_dst1_i, stride, d_dst2_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsModf(length, s_src, s_dst1_m, s_dst2_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmsModfI(strided_len, s_src, stride, s_dst1_mi, stride, s_dst2_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdModf(length, d_src, d_dst1_m, d_dst2_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdModfI(strided_len, d_src, stride, d_dst1_mi, stride, d_dst2_mi, stride, cur_mode);
    }
} /* own_vm_modf */

/**
 * Conj
 *
 * Issues every offloaded Conj flavour once on device devnum: basic (v?Conj),
 * strided (v?ConjI), explicit mode (vm?Conj) and strided with explicit mode
 * (vm?ConjI), for MKL_Complex8 and MKL_Complex16 data.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (carg1 and zarg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       cres1 and zres1 are passed as outputs
 */
static void own_vm_conj (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision complex */
    MKL_Complex8 *c_src    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex */
    MKL_Complex16 *z_src    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:c_src[0:length]) map(to:z_src[0:length]) \
        map(tofrom:c_dst[0:length]) map(tofrom:c_dst_i[0:length]) map(tofrom:c_dst_m[0:length]) map(tofrom:c_dst_mi[0:length]) \
        map(tofrom:z_dst[0:length]) map(tofrom:z_dst_i[0:length]) map(tofrom:z_dst_m[0:length]) map(tofrom:z_dst_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vcConj(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcConjI(strided_len, c_src, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzConj(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzConjI(strided_len, z_src, stride, z_dst_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmcConj(length, c_src, c_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmcConjI(strided_len, c_src, stride, c_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmzConj(length, z_src, z_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmzConjI(strided_len, z_src, stride, z_dst_mi, stride, cur_mode);
    }
} /* own_vm_conj */

/**
 * MulByConj
 *
 * Issues every offloaded MulByConj flavour once on device devnum: basic
 * (v?MulByConj), strided (v?MulByConjI), explicit mode (vm?MulByConj) and
 * strided with explicit mode (vm?MulByConjI), for MKL_Complex8 and
 * MKL_Complex16 data. Every call takes two source vectors and one result.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (carg1, carg2, zarg1 and zarg2 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       cres1 and zres1 are passed as outputs
 */
static void own_vm_mulbyconj (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision complex */
    MKL_Complex8 *c_src1   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_src2   = (MKL_Complex8*)in->carg2;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex */
    MKL_Complex16 *z_src1   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_src2   = (MKL_Complex16*)in->zarg2;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:c_src1[0:length]) map(to:z_src1[0:length]) map(to:c_src2[0:length]) map(to:z_src2[0:length]) \
        map(tofrom:c_dst[0:length]) map(tofrom:c_dst_i[0:length]) map(tofrom:c_dst_m[0:length]) map(tofrom:c_dst_mi[0:length]) \
        map(tofrom:z_dst[0:length]) map(tofrom:z_dst_i[0:length]) map(tofrom:z_dst_m[0:length]) map(tofrom:z_dst_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vcMulByConj(length, c_src1, c_src2, c_dst);
        #pragma omp dispatch device(devnum)
        vcMulByConjI(strided_len, c_src1, stride, c_src2, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzMulByConj(length, z_src1, z_src2, z_dst);
        #pragma omp dispatch device(devnum)
        vzMulByConjI(strided_len, z_src1, stride, z_src2, stride, z_dst_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmcMulByConj(length, c_src1, c_src2, c_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmcMulByConjI(strided_len, c_src1, stride, c_src2, stride, c_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmzMulByConj(length, z_src1, z_src2, z_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmzMulByConjI(strided_len, z_src1, stride, z_src2, stride, z_dst_mi, stride, cur_mode);
    }
} /* own_vm_mulbyconj */

/**
 * Sin
 *
 * Issues every offloaded Sin flavour once on device devnum: basic (v?Sin),
 * strided (v?SinI), explicit mode (vm?Sin) and strided with explicit mode
 * (vm?SinI), for float, double, MKL_Complex8 and MKL_Complex16 data.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (sarg1, darg1, carg1 and zarg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       sres1, dres1, cres1 and zres1 are passed as outputs
 */
static void own_vm_sin (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision real */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Double precision real */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Single precision complex */
    MKL_Complex8 *c_src    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex */
    MKL_Complex16 *z_src    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:s_src[0:length]) map(to:d_src[0:length]) \
        map(to:c_src[0:length]) map(to:z_src[0:length]) \
        map(tofrom:s_dst[0:length]) map(tofrom:s_dst_i[0:length]) map(tofrom:s_dst_m[0:length]) map(tofrom:s_dst_mi[0:length]) \
        map(tofrom:d_dst[0:length]) map(tofrom:d_dst_i[0:length]) map(tofrom:d_dst_m[0:length]) map(tofrom:d_dst_mi[0:length]) \
        map(tofrom:c_dst[0:length]) map(tofrom:c_dst_i[0:length]) map(tofrom:c_dst_m[0:length]) map(tofrom:c_dst_mi[0:length]) \
        map(tofrom:z_dst[0:length]) map(tofrom:z_dst_i[0:length]) map(tofrom:z_dst_m[0:length]) map(tofrom:z_dst_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vsSin(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsSinI(strided_len, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdSin(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdSinI(strided_len, d_src, stride, d_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vcSin(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcSinI(strided_len, c_src, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzSin(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzSinI(strided_len, z_src, stride, z_dst_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsSin(length, s_src, s_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmsSinI(strided_len, s_src, stride, s_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdSin(length, d_src, d_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdSinI(strided_len, d_src, stride, d_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmcSin(length, c_src, c_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmcSinI(strided_len, c_src, stride, c_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmzSin(length, z_src, z_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmzSinI(strided_len, z_src, stride, z_dst_mi, stride, cur_mode);
    }
} /* own_vm_sin */

/**
 * Cos
 *
 * Issues every offloaded Cos flavour once on device devnum: basic (v?Cos),
 * strided (v?CosI), explicit mode (vm?Cos) and strided with explicit mode
 * (vm?CosI), for float, double, MKL_Complex8 and MKL_Complex16 data.
 *
 * acc - index into vm_mode[]; the selected value goes to vmlSetMode and to
 *       every explicit-mode call
 * in  - source vectors (sarg1, darg1, carg1 and zarg1 are read)
 * out - result vectors; the kFunc, kFuncI, kMFunc and kMFuncI slots of
 *       sres1, dres1, cres1 and zres1 are passed as outputs
 */
static void own_vm_cos (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value shared by vmlSetMode and the vm* calls */
    const int cur_mode = vm_mode[acc];
    /* Element count handed to the strided (...I) variants */
    const int strided_len = length / stride;

    /* Single precision real */
    float *s_src    = in->sarg1;
    float *s_dst    = out->sres1[kFunc];
    float *s_dst_i  = out->sres1[kFuncI];
    float *s_dst_m  = out->sres1[kMFunc];
    float *s_dst_mi = out->sres1[kMFuncI];

    /* Double precision real */
    double *d_src    = in->darg1;
    double *d_dst    = out->dres1[kFunc];
    double *d_dst_i  = out->dres1[kFuncI];
    double *d_dst_m  = out->dres1[kMFunc];
    double *d_dst_mi = out->dres1[kMFuncI];

    /* Single precision complex */
    MKL_Complex8 *c_src    = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_dst    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_dst_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_dst_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_dst_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex */
    MKL_Complex16 *z_src    = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_dst    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_dst_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_dst_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_dst_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Sources are mapped to the device, result buffers in both directions */
    #pragma omp target data \
        map(to:s_src[0:length]) map(to:d_src[0:length]) \
        map(to:c_src[0:length]) map(to:z_src[0:length]) \
        map(tofrom:s_dst[0:length]) map(tofrom:s_dst_i[0:length]) map(tofrom:s_dst_m[0:length]) map(tofrom:s_dst_mi[0:length]) \
        map(tofrom:d_dst[0:length]) map(tofrom:d_dst_i[0:length]) map(tofrom:d_dst_m[0:length]) map(tofrom:d_dst_mi[0:length]) \
        map(tofrom:c_dst[0:length]) map(tofrom:c_dst_i[0:length]) map(tofrom:c_dst_m[0:length]) map(tofrom:c_dst_mi[0:length]) \
        map(tofrom:z_dst[0:length]) map(tofrom:z_dst_i[0:length]) map(tofrom:z_dst_m[0:length]) map(tofrom:z_dst_mi[0:length]) \
        device(devnum)
    {
        /* Set the mode first: the v* calls below take no mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(cur_mode);

        /* Basic and strided API */
        #pragma omp dispatch device(devnum)
        vsCos(length, s_src, s_dst);
        #pragma omp dispatch device(devnum)
        vsCosI(strided_len, s_src, stride, s_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vdCos(length, d_src, d_dst);
        #pragma omp dispatch device(devnum)
        vdCosI(strided_len, d_src, stride, d_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vcCos(length, c_src, c_dst);
        #pragma omp dispatch device(devnum)
        vcCosI(strided_len, c_src, stride, c_dst_i, stride);
        #pragma omp dispatch device(devnum)
        vzCos(length, z_src, z_dst);
        #pragma omp dispatch device(devnum)
        vzCosI(strided_len, z_src, stride, z_dst_i, stride);

        /* Explicit-mode API, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsCos(length, s_src, s_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmsCosI(strided_len, s_src, stride, s_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdCos(length, d_src, d_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmdCosI(strided_len, d_src, stride, d_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmcCos(length, c_src, c_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmcCosI(strided_len, c_src, stride, c_dst_mi, stride, cur_mode);
        #pragma omp dispatch device(devnum)
        vmzCos(length, z_src, z_dst_m, cur_mode);
        #pragma omp dispatch device(devnum)
        vmzCosI(strided_len, z_src, stride, z_dst_mi, stride, cur_mode);
    }
} /* own_vm_cos */

/**
 * Tan
 *
 * Touches the oneMKL VM Tan family through OpenMP dispatch on device devnum:
 * single, double and both complex precisions, basic and strided (I) entry
 * points, each called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1, darg1, carg1 and zarg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1, dres1,
 *       cres1 and zres1 are passed as result arrays
 */
static void own_vm_tan (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Single precision complex buffers */
    MKL_Complex8 *c_in     = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex buffers */
    MKL_Complex16 *z_in     = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length], c_in[0:length], z_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length]) \
        map(tofrom: c_out[0:length], c_out_i[0:length], c_out_m[0:length], c_out_mi[0:length]) \
        map(tofrom: z_out[0:length], z_out_i[0:length], z_out_m[0:length], z_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsTan(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsTanI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdTan(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdTanI(strided_len, d_in, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcTan(length, c_in, c_out);
        #pragma omp dispatch device(devnum)
        vcTanI(strided_len, c_in, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzTan(length, z_in, z_out);
        #pragma omp dispatch device(devnum)
        vzTanI(strided_len, z_in, stride, z_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsTan(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsTanI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdTan(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdTanI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcTan(length, c_in, c_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcTanI(strided_len, c_in, stride, c_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzTan(length, z_in, z_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzTanI(strided_len, z_in, stride, z_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_tan */

/**
 * Inv
 *
 * Touches the oneMKL VM Inv family through OpenMP dispatch on device devnum:
 * single and double precision, basic and strided (I) entry points, each
 * called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1 and dres1
 *       are passed as result arrays
 */
static void own_vm_inv (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsInv(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsInvI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdInv(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdInvI(strided_len, d_in, stride, d_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsInv(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsInvI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdInv(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdInvI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_inv */

/**
 * Sqr
 *
 * Touches the oneMKL VM Sqr family through OpenMP dispatch on device devnum:
 * single and double precision, basic and strided (I) entry points, each
 * called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1 and dres1
 *       are passed as result arrays
 */
static void own_vm_sqr (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsSqr(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsSqrI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdSqr(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdSqrI(strided_len, d_in, stride, d_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsSqr(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsSqrI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdSqr(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdSqrI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_sqr */

/**
 * Frac
 *
 * Touches the oneMKL VM Frac family through OpenMP dispatch on device devnum:
 * single and double precision, basic and strided (I) entry points, each
 * called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1 and dres1
 *       are passed as result arrays
 */
static void own_vm_frac (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsFrac(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsFracI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdFrac(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdFracI(strided_len, d_in, stride, d_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsFrac(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsFracI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdFrac(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdFracI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_frac */

/**
 * Ceil
 *
 * Touches the oneMKL VM Ceil family through OpenMP dispatch on device devnum:
 * single and double precision, basic and strided (I) entry points, each
 * called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1 and dres1
 *       are passed as result arrays
 */
static void own_vm_ceil (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsCeil(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsCeilI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdCeil(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdCeilI(strided_len, d_in, stride, d_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsCeil(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsCeilI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdCeil(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdCeilI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_ceil */

/**
 * Floor
 *
 * Touches the oneMKL VM Floor family through OpenMP dispatch on device devnum:
 * single and double precision, basic and strided (I) entry points, each
 * called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1 and dres1
 *       are passed as result arrays
 */
static void own_vm_floor (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsFloor(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsFloorI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdFloor(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdFloorI(strided_len, d_in, stride, d_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsFloor(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsFloorI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdFloor(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdFloorI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_floor */

/**
 * Round
 *
 * Touches the oneMKL VM Round family through OpenMP dispatch on device devnum:
 * single and double precision, basic and strided (I) entry points, each
 * called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1 and dres1
 *       are passed as result arrays
 */
static void own_vm_round (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsRound(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsRoundI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdRound(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdRoundI(strided_len, d_in, stride, d_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsRound(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsRoundI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdRound(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdRoundI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_round */

/**
 * Trunc
 *
 * Touches the oneMKL VM Trunc family through OpenMP dispatch on device devnum:
 * single and double precision, basic and strided (I) entry points, each
 * called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1 and dres1
 *       are passed as result arrays
 */
static void own_vm_trunc (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsTrunc(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsTruncI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdTrunc(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdTruncI(strided_len, d_in, stride, d_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsTrunc(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsTruncI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdTrunc(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdTruncI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_trunc */

/**
 * Rint
 *
 * Touches the oneMKL VM Rint family through OpenMP dispatch on device devnum:
 * single and double precision, basic and strided (I) entry points, each
 * called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1 and dres1
 *       are passed as result arrays
 */
static void own_vm_rint (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsRint(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsRintI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdRint(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdRintI(strided_len, d_in, stride, d_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsRint(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsRintI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdRint(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdRintI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_rint */

/**
 * NearbyInt
 *
 * Touches the oneMKL VM NearbyInt family through OpenMP dispatch on device
 * devnum: single and double precision, basic and strided (I) entry points,
 * each called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1 and dres1
 *       are passed as result arrays
 */
static void own_vm_nearbyint (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsNearbyInt(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsNearbyIntI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdNearbyInt(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdNearbyIntI(strided_len, d_in, stride, d_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsNearbyInt(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsNearbyIntI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdNearbyInt(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdNearbyIntI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_nearbyint */

/**
 * Acospi
 *
 * Touches the oneMKL VM Acospi family through OpenMP dispatch on device
 * devnum: single and double precision, basic and strided (I) entry points,
 * each called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1 and dres1
 *       are passed as result arrays
 */
static void own_vm_acospi (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsAcospi(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsAcospiI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAcospi(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdAcospiI(strided_len, d_in, stride, d_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsAcospi(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsAcospiI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdAcospi(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdAcospiI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_acospi */

/**
 * Asinpi
 *
 * Touches the oneMKL VM Asinpi family through OpenMP dispatch on device
 * devnum: single and double precision, basic and strided (I) entry points,
 * each called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1 and dres1
 *       are passed as result arrays
 */
static void own_vm_asinpi (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsAsinpi(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsAsinpiI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAsinpi(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdAsinpiI(strided_len, d_in, stride, d_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsAsinpi(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsAsinpiI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdAsinpi(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdAsinpiI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_asinpi */

/**
 * Atanpi
 *
 * Touches the oneMKL VM Atanpi family through OpenMP dispatch on device
 * devnum: single and double precision, basic and strided (I) entry points,
 * each called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1 and dres1
 *       are passed as result arrays
 */
static void own_vm_atanpi (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsAtanpi(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsAtanpiI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdAtanpi(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdAtanpiI(strided_len, d_in, stride, d_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsAtanpi(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsAtanpiI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdAtanpi(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdAtanpiI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_atanpi */

/**
 * Sinh
 *
 * Touches the oneMKL VM Sinh family through OpenMP dispatch on device devnum:
 * single, double and both complex precisions, basic and strided (I) entry
 * points, each called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1, darg1, carg1 and zarg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1, dres1,
 *       cres1 and zres1 are passed as result arrays
 */
static void own_vm_sinh (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Single precision complex buffers */
    MKL_Complex8 *c_in     = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex buffers */
    MKL_Complex16 *z_in     = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length], c_in[0:length], z_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length]) \
        map(tofrom: c_out[0:length], c_out_i[0:length], c_out_m[0:length], c_out_mi[0:length]) \
        map(tofrom: z_out[0:length], z_out_i[0:length], z_out_m[0:length], z_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsSinh(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsSinhI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdSinh(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdSinhI(strided_len, d_in, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcSinh(length, c_in, c_out);
        #pragma omp dispatch device(devnum)
        vcSinhI(strided_len, c_in, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzSinh(length, z_in, z_out);
        #pragma omp dispatch device(devnum)
        vzSinhI(strided_len, z_in, stride, z_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsSinh(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsSinhI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdSinh(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdSinhI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcSinh(length, c_in, c_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcSinhI(strided_len, c_in, stride, c_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzSinh(length, z_in, z_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzSinhI(strided_len, z_in, stride, z_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_sinh */

/**
 * Cosh
 *
 * Touches the oneMKL VM Cosh family through OpenMP dispatch on device devnum:
 * single, double and both complex precisions, basic and strided (I) entry
 * points, each called without and with an explicit mode argument.
 *
 * acc - index into vm_mode selecting the mode value used for all calls
 * in  - input data; sarg1, darg1, carg1 and zarg1 are used as arguments
 * out - output data; slots kFunc, kFuncI, kMFunc, kMFuncI of sres1, dres1,
 *       cres1 and zres1 are passed as result arrays
 */
static void own_vm_cosh (int acc, VmInputData* in, VmOutputData* out)
{
    /* Mode value and element count used by the strided variants */
    int vml_mode    = vm_mode[acc];
    int strided_len = length / stride;

    /* Single precision buffers */
    float *s_in     = in->sarg1;
    float *s_out    = out->sres1[kFunc];
    float *s_out_i  = out->sres1[kFuncI];
    float *s_out_m  = out->sres1[kMFunc];
    float *s_out_mi = out->sres1[kMFuncI];

    /* Double precision buffers */
    double *d_in     = in->darg1;
    double *d_out    = out->dres1[kFunc];
    double *d_out_i  = out->dres1[kFuncI];
    double *d_out_m  = out->dres1[kMFunc];
    double *d_out_mi = out->dres1[kMFuncI];

    /* Single precision complex buffers */
    MKL_Complex8 *c_in     = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *c_out    = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *c_out_i  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *c_out_m  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *c_out_mi = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex buffers */
    MKL_Complex16 *z_in     = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *z_out    = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *z_out_i  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *z_out_m  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *z_out_mi = (MKL_Complex16*)out->zres1[kMFuncI];

    #pragma omp target data device(devnum) \
        map(to: s_in[0:length], d_in[0:length], c_in[0:length], z_in[0:length]) \
        map(tofrom: s_out[0:length], s_out_i[0:length], s_out_m[0:length], s_out_mi[0:length]) \
        map(tofrom: d_out[0:length], d_out_i[0:length], d_out_m[0:length], d_out_mi[0:length]) \
        map(tofrom: c_out[0:length], c_out_i[0:length], c_out_m[0:length], c_out_mi[0:length]) \
        map(tofrom: z_out[0:length], z_out_i[0:length], z_out_m[0:length], z_out_mi[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsCosh(length, s_in, s_out);
        #pragma omp dispatch device(devnum)
        vsCoshI(strided_len, s_in, stride, s_out_i, stride);
        #pragma omp dispatch device(devnum)
        vdCosh(length, d_in, d_out);
        #pragma omp dispatch device(devnum)
        vdCoshI(strided_len, d_in, stride, d_out_i, stride);
        #pragma omp dispatch device(devnum)
        vcCosh(length, c_in, c_out);
        #pragma omp dispatch device(devnum)
        vcCoshI(strided_len, c_in, stride, c_out_i, stride);
        #pragma omp dispatch device(devnum)
        vzCosh(length, z_in, z_out);
        #pragma omp dispatch device(devnum)
        vzCoshI(strided_len, z_in, stride, z_out_i, stride);

        /* Variants taking the mode explicitly */
        #pragma omp dispatch device(devnum)
        vmsCosh(length, s_in, s_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsCoshI(strided_len, s_in, stride, s_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdCosh(length, d_in, d_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdCoshI(strided_len, d_in, stride, d_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcCosh(length, c_in, c_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcCoshI(strided_len, c_in, stride, c_out_mi, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzCosh(length, z_in, z_out_m, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzCoshI(strided_len, z_in, stride, z_out_mi, stride, vml_mode);
    }
    /* No taskwait here: none of the dispatch constructs above uses nowait */
} /* own_vm_cosh */

/**
 * Tanh
 *
 * Touch test for the offloaded oneMKL VM Tanh family in all four precisions
 * (s, d, c, z). Each precision is called in four variants: v?Tanh, v?TanhI
 * (strided), vm?Tanh (explicit mode) and vm?TanhI (strided, explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1, darg1, carg1 and zarg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1, dres1, cres1 and zres1
 */
static void own_vm_tanh (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Single precision complex buffers, viewed through the MKL complex type */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex buffers, viewed through the MKL complex type */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length], carg[0:length], zarg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
                            map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
                            map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsTanh(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsTanhI(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdTanh(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdTanhI(strided_len, darg, stride, dires, stride);
        #pragma omp dispatch device(devnum)
        vcTanh(length, carg, cres);
        #pragma omp dispatch device(devnum)
        vcTanhI(strided_len, carg, stride, cires, stride);
        #pragma omp dispatch device(devnum)
        vzTanh(length, zarg, zres);
        #pragma omp dispatch device(devnum)
        vzTanhI(strided_len, zarg, stride, zires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsTanh(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsTanhI(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdTanh(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdTanhI(strided_len, darg, stride, dmires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcTanh(length, carg, cmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcTanhI(strided_len, carg, stride, cmires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzTanh(length, zarg, zmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzTanhI(strided_len, zarg, stride, zmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_tanh */

/**
 * Exp
 *
 * Touch test for the offloaded oneMKL VM Exp family in all four precisions
 * (s, d, c, z). Each precision is called in four variants: v?Exp, v?ExpI
 * (strided), vm?Exp (explicit mode) and vm?ExpI (strided, explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1, darg1, carg1 and zarg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1, dres1, cres1 and zres1
 */
static void own_vm_exp (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Single precision complex buffers, viewed through the MKL complex type */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex buffers, viewed through the MKL complex type */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length], carg[0:length], zarg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
                            map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
                            map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsExp(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsExpI(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdExp(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdExpI(strided_len, darg, stride, dires, stride);
        #pragma omp dispatch device(devnum)
        vcExp(length, carg, cres);
        #pragma omp dispatch device(devnum)
        vcExpI(strided_len, carg, stride, cires, stride);
        #pragma omp dispatch device(devnum)
        vzExp(length, zarg, zres);
        #pragma omp dispatch device(devnum)
        vzExpI(strided_len, zarg, stride, zires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsExp(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsExpI(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdExp(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdExpI(strided_len, darg, stride, dmires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcExp(length, carg, cmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcExpI(strided_len, carg, stride, cmires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzExp(length, zarg, zmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzExpI(strided_len, zarg, stride, zmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_exp */

/**
 * Exp2
 *
 * Touch test for the offloaded oneMKL VM Exp2 family in single and double
 * precision. Each precision is called in four variants: v?Exp2, v?Exp2I
 * (strided), vm?Exp2 (explicit mode) and vm?Exp2I (strided, explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_exp2 (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsExp2(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsExp2I(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdExp2(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdExp2I(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsExp2(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsExp2I(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdExp2(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdExp2I(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_exp2 */

/**
 * Exp10
 *
 * Touch test for the offloaded oneMKL VM Exp10 family in single and double
 * precision. Each precision is called in four variants: v?Exp10, v?Exp10I
 * (strided), vm?Exp10 (explicit mode) and vm?Exp10I (strided, explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_exp10 (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsExp10(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsExp10I(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdExp10(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdExp10I(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsExp10(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsExp10I(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdExp10(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdExp10I(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_exp10 */

/**
 * Expm1
 *
 * Touch test for the offloaded oneMKL VM Expm1 family in single and double
 * precision. Each precision is called in four variants: v?Expm1, v?Expm1I
 * (strided), vm?Expm1 (explicit mode) and vm?Expm1I (strided, explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_expm1 (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsExpm1(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsExpm1I(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdExpm1(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdExpm1I(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsExpm1(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsExpm1I(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdExpm1(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdExpm1I(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_expm1 */

/**
 * Log2
 *
 * Touch test for the offloaded oneMKL VM Log2 family in single and double
 * precision. Each precision is called in four variants: v?Log2, v?Log2I
 * (strided), vm?Log2 (explicit mode) and vm?Log2I (strided, explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_log2 (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsLog2(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsLog2I(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdLog2(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdLog2I(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsLog2(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsLog2I(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdLog2(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdLog2I(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_log2 */

/**
 * Log10
 *
 * Touch test for the offloaded oneMKL VM Log10 family in all four precisions
 * (s, d, c, z). Each precision is called in four variants: v?Log10, v?Log10I
 * (strided), vm?Log10 (explicit mode) and vm?Log10I (strided, explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1, darg1, carg1 and zarg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1, dres1, cres1 and zres1
 */
static void own_vm_log10 (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Single precision complex buffers, viewed through the MKL complex type */
    MKL_Complex8 *carg   = (MKL_Complex8*)in->carg1;
    MKL_Complex8 *cres   = (MKL_Complex8*)out->cres1[kFunc];
    MKL_Complex8 *cires  = (MKL_Complex8*)out->cres1[kFuncI];
    MKL_Complex8 *cmres  = (MKL_Complex8*)out->cres1[kMFunc];
    MKL_Complex8 *cmires = (MKL_Complex8*)out->cres1[kMFuncI];

    /* Double precision complex buffers, viewed through the MKL complex type */
    MKL_Complex16 *zarg   = (MKL_Complex16*)in->zarg1;
    MKL_Complex16 *zres   = (MKL_Complex16*)out->zres1[kFunc];
    MKL_Complex16 *zires  = (MKL_Complex16*)out->zres1[kFuncI];
    MKL_Complex16 *zmres  = (MKL_Complex16*)out->zres1[kMFunc];
    MKL_Complex16 *zmires = (MKL_Complex16*)out->zres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length], carg[0:length], zarg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length]) \
                            map(tofrom: cres[0:length], cires[0:length], cmres[0:length], cmires[0:length]) \
                            map(tofrom: zres[0:length], zires[0:length], zmres[0:length], zmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsLog10(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsLog10I(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdLog10(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdLog10I(strided_len, darg, stride, dires, stride);
        #pragma omp dispatch device(devnum)
        vcLog10(length, carg, cres);
        #pragma omp dispatch device(devnum)
        vcLog10I(strided_len, carg, stride, cires, stride);
        #pragma omp dispatch device(devnum)
        vzLog10(length, zarg, zres);
        #pragma omp dispatch device(devnum)
        vzLog10I(strided_len, zarg, stride, zires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsLog10(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsLog10I(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdLog10(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdLog10I(strided_len, darg, stride, dmires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcLog10(length, carg, cmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmcLog10I(strided_len, carg, stride, cmires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzLog10(length, zarg, zmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmzLog10I(strided_len, zarg, stride, zmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_log10 */

/**
 * Log1p
 *
 * Touch test for the offloaded oneMKL VM Log1p family in single and double
 * precision. Each precision is called in four variants: v?Log1p, v?Log1pI
 * (strided), vm?Log1p (explicit mode) and vm?Log1pI (strided, explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_log1p (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsLog1p(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsLog1pI(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdLog1p(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdLog1pI(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsLog1p(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsLog1pI(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdLog1p(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdLog1pI(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_log1p */

/**
 * Erf
 *
 * Touch test for the offloaded oneMKL VM Erf family in single and double
 * precision. Each precision is called in four variants: v?Erf, v?ErfI
 * (strided), vm?Erf (explicit mode) and vm?ErfI (strided, explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_erf (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsErf(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsErfI(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdErf(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdErfI(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsErf(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsErfI(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdErf(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdErfI(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_erf */

/**
 * Erfc
 *
 * Touch test for the offloaded oneMKL VM Erfc family in single and double
 * precision. Each precision is called in four variants: v?Erfc, v?ErfcI
 * (strided), vm?Erfc (explicit mode) and vm?ErfcI (strided, explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_erfc (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsErfc(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsErfcI(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdErfc(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdErfcI(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsErfc(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsErfcI(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdErfc(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdErfcI(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_erfc */

/**
 * ErfInv
 *
 * Touch test for the offloaded oneMKL VM ErfInv family in single and double
 * precision. Each precision is called in four variants: v?ErfInv, v?ErfInvI
 * (strided), vm?ErfInv (explicit mode) and vm?ErfInvI (strided, explicit
 * mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_erfinv (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsErfInv(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsErfInvI(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdErfInv(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdErfInvI(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsErfInv(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsErfInvI(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdErfInv(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdErfInvI(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_erfinv */

/**
 * ErfcInv
 *
 * Touch test for the offloaded oneMKL VM ErfcInv family in single and double
 * precision. Each precision is called in four variants: v?ErfcInv,
 * v?ErfcInvI (strided), vm?ErfcInv (explicit mode) and vm?ErfcInvI (strided,
 * explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_erfcinv (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsErfcInv(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsErfcInvI(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdErfcInv(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdErfcInvI(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsErfcInv(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsErfcInvI(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdErfcInv(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdErfcInvI(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_erfcinv */

/**
 * CdfNorm
 *
 * Touch test for the offloaded oneMKL VM CdfNorm family in single and double
 * precision. Each precision is called in four variants: v?CdfNorm,
 * v?CdfNormI (strided), vm?CdfNorm (explicit mode) and vm?CdfNormI (strided,
 * explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_cdfnorm (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsCdfNorm(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsCdfNormI(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdCdfNorm(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdCdfNormI(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsCdfNorm(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsCdfNormI(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdCdfNorm(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdCdfNormI(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_cdfnorm */

/**
 * CdfNormInv
 *
 * Touch test for the offloaded oneMKL VM CdfNormInv family in single and
 * double precision. Each precision is called in four variants: v?CdfNormInv,
 * v?CdfNormInvI (strided), vm?CdfNormInv (explicit mode) and vm?CdfNormInvI
 * (strided, explicit mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_cdfnorminv (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsCdfNormInv(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsCdfNormInvI(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdCdfNormInv(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdCdfNormInvI(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsCdfNormInv(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsCdfNormInvI(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdCdfNormInv(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdCdfNormInvI(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_cdfnorminv */

/**
 * LGamma
 *
 * Touch test for the offloaded oneMKL VM LGamma family in single and double
 * precision. Each precision is called in four variants: v?LGamma, v?LGammaI
 * (strided), vm?LGamma (explicit mode) and vm?LGammaI (strided, explicit
 * mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_lgamma (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsLGamma(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsLGammaI(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdLGamma(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdLGammaI(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsLGamma(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsLGammaI(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdLGamma(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdLGammaI(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_lgamma */

/**
 * TGamma
 *
 * Touch test for the offloaded oneMKL VM TGamma family in single and double
 * precision. Each precision is called in four variants: v?TGamma, v?TGammaI
 * (strided), vm?TGamma (explicit mode) and vm?TGammaI (strided, explicit
 * mode).
 *
 * acc - index into vm_mode[]; the selected value is passed to vmlSetMode()
 *       and to every explicit-mode call
 * in  - input data; sarg1 and darg1 are used as arguments
 * out - output data; results are stored in the kFunc, kFuncI, kMFunc and
 *       kMFuncI entries of sres1 and dres1
 */
static void own_vm_tgamma (int acc, VmInputData* in, VmOutputData* out)
{
    const int vml_mode    = vm_mode[acc];
    const int strided_len = length/stride;   /* element count for the "I" calls */

    /* Single precision argument and result buffers */
    float *sarg   = in->sarg1;
    float *sres   = out->sres1[kFunc];
    float *sires  = out->sres1[kFuncI];
    float *smres  = out->sres1[kMFunc];
    float *smires = out->sres1[kMFuncI];

    /* Double precision argument and result buffers */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    /* Arguments are mapped to the device only; results are mapped both ways */
    #pragma omp target data device(devnum) \
                            map(to:     sarg[0:length], darg[0:length]) \
                            map(tofrom: sres[0:length], sires[0:length], smres[0:length], smires[0:length]) \
                            map(tofrom: dres[0:length], dires[0:length], dmres[0:length], dmires[0:length])
    {
        #pragma omp dispatch device(devnum)
        vmlSetMode(vml_mode);

        /* Variants without a mode argument */
        #pragma omp dispatch device(devnum)
        vsTGamma(length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsTGammaI(strided_len, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdTGamma(length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdTGammaI(strided_len, darg, stride, dires, stride);

        /* Variants that take the mode as an explicit last argument */
        #pragma omp dispatch device(devnum)
        vmsTGamma(length, sarg, smres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmsTGammaI(strided_len, sarg, stride, smires, stride, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdTGamma(length, darg, dmres, vml_mode);
        #pragma omp dispatch device(devnum)
        vmdTGammaI(strided_len, darg, stride, dmires, stride, vml_mode);
    }
    /* No taskwait is issued here; none of the dispatch constructs uses nowait */
} /* own_vm_tgamma */

/**
 * @brief ExpInt1 launcher
 *
 * Maps the first real argument vectors and the per-variant result buffers
 * to device devnum and dispatches every real ExpInt1 variant there:
 * basic, strided, explicit mode and strided with explicit mode,
 * in single and double precision.
 *
 * @param[in] acc          Accuracy index selecting the mode from vm_mode
 * @param[in] in           Input arrays (sarg1 and darg1 are read)
 * @param[out] out         Output arrays (sres1 and dres1 buffers are written)
 *
 */
static void own_vm_expint1 (int acc, VmInputData* in, VmOutputData* out)
{
    /* Single precision argument and one result buffer per API variant */
    float  *sarg   = in->sarg1;
    float  *sres   = out->sres1[kFunc];
    float  *sires  = out->sres1[kFuncI];
    float  *smres  = out->sres1[kMFunc];
    float  *smires = out->sres1[kMFuncI];

    /* Double precision argument and one result buffer per API variant */
    double *darg   = in->darg1;
    double *dres   = out->dres1[kFunc];
    double *dires  = out->dres1[kFuncI];
    double *dmres  = out->dres1[kMFunc];
    double *dmires = out->dres1[kMFuncI];

    #pragma omp target data map(to:sarg[0:length]) \
                            map(to:darg[0:length]) \
                            map(tofrom:sres[0:length]) \
                            map(tofrom:sires[0:length]) \
                            map(tofrom:smres[0:length]) \
                            map(tofrom:smires[0:length]) \
                            map(tofrom:dres[0:length]) \
                            map(tofrom:dires[0:length]) \
                            map(tofrom:dmres[0:length]) \
                            map(tofrom:dmires[0:length]) \
                            device(devnum)
    {
        int mode = vm_mode[acc];

        /* Mode for the variants that take no explicit mode argument */
        #pragma omp dispatch device(devnum)
        vmlSetMode(mode);

        /* Basic and strided variants */
        #pragma omp dispatch device(devnum)
        vsExpInt1   (length, sarg, sres);
        #pragma omp dispatch device(devnum)
        vsExpInt1I  (length/stride, sarg, stride, sires, stride);
        #pragma omp dispatch device(devnum)
        vdExpInt1   (length, darg, dres);
        #pragma omp dispatch device(devnum)
        vdExpInt1I  (length/stride, darg, stride, dires, stride);

        /* Explicit mode variants, basic and strided */
        #pragma omp dispatch device(devnum)
        vmsExpInt1  (length, sarg, smres, mode);
        #pragma omp dispatch device(devnum)
        vmsExpInt1I (length/stride, sarg, stride, smires, stride, mode);
        #pragma omp dispatch device(devnum)
        vmdExpInt1  (length, darg, dmres, mode);
        #pragma omp dispatch device(devnum)
        vmdExpInt1I (length/stride, darg, stride, dmires, stride, mode);
    }
} /* own_vm_expint1 */

/**
 * Custom reference functions absent in LIBM:
 */
static double own_frac (double x) { return (x - trunc (x)); } /* signed fractional part */
static double own_invcbrt (double x) { return pow(x, -(1.0/3.0)); } /* inverse cube root */
static double own_pow2o3 (double x) { return pow(x, 2.0 / 3.0); } /* power x^(2/3) */
static double own_pow3o2 (double x) { return pow(x, 3.0 / 2.0); } /* power x^(3/2) */
static double own_inv (double x) { return 1.0 / x; } /* inverse 1/x */
static double own_sqr (double x) { return x * x; } /* square x*x */
static double own_add (double x, double y) { return x + y; } /* addition x+y */
static double own_sub (double x, double y) { return x - y; } /* subtraction x-y */
static double own_mul (double x, double y) { return x * y; } /* multiplication x-y */
static double own_div (double x, double y) { return x / y; } /* division x/y */
/* Exponential integral - exists in VM only, so the reference is the */
/* one-element VML_HA explicit mode VM call itself */
static double own_expint1 (double x)
{
    double result;
    vmdExpInt1(1, &x, &result, VML_HA);
    return result;
}
static void   own_modf (double x, double* r1, double* r2) { *r2 = modf(x, r1); } /* truncated integer value |x| and the remaining fraction part x-|x| */
static double own_linearfrac (double x, double y, double a, double b, double c, double d) { return (x * a + b)/(y * c + d); } /* linear fraction (x*a + b)/(y*c + d)*/
static double own_minmag (double x, double y) { return (fabs(x) < fabs(y))?x:y; } /* min(|x|,|y|) */
static double own_maxmag (double x, double y) { return (fabs(x) > fabs(y))?x:y; } /* max(|x|,|y|) */
static void   own_sincos (double x, double* r1, double* r2) { *r1 = sin(x); *r2 = cos(x); return; } /* sin & cos */
static double own_invsqrt (double x) { return 1.0 / sqrt(x); } /* 1/sqrt(x) */

static double complex own_cadd (double complex x, double complex y) { return x + y; } /* complex x+y */
static double complex own_csub (double complex x, double complex y) { return x - y; } /* complex x+y */
static double complex own_cmul (double complex x, double complex y) { return x * y; } /* complex x*y */
static double complex own_cdiv (double complex x, double complex y) { return x / y; } /* complex x+y */
static double own_cabs (double complex x) { return hypot(creal(x),cimag(x)); } /* |x| */
static double own_carg (double complex x) { return atan2(cimag(x),creal(x)); } /* complex argument (atan2) */
/* Complex CIS: cos(x) + i*sin(x), built on own_sincos */
static double complex own_cis (double x)
{
    double sin_part, cos_part;
    own_sincos(x, &sin_part, &cos_part);
    double complex r = cos_part + I * sin_part;
    return r;
}
/* Complex product of x and the conjugate of y: x*conj(y) */
static double complex own_cmulbyconj (double complex x, double complex y)
{
    const double complex y_conj = conj(y);
    return x * y_conj;
}

/**
 * @brief Safe malloc
 *
 * own_safe_malloc allocates memory and checks the resulting pointer.
 * If malloc returns NULL, an error is reported on stderr and
 * the application exits with code -1.
 *
 * @param[in] size          Size in bytes
 * @return                  Pointer to allocated memory (never NULL)
 *
 */
static void* own_safe_malloc(int size)
{
    void* block = malloc (size);

    /* Successful allocation is the common path */
    if (block != NULL) { return block; }

    fprintf (stderr, "\t\tERROR: %d bytes allocated unsuccessfully\n", size);
    exit(-1);
} /* own_safe_malloc */

/**
 * @brief Safe free
 *
 * own_safe_free deallocates memory.
 * A NULL pointer is treated as an error: it is reported on stderr and
 * the application exits with code -1.
 *
 * @param[in] ptr          Pointer to memory
 *
 */
static void own_safe_free(void *ptr)
{
    if (ptr == NULL)
    {
       fprintf (stderr, "\t\tERROR: NULL pointer cannot be deallocated\n");
       exit(-1);
    }

    free (ptr);
} /* own_safe_free */

/**
 * @brief Allocate all input, reference result and output arrays
 *
 * Safe allocation of the argument and reference arrays in in, and of
 * one set of result arrays per API variant in out.
 * Every array holds len elements; all reference arrays are allocated
 * with double precision element sizes.
 *
 * @param[in] len           Arrays length
 * @param[in, out] in       Input and reference result arrays
 * @param[in, out] out      Output arrays
 *
 */
static void own_allocate_data (int len, VmInputData* in, VmOutputData* out)
{
    /* Byte sizes of one array for each element type */
    const int s_bytes = len * sizeof (float);
    const int d_bytes = len * sizeof (double);
    const int c_bytes = len * sizeof (float complex);
    const int z_bytes = len * sizeof (double complex);

    /* Arguments */
    in->sarg1 = own_safe_malloc (s_bytes);
    in->sarg2 = own_safe_malloc (s_bytes);
    in->darg1 = own_safe_malloc (d_bytes);
    in->darg2 = own_safe_malloc (d_bytes);
    in->carg1 = own_safe_malloc (c_bytes);
    in->carg2 = own_safe_malloc (c_bytes);
    in->zarg1 = own_safe_malloc (z_bytes);
    in->zarg2 = own_safe_malloc (z_bytes);

    /* References: sized as double / double complex for all precisions */
    in->sref1 = own_safe_malloc (d_bytes);
    in->sref2 = own_safe_malloc (d_bytes);
    in->dref1 = own_safe_malloc (d_bytes);
    in->dref2 = own_safe_malloc (d_bytes);
    in->cref1 = own_safe_malloc (z_bytes);
    in->cref2 = own_safe_malloc (z_bytes);
    in->zref1 = own_safe_malloc (z_bytes);
    in->zref2 = own_safe_malloc (z_bytes);

    /* Results: one set of buffers per API variant */
    for (int api = kFunc; api < kApiNum; api++)
    {
        out->sres1[api] = own_safe_malloc (s_bytes);
        out->sres2[api] = own_safe_malloc (s_bytes);
        out->dres1[api] = own_safe_malloc (d_bytes);
        out->dres2[api] = own_safe_malloc (d_bytes);
        out->cres1[api] = own_safe_malloc (c_bytes);
        out->cres2[api] = own_safe_malloc (c_bytes);
        out->zres1[api] = own_safe_malloc (z_bytes);
        out->zres2[api] = own_safe_malloc (z_bytes);
    }
} /* own_allocate_data */

/**
 * @brief Deallocate all input, reference result and output arrays
 *
 * Safe deallocation of every array obtained in own_allocate_data.
 * The result buffers are released for each API variant in
 * [kFunc, kApiNum), the same range own_allocate_data allocates, so the
 * two functions stay in sync if the set of API variants changes.
 *
 * @param[in, out] in       Input and reference result arrays
 * @param[in, out] out      Output arrays
 *
 */
static void own_deallocate_data (VmInputData* in, VmOutputData* out)
{
    /* Arguments */
    own_safe_free (in->sarg1);
    own_safe_free (in->sarg2);
    own_safe_free (in->darg1);
    own_safe_free (in->darg2);
    own_safe_free (in->carg1);
    own_safe_free (in->carg2);
    own_safe_free (in->zarg1);
    own_safe_free (in->zarg2);

    /* References */
    own_safe_free (in->sref1);
    own_safe_free (in->sref2);
    own_safe_free (in->dref1);
    own_safe_free (in->dref2);
    own_safe_free (in->cref1);
    own_safe_free (in->cref2);
    own_safe_free (in->zref1);
    own_safe_free (in->zref2);

    /* Results: mirror the allocation loop bounds */
    for (int v = kFunc; v < kApiNum; v = v + 1)
    {
        own_safe_free (out->sres1[v]);
        own_safe_free (out->sres2[v]);
        own_safe_free (out->dres1[v]);
        own_safe_free (out->dres2[v]);
        own_safe_free (out->cres1[v]);
        own_safe_free (out->cres2[v]);
        own_safe_free (out->zres1[v]);
        own_safe_free (out->zres2[v]);
    }

    return;
} /* own_deallocate_data */

/**
 * @brief Single precision ULP calculation
 *
 * Computes ULP between result and reference value.
 *
 * For a finite reference the error |res-ref| is divided by the single
 * precision unit in the last place at ref, 2^(ex-24), where ex is the
 * binary exponent returned by frexp. The unit is never taken smaller than
 * 2^-149, the spacing of single precision subnormals, so errors against
 * subnormal references are not overstated.
 *
 * For a non-finite reference the result either matches (0 ulp) or not
 * (FLT_MAX): a NaN reference matches any NaN result, an infinite reference
 * matches only the infinity of the same sign.
 *
 * @param[in] res          Computed result
 * @param[in] ref          Reference result
 * @return                 Calculated ULP, a finite value in [0, FLT_MAX]
 *
 */
static float own_s_compute_ulp (float res, double ref )
{
    if (!isfinite (ref))
    {
        /* NaN reference: any NaN result is accepted */
        if (isnan (ref)) { return isnan (res) ? 0.0f : FLT_MAX; }

        /* Infinite reference: equality also checks the sign */
        return ((double)res == ref) ? 0.0f : FLT_MAX;
    }

    const double min_den = 0x1.p-149;                 /* smallest single precision spacing */
    int ex = 0;

    frexp (ref, &ex);                                 /* ex: integral power of two of ref */
    double den = ldexp (1.0, ex - 24);                /* den: ulp's denominator 2^(ex-p+1) */
    if (den < min_den) { den = min_den; }             /* subnormal range (also avoids divbyzero) */

    double ulp = fabs (((double)res - ref) / den);    /* |res-ref|/2^(ex-24) */

    /* Clamp NaN, infinity and out-of-float-range values before narrowing */
    if (!(ulp <= (double)FLT_MAX)) { return FLT_MAX; }

    return (float)ulp;
} /* own_s_compute_ulp */

/**
 * @brief Complex single precision ULP calculation
 *
 * Computes ULP between result and reference value as the larger of
 * the real part ULP and the imaginary part ULP.
 *
 * @param[in] res          Computed result
 * @param[in] ref          Reference result
 * @return                 Calculated ULP
 *
 */
static float own_c_compute_ulp (float complex res, double complex ref )
{
    const float re_ulp = own_s_compute_ulp (creal (res), creal (ref));
    const float im_ulp = own_s_compute_ulp (cimag (res), cimag (ref));

    return fmaxf (re_ulp, im_ulp);
} /* own_c_compute_ulp */

/**
 * @brief Double precision ULP calculation
 *
 * Computes ULP between result and reference value.
 *
 * For a finite reference the error |res-ref| is divided by the double
 * precision unit in the last place at ref, 2^(ex-53), where ex is the
 * binary exponent returned by frexp. If that unit underflows to zero it is
 * replaced by 2^-1074.
 *
 * For a non-finite reference the result either matches (0 ulp) or not
 * (DBL_MAX): a NaN reference matches any NaN result, an infinite reference
 * matches only the infinity of the same sign.
 *
 * @param[in] res          Computed result
 * @param[in] ref          Reference result
 * @return                 Calculated ULP, a finite value in [0, DBL_MAX]
 *
 */
static double own_d_compute_ulp (double res, double ref )
{
    if (!isfinite (ref))
    {
        /* NaN reference: any NaN result is accepted */
        if (isnan (ref)) { return isnan (res) ? 0.0 : DBL_MAX; }

        /* Infinite reference: equality also checks the sign */
        return (res == ref) ? 0.0 : DBL_MAX;
    }

    int ex = 0;

    frexp (ref, &ex);                                 /* ex: integral power of two of ref */
    double den = ldexp (1.0, ex - 53);                /* den: ulp's denominator 2^(ex-p+1) */
    if (den == 0.0) { den = 0x1.p-1074; }             /* if den=0 then replace by EPS to avoid divbyzero */

    double curulp = fabs ((res - ref) / den);         /* |res-ref|/2^(ex-53) */

    /* Replace infinite or NaN ulp by big finite double number */
    return isfinite (curulp) ? curulp : DBL_MAX;
} /* own_d_compute_ulp */

/**
 * @brief Complex double precision ULP calculation
 *
 * Computes ULP between result and reference value as the larger of
 * the real part ULP and the imaginary part ULP.
 *
 * @param[in] res          Computed result
 * @param[in] ref          Reference result
 * @return                 Calculated ULP
 *
 */
static double own_z_compute_ulp (double complex res, double complex ref )
{
    const double re_ulp = own_d_compute_ulp (creal (res), creal (ref));
    const double im_ulp = own_d_compute_ulp (cimag (res), cimag (ref));

    return fmax (re_ulp, im_ulp);
} /* own_z_compute_ulp */

/**
 * @brief Fill inputs
 *
 * Fills input vectors by pseudo-random numbers.
 * The generator is re-seeded with a fixed value on every call, so each
 * call produces the same sequence for the same range.
 *
 * @param[in] layout       Function arguments layout (not used here)
 * @param[in] beg          Begin of input ranges
 * @param[in] end          End of input ranges
 * @param[out] in          Input arrays
 *
 */
static void own_fill_input (int layout, double beg, double end, VmInputData* in)
{
    srand(777);

    for (int idx = 0; idx < length; idx++)
    {
        /**
         * Generate random numbers in [beg, end] range
         * (rand() may return RAND_MAX, so end itself can be produced)
         */
        const double re1 = (beg + (end - beg) * (double)(rand ()) / (double)(RAND_MAX));
        const double re2 = (beg + (end - beg) * (double)(rand ()) / (double)(RAND_MAX));
        const double im1 = (beg + (end - beg) * (double)(rand ()) / (double)(RAND_MAX));
        const double im2 = (beg + (end - beg) * (double)(rand ()) / (double)(RAND_MAX));

        /* Real arguments: single precision is the rounded double value */
        in->sarg1[idx] = (float)re1;
        in->sarg2[idx] = (float)re2;
        in->darg1[idx] = re1;
        in->darg2[idx] = re2;

        /* Complex arguments share their real parts with the real arguments */
        in->carg1[idx] = (float)re1 + I * (float)im1;
        in->carg2[idx] = (float)re2 + I * (float)im2;
        in->zarg1[idx] = re1 + I * im1;
        in->zarg2[idx] = re2 + I * im2;
    } /* for (int idx = 0; idx < length; idx++) */
} /* own_fill_input */

/**
 * @brief Fill reference vectors
 *
 * Compute reference results for all length input elements by calling
 * the scalar reference functions with the signature selected by layout.
 * Single precision references are computed from the single precision
 * arguments but stored with double precision.
 * A NULL reference function disables the corresponding (real or complex)
 * part for every layout; layouts not listed below leave the references
 * untouched.
 *
 * @param[in] layout       Function arguments layout
 * @param[in] reffunc      Real reference function (may be NULL)
 * @param[in] creffunc     Complex reference function (may be NULL)
 * @param[out] in          Input and reference results arrays
 *
 */
static void own_fill_reference (int layout, void* reffunc, void* creffunc, VmInputData* in)
{
    int has_real    = (reffunc != NULL)?1:0, has_complex = (creffunc != NULL)?1:0;
    for (int i = 0; i < length; i = i + 1)
    {
        if (layout == kVtoV)
        {
            if (has_real)
            {
                in->sref1[i] = ((RefVtoV)reffunc) (in->sarg1[i]);
                in->dref1[i] = ((RefVtoV)reffunc) (in->darg1[i]);
            }
            if (has_complex)
            {
                in->cref1[i] = ((CRefVtoV)creffunc) ((double complex) (in->carg1[i]));
                in->zref1[i] = ((CRefVtoV)creffunc) ( in->zarg1[i]);
            }
        }
        else if (layout == kVCtoVR)
        {
            if (has_real)
            {
                in->sref1[i] = ((RefVtoV)reffunc) (in->sarg1[i]);
                in->dref1[i] = ((RefVtoV)reffunc) (in->darg1[i]);
            }
            if (has_complex)
            {
                /* Use complex array containers to accept real results */
                ((double*)(in->cref1))[i] = ((CRefCtoR)creffunc) ((double complex) (in->carg1[i]));
                ((double*)(in->zref1))[i] = ((CRefCtoR)creffunc) (in->zarg1[i]);
            }
        }
        else if (layout == kVRtoVC)
        {
            if (has_real)
            {
                /* Use real arrays for kVRtoVC functions */
                in->sref1[i] = ((RefVtoV)reffunc) (in->sarg1[i]);
                in->dref1[i] = ((RefVtoV)reffunc) (in->darg1[i]);
            }
            if (has_complex)
            {
                /* Complex results are computed from the real arguments */
                in->cref1[i] = ((CRefRtoC)creffunc) (in->sarg1[i]);
                in->zref1[i] = ((CRefRtoC)creffunc) (in->darg1[i]);
            }
        }
        else if (layout == kVVtoV)
        {
            if (has_real)
            {
                in->sref1[i] = ((RefVVtoV)reffunc) (in->sarg1[i], in->sarg2[i]);
                in->dref1[i] = ((RefVVtoV)reffunc) (in->darg1[i], in->darg2[i]);
            }
            if (has_complex)
            {
                in->cref1[i] = ((CRefVVtoV)creffunc) ((double complex) (in->carg1[i]), (double complex) (in->carg2[i]));
                in->zref1[i] = ((CRefVVtoV)creffunc) (in->zarg1[i], in->zarg2[i]);
            }
        }
        else if (layout == kVXtoV)
        {
            /* Second argument is the file-scope constant fixed */
            float  sfixed = (float)fixed;
            double dfixed = (double)fixed;
            if (has_real)
            {
                in->sref1[i] = ((RefVVtoV)reffunc) (in->sarg1[i], sfixed);
                in->dref1[i] = ((RefVVtoV)reffunc) (in->darg1[i], dfixed);
            }
            if (has_complex)
            {
                double complex cfixed = sfixed + I * sfixed;
                double complex zfixed = dfixed + I * dfixed;
                in->cref1[i] = ((CRefVVtoV)creffunc) ((double complex) (in->carg1[i]), cfixed);
                in->zref1[i] = ((CRefVVtoV)creffunc) (in->zarg1[i], zfixed);
            }
        }
        else if (layout == kVtoVV)
        {
            /* Real only layout with two results; skip if no real reference is given */
            if (has_real)
            {
                ((RefVtoVV)reffunc) (in->sarg1[i], &(in->sref1[i]), &(in->sref2[i]));
                ((RefVtoVV)reffunc) (in->darg1[i], &(in->dref1[i]), &(in->dref2[i]));
            }
        }
        else if (layout == kVVXtoV)
        {
            /* Real only layout: two vector arguments and four fixed scalars; */
            /* skip if no real reference is given */
            if (has_real)
            {
                float  sfixed = (float)fixed;
                double dfixed = (double)fixed;
                in->sref1[i] = ((RefVVXtoV)reffunc) (in->sarg1[i], in->sarg2[i], sfixed, sfixed, sfixed, sfixed);
                in->dref1[i] = ((RefVVXtoV)reffunc) (in->darg1[i], in->darg2[i], dfixed, dfixed, dfixed, dfixed);
            }
        }
    } /* for (int i = 0; i < length; i++) */
} /* own_fill_reference */


/**
 * @brief Full VM function name for printout
 *
 * Construct full VM function name with precision, api and accuracy suffixes
 * in the form <api prefix><precision><fname><api suffix>_<accuracy>
 * followed by one space, truncated to fit maxlen bytes.
 * prec, api and acc index fixed tables and are not range checked.
 *
 * @param[out] buff        Pointer to output string buffer
 * @param[in] maxlen       Maximum string length
 * @param[in] fname        Base function name
 * @param[in] prec         Precision
 * @param[in] api          API variant
 * @param[in] acc          Accuracy
 * @return                 Pointer to constructed name
 *
 */
static char* own_full_name(char* buff, int maxlen, char* fname, int prec, int api, int acc)
{
    static const char* const prefix_by_api[]   = {"v", "vm", "v", "vm"};
    static const char* const suffix_by_api[]   = {"", "", "I", "I"};
    static const char* const letter_by_prec[]  = {"s", "d", "c", "z"};
    static const char* const suffix_by_acc[]   = {"HA", "LA", "EP"};

    const char* head      = prefix_by_api[api];
    const char* precision = letter_by_prec[prec];
    const char* tail      = suffix_by_api[api];
    const char* accuracy  = suffix_by_acc[acc];

    snprintf (buff, maxlen, "%s%s%s%s_%s ", head, precision, fname, tail, accuracy);

    return buff;
} /* own_full_name */

/**
 * @brief Printout ULP value
 *
 * Print arguments, results, expected values and ULP difference of one
 * vector element to stderr as a single line, then flush stderr.
 * Real precisions are reported with the ULP_OVER_BOUND tag, complex
 * precisions with the ERROR tag. Nothing is printed for other prec values.
 *
 * @param[in] fname        Function name
 * @param[in] prec         Function precision
 * @param[in] layout       Function arguments layout
 * @param[in] acc          Function accuracy
 * @param[in] api          Function API variant
 * @param[in] idx          Vector index
 * @param[in] ulp          ULP difference to print
 * @param[in] in           Input and reference result arrays
 * @param[in] out          Output arrays
 *
 */
static void own_print_ulp (char* fname, int prec, int layout, int acc, int api, int idx, double ulp, VmInputData* in, VmOutputData* out)
{
    char strbuff[NAME_LEN] = {0};

    /* Single precision real: values as %g plus hexadecimal %a form */
    if (prec == kSP)
    {
        fprintf (stderr, "\t\tULP_OVER_BOUND: %s[%d](", own_full_name (strbuff, NAME_LEN, fname, prec, api, acc), idx);
        fprintf (stderr, "%.3g {%a}", in->sarg1[idx], in->sarg1[idx]);
        /* Second argument only for two-argument layouts */
        if ((layout == kVVtoV) || (layout == kVVXtoV)) fprintf (stderr, ", %.3g {%a}", in->sarg2[idx], in->sarg2[idx]);
        fprintf (stderr, ") = %.3g {%a}", out->sres1[api][idx], out->sres1[api][idx]);
        /* Second result and second reference only for two-result layout */
        if (layout == kVtoVV) fprintf (stderr, ", %.3g {%a}", out->sres2[api][idx], out->sres2[api][idx]);
        fprintf (stderr, ", expected = %.3lg {%la}", in->sref1[idx], in->sref1[idx]);
        if (layout == kVtoVV) fprintf (stderr, ", %.3lg {%la}", in->sref2[idx], in->sref2[idx]);
        fprintf (stderr, ", ulp = %.3lg\n", ulp);
    }
    /* Double precision real: same line structure as single precision */
    else if (prec == kDP)
    {
        fprintf (stderr, "\t\tULP_OVER_BOUND: %s[%d](", own_full_name (strbuff, NAME_LEN, fname, prec, api, acc), idx);
        fprintf (stderr, "%.3lg {%la}",in->darg1[idx],in->darg1[idx]);
        if ((layout == kVVtoV) || (layout == kVVXtoV)) fprintf (stderr, ", %.3lg {%la}", in->darg2[idx], in->darg2[idx]);
        fprintf (stderr, ") = %.3lg {%la}", out->dres1[api][idx], out->dres1[api][idx]);
        if (layout == kVtoVV) fprintf (stderr, ", %.3lg {%la}", out->dres2[api][idx], out->dres2[api][idx]);
        fprintf (stderr, ", expected = %.3lg {%la}", in->dref1[idx], in->dref1[idx]);
        if (layout == kVtoVV) fprintf (stderr, ", %.3lg {%la}", in->dref2[idx], in->dref2[idx]);
        fprintf (stderr, ", ulp = %.3lg\n", ulp);
   }
    /* Single precision complex: values printed as re+i*im */
    else if (prec == kCP)
    {
        fprintf (stderr, "\t\tERROR: %s[%d](", own_full_name(strbuff, NAME_LEN, fname, prec, api, acc), idx);
        /* kVRtoVC takes a real argument, all other layouts a complex one */
        if (layout == kVRtoVC) fprintf (stderr, "%.3g {%a}", in->sarg1[idx], in->sarg1[idx]);
        else                  fprintf (stderr, "%.3g+i*%.3g {%a+i*%a}",
                              creal(in->carg1[idx]), cimag(in->carg1[idx]), creal(in->carg1[idx]), cimag(in->carg1[idx]));
        if (layout == kVVtoV)  fprintf (stderr, ", %.3g+i*%.3g {%a+i*%a}",
                              creal(in->carg2[idx]), cimag(in->carg2[idx]), creal(in->carg2[idx]), cimag(in->carg2[idx]));
        /* kVCtoVR: the complex result container is read as an array of real floats */
        if (layout == kVCtoVR) fprintf (stderr, ") = %.3g {%a}", ((float*)(out->cres1[api]))[idx], ((float*)(out->cres1[api]))[idx]);
        else                  fprintf (stderr, ") = %.3g+i*%.3g {%a+i*%a}",
                              creal(out->cres1[api][idx]), cimag(out->cres1[api][idx]), creal(out->cres1[api][idx]), cimag(out->cres1[api][idx]));
        if (layout == kVtoVV)  fprintf (stderr, ", %.3g+i*%.3g {%a+i*%a}",
                              creal(out->cres2[api][idx]), cimag(out->cres2[api][idx]), creal(out->cres2[api][idx]), cimag(out->cres2[api][idx]));
        /* kVCtoVR: the complex reference container is read as an array of real doubles */
        if (layout == kVCtoVR) fprintf (stderr, ", expected = %.3lg {%la}", ((double*)(in->cref1))[idx], ((double*)(in->cref1))[idx]);
        else                  fprintf (stderr, ", expected = %.3lg+i*%.3lg {%la+i*%la}",
                              creal(in->cref1[idx]), cimag(in->cref1[idx]), creal(in->cref1[idx]), cimag(in->cref1[idx]));
        if (layout == kVtoVV)  fprintf (stderr, ", %.3g+i*%.3g {%la+i*%la}", creal(in->cref2[idx]),
                              cimag(in->cref2[idx]), creal(in->cref2[idx]), cimag(in->cref2[idx]));
        fprintf (stderr, ", ulp = %.3lg\n", ulp);
    }
    /* Double precision complex: same line structure as single precision complex */
    else if (prec == kZP)
    {
        fprintf (stderr, "\t\tERROR: %s[%d](", own_full_name(strbuff, NAME_LEN, fname, prec, api, acc), idx);
        if (layout == kVRtoVC) fprintf (stderr, "%.3lg {%la}", in->darg1[idx], in->darg1[idx]);
        else                  fprintf (stderr, "%.3lg+i*%.3lg {%la+i*%la}",
                              creal(in->zarg1[idx]), cimag(in->zarg1[idx]), creal(in->zarg1[idx]), cimag(in->zarg1[idx]));
        if (layout == kVVtoV)  fprintf (stderr, ", %.3lg+i*%.3lg {%la+i*%la}",
                              creal(in->zarg2[idx]), cimag(in->zarg2[idx]), creal(in->zarg2[idx]), cimag(in->zarg2[idx]));
        /* kVCtoVR: result and reference containers are read as arrays of real doubles */
        if (layout == kVCtoVR) fprintf (stderr, ") = %.3lg {%la}", ((double*)(out->zres1[api]))[idx], ((double*)(out->zres1[api]))[idx]);
        else                  fprintf (stderr, ") = %.3lg+i*%.3lg {%la+i*%la}",
                              creal(out->zres1[api][idx]), cimag(out->zres1[api][idx]), creal(out->zres1[api][idx]), cimag(out->zres1[api][idx]));
        if (layout == kVtoVV)  fprintf (stderr, ", %.3lg+i*%.3lg {%la+i*%la}",
                              creal(out->zres2[api][idx]), cimag(out->zres2[api][idx]), creal(out->zres2[api][idx]), cimag(out->zres2[api][idx]));
        if (layout == kVCtoVR) fprintf (stderr, ", expected = %.3lg {%la}", ((double*)(in->zref1))[idx], ((double*)(in->zref1))[idx]);
        else                  fprintf (stderr, ", expected = %.3lg+i*%.3lg {%la+i*%la}",
                              creal(in->zref1[idx]), cimag(in->zref1[idx]), creal(in->zref1[idx]), cimag(in->zref1[idx]));
        if (layout == kVtoVV)  fprintf (stderr, ", %.3lg+i*%.3lg {%la+i*%la}",
                              creal(in->zref2[idx]), cimag(in->zref2[idx]), creal(in->zref2[idx]), cimag(in->zref2[idx]));
        fprintf (stderr, ", ulp = %.3lg\n", ulp);
    }

    fflush (stderr);
    return;
} /* own_print_ulp */

/**
 * @brief Evaluation of one VM functions family
 *
 * Measure accuracy on VM functions family in comparison to reference scalar implementations.
 *
 * Inputs and references are generated once; then for every accuracy the
 * launcher is called and each API variant's results are compared with
 * the references element by element. An ULP above the allowed bound is
 * counted either as a warning or as an error, and at most max_printed
 * of them are printed per API variant. A summary line with the maximum
 * observed ULP per accuracy is printed to stdout for each precision.
 *
 * @param[in] fname        Function name
 * @param[in] beg          Begin of input ranges
 * @param[in] end          End of input ranges
 * @param[in] vmfunc       Pointer to VM functions launcher
 * @param[in] reffunc      Real reference function (NULL to skip real checks)
 * @param[in] creffunc     Complex reference function (NULL to skip complex checks)
 * @param[in] layout       Function arguments layout
 * @param[in] in           Input and reference result arrays
 * @param[out] out         Output arrays
 * @return                 Total number of errors (warnings are not included)
 *
 */
static int own_evaluate_func (char* fname, double beg, double end,
                              VmFunc vmfunc, void* reffunc, void* creffunc,
                              int layout, VmInputData* in, VmOutputData* out)
{
    int printed = 0;
    int serr  = 0, derr  = 0, cerr  = 0, zerr  = 0,
        swarn = 0, dwarn = 0, cwarn = 0, zwarn = 0;
    float  sulp[kApiNum][kAccNum]    = {0}, culp[kApiNum][kAccNum]    = {0},
           smaxulp[kApiNum][kAccNum] = {0}, cmaxulp[kApiNum][kAccNum] = {0},
           sresulp[kAccNum]          = {0}, cresulp[kAccNum]          = {0};
    double dulp[kApiNum][kAccNum]    = {0}, zulp[kApiNum][kAccNum]    = {0},
           dmaxulp[kApiNum][kAccNum] = {0}, zmaxulp[kApiNum][kAccNum] = {0},
           dresulp[kAccNum]          = {0}, zresulp[kAccNum]          = {0};
    int has_real = (reffunc != NULL)?1:0, has_complex = (creffunc != NULL)?1:0;

    own_fill_input (layout, beg, end, in);
    own_fill_reference (layout, reffunc, creffunc, in);

    for (int a = kHA; a < kAccNum; a = a + 1)
    {
        /* Launch all API's of function family */
        vmfunc (a, in, out);

        for (int v = kFunc; v < kApiNum; v = v + 1)
        {
            /* Printout limit is counted per API variant */
            printed = 0;

            if (has_real)
            {
                for (int i = 0; i < length; i = i + 1)
                {
                    /* Use stride increment for evaluating strided functions */
                    /* NOTE(review): only multiples of stride below length/stride are checked; */
                    /* confirm whether (length/stride)*stride was the intended bound */
                    if (((v == kFuncI) || (v == kMFuncI)) && ((i % stride) || (i >= length/stride))) continue;

                    /* Compute ULP */
                    sulp[v][a] = own_s_compute_ulp (out->sres1[v][i], in->sref1[i]);
                    dulp[v][a] = own_d_compute_ulp (out->dres1[v][i], in->dref1[i]);
                    if (layout == kVtoVV)
                    {
                        /* Two-result layout: take the worse of both results */
                        sulp[v][a] = fmaxf (sulp[v][a], own_s_compute_ulp (out->sres2[v][i], in->sref2[i]));
                        dulp[v][a] = fmax  (dulp[v][a], own_d_compute_ulp (out->dres2[v][i], in->dref2[i]));
                    }

                    /* Check if ULP is greater than allowed */
                    if (sulp[v][a] > s_allowed_ulp[a])
                    {
                        /* Flags are local to this check so nothing leaks between checks */
                        int warn = 0, err = 0;

                        /* Allows HA/LA linearfrac functions ULP within EP threshold */
                        /* and report warning if greater than HA/LA limits */
                        if ((layout == kVVXtoV) && (sulp[v][a] < s_allowed_ulp[kEP])) warn = 1;
                        else err = 1;

                        swarn += warn; serr += err;
                        if ((printed < max_printed) && ((print_err && err) || (print_warn && warn)))
                        {
                            own_print_ulp (fname, kSP, layout, a, v, i, sulp[v][a], in, out);
                            printed++;
                        }
                    }

                    /* Check if ULP is greater than allowed */
                    if (dulp[v][a] > d_allowed_ulp[a])
                    {
                        int warn = 0, err = 0;

                        /* Allows HA/LA linearfrac functions ULP within EP threshold */
                        /* and report warning if greater than HA/LA limits */
                        if ((layout == kVVXtoV) && (dulp[v][a] < d_allowed_ulp[kEP])) warn = 1;
                        else err = 1;

                        dwarn += warn; derr += err;
                        if ((printed < max_printed) && ((print_err && err) || (print_warn && warn)))
                        {
                            own_print_ulp (fname, kDP, layout, a, v, i, dulp[v][a], in, out);
                            printed++;
                        }
                    }
                    smaxulp[v][a] = fmaxf (smaxulp[v][a], sulp[v][a]);
                    dmaxulp[v][a] = fmax  (dmaxulp[v][a], dulp[v][a]);
                } /* for (int i = 0; i < length; i++) */
                sresulp[a] = fmaxf (sresulp[a], smaxulp[v][a]);
                dresulp[a] = fmax  (dresulp[a], dmaxulp[v][a]);
            } /* if (has_real) */

            if (has_complex)
            {
                for (int i = 0; i < length; i++)
                {
                    /* Use stride increment for evaluating strided functions */
                    if (((v == kFuncI) || (v == kMFuncI)) && ((i % stride) || (i >= length/stride))) continue;

                    if (layout == kVCtoVR)
                    {
                        /* Real results and references are stored in the complex containers */
                        float  *sres1 = (float*)(out->cres1[v]);
                        double *sref1 = (double*)(in->cref1);
                        double *dres1 = (double*)(out->zres1[v]), *dref1 = (double*)(in->zref1);
                        culp[v][a] = own_s_compute_ulp (sres1[i], sref1[i]);
                        zulp[v][a] = own_d_compute_ulp (dres1[i], dref1[i]);
                    }
                    else
                    {
                        culp[v][a] = own_c_compute_ulp (out->cres1[v][i], in->cref1[i]);
                        zulp[v][a] = own_z_compute_ulp (out->zres1[v][i], in->zref1[i]);
                    }

                    if (culp[v][a] > c_allowed_ulp[a])
                    {
                        int warn = 0, err = 0;

                        /* Allows HA/LA complex functions ULP within EP threshold */
                        /* and report warning if greater than HA/LA limits */
                        if (culp[v][a] < c_allowed_ulp[kEP]) warn = 1;
                        else err = 1;

                        cwarn += warn; cerr += err;
                        if ((printed < max_printed) && ((print_err && err) || (print_warn && warn)))
                        {
                            own_print_ulp (fname, kCP, layout, a, v, i, culp[v][a], in, out);
                            printed++;
                        }
                    }
                    if (zulp[v][a] > z_allowed_ulp[a])
                    {
                        int warn = 0, err = 0;

                        /* Allows HA/LA complex functions ULP within EP threshold */
                        /* and report warning if greater than HA/LA limits */
                        if (zulp[v][a] < z_allowed_ulp[kEP]) warn = 1;
                        else err = 1;

                        zwarn += warn; zerr += err;
                        if ((printed < max_printed) && ((print_err && err) || (print_warn && warn)))
                        {
                            own_print_ulp (fname, kZP, layout, a, v, i, zulp[v][a], in, out);
                            printed++;
                        }
                    }
                    cmaxulp[v][a] = fmaxf (cmaxulp[v][a], culp[v][a]);
                    zmaxulp[v][a] = fmax  (zmaxulp[v][a], zulp[v][a]);
                } /* for (int i = 0; i < length; i++) */
                cresulp[a] = fmaxf (cresulp[a], cmaxulp[v][a]);
                zresulp[a] = fmax  (zresulp[a], zmaxulp[v][a]);
            } /* if (has_complex) */
        } /* for (int v = kFunc; v < kApiNum; v++) */
    } /* for (int a = kHA; a < kAccNum; a++) */

    /* Summary: maximum ULP per accuracy and overall verdict per precision */
    if (has_real)
    {
        fprintf (stdout, "\ts%-11s, ha:,%7.2g, la:, %7.2g, ep:, %7.2g, %s\n",
                 fname, sresulp[kHA], sresulp[kLA], sresulp[kEP], (serr)?"OVER":(swarn)?"WARN":"NORM");
        fprintf (stdout, "\td%-11s, ha:,%7.2lg, la:, %7.2lg, ep:, %7.2lg, %s\n",
                 fname, dresulp[kHA], dresulp[kLA], dresulp[kEP], (derr)?"OVER":(dwarn)?"WARN":"NORM");
    }
    if (has_complex)
    {
        fprintf (stdout, "\tc%-11s, ha:,%7.2g, la:, %7.2g, ep:, %7.2g, %s\n",
                 fname, cresulp[kHA], cresulp[kLA], cresulp[kEP], (cerr)?"OVER":(cwarn)?"WARN":"NORM");
        fprintf (stdout, "\tz%-11s, ha:,%7.2lg, la:, %7.2lg, ep:, %7.2lg, %s\n",
                 fname, zresulp[kHA], zresulp[kLA], zresulp[kEP], (zerr)?"OVER":(zwarn)?"WARN":"NORM");
    }

    fflush (stdout);
    // Return total number of errors
    return (serr + derr + cerr + zerr);
} /* own_evaluate_func */

/**
 * @brief Provide string description of VML status code
 *
 * vml_status_string provides string description of VML status code st.
 * The returned text is the name of the matching VML_STATUS_* constant;
 * codes without a match yield "VML_STATUS_UNKNOWN".
 *
 * @param[in] st  VML status code
 * @return         const char* with text of corresponding code
 *
 */
static const char* vml_status_string(int st) {

    /* Known status codes and their names */
    static const struct { int code; const char* text; } known[] = {
        { VML_STATUS_OK,              "VML_STATUS_OK" },
        { VML_STATUS_BADSIZE,         "VML_STATUS_BADSIZE" },
        { VML_STATUS_BADMEM,          "VML_STATUS_BADMEM" },
        { VML_STATUS_ERRDOM,          "VML_STATUS_ERRDOM" },
        { VML_STATUS_SING,            "VML_STATUS_SING" },
        { VML_STATUS_OVERFLOW,        "VML_STATUS_OVERFLOW" },
        { VML_STATUS_UNDERFLOW,       "VML_STATUS_UNDERFLOW" },
        { VML_STATUS_ACCURACYWARNING, "VML_STATUS_ACCURACYWARNING" },
    };

    for (size_t k = 0; k < sizeof (known) / sizeof (known[0]); k++)
    {
        if (known[k].code == st) { return known[k].text; }
    }

    return "VML_STATUS_UNKNOWN";
}

/**
 * @brief Main function for VM API testing
 *
 * Main performs accuracy testing of all VM OMP offload math functions
 *
 * @param[in] argc         Number of arguments
 * @param[in] argv         Pointer to argument strings
 * @return                 0
 *
 */
int main (int argc, char **argv)
{
    /* Total errors */
    int err = 0;

    /* Error satatus */
    int st = VML_STATUS_OK;

    VmInputData  in;
    VmOutputData out;

    fprintf (stdout, "OpenMP Offload C vm_all_funcs: started...\n"); fflush (stdout);

    own_allocate_data (length, &in, &out);

    vmlClearErrStatus();

    fprintf (stdout, "\t===========================================================\n");
    err += own_evaluate_func ("Asin",       -0.9,   0.9,      own_vm_asin,       asin,           casin,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Acos",       -0.9,   0.9,      own_vm_acos,       acos,           cacos,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Atan",       -10000, 10000,    own_vm_atan,       atan,           catan,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Atan2",      -10000, 10000,    own_vm_atan2,      atan2,          NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Asinh",      -10000, 10000,    own_vm_asinh,      asinh,          casinh,         kVtoV,      &in, &out);
    err += own_evaluate_func ("Acosh",       1.01,  1000,     own_vm_acosh,      acosh,          cacosh,         kVtoV,      &in, &out);
    err += own_evaluate_func ("Atanh",      -0.9,   0.9,      own_vm_atanh,      atanh,          catanh,         kVtoV,      &in, &out);
    err += own_evaluate_func ("Sin",        -10,    10,       own_vm_sin,        sin,            csin,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Cos",        -10,    10,       own_vm_cos,        cos,            ccos,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Tan",        -10,    10,       own_vm_tan,        tan,            ctan,           kVtoV,      &in, &out);
    err += own_evaluate_func ("SinCos",     -10000, 10000,    own_vm_sincos,     own_sincos,     NULL,           kVtoVV,     &in, &out);
    err += own_evaluate_func ("Sinh",       -50,    50,       own_vm_sinh,       sinh,           csinh,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Cosh",       -50,    50,       own_vm_cosh,       cosh,           ccosh,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Tanh",       -5,     5,        own_vm_tanh,       tanh,           ctanh,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Exp",        -75,    75,       own_vm_exp,        exp,            cexp,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Exp2",       -30,    30,       own_vm_exp2,       exp2,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Expm1",      -30,    30,       own_vm_expm1,      expm1,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Ln",          1.01,  100000,   own_vm_ln,         log,            clog,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Log2",        1.01,  100000,   own_vm_log2,       log2,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Log1p",       0.01,  100000,   own_vm_log1p,      log1p,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Pow",         0.1,   10,       own_vm_pow,        pow,            cpow,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Powr",        0.1,   10,       own_vm_powr,       pow,            NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Powx",        0.1,   10,       own_vm_powx,       pow,            cpow,           kVXtoV,     &in, &out);
    err += own_evaluate_func ("Pow2o3",      0.1,   10,       own_vm_pow2o3,     own_pow2o3,     NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Pow3o2",      0.1,   10,       own_vm_pow3o2,     own_pow3o2,     NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Sqrt",        0.1,   100,      own_vm_sqrt,       sqrt,           csqrt,          kVtoV,      &in, &out);
    err += own_evaluate_func ("Cbrt",        0.1,   10000,    own_vm_cbrt,       cbrt,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("InvSqrt",     0.1,   10000,    own_vm_invsqrt,    own_invsqrt,    NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("InvCbrt",     0.1,   10000,    own_vm_invcbrt,    own_invcbrt,    NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Hypot",      -10000, 10000,    own_vm_hypot,      hypot,          NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Erf",        -5,     5,        own_vm_erf,        erf,            NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Erfc",       -2,     5,        own_vm_erfc,       erfc,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("LGamma",      0,     5,        own_vm_lgamma,     lgamma,         NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("TGamma",      0,     5,        own_vm_tgamma,     tgamma,         NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("ExpInt1",     0.1,   5,        own_vm_expint1,    own_expint1,    NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Ceil",       -10000, 10000,    own_vm_ceil,       ceil,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Floor",      -10000, 10000,    own_vm_floor,      floor,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Round",      -10000, 10000,    own_vm_round,      round,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Trunc",      -10000, 10000,    own_vm_trunc,      trunc,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Rint",       -10000, 10000,    own_vm_rint,       rint,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("NearbyInt",  -10000, 10000,    own_vm_nearbyint,  nearbyint,      NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Remainder",  -10000, 10000,    own_vm_remainder,  remainder,      NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("LinearFrac", -1000,  1000,     own_vm_linearfrac, own_linearfrac, NULL,           kVVXtoV,    &in, &out);
    err += own_evaluate_func ("Add",        -10000, 10000,    own_vm_add,        own_add,        own_cadd,       kVVtoV,     &in, &out);
    err += own_evaluate_func ("Sub",        -10000, 10000,    own_vm_sub,        own_sub,        own_csub,       kVVtoV,     &in, &out);
    err += own_evaluate_func ("Mul",        -10000, 10000,    own_vm_mul,        own_mul,        own_cmul,       kVVtoV,     &in, &out);
    err += own_evaluate_func ("Div",        -10000, 10000,    own_vm_div,        own_div,        own_cdiv,       kVVtoV,     &in, &out);
    err += own_evaluate_func ("Sqr",        -10000, 10000,    own_vm_sqr,        own_sqr,        NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Inv",        -10000, 10000,    own_vm_inv,        own_inv,        NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Modf",       -10000, 10000,    own_vm_modf,       own_modf,       NULL,           kVtoVV,     &in, &out);
    err += own_evaluate_func ("Fmod",       -10000, 10000,    own_vm_fmod,       fmod,           NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Fdim",       -10000, 10000,    own_vm_fdim,       fdim,           NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Fmax",       -10000, 10000,    own_vm_fmax,       fmax,           NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Fmin",       -10000, 10000,    own_vm_fmin,       fmin,           NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("MaxMag",     -10000, 10000,    own_vm_maxmag,     own_maxmag,     NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("MinMag",     -10000, 10000,    own_vm_minmag,     own_minmag,     NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("NextAfter",  -10000, 10000,    own_vm_nextafter,  nextafter,      NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("CopySign",   -10000, 10000,    own_vm_copysign,   copysign,       NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Frac",       -10000, 10000,    own_vm_frac,       own_frac,       NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Conj",       -10000, 10000,    own_vm_conj,       NULL,           conj,           kVtoV,      &in, &out);
    err += own_evaluate_func ("MulByConj",  -10000, 10000,    own_vm_mulbyconj,  NULL,           own_cmulbyconj, kVVtoV,     &in, &out);
    err += own_evaluate_func ("CIS",        -10000, 10000,    own_vm_cis,        NULL,           own_cis,        kVRtoVC,    &in, &out);
    err += own_evaluate_func ("Arg",        -10000, 10000,    own_vm_arg,        NULL,           own_carg,       kVCtoVR,    &in, &out);
    err += own_evaluate_func ("Abs",        -10000, 10000,    own_vm_abs,        fabs,           own_cabs,       kVCtoVR,    &in, &out);
    /* Functions with Intel-specific reference LIBM implementations */
#if (defined __INTEL_COMPILER) || (defined __INTEL_LLVM_COMPILER)
    err += own_evaluate_func ("Asinpi",     -0.9,   0.9,      own_vm_asinpi,     asinpi,         NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Acospi",     -0.9,   0.9,      own_vm_acospi,     acospi,         NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Atanpi",     -10000, 10000,    own_vm_atanpi,     atanpi,         NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Atan2pi",    -10000, 10000,    own_vm_atan2pi,    atan2pi,        NULL,           kVVtoV,     &in, &out);
    err += own_evaluate_func ("Sind",       -10000, 10000,    own_vm_sind,       sind,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Cosd",       -10000, 10000,    own_vm_cosd,       cosd,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Tand",       -10000, 10000,    own_vm_tand,       tand,           NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Sinpi",      -10000, 10000,    own_vm_sinpi,      sinpi,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Cospi",      -10000, 10000,    own_vm_cospi,      cospi,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Tanpi",      -10000, 10000,    own_vm_tanpi,      tanpi,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Exp10",      -30,    30,       own_vm_exp10,      exp10,          NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("Log10",       1.01,  100,      own_vm_log10,      log10,          clog10,         kVtoV,      &in, &out);
    err += own_evaluate_func ("ErfInv",     -0.9,   0.9,      own_vm_erfinv,     erfinv,         NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("ErfcInv",    -0.1,   1.9,      own_vm_erfcinv,    erfcinv,        NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("CdfNorm",    -4,     4,        own_vm_cdfnorm,    cdfnorm,        NULL,           kVtoV,      &in, &out);
    err += own_evaluate_func ("CdfNormInv", -0.1,   0.9,      own_vm_cdfnorminv, cdfnorminv,     NULL,           kVtoV,      &in, &out);
#endif

    st   = vmlGetErrStatus();
    fprintf (stdout, "\t===========================================================\n");
    fprintf (stdout, "OpenMP Offload C vm_all_funcs: status: %s[%s], accuracy: %s\n\n",
        vml_status_string(st),
        (st >= VML_STATUS_OK) ? "expected" : "unexpected",
        err > 0 ? "over bounds" : "normal"
    );

    own_deallocate_data (&in, &out);

    return 0;
} /* main */

