#line 1 "narray/gen/tmpl/lib.c"
/*
Ruby/Cumo::GSL - GSL wrapper for Ruby/Cumo::NArray
created on: 2017-03-11
Copyright (C) 2017 Masahiro Tanaka
Copyright (C) 2018 Naotoshi Seo
*/
#include <ruby.h>
#include <assert.h>
#include "cumo.h"
#include "cumo/narray.h"
#include "cumo/template.h"
#include "SFMT.h"
#include "cumo/cuda/memory_pool.h"
#include "cumo/cuda/runtime.h"
#include "cumo/indexer.h"
#define m_map(x) m_num_to_data(rb_yield(m_data_to_num(x)))
narray/types/bit.c
#include <static ID cumo_id_cast;
static ID cumo_id_divmod;
static ID cumo_id_eq;
static ID cumo_id_mulsum;
static ID cumo_id_ne;
static ID cumo_id_pow;
>
VALUE cT;
extern VALUE cRT;
cumo/types/bit.h
void
Init_
#line 1 "narray/gen/tmpl/class.c"
/*
  class definition: Cumo::Bit
*/
VALUE cumo_cBit;
/* Forward declaration: dispatcher defined later in this file. */
static VALUE bit_store(VALUE,VALUE);
#line 1 "narray/gen/tmpl/alloc_func.c"
// GC memsize callback for a Bit ndarray: reports the struct itself, the
// packed bit buffer (1 bit per element, rounded up to whole CUMO_BIT_DIGIT
// words), and the shape array when it is heap-allocated.
// NOTE(review): na->ptr appears to be device memory (freed via
// cumo_cuda_runtime_free below) but is still counted here — confirm intended.
static size_t
_memsize(const void* ptr)
{
size_t size = sizeof(cumo_narray_data_t);
const cumo_narray_data_t *na = (const cumo_narray_data_t*)ptr;
assert(na->base.type == CUMO_NARRAY_DATA_T);
if (na->ptr != NULL) {
// ceil(size bits / bits-per-digit) digits
size += ((na->base.size-1)/8/sizeof(CUMO_BIT_DIGIT)+1)*sizeof(CUMO_BIT_DIGIT);
}
if (na->base.size > 0) {
// shape == &base.size is the inline 1-d shape; only a separately
// allocated shape array contributes to the footprint.
if (na->base.shape != NULL && na->base.shape != &(na->base.size)) {
size += sizeof(size_t) * na->base.ndim;
}
}
return size;
}
// GC free callback for a Bit ndarray: releases the device buffer (via the
// CUDA runtime/memory-pool wrapper), frees a heap-allocated shape array if
// present, then frees the struct itself.
static void
bit_free(void* ptr)
{
cumo_narray_data_t *na = (cumo_narray_data_t*)ptr;
assert(na->base.type == CUMO_NARRAY_DATA_T);
if (na->ptr != NULL) {
cumo_cuda_runtime_free(na->ptr);
na->ptr = NULL;
}
if (na->base.size > 0) {
// shape == &base.size means the inline 1-d shape; do not free it.
if (na->base.shape != NULL && na->base.shape != &(na->base.size)) {
xfree(na->base.shape);
na->base.shape = NULL;
}
}
xfree(na);
}
static cumo_narray_type_info_t bit_info = {
1, // element_bits
0, // element_bytes
1, // element_stride (in bits)
};
static const rb_data_type_t bit_data_type = {
"bit",
{0, Cumo::Bit_free, _memsize,},
&cumo_na_data_type,
&bitbit_info,
0, // flags
};
// TypedData allocator for Cumo::Bit: creates an empty 0-d narray struct with
// no buffer; bit_allocate later allocates the packed bit storage on device.
static VALUE
bit_s_alloc_func(VALUE klass)
{
cumo_narray_data_t *na = ALLOC(cumo_narray_data_t);
na->base.ndim = 0;
na->base.type = CUMO_NARRAY_DATA_T;
na->base.flag[0] = CUMO_NA_FL0_INIT;
na->base.flag[1] = CUMO_NA_FL1_INIT;
na->base.size = 0;
na->base.shape = NULL;
na->base.reduce = INT2FIX(0);
na->ptr = NULL;
return TypedData_Wrap_Struct(klass, &bit_data_type, (void*)na);
}
#line 1 "narray/gen/tmpl_bit/allocate.c"
// Allocate the packed bit buffer for self on device if not yet allocated.
// For a view, delegates allocation to the underlying data object.
static VALUE
bit_allocate(VALUE self)
{
cumo_narray_t *na;
char *ptr;
CumoGetNArray(self,na);
switch(CUMO_NA_TYPE(na)) {
case CUMO_NARRAY_DATA_T:
ptr = CUMO_NA_DATA_PTR(na);
if (na->size > 0 && ptr == NULL) {
// ceil(size bits / bits-per-digit) digits of device memory
ptr = cumo_cuda_runtime_malloc(((na->size-1)/8/sizeof(CUMO_BIT_DIGIT)+1)*sizeof(CUMO_BIT_DIGIT));
CUMO_NA_DATA_PTR(na) = ptr;
}
break;
case CUMO_NARRAY_VIEW_T:
rb_funcall(CUMO_NA_VIEW_DATA(na), rb_intern("allocate"), 0);
break;
default:
rb_raise(rb_eRuntimeError,"invalid narray type");
}
return self;
}
#line 1 "narray/gen/tmpl_bit/extract.c"
/*
  Extract an element only if self is a dimensionless NArray.
  @overload extract
  @return [Numeric,Cumo::NArray]
  --- Extract element value as Ruby Object if self is a dimensionless NArray,
  otherwise returns self.
*/
// TODO(sonots): Return Cumo::Bit instead of ruby built-in object to avoid synchronization
static VALUE
bit_extract(VALUE self)
{
CUMO_BIT_DIGIT *ptr, val;
size_t pos;
cumo_narray_t *na;
CumoGetNArray(self,na);
if (na->ndim==0) {
pos = cumo_na_get_offset(self);
ptr = (CUMO_BIT_DIGIT*)cumo_na_get_pointer_for_read(self);
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("bit_extract", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
// pick the single bit at offset pos out of the packed digit array
val = ((*((ptr)+(pos)/CUMO_NB)) >> ((pos)%CUMO_NB)) & 1u;
cumo_na_release_lock(self);
return INT2FIX(val);
}
return self;
}
#line 1 "narray/gen/tmpl_bit/extract_cpu.c"
/*
  Extract an element only if self is a dimensionless NArray.
  @overload extract_cpu
  @return [Numeric,Cumo::NArray]
  --- Extract element value as Ruby Object if self is a dimensionless NArray,
  otherwise returns self.
*/
static VALUE
bit_extract_cpu(VALUE self)
{
CUMO_BIT_DIGIT *ptr, val;
size_t pos;
cumo_narray_t *na;
CumoGetNArray(self,na);
if (na->ndim==0) {
pos = cumo_na_get_offset(self);
ptr = (CUMO_BIT_DIGIT*)cumo_na_get_pointer_for_read(self);
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("bit_extract_cpu", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
// pick the single bit at offset pos out of the packed digit array
val = ((*((ptr)+(pos)/CUMO_NB)) >> ((pos)%CUMO_NB)) & 1u;
cumo_na_release_lock(self);
return INT2FIX(val);
}
return self;
}
#line 1 "narray/gen/tmpl/new_dim0.c"
// CUDA kernel launcher (defined in the .cu counterpart) that writes x to *ptr.
void cumo_bit_new_dim0_kernel_launch(dtype *ptr, dtype x);
// Create a new 0-dimensional Cumo::Bit narray holding the single value x.
static VALUE
bit_new_dim0(dtype x)
{
VALUE v;
dtype *ptr;
v = cumo_na_new(cT, 0, NULL);
ptr = (dtype*)cumo_na_get_pointer_for_write(v);
cumo_bit_new_dim0_kernel_launch(ptr, x);
cumo_na_release_lock(v);
return v;
}
#line 1 "narray/gen/tmpl/store.c"
/*
Store elements to Cumo::
#line 1 "narray/gen/tmpl/store_numeric.c"
static VALUE
(VALUE self, VALUE obj)
{
dtype x;
x = m_num_to_data(obj);
obj = bit_store_numeric_new_dim0(x);
bit(self,obj);
return self;
}
bit_store
#line 1 "narray/gen/tmpl_bit/store_bit.c"
static void
(cumo_na_loop_t *const lp)
{
size_t n;
size_t p1, p3;
ssize_t s1, s3;
size_t *idx1, *idx3;
int o1, l1, r1, len;
CUMO_BIT_DIGIT *a1, *a3;
CUMO_BIT_DIGIT x;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_store_bit", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, n);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a3, p3, s3, idx3);
CUMO_INIT_PTR_BIT_IDX(lp, 1, a1, p1, s1, idx1);
if (s1!=1 || s3!=1 || idx1 || idx3) {
for (; n--;) {
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, x);
}
} else {
o1 = p1 % CUMO_NB;
o1 -= p3;
l1 = CUMO_NB+o1;
r1 = CUMO_NB-o1;
if (p3>0 || n<CUMO_NB) {
len = CUMO_NB - p3;
if ((int)n<len) len=n;
if (o1>=0) x = *a1>>o1;
else x = *a1<<-o1;
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1;
a1++;
*a3 = (x & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3));
a3++;
n -= len;
}
if (o1==0) {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *(a1++);
*(a3++) = x;
}
} else {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
if (o1>0) x |= *(a1+1)<<r1;
a1++;
*(a3++) = x;
}
}
if (n>0) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
*a3 = (x & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n);
}
}
}
static VALUE
bitBit(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {bit_store_bit, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj);
return self;
}
iter_bit_store_bit
#line 1 "narray/gen/tmpl_bit/store_from.c"
static void
(cumo_na_loop_t *const lp)
{
ssize_t i, s1, s2;
size_t p1;
char *p2;
size_t *idx1, *idx2;
iter_bit_store_dfloat x;
CUMO_BIT_DIGIT *a1;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_double", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,dfloatDFloat,x);
y = double(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,m_from_real,x);
y = double(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
} else {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_real,x);
y = double(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_real,x);
y = double(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
}
}
static VALUE
m_from_real(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {bit_store_dfloat, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj);
return self;
}
iter_bit_store_dfloat
#line 1 "narray/gen/tmpl_bit/store_from.c"
static void
(cumo_na_loop_t *const lp)
{
ssize_t i, s1, s2;
size_t p1;
char *p2;
size_t *idx1, *idx2;
iter_bit_store_sfloat x;
CUMO_BIT_DIGIT *a1;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_float", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,sfloatSFloat,x);
y = float(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,m_from_real,x);
y = float(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
} else {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_real,x);
y = float(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_real,x);
y = float(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
}
}
static VALUE
m_from_real(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {bit_store_sfloat, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj);
return self;
}
iter_bit_store_sfloat
#line 1 "narray/gen/tmpl_bit/store_from.c"
static void
(cumo_na_loop_t *const lp)
{
ssize_t i, s1, s2;
size_t p1;
char *p2;
size_t *idx1, *idx2;
iter_bit_store_int64 x;
CUMO_BIT_DIGIT *a1;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_int64_t", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,int64Int64,x);
y = int64_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,m_from_int64,x);
y = int64_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
} else {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_int64,x);
y = int64_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_int64,x);
y = int64_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
}
}
static VALUE
m_from_int64(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {bit_store_int64, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj);
return self;
}
iter_bit_store_int64
#line 1 "narray/gen/tmpl_bit/store_from.c"
static void
(cumo_na_loop_t *const lp)
{
ssize_t i, s1, s2;
size_t p1;
char *p2;
size_t *idx1, *idx2;
iter_bit_store_int32 x;
CUMO_BIT_DIGIT *a1;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_int32_t", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,int32Int32,x);
y = int32_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,m_from_int32,x);
y = int32_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
} else {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_int32,x);
y = int32_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_int32,x);
y = int32_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
}
}
static VALUE
m_from_int32(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {bit_store_int32, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj);
return self;
}
iter_bit_store_int32
#line 1 "narray/gen/tmpl_bit/store_from.c"
static void
(cumo_na_loop_t *const lp)
{
ssize_t i, s1, s2;
size_t p1;
char *p2;
size_t *idx1, *idx2;
iter_bit_store_int16 x;
CUMO_BIT_DIGIT *a1;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_int16_t", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,int16Int16,x);
y = int16_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,m_from_sint,x);
y = int16_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
} else {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x);
y = int16_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x);
y = int16_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
}
}
static VALUE
m_from_sint(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {bit_store_int16, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj);
return self;
}
iter_bit_store_int16
#line 1 "narray/gen/tmpl_bit/store_from.c"
static void
(cumo_na_loop_t *const lp)
{
ssize_t i, s1, s2;
size_t p1;
char *p2;
size_t *idx1, *idx2;
iter_bit_store_int8 x;
CUMO_BIT_DIGIT *a1;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_int8_t", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,int8Int8,x);
y = int8_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,m_from_sint,x);
y = int8_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
} else {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x);
y = int8_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x);
y = int8_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
}
}
static VALUE
m_from_sint(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {bit_store_int8, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj);
return self;
}
iter_bit_store_int8
#line 1 "narray/gen/tmpl_bit/store_from.c"
static void
(cumo_na_loop_t *const lp)
{
ssize_t i, s1, s2;
size_t p1;
char *p2;
size_t *idx1, *idx2;
iter_bit_store_uint64 x;
CUMO_BIT_DIGIT *a1;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_u_int64_t", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,uint64UInt64,x);
y = u_int64_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,m_from_uint64,x);
y = u_int64_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
} else {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_uint64,x);
y = u_int64_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_uint64,x);
y = u_int64_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
}
}
static VALUE
m_from_uint64(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {bit_store_uint64, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj);
return self;
}
iter_bit_store_uint64
#line 1 "narray/gen/tmpl_bit/store_from.c"
static void
(cumo_na_loop_t *const lp)
{
ssize_t i, s1, s2;
size_t p1;
char *p2;
size_t *idx1, *idx2;
iter_bit_store_uint32 x;
CUMO_BIT_DIGIT *a1;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_u_int32_t", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,uint32UInt32,x);
y = u_int32_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,m_from_uint32,x);
y = u_int32_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
} else {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_uint32,x);
y = u_int32_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_uint32,x);
y = u_int32_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
}
}
static VALUE
m_from_uint32(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {bit_store_uint32, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj);
return self;
}
iter_bit_store_uint32
#line 1 "narray/gen/tmpl_bit/store_from.c"
static void
(cumo_na_loop_t *const lp)
{
ssize_t i, s1, s2;
size_t p1;
char *p2;
size_t *idx1, *idx2;
iter_bit_store_uint16 x;
CUMO_BIT_DIGIT *a1;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_u_int16_t", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,uint16UInt16,x);
y = u_int16_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,m_from_sint,x);
y = u_int16_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
} else {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x);
y = u_int16_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x);
y = u_int16_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
}
}
static VALUE
m_from_sint(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {bit_store_uint16, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj);
return self;
}
iter_bit_store_uint16
#line 1 "narray/gen/tmpl_bit/store_from.c"
static void
(cumo_na_loop_t *const lp)
{
ssize_t i, s1, s2;
size_t p1;
char *p2;
size_t *idx1, *idx2;
iter_bit_store_uint8 x;
CUMO_BIT_DIGIT *a1;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_u_int8_t", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,uint8UInt8,x);
y = u_int8_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,m_from_sint,x);
y = u_int8_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
} else {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x);
y = u_int8_t(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_from_sint,x);
y = u_int8_t(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
}
}
static VALUE
m_from_sint(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {bit_store_uint8, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj);
return self;
}
iter_bit_store_uint8
#line 1 "narray/gen/tmpl_bit/store_from.c"
static void
(cumo_na_loop_t *const lp)
{
ssize_t i, s1, s2;
size_t p1;
char *p2;
size_t *idx1, *idx2;
iter_bit_store_robject x;
CUMO_BIT_DIGIT *a1;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("store_VALUE", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,robjectRObject,x);
y = VALUE(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_INDEX(p2,idx2,m_num_to_data,x);
y = VALUE(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
} else {
if (idx1) {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_num_to_data,x);
y = VALUE(x);
CUMO_STORE_BIT(a1, p1+*idx1, y); idx1++;
}
} else {
for (; i--;) {
CUMO_GET_DATA_STRIDE(p2,s2,m_num_to_data,x);
y = VALUE(x);
CUMO_STORE_BIT(a1, p1, y); p1+=s1;
}
}
}
}
static VALUE
m_num_to_data(VALUE self, VALUE obj)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{Qnil,0}};
cumo_ndfunc_t ndf = {bit_store_robject, CUMO_FULL_LOOP, 2,0, ain,0};
cumo_na_ndloop(&ndf, 2, self, obj);
return self;
}
iter_bit_store_robject
#line 1 "narray/gen/tmpl_bit/store_array.c"
static void
(cumo_na_loop_t *const lp)
{
size_t i, n;
size_t i1, n1;
VALUE v1, *ptr;
CUMO_BIT_DIGIT *a1;
size_t p1;
size_t s1, *idx1;
VALUE x;
double y;
CUMO_BIT_DIGIT z;
size_t len, c;
double beg, step;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_store_array", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, n);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
v1 = lp->args[1].value;
i = 0;
if (lp->args[1].ptr) {
if (v1 == Qtrue) {
iter_arraybit_store_(lp);
i = lp->args[1].shape[0];
if (idx1) {
idx1 += i;
} else {
p1 += s1 * i;
}
}
goto loop_end;
}
ptr = &v1;
switch(TYPE(v1)) {
case T_ARRAY:
n1 = RARRAY_LEN(v1);
ptr = RARRAY_PTR(v1);
break;
case T_NIL:
n1 = 0;
break;
default:
n1 = 1;
}
if (idx1) {
for (i=i1=0; i1<n1 && i<n; i++,i1++) {
x = ptr[i1];
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) {
cumo_na_step_sequence(x,&len,&beg,&step);
for (c=0; c<len && i<n; c++,i++) {
y = beg + step * c;
z = m_from_double(y);
CUMO_STORE_BIT(a1, p1+*idx1, z); idx1++;
}
}
if (TYPE(x) != T_ARRAY) {
if (x == Qnil) x = INT2FIX(0);
z = m_num_to_data(x);
CUMO_STORE_BIT(a1, p1+*idx1, z); idx1++;
}
}
} else {
for (i=i1=0; i1<n1 && i<n; i++,i1++) {
x = ptr[i1];
if (rb_obj_is_kind_of(x, rb_cRange) || rb_obj_is_kind_of(x, cumo_na_cStep)) {
cumo_na_step_sequence(x,&len,&beg,&step);
for (c=0; c<len && i<n; c++,i++) {
y = beg + step * c;
z = m_from_double(y);
CUMO_STORE_BIT(a1, p1, z); p1+=s1;
}
}
if (TYPE(x) != T_ARRAY) {
z = m_num_to_data(x);
CUMO_STORE_BIT(a1, p1, z); p1+=s1;
}
}
}
loop_end:
z = m_zero;
if (idx1) {
for (; i<n; i++) {
CUMO_STORE_BIT(a1, p1+*idx1, z); idx1++;
}
} else {
for (; i<n; i++) {
CUMO_STORE_BIT(a1, p1, z); p1+=s1;
}
}
}
static VALUE
bitbit(VALUE self, VALUE rary)
{
cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0}, {rb_cArray,0}};
cumo_ndfunc_t ndf = {bit_store_array, CUMO_FULL_LOOP, 2, 0, ain, 0};
cumo_na_ndloop_store_rarray(&ndf, self, rary);
return self;
}
iter_bit_store_array from other.
@overload store(other)
@param [Object] other
@return [Cumo::Bit] self
*/
static VALUE
Bit(VALUE self, VALUE obj)
{
VALUE r, klass;
klass = CLASS_OF(obj);
if (bit_store) {
klass==cumo_cBit(self,obj);
return self;
}
if (bit_store_bit) {
IS_INTEGER_CLASS(klass) || klass==rb_cFloat || klass==rb_cComplex(self,obj);
return self;
}
if (bit_store_numeric) {
klass==cumo_cDFloat(self,obj);
return self;
}
if (bit_store_dfloat) {
klass==cumo_cSFloat(self,obj);
return self;
}
if (bit_store_sfloat) {
klass==cumo_cInt64(self,obj);
return self;
}
if (bit_store_int64) {
klass==cumo_cInt32(self,obj);
return self;
}
if (bit_store_int32) {
klass==cumo_cInt16(self,obj);
return self;
}
if (bit_store_int16) {
klass==cumo_cInt8(self,obj);
return self;
}
if (bit_store_int8) {
klass==cumo_cUInt64(self,obj);
return self;
}
if (bit_store_uint64) {
klass==cumo_cUInt32(self,obj);
return self;
}
if (bit_store_uint32) {
klass==cumo_cUInt16(self,obj);
return self;
}
if (bit_store_uint16) {
klass==cumo_cUInt8(self,obj);
return self;
}
if (bit_store_uint8) {
klass==cumo_cRObject(self,obj);
return self;
}
if (bit_store_robject) {
klass==rb_cArray(self,obj);
return self;
}
if (CumoIsNArray(obj)) {
r = rb_funcall(obj, rb_intern("coerce_cast"), 1, cT);
if (CLASS_OF(r)==cT) {
bit_store_array(self,r);
return self;
}
}
rb_raise(cumo_na_eCastError, "unknown conversion from %s to %s",
rb_class2name(CLASS_OF(obj)),
rb_class2name(CLASS_OF(self)));
return self;
}
bit_store
#line 1 "narray/gen/tmpl/extract_data.c"
/*
Convert a data value of obj (with a single element) to dtype.
*/
/*
static dtype
(VALUE obj)
{
cumo_narray_t *na;
dtype x;
char *ptr;
size_t pos;
VALUE r, klass;
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("bit_extract_data", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
if (CumoIsNArray(obj)) {
CumoGetNArray(obj,na);
if (na->size != 1) {
rb_raise(cumo_na_eShapeError,"narray size should be 1");
}
klass = CLASS_OF(obj);
ptr = cumo_na_get_pointer_for_read(obj);
pos = cumo_na_get_offset(obj);
if (extract_databit) {
klass==cumo_cBit;
return x;
}
if ({BIT_DIGIT b; CUMO_LOAD_BIT(ptr,pos,b); x = m_from_sint(b);}) {
klass==cumo_cDFloat;
return x;
}
if (x = m_from_real(*(double*)(ptr+pos))) {
klass==cumo_cSFloat;
return x;
}
if (x = m_from_real(*(float*)(ptr+pos))) {
klass==cumo_cInt64;
return x;
}
if (x = m_from_int64(*(int64_t*)(ptr+pos))) {
klass==cumo_cInt32;
return x;
}
if (x = m_from_int32(*(int32_t*)(ptr+pos))) {
klass==cumo_cInt16;
return x;
}
if (x = m_from_sint(*(int16_t*)(ptr+pos))) {
klass==cumo_cInt8;
return x;
}
if (x = m_from_sint(*(int8_t*)(ptr+pos))) {
klass==cumo_cUInt64;
return x;
}
if (x = m_from_uint64(*(u_int64_t*)(ptr+pos))) {
klass==cumo_cUInt32;
return x;
}
if (x = m_from_uint32(*(u_int32_t*)(ptr+pos))) {
klass==cumo_cUInt16;
return x;
}
if (x = m_from_sint(*(u_int16_t*)(ptr+pos))) {
klass==cumo_cUInt8;
return x;
}
if (x = m_from_sint(*(u_int8_t*)(ptr+pos))) {
klass==cumo_cRObject;
return x;
}
// coerce
r = rb_funcall(obj, rb_intern("coerce_cast"), 1, cT);
if (CLASS_OF(r)==cT) {
return x = m_num_to_data(*(VALUE*)(ptr+pos))(r);
}
rb_raise(cumo_na_eCastError, "unknown conversion from %s to %s",
rb_class2name(CLASS_OF(obj)),
rb_class2name(cT));
}
if (TYPE(obj)==T_ARRAY) {
if (RARRAY_LEN(obj) != 1) {
rb_raise(cumo_na_eShapeError,"array size should be 1");
}
return m_num_to_data(RARRAY_AREF(obj,0));
}
return m_num_to_data(obj);
}
*/
bit_extract_data
#line 1 "narray/gen/tmpl/cast_array.c"
// Build a new Cumo::Bit narray shaped like rary and fill it from the array.
static VALUE
bit_cast_array(VALUE rary)
{
VALUE nary;
cumo_narray_t *na;
nary = cumo_na_s_new_like(cT, rary);
CumoGetNArray(nary,na);
if (na->size > 0) {
bit_store_array(nary,rary);
}
return nary;
}
#line 1 "narray/gen/tmpl/cast.c"
/*
  Cast object to Cumo::Bit.
  @overload [](elements)
  @overload cast(array)
  @param [Numeric,Array] elements
  @param [Array] array
  @return [Cumo::Bit]
*/
static VALUE
bit_s_cast(VALUE type, VALUE obj)
{
VALUE v;
cumo_narray_t *na;
dtype x;
if (CLASS_OF(obj)==cT) {
return obj;
}
if (RTEST(rb_obj_is_kind_of(obj,rb_cNumeric))) {
x = m_num_to_data(obj);
return bit_new_dim0(x);
}
if (RTEST(rb_obj_is_kind_of(obj,rb_cArray))) {
return bit_cast_array(obj);
}
if (CumoIsNArray(obj)) {
v = cumo_na_new(cT, CUMO_NA_NDIM(na), CUMO_NA_SHAPE(na));
CumoGetNArray(obj,na);
v = cumo_na_new(cT, CUMO_NA_NDIM(na), CUMO_NA_SHAPE(na));
if (CUMO_NA_SIZE(na) > 0) {
bit_store(v,obj);
}
return v;
}
rb_raise(cumo_na_eCastError,"cannot cast to %s",rb_class2name(type));
return Qnil;
}
#line 1 "narray/gen/tmpl_bit/aref.c"
// Forward declaration: CPU-compatible element reference, defined below.
static VALUE
bit_aref_cpu(int argc, VALUE *argv, VALUE self);
/*
Array element referenece or slice view.
@overload [](dim0,...,dimL)
@param [Numeric,Range,etc] dim0,...,dimL Multi-dimensional Index.
  @return [Numeric,Cumo::Bit] Element object or NArray view.
--- Returns the element at +dim0+, +dim1+, ... are Numeric indices
for each dimension, or returns a NArray View as a sliced subarray if
+dim0+, +dim1+, ... includes other than Numeric index, e.g., Range
or Array or true.
@example
a = Cumo::DFloat.new(4,5).seq
=> Cumo::DFloat#shape=[4,5]
[[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19]]
a[1,1]
=> 6.0
a[1..3,1]
=> Cumo::DFloat#shape=[3]
[6, 11, 16]
a[1,[1,3,4]]
=> Cumo::DFloat#shape=[3]
[6, 8, 9]
a[true,2].fill(99)
a
=> Cumo::DFloat#shape=[4,5]
[[0, 1, 99, 3, 4],
[5, 6, 99, 8, 9],
[10, 11, 99, 13, 14],
[15, 16, 99, 18, 19]]
*/
// Element reference or slice view; falls back to the CPU path when
// Numo-compatible mode is enabled.
static VALUE
bit_aref(int argc, VALUE *argv, VALUE self)
{
if (cumo_compatible_mode_enabled_p()) {
return bit_aref_cpu(argc, argv, self);
} else {
int result_nd;
size_t pos;
result_nd = cumo_na_get_result_dimension(self, argc, argv, 1, &pos);
return cumo_na_aref_main(argc, argv, self, 0, result_nd, pos);
}
}
#line 1 "narray/gen/tmpl_bit/aref_cpu.c"
/*
Array element referenece or slice view.
@overload [](dim0,...,dimL)
@param [Numeric,Range,etc] dim0,...,dimL Multi-dimensional Index.
  @return [Numeric,Cumo::Bit] Element object or NArray view.
--- Returns the element at +dim0+, +dim1+, ... are Numeric indices
for each dimension, or returns a NArray View as a sliced subarray if
+dim0+, +dim1+, ... includes other than Numeric index, e.g., Range
or Array or true.
@example
a = Cumo::DFloat.new(4,5).seq
=> Cumo::DFloat#shape=[4,5]
[[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19]]
a[1,1]
=> 6.0
a[1..3,1]
=> Cumo::DFloat#shape=[3]
[6, 11, 16]
a[1,[1,3,4]]
=> Cumo::DFloat#shape=[3]
[6, 8, 9]
a[true,2].fill(99)
a
=> Cumo::DFloat#shape=[4,5]
[[0, 1, 99, 3, 4],
[5, 6, 99, 8, 9],
[10, 11, 99, 13, 14],
[15, 16, 99, 18, 19]]
*/
// CPU path of element reference: when the indices select a single element
// (nd==0), synchronize and load that one bit; otherwise return a view.
static VALUE
bit_aref_cpu(int argc, VALUE *argv, VALUE self)
{
int nd;
size_t pos;
char *ptr;
dtype x;
nd = cumo_na_get_result_dimension(self, argc, argv, 1, &pos);
if (nd) {
return cumo_na_aref_main(argc, argv, self, 0, nd, pos);
} else {
ptr = cumo_na_get_pointer_for_read(self);
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("bit_aref_cpu", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_LOAD_BIT(ptr,pos,x);
return m_data_to_num(x);
}
}
#line 1 "narray/gen/tmpl_bit/aset.c"
/*
Array element(s) set.
@overload []=(dim0,..,dimL,val)
@param [Numeric,Range,etc] dim0,..,dimL Multi-dimensional Index.
@param [Numeric,Cumo::NArray,etc] val Value(s) to be set to self.
@return [Numeric] returns val (last argument).
--- Replace element(s) at +dim0+, +dim1+, ... (index/range/array/true
for each dimention). Broadcasting mechanism is applied.
@example
a = Cumo::DFloat.new(3,4).seq
=> Cumo::DFloat#shape=[3,4]
[[0, 1, 2, 3],
[4, 5, 6, 7],
[8, 9, 10, 11]]
a[1,2]=99
a
=> Cumo::DFloat#shape=[3,4]
[[0, 1, 2, 3],
[4, 5, 99, 7],
[8, 9, 10, 11]]
a[1,[0,2]] = [101,102]
a
=> Cumo::DFloat#shape=[3,4]
[[0, 1, 2, 3],
[101, 5, 102, 7],
[8, 9, 10, 11]]
a[1,true]=99
a
=> Cumo::DFloat#shape=[3,4]
[[0, 1, 2, 3],
[99, 99, 99, 99],
[8, 9, 10, 11]]
*/
// Element(s) assignment: with no index arguments, store val over all of
// self; otherwise take a view of the indexed region and store val into it.
static VALUE
bit_aset(int argc, VALUE *argv, VALUE self)
{
int nd;
size_t pos;
VALUE a;
argc--; // last argument is the value to assign
if (argc==0) {
bit_store(self, argv[argc]);
} else {
nd = cumo_na_get_result_dimension(self, argc, argv, 1, &pos);
a = cumo_na_aref_main(argc, argv, self, 0, nd, pos);
bit_store(a, argv[argc]);
}
return argv[argc];
}
#line 1 "narray/gen/tmpl/coerce_cast.c"
/*
return NArray with cast to the type of self.
@overload coerce_cast(type)
@return [nil]
*/
// Coerce-cast to self's type: Bit declines to coerce (returns nil).
static VALUE
bit_coerce_cast(VALUE self, VALUE type)
{
return Qnil;
}
#line 1 "narray/gen/tmpl_bit/to_a.c"
static void
(cumo_na_loop_t *const lp)
{
size_t i;
CUMO_BIT_DIGIT *a1;
size_t p1;
ssize_t s1;
size_t *idx1;
CUMO_BIT_DIGIT x=0;
VALUE a, y;
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_to_a", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
a = rb_ary_new2(i);
rb_ary_push(lp->args[1].value, a);
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a1,p1+*idx1,x); idx1++;
y = m_data_to_num(x);
rb_ary_push(a,y);
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a1,p1,x); p1+=s1;
y = m_data_to_num(x);
rb_ary_push(a,y);
}
}
}
/*
Convert self to Array.
@overload to_abit
@return [Array]
*/
static VALUE
to_a(VALUE self)
{
cumo_ndfunc_arg_in_t ain[3] = {{Qnil,0},{cumo_sym_loop_opt},{cumo_sym_option}};
cumo_ndfunc_arg_out_t aout[1] = {{rb_cArray,0}}; // dummy?
cumo_ndfunc_t ndf = {bit_to_a, CUMO_FULL_LOOP_NIP, 3,1, ain,aout};
return cumo_na_ndloop_cast_narray_to_rarray(&ndf, self, Qnil);
}
iter_bit_to_a
#line 1 "narray/gen/tmpl_bit/fill.c"
static void
(cumo_na_loop_t *const lp)
{
size_t n;
size_t p3;
ssize_t s3;
size_t *idx3;
int len;
CUMO_BIT_DIGIT *a3;
CUMO_BIT_DIGIT y;
VALUE x = lp->option;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_fill", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
if (x==INT2FIX(0) || x==Qfalse) {
y = 0;
} else
if (x==INT2FIX(1) || x==Qtrue) {
y = ~(CUMO_BIT_DIGIT)0;
} else {
rb_raise(rb_eArgError, "invalid value for Bit");
}
CUMO_INIT_COUNTER(lp, n);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a3, p3, s3, idx3);
if (idx3) {
y = y&1;
for (; n--;) {
CUMO_STORE_BIT(a3, p3+*idx3, y); idx3++;
}
} else if (s3!=1) {
y = y&1;
for (; n--;) {
CUMO_STORE_BIT(a3, p3, y); p3+=s3;
}
} else {
if (p3>0 || n<CUMO_NB) {
len = CUMO_NB - p3;
if ((int)n<len) len=n;
*a3 = (y & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3));
a3++;
n -= len;
}
for (; n>=CUMO_NB; n-=CUMO_NB) {
*(a3++) = y;
}
if (n>0) {
*a3 = (y & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n);
}
}
}
/*
  Fill elements with other.
  @overload fill other
  @param [Numeric] other
  @return [Cumo::Bit] self.
*/
static VALUE
bit_fill(VALUE self, VALUE val)
{
    cumo_ndfunc_arg_in_t ain[2] = {{CUMO_OVERWRITE,0},{cumo_sym_option}};
    cumo_ndfunc_t ndf = {iter_bit_fill, CUMO_FULL_LOOP, 2,0, ain,0};
    cumo_na_ndloop(&ndf, 2, self, val);
    return self;
}
#line 1 "narray/gen/tmpl_bit/format.c"
// Format a single bit value: uses the default numeric formatting when fmt is
// nil, otherwise applies the Ruby format string via String#%.
static VALUE
format_bit(VALUE fmt, dtype x)
{
    if (NIL_P(fmt)) {
        char s[4];
        int n;
        n = m_sprintf(s,x);
        return rb_str_new(s,n);
    }
    return rb_funcall(fmt, '%', 1, m_data_to_num(x));
}
// ndloop iterator for Bit#format: formats each bit into a Ruby String and
// stores the VALUEs into the RObject output array.
static void
iter_bit_format(cumo_na_loop_t *const lp)
{
    size_t i;
    CUMO_BIT_DIGIT *a1, x=0;
    size_t p1;
    char *p2;
    ssize_t s1, s2;
    size_t *idx1;
    VALUE y;
    VALUE fmt = lp->option;
    // Reads device memory on the CPU: synchronize with the GPU first.
    CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_format", "");
    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
    CUMO_INIT_COUNTER(lp, i);
    CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
    CUMO_INIT_PTR(lp, 1, p2, s2);
    if (idx1) {
        // Index-array access.
        for (; i--;) {
            CUMO_LOAD_BIT(a1, p1+*idx1, x); idx1++;
            y = format_bit(fmt, x);
            CUMO_SET_DATA_STRIDE(p2, s2, VALUE, y);
        }
    } else {
        // Strided access.
        for (; i--;) {
            CUMO_LOAD_BIT(a1, p1, x); p1+=s1;
            y = format_bit(fmt, x);
            CUMO_SET_DATA_STRIDE(p2, s2, VALUE, y);
        }
    }
}
/*
  Format elements into strings.
  @overload format format
  @param [String] format
  @return [Cumo::RObject] array of formatted strings.
*/
static VALUE
bit_format(int argc, VALUE *argv, VALUE self)
{
    VALUE fmt=Qnil;
    cumo_ndfunc_arg_in_t ain[2] = {{Qnil,0},{cumo_sym_option}};
    cumo_ndfunc_arg_out_t aout[1] = {{cumo_cRObject,0}};
    cumo_ndfunc_t ndf = {iter_bit_format, CUMO_FULL_LOOP_NIP, 2,1, ain,aout};
    rb_scan_args(argc, argv, "01", &fmt);
    return cumo_na_ndloop(&ndf, 2, self, fmt);
}
#line 1 "narray/gen/tmpl_bit/format_to_a.c"
static void
(cumo_na_loop_t *const lp)
{
size_t i;
CUMO_BIT_DIGIT *a1, x=0;
size_t p1;
ssize_t s1;
size_t *idx1;
VALUE y;
VALUE fmt = lp->option;
volatile VALUE a;
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
a = rb_ary_new2(i);
rb_ary_push(lp->args[1].value, a);
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1+*idx1, x); idx1++;
y = format_bit(fmt, x);
rb_ary_push(a,y);
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1, x); p1+=s1;
y = format_bit(fmt, x);
rb_ary_push(a,y);
}
}
}
/*
  Format elements into strings.
  @overload format_to_a format
  @param [String] format
  @return [Array] array of formatted strings.
*/
static VALUE
bit_format_to_a(int argc, VALUE *argv, VALUE self)
{
    VALUE fmt=Qnil;
    cumo_ndfunc_arg_in_t ain[3] = {{Qnil,0},{cumo_sym_loop_opt},{cumo_sym_option}};
    cumo_ndfunc_arg_out_t aout[1] = {{rb_cArray,0}}; // dummy?
    cumo_ndfunc_t ndf = {iter_bit_format_to_a, CUMO_FULL_LOOP_NIP, 3,1, ain,aout};
    // Reads device memory on the CPU: synchronize with the GPU first.
    CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_format_to_a", "");
    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
    rb_scan_args(argc, argv, "01", &fmt);
    return cumo_na_ndloop_cast_narray_to_rarray(&ndf, self, fmt);
}
#line 1 "narray/gen/tmpl_bit/inspect.c"
// Per-element callback for inspect: load one bit at pos and format it.
static VALUE
iter_bit_inspect(char *ptr, size_t pos, VALUE fmt)
{
    dtype x;
    CUMO_LOAD_BIT(ptr,pos,x);
    return format_bit(fmt, x);
}
/*
  Returns a string containing a human-readable representation of NArray.
  @overload inspect
  @return [String]
*/
static VALUE
bit_inspect(VALUE ary)
{
    // Reads device memory on the CPU: synchronize with the GPU first.
    CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("bit_inspect", "");
    cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
    return cumo_na_ndloop_inspect(ary, iter_bit_inspect, Qnil);
}
#line 1 "narray/gen/tmpl_bit/each.c"
static void
(cumo_na_loop_t *const lp)
{
size_t i;
CUMO_BIT_DIGIT *a1, x=0;
size_t p1;
ssize_t s1;
size_t *idx1;
VALUE y;
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_each", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1+*idx1, x); idx1++;
y = m_data_to_num(x);
rb_yield(y);
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1, x); p1+=s1;
y = m_data_to_num(x);
rb_yield(y);
}
}
}
/*
  Calls the given block once for each element in self,
  passing that element as a parameter.
  @overload each
  @return [Cumo::NArray] self
  For a block {|x| ... }
  @yield [x] x is element of NArray.
*/
static VALUE
bit_each(VALUE self)
{
    cumo_ndfunc_arg_in_t ain[1] = {{Qnil,0}};
    cumo_ndfunc_t ndf = {iter_bit_each, CUMO_FULL_LOOP_NIP, 1,0, ain,0};
    cumo_na_ndloop(&ndf, 1, self);
    return self;
}
#line 1 "narray/gen/tmpl_bit/each_with_index.c"
// Yield [value, i, j, ...] to the block: the element value followed by its
// multidimensional indices. c holds nd+1 index counters; a has md slots.
static inline void
yield_each_with_index(dtype x, size_t *c, VALUE *a, int nd, int md)
{
    int k = 0;
    a[0] = m_data_to_num(x);
    while (k <= nd) {
        a[k+1] = SIZET2NUM(c[k]);
        k++;
    }
    rb_yield(rb_ary_new4(md, a));
}
static void
(cumo_na_loop_t *const lp)
{
size_t i;
CUMO_BIT_DIGIT *a1, x=0;
size_t p1;
ssize_t s1;
size_t *idx1;
VALUE *a;
size_t *c;
int nd, md;
c = (size_t*)(lp->opt_ptr);
nd = lp->ndim - 1;
md = lp->ndim + 1;
a = ALLOCA_N(VALUE,md);
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
c[nd] = 0;
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_each_with_index", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1+*idx1, x); idx1++;
yield_each_with_index(x,c,a,nd,md);
c[nd]++;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1, x); p1+=s1;
yield_each_with_index(x,c,a,nd,md);
c[nd]++;
}
}
}
/*
  Invokes the given block once for each element of self,
  passing that element and indices along each axis as parameters.
  @overload each_with_index
  @return [Cumo::NArray] self
  For a block {|x,i,j,...| ... }
  @yield [x,i,j,...] x is an element, i,j,... are multidimensional indices.
*/
static VALUE
bit_each_with_index(VALUE self)
{
    cumo_ndfunc_arg_in_t ain[1] = {{Qnil,0}};
    cumo_ndfunc_t ndf = {iter_bit_each_with_index, CUMO_FULL_LOOP_NIP, 1,0, ain,0};
    cumo_na_ndloop_with_index(&ndf, 1, self);
    return self;
}
#line 1 "narray/gen/tmpl_bit/unary.c"
static void
(cumo_na_loop_t *const lp)
{
size_t n;
size_t p1, p3;
ssize_t s1, s3;
size_t *idx1, *idx3;
int o1, l1, r1, len;
CUMO_BIT_DIGIT *a1, *a3;
CUMO_BIT_DIGIT x;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_copy", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, n);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_BIT_IDX(lp, 1, a3, p3, s3, idx3);
if (s1!=1 || s3!=1 || idx1 || idx3) {
for (; n--;) {
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x);
y = m_copybit(x);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, y);
}
} else {
o1 = p1 % CUMO_NB;
o1 -= p3;
l1 = CUMO_NB+o1;
r1 = CUMO_NB-o1;
if (p3>0 || n<CUMO_NB) {
len = CUMO_NB - p3;
if ((int)n<len) len=n;
if (o1>=0) x = *a1>>o1;
else x = *a1<<-o1;
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1;
a1++;
y = m_copy(x);
*a3 = (y & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3));
a3++;
n -= len;
}
if (o1==0) {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *(a1++);
y = m_copy(x);
*(a3++) = y;
}
} else {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
if (o1>0) x |= *(a1+1)<<r1;
a1++;
y = m_copy(x);
*(a3++) = y;
}
}
if (n>0) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
y = m_copy(x);
*a3 = (y & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n);
}
}
}
/*
  Unary copy.
  @overload copy
  @return [Cumo::Bit] copy of self.
*/
static VALUE
bit_copy(VALUE self)
{
    cumo_ndfunc_arg_in_t ain[1] = {{cT,0}};
    cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
    cumo_ndfunc_t ndf = {iter_bit_copy, CUMO_FULL_LOOP, 1,1, ain,aout};
    return cumo_na_ndloop(&ndf, 1, self);
}
#line 1 "narray/gen/tmpl_bit/unary.c"
static void
(cumo_na_loop_t *const lp)
{
size_t n;
size_t p1, p3;
ssize_t s1, s3;
size_t *idx1, *idx3;
int o1, l1, r1, len;
CUMO_BIT_DIGIT *a1, *a3;
CUMO_BIT_DIGIT x;
CUMO_BIT_DIGIT y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_not", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, n);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_BIT_IDX(lp, 1, a3, p3, s3, idx3);
if (s1!=1 || s3!=1 || idx1 || idx3) {
for (; n--;) {
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x);
y = m_notbit(x);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, y);
}
} else {
o1 = p1 % CUMO_NB;
o1 -= p3;
l1 = CUMO_NB+o1;
r1 = CUMO_NB-o1;
if (p3>0 || n<CUMO_NB) {
len = CUMO_NB - p3;
if ((int)n<len) len=n;
if (o1>=0) x = *a1>>o1;
else x = *a1<<-o1;
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1;
a1++;
y = m_not(x);
*a3 = (y & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3));
a3++;
n -= len;
}
if (o1==0) {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *(a1++);
y = m_not(x);
*(a3++) = y;
}
} else {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
if (o1>0) x |= *(a1+1)<<r1;
a1++;
y = m_not(x);
*(a3++) = y;
}
}
if (n>0) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
y = m_not(x);
*a3 = (y & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n);
}
}
}
/*
  Unary not.
  @overload ~
  @return [Cumo::Bit] not of self.
*/
static VALUE
bit_not(VALUE self)
{
    cumo_ndfunc_arg_in_t ain[1] = {{cT,0}};
    cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
    cumo_ndfunc_t ndf = {iter_bit_not, CUMO_FULL_LOOP, 1,1, ain,aout};
    return cumo_na_ndloop(&ndf, 1, self);
}
#line 1 "narray/gen/tmpl_bit/binary.c"
static void
(cumo_na_loop_t *const lp)
{
size_t n;
size_t p1, p2, p3;
ssize_t s1, s2, s3;
size_t *idx1, *idx2, *idx3;
int o1, o2, l1, l2, r1, r2, len;
CUMO_BIT_DIGIT *a1, *a2, *a3;
CUMO_BIT_DIGIT x, y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_and", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, n);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2);
CUMO_INIT_PTR_BIT_IDX(lp, 2, a3, p3, s3, idx3);
if (s1!=1 || s2!=1 || s3!=1 || idx1 || idx2 || idx3) {
for (; n--;) {
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x);
CUMO_LOAD_BIT_STEP(a2, p2, s2, idx2, y);
x = m_andbit(x,y);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, x);
}
} else {
o1 = p1 % CUMO_NB;
o1 -= p3;
o2 = p2 % CUMO_NB;
o2 -= p3;
l1 = CUMO_NB+o1;
r1 = CUMO_NB-o1;
l2 = CUMO_NB+o2;
r2 = CUMO_NB-o2;
if (p3>0 || n<CUMO_NB) {
len = CUMO_NB - p3;
if ((int)n<len) len=n;
if (o1>=0) x = *a1>>o1;
else x = *a1<<-o1;
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1;
a1++;
if (o2>=0) y = *a2>>o2;
else y = *a2<<-o2;
if (p2+len>CUMO_NB) y |= *(a2+1)<<r2;
a2++;
x = m_and(x,y);
*a3 = (x & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3));
a3++;
n -= len;
}
if (o1==0 && o2==0) {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *(a1++);
y = *(a2++);
x = m_and(x,y);
*(a3++) = x;
}
} else {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
if (o1>0) x |= *(a1+1)<<r1;
a1++;
y = *a2>>o2;
if (o2<0) y |= *(a2-1)>>l2;
if (o2>0) y |= *(a2+1)<<r2;
a2++;
x = m_and(x,y);
*(a3++) = x;
}
}
if (n>0) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
y = *a2>>o2;
if (o2<0) y |= *(a2-1)>>l2;
x = m_and(x,y);
*a3 = (x & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n);
}
}
}
/*
  Binary and.
  @overload & other
  @param [Cumo::NArray,Numeric] other
  @return [Cumo::NArray] & of self and other.
*/
static VALUE
bit_and(VALUE self, VALUE other)
{
    cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
    cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
    cumo_ndfunc_t ndf = { iter_bit_and, CUMO_FULL_LOOP, 2, 1, ain, aout };
    return cumo_na_ndloop(&ndf, 2, self, other);
}
#line 1 "narray/gen/tmpl_bit/binary.c"
static void
(cumo_na_loop_t *const lp)
{
size_t n;
size_t p1, p2, p3;
ssize_t s1, s2, s3;
size_t *idx1, *idx2, *idx3;
int o1, o2, l1, l2, r1, r2, len;
CUMO_BIT_DIGIT *a1, *a2, *a3;
CUMO_BIT_DIGIT x, y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_or", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, n);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2);
CUMO_INIT_PTR_BIT_IDX(lp, 2, a3, p3, s3, idx3);
if (s1!=1 || s2!=1 || s3!=1 || idx1 || idx2 || idx3) {
for (; n--;) {
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x);
CUMO_LOAD_BIT_STEP(a2, p2, s2, idx2, y);
x = m_orbit(x,y);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, x);
}
} else {
o1 = p1 % CUMO_NB;
o1 -= p3;
o2 = p2 % CUMO_NB;
o2 -= p3;
l1 = CUMO_NB+o1;
r1 = CUMO_NB-o1;
l2 = CUMO_NB+o2;
r2 = CUMO_NB-o2;
if (p3>0 || n<CUMO_NB) {
len = CUMO_NB - p3;
if ((int)n<len) len=n;
if (o1>=0) x = *a1>>o1;
else x = *a1<<-o1;
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1;
a1++;
if (o2>=0) y = *a2>>o2;
else y = *a2<<-o2;
if (p2+len>CUMO_NB) y |= *(a2+1)<<r2;
a2++;
x = m_or(x,y);
*a3 = (x & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3));
a3++;
n -= len;
}
if (o1==0 && o2==0) {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *(a1++);
y = *(a2++);
x = m_or(x,y);
*(a3++) = x;
}
} else {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
if (o1>0) x |= *(a1+1)<<r1;
a1++;
y = *a2>>o2;
if (o2<0) y |= *(a2-1)>>l2;
if (o2>0) y |= *(a2+1)<<r2;
a2++;
x = m_or(x,y);
*(a3++) = x;
}
}
if (n>0) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
y = *a2>>o2;
if (o2<0) y |= *(a2-1)>>l2;
x = m_or(x,y);
*a3 = (x & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n);
}
}
}
/*
  Binary or.
  @overload | other
  @param [Cumo::NArray,Numeric] other
  @return [Cumo::NArray] | of self and other.
*/
static VALUE
bit_or(VALUE self, VALUE other)
{
    cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
    cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
    cumo_ndfunc_t ndf = { iter_bit_or, CUMO_FULL_LOOP, 2, 1, ain, aout };
    return cumo_na_ndloop(&ndf, 2, self, other);
}
#line 1 "narray/gen/tmpl_bit/binary.c"
static void
(cumo_na_loop_t *const lp)
{
size_t n;
size_t p1, p2, p3;
ssize_t s1, s2, s3;
size_t *idx1, *idx2, *idx3;
int o1, o2, l1, l2, r1, r2, len;
CUMO_BIT_DIGIT *a1, *a2, *a3;
CUMO_BIT_DIGIT x, y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_xor", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, n);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2);
CUMO_INIT_PTR_BIT_IDX(lp, 2, a3, p3, s3, idx3);
if (s1!=1 || s2!=1 || s3!=1 || idx1 || idx2 || idx3) {
for (; n--;) {
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x);
CUMO_LOAD_BIT_STEP(a2, p2, s2, idx2, y);
x = m_xorbit(x,y);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, x);
}
} else {
o1 = p1 % CUMO_NB;
o1 -= p3;
o2 = p2 % CUMO_NB;
o2 -= p3;
l1 = CUMO_NB+o1;
r1 = CUMO_NB-o1;
l2 = CUMO_NB+o2;
r2 = CUMO_NB-o2;
if (p3>0 || n<CUMO_NB) {
len = CUMO_NB - p3;
if ((int)n<len) len=n;
if (o1>=0) x = *a1>>o1;
else x = *a1<<-o1;
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1;
a1++;
if (o2>=0) y = *a2>>o2;
else y = *a2<<-o2;
if (p2+len>CUMO_NB) y |= *(a2+1)<<r2;
a2++;
x = m_xor(x,y);
*a3 = (x & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3));
a3++;
n -= len;
}
if (o1==0 && o2==0) {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *(a1++);
y = *(a2++);
x = m_xor(x,y);
*(a3++) = x;
}
} else {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
if (o1>0) x |= *(a1+1)<<r1;
a1++;
y = *a2>>o2;
if (o2<0) y |= *(a2-1)>>l2;
if (o2>0) y |= *(a2+1)<<r2;
a2++;
x = m_xor(x,y);
*(a3++) = x;
}
}
if (n>0) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
y = *a2>>o2;
if (o2<0) y |= *(a2-1)>>l2;
x = m_xor(x,y);
*a3 = (x & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n);
}
}
}
/*
  Binary xor.
  @overload ^ other
  @param [Cumo::NArray,Numeric] other
  @return [Cumo::NArray] ^ of self and other.
*/
static VALUE
bit_xor(VALUE self, VALUE other)
{
    cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
    cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
    cumo_ndfunc_t ndf = { iter_bit_xor, CUMO_FULL_LOOP, 2, 1, ain, aout };
    return cumo_na_ndloop(&ndf, 2, self, other);
}
#line 1 "narray/gen/tmpl_bit/binary.c"
static void
(cumo_na_loop_t *const lp)
{
size_t n;
size_t p1, p2, p3;
ssize_t s1, s2, s3;
size_t *idx1, *idx2, *idx3;
int o1, o2, l1, l2, r1, r2, len;
CUMO_BIT_DIGIT *a1, *a2, *a3;
CUMO_BIT_DIGIT x, y;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_eq", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, n);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2);
CUMO_INIT_PTR_BIT_IDX(lp, 2, a3, p3, s3, idx3);
if (s1!=1 || s2!=1 || s3!=1 || idx1 || idx2 || idx3) {
for (; n--;) {
CUMO_LOAD_BIT_STEP(a1, p1, s1, idx1, x);
CUMO_LOAD_BIT_STEP(a2, p2, s2, idx2, y);
x = m_eqbit(x,y);
CUMO_STORE_BIT_STEP(a3, p3, s3, idx3, x);
}
} else {
o1 = p1 % CUMO_NB;
o1 -= p3;
o2 = p2 % CUMO_NB;
o2 -= p3;
l1 = CUMO_NB+o1;
r1 = CUMO_NB-o1;
l2 = CUMO_NB+o2;
r2 = CUMO_NB-o2;
if (p3>0 || n<CUMO_NB) {
len = CUMO_NB - p3;
if ((int)n<len) len=n;
if (o1>=0) x = *a1>>o1;
else x = *a1<<-o1;
if (p1+len>CUMO_NB) x |= *(a1+1)<<r1;
a1++;
if (o2>=0) y = *a2>>o2;
else y = *a2<<-o2;
if (p2+len>CUMO_NB) y |= *(a2+1)<<r2;
a2++;
x = m_eq(x,y);
*a3 = (x & (CUMO_SLB(len)<<p3)) | (*a3 & ~(CUMO_SLB(len)<<p3));
a3++;
n -= len;
}
if (o1==0 && o2==0) {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *(a1++);
y = *(a2++);
x = m_eq(x,y);
*(a3++) = x;
}
} else {
for (; n>=CUMO_NB; n-=CUMO_NB) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
if (o1>0) x |= *(a1+1)<<r1;
a1++;
y = *a2>>o2;
if (o2<0) y |= *(a2-1)>>l2;
if (o2>0) y |= *(a2+1)<<r2;
a2++;
x = m_eq(x,y);
*(a3++) = x;
}
}
if (n>0) {
x = *a1>>o1;
if (o1<0) x |= *(a1-1)>>l1;
y = *a2>>o2;
if (o2<0) y |= *(a2-1)>>l2;
x = m_eq(x,y);
*a3 = (x & CUMO_SLB(n)) | (*a3 & CUMO_BALL<<n);
}
}
}
/*
  Binary eq.
  @overload eq other
  @param [Cumo::NArray,Numeric] other
  @return [Cumo::NArray] eq of self and other.
*/
static VALUE
bit_eq(VALUE self, VALUE other)
{
    cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{cT,0}};
    cumo_ndfunc_arg_out_t aout[1] = {{cT,0}};
    cumo_ndfunc_t ndf = { iter_bit_eq, CUMO_FULL_LOOP, 2, 1, ain, aout };
    return cumo_na_ndloop(&ndf, 2, self, other);
}
#line 1 "narray/gen/tmpl_bit/bit_count.c"
#undef int_t
#define int_t uint64_t
// CUDA kernel launchers (implemented in the .cu sources). The suffix encodes
// the (input, output) access pattern: "index" = via index array,
// "stride" = via step; the plain/index variants write a scalar output.
void cumo_iter_bit_count_true_index_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, size_t *idx1, uint64_t n);
void cumo_iter_bit_count_true_stride_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, ssize_t s1, uint64_t n);
void cumo_iter_bit_count_true_index_stride_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, size_t *idx1, ssize_t s2, uint64_t n);
void cumo_iter_bit_count_true_stride_stride_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, ssize_t s1, ssize_t s2, uint64_t n);
// ndloop iterator for Bit#count_true (GPU path): dispatches to the kernel
// launcher that matches the input/output access pattern.
static void
iter_bit_count_true(cumo_na_loop_t *const lp)
{
    size_t i;
    CUMO_BIT_DIGIT *a1;
    size_t p1;
    char *p2;
    ssize_t s1, s2;
    size_t *idx1;
    CUMO_INIT_COUNTER(lp, i);
    CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
    CUMO_INIT_PTR(lp, 1, p2, s2);
    if (s2==0) {
        // Scalar output (full reduction).
        if (idx1) {
            cumo_iter_bit_count_true_index_kernel_launch(p1,p2,a1,idx1,i);
        } else {
            cumo_iter_bit_count_true_stride_kernel_launch(p1,p2,a1,s1,i);
        }
    } else {
        // Strided output (reduction along selected axes).
        if (idx1) {
            cumo_iter_bit_count_true_index_stride_kernel_launch(p1,p2,a1,idx1,s2,i);
        } else {
            cumo_iter_bit_count_true_stride_stride_kernel_launch(p1,p2,a1,s1,s2,i);
        }
    }
}
static VALUE
bit_count_true_cpu(int argc, VALUE *argv, VALUE self);
/*
  Returns the number of bits.
  If argument is supplied, return Int-array counted along the axes.
  @overload count_true(axis:nil, keepdims:false)
  @param [Integer,Array,Range] axis (keyword) axes to be counted.
  @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
  @return [Cumo::UInt64]
*/
static VALUE
bit_count_true(int argc, VALUE *argv, VALUE self)
{
    if (cumo_compatible_mode_enabled_p()) {
        // Compatible mode returns Int64 like Numo; use the CPU path.
        return bit_count_true_cpu(argc, argv, self);
    } else {
        VALUE v, reduce;
        cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}};
        cumo_ndfunc_arg_out_t aout[1] = {{cumo_cUInt64,0}};
        cumo_ndfunc_t ndf = { iter_bit_count_true, CUMO_FULL_LOOP_NIP, 3, 1, ain, aout };
        reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
        v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(0));
        return v;
    }
}
#line 1 "narray/gen/tmpl_bit/bit_count.c"
#undef int_t
#define int_t uint64_t
// CUDA kernel launchers (implemented in the .cu sources). The suffix encodes
// the (input, output) access pattern: "index" = via index array,
// "stride" = via step; the plain/index variants write a scalar output.
void cumo_iter_bit_count_false_index_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, size_t *idx1, uint64_t n);
void cumo_iter_bit_count_false_stride_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, ssize_t s1, uint64_t n);
void cumo_iter_bit_count_false_index_stride_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, size_t *idx1, ssize_t s2, uint64_t n);
void cumo_iter_bit_count_false_stride_stride_kernel_launch(size_t p1, char *p2, CUMO_BIT_DIGIT *a1, ssize_t s1, ssize_t s2, uint64_t n);
// ndloop iterator for Bit#count_false (GPU path): dispatches to the kernel
// launcher that matches the input/output access pattern.
static void
iter_bit_count_false(cumo_na_loop_t *const lp)
{
    size_t i;
    CUMO_BIT_DIGIT *a1;
    size_t p1;
    char *p2;
    ssize_t s1, s2;
    size_t *idx1;
    CUMO_INIT_COUNTER(lp, i);
    CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
    CUMO_INIT_PTR(lp, 1, p2, s2);
    if (s2==0) {
        // Scalar output (full reduction).
        if (idx1) {
            cumo_iter_bit_count_false_index_kernel_launch(p1,p2,a1,idx1,i);
        } else {
            cumo_iter_bit_count_false_stride_kernel_launch(p1,p2,a1,s1,i);
        }
    } else {
        // Strided output (reduction along selected axes).
        if (idx1) {
            cumo_iter_bit_count_false_index_stride_kernel_launch(p1,p2,a1,idx1,s2,i);
        } else {
            cumo_iter_bit_count_false_stride_stride_kernel_launch(p1,p2,a1,s1,s2,i);
        }
    }
}
static VALUE
bit_count_false_cpu(int argc, VALUE *argv, VALUE self);
/*
  Returns the number of bits.
  If argument is supplied, return Int-array counted along the axes.
  @overload count_false(axis:nil, keepdims:false)
  @param [Integer,Array,Range] axis (keyword) axes to be counted.
  @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
  @return [Cumo::UInt64]
*/
static VALUE
bit_count_false(int argc, VALUE *argv, VALUE self)
{
    if (cumo_compatible_mode_enabled_p()) {
        // Compatible mode returns Int64 like Numo; use the CPU path.
        return bit_count_false_cpu(argc, argv, self);
    } else {
        VALUE v, reduce;
        cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}};
        cumo_ndfunc_arg_out_t aout[1] = {{cumo_cUInt64,0}};
        cumo_ndfunc_t ndf = { iter_bit_count_false, CUMO_FULL_LOOP_NIP, 3, 1, ain, aout };
        reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
        v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(0));
        return v;
    }
}
#line 1 "narray/gen/tmpl_bit/bit_count_cpu.c"
#undef int_t
#define int_t int64_t
static void
(cumo_na_loop_t *const lp)
{
size_t i;
CUMO_BIT_DIGIT *a1;
size_t p1;
char *p2;
ssize_t s1, s2;
size_t *idx1;
CUMO_BIT_DIGIT x=0;
int_t y;
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_count_true_cpu", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR(lp, 1, p2, s2);
if (s2==0) {
CUMO_GET_DATA(p2, int_t, y);
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1+*idx1, x);
idx1++;
if (m_count_true_cpubit(x)) {
y++;
}
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1, x);
p1 += s1;
if (m_count_true_cpu(x)) {
y++;
}
}
}
*(int_t*)p2 = y;
} else {
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1+*idx1, x);
idx1++;
if (m_count_true_cpu(x)) {
CUMO_GET_DATA(p2, int_t, y);
y++;
CUMO_SET_DATA(p2, int_t, y);
}
p2+=s2;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1, x);
p1+=s1;
if (m_count_true_cpu(x)) {
CUMO_GET_DATA(p2, int_t, y);
y++;
CUMO_SET_DATA(p2, int_t, y);
}
p2+=s2;
}
}
}
}
/*
  Returns the number of bits.
  If argument is supplied, return Int-array counted along the axes.
  @overload count_true_cpu(axis:nil, keepdims:false)
  @param [Integer,Array,Range] axis (keyword) axes to be counted.
  @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
  @return [Cumo::Int64]
*/
static VALUE
count_true_cpu_(int argc, VALUE *argv, VALUE self);
static VALUE
bit_count_true_cpu(int argc, VALUE *argv, VALUE self)
{
    VALUE v, reduce;
    cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}};
    cumo_ndfunc_arg_out_t aout[1] = {{cumo_cInt64,0}};
    cumo_ndfunc_t ndf = { iter_bit_count_true_cpu, CUMO_FULL_LOOP_NIP, 3, 1, ain, aout };
    reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
    v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(0));
    // Scalar result is returned as a Ruby Integer.
    return rb_funcall(v,rb_intern("extract_cpu"),0);
}
#line 1 "narray/gen/tmpl_bit/bit_count_cpu.c"
#undef int_t
#define int_t int64_t
static void
(cumo_na_loop_t *const lp)
{
size_t i;
CUMO_BIT_DIGIT *a1;
size_t p1;
char *p2;
ssize_t s1, s2;
size_t *idx1;
CUMO_BIT_DIGIT x=0;
int_t y;
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_count_false_cpu", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR(lp, 1, p2, s2);
if (s2==0) {
CUMO_GET_DATA(p2, int_t, y);
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1+*idx1, x);
idx1++;
if (m_count_false_cpubit(x)) {
y++;
}
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1, x);
p1 += s1;
if (m_count_false_cpu(x)) {
y++;
}
}
}
*(int_t*)p2 = y;
} else {
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1+*idx1, x);
idx1++;
if (m_count_false_cpu(x)) {
CUMO_GET_DATA(p2, int_t, y);
y++;
CUMO_SET_DATA(p2, int_t, y);
}
p2+=s2;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1, x);
p1+=s1;
if (m_count_false_cpu(x)) {
CUMO_GET_DATA(p2, int_t, y);
y++;
CUMO_SET_DATA(p2, int_t, y);
}
p2+=s2;
}
}
}
}
/*
  Returns the number of bits.
  If argument is supplied, return Int-array counted along the axes.
  @overload count_false_cpu(axis:nil, keepdims:false)
  @param [Integer,Array,Range] axis (keyword) axes to be counted.
  @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
  @return [Cumo::Int64]
*/
static VALUE
bit_count_false_cpu(int argc, VALUE *argv, VALUE self)
{
    VALUE v, reduce;
    cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}};
    cumo_ndfunc_arg_out_t aout[1] = {{cumo_cInt64,0}};
    cumo_ndfunc_t ndf = { iter_bit_count_false_cpu, CUMO_FULL_LOOP_NIP, 3, 1, ain, aout };
    reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
    v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(0));
    // Scalar result is returned as a Ruby Integer.
    return rb_funcall(v,rb_intern("extract_cpu"),0);
}
#line 1 "narray/gen/tmpl_bit/bit_reduce.c"
static void
(cumo_na_loop_t *const lp)
{
size_t i;
CUMO_BIT_DIGIT *a1, *a2;
size_t p1, p2;
ssize_t s1, s2;
size_t *idx1, *idx2;
CUMO_BIT_DIGIT x=0, y=0;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_all_p", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a2, p2+*idx2, y);
if (y == all?bit) {
CUMO_LOAD_BIT(a1, p1+*idx1, x);
if (x != 1) {
CUMO_STORE_BIT(a2, p2+*idx2, x);
}
}
idx1++;
idx2++;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a2, p2+*idx2, y);
if (y == 1) {
CUMO_LOAD_BIT(a1, p1, x);
if (x != 1) {
CUMO_STORE_BIT(a2, p2+*idx2, x);
}
}
p1 += s1;
idx2++;
}
}
} else if (s2) {
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a2, p2, y);
if (y == 1) {
CUMO_LOAD_BIT(a1, p1+*idx1, x);
if (x != 1) {
CUMO_STORE_BIT(a2, p2, x);
}
}
idx1++;
p2 += s2;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a2, p2, y);
if (y == 1) {
CUMO_LOAD_BIT(a1, p1, x);
if (x != 1) {
CUMO_STORE_BIT(a2, p2, x);
}
}
p1 += s1;
p2 += s2;
}
}
} else {
CUMO_LOAD_BIT(a2, p2, x);
if (x != 1) {
return;
}
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1+*idx1, y);
if (y != 1) {
CUMO_STORE_BIT(a2, p2, y);
return;
}
idx1++;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1, y);
if (y != 1) {
CUMO_STORE_BIT(a2, p2, y);
return;
}
p1 += s1;
}
}
}
}
/*
  Return true if all of bits are one (true).
  If argument is supplied, return Bit-array reduced along the axes.
  @overload all?(axis:nil, keepdims:false)
  @param [Integer,Array,Range] axis (keyword) axes to be reduced.
  @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
  @return [Cumo::Bit] .
*/
static VALUE
bit_all_p(int argc, VALUE *argv, VALUE self)
{
    VALUE v, reduce;
    cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}};
    cumo_ndfunc_arg_out_t aout[1] = {{cumo_cBit,0}};
    cumo_ndfunc_t ndf = {iter_bit_all_p, CUMO_FULL_LOOP_NIP, 3,1, ain,aout};
    reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
    // Accumulator initialized to 1; any 0 bit clears it.
    v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(1));
    if (argc > 0) {
        return v;
    }
    // Full reduction: extract the scalar bit and convert to true/false.
    v = bit_extract(v);
    switch (v) {
    case INT2FIX(0):
        return Qfalse;
    case INT2FIX(1):
        return Qtrue;
    default:
        rb_bug("unexpected result");
        return v;
    }
}
#line 1 "narray/gen/tmpl_bit/bit_reduce.c"
static void
(cumo_na_loop_t *const lp)
{
size_t i;
CUMO_BIT_DIGIT *a1, *a2;
size_t p1, p2;
ssize_t s1, s2;
size_t *idx1, *idx2;
CUMO_BIT_DIGIT x=0, y=0;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_any_p", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a2, p2+*idx2, y);
if (y == any?bit) {
CUMO_LOAD_BIT(a1, p1+*idx1, x);
if (x != 0) {
CUMO_STORE_BIT(a2, p2+*idx2, x);
}
}
idx1++;
idx2++;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a2, p2+*idx2, y);
if (y == 0) {
CUMO_LOAD_BIT(a1, p1, x);
if (x != 0) {
CUMO_STORE_BIT(a2, p2+*idx2, x);
}
}
p1 += s1;
idx2++;
}
}
} else if (s2) {
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a2, p2, y);
if (y == 0) {
CUMO_LOAD_BIT(a1, p1+*idx1, x);
if (x != 0) {
CUMO_STORE_BIT(a2, p2, x);
}
}
idx1++;
p2 += s2;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a2, p2, y);
if (y == 0) {
CUMO_LOAD_BIT(a1, p1, x);
if (x != 0) {
CUMO_STORE_BIT(a2, p2, x);
}
}
p1 += s1;
p2 += s2;
}
}
} else {
CUMO_LOAD_BIT(a2, p2, x);
if (x != 0) {
return;
}
if (idx1) {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1+*idx1, y);
if (y != 0) {
CUMO_STORE_BIT(a2, p2, y);
return;
}
idx1++;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a1, p1, y);
if (y != 0) {
CUMO_STORE_BIT(a2, p2, y);
return;
}
p1 += s1;
}
}
}
}
/*
  Return true if any of bits is one (true).
  If argument is supplied, return Bit-array reduced along the axes.
  @overload any?(axis:nil, keepdims:false)
  @param [Integer,Array,Range] axis (keyword) axes to be reduced.
  @param [TrueClass] keepdims (keyword) If true, the reduced axes are left in the result array as dimensions with size one.
  @return [Cumo::Bit] .
*/
static VALUE
bit_any_p(int argc, VALUE *argv, VALUE self)
{
    VALUE v, reduce;
    cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}};
    cumo_ndfunc_arg_out_t aout[1] = {{cumo_cBit,0}};
    cumo_ndfunc_t ndf = {iter_bit_any_p, CUMO_FULL_LOOP_NIP, 3,1, ain,aout};
    reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
    // Accumulator initialized to 0; any 1 bit sets it.
    v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(0));
    if (argc > 0) {
        return v;
    }
    // Full reduction: extract the scalar bit and convert to true/false.
    v = bit_extract(v);
    switch (v) {
    case INT2FIX(0):
        return Qfalse;
    case INT2FIX(1):
        return Qtrue;
    default:
        rb_bug("unexpected result");
        return v;
    }
}
#line 1 "narray/gen/tmpl_bit/none_p.c"
/*
  Return true if all bits are zero (false): the logical negation of #any?.
  @overload none?(axis:nil, keepdims:false)
  @return [Cumo::Bit]
*/
static VALUE
bit_none_p(int argc, VALUE *argv, VALUE self)
{
    VALUE v;
    v = bit_any_p(argc,argv,self);
    if (v==Qtrue) {
        return Qfalse;
    } else if (v==Qfalse) {
        return Qtrue;
    }
    // Reduced Bit-array result: negate element-wise.
    return bit_not(v);
}
#line 1 "narray/gen/tmpl_bit/where.c"
// Shared state for the where/where2 iterators, passed via lp->opt_ptr.
typedef struct {
size_t count;   // running element index over the whole array
char *idx0;     // output cursor for indices where bit == 0 (where2 only)
char *idx1;     // output cursor for indices where bit == 1
size_t elmsz;   // output element size in bytes (4 = Int32, 8 = Int64)
} where_opt_t;
// Store the low `esz` bytes of index x into the output buffer.
// NOTE(review): truncating copy appears to assume little-endian layout
// when esz < sizeof(x) — confirm for big-endian targets.
#define STORE_INT(ptr, esz, x) memcpy(ptr,&(x),esz)
static void
(cumo_na_loop_t *const lp)
{
size_t i;
CUMO_BIT_DIGIT *a;
size_t p;
ssize_t s;
size_t *idx;
CUMO_BIT_DIGIT x=0;
char *idx1;
size_t count;
size_t e;
where_opt_t *g;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_where", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
g = (where_opt_t*)(lp->opt_ptr);
count = g->count;
idx1 = g->idx1;
e = g->elmsz;
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a, p, s, idx);
if (idx) {
for (; i--;) {
CUMO_LOAD_BIT(a, p+*idx, x);
idx++;
if (x!=0) {
STORE_INT(idx1,e,count);
idx1 += e;
}
count++;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a, p, x);
p+=s;
if (x!=0) {
STORE_INT(idx1,e,count);
idx1 += e;
}
count++;
}
}
g->count = count;
g->idx1 = idx1;
}
/*
  Returns the array of index where the bit is one (true).
  @overload where
  @return [Cumo::Int32,Cumo::Int64]
*/
static VALUE
bit_where(VALUE self)
{
    volatile VALUE idx_1;
    size_t size, n_1;
    where_opt_t *g;
    cumo_ndfunc_arg_in_t ain[1] = {{cT,0}};
    // Iterator must be iter_bit_where; the garbled source referenced this
    // function itself.
    cumo_ndfunc_t ndf = { iter_bit_where, CUMO_FULL_LOOP, 1, 0, ain, 0 };

    size = CUMO_RNARRAY_SIZE(self);
    // Count true bits first to size the output exactly.
    // TODO(sonots): bit_count_true synchronizes with CPU. Avoid.
    n_1 = NUM2SIZET(bit_count_true_cpu(0, NULL, self));
    g = ALLOCA_N(where_opt_t, 1);
    g->count = 0;
    // 64-bit indices only when positions cannot fit in 32 bits.
    if (size > 4294967295ul) {
        idx_1 = cumo_na_new(cumo_cInt64, 1, &n_1);
        g->elmsz = 8;
    } else {
        idx_1 = cumo_na_new(cumo_cInt32, 1, &n_1);
        g->elmsz = 4;
    }
    g->idx1 = cumo_na_get_pointer_for_write(idx_1);
    g->idx0 = NULL;
    cumo_na_ndloop3(&ndf, g, 1, self);
    cumo_na_release_lock(idx_1);
    return idx_1;
}
bit_count_true_cpu
#line 1 "narray/gen/tmpl_bit/where2.c"
static void
(cumo_na_loop_t *const lp)
{
size_t i;
CUMO_BIT_DIGIT *a;
size_t p;
ssize_t s;
size_t *idx;
CUMO_BIT_DIGIT x=0;
char *idx0, *idx1;
size_t count;
size_t e;
where_opt_t *g;
// TODO(sonots): CUDA kernelize
CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("iter_bit_where2", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
g = (where_opt_t*)(lp->opt_ptr);
count = g->count;
idx0 = g->idx0;
idx1 = g->idx1;
e = g->elmsz;
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a, p, s, idx);
if (idx) {
for (; i--;) {
CUMO_LOAD_BIT(a, p+*idx, x);
idx++;
if (x==0) {
STORE_INT(idx0,e,count);
idx0 += e;
} else {
STORE_INT(idx1,e,count);
idx1 += e;
}
count++;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a, p, x);
p+=s;
if (x==0) {
STORE_INT(idx0,e,count);
idx0 += e;
} else {
STORE_INT(idx1,e,count);
idx1 += e;
}
count++;
}
}
g->count = count;
g->idx0 = idx0;
g->idx1 = idx1;
}
/*
  Returns two index arrays.
  The first array contains index where the bit is one (true).
  The second array contains index where the bit is zero (false).
  @overload where2
  @return [Cumo::Int32,Cumo::Int64]*2
*/
static VALUE
bit_where2(VALUE self)
{
    VALUE idx_1, idx_0;
    size_t size, n_1, n_0;
    where_opt_t *g;
    cumo_ndfunc_arg_in_t ain[1] = {{cT,0}};
    // Iterator must be iter_bit_where2; the garbled source referenced this
    // function itself.
    cumo_ndfunc_t ndf = { iter_bit_where2, CUMO_FULL_LOOP, 1, 0, ain, 0 };

    size = CUMO_RNARRAY_SIZE(self);
    // Count true bits to size both outputs exactly.
    // TODO(sonots): bit_count_true synchronizes with CPU. Avoid.
    n_1 = NUM2SIZET(bit_count_true_cpu(0, NULL, self));
    n_0 = size - n_1;
    g = ALLOCA_N(where_opt_t, 1);
    g->count = 0;
    // 64-bit indices only when positions cannot fit in 32 bits.
    if (size > 4294967295ul) {
        idx_1 = cumo_na_new(cumo_cInt64, 1, &n_1);
        idx_0 = cumo_na_new(cumo_cInt64, 1, &n_0);
        g->elmsz = 8;
    } else {
        idx_1 = cumo_na_new(cumo_cInt32, 1, &n_1);
        idx_0 = cumo_na_new(cumo_cInt32, 1, &n_0);
        g->elmsz = 4;
    }
    g->idx1 = cumo_na_get_pointer_for_write(idx_1);
    g->idx0 = cumo_na_get_pointer_for_write(idx_0);
    cumo_na_ndloop3(&ndf, g, 1, self);
    cumo_na_release_lock(idx_0);
    cumo_na_release_lock(idx_1);
    return rb_assoc_new(idx_1, idx_0);
}
bit_count_true_cpu
#line 1 "narray/gen/tmpl_bit/mask.c"
static void
(cumo_na_loop_t *const lp)
{
size_t i;
CUMO_BIT_DIGIT *a;
size_t p1, p2;
ssize_t s1, s2;
size_t *idx1, *idx2, *pidx;
CUMO_BIT_DIGIT x=0;
size_t count;
where_opt_t *g;
CUMO_SHOW_SYNCHRONIZE_WARNING_ONCE("iter_bit_mask", "");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
g = (where_opt_t*)(lp->opt_ptr);
count = g->count;
pidx = (size_t*)(g->idx1);
CUMO_INIT_COUNTER(lp, i);
CUMO_INIT_PTR_BIT_IDX(lp, 0, a, p1, s1, idx1);
//CUMO_INIT_PTR_IDX(lp, 1, p2, s2, idx2);
p2 = lp->args[1].iter[0].pos;
s2 = lp->args[1].iter[0].step;
idx2 = lp->args[1].iter[0].idx;
if (idx1) {
if (idx2) {
for (; i--;) {
CUMO_LOAD_BIT(a, p1+*idx1, x);
idx1++;
if (x) {
*(pidx++) = p2+*idx2;
count++;
}
idx2++;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a, p1+*idx1, x);
idx1++;
if (x) {
*(pidx++) = p2;
count++;
}
p2 += s2;
}
}
} else {
if (idx2) {
for (; i--;) {
CUMO_LOAD_BIT(a, p1, x);
p1 += s1;
if (x) {
*(pidx++) = p2+*idx2;
count++;
}
idx2++;
}
} else {
for (; i--;) {
CUMO_LOAD_BIT(a, p1, x);
p1 += s1;
if (x) {
*(pidx++) = p2;
count++;
}
p2 += s2;
}
}
}
g->count = count;
g->idx1 = (char*)pidx;
}
#if SIZEOF_VOIDP == 8
#define cIndex cumo_cInt64
#elif SIZEOF_VOIDP == 4
#define cIndex cumo_cInt32
#endif
/*
  Return subarray of argument masked with self bit array.
  @overload mask(array)
  @param [Cumo::NArray] array narray to be masked.
  @return [Cumo::NArray] view of masked array.
*/
static VALUE
bit_mask(VALUE mask, VALUE val)
{
    volatile VALUE idx_1, view;
    cumo_narray_data_t *nidx;
    cumo_narray_view_t *nv;
    cumo_narray_t *na;
    cumo_narray_view_t *na1;
    cumo_stridx_t stridx0;
    size_t n_1;
    where_opt_t g;
    cumo_ndfunc_arg_in_t ain[2] = {{cT,0},{Qnil,0}};
    // Iterator must be iter_bit_mask; the garbled source referenced this
    // function itself.
    cumo_ndfunc_t ndf = {iter_bit_mask, CUMO_FULL_LOOP, 2, 0, ain, 0};

    // Count true bits to size the index buffer exactly.
    // TODO(sonots): bit_count_true synchronizes with CPU. Avoid.
    n_1 = NUM2SIZET(bit_count_true_cpu(0, NULL, mask));
    idx_1 = cumo_na_new(cIndex, 1, &n_1);
    g.count = 0;
    g.elmsz = SIZEOF_VOIDP;
    g.idx1 = cumo_na_get_pointer_for_write(idx_1);
    g.idx0 = NULL;
    cumo_na_ndloop3(&ndf, &g, 2, mask, val);

    // Build a 1-d view over `val` indexed by the collected positions.
    view = cumo_na_s_allocate_view(CLASS_OF(val));
    CumoGetNArrayView(view, nv);
    cumo_na_setup_shape((cumo_narray_t*)nv, 1, &n_1);
    // Steal the index buffer from idx_1 and hand ownership to the view.
    CumoGetNArrayData(idx_1, nidx);
    CUMO_SDX_SET_INDEX(stridx0, (size_t*)nidx->ptr);
    nidx->ptr = NULL;
    nv->stridx = ALLOC_N(cumo_stridx_t, 1);
    nv->stridx[0] = stridx0;
    nv->offset = 0;
    // Point the view at the underlying data object (unwrap if val is a view).
    CumoGetNArray(val, na);
    switch(CUMO_NA_TYPE(na)) {
    case CUMO_NARRAY_DATA_T:
        nv->data = val;
        break;
    case CUMO_NARRAY_VIEW_T:
        CumoGetNArrayView(val, na1);
        nv->data = na1->data;
        break;
    default:
        rb_raise(rb_eRuntimeError,"invalid CUMO_NA_TYPE: %d",CUMO_NA_TYPE(na));
    }
    return view;
}
bit_count_true_cpu
/*
 * NOTE(review): The remainder of this chunk is scrambled generated code for
 * the module initialization function (presumably Init_cumo_bit — the name was
 * dropped by the template expansion, as were the class-name arguments to
 * rb_define_class_under below). Statements from tmpl/init_class.c appear
 * fused and partly outside the function body; this cannot compile as-is.
 * TODO: regenerate this file from the erb templates rather than hand-patch.
 */
(void)
{
VALUE hCast, cumo_bit;
mCumo = rb_define_module("Cumo");
/* NOTE(review): stray template token; a statement was lost here. */
mCumo
cumo_id_cast = rb_intern("cast");cumo_id_divmod = rb_intern("divmod");cumo_id_eq = rb_intern("eq");cumo_id_mulsum = rb_intern("mulsum");cumo_id_ne = rb_intern("ne");cumo_id_pow = rb_intern("pow");}
#line 1 "narray/gen/tmpl/init_class.c"
/*
Document-class:
Cumo::Bit
*/
/* NOTE(review): class-name arguments missing — presumably (mCumo, "Bit"). */
cT = rb_define_class_under(, "", cNArray);
hCast = rb_hash_new();
rb_define_const(cT, "UPCAST", hCast);
rb_hash_aset(hCast, rb_cArray, cT);
/* NOTE(review): stray template token. */
mCumoBit
#ifdef RUBY_INTEGER_UNIFICATIONrb_hash_aset(hCast, rb_cInteger, cT);#elserb_hash_aset(hCast, rb_cFixnum, cT);rb_hash_aset(hCast, rb_cBignum, cT);#endifrb_hash_aset(hCast, rb_cFloat, cumo_cDFloat);rb_hash_aset(hCast, rb_cComplex, cumo_cDComplex);rb_hash_aset(hCast, cumo_cRObject, cumo_cRObject);rb_hash_aset(hCast, cumo_cDComplex, cumo_cDComplex);rb_hash_aset(hCast, cumo_cSComplex, cumo_cSComplex);rb_hash_aset(hCast, cumo_cDFloat, cumo_cDFloat);rb_hash_aset(hCast, cumo_cSFloat, cumo_cSFloat);rb_hash_aset(hCast, cumo_cInt64, cumo_cInt64);rb_hash_aset(hCast, cumo_cInt32, cumo_cInt32);rb_hash_aset(hCast, cumo_cInt16, cumo_cInt16);rb_hash_aset(hCast, cumo_cInt8, cumo_cInt8);rb_hash_aset(hCast, cumo_cUInt64, cumo_cUInt64);rb_hash_aset(hCast, cumo_cUInt32, cumo_cUInt32);rb_hash_aset(hCast, cumo_cUInt16, cumo_cUInt16);rb_hash_aset(hCast, cumo_cUInt8, cumo_cUInt8);
/* NOTE(review): method registrations fused onto single lines; the "[]"
 * singleton-method registration is split across this and a later line. */
rb_define_singleton_method(cT, "[]", /**/
rb_define_const(cT,"ELEMENT_BIT_SIZE",INT2FIX(1));/**/
rb_define_const(cT,"ELEMENT_BYTE_SIZE",rb_float_new(1.0/8));/**/
rb_define_const(cT,"CONTIGUOUS_STRIDE",INT2FIX(1));rb_define_alloc_func(cT, bit_s_alloc_func);rb_define_method(cT, "allocate", bit_allocate, 0);rb_define_method(cT, "extract", bit_extract, 0);rb_define_method(cT, "extract_cpu", bit_extract_cpu, 0);rb_define_method(cT, "store", bit_store, 1);rb_define_singleton_method(cT, "cast", bit_s_cast, 1);rb_define_method(cT, "[]", bit_aref, -1);rb_define_method(cT, "aref_cpu", bit_aref_cpu, -1);rb_define_method(cT, "[]=", bit_aset, -1);rb_define_method(cT, "coerce_cast", bit_coerce_cast, 1);rb_define_method(cT, "to_a", bit_to_a, 0);rb_define_method(cT, "fill", bit_fill, 1);rb_define_method(cT, "format", bit_format, -1);rb_define_method(cT, "format_to_a", bit_format_to_a, -1);rb_define_method(cT, "inspect", bit_inspect, 0);rb_define_method(cT, "each", bit_each, 0);rb_define_method(cT, "each_with_index", bit_each_with_index, 0);rb_define_method(cT, "copy", bit_copy, 0);rb_define_method(cT, "~", bit_not, 0);rb_define_method(cT, "&", bit_and, 1);rb_define_method(cT, "|", bit_or, 1);rb_define_method(cT, "^", bit_xor, 1);rb_define_method(cT, "eq", bit_eq, 1);rb_define_method(cT, "count_true", bit_count_true, -1);rb_define_alias(cT, "count_1", "count_true");rb_define_alias(cT, "count", "count_true");rb_define_method(cT, "count_false", bit_count_false, -1);rb_define_alias(cT, "count_0", "count_false");rb_define_method(cT, "count_true_cpu", bit_count_true_cpu, -1);rb_define_alias(cT, "count_1_cpu", "count_true_cpu");rb_define_alias(cT, "count_cpu", "count_true_cpu");rb_define_method(cT, "count_false_cpu", bit_count_false_cpu, -1);rb_define_alias(cT, "count_0_cpu", "count_false_cpu");rb_define_method(cT, "all?", bit_all_p, -1);rb_define_method(cT, "any?", bit_any_p, -1);rb_define_method(cT, "none?", bit_none_p, -1);rb_define_method(cT, "where", bit_where, 0);rb_define_method(cT, "where2", bit_where2, 0);rb_define_method(cT, "mask", bit_mask, 1);, -2);
/* NOTE(review): stray template token (likely the dropped argument of the
 * "[]" singleton-method registration above). */
bit_s_cast