Files
impala/shell/ext-py/bitarray-1.2.1/bitarray/_util.c
David Knupp 537c30dd06 IMPALA-9720: Update bitarray 0.9.0 -> 1.2.1
This is needed for python3 compatibility.

Tested by running gerrit-verify-dryrun.

Change-Id: I0641b03e880314a424d9d5a0651945c4f51273bc
Reviewed-on: http://gerrit.cloudera.org:8080/15858
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2020-05-05 12:33:31 +00:00

423 lines
11 KiB
C

/*
Copyright (c) 2019, Ilan Schnell
bitarray is published under the PSF license.
This file contains the C implementation of some useful utility functions.
Author: Ilan Schnell
*/
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#if PY_MAJOR_VERSION >= 3
#define IS_PY3K
#endif
#ifdef STDC_HEADERS
#include <stddef.h>
#else /* !STDC_HEADERS */
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h> /* For size_t */
#endif /* HAVE_SYS_TYPES_H */
#endif /* !STDC_HEADERS */
/* Type used for bit indices and bit counts (see nbits below). */
typedef long long int idx_t;
/* C-level view of a bitarray object, as accessed by the helpers in this
   file.  NOTE(review): presumably this must stay field-for-field identical
   to the definition used by the main bitarray extension, because objects
   created there are cast to this type here -- confirm before changing. */
typedef struct {
PyObject_VAR_HEAD
char *ob_item; /* byte buffer holding the bits; bit i is in ob_item[i / 8] */
Py_ssize_t allocated; /* how many bytes allocated */
idx_t nbits; /* length of bitarray, i.e. elements */
int endian; /* bit endianness of bitarray; selects the branch in BITMASK */
int ob_exports; /* how many buffer exports */
PyObject *weakreflist; /* list of weak references */
} bitarrayobject;
/* number of bits contained in `bytes` bytes (bytes * 8), as idx_t */
#define BITS(bytes) ((idx_t) (bytes) << 3)
/* number of bytes needed to hold `bits` bits, i.e. bits / 8 rounded up;
   note that the argument is evaluated more than once */
#define BYTES(bits) (((bits) == 0) ? 0 : (((bits) - 1) / 8 + 1))
/* mask selecting bit `i` within its byte: when `endian` is non-zero, bit
   position 7 - i % 8 is used, otherwise bit position i % 8 */
#define BITMASK(endian, i) (((char) 1) << ((endian) ? (7 - (i)%8) : (i)%8))
/* ------------ low level access to bits in bitarrayobject ------------- */
/* GETBIT(self, i) evaluates to 1 when bit i of the bitarray is set and to 0
   otherwise.  Release builds (NDEBUG defined) use a macro; all other builds
   use a real function so that the index can be range checked by assert(). */
#ifdef NDEBUG
#define GETBIT(self, i) \
    (((self)->ob_item[(i) / 8] & BITMASK((self)->endian, i)) ? 1 : 0)
#else
static int
GETBIT(bitarrayobject *self, idx_t i)
{
    char byte;

    assert(0 <= i && i < self->nbits);
    byte = self->ob_item[i / 8];
    if (byte & BITMASK(self->endian, i))
        return 1;
    return 0;
}
#endif
/* Set bit i of self to 1 when `bit` is non-zero, and clear it otherwise.
   The index only has to lie within the allocated bytes (not within nbits),
   which allows the padding bits of the last byte to be written as well. */
static void
setbit(bitarrayobject *self, idx_t i, int bit)
{
    char *byte;
    char bitmask;

    assert(0 <= i && i < BITS(Py_SIZE(self)));
    byte = &self->ob_item[i / 8];
    bitmask = BITMASK(self->endian, i);

    if (!bit) {
        *byte &= ~bitmask;
        return;
    }
    *byte |= bitmask;
}
/* set using the Python module function _set_babt() */
static PyObject *bitarray_basetype = NULL;

/* Return 1 if obj is a bitarray, 0 otherwise.

   The result is always 0 or 1, so callers can safely write
   `if (!bitarray_Check(obj))`.  PyObject_IsInstance() reports failure with
   -1 and an exception set; that case is mapped to 0 ("not a bitarray") and
   the exception is cleared, so an object is never cast to bitarrayobject
   after a failed check.

   NOTE(review): while no base type has been registered, any object that has
   an `endian` attribute is accepted -- this fallback is only a heuristic. */
static int
bitarray_Check(PyObject *obj)
{
    int res;

    if (bitarray_basetype == NULL)      /* fallback */
        return PyObject_HasAttrString(obj, "endian");

    res = PyObject_IsInstance(obj, bitarray_basetype);
    if (res < 0) {
        /* the instance check itself failed: treat as "not a bitarray" */
        PyErr_Clear();
        return 0;
    }
    return res;
}
/* Clear the padding bits, i.e. every bit from index nbits up to the end of
   the last byte of the buffer, so that whole-byte operations are not
   affected by whatever those bits contained. */
static void
setunused(bitarrayobject *self)
{
    const idx_t end = BITS(Py_SIZE(self));
    idx_t pos = self->nbits;

    while (pos < end) {
        setbit(self, pos, 0);
        pos++;
    }
}
/* bitcount_lookup[v] is the number of set bits in the byte value v
   (0 <= v < 256).  The table is read-only, hence const. */
static const int bitcount_lookup[256] = {
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
/* Return the smallest index i for which a.count(1, 0, i) == n, or -1 when
   n exceeds the total number of set bits in a.

   The search runs in three phases of decreasing granularity: whole blocks
   of BLOCK_BITS bits, then single bytes, then single bits.  The first two
   phases stop just before the block / byte in which the count would reach
   n, so the final bit-wise phase always finds the exact index. */
static idx_t
count_to_n(bitarrayobject *a, idx_t n)
{
    idx_t i = 0, j = 0, m;   /* i is the index, j the total count up to i */
    Py_ssize_t block_start, block_stop, k;
    unsigned char c;

    if (n == 0)
        return 0;

#define BLOCK_BITS  8192
    /* by counting big blocks we save comparisons */
    while (i + BLOCK_BITS < a->nbits) {
        m = 0;
        assert(i % 8 == 0);
        block_start = (Py_ssize_t) (i / 8);
        block_stop = block_start + (BLOCK_BITS / 8);
        for (k = block_start; k < block_stop; k++) {
            assert(k < Py_SIZE(a));
            c = a->ob_item[k];
            m += bitcount_lookup[c];
        }
        if (j + m >= n)       /* the n-th set bit lies within this block */
            break;
        j += m;
        i += BLOCK_BITS;
    }
#undef BLOCK_BITS

    /* Byte-wise phase.  The strict `<` means only bytes that lie entirely
       below nbits are counted as a whole; the last byte is always left to
       the bit-wise loop below. */
    while (i + 8 < a->nbits) {
        k = (Py_ssize_t) (i / 8);
        assert(k < Py_SIZE(a));
        c = a->ob_item[k];
        m = bitcount_lookup[c];
        if (j + m >= n)       /* the n-th set bit lies within this byte */
            break;
        j += m;
        i += 8;
    }

    /* bit-wise phase: advance until exactly n set bits have been seen */
    while (j < n && i < a->nbits) {
        j += GETBIT(a, i);
        i++;
    }
    if (j < n)                /* ran out of bits before reaching n */
        return -1;
    return i;
}
/* Return the index of the last occurrence of vi in self, or -1 when vi
   does not occur at all. */
static idx_t
find_last(bitarrayobject *self, int vi)
{
    const idx_t nbits = self->nbits;
    idx_t pos, lowest;
    Py_ssize_t byte;
    char skip;

    if (nbits == 0)
        return -1;

    /* First inspect the bits above the last full byte boundary one by one
       (there are none when nbits is a multiple of 8). */
    lowest = BITS(nbits / 8);
    pos = nbits - 1;
    while (pos >= lowest) {
        if (GETBIT(self, pos) == vi)
            return pos;
        pos--;
    }
    if (pos < 0)              /* the whole bitarray has been examined */
        return -1;
    assert((pos + 1) % 8 == 0);

    /* Walk backwards over whole bytes which cannot contain vi:
       searching for 1 skips bytes equal to 0x00,
       searching for 0 skips bytes equal to 0xff. */
    skip = vi ? 0x00 : 0xff;
    byte = BYTES(pos) - 1;
    while (byte >= 0 && self->ob_item[byte] == skip)
        byte--;
    if (byte < 0)             /* every remaining byte was skipped */
        return -1;

    /* The byte found contains vi; locate its highest matching bit. */
    lowest = BITS(byte);
    for (pos = lowest + 7; pos >= lowest; pos--) {
        if (GETBIT(self, pos) == vi)
            return pos;
    }
    return -1;
}
/*************************** Module functions **********************/
/* Module function count_n(a, n): return the smallest index i for which the
   first i bits of the bitarray a contain exactly n set bits.

   Raises TypeError when a is not a bitarray, and ValueError when n is
   negative, larger than the length of a, or larger than the number of set
   bits in a. */
static PyObject *
count_n(PyObject *self, PyObject *args)
{
    PyObject *obj;
    bitarrayobject *ba;
    idx_t n, index;

    if (!PyArg_ParseTuple(args, "OL:count_n", &obj, &n))
        return NULL;

    if (!bitarray_Check(obj)) {
        PyErr_SetString(PyExc_TypeError, "bitarray object expected");
        return NULL;
    }
    if (n < 0) {
        PyErr_SetString(PyExc_ValueError, "non-negative integer expected");
        return NULL;
    }

    ba = (bitarrayobject *) obj;
    if (n > ba->nbits) {
        PyErr_SetString(PyExc_ValueError, "n larger than bitarray size");
        return NULL;
    }

    index = count_to_n(ba, n);          /* do actual work here */
    if (index < 0) {
        PyErr_SetString(PyExc_ValueError, "n exceeds total count");
        return NULL;
    }
    return PyLong_FromLongLong(index);
}

PyDoc_STRVAR(count_n_doc,
"count_n(a, n, /) -> int\n"
"\n"
"Find the smallest index `i` for which `a[:i].count() == n`.\n"
"Raises `ValueError`, when n exceeds the `a.count()`.");
/* Module function rindex(bitarray, value=True): return the index of the
   last bit equal to bool(value).

   Raises TypeError when the first argument is not a bitarray, and
   ValueError when the value does not occur.  An error raised while
   evaluating the truth of value is propagated unchanged. */
static PyObject *
r_index(PyObject *self, PyObject *args)
{
    PyObject *obj;
    PyObject *value = Py_True;
    idx_t pos;
    int truth;

    if (!PyArg_ParseTuple(args, "O|O:rindex", &obj, &value))
        return NULL;

    if (!bitarray_Check(obj)) {
        PyErr_SetString(PyExc_TypeError, "bitarray object expected");
        return NULL;
    }

    truth = PyObject_IsTrue(value);
    if (truth < 0)
        return NULL;

    pos = find_last((bitarrayobject *) obj, truth);
    if (pos < 0) {
        PyErr_SetString(PyExc_ValueError, "index(x): x not in bitarray");
        return NULL;
    }
    return PyLong_FromLongLong(pos);
}

PyDoc_STRVAR(rindex_doc,
"rindex(bitarray, value=True, /) -> int\n"
"\n"
"Return the rightmost index of `bool(value)` in bitarray.\n"
"Raises `ValueError` if the value is not present.");
/* Selects which operation two_bitarray_func() performs on its two bitarray
   arguments.  The KERN_c<oper> names are assembled by token pasting in the
   COUNT_FUNC macro (KERN_c ## oper), so they must not be renamed
   independently of that macro. */
enum kernel_type {
KERN_cand, /* count bitwise and -> int */
KERN_cor, /* count bitwise or -> int */
KERN_cxor, /* count bitwise xor -> int */
KERN_subset, /* is subset -> bool */
};
/* Shared implementation of the module functions that take two bitarrays.

   args    argument tuple of the calling module function
   kern    operation to perform, see enum kernel_type
   format  PyArg_ParseTuple() format string; it has to describe exactly two
           objects and carries the function name used in error messages

   Returns a new reference: an int holding the number of set bits of
   a & b, a | b or a ^ b for the counting kernels, or a bool for
   KERN_subset.  Returns NULL with an exception set on failure: TypeError
   when an argument is not a bitarray, ValueError when the two bitarrays
   differ in length or endianness, SystemError for an unknown kernel.

   Side effect: the padding bits of both arguments are cleared so that they
   cannot influence the byte-wise operations. */
static PyObject *
two_bitarray_func(PyObject *args, enum kernel_type kern, const char *format)
{
    PyObject *a, *b;
    bitarrayobject *aa, *bb;
    Py_ssize_t i, nbytes;
    idx_t res = 0;
    unsigned char c;

    if (!PyArg_ParseTuple(args, format, &a, &b))
        return NULL;
    if (!(bitarray_Check(a) && bitarray_Check(b))) {
        PyErr_SetString(PyExc_TypeError, "bitarray object expected");
        return NULL;
    }
    aa = (bitarrayobject *) a;
    bb = (bitarrayobject *) b;

    if (aa->nbits != bb->nbits) {
        PyErr_SetString(PyExc_ValueError,
                        "bitarrays of equal length expected");
        return NULL;
    }
    if (aa->endian != bb->endian) {
        PyErr_SetString(PyExc_ValueError,
                        "bitarrays of equal endianness expected");
        return NULL;
    }
    setunused(aa);
    setunused(bb);
    nbytes = Py_SIZE(aa);

    switch (kern) {
    case KERN_cand:
        for (i = 0; i < nbytes; i++) {
            c = aa->ob_item[i] & bb->ob_item[i];
            res += bitcount_lookup[c];
        }
        break;
    case KERN_cor:
        for (i = 0; i < nbytes; i++) {
            c = aa->ob_item[i] | bb->ob_item[i];
            res += bitcount_lookup[c];
        }
        break;
    case KERN_cxor:
        for (i = 0; i < nbytes; i++) {
            c = aa->ob_item[i] ^ bb->ob_item[i];
            res += bitcount_lookup[c];
        }
        break;
    case KERN_subset:
        /* a is a subset of b when no byte of a has a bit that b lacks;
           stop at the first byte which violates this */
        for (i = 0; i < nbytes; i++) {
            if ((aa->ob_item[i] & bb->ob_item[i]) != aa->ob_item[i])
                Py_RETURN_FALSE;
        }
        Py_RETURN_TRUE;
    default:                  /* should never happen */
        /* never return NULL without an exception set */
        PyErr_SetString(PyExc_SystemError, "invalid kernel type");
        return NULL;
    }
    return PyLong_FromLongLong(res);
}
/* COUNT_FUNC(oper, ochar) defines the module function count_<oper>(a, b)
   together with its docstring count_<oper>_doc.  `oper` is one of and, or,
   xor and selects the kernel KERN_c<oper>; `ochar` is the corresponding
   operator as a string literal, used in the docstring only.

   The generated functions take `PyObject *self`, so their type is exactly
   PyCFunction, the type through which the interpreter calls them. */
#define COUNT_FUNC(oper, ochar) \
static PyObject * \
count_ ## oper (PyObject *self, PyObject *args) \
{ \
    return two_bitarray_func(args, KERN_c ## oper, "OO:count_" #oper); \
} \
PyDoc_STRVAR(count_ ## oper ## _doc, \
"count_" #oper "(a, b, /) -> int\n" \
"\n" \
"Returns `(a " ochar " b).count()`, but is more memory efficient,\n" \
"as no intermediate bitarray object gets created.")

COUNT_FUNC(and, "&");
COUNT_FUNC(or, "|");
COUNT_FUNC(xor, "^");
/* Module function subset(a, b): thin wrapper which parses two objects from
   args and delegates to two_bitarray_func() with the KERN_subset kernel.
   The text after the colon in the format string is the function name used
   in argument parsing error messages. */
static PyObject *
subset(PyObject *self, PyObject *args)
{
return two_bitarray_func(args, KERN_subset, "OO:subset");
}
PyDoc_STRVAR(subset_doc,
"subset(a, b, /) -> bool\n\
\n\
Return True if bitarray `a` is a subset of bitarray `b` (False otherwise).\n\
`subset(a, b)` is equivalent to `(a & b).count() == a.count()` but is more\n\
efficient since we can stop as soon as one mismatch is found, and no\n\
intermediate bitarray object gets created.");
/* Module function _set_babt(obj): set bitarray_basetype (babt), the object
   that bitarray_Check() passes to PyObject_IsInstance().

   obj arrives as a borrowed reference (METH_O).  Because it is kept in a
   static variable beyond this call, a strong reference is taken, and the
   reference to any previously registered object is released.  The new
   value is stored before the old one is released, so the global never
   points to an object that is being deallocated. */
static PyObject *
set_babt(PyObject *self, PyObject *obj)
{
    PyObject *old = bitarray_basetype;

    Py_INCREF(obj);
    bitarray_basetype = obj;
    Py_XDECREF(old);
    Py_RETURN_NONE;
}
/* Method table of the _util module: maps each Python-visible function name
   to its C implementation, calling convention and docstring.  All entries
   receive an argument tuple (METH_VARARGS), except _set_babt which receives
   its single argument directly (METH_O).  The table ends with a sentinel
   entry whose name is NULL. */
static PyMethodDef module_functions[] = {
{"count_n", (PyCFunction) count_n, METH_VARARGS, count_n_doc},
{"rindex", (PyCFunction) r_index, METH_VARARGS, rindex_doc},
{"count_and", (PyCFunction) count_and, METH_VARARGS, count_and_doc},
{"count_or", (PyCFunction) count_or, METH_VARARGS, count_or_doc},
{"count_xor", (PyCFunction) count_xor, METH_VARARGS, count_xor_doc},
{"subset", (PyCFunction) subset, METH_VARARGS, subset_doc},
{"_set_babt", (PyCFunction) set_babt, METH_O, ""},
{NULL, NULL} /* sentinel */
};
/*********************** Install Module **************************/
/* Module initialization.  Python 3 looks up PyInit__util(), which returns
   the new module object (or NULL with an exception set on failure);
   Python 2 looks up init_util(), which returns nothing. */
#ifdef IS_PY3K

static PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    "_util",                  /* module name */
    0,                        /* no module docstring */
    -1,                       /* m_size */
    module_functions,         /* method table */
};

PyMODINIT_FUNC
PyInit__util(void)
{
    /* a NULL result from PyModule_Create() is passed on to the caller */
    return PyModule_Create(&moduledef);
}

#else  /* Python 2 */

PyMODINIT_FUNC
init_util(void)
{
    /* nothing else to set up, so the result is not needed here */
    (void) Py_InitModule3("_util", module_functions, 0);
}

#endif