Datastructs

What is it?

  • A C extension that provides simple data structures
  • Currently provides the dict-like types Int2Int and Int2Float

Why?

  • Stored data are accessible from both Python and C
  • Package provides .h and .pxd headers
  • So you can fill in the data in Python and do the computation in C or Cython

Example - Python way

import collections

# Plain record type with three fields: id, a and b.
Item = collections.namedtuple('Item', ['id', 'a', 'b'])

# Lookup table of Item records keyed by integer (presumably the item's id).
# Item(...) is a placeholder: the real field values are omitted in this example.
items = {
   762: Item(...),
   134: Item(...),
     2: Item(...),
   912: Item(...),
   741: Item(...),
    73: Item(...),
   235: Item(...),
   192: Item(...),
}

def compute(ids):
    """Return a dict mapping each id in *ids* to ``a + b`` of its item.

    Every id is looked up in the module-level ``items`` mapping, so an id
    that is not present there propagates the lookup error (``KeyError``
    for a plain dict).
    """
    # Pair each requested id with its record, one lookup per id.
    selected = ((key, items[key]) for key in ids)
    return {key: entry.a + entry.b for key, entry in selected}

# Sum a and b for the items stored under keys 134, 235 and 73.
compute([134, 235, 73])

Example - datastructs way

memory model

     0   1   2   3   4   5   6   7
   ---------------------------------
ID |   |   |   |   |   |   |   |   |
   ---------------------------------

   ---------------------------------
 A |   |   |   |   |   |   |   |   |
   ---------------------------------

   ---------------------------------
 B |   |   |   |   |   |   |   |   |
   ---------------------------------

py

import array

from datastructs.hashmap import Int2Int

# One array per field, typecode 'L' (unsigned long); the initializers are
# omitted in this example.
a = array.array('L', ...)
b = array.array('L', ...)

# Map each id to an index into the arrays above.
# NOTE(review): the argument 8 presumably sizes the table for the eight
# slots shown in the memory model -- confirm against the Int2Int docs.
id2pos = Int2Int(8)
id2pos[762] = 0
id2pos[134] = 1
...

# Resolve an id to its position, then read both fields at that index.
pos = id2pos[134]
print(a[pos], b[pos])

cython

from cpython cimport array
from hashmap cimport Int2IntHashTable_t, int2int_get

def compute(ids, id2pos, array.array a, array.array b):
    """Return a dict mapping each id in ``ids`` to ``a[pos] + b[pos]``.

    ``pos`` is obtained for every id by calling ``int2int_get`` on the hash
    table whose address ``id2pos.get_ptr()`` returns. ``a`` and ``b`` are
    read through their raw unsigned long buffers. ``KeyError(id)`` is
    raised as soon as ``int2int_get`` returns a true value for an id.
    """
    cdef size_t key
    cdef size_t slot

    # Raw C views of the two arrays, so the loop indexes them without
    # going through Python item access.
    cdef unsigned long int * a_values = a.data.as_ulongs
    cdef unsigned long int * b_values = b.data.as_ulongs

    # get_ptr() hands the table address over as an integer; cast it back
    # to the C struct pointer expected by int2int_get.
    cdef size_t table_addr = id2pos.get_ptr()
    cdef Int2IntHashTable_t * table = <Int2IntHashTable_t *> table_addr

    cdef totals = {}

    for key in ids:
        # A true return value is treated as "lookup failed"; on the other
        # path the position has been written into slot.
        if int2int_get(table, key, &slot):
            raise KeyError(key)

        totals[key] = a_values[slot] + b_values[slot]

    return totals