#include <Python.h>
#include "structmember.h"
#include <sys/time.h>

// Compatibility shim for Python older than 2.5, which predates Py_ssize_t.
// Background: PEP 353
// http://www.python.org/dev/peps/pep-0353/
#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
typedef int Py_ssize_t;
#define PY_SSIZE_T_MAX INT_MAX
#define PY_SSIZE_T_MIN INT_MIN
// This one was taken from:
// http://svn.python.org/projects/python/trunk/Modules/_ctypes/ctypes.h
// Note: this fallback ignores its `exc` argument and converts via PyInt_AsLong.
#define PyNumber_AsSsize_t(ob, exc) PyInt_AsLong(ob)
#endif

/**

TODO: 
- Check max supported depth of recursion
- CLazyLinker should add context information to errors caught during evaluation. Say what node we were on, add the traceback attached to the node.
- Clear containers of fully-used intermediate results if allow_gc is 1
- Add timers for profiling
- Add support for profiling space used.


  */
/**
  Convert a timeval into seconds, expressed as a double.

  If `tv` is NULL, the current time as reported by gettimeofday() is used
  instead.
  */
static double pytime(const struct timeval * tv)
{
  struct timeval now;
  const struct timeval * src = tv;
  if (src == NULL)
    {
      gettimeofday(&now, NULL);
      src = &now;
    }
  const double seconds = (double) src->tv_sec;
  const double fraction = (double) src->tv_usec / 1000000.0;
  return seconds + fraction;
}

/**
  Helper routine to convert a PyList of integers to a C array of Py_ssize_t.

  pylist: object to unpack; must be a Python list.
  dst:    receives a malloc'd buffer that the caller must free(). *dst must
          be NULL on entry and is left NULL on failure.
  len:    receives the number of elements in the list.
  kwname: argument name used in the TypeError message.

  Returns 0 on success, or -1 with a Python exception set.
  */
static int unpack_list_of_ssize_t(PyObject * pylist, Py_ssize_t **dst, Py_ssize_t *len,
                                  const char* kwname)
{
  Py_ssize_t buflen, *buf;
  if (!PyList_Check(pylist))
    {
      PyErr_Format(PyExc_TypeError, "%s must be list", kwname);
      return -1;
    }
  assert (NULL == *dst);
  *len = buflen = PyList_Size(pylist);
  // Always request room for at least one element: malloc(0) is allowed to
  // return NULL, which must not be mistaken for an out-of-memory condition.
  buf = (Py_ssize_t*)malloc((buflen > 0 ? buflen : 1) * sizeof(Py_ssize_t));
  if (!buf)
    {
      PyErr_NoMemory();
      return -1;
    }
  for (Py_ssize_t ii = 0; ii < buflen; ++ii)
    {
      PyObject * el_i = PyList_GetItem(pylist, ii); // borrowed reference
      Py_ssize_t n_i = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
      if (PyErr_Occurred())
        {
          free(buf);
          return -1;
        }
      buf[ii] = n_i;
    }
  // Publish the buffer only once it is completely filled.
  *dst = buf;
  return 0;
}

/**

  CLazyLinker

  Instance layout of the CLazyLinker extension type. It holds references to
  the Python-side lists handed to the constructor together with C arrays
  unpacked from them, so that evaluation can index nodes and variables by
  integer position.

  */
typedef struct {
    PyObject_HEAD
    /* Type-specific fields go here. */
    PyObject * nodes; // the python list of nodes (owned reference)
    PyObject * thunks; // python list of thunks, one per node (owned reference)
    PyObject * pre_call_clear; // list of cells whose slot 0 is reset to None at the start of each call
    int allow_gc; // stored from the constructor argument of the same name
    Py_ssize_t n_applies; // number of nodes, i.e. len(nodes)
    int n_vars;    // number of variables in the graph
    int * var_computed; // 1 or 0 for every variable
    PyObject ** var_computed_cells; // per-variable cell; slot 0 mirrors var_computed as a Python int
    PyObject ** var_value_cells; // per-variable cell; slot 0 holds the variable's value

    Py_ssize_t n_output_vars;
    Py_ssize_t * output_vars; // variables that *must* be evaluated by call

    int * is_lazy; // 1 or 0 for every thunk

    Py_ssize_t * var_owner; // nodes[var_owner[var_idx]] is var[var_idx]->owner
    int * var_has_owner; //  1 or 0; var_owner[i] is only meaningful when this is 1

    Py_ssize_t * node_n_inputs; // number of inputs of every node
    Py_ssize_t * node_n_outputs; // number of outputs of every node
    Py_ssize_t ** node_inputs; // node_inputs[n][i] is the variable index of input i of node n
    Py_ssize_t ** node_outputs; // node_outputs[n][i] is the variable index of output i of node n
    Py_ssize_t * node_inputs_outputs_base; // node_inputs and node_outputs point into this
    Py_ssize_t * node_n_prereqs; // number of prerequisite variables of every node
    Py_ssize_t ** node_prereqs; // node_prereqs[n] lists variables evaluated before node n runs

    Py_ssize_t * update_storage; // dst0, src0, dst1, src1, ... cells to switch after a call
    Py_ssize_t n_updates; // number of (dst, src) pairs in update_storage

    void ** thunk_cptr_fn; // per node: C entry point taken from thunk.cthunk, or NULL
    void ** thunk_cptr_data; // per node: argument passed to thunk_cptr_fn, or NULL
    PyObject * call_times; // python list of accumulated seconds per thunk (owned reference)
    PyObject * call_counts; // python list of call counts per thunk (owned reference)
    int do_timing; // nonzero means thunk calls are timed and counted
    int need_update_inputs;
    int position_of_error; // -1 for no error, otw the index into `thunks` that failed.
} CLazyLinker;


/**
  tp_dealloc slot: release every C buffer and Python reference owned by the
  instance, then hand the object back to its type's tp_free.

  Each array is guarded by its own pointer and NULL cell slots are skipped,
  so an instance whose buffers were only partly allocated does not make this
  routine dereference a NULL pointer.
  */
static void
CLazyLinker_dealloc(PyObject* _self)
{
  CLazyLinker* self = (CLazyLinker *) _self;
  free(self->thunk_cptr_fn);
  free(self->thunk_cptr_data);

  free(self->is_lazy);

  free(self->update_storage);

  // node_prereqs is the array being indexed, so it must be non-NULL itself;
  // node_n_prereqs is kept in the test to preserve the original condition.
  if (self->node_prereqs && self->node_n_prereqs)
    {
      for (Py_ssize_t i = 0; i < self->n_applies; ++i)
        {
          free(self->node_prereqs[i]);
        }
    }
  free(self->node_n_prereqs);
  free(self->node_prereqs);
  free(self->node_inputs_outputs_base);
  free(self->node_n_inputs);
  free(self->node_n_outputs);
  free(self->node_inputs);
  free(self->node_outputs);

  free(self->var_owner);
  free(self->var_has_owner);
  free(self->var_computed);
  // The two cell arrays are allocated separately, so guard them separately.
  if (self->var_computed_cells)
    {
      for (int i = 0; i < self->n_vars; ++i)
        {
          Py_XDECREF(self->var_computed_cells[i]);
        }
    }
  if (self->var_value_cells)
    {
      for (int i = 0; i < self->n_vars; ++i)
        {
          Py_XDECREF(self->var_value_cells[i]);
        }
    }
  free(self->var_computed_cells);
  free(self->var_value_cells);
  free(self->output_vars);

  Py_XDECREF(self->nodes);
  Py_XDECREF(self->thunks);
  Py_XDECREF(self->call_times);
  Py_XDECREF(self->call_counts);
  Py_XDECREF(self->pre_call_clear);
  self->ob_type->tp_free((PyObject*)self);
}
/**
  Allocate a CLazyLinker through the type's tp_alloc and put every field in
  its "empty" state: pointers NULL, counters 0, allow_gc 1 and
  position_of_error -1. Returns NULL if the allocation fails.

  `args` and `kwds` are not used here.
  */
static PyObject *
CLazyLinker_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    CLazyLinker *obj = (CLazyLinker *)type->tp_alloc(type, 0);
    if (obj == NULL)
      {
        return (PyObject *)obj;
      }

    /* Python objects held by the instance */
    obj->nodes = NULL;
    obj->thunks = NULL;
    obj->pre_call_clear = NULL;
    obj->call_times = NULL;
    obj->call_counts = NULL;

    /* scalar settings and counters */
    obj->allow_gc = 1;
    obj->n_applies = 0;
    obj->n_vars = 0;
    obj->n_output_vars = 0;
    obj->n_updates = 0;
    obj->do_timing = 0;
    obj->need_update_inputs = 0;
    obj->position_of_error = -1;

    /* per-variable arrays */
    obj->var_computed = NULL;
    obj->var_computed_cells = NULL;
    obj->var_value_cells = NULL;
    obj->var_owner = NULL;
    obj->var_has_owner = NULL;
    obj->output_vars = NULL;

    /* per-node arrays */
    obj->is_lazy = NULL;
    obj->node_n_inputs = NULL;
    obj->node_n_outputs = NULL;
    obj->node_inputs = NULL;
    obj->node_outputs = NULL;
    obj->node_inputs_outputs_base = NULL;
    obj->node_prereqs = NULL;
    obj->node_n_prereqs = NULL;
    obj->thunk_cptr_data = NULL;
    obj->thunk_cptr_fn = NULL;

    obj->update_storage = NULL;

    return (PyObject *)obj;
}

static int
CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {
      (char*)"nodes",
      (char*)"thunks",
      (char*)"pre_call_clear",
      (char*)"allow_gc",
      (char*)"call_counts",
      (char*)"call_times",
      (char*)"compute_map_list",
      (char*)"storage_map_list",
      (char*)"base_input_output_list",
      (char*)"node_n_inputs",
      (char*)"node_n_outputs",
      (char*)"node_input_offset",
      (char*)"node_output_offset",
      (char*)"var_owner",
      (char*)"is_lazy_list",
      (char*)"output_vars",
      (char*)"node_prereqs",
      (char*)"node_output_size",
      (char*)"update_storage",
      NULL};

    PyObject *compute_map_list=NULL,
             *storage_map_list=NULL,
             *base_input_output_list=NULL,
             *node_n_inputs=NULL,
             *node_n_outputs=NULL,
             *node_input_offset=NULL,
             *node_output_offset=NULL,
             *var_owner=NULL,
             *is_lazy=NULL,
             *output_vars=NULL,
             *node_prereqs=NULL,
             *node_output_size=NULL,
             *update_storage=NULL;

    assert(!self->nodes);
    if (! PyArg_ParseTupleAndKeywords(args, kwds, "OOOiOOOOOOOOOOOOOOO", kwlist,
                                      &self->nodes,
                                      &self->thunks,
                                      &self->pre_call_clear,
                                      &self->allow_gc,
                                      &self->call_counts,
                                      &self->call_times,
                                      &compute_map_list,
                                      &storage_map_list,
                                      &base_input_output_list,
                                      &node_n_inputs,
                                      &node_n_outputs,
                                      &node_input_offset,
                                      &node_output_offset,
                                      &var_owner,
                                      &is_lazy,
                                      &output_vars,
                                      &node_prereqs,
                                      &node_output_size,
                                      &update_storage
                                      ))
        return -1;
    Py_INCREF(self->nodes);
    Py_INCREF(self->thunks);
    Py_INCREF(self->pre_call_clear);
    Py_INCREF(self->call_counts);
    Py_INCREF(self->call_times);

    Py_ssize_t n_applies = PyList_Size(self->nodes);

    self->n_applies = n_applies;
    self->n_vars = PyList_Size(var_owner);

    if (PyList_Size(self->thunks) != n_applies) return -1;
    if (PyList_Size(self->call_counts) != n_applies) return -1;
    if (PyList_Size(self->call_times) != n_applies) return -1;

    // allocated and initialize thunk_cptr_data and thunk_cptr_fn
    if (n_applies)
      {
        self->thunk_cptr_data = (void**)malloc(n_applies * sizeof(void*));
        self->thunk_cptr_fn = (void**)malloc(n_applies * sizeof(void*));
        self->is_lazy = (int*)malloc(n_applies * sizeof(int));
        self->node_prereqs = (Py_ssize_t**)malloc(n_applies*sizeof(Py_ssize_t*));
        self->node_n_prereqs = (Py_ssize_t*)malloc(n_applies*sizeof(Py_ssize_t));
        assert(self->node_prereqs);
        assert(self->node_n_prereqs);
        assert(self->is_lazy);
        assert(self->thunk_cptr_fn);
        assert(self->thunk_cptr_data);

        // init these basic arrays
        for (int i = 0; i < n_applies; ++i)
          {
            self->thunk_cptr_data[i] = NULL;
            self->thunk_cptr_fn[i] = NULL;
            self->is_lazy[i] = 1;
            self->node_prereqs[i] = NULL;
            self->node_n_prereqs[i] = 0;
          }

        for (int i = 0; i < n_applies; ++i)
          {
            PyObject * thunk = PyList_GetItem(self->thunks, i);
            //thunk is borrowed
            if (PyObject_HasAttrString(thunk, "cthunk"))
              {
                PyObject * cthunk = PyObject_GetAttrString(thunk, "cthunk");
                //new reference
                assert (cthunk && PyCObject_Check(cthunk));
                self->thunk_cptr_fn[i] = PyCObject_AsVoidPtr(cthunk);
                self->thunk_cptr_data[i] = PyCObject_GetDesc(cthunk);
                Py_DECREF(cthunk);
                // cthunk is kept alive by membership in self->thunks
              }
            else
              {
                self->thunk_cptr_fn[i] = NULL;
                self->thunk_cptr_data[i] = NULL;
              }

            PyObject * el_i = PyList_GetItem(is_lazy, i);
            self->is_lazy[i] = PyNumber_AsSsize_t(el_i, NULL);

            /* now get the prereqs */
            el_i = PyList_GetItem(node_prereqs, i);
            assert (PyList_Check(el_i));
            self->node_n_prereqs[i] = PyList_Size(el_i);
            if (self->node_n_prereqs[i])
              {
                self->node_prereqs[i] = (Py_ssize_t*)malloc(
                              PyList_Size(el_i)*sizeof(Py_ssize_t));
                for (int j = 0; j < PyList_Size(el_i); ++j)
                  {
                    PyObject * el_ij = PyList_GetItem(el_i, j);
                    Py_ssize_t N = PyNumber_AsSsize_t(el_ij, PyExc_IndexError);
                    if (PyErr_Occurred())
                      return -1;
                    // N < n. variables
                    assert(N < PyList_Size(var_owner));
                    self->node_prereqs[i][j] = N;
                  }
              }
          }
      }
    if (PyList_Check(base_input_output_list))
      {
        Py_ssize_t n_inputs_outputs_base = PyList_Size(base_input_output_list);
        self->node_inputs_outputs_base = (Py_ssize_t*)malloc(n_inputs_outputs_base*sizeof(Py_ssize_t));
        assert(self->node_inputs_outputs_base);
        for (int i = 0; i < n_inputs_outputs_base; ++i)
          {
            PyObject *el_i = PyList_GetItem(base_input_output_list, i);
            Py_ssize_t idx = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
            if (PyErr_Occurred()) return -1;
            self->node_inputs_outputs_base[i] = idx;
          }
        self->node_n_inputs = (Py_ssize_t*)malloc(n_applies*sizeof(Py_ssize_t));
        assert(self->node_n_inputs);
        self->node_n_outputs = (Py_ssize_t*)malloc(n_applies*sizeof(Py_ssize_t));
        assert(self->node_n_outputs);
        self->node_inputs = (Py_ssize_t**)malloc(n_applies*sizeof(Py_ssize_t*));
        assert(self->node_inputs);
        self->node_outputs = (Py_ssize_t**)malloc(n_applies*sizeof(Py_ssize_t*));
        assert(self->node_outputs);
        for (int i = 0; i < n_applies; ++i)
          {
            Py_ssize_t N;
            N = PyNumber_AsSsize_t(PyList_GetItem(node_n_inputs, i),PyExc_IndexError);
            if (PyErr_Occurred()) return -1;
            assert (N <= n_inputs_outputs_base);
            self->node_n_inputs[i] = N;
            N = PyNumber_AsSsize_t(PyList_GetItem(node_n_outputs, i),PyExc_IndexError);
            if (PyErr_Occurred()) return -1;
            assert (N <= n_inputs_outputs_base);
            self->node_n_outputs[i] = N;
            N = PyNumber_AsSsize_t(PyList_GetItem(node_input_offset, i),PyExc_IndexError);
            if (PyErr_Occurred()) return -1;
            assert (N <= n_inputs_outputs_base);
            self->node_inputs[i] = &self->node_inputs_outputs_base[N];
            N = PyNumber_AsSsize_t(PyList_GetItem(node_output_offset, i),PyExc_IndexError);
            if (PyErr_Occurred()) return -1;
            assert (N <= n_inputs_outputs_base);
            self->node_outputs[i] = &self->node_inputs_outputs_base[N];
          }
      }
    else
      {
        PyErr_SetString(PyExc_TypeError, "base_input_output_list must be list");
        return -1;
      }

    // allocation for var_owner
    if (PyList_Check(var_owner))
      {
        self->var_owner = (Py_ssize_t*)malloc(self->n_vars*sizeof(Py_ssize_t));
        self->var_has_owner = (int*)malloc(self->n_vars*sizeof(int));
        self->var_computed = (int*)malloc(self->n_vars*sizeof(int));
        self->var_computed_cells = (PyObject**)malloc(self->n_vars*sizeof(PyObject*));
        self->var_value_cells = (PyObject**)malloc(self->n_vars*sizeof(PyObject*));
        for (int i = 0; i < self->n_vars; ++i)
          {
            PyObject * el_i = PyList_GetItem(var_owner, i);
            if (el_i == Py_None)
              {
                self->var_has_owner[i] = 0;
              }
            else
              {
                Py_ssize_t N = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
                if (PyErr_Occurred()) return -1;
                assert (N <= n_applies);
                self->var_owner[i] = N;
                self->var_has_owner[i] = 1;
              }
            self->var_computed_cells[i] = PyList_GetItem(compute_map_list, i);
            Py_INCREF(self->var_computed_cells[i]);
            self->var_value_cells[i] = PyList_GetItem(storage_map_list, i);
            Py_INCREF(self->var_value_cells[i]);
          }
      }
    else
      {
        PyErr_SetString(PyExc_TypeError, "var_owner must be list");
        return -1;
      }

    if (unpack_list_of_ssize_t(output_vars, &self->output_vars, &self->n_output_vars,
                               "output_vars"))
      return -1;
    for (int i = 0; i < self->n_output_vars; ++i)
      {
        assert(self->output_vars[i] < self->n_vars);
      }
    if (unpack_list_of_ssize_t(update_storage, &self->update_storage, &self->n_updates,
                               "updates_storage"))
      return -1;
    assert((self->n_updates % 2) == 0);
    self->n_updates /= 2;
    return 0;
}
// Record which thunk failed. Only the first failure is kept: once
// position_of_error differs from -1, later calls leave it untouched.
static void set_position_of_error(CLazyLinker * self, int owner_idx)
{
  if (self->position_of_error != -1)
    {
      return;
    }
  self->position_of_error = owner_idx;
}
/**
  Call thunks[node_idx] through the Python calling convention, with no
  arguments, and return whatever it returns (a new reference, or NULL when
  the call raised).

  When do_timing is set, the elapsed time is added to call_times[node_idx]
  and call_counts[node_idx] is incremented.
  */
static PyObject * pycall(CLazyLinker * self, Py_ssize_t node_idx, int verbose)
{
  // borrowed reference, kept alive by self->thunks
  PyObject * thunk = PyList_GetItem(self->thunks, node_idx);
  const int timed = self->do_timing;
  double started = 0.0;

  if (timed)
    started = pytime(NULL);
  if (verbose)
    fprintf(stderr, "calling via Python (node %i)\n", (int)node_idx);
  PyObject * result = PyObject_CallObject(thunk, NULL);
  if (!timed)
    return result;

  // fold this call into the per-node statistics
  double finished = pytime(NULL);
  double accumulated = PyFloat_AsDouble(PyList_GetItem(self->call_times, node_idx));
  PyList_SetItem(self->call_times, node_idx,
                 PyFloat_FromDouble(finished - started + accumulated));
  long n_so_far = PyInt_AsLong(PyList_GetItem(self->call_counts, node_idx));
  PyList_SetItem(self->call_counts, node_idx, PyInt_FromLong(n_so_far+1));
  return result;
}
/**
  Run the C implementation of thunk `node_idx` directly, bypassing the
  Python call machinery, and return its integer status (0 means success).

  On a nonzero status the exception triple found in the first slot of the
  thunk's data block is moved into the interpreter's error indicator and the
  node is recorded as the position of the error. Timing statistics are
  updated when do_timing is set.
  */
static int c_call(CLazyLinker * self, Py_ssize_t node_idx, int verbose)
{
  int (*thunk_fn)(void*) = (int (*)(void*))(self->thunk_cptr_fn[node_idx]);
  if (verbose) fprintf(stderr, "calling non-lazy shortcut (node %i)\n", (int)node_idx);

  int status = 0;
  if (!self->do_timing)
    {
      status = thunk_fn(self->thunk_cptr_data[node_idx]);
    }
  else
    {
      double started = pytime(NULL);
      status = thunk_fn(self->thunk_cptr_data[node_idx]);
      double finished = pytime(NULL);
      double accumulated = PyFloat_AsDouble(PyList_GetItem(self->call_times, node_idx));
      PyList_SetItem(self->call_times, node_idx,
                     PyFloat_FromDouble(finished - started + accumulated));
      long n_so_far = PyInt_AsLong(PyList_GetItem(self->call_counts, node_idx));
      PyList_SetItem(self->call_counts, node_idx, PyInt_FromLong(n_so_far+1));
    }

  if (!status)
    return status;

  // The first pointer of the data block is treated as a 3-element list
  // [type, value, traceback] (as described near line 226 of cc.py).
  PyObject * error_cell = ((PyObject**)self->thunk_cptr_data[node_idx])[0];
  assert (PyList_Check(error_cell));
  assert (PyList_Size(error_cell) == 3);

  // Borrow the three items, then overwrite the slots with None *without*
  // releasing the old contents: the list's references thereby become ours.
  PyObject * exc_type = PyList_GetItem(error_cell, 0);
  PyObject * exc_value = PyList_GetItem(error_cell, 1);
  PyObject * exc_trace = PyList_GetItem(error_cell, 2);
  for (int slot = 0; slot < 3; ++slot)
    {
      Py_INCREF(Py_None);
      PyList_SET_ITEM(error_cell, slot, Py_None);
    }

  assert(!PyErr_Occurred()); // because CLinker hid the exception in the data block
  PyErr_Restore(exc_type, exc_value, exc_trace); // takes over our three references

  set_position_of_error(self, node_idx);
  return status;
}
/**
  Recursively make sure that variable `var_idx` is computed.

  Nothing is done for a variable that is already computed or has no owner.
  Otherwise the owner's prerequisites are evaluated first and then the owner
  itself:
    - a lazy owner is called through Python; it either computes its outputs
      or returns a list of the input positions it still needs, which are
      evaluated before the owner is retried;
    - a non-lazy owner has all of its inputs evaluated and is then run via
      its C entry point when it has one, or through Python otherwise.

  `one` and `zero` are the Python ints written into the compute-map cells.

  Returns 0 on success. On failure returns nonzero with a Python exception
  set, and position_of_error identifies the first node that failed.
  */
static
int lazy_rec_eval(CLazyLinker * self, Py_ssize_t var_idx, PyObject*one, PyObject*zero)
{
  int verbose = 0;
  if (verbose) fprintf(stderr, "lazy_rec computing %i\n", (int)var_idx);
  int err = 0;
  if (self->var_computed[var_idx] || !self->var_has_owner[var_idx])
    {
      return 0;
    }
  else
    {
      Py_ssize_t owner_idx = self->var_owner[var_idx];

      // STEP 1: compute the pre-requirements of the node
      for (Py_ssize_t i = 0; i < self->node_n_prereqs[owner_idx]; ++i)
        {
          Py_ssize_t prereq_idx = self->node_prereqs[owner_idx][i];
          if (!self->var_computed[prereq_idx])
            {
              err = lazy_rec_eval(self, prereq_idx, one, zero);
              if (err) return err;
            }
          assert (self->var_computed[prereq_idx]);
        }

      // STEP 2: compute the node itself
      if (self->is_lazy[owner_idx])
        {
          // update the compute_map cells corresponding to the inputs of this thunk
          for (Py_ssize_t i = 0; i < self->node_n_inputs[owner_idx] && (!err); ++i)
            {
              Py_ssize_t in_idx = self->node_inputs[owner_idx][i];
              PyObject * flag = self->var_computed[in_idx] ? one : zero;
              // PyList_SetItem takes over this reference, even when it fails
              Py_INCREF(flag);
              err = PyList_SetItem(self->var_computed_cells[in_idx], 0, flag);
            }
          if (err)
            {
              set_position_of_error(self, (int)owner_idx);
              return err;
            }

          PyObject * rval = pycall(self, owner_idx, verbose);
          // refcounting - rval is new ref
          //TODO: to prevent infinite loops
          // - consider check that a thunk does not ask for an input that is already computed
          if (rval) //call returned normally (no exception)
            {
              //update the computed-ness of any output cells
              for (Py_ssize_t i = 0; i < self->node_n_outputs[owner_idx]; ++i)
                {
                  Py_ssize_t out_idx = self->node_outputs[owner_idx][i];
                  PyObject * el_i = PyList_GetItem(self->var_computed_cells[out_idx], 0);
                  Py_ssize_t N = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
                  if (PyErr_Occurred())
                    {
                      Py_DECREF(rval);
                      set_position_of_error(self, (int)owner_idx);
                      return -1;
                    }
                  assert (N==0 || N==1);
                  self->var_computed[out_idx] = N;
                }
              if (!self->var_computed[var_idx])
                {
                  if (PyList_Check(rval))
                    {
                      if (PyList_Size(rval))
                        {
                          // the thunk listed the input positions it still needs
                          for (Py_ssize_t i = 0; i < PyList_Size(rval) && (!err); ++i)
                            {
                              PyObject * el_i = PyList_GetItem(rval, i);
                              Py_ssize_t N = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
                              if (PyErr_Occurred())
                                {
                                  err = 1;
                                }
                              else if (N < 0 || N >= self->node_n_inputs[owner_idx])
                                {
                                  // N comes from Python code: never index with it unchecked
                                  PyErr_SetString(PyExc_IndexError,
                                            "lazy thunk requested an input that is out of range");
                                  err = 1;
                                }
                              else
                                {
                                  Py_ssize_t input_idx = self->node_inputs[owner_idx][N];
                                  err = lazy_rec_eval(self, input_idx, one, zero);
                                }
                            }
                          if (!err)
                            err = lazy_rec_eval(self, var_idx, one, zero);
                        }
                      else
                        {
                          PyErr_SetString(PyExc_ValueError,
                                    "lazy thunk returned empty list without computing output");
                          err = 1;
                        }
                      Py_DECREF(rval);
                      // Only flag this node when something actually failed;
                      // a deeper failure has already recorded its own
                      // position, which set_position_of_error preserves.
                      if (err) set_position_of_error(self, (int)owner_idx);
                      return err;
                    }
                  else // don't know what it returned, but it wasn't right.
                    {
                      //TODO: More helpful error to help find *which node* made this
                      // bad thunk
                      PyErr_SetString(PyExc_TypeError,
                                      "lazy thunk should list");
                      Py_DECREF(rval);
                      set_position_of_error(self, (int)owner_idx);
                      return 1;
                    }
                }
              Py_DECREF(rval);
            }
          else // pycall returned NULL (internal error)
            {
              assert (PyErr_Occurred());
              set_position_of_error(self, (int)owner_idx);
              return 1;
            }
        }
      else //owner is not a lazy op. Ensure all inputs are evaluated.
        {
          // loop over inputs to owner
          // call lazy_rec_eval on each one that is not computed.
          // if there's an error, pass it up the stack
          for (Py_ssize_t i = 0; i < self->node_n_inputs[owner_idx]; ++i)
            {
              Py_ssize_t input_idx = self->node_inputs[owner_idx][i];
              if (!self->var_computed[input_idx])
                {
                  err = lazy_rec_eval(self, input_idx, one, zero);
                  if (err) return err;
                }
              assert (self->var_computed[input_idx]);
            }

          // call the thunk for this owner.
          if (self->thunk_cptr_fn[owner_idx])
            {
              err = c_call(self, owner_idx, verbose);
            }
          else
            {
              PyObject * rval = pycall(self, owner_idx, verbose);
              //rval is new ref
              if (rval) //pycall returned normally (no exception)
                {
                  if (rval != Py_None)
                    {
                      if (PyList_Check(rval))
                        {
                          PyErr_SetString(PyExc_TypeError,
                                          "non-lazy thunk should return None, not list");
                        }
                      else // don't know what it returned, but it wasn't right.
                        {
                          // PyErr_SetObject takes its own reference to rval
                          PyErr_SetObject(PyExc_TypeError, rval);
                        }
                      err=1;
                      set_position_of_error(self, (int)owner_idx);
                    }
                  // released on every path; a return of None is simply ignored
                  Py_DECREF(rval);
                }
              else // pycall returned NULL (internal error)
                {
                  err=1;
                  set_position_of_error(self, (int)owner_idx);
                }
            }
        }

      // loop over all outputs and mark them as computed
      for (Py_ssize_t i = 0; i < self->node_n_outputs[owner_idx] && (!err); ++i)
        {
          self->var_computed[self->node_outputs[owner_idx][i]] = 1;
        }
    }

  return err;
}
/**
  tp_call slot: evaluate the graph.

  Optional arguments:
    time_thunks - stored into do_timing; nonzero enables per-thunk timing.
    n_calls     - how many times to run the evaluation (default 1).

  Each run resets slot 0 of every pre_call_clear cell to None, resets the
  computed flags (variables without an owner count as computed), evaluates
  every output variable and finally moves the update_storage sources into
  their destinations.

  Returns None on success, or NULL with a Python exception set. After a
  failure the storage updates of that run are not applied, so destination
  cells keep their previous contents.
  */
PyObject *
CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds)
{
  CLazyLinker * self = (CLazyLinker*)_self;
  static char *kwlist[] = {
    (char*)"time_thunks",
    (char *)"n_calls",
    NULL};
  int n_calls=1;
  if (! PyArg_ParseTupleAndKeywords(args, kwds, "|ii", kwlist,
                                    &self->do_timing,
                                    &n_calls))
    return NULL;
  // pre_call_clear is still NULL when the instance was never initialised
  if (!self->pre_call_clear || !PyList_Check(self->pre_call_clear))
    {
      PyErr_SetString(PyExc_TypeError, "pre_call_clear must be list");
      return NULL;
    }
  int err = 0;
  self->position_of_error = -1;
  PyObject * one = PyInt_FromLong(1);
  PyObject * zero = PyInt_FromLong(0);
  if (!one || !zero)
    {
      Py_XDECREF(one);
      Py_XDECREF(zero);
      return NULL;
    }
  for (int call_i = 0; call_i < n_calls && (!err); ++call_i)
    {
      //clear storage of pre_call_clear elements
      Py_ssize_t n_pre_call_clear = PyList_Size(self->pre_call_clear);
      for (Py_ssize_t i = 0; i < n_pre_call_clear && (!err); ++i)
        {
          PyObject * el_i = PyList_GetItem(self->pre_call_clear, i);
          if (!el_i)
            {
              err = 1;
              break;
            }
          // PyList_SetItem consumes the new reference, even when it fails
          Py_INCREF(Py_None);
          if (PyList_SetItem(el_i, 0, Py_None)) err = 1;
        }
      //clear the computed flag out of all non-input vars
      for (int i = 0; i < self->n_vars && (!err); ++i)
        {
          self->var_computed[i] = !self->var_has_owner[i];
          PyObject * flag = self->var_computed[i] ? one : zero;
          Py_INCREF(flag);
          if (PyList_SetItem(self->var_computed_cells[i], 0, flag)) err = 1;
        }

      for (Py_ssize_t i = 0; i < self->n_output_vars && (!err); ++i)
        {
          err = lazy_rec_eval(self, self->output_vars[i], one, zero);
        }

      // Move every update source into its destination, but only after a
      // fully successful evaluation: a failed run must not overwrite the
      // destination cells with stale or cleared values.
      for (Py_ssize_t i = 0; i < self->n_updates && (!err); ++i)
        {
          Py_ssize_t dst = self->update_storage[2*i];
          Py_ssize_t src = self->update_storage[2*i+1];
          PyObject* tmp = PyList_GetItem(self->var_value_cells[src], 0);
          if (!tmp)
            {
              err = 1;
              break;
            }
          Py_INCREF(tmp);
          if (PyList_SetItem(self->var_value_cells[dst], 0, tmp))
            {
              err = 1;
              break;
            }
          Py_INCREF(Py_None);
          if (PyList_SetItem(self->var_value_cells[src], 0, Py_None)) err = 1;
        }
    }
  Py_DECREF(one);
  Py_DECREF(zero);
  if (err) return NULL;
  Py_INCREF(Py_None);
  return Py_None;
}

// Placeholder method table, compiled out: the type currently exposes no
// methods (its tp_methods slot is 0).
#if 0
static PyMethodDef CLazyLinker_methods[] = {
    {
      //"name", (PyCFunction)CLazyLinker_accept, METH_VARARGS, "Return the name, combining the first and last name"
    },
    {NULL}  /* Sentinel */
};
#endif

// Attributes of CLazyLinker instances that are visible from Python.
// All entries use flags 0, i.e. none is marked read-only. Note that the
// Python-level name "time_thunks" maps onto the C field do_timing.
static PyMemberDef CLazyLinker_members[] = {
    {(char*)"nodes", T_OBJECT_EX, offsetof(CLazyLinker, nodes), 0,
     (char*)"list of nodes"},
    {(char*)"thunks", T_OBJECT_EX, offsetof(CLazyLinker, thunks), 0,
     (char*)"list of thunks in program"},
    {(char*)"call_counts", T_OBJECT_EX, offsetof(CLazyLinker, call_counts), 0,
     (char*)"number of calls of each thunk"},
    {(char*)"call_times", T_OBJECT_EX, offsetof(CLazyLinker, call_times), 0,
     (char*)"total runtime in each thunk"},
    {(char*)"position_of_error", T_INT, offsetof(CLazyLinker, position_of_error), 0,
     (char*)"position of failed thunk"},
    {(char*)"time_thunks", T_INT, offsetof(CLazyLinker, do_timing), 0,
     (char*)"bool: nonzero means call will time thunks"},
    {(char*)"need_update_inputs", T_INT, offsetof(CLazyLinker, need_update_inputs), 0,
     (char*)"bool: nonzero means Function.__call__ must implement update mechanism"},
    {NULL}  /* Sentinel */
};

// Type object for lazylinker_ext.CLazyLinker.
// The initializer is positional, so the order of the entries must follow
// the PyTypeObject layout. Slots filled in here: tp_dealloc, tp_call,
// tp_flags (the type may be subclassed), tp_doc, tp_members, tp_init and
// tp_new; every other slot is 0.
static PyTypeObject lazylinker_ext_CLazyLinkerType = {
    PyObject_HEAD_INIT(NULL)
    0,                         /*ob_size*/
    "lazylinker_ext.CLazyLinker",             /*tp_name*/
    sizeof(CLazyLinker), /*tp_basicsize*/
    0,                         /*tp_itemsize*/
    CLazyLinker_dealloc,       /*tp_dealloc*/
    0,                         /*tp_print*/
    0,                         /*tp_getattr*/
    0,                         /*tp_setattr*/
    0,                         /*tp_compare*/
    0,                         /*tp_repr*/
    0,                         /*tp_as_number*/
    0,                         /*tp_as_sequence*/
    0,                         /*tp_as_mapping*/
    0,                         /*tp_hash */
    CLazyLinker_call,          /*tp_call*/
    0,                         /*tp_str*/
    0,                         /*tp_getattro*/
    0,                         /*tp_setattro*/
    0,                         /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,        /*tp_flags*/
    "CLazyLinker object",      /* tp_doc */
    0,                         /* tp_traverse */
    0,                         /* tp_clear */
    0,                         /* tp_richcompare */
    0,                         /* tp_weaklistoffset */
    0,                         /* tp_iter */
    0,                         /* tp_iternext */
    0,//CLazyLinker_methods,       /* tp_methods */
    CLazyLinker_members,       /* tp_members */
    0,                         /* tp_getset */
    0,                         /* tp_base */
    0,                         /* tp_dict */
    0,                         /* tp_descr_get */
    0,                         /* tp_descr_set */
    0,                         /* tp_dictoffset */
    (initproc)CLazyLinker_init,/* tp_init */
    0,                         /* tp_alloc */
    CLazyLinker_new,           /* tp_new */
};

// Module-level function: return the extension's version as a Python float
// (currently 0.1). Neither argument is inspected.
static PyObject * get_version(PyObject *dummy, PyObject *args)
{
  return PyFloat_FromDouble(0.1);
}

// Module-level functions exported by lazylinker_ext.
static PyMethodDef lazylinker_ext_methods[] = {
  {"get_version",  get_version, METH_VARARGS, "Get extension version."},
  {NULL, NULL, 0, NULL}        /* Sentinel */
};

#ifndef PyMODINIT_FUNC  /* declarations for DLL import/export */
#define PyMODINIT_FUNC void
#endif
/**
  Python 2 entry point of the `lazylinker_ext` module: finalise the
  CLazyLinker type, create the module and publish the type in it as
  `CLazyLinker`.

  On failure the function returns early and leaves the Python exception set
  by the failing call in place.
  */
PyMODINIT_FUNC
initlazylinker_ext(void) 
{
    PyObject* m;

    // The type's tp_new slot is left as declared in the type object
    // (CLazyLinker_new). Overwriting it with PyType_GenericNew would skip the
    // field initialisation done there, e.g. position_of_error starting at -1.
    if (PyType_Ready(&lazylinker_ext_CLazyLinkerType) < 0)
        return;

    m = Py_InitModule3("lazylinker_ext", lazylinker_ext_methods,
                       "Example module that creates an extension type.");
    if (m == NULL)
        return;

    // PyModule_AddObject takes over this reference only when it succeeds.
    Py_INCREF(&lazylinker_ext_CLazyLinkerType);
    if (PyModule_AddObject(m, "CLazyLinker", (PyObject *)&lazylinker_ext_CLazyLinkerType) < 0)
      {
        Py_DECREF(&lazylinker_ext_CLazyLinkerType);
      }
}

