/*
#define NODED(n) (*((n)->_d))
#define NODERHS(n) (*((n)->_rhs))

#if CACHEVEC == 0
#define NODEA(n) ((n)->_a)
#define NODEB(n) ((n)->_b)
#define NODEV(n) ((n)->_v)
#define NODEAREA(n) ((n)->_area)

#else

#define NODEV(n) (*((n)->_v))
#define NODEAREA(n) ((n)->_area)
#define NODERINV(n) ((n)->_rinv)
#define VEC_A(i) (_nt->_actual_a[(i)])
#define VEC_B(i) (_nt->_actual_b[(i)])
#define VEC_D(i) (_nt->_actual_d[(i)])
#define VEC_RHS(i) (_nt->_actual_rhs[(i)])
#define VEC_V(i) (_nt->_actual_v[(i)])
#define VEC_AREA(i) (_nt->_actual_area[(i)])
#define NODEA(n) (VEC_A((n)->v_node_index))
#define NODEB(n) (VEC_B((n)->v_node_index))
#endif
*/

/*
 * Triangularization of the matrix equations.
 *
 * Visits the entries of _nt from index (end - 1) down to index ncell.
 * For each entry i it forms the factor a[i] / d[i] and uses it to subtract
 * the entry's b[i] and rhs[i] contributions from the diagonal (d) and the
 * right-hand side (rhs) of the entry's parent.  Entries with index below
 * ncell are not processed by the loop themselves; they are only touched
 * when they are the parent of a processed entry.
 *
 * _nt: thread whose d and rhs values are updated in place; the a and b
 *      values are only read.
 *
 * When CACHEVEC is enabled and use_cachevec is non-zero the contiguous
 * _actual_* arrays and _v_parent_index are used; otherwise the same
 * arithmetic is done through the Node pointers in _v_node / _v_parent.
 */
void
triang(NrnThread* _nt)
{
	int i;
	const int i2 = _nt->ncell;
	const int i3 = _nt->end;

#if CACHEVEC
	if (use_cachevec) {
		/* Array views are only needed (and only loaded) on this path. */
		const double *vec_a = _nt->_actual_a;
		const double *vec_b = _nt->_actual_b;
		double *vec_d = _nt->_actual_d;
		double *vec_rhs = _nt->_actual_rhs;
		const int *parent_index = _nt->_v_parent_index;

		/*
		 * NOTE(review): every iteration writes d/rhs at parent_index[i].
		 * If a parent index can fall inside [ncell, end) it is read again
		 * by a later iteration, and two entries may share one parent, so
		 * the noalias/norecurrence/simd hints below presumably rely on a
		 * specific node ordering -- confirm against the code that builds
		 * _v_parent_index.
		 */
#pragma loop noalias
#pragma loop norecurrence
#pragma loop prefetch
#pragma loop simd
		for (i = i3 - 1; i >= i2; --i) {
			double tmp = vec_a[i] / vec_d[i];
			vec_d[parent_index[i]] -= tmp * vec_b[i];
			vec_rhs[parent_index[i]] -= tmp * vec_rhs[i];
		}
	} else
#endif /* CACHEVEC */
	{
		/* Pointer-based path: same elimination step via the Node macros. */
		for (i = i3 - 1; i >= i2; --i) {
			Node *nd = _nt->_v_node[i];
			Node *pnd = _nt->_v_parent[i];
			double p = NODEA(nd) / NODED(nd);
			NODED(pnd) -= p * NODEB(nd);
			NODERHS(pnd) -= p * NODERHS(nd);
		}
	}
}

/*
 * Back substitution to finish solving the matrix equations.
 *
 * Works in two passes over the entries of _nt, leaving the result in rhs:
 *   1. for every index in [0, ncell):   rhs[i] /= d[i]
 *   2. for every index in [ncell, end), in increasing order:
 *          rhs[i] = (rhs[i] - b[i] * rhs[parent of i]) / d[i]
 *
 * _nt: thread whose rhs values are overwritten in place; the b and d
 *      values are only read.
 *
 * When CACHEVEC is enabled and use_cachevec is non-zero the contiguous
 * _actual_* arrays and _v_parent_index are used; otherwise the same
 * arithmetic is done through the Node pointers in _v_node / _v_parent.
 */
void
bksub(NrnThread* _nt)
{
	int i;
	const int i2 = _nt->ncell;
	const int i3 = _nt->end;

#if CACHEVEC
	if (use_cachevec) {
		/* Array views are only needed (and only loaded) on this path. */
		const double *vec_b = _nt->_actual_b;
		const double *vec_d = _nt->_actual_d;
		double *vec_rhs = _nt->_actual_rhs;
		const int *parent_index = _nt->_v_parent_index;

		/* Pass 1: each of the first ncell entries depends only on itself. */
#pragma loop noalias
#pragma loop norecurrence
#pragma loop prefetch
#pragma loop simd
		for (i = 0; i < i2; ++i) {
			vec_rhs[i] /= vec_d[i];
		}

		/*
		 * Pass 2: each entry needs the already-finished rhs of its parent.
		 * NOTE(review): if parent_index[i] can fall inside [ncell, i) the
		 * value read here was written by an earlier iteration of this same
		 * loop, so the simd hint presumably relies on the node ordering --
		 * confirm against the code that builds _v_parent_index.
		 */
#pragma loop prefetch
#pragma loop simd
		for (i = i2; i < i3; ++i) {
			vec_rhs[i] -= vec_b[i] * vec_rhs[parent_index[i]];
			vec_rhs[i] /= vec_d[i];
		}
	} else
#endif /* CACHEVEC */
	{
		/* Pointer-based path: same two passes via the Node macros. */
		for (i = 0; i < i2; ++i) {
			NODERHS(_nt->_v_node[i]) /= NODED(_nt->_v_node[i]);
		}
		for (i = i2; i < i3; ++i) {
			Node *cnd = _nt->_v_node[i];
			Node *nd = _nt->_v_parent[i];
			NODERHS(cnd) -= NODEB(cnd) * NODERHS(nd);
			NODERHS(cnd) /= NODED(cnd);
		}
	}
}

