// Default compile-time parameters for the xdp_add routines. Each #ifndef guard
// lets a build override the value by defining the macro before this point.
// NOTE(review): the matching #endif lines (and any guard around XDP_ADD_MSIZE)
// are not visible in this listing — confirm against the original header.
31 #define XDP_ADD_MSIZE 4
33 #ifndef XDP_ADD_NMATRIX
// presumably the number of matrices held in an A[2][2][2] array (2*2*2 = 8) — TODO confirm
34 #define XDP_ADD_NMATRIX 8
36 #ifndef XDP_ADD_NINPUTS
37 #define XDP_ADD_NINPUTS 2
39 #ifndef XDP_ADD_ISTATE
40 #define XDP_ADD_ISTATE 0
42 #ifndef XDP_ADD_COLSUM
// Divisor of the XDP_ADD_NORM factor (XDP_ADD_NORM is 1.0 over this value).
43 #define XDP_ADD_COLSUM 4
// Normalization factor: the reciprocal of XDP_ADD_COLSUM, as a double.
// The expansion is parenthesized as a whole so that it behaves as a single
// operand wherever it is used (e.g. `x / XDP_ADD_NORM` or `-XDP_ADD_NORM`);
// without the parentheses `x / XDP_ADD_NORM` would divide by XDP_ADD_COLSUM
// instead of multiplying by it.
#define XDP_ADD_NORM (1.0 / (double)(XDP_ADD_COLSUM))
// Declarations of routines that, according to the cross-reference list at the
// end of this listing, are defined in xdp-add.cc. Their bodies are not visible
// here, so the notes below describe only the signatures.

// Takes a 2x2x2 array of gsl_matrix pointers and three WORD_T values
// da, db, dc; returns a double.
// presumably the probability of the differential (da, db -> dc) — TODO confirm in xdp-add.cc
61 double xdp_add(gsl_matrix* A[2][2][2], WORD_T da, WORD_T db, WORD_T dc);
// Same three WORD_T inputs as xdp_add but without the matrices; returns a double.
// presumably an experimentally computed counterpart of xdp_add — TODO confirm
63 double xdp_add_exper(
const WORD_T da,
const WORD_T db,
const WORD_T dc);
// Maps a word x and a second WORD_T argument n_in to a WORD_T.
65 WORD_T
aop(WORD_T x, WORD_T n_in);
// Maps two words x, y to a WORD_T.
67 WORD_T
cap(WORD_T x, WORD_T y);
// Boolean predicate over three words.
69 bool is_eq(WORD_T x, WORD_T y, WORD_T z);
// Three-word helper; its result is what the xdp_add_lm* functions in this
// header store in `eq_d`.
// presumably sets bit i where x, y and z agree at bit i (cf. eq_opt) — TODO confirm
71 WORD_T
eq(
const WORD_T x,
const WORD_T y,
const WORD_T z);
// Overload of eq() with an explicit word_size argument. Because eq is
// overloaded, this header has to be compiled as C++.
73 WORD_T
eq(
const WORD_T x,
const WORD_T y,
const WORD_T z,
const uint32_t word_size);
// Derives a double p from three words da, db, dc: the visible statements run a
// bitwise feasibility test and then set p = 1 / 2^w, where w is a population
// count restricted to the bits below the MSB.
// presumably the "lm" closed-form XOR differential probability of modular
// addition for WORD_SIZE-bit words — TODO confirm.
// NOTE(review): this listing omits lines of the body (braces, the declarations
// of p, neq and mask, the branch on b_is_possible, the return), so the comments
// below cover only the statements that are visible.
93 inline double xdp_add_lm(WORD_T da, WORD_T db, WORD_T dc)
96 printf(
"[%s:%d] %s() %llX %llX %llX\n", __FILE__, __LINE__, __FUNCTION__,
97 (WORD_MAX_T)da, (WORD_MAX_T)db, (WORD_MAX_T)dc);
98 #endif// #if 0 // DEBUG
// Both branches build the same three values; they differ only in the width of
// the integer literals used.
100 #if(WORD_SIZE <= 32) // mask without the MSB
// Low (WORD_SIZE - 1) bits set.
101 WORD_T mask_no_msb = (0xffffffffUL >> (32 - (
WORD_SIZE - 1)));
102 WORD_T eq_d =
eq(da, db, dc);
// eq_d shifted up by one position with the LSB forced to 1, truncated by MASK.
103 WORD_T eq_d_sl_1 = ((eq_d << 1) | 0x00000001UL) &
MASK;
104 #else // #if(WORD_SIZE <= 32)
106 WORD_T mask_no_msb = (0xffffffffffffffffULL >> (64 - (
WORD_SIZE - 1)));
107 WORD_T eq_d =
eq(da, db, dc);
108 WORD_T eq_d_sl_1 = ((eq_d << 1) | 0x0000000000000001ULL) &
MASK;
109 #endif // #if(WORD_SIZE <= 32)
// Feasible only if da ^ db ^ dc ^ (da << 1) is zero at every bit position
// selected by eq_d_sl_1.
111 bool b_is_possible = ((eq_d_sl_1 & (da ^ db ^ dc ^ (da << 1))) == 0);
117 #else // assembly instruction for HW (-mpopcnt)
// neq is declared on a line not shown here (presumably ~eq_d — TODO confirm).
// NOTE(review): __builtin_popcount takes an unsigned int. If WORD_T is wider
// than 32 bits (the WORD_SIZE > 32 branch above), the upper bits of the
// argument are dropped before counting; __builtin_popcountll would be needed
// there — confirm.
118 uint32_t w = __builtin_popcount(neq & mask_no_msb);
119 #endif // #if 0 // standard HW
// p = 2^(-w)
124 p = (double) 1.0 / (
double)(1ULL << w);
130 printf(
"[%s:%d] w mask neq %d %llX %lld %lld\n", __FILE__, __LINE__,
131 w, (WORD_MAX_T)mask, (WORD_MAX_T)neq, (WORD_MAX_T)(neq & mask_no_msb));
132 #endif // #if 1 // DEBUG
// Overload of xdp_add_lm that takes the word size at run time: the masks are
// built from the word_size argument instead of the WORD_SIZE/MASK macros.
// NOTE(review): this listing omits lines of the body (braces, the declarations
// of p and neq, the return), so the comments below cover only the statements
// that are visible.
144 inline double xdp_add_lm(WORD_T da, WORD_T db, WORD_T dc, uint32_t word_size)
// NOTE(review): `mask` is declared again below as WORD_T; presumably this line
// sits in a disabled preprocessor branch that the listing dropped — confirm.
// The shift count (64 - word_size) is undefined behaviour for word_size == 0.
149 WORD_MAX_T mask = (~0ULL >> (64 - word_size));
152 #if(WORD_SIZE <= 32) // mask without the MSB
// Low word_size bits set.
// NOTE(review): if unsigned long is 32 bits wide on the target, shifting by
// word_size == 32 is undefined behaviour — confirm the supported range.
154 WORD_T mask = ~(0xffffffffUL << word_size);
// NOTE(review): this calls the three-argument eq() although an overload taking
// word_size is declared in this header — confirm that is intended.
156 WORD_T eq_d =
eq(da, db, dc);
// eq_d shifted up by one position with the LSB forced to 1, truncated to
// word_size bits.
157 WORD_T eq_d_sl_1 = ((eq_d << 1) | 0x00000001) & mask;
158 #else // #if(word_size <= 32)
// NOTE(review): shifting a 64-bit value by word_size == 64 is undefined
// behaviour — confirm that word_size < 64 is guaranteed by callers.
159 WORD_T mask = ~(0xffffffffffffffffULL << word_size);
161 WORD_T eq_d =
eq(da, db, dc);
162 WORD_T eq_d_sl_1 = ((eq_d << 1) | 0x0000000000000001ULL) & mask;
163 #endif // #if(WORD_SIZE <= 32)
// Feasible only if da ^ db ^ dc ^ (da << 1) is zero at every bit position
// selected by eq_d_sl_1.
168 bool b_is_possible = ((eq_d_sl_1 & (da ^ db ^ dc ^ (da << 1))) == 0);
175 #else // assembly instruction for HW (-mpopcnt)
// NOTE(review): no declaration of mask_no_msb or neq is visible in this
// overload — confirm both exist on the omitted lines. __builtin_popcount takes
// an unsigned int, so a WORD_T wider than 32 bits loses its upper bits here.
176 uint32_t w = __builtin_popcount(neq & mask_no_msb);
177 #endif // #if 0 // standard HW
// p = 2^(-w)
182 p = (double) 1.0 / (
double)(1ULL << w);
// Tests whether dc equals the XOR of da and db; the body of this branch is not
// visible in the listing.
187 if((da ^ db) == dc) {
// Body fragment; the function header and the opening #if are not part of this
// listing. The cross-reference list at the end places xdp_add_lm_log2 at
// xdp-add.hh:209, so these statements presumably belong to it — TODO confirm.
// `mask` has the low (WORD_SIZE - 1) bits set, i.e. every bit except the MSB.
214 const WORD_T mask = (0xffffffffUL >> (32 - (
WORD_SIZE - 1)));
215 #else // #if(WORD_SIZE <= 32)
216 const WORD_T mask = (0xffffffffffffffffULL >> (64 - (
WORD_SIZE - 1)));
217 #endif // #if(WORD_SIZE <= 32)
219 WORD_T eq_d =
eq (da, db, dc);
// eq_d shifted up by one position with the LSB forced to 1, truncated by MASK.
// The two declarations below differ only in literal width; the #else/#endif
// separating them is not visible in the listing.
220 #if (WORD_SIZE <= 32)
221 WORD_T eq_d_sl_1 = ((eq_d << 1) | 0x00000001UL) &
MASK;
223 WORD_T eq_d_sl_1 = ((eq_d << 1) | 0x0000000000000001ULL) &
MASK;
// Feasible only if da ^ db ^ dc ^ (da << 1) is zero at every bit position
// selected by eq_d_sl_1.
225 bool b_is_possible = ((eq_d_sl_1 & (da ^ db ^ dc ^ (da << 1))) == 0);
// Body fragment; the function header is not part of this listing.
// presumably the overload of xdp_add_lm_log2 that takes word_size, since the
// masks below are built from a `word_size` value — TODO confirm.
// The #else/#endif lines of both conditionals are not visible.
259 #if (WORD_SIZE <= 32)
// Low word_size bits set.
// NOTE(review): a shift count equal to the width of the shifted type (32 for a
// 32-bit unsigned long, 64 for unsigned long long) is undefined behaviour —
// confirm the range of word_size.
260 WORD_T mask = ~(0xffffffffUL << word_size);
262 WORD_T mask = ~(0xffffffffffffffffULL << word_size);
265 WORD_T eq_d =
eq(da, db, dc);
267 #if (WORD_SIZE <= 32)
// eq_d shifted up by one position with the LSB forced to 1, truncated to
// word_size bits.
268 WORD_T eq_d_sl_1 = ((eq_d << 1) | 0x00000001UL) & mask;
270 WORD_T eq_d_sl_1 = ((eq_d << 1) | 0x0000000000000001ULL) & mask;
// Feasible only if da ^ db ^ dc ^ (da << 1) is zero at every bit position
// selected by eq_d_sl_1.
272 bool b_is_possible = ((eq_d_sl_1 & (da ^ db ^ dc ^ (da << 1))) == 0);
// Complement of eq_d restricted to the low (word_size - 1) bits.
275 WORD_T neq = ~eq_d & (mask >> 1);
// Compares the XOR of the least-significant bits of da and db with the LSB of
// dc; the statement guarded by this condition is not visible in the listing.
295 if (((da & 1) ^ (db & 1)) == (dc & 1))
// Bitwise three-way equality: the value e computed below has bit i set exactly
// when x, y and z all carry the same bit at position i, limited to the bits of
// MASK.
// NOTE(review): the braces and the return statement are not visible in this
// listing; presumably e is the return value — confirm.
321 static inline WORD_T eq_opt(
const WORD_T x,
const WORD_T y,
const WORD_T z)
// (x ^ y) | (x ^ z) marks the positions where y or z differs from x; the
// complement therefore marks the positions where all three agree.
323 WORD_T e = ~((x ^ y) | (x ^ z)) &
MASK;
// Integer-valued variant built on eq_opt(): it runs the same feasibility test
// as xdp_add_lm and, when the test passes, counts the set bits of ~eq_d below
// the MSB.
// NOTE(review): the braces, the use made of w and the return statement are not
// visible in this listing; presumably the result is -w, a base-2 logarithm, as
// in the word_size overload — TODO confirm.
339 static inline int xdp_add_lm_log2_opt(WORD_T da, WORD_T db, WORD_T dc)
341 const WORD_T eq_d = eq_opt(da, db, dc);
// eq_d shifted up by one position with the LSB forced to 1, truncated by MASK.
342 const WORD_T eq_d_sl_1 = ((eq_d << 1) | (WORD_T)1) &
MASK;
// Zero exactly when da ^ db ^ dc ^ (da << 1) vanishes at every bit position
// selected by eq_d_sl_1.
343 const WORD_T b_is_possible_if_zero = (eq_d_sl_1 & (da ^ db ^ dc ^ (da << 1)));
344 if (b_is_possible_if_zero == 0)
// Complement of eq_d restricted to MASK_NO_MSB.
346 const WORD_T neq = ~eq_d & MASK_NO_MSB;
347 const int w = builtin_hamming_weight(neq);
// Overload of xdp_add_lm_log2_opt that takes the word size at run time. When
// the feasibility test passes, p is set to minus the number of set bits of
// ~eq_d among the low (word_size - 1) bits.
// NOTE(review): the braces, the declaration of p, the #else/#endif of the
// conditional below and the return statement are not visible in this listing.
362 static inline int xdp_add_lm_log2_opt(WORD_T da, WORD_T db, WORD_T dc, uint32_t word_size)
367 #if (WORD_SIZE <= 32)
// Low word_size bits set.
// NOTE(review): a shift count equal to the width of the shifted type (32 for a
// 32-bit unsigned long, 64 for unsigned long long) is undefined behaviour —
// confirm the range of word_size.
368 const WORD_T mask = ~(0xffffffffUL << word_size);
370 const WORD_T mask = ~(0xffffffffffffffffULL << word_size);
// NOTE(review): eq_opt() masks with the MASK macro, not with the word_size-based
// mask; bits above word_size are removed only where `mask` is applied below.
372 const WORD_T eq_d = eq_opt(da, db, dc);
// eq_d shifted up by one position with the LSB forced to 1, truncated to
// word_size bits.
373 const WORD_T eq_d_sl_1 = ((eq_d << 1) | (WORD_T)1) & mask;
// Zero exactly when da ^ db ^ dc ^ (da << 1) vanishes at every bit position
// selected by eq_d_sl_1.
374 const WORD_T b_is_possible_if_zero = (eq_d_sl_1 & (da ^ db ^ dc ^ (da << 1)));
375 if (b_is_possible_if_zero == 0)
// Complement of eq_d restricted to the low (word_size - 1) bits.
377 const WORD_T neq = ~eq_d & (mask >> 1);
// p = -(number of set bits in neq)
378 p = -builtin_hamming_weight(neq);
// True when the XOR of the least-significant bits of da, db and dc is zero; the
// statement guarded by this condition is not visible in the listing.
387 if (((da ^ db ^dc) & (WORD_T)1) == 0)
399 #endif // #ifndef XDP_ADD_H
WORD_T eq(const WORD_T x, const WORD_T y, const WORD_T z)
Definition: xdp-add.cc:628
#define MASK
Definition: common.hh:129
WORD_T aop(WORD_T x, WORD_T n_in)
Definition: xdp-add.cc:436
#define WORD_SIZE
Definition: common.hh:119
double xdp_add_exper(const WORD_T da, const WORD_T db, const WORD_T dc)
Definition: xdp-add.cc:403
WORD_T cap(WORD_T x, WORD_T y)
Definition: xdp-add.cc:569
void xdp_add_free_matrices(gsl_matrix *A[2][2][2])
Definition: xdp-add.cc:58
void xdp_add_normalize_matrices(gsl_matrix *A[2][2][2])
Definition: xdp-add.cc:78
double xdp_add_lm(WORD_T da, WORD_T db, WORD_T dc)
Definition: xdp-add.hh:93
void xdp_add_print_matrices_sage(gsl_matrix *A[2][2][2])
Definition: xdp-add.cc:146
uint32_t hamming_weight(const WORD_T w)
Definition: common.cc:128
bool is_eq(WORD_T x, WORD_T y, WORD_T z)
Definition: xdp-add.cc:610
void xdp_add_alloc_matrices(gsl_matrix *A[2][2][2])
Definition: xdp-add.cc:39
void xdp_add_print_matrices(gsl_matrix *A[2][2][2])
Definition: xdp-add.cc:109
bool xdp_add_is_nonzero(WORD_T da, WORD_T db, WORD_T dc)
Definition: xdp-add.cc:676
double xdp_add(gsl_matrix *A[2][2][2], WORD_T da, WORD_T db, WORD_T dc)
Definition: xdp-add.cc:324
void xdp_add_sf(gsl_matrix *A[2][2][2])
Definition: xdp-add.cc:234
void print_binary(const uint64_t n)
Definition: common.cc:218
int xdp_add_lm_log2(WORD_T da, WORD_T db, WORD_T dc)
Definition: xdp-add.hh:209