Matrix r5059
Loading...
Searching...
No Matches
idz.c
Go to the documentation of this file.
1#include "Mdefines.h"
2#include "M5.h"
3#include "idz.h"
4
5#define TEMPLATE(c) \
6void \
7c##swap2(size_t n, \
8 c##TYPE *x, size_t dx, \
9 c##TYPE *y, size_t dy) \
10{ \
11 c##TYPE tmp; \
12 while (n--) { \
13 tmp = *x; \
14 *x = *y; \
15 *y = tmp; \
16 x += dx; \
17 y += dy; \
18 } \
19 return; \
20} \
21 \
22void \
23c##swap1(size_t n, \
24 c##TYPE *x, size_t dx, size_t addx, int nddx, \
25 c##TYPE *y, size_t dy, size_t addy, int nddy) \
26{ \
27 c##TYPE tmp; \
28 while (n--) { \
29 tmp = *x; \
30 *x = *y; \
31 *y = tmp; \
32 x += dx; \
33 y += dy; \
34 if (nddx) dx -= addx; else dx += addx; \
35 if (nddy) dy -= addy; else dy += addy; \
36 } \
37 return; \
38} \
39 \
40void \
41c##copy2(size_t n, \
42 c##TYPE *x, size_t dx, \
43 const c##TYPE *y, size_t dy) \
44{ \
45 while (n--) { \
46 *x = *y; \
47 x += dx; \
48 y += dy; \
49 } \
50 return; \
51} \
52 \
53void \
54c##copy1(size_t n, \
55 c##TYPE *x, size_t dx, size_t addx, int nddx, \
56 const c##TYPE *y, size_t dy, size_t addy, int nddy) \
57{ \
58 while (n--) { \
59 *x = *y; \
60 x += dx; \
61 y += dy; \
62 if (nddx) dx -= addx; else dx += addx; \
63 if (nddy) dy -= addy; else dy += addy; \
64 } \
65 return; \
66} \
67 \
68static void \
69c##symswapr2(c##TYPE *x, \
70 size_t n, char uplo, size_t i, size_t j) \
71{ \
72 if (n == 0 || i == j) \
73 return; \
74 if (i > j) { \
75 size_t k = i; \
76 i = j; \
77 j = k; \
78 } \
79 c##TYPE *xi = x + i * n, *xj = x + j * n, tmp; \
80 tmp = xi[i]; \
81 xi[i] = xj[j]; \
82 xj[j] = tmp; \
83 if (uplo == 'U') { \
84 c##swap2(i, xi, 1, xj, 1); \
85 c##swap2(j - i - 1, xi + i + n, n, xj + i + 1, 1); \
86 c##swap2(n - j - 1, xj + i + n, n, xj + j + n, n); \
87 } else { \
88 c##swap2(i, x + i, n, x + j, n); \
89 c##swap2(j - i - 1, xi + i + 1, 1, xi + j + n, n); \
90 c##swap2(n - j - 1, xi + j + 1, 1, xj + j + 1, 1); \
91 } \
92 return; \
93} \
94 \
95static void \
96c##symswapr1(c##TYPE *x, \
97 size_t n, char uplo, size_t i, size_t j) \
98{ \
99 if (n == 0 || i == j) \
100 return; \
101 if (i >= j) { \
102 size_t k = i; \
103 i = j; \
104 j = k; \
105 } \
106 c##TYPE *xi, *xj, tmp; \
107 if (uplo == 'U') { \
108 xi = x + DENSE_INDEX_U(0, i, n); \
109 xj = x + DENSE_INDEX_U(0, j, n); \
110 tmp = xi[i]; \
111 xi[i] = xj[j]; \
112 xj[j] = tmp; \
113 c##swap1(i, xi, 1, 0, 0, xj, 1, 0, 0); \
114 c##swap1(j - i - 1, xi + i + (i + 1), i + 2, 1, 0, xj + i + 1, 1, 0, 0); \
115 c##swap1(n - j - 1, xj + i + (j + 1), j + 2, 1, 0, xj + j + (j + 1), j + 2, 1, 0); \
116 } else { \
117 xi = x + DENSE_INDEX_L(i, i, n); \
118 xj = x + DENSE_INDEX_L(j, j, n); \
119 tmp = xi[0]; \
120 xi[0] = xj[0]; \
121 xj[0] = tmp; \
122 c##swap1(i, x + i, n - 1, 1, 1, x + j, n - 1, 1, 1); \
123 c##swap1(j - i - 1, xi + (i - i) + 1, 1, 0, 0, xi + (j - i) + (n - i - 1), n - i - 2, 1, 1); \
124 c##swap1(n - j - 1, xi + (j - i) + 1, 1, 0, 0, xj + (j - j) + 1, 1, 0, 0); \
125 } \
126 return; \
127} \
128 \
129static void \
130c##symcopyr2(c##TYPE *x, const c##TYPE *y, \
131 size_t n, char uplo, size_t i, size_t j) \
132{ \
133 if (n == 0) \
134 return; \
135 c##TYPE *xi = x + i * n, *xj = x + j * n; \
136 const c##TYPE *yi = y + i * n, *yj = y + j * n; \
137 xi[i] = yj[j]; \
138 if (uplo == 'U') { \
139 if (i <= j) { \
140 xj[i] = yj[i]; \
141 c##copy2(i, xi, 1, yj, 1); \
142 if (i < j) \
143 c##copy2(j - i - 1, xi + i + n, n, yj + i + 1, 1); \
144 c##copy2(n - j - 1, xj + i + n, n, yj + j + n, n); \
145 } else { \
146 xi[j] = yi[j]; \
147 c##copy2(j, xi, 1, yj, 1); \
148 c##copy2(i - j - 1, xi + j + 1, 1, yj + j + n, n); \
149 c##copy2(n - i - 1, xi + i + n, n, yi + j + n, n); \
150 } \
151 } else { \
152 if (i <= j) { \
153 xi[j] = yi[j]; \
154 c##copy2(i, x + i, n, y + j, n); \
155 if (i < j) \
156 c##copy2(j - i - 1, xi + i + 1, 1, yi + j + n, n); \
157 c##copy2(n - j - 1, xi + j + 1, 1, yj + j + 1, 1); \
158 } else { \
159 xj[i] = yj[i]; \
160 c##copy2(j, x + i, n, y + j, n); \
161 c##copy2(i - j - 1, xj + i + n, n, yj + j + 1, 1); \
162 c##copy2(n - i - 1, xi + i + 1, 1, yj + i + 1, 1); \
163 } \
164 } \
165 return; \
166} \
167 \
168static void \
169c##symcopyr1(c##TYPE *x, const c##TYPE *y, \
170 size_t n, char uplo, size_t i, size_t j) \
171{ \
172 if (n == 0) \
173 return; \
174 c##TYPE *xi, *xj; \
175 const c##TYPE *yi, *yj; \
176 if (uplo == 'U') { \
177 xi = x + DENSE_INDEX_U(0, i, n); \
178 xj = x + DENSE_INDEX_U(0, j, n); \
179 yi = y + DENSE_INDEX_U(0, i, n); \
180 yj = y + DENSE_INDEX_U(0, j, n); \
181 xi[i] = yj[j]; \
182 if (i <= j) { \
183 xj[i] = yj[i]; \
184 c##copy1(i, xi, 1, 0, 0, yj, 1, 0, 0); \
185 if (i < j) \
186 c##copy1(j - i - 1, xi + i + (i + 1), i + 2, 1, 0, yj + i + 1, 1, 0, 0); \
187 c##copy1(n - j - 1, xj + i + (j + 1), j + 2, 1, 0, yj + j + (j + 1), j + 2, 1, 0); \
188 } else { \
189 xi[j] = yi[j]; \
190 c##copy1(j, xi, 1, 0, 0, yj, 1, 0, 0); \
191 c##copy1(i - j - 1, xi + j + 1, 1, 0, 0, yj + j + (j + 1), j + 2, 1, 0); \
192 c##copy1(n - i - 1, xi + i + (i + 1), 0, i + 2, 1, yi + j + (i + 1), i + 2, 1, 0); \
193 } \
194 } else { \
195 xi = x + DENSE_INDEX_L(i, i, n); \
196 xj = x + DENSE_INDEX_L(j, j, n); \
197 yi = y + DENSE_INDEX_L(i, i, n); \
198 yj = y + DENSE_INDEX_L(j, j, n); \
199 xi[0] = yj[0]; \
200 if (i <= j) { \
201 xi[j] = yi[j]; \
202 c##copy1(i, x + i, n - 1, 1, 1, y + j, n - 1, 1, 1); \
203 if (i < j) \
204 c##copy1(j - i - 1, xi + (i - i) + 1, 1, 0, 0, yi + (j - i) + (n - i - 1), n - i - 2, 1, 1); \
205 c##copy1(n - j - 1, xi + (j - i) + 1, 1, 0, 0, yj + (j - j) + 1, 1, 0, 0); \
206 } else { \
207 xj[i] = yj[i]; \
208 c##copy1(j, x + i, n - 1, 1, 1, y + j, n - 1, 1, 1); \
209 c##copy1(i - j - 1, xj + (i - j) + (n - j - 1), n - j - 2, 1, 1, yj + (j - j) + 1, 1, 0, 0); \
210 c##copy1(n - i - 1, xi + (i - i) + 1, 1, 0, 0, yj + (i - j) + 1, 1, 0, 0); \
211 } \
212 } \
213 return; \
214} \
215 \
216void \
217c##rowperm2(c##TYPE *x, const c##TYPE *y, \
218 int m, int n, const int *p, int off, int invert) \
219{ \
220 if (m <= 0 || n <= 0) \
221 return; \
222 size_t m_ = (size_t) m, n_ = (size_t) n; \
223 if (!p) { \
224 if (y) \
225 memcpy(x, y, sizeof(c##TYPE) * m_ * n_); \
226 } else { \
227 if (y) { \
228 int i, j; \
229 if (!invert) \
230 for (j = 0; j < n; ++j, x += m, y += m) \
231 for (i = 0; i < m; ++i) \
232 x[i] = y[p[i] - off]; \
233 else \
234 for (j = 0; j < n; ++j, x += m, y += m) \
235 for (i = 0; i < m; ++i) \
236 x[p[i] - off] = y[i]; \
237 } else { \
238 int i, k0, k1, *q; \
239 Matrix_Calloc(q, m, int); \
240 if (!invert) \
241 for (i = 0; i < m; ++i) \
242 q[i] = -(p[i] - off + 1); \
243 else \
244 for (i = 0; i < m; ++i) \
245 q[p[i] - off] = -(i + 1); \
246 for (i = 0; i < m; ++i) { \
247 if (q[i] > 0) \
248 continue; \
249 k0 = i; \
250 q[k0] = -q[k0]; \
251 k1 = q[k0] - 1; \
252 while (q[k1] < 0) { \
253 c##swap2(n_, x + k0, m_, x + k1, m_); \
254 k0 = k1; \
255 q[k0] = -q[k0]; \
256 k1 = q[k0] - 1; \
257 } \
258 } \
259 Matrix_Free(q, m); \
260 } \
261 } \
262 return; \
263} \
264 \
265void \
266c##symperm2(c##TYPE *x, const c##TYPE *y, \
267 int n, char uplo, const int *p, int off, int invert) \
268{ \
269 if (n <= 0) \
270 return; \
271 size_t n_ = (size_t) n; \
272 if (!p) { \
273 if (y) \
274 memcpy(x, y, sizeof(c##TYPE) * n_ * n_); \
275 } else { \
276 if (y) { \
277 int j; \
278 if (!invert) \
279 for (j = 0; j < n; ++j) \
280 c##symcopyr2(x, y, n_, uplo, (size_t) j, (size_t) (p[j] - off)); \
281 else \
282 for (j = 0; j < n; ++j) \
283 c##symcopyr2(x, y, n_, uplo, (size_t) (p[j] - off), (size_t) j); \
284 } else { \
285 int j, k0, k1, *q; \
286 Matrix_Calloc(q, n, int); \
287 if (!invert) \
288 for (j = 0; j < n; ++j) \
289 q[j] = -(p[j] - off + 1); \
290 else \
291 for (j = 0; j < n; ++j) \
292 q[p[j] - off] = -(j + 1); \
293 for (j = 0; j < n; ++j) { \
294 if (q[j] > 0) \
295 continue; \
296 k0 = j; \
297 q[k0] = -q[k0]; \
298 k1 = q[k0] - 1; \
299 while (q[k1] < 0) { \
300 c##symswapr2(x, n_, uplo, (size_t) k0, (size_t) k1); \
301 k0 = k1; \
302 q[k0] = -q[k0]; \
303 k1 = q[k0] - 1; \
304 } \
305 } \
306 Matrix_Free(q, n); \
307 } \
308 } \
309 return; \
310} \
311 \
312void \
313c##symperm1(c##TYPE *x, const c##TYPE *y, \
314 int n, char uplo, const int *p, int off, int invert) \
315{ \
316 if (n <= 0) \
317 return; \
318 size_t n_ = (size_t) n; \
319 if (!p) { \
320 if (y) \
321 memcpy(x, y, sizeof(c##TYPE) * PACKED_LENGTH(n_)); \
322 } else { \
323 if (y) { \
324 int j; \
325 if (!invert) \
326 for (j = 0; j < n; ++j) \
327 c##symcopyr1(x, y, n_, uplo, (size_t) j, (size_t) (p[j] - off)); \
328 else \
329 for (j = 0; j < n; ++j) \
330 c##symcopyr1(x, y, n_, uplo, (size_t) (p[j] - off), (size_t) j); \
331 } else { \
332 int j, k0, k1, *q; \
333 Matrix_Calloc(q, n, int); \
334 if (!invert) \
335 for (j = 0; j < n; ++j) \
336 q[j] = -(p[j] - off + 1); \
337 else \
338 for (j = 0; j < n; ++j) \
339 q[p[j] - off] = -(j + 1); \
340 for (j = 0; j < n; ++j) { \
341 if (q[j] > 0) \
342 continue; \
343 k0 = j; \
344 q[k0] = -q[k0]; \
345 k1 = q[k0] - 1; \
346 while (q[k1] < 0) { \
347 c##symswapr1(x, n_, uplo, (size_t) k0, (size_t) k1); \
348 k0 = k1; \
349 q[k0] = -q[k0]; \
350 k1 = q[k0] - 1; \
351 } \
352 } \
353 Matrix_Free(q, n); \
354 } \
355 } \
356 return; \
357} \
358 \
359void \
360c##pack2(c##TYPE *x, const c##TYPE *y, \
361 size_t n, char uplo, char trans, char diag) \
362{ \
363 size_t i, j; \
364 c##TYPE *tmp = x; \
365 if (uplo == 'U') { \
366 for (j = 0; j < n; y += n - (++j)) \
367 for (i = 0; i <= j; ++i) \
368 *(x++) = *(y++); \
369 if (diag != '\0') { \
370 if (diag != 'N') \
371 for (j = 0, x = tmp; j < n; x += (++j) + 1) \
372 c##SET_UNIT(*x); \
373 } else if (trans == 'C') \
374 for (j = 0, x = tmp; j < n; x += (++j) + 1) \
375 c##SET_PROJ_REAL(*x); \
376 } else { \
377 for (j = 0; j < n; y += (++j)) \
378 for (i = j; i < n; ++i) \
379 *(x++) = *(y++); \
380 if (diag != '\0') { \
381 if (diag != 'N') \
382 for (j = 0, x = tmp; j < n; x += n - (j++)) \
383 c##SET_UNIT(*x); \
384 } else if (trans == 'C') \
385 for (j = 0, x = tmp; j < n; x += n - (j++)) \
386 c##SET_PROJ_REAL(*x); \
387 } \
388 return; \
389} \
390 \
391void \
392c##pack1(c##TYPE *x, const c##TYPE *y, \
393 size_t n, char uplo, char trans, char diag) \
394{ \
395 size_t i, j; \
396 if (diag != '\0') { \
397 c##TYPE *tmp = x; \
398 if (uplo == 'U') { \
399 for (j = 0; j < n; ++j) { \
400 for (i = 0; i <= j; ++i) \
401 *(x++) = *(y++); \
402 for (i = j + 1; i < n; ++i) \
403 *(x++) = c##ZERO; \
404 } \
405 } else { \
406 for (j = 0; j < n; ++j) { \
407 for (i = 0; i < j; ++i) \
408 *(x++) = c##ZERO; \
409 for (i = j; i < n; ++i) \
410 *(x++) = *(y++); \
411 } \
412 } \
413 if (diag != 'N') { \
414 size_t dx = n + 1; \
415 for (j = 0, x = tmp; j < n; ++j, x += dx) \
416 c##SET_UNIT(*x); \
417 } \
418 } else if (trans == 'C') { \
419 c##TYPE *u = x, *l = x; \
420 if (uplo == 'U') { \
421 for (j = 0; j < n; ++j) { \
422 for (i = 0; i < j; ++i) { \
423 c##ASSIGN_IDEN(*u, *y); \
424 c##ASSIGN_CONJ(*l, *y); \
425 u += 1; y += 1; l += n; \
426 } \
427 c##ASSIGN_PROJ_REAL(*u, *y); \
428 u += 1; y += 1; \
429 u += n - j - 1; l = x + j + 1; \
430 } \
431 } else { \
432 for (j = 0; j < n; ++j) { \
433 l += j; u = l + n; \
434 c##ASSIGN_PROJ_REAL(*l, *y); \
435 l += 1; y += 1; \
436 for (i = j + 1; i < n; ++i) { \
437 c##ASSIGN_IDEN(*l, *y); \
438 c##ASSIGN_CONJ(*u, *y); \
439 l += 1; y += 1; u += n; \
440 } \
441 } \
442 } \
443 } else { \
444 c##TYPE *u = x, *l = x; \
445 if (uplo == 'U') { \
446 for (j = 0; j < n; ++j) { \
447 for (i = 0; i < j; ++i) { \
448 c##ASSIGN_IDEN(*u, *y); \
449 c##ASSIGN_IDEN(*l, *y); \
450 u += 1; y += 1; l += n; \
451 } \
452 c##ASSIGN_IDEN(*u, *y); \
453 u += 1; y += 1; \
454 u += n - j - 1; l = x + j + 1; \
455 } \
456 } else { \
457 for (j = 0; j < n; ++j) { \
458 l += j; u = l + n; \
459 c##ASSIGN_IDEN(*l, *y); \
460 l += 1; y += 1; \
461 for (i = j + 1; i < n; ++i) { \
462 c##ASSIGN_IDEN(*l, *y); \
463 c##ASSIGN_IDEN(*u, *y); \
464 l += 1; y += 1; u += n; \
465 } \
466 } \
467 } \
468 } \
469 return; \
470} \
471 \
472void \
473c##force2(c##TYPE *x, const c##TYPE *y, \
474 size_t n, char uplo, char trans, char diag) \
475{ \
476 size_t i, j; \
477 if (diag < '\0') { \
478 if (diag != -'N') { \
479 size_t dx = n + 1; \
480 memset(x, 0, sizeof(c##TYPE) * n * n); \
481 for (j = 0; j < n; ++j, x += dx) \
482 c##SET_UNIT(*x); \
483 } else if (y) { \
484 size_t dx = n + 1; \
485 size_t dy = (trans) ? 1 : dx; \
486 memset(x, 0, sizeof(c##TYPE) * n * n); \
487 for (j = 0; j < n; ++j, x += dx, y += dy) \
488 *x = *y; \
489 } else { \
490 for (j = 0; j < n; ++j) { \
491 for (i = 0; i < j; ++i) \
492 *(x++) = c##ZERO; \
493 x++; \
494 for (i = j + 1; i < n; ++i) \
495 *(x++) = c##ZERO; \
496 } \
497 } \
498 } else if (diag > '\0') { \
499 c##TYPE *tmp = x; \
500 if (uplo == 'U') { \
501 for (j = 0; j < n; ++j) { \
502 if (!y) \
503 x += j + 1; \
504 else { \
505 for (i = 0; i <= j; ++i) \
506 *(x++) = *(y++); \
507 y += n - j - 1; \
508 } \
509 for (i = j + 1; i < n; ++i) \
510 *(x++) = c##ZERO; \
511 } \
512 } else { \
513 for (j = 0; j < n; ++j) { \
514 for (i = 0; i < j; ++i) \
515 *(x++) = c##ZERO; \
516 if (!y) \
517 x += n - j; \
518 else { \
519 y += j; \
520 for (i = j; i < n; ++i) \
521 *(x++) = *(y++); \
522 } \
523 } \
524 } \
525 if (diag != 'N') { \
526 size_t dx = n + 1; \
527 for (j = 0, x = tmp; j < n; ++j, x += dx) \
528 c##SET_UNIT(*x); \
529 } \
530 } else if (trans == 'C') { \
531 c##TYPE *u = x, *l = x; \
532 if (uplo == 'U') { \
533 for (j = 0; j < n; ++j) { \
534 for (i = 0; i < j; ++i) { \
535 if (!y) \
536 c##ASSIGN_CONJ(*l, *u); \
537 else { \
538 c##ASSIGN_IDEN(*u, *y); \
539 c##ASSIGN_CONJ(*l, *y); \
540 y += 1; \
541 } \
542 u += 1; l += n; \
543 } \
544 if (!y) \
545 c##SET_PROJ_REAL(*u); \
546 else { \
547 c##ASSIGN_PROJ_REAL(*u, *y); \
548 y += 1; \
549 y += n - j - 1; \
550 } \
551 u += 1; \
552 u += n - j - 1; l = x + j + 1; \
553 } \
554 } else { \
555 for (j = 0; j < n; ++j) { \
556 l += j; u = l + n; \
557 if (!y) \
558 c##SET_PROJ_REAL(*l); \
559 else { \
560 y += j; \
561 c##ASSIGN_PROJ_REAL(*l, *y); \
562 y += 1; \
563 } \
564 l += 1; \
565 for (i = j + 1; i < n; ++i) { \
566 if (!y) \
567 c##ASSIGN_CONJ(*u, *l); \
568 else { \
569 c##ASSIGN_IDEN(*l, *y); \
570 c##ASSIGN_CONJ(*u, *y); \
571 y += 1; \
572 } \
573 l += 1; u += n; \
574 } \
575 } \
576 } \
577 } else { \
578 c##TYPE *u = x, *l = x; \
579 if (uplo == 'U') { \
580 for (j = 0; j < n; ++j) { \
581 for (i = 0; i < j; ++i) { \
582 if (!y) \
583 c##ASSIGN_IDEN(*l, *u); \
584 else { \
585 c##ASSIGN_IDEN(*u, *y); \
586 c##ASSIGN_IDEN(*l, *y); \
587 y += 1; \
588 } \
589 u += 1; l += n; \
590 } \
591 if (y) { \
592 c##ASSIGN_IDEN(*u, *y); \
593 y += 1; \
594 y += n - j - 1; \
595 } \
596 u += 1; \
597 u += n - j - 1; l = x + j + 1; \
598 } \
599 } else { \
600 for (j = 0; j < n; ++j) { \
601 l += j; u = l + n; \
602 if (y) { \
603 y += j; \
604 c##ASSIGN_IDEN(*l, *y); \
605 y += 1; \
606 } \
607 l += 1; \
608 for (i = j + 1; i < n; ++i) { \
609 if (!y) \
610 c##ASSIGN_IDEN(*u, *l); \
611 else { \
612 c##ASSIGN_IDEN(*l, *y); \
613 c##ASSIGN_IDEN(*u, *y); \
614 y += 1; \
615 } \
616 l += 1; u += n; \
617 } \
618 } \
619 } \
620 } \
621 return; \
622} \
623 \
624void \
625c##force1(c##TYPE *x, const c##TYPE *y, \
626 size_t n, char uplo, char trans, char diag) \
627{ \
628 size_t i, j; \
629 if (uplo == 'U') { \
630 if (diag < '\0') { \
631 if (diag != -'N') { \
632 memset(x, 0, sizeof(c##TYPE) * PACKED_LENGTH(n)); \
633 for (j = 0; j < n; x += (++j) + 1) \
634 *x = c##UNIT; \
635 } else if (y) { \
636 memset(x, 0, sizeof(c##TYPE) * PACKED_LENGTH(n)); \
637 for (j = 0; j < n; ++j, x += j + 1, y += (trans) ? 1 : j + 1) \
638 *x = *y; \
639 } else { \
640 for (j = 0; j < n; ++j) { \
641 for (i = 0; i < j; ++i) \
642 *(x++) = c##ZERO; \
643 x++; \
644 } \
645 } \
646 } else if (diag > '\0') { \
647 if (y) \
648 memcpy(x, y, sizeof(c##TYPE) * PACKED_LENGTH(n)); \
649 if (diag != 'N') \
650 for (j = 0; j < n; x += (++j) + 1) \
651 c##SET_UNIT(*x); \
652 } else if (trans == 'C') { \
653 if (y) \
654 memcpy(x, y, sizeof(c##TYPE) * PACKED_LENGTH(n)); \
655 for (j = 0; j < n; x += (++j) + 1) \
656 c##SET_PROJ_REAL(*x); \
657 } \
658 } else { \
659 if (diag < '\0') { \
660 if (diag != -'N') { \
661 memset(x, 0, sizeof(c##TYPE) * PACKED_LENGTH(n)); \
662 for (j = 0; j < n; x += n - (j++)) \
663 *x = c##UNIT; \
664 } else if (y) { \
665 memset(x, 0, sizeof(c##TYPE) * PACKED_LENGTH(n)); \
666 for (j = 0; j < n; x += n - j, y += (trans) ? 1 : n - j, j++) \
667 *x = *y; \
668 } else { \
669 for (j = 0; j < n; ++j) { \
670 x++; \
671 for (i = j + 1; i < n; ++i) \
672 *(x++) = c##ZERO; \
673 } \
674 } \
675 } else if (diag > '\0') { \
676 if (y) \
677 memcpy(x, y, sizeof(c##TYPE) * PACKED_LENGTH(n)); \
678 if (diag != 'N') \
679 for (j = 0; j < n; x += n - (j++)) \
680 c##SET_UNIT(*x); \
681 } else if (trans == 'C') { \
682 if (y) \
683 memcpy(x, y, sizeof(c##TYPE) * PACKED_LENGTH(n)); \
684 for (j = 0; j < n; x += n - (j++)) \
685 c##SET_PROJ_REAL(*x); \
686 } \
687 } \
688 return; \
689} \
690 \
691void \
692c##trans2(c##TYPE *x, const c##TYPE *y, \
693 size_t m, size_t n, char trans) \
694{ \
695 size_t i, j, mn = m * n, mn1s = mn - 1; \
696 if (trans == 'N') \
697 memcpy(x, y, sizeof(c##TYPE) * mn); \
698 else if (trans == 'C') \
699 for (j = 0; j < m; ++j, y -= mn1s) \
700 for (i = 0; i < n; ++i, x += 1, y += m) \
701 c##ASSIGN_CONJ(*x, *y); \
702 else \
703 for (j = 0; j < m; ++j, y -= mn1s) \
704 for (i = 0; i < n; ++i, x += 1, y += m) \
705 c##ASSIGN_IDEN(*x, *y); \
706 return; \
707} \
708 \
709void \
710c##trans1(c##TYPE *x, const c##TYPE *y, \
711 size_t n, char uplo, char trans) \
712{ \
713 c##TYPE tmp; \
714 size_t i, j; \
715 if (trans == 'N') { \
716 memcpy(x, y, sizeof(c##TYPE) * PACKED_LENGTH(n)); \
717 } else if (trans == 'C') { \
718 if (uplo == 'U') \
719 for (j = 0; j < n; ++j) { \
720 for (i = j; i < n; ++i) { \
721 tmp = *(y + DENSE_INDEX_U(j, i, n)); \
722 c##ASSIGN_CONJ(*x, tmp); \
723 x += 1; \
724 } \
725 } \
726 else \
727 for (j = 0; j < n; ++j) { \
728 for (i = 0; i <= j; ++i) { \
729 tmp = *(y + DENSE_INDEX_L(j, i, n)); \
730 c##ASSIGN_CONJ(*x, tmp); \
731 x += 1; \
732 } \
733 } \
734 } else { \
735 if (uplo == 'U') \
736 for (j = 0; j < n; ++j) { \
737 for (i = j; i < n; ++i) { \
738 tmp = *(y + DENSE_INDEX_U(j, i, n)); \
739 c##ASSIGN_IDEN(*x, tmp); \
740 x += 1; \
741 } \
742 } \
743 else \
744 for (j = 0; j < n; ++j) { \
745 for (i = 0; i <= j; ++i) { \
746 tmp = *(y + DENSE_INDEX_L(j, i, n)); \
747 c##ASSIGN_IDEN(*x, tmp); \
748 x += 1; \
749 } \
750 } \
751 } \
752 return; \
753} \
754 \
755void \
756c##band2(c##TYPE *x, const c##TYPE *y, \
757 size_t m, size_t n, size_t a, size_t b) \
758{ \
759 if (m == 0 || n == 0) \
760 return; \
761 size_t d; \
762 if (a > b || a >= m + n || b == 0) { \
763 d = m * n; \
764 memset(x, 0, sizeof(c##TYPE) * d); \
765 return; \
766 } \
767 a = (a == 0) ? 1 : a; \
768 b = (b >= m + n) ? m + n - 1 : b; \
769 size_t i, j, i0, i1, \
770 j0 = (a < m) ? 0 : a - m, \
771 j1 = (b < n) ? b : n; \
772 if (j0 > 0) { \
773 d = m * j0; \
774 memset(x, 0, sizeof(c##TYPE) * d); \
775 x += d; if (y) y += d; \
776 } \
777 for (j = j0; j < j1; ++j) { \
778 i0 = (b <= m + j) ? m + j - b : 0; \
779 i1 = (a >= j + 1) ? m + j - a + 1 : m; \
780 for (i = 0; i < i0; ++i) \
781 x[i] = c##ZERO; \
782 if (y) { \
783 for (i = i0; i < i1; ++i) \
784 x[i] = y[i]; \
785 y += m; \
786 } \
787 for (i = i1; i < m; ++i) \
788 x[i] = c##ZERO; \
789 x += m; \
790 } \
791 if (j1 < n) { \
792 d = m * (n - j1); \
793 memset(x, 0, sizeof(c##TYPE) * d); \
794 x += d; if (y) y += d; \
795 } \
796 return; \
797} \
798 \
799void \
800c##band1(c##TYPE *x, const c##TYPE *y, \
801 size_t n, char uplo, size_t a, size_t b) \
802{ \
803 if (n == 0) \
804 return; \
805 if (uplo == 'U') \
806 a = (a < n) ? n : a; \
807 else \
808 b = (b > n) ? n : b; \
809 size_t d; \
810 if (a > b || a >= n + n || b == 0) { \
811 d = PACKED_LENGTH(n); \
812 memset(x, 0, sizeof(c##TYPE) * d); \
813 return; \
814 } \
815 if (uplo == 'U') \
816 b = (b >= n + n) ? n + n - 1 : b; \
817 else \
818 a = (a == 0) ? 1 : a; \
819 size_t i, j, i0, i1, \
820 j0 = (a < n) ? 0 : a - n, \
821 j1 = (b < n) ? b : n; \
822 if (uplo == 'U') { \
823 if (j0 > 0) { \
824 d = PACKED_LENGTH(j0); \
825 memset(x, 0, sizeof(c##TYPE) * d); \
826 x += d; if (y) y += d; \
827 } \
828 for (j = j0; j < j1; ++j) { \
829 i0 = (b <= n + j) ? n + j - b : 0; \
830 i1 = (a >= n ) ? n + j - a + 1 : j + 1; \
831 for (i = 0; i < i0; ++i) \
832 x[i] = c##ZERO; \
833 if (y) { \
834 for (i = i0; i < i1; ++i) \
835 x[i] = y[i]; \
836 y += j + 1; \
837 } \
838 for (i = i1; i <= j; ++i) \
839 x[i] = c##ZERO; \
840 x += j + 1; \
841 } \
842 if (j1 < n) { \
843 d = PACKED_LENGTH(n) - PACKED_LENGTH(j1); \
844 memset(x, 0, sizeof(c##TYPE) * d); \
845 x += d; if (y) y += d; \
846 } \
847 } else { \
848 if (j0 > 0) { \
849 d = PACKED_LENGTH(n) - PACKED_LENGTH(j0); \
850 memset(x, 0, sizeof(c##TYPE) * d); \
851 x += d; if (y) y += d; \
852 } \
853 for (j = j0; j < j1; ++j) { \
854 i0 = (b <= n ) ? n + j - b : j; \
855 i1 = (a >= j + 1) ? n + j - a + 1 : n; \
856 for (i = j; i < i0; ++i) \
857 x[i - j] = c##ZERO; \
858 if (y) { \
859 for (i = i0; i < i1; ++i) \
860 x[i - j] = y[i - j]; \
861 y += n - j; \
862 } \
863 for (i = i1; i < n; ++i) \
864 x[i - j] = c##ZERO; \
865 x += n - j; \
866 } \
867 if (j1 < n) { \
868 d = PACKED_LENGTH(n - j1); \
869 memset(x, 0, sizeof(c##TYPE) * d); \
870 x += d; if (y) y += d; \
871 } \
872 } \
873 return; \
874} \
875
876
877TEMPLATE(i)
878TEMPLATE(d)
879TEMPLATE(z)
880
881#undef TEMPLATE
882
883#define TEMPLATE(c) \
884int \
885c##test2(const c##TYPE *x, \
886 size_t n, char uplo, char trans, char diag) \
887{ \
888 size_t i, j; \
889 if (diag < '\0') { \
890 for (j = 0; j < n; ++j) { \
891 for (i = 0; i < j; ++i) { \
892 if (c##NOT_ZERO(*x)) \
893 return 1; \
894 x += 1; \
895 } \
896 if (diag != -'N' && c##NOT_UNIT(*x)) \
897 return 1; \
898 x += 1; \
899 for (i = j + 1; i < n; ++i) { \
900 if (c##NOT_ZERO(*x)) \
901 return 1; \
902 x += 1; \
903 } \
904 } \
905 } else if (diag > '\0') { \
906 if (uplo == 'U') { \
907 for (j = 0; j < n; ++j) { \
908 x += j; \
909 if (diag != 'N' && c##NOT_UNIT(*x)) \
910 return 1; \
911 x += 1; \
912 for (i = j + 1; i < n; ++i) { \
913 if (c##NOT_ZERO(*x)) \
914 return 1; \
915 x += 1; \
916 } \
917 } \
918 } else { \
919 for (j = 0; j < n; ++j) { \
920 for (i = 0; i < j; ++i) { \
921 if (c##NOT_ZERO(*x)) \
922 return 1; \
923 x += 1; \
924 } \
925 if (diag != 'N' && c##NOT_UNIT(*x)) \
926 return 1; \
927 x += 1; \
928 x += n - j - 1; \
929 } \
930 } \
931 } else if (trans == 'C') { \
932 const c##TYPE *u = x, *l = x; \
933 for (j = 0; j < n; ++j) { \
934 for (i = 0; i < j; ++i) { \
935 if (c##NOT_CONJ(*l, *u)) \
936 return 1; \
937 u += 1; l += n; \
938 } \
939 if (c##NOT_ZERO_IMAG(*u)) \
940 return 1; \
941 u += 1; \
942 u += n - j - 1; l = x + j + 1; \
943 } \
944 } else { \
945 const c##TYPE *u = x, *l = x; \
946 for (j = 0; j < n; ++j) { \
947 for (i = 0; i < j; ++i) { \
948 if (c##NOT_IDEN(*l, *u)) \
949 return 1; \
950 u += 1; l += n; \
951 } \
952 u += 1; \
953 u += n - j - 1; l = x + j + 1; \
954 } \
955 } \
956 return 0; \
957} \
958 \
959int \
960c##test1(const c##TYPE *x, \
961 size_t n, char uplo, char trans, char diag) \
962{ \
963 size_t i, j; \
964 if (uplo == 'U') { \
965 if (diag < '\0') { \
966 for (j = 0; j < n; ++j) { \
967 for (i = 0; i < j; ++i) { \
968 if (c##NOT_ZERO(*x)) \
969 return 1; \
970 x += 1; \
971 } \
972 if (diag != -'N' && c##NOT_UNIT(*x)) \
973 return 1; \
974 x += 1; \
975 } \
976 } else if (diag > '\0') { \
977 if (diag != 'N') \
978 for (j = 0; j < n; x += (++j) + 1) \
979 if (c##NOT_UNIT(*x)) \
980 return 1; \
981 } else if (trans == 'C') { \
982 for (j = 0; j < n; x += (++j) + 1) \
983 if (c##NOT_ZERO_IMAG(*x)) \
984 return 1; \
985 } \
986 } else { \
987 if (diag < '\0') { \
988 for (j = 0; j < n; ++j) { \
989 if (diag != -'N' && c##NOT_UNIT(*x)) \
990 return 1; \
991 x += 1; \
992 for (i = j + 1; i < n; ++i) { \
993 if (c##NOT_ZERO(*x)) \
994 return 1; \
995 x += 1; \
996 } \
997 } \
998 } else if (diag > '\0') { \
999 if (diag != 'N') \
1000 for (j = 0; j < n; x += n - (j++)) \
1001 if (c##NOT_UNIT(*x)) \
1002 return 1; \
1003 } else if (trans == 'C') { \
1004 for (j = 0; j < n; x += n - (j++)) \
1005 if (c##NOT_ZERO_IMAG(*x)) \
1006 return 1; \
1007 } \
1008 } \
1009 return 0; \
1010} \
1011 \
1012void c##tspaggr( int *i1, int *j1, c##TYPE *x1, \
1013 const int *i0, const int *j0, const c##TYPE *x0, \
1014 int m, int n, int *nnz, int *iwork, c##TYPE *work) \
1015{ \
1016 int *iworkA = iwork, *iworkB = iworkA + m + 1, *iworkC = iworkB + m, \
1017 *j_ = iworkC + n, i, j, k, kend, ka, kb; \
1018 c##IF_NPATTERN( \
1019 c##TYPE *x_ = work; \
1020 ); \
1021 \
1022 if (!i1) { \
1023 \
1024 int nnz0 = *nnz, nnz1 = 0; \
1025 \
1026 /* 1. Tabulate column indices in iworkA[i] */ \
1027 \
1028 for (i = 0; i <= m; ++i) \
1029 iworkA[i] = 0; \
1030 ++iworkA; \
1031 for (k = 0; k < nnz0; ++k) \
1032 ++iworkA[i0[k]]; \
1033 for (i = 0; i < m; ++i) \
1034 iworkA[i] += iworkA[i - 1]; \
1035 --iworkA; \
1036 \
1037 /* iworkA[i]: number of column indices listed for row < i, */ \
1038 /* incl. duplicates */ \
1039 \
1040 /* 2. Group column indices and data by row in j_[k], x_[k] */ \
1041 \
1042 for (k = 0; k < nnz0; ++k) { \
1043 c##IF_NPATTERN( \
1044 x_[iworkA[i0[k]] ] = x0[k]; \
1045 ); \
1046 j_[iworkA[i0[k]]++] = j0[k]; \
1047 } \
1048 \
1049 /* iworkA[i]: number of column indices listed for row <= i, */ \
1050 /* incl. duplicates */ \
1051 /* j_[k]: column indices grouped by row, */ \
1052 /* incl. duplicates, unsorted */ \
1053 /* x_[k]: corresponding data */ \
1054 \
1055 /* 3. Gather unique column indices at the front of each group, */ \
1056 /* aggregating data accordingly; record in iworkB[i] where */ \
1057 /* the unique column indices stop and the duplicates begin */ \
1058 \
1059 for (j = 0; j < n; ++j) \
1060 iworkC[j] = -1; \
1061 for (i = 0, k = 0; i < m; ++i) { \
1062 kend = iworkA[i]; \
1063 ka = kb = k; \
1064 while (k < kend) { \
1065 if (iworkC[j_[k]] < ka) { \
1066 /* Have not yet seen this column index */ \
1067 iworkC[j_[k]] = kb; \
1068 c##IF_NPATTERN( \
1069 x_[kb ] = x_[k]; \
1070 ); \
1071 j_[kb++] = j_[k]; \
1072 } else { \
1073 /* Have already seen this column index */ \
1074 c##IF_NPATTERN( \
1075 c##INCREMENT_IDEN(x_[iworkC[j_[k]]], x_[k]); \
1076 ); \
1077 } \
1078 ++k; \
1079 } \
1080 iworkB[i] = kb; \
1081 nnz1 += kb - ka; \
1082 } \
1083 \
1084 /* iworkB[i]: pointer to first non-unique column index in row i */ \
1085 /* j_[k]: column indices grouped by row */ \
1086 /* with unique indices in front, */ \
1087 /* i.e., in positions iworkA[i - 1] <= k < iworkB[i] */ \
1088 /* x_[k]: corresponding data "cumulated" appropriately */ \
1089 \
1090 *nnz = nnz1; \
1091 \
1092 } else { \
1093 \
1094 /* 4. Copy unique (i,j) pairs from unsorted stacks 0 <= i < m */ \
1095 \
1096 for (i = 0, k = 0; i < m; ++i) { \
1097 kb = iworkB[i]; \
1098 while (k < kb) { \
1099 *(i1++) = i; \
1100 *(j1++) = j_[k]; \
1101 c##IF_NPATTERN( \
1102 *(x1++) = x_[k]; \
1103 ); \
1104 ++k; \
1105 } \
1106 k = iworkA[i]; \
1107 } \
1108 \
1109 } \
1110 \
1111 return; \
1112} \
1113 \
1114void c##tspsort( int *p1, int *i1, c##TYPE *x1, \
1115 const int *i0, const int *j0, const c##TYPE *x0, \
1116 int m, int n, int *nnz, int *iwork, c##TYPE *work) \
1117{ \
1118 if (!i1) { \
1119 \
1120 c##tspaggr(NULL, NULL, NULL, i0, j0, x0, m, n, nnz, iwork, work); \
1121 \
1122 } else { \
1123 \
1124 int *iworkA = iwork, *iworkB = iworkA + m + 1, *iworkC = iworkB + m, \
1125 *j_ = iworkC + n, i, j, k, kb; \
1126 c##IF_NPATTERN( \
1127 c##TYPE *x_ = work; \
1128 ); \
1129 \
1130 /* 4. Tabulate _unique_ column indices in iworkC[j] */ \
1131 \
1132 for (j = 0; j <= n; ++j) \
1133 p1[j] = 0; \
1134 ++p1; \
1135 for (i = 0, k = 0; i < m; ++i) { \
1136 kb = iworkB[i]; \
1137 while (k < kb) { \
1138 ++p1[j_[k]]; \
1139 ++k; \
1140 } \
1141 k = iworkA[i]; \
1142 } \
1143 for (j = 0; j < n; ++j) \
1144 p1[j] += (iworkC[j] = p1[j - 1]); \
1145 --p1; \
1146 \
1147 /* iworkC[j]: number of nonzero elements in columns < j */ \
1148 \
1149 /* 5. Pop unique (i,j) pairs from unsorted stacks 0 <= i < m */ \
1150 /* onto new stacks 0 <= j < n, which will be sorted */ \
1151 \
1152 for (i = 0, k = 0; i < m; ++i) { \
1153 kb = iworkB[i]; \
1154 while (k < kb) { \
1155 c##IF_NPATTERN( \
1156 x1[iworkC[j_[k]] ] = x_[k]; \
1157 ); \
1158 i1[iworkC[j_[k]]++] = i; \
1159 ++k; \
1160 } \
1161 k = iworkA[i]; \
1162 } \
1163 \
1164 /* iworkC[j]: number of nonzero elements in columns <= j */ \
1165 \
1166 } \
1167 \
1168 return; \
1169} \
1170 \
1171void c##cspsort(int *p1, int *i1, c##TYPE *x1, \
1172 int m, int n, int *iwork, c##TYPE *work) \
1173{ \
1174 int *iworkA = iwork, *iworkB = iworkA + m + 1, \
1175 *j_ = iworkB + ((m < n) ? n : m), i, j, k, kend, nnz = p1[n]; \
1176 c##IF_NPATTERN( \
1177 c##TYPE *x_ = work; \
1178 ); \
1179 \
1180 for (i = 0; i <= m; ++i) \
1181 iworkA[i] = 0; \
1182 ++iworkA; \
1183 for (k = 0; k < nnz; ++k) \
1184 ++iworkA[i1[k]]; \
1185 for (i = 0; i < m; ++i) \
1186 iworkA[i] += (iworkB[i] = iworkA[i - 1]); \
1187 --iworkA; \
1188 \
1189 ++p1; \
1190 for (j = 0, k = 0; j < n; ++j) { \
1191 kend = p1[j]; \
1192 while (k < kend) { \
1193 i = i1[k]; \
1194 c##IF_NPATTERN( \
1195 x_[iworkB[i] ] = x1[k]; \
1196 ); \
1197 j_[iworkB[i]++] = j; \
1198 ++k; \
1199 } \
1200 } \
1201 --p1; \
1202 \
1203 for (j = 0; j < n; ++j) \
1204 iworkB[j] = p1[j]; \
1205 \
1206 ++iworkA; \
1207 for (i = 0, k = 0; i < m; ++i) { \
1208 kend = iworkA[i]; \
1209 while (k < kend) { \
1210 j = j_[k]; \
1211 c##IF_NPATTERN( \
1212 x1[iworkB[j] ] = x_[k]; \
1213 ); \
1214 i1[iworkB[j]++] = i; \
1215 ++k; \
1216 } \
1217 } \
1218 --iworkA; \
1219 \
1220 return; \
1221} \
1222 \
1223void c##csptrans( int *p1, int *i1, c##TYPE *x1, \
1224 const int *p0, const int *i0, const c##TYPE *x0, \
1225 int m, int n, char trans, int *iwork) \
1226{ \
1227 int i, j, k, kend, nnz = p0[n]; \
1228 p0++; \
1229 \
1230 for (i = 0; i <= m; ++i) \
1231 p1[i] = 0; \
1232 ++p1; \
1233 for (k = 0; k < nnz; ++k) \
1234 ++p1[i0[k]]; \
1235 for (i = 0; i < m; ++i) \
1236 p1[i] += (iwork[i] = p1[i - 1]); \
1237 --p1; \
1238 \
1239 if (trans == 'C') \
1240 for (j = 0, k = 0; j < n; ++j) { \
1241 kend = p0[j]; \
1242 while (k < kend) { \
1243 c##IF_NPATTERN( \
1244 c##ASSIGN_CONJ(x1[iwork[i0[k]]], x0[k]); \
1245 ); \
1246 i1[iwork[i0[k]]++] = j; \
1247 ++k; \
1248 } \
1249 } \
1250 else \
1251 for (j = 0, k = 0; j < n; ++j) { \
1252 kend = p0[j]; \
1253 while (k < kend) { \
1254 c##IF_NPATTERN( \
1255 c##ASSIGN_IDEN(x1[iwork[i0[k]]], x0[k]); \
1256 ); \
1257 i1[iwork[i0[k]]++] = j; \
1258 ++k; \
1259 } \
1260 } \
1261 \
1262 return; \
1263} \
1264
1265
1266TEMPLATE(n)
1267TEMPLATE(l)
1268TEMPLATE(i)
1269TEMPLATE(d)
1270TEMPLATE(z)
1271
1272#undef TEMPLATE
1273
1274void zvreal(Rcomplex *x, const Rcomplex *y, size_t n)
1275{
1276 if (y)
1277 while (n--) {
1278 (* x ).r = (*(y++)).r;
1279 (*(x++)).i = 0.0;
1280 }
1281 else
1282 while (n--)
1283 (*(x++)).i = 0.0;
1284 return;
1285}
1286
1287void zvimag(Rcomplex *x, const Rcomplex *y, size_t n)
1288{
1289 if (y)
1290 while (n--) {
1291 (* x ).r = 0.0;
1292 (*(x++)).i = (*(y++)).i;
1293 }
1294 else
1295 while (n--)
1296 (*(x++)).r = 0.0;
1297 return;
1298}
1299
1300void zvconj(Rcomplex *x, const Rcomplex *y, size_t n)
1301{
1302 if (y)
1303 while (n--) {
1304 (* x ).r = (* y ).r;
1305 (*(x++)).i = -(*(y++)).i;
1306 }
1307 else
1308 while (n--) {
1309 (*x).i = -(*x).i;
1310 x++;
1311 }
1312 return;
1313}
void zvimag(Rcomplex *x, const Rcomplex *y, size_t n)
Definition idz.c:1287
void zvreal(Rcomplex *x, const Rcomplex *y, size_t n)
Definition idz.c:1274
#define TEMPLATE(c)
Definition idz.c:5
void zvconj(Rcomplex *x, const Rcomplex *y, size_t n)
Definition idz.c:1300