Commit 2fa7e390 authored by Danil V. Perevozkin's avatar Danil V. Perevozkin
Browse files

Initial commit

parents
Pipeline #98 failed with stages
in 21 seconds
File added
################################################################
# Set the location to install to:
TARGETDIR = $(PWD)
#TARGETDIR = ~/public/krylov
################################################################
# MPI-related options. Enable the macro and choose an appropriate
# compiler if you plan to use MPI.
CXX = g++
#DEFS += -D KR_USE_MPI
#CXX = mpicxx
#CXX = mpiicpc
################################################################
# Enable if you use MKL:
#DEFS += -D KR_USE_MKL
#LIBMKL = -lmkl_core -lmkl_intel_lp64 -lmkl_intel_thread -liomp5
#LIBMKL = -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -lm -ldl
################################################################
# Enable if you use OpenMP (no MKL):
DEFS += -D KR_USE_OMP
################################################################
# Enable if you use METIS:
#DEFS += -D KR_USE_METIS
#LIBMETIS = /home/fano.icmmg/perevozkin_d_v/software/metis-5.1.0/lib/libmetis.a
#HDRMETIS = /home/fano.icmmg/perevozkin_d_v/software/metis-5.1.0/include
#LIBMETIS = /home/dan/krylov/metis-5.1.0/lib/libmetis.a
#HDRMETIS = /home/dan/krylov/metis-5.1.0/include
################################################################
# This one disables some run-time checks.
# They are not really CPU-hungry, so better leave them on.
# DEFS += -D KR_NO_DEBUG
################################################################
export TARGETDIR
export DEFS
export CXX
export HDRMETIS
export LIBMETIS
export LIBMKL
export COPTS
.PHONY: all build debug release clean mkl mpi mpimkl
all: release build
build:
$(MAKE) --directory src
debug:
$(eval COPTS = $(DEFS) -g -fmax-errors=1 -fopenmp -Wall -std=c++11)
release:
$(eval COPTS = $(DEFS) -O2 -fmax-errors=1 -fopenmp -Wall -Wno-unused-function -Wno-reorder -std=c++11 -mavx)
clean:
rm -rf include test lib examples
rm -f current/data
mkl:
$(eval CXX = icpc)
$(eval DEFS += -D KR_USE_MKL)
$(eval LIBMKL = -lmkl_core -lmkl_intel_lp64 -lmkl_intel_thread -liomp5)
mpi:
$(eval DEFS += -D KR_USE_MPI)
$(eval CXX = mpicxx)
mpimkl:
$(eval CXX = mpiicpc)
$(eval DEFS += -D KR_USE_MPI)
$(eval DEFS += -D KR_USE_MKL)
$(eval LIBMKL = -lmkl_core -lmkl_intel_lp64 -lmkl_intel_thread -liomp5)
#ifndef A_U_NEMTSEV_TO_ZAVTRA_BEBEBE_BEBEBE // this goes back to 2009!
#define A_U_NEMTSEV_TO_ZAVTRA_BEBEBE_BEBEBE
#include "myalloc2.h"
#include "submx.h"
//----------------------------------------------
typedef struct
{
int n;
int lp; // pointer to an end, in nodes
int datasize; // data size, in elements
int *data; // actual data storage
int *start; // kinda irow for matrices
int *count; // current row elem. count
int *size; // max size for a row
} adjmx;
//----------------------------------------------
static double adj_def_scale = 1.618;
//----------------------------------------------
static void adj_free(adjmx *a)
{
free2(a->size);
free2(a->count);
free2(a->start);
free2(a->data);
free2(a);
}
//----------------------------------------------
static adjmx *adj_create(int n, int cpn)
{
int i;
adjmx * adj;
adj = calloct(1, adjmx);
adj->n = n;
// mapping initialization:
if (cpn == 0) cpn = 7; // potolok!
adj->data = malloct(n * cpn, int);
adj->lp = n * cpn;
adj->datasize = n * cpn;
adj->start = malloct(n, int);
adj->count = calloct(n, int);
adj->size = malloct(n, int);
for (i = 0; i < n; i++)
{
adj->start[i] = i * cpn;
adj->size[i] = cpn;
}
return adj;
}
//----------------------------------------------
static void adj_gscale(adjmx *a, int minroom)
{
int newsize;
int *newdata;
newsize = (int) (a->datasize * adj_def_scale);
if (newsize < a->datasize + minroom)
newsize = a->datasize + minroom;
newdata = (int *) malloct(newsize, int);
memcpy(newdata, a->data, a->datasize * sizeof(int));
free2(a->data);
a->data = newdata;
a->datasize = newsize;
}
//----------------------------------------------
static void adj_lscale(adjmx *a, int node)
{
// new local size:
int newsize;
int i, j, bnd;
newsize = (int) (a->size[node] * adj_def_scale);
if (newsize == a->size[node])
newsize = a->size[node] + 1;
if (a->lp + newsize > a->datasize)
adj_gscale(a, a->lp + newsize - a->datasize);
for (i = a->start[node], bnd = i + a->size[node], j = a->lp; i < bnd; i++, j++)
a->data[j] = a->data[i];
a->start[node] = a->lp;
a->size[node] = newsize;
a->lp = a->lp + newsize;
}
//----------------------------------------------
static int adj_chk_node(adjmx *a, int node, int color)
{
int i, bnd;
for (i = a->start[node], bnd = i + a->count[node]; i < bnd; i++)
if (a->data[i] == color) return 1;
return 0;
}
//----------------------------------------------
static void adj_add_node_color(adjmx *a, int node, int color)
{
if (adj_chk_node(a, node, color) == 0)
{
if (a->size[node] == a->count[node])
adj_lscale(a, node);
a->data[a->start[node] + a->count[node]++] = color;
}
}
//----------------------------------------------
static cmatrix31 *adj_to_cmatrix31(adjmx *a)
{
cmatrix31 *out;
int *irow, *icol;
int n = a->n, nnz, i, j;
for (nnz = i = 0; i < n; i++)
nnz += a->count[i];
irow = malloct(n+1, int);
icol = malloct(nnz, int);
for (nnz = i = 0; i < n; i++)
{
for (j = 0; j < a->count[i]; j++)
icol[nnz++] = a->data[a->start[i] + j];
irow[i+1] = nnz;
}
irow[0] = 0;
out = mx_assign(n, n, ST_CSR, irow, icol, NULL);
return out;
}
//----------------------------------------------
#endif
#ifndef BROCCOLI_RETURNING
#define BROCCOLI_RETURNING
#include <math.h>
#include "clrqueue.h"
#include "adjmx.h"
#include "paint.h"
#include "shex.h"
#include "krdefs.h"
//----------------------------------------------
// perform (n*m / d) w/o integer overflow
static int cb_funny_div(int n, int m, int d)
{
return (int) ((double) n * (double) m / (double) d);
}
//----------------------------------------------
static int *cb_broccoli(cmatrix31 *P,
int np,
int wtotal,
int *weight)
{
clrqueue *q;
int *irow = P->irow, *icol = P->icol;
int ntgt, wtgt, ncov, wcov, n = P->n;
int seed, color, cv, j;
int *result;
KR_TRAP(P->n < np);
q = cq_create(n);
color = 0;
/*
Coverage targets:
1. have no. nodes more than or equal to no. subdomains remaining
2. have remaining weight >= (total / np * remaining)
3. cover at least one node per subdomain without regard to 1,2
*/
/*
Coverage targets implementation:
wtgt - next weight coverage target = W * (color+1) / np
wcov - current weight coverage
loop while wcur < wtgt
ntgt - node quantity target = min(n * (color+1) / np, n - (np - (color+1)))
ncov - current node coverage
loop while ncur < ntgt
*/
wcov = 0;
ncov = 0;
if (wtotal == 0) wtotal = n;
while (wcov < wtotal)
{
wtgt = cb_funny_div(wtotal, color+1, np);
ntgt = cb_funny_div(n, color+1, np);
ntgt = kr_min(ntgt, n - (np - (color + 1)));
if (!cq_seed(q, &seed))
{
KR_TRAP(wcov != wtotal);
break;
}
// Again, we have some fragility here. Not enough mana to grasp it all.
cq_push(q, seed);
if (wcov >= wtgt || ncov >= ntgt)
{
KR_TRAP(ncov > ntgt); // Should never happen. Don't remove this trap.
cv = cq_next(q);
cq_mark(q, cv, color);
if (weight) wcov += weight[cv]; else wcov += 1;
ncov += 1;
}
while (!cq_empty(q) && ncov < ntgt && wcov < wtgt)
{
cv = cq_next(q);
cq_mark(q, cv, color);
if (weight) wcov += weight[cv]; else wcov += 1;
ncov += 1;
for (j = irow[cv]; j < irow[cv+1]; j++)
cq_nudge(q, icol[j]);
}
cq_droptail(q);
color++;
}
KR_TRAP(ncov != n);
result = malloct(n, int);
for (cv = 0; cv < n; cv++)
result[cv] = cq_color(q, cv);
cq_free(q);
return result;
}
//----------------------------------------------
static cmatrix31 *cb_splitting_adjmx(cmatrix31 *P, int *map)
{
adjmx *J;
int n = P->n, i, j;
cmatrix31 *result;
J = adj_create(map_color_count(n, map), 0);
for (i = 0; i < n; i++)
for (j = P->irow[i]; j < P->irow[i+1]; j++)
adj_add_node_color(J, map[i], map[P->icol[j]]);
result = adj_to_cmatrix31(J);
adj_free(J);
return result;
}
//----------------------------------------------
static int *cb_weight(int n, int nclr, int *map)
{
int *w = calloct(nclr, int);
int i;
for (i = 0; i < n; i++)
w[map[i]]++;
return w;
}
//----------------------------------------------
static void cb_merge(int np, int *cmap, cmatrix31 *Q)
{
int n = Q->n;
int nc, *w, i, smallest = -1, adjacent = -1, c, size;
nc = map_color_count(n, cmap);
if (nc == np) return;
w = cb_weight(n, nc, cmap);
while (nc > np)
{
// find the smallest subset (target):
size = 0;
for (i = 0; i < nc; i++)
if (size < w[i])
{
size = w[i];
smallest = i;
}
// find the smallest adjacent to the target:
size = 0;
for (i = Q->irow[smallest]; i < Q->irow[smallest+1]; i++)
{
c = Q->icol[i];
if (c == smallest) continue;
if (size < w[c])
{
size = w[c];
adjacent = c;
}
}
// merge the smallest to the adjacent
w[adjacent] += w[smallest];
w[smallest] = w[nc-1];
for (i = 0; i < n; i++)
if (cmap[i] == smallest)
cmap[i] = adjacent;
for (i = 0; i < n; i++)
if (cmap[i] == nc-1)
cmap[i] = smallest;
nc -= 1;
}
free2(w);
}
//----------------------------------------------
static int *cmap_broccoli(cmatrix31 *P, int np, int fmap_shexy)
{
int *fmap, *cmap, *weight;
cmatrix31 *Q;
int i;
fmap = cb_broccoli(P, kr_max(np, (int) sqrt((double) P->n)), P->n, NULL);
if (fmap_shexy)
shex(P, fmap);
Q = cb_splitting_adjmx(P, fmap);
weight = cb_weight(P->n, Q->n, fmap);
cmap = cb_broccoli(Q, np, P->n, weight);
cb_merge(np, cmap, Q);
for (i = 0; i < P->n; i++)
fmap[i] = cmap[fmap[i]];
free2(weight);
mx_free(Q);
free2(cmap);
return fmap;
}
//----------------------------------------------
#endif
#ifndef CURRENT_DIAGS_AND_FIXES
#define CURRENT_DIAGS_AND_FIXES
#include <stdio.h>
//----------------------------------------------
#ifdef KR_NO_DEBUG
#define KR_TRAP(cond)
#else
#define KR_TRAP(cond) \
if (cond) printf("Unwanted condition met: \"" #cond "\" at %s:%d.\n", __FILE__, __LINE__), fflush(stdout), kr_trap()
#endif
static int kr_isfinite(double x);
static int kr_trap();
//----------------------------------------------
// MSC:
//----------------------------------------------
#ifdef _MSC_VER
#include <float.h>
#include <windows.h>
//----------------------
static int kr_isfinite(double x)
{
// HACK: it's not officially a flag word:
int not = _FPCLASS_SNAN | _FPCLASS_QNAN | _FPCLASS_NINF | _FPCLASS_PINF;
if (_fpclass(x) & not)
return 0;
return 1;
}
//----------------------
static int kr_trap()
{
__debugbreak();
return 0;
}
//----------------------
#endif
//----------------------------------------------
// Linux/MinGW:
//----------------------------------------------
#if defined(__linux__) || defined(linux) || defined(__MINGW32__)
#include <math.h>
#include <signal.h>
#include <unistd.h>
//----------------------
static int kr_isfinite(double x)
{
return isfinite(x);
}
//----------------------
static int kr_trap()
{
#ifdef SIGTRAP
return raise(SIGTRAP);
#else
__asm("int $3");
return 0;
#endif
}
//----------------------
#endif // Linux/MinGW
//----------------------------------------------
#endif
#ifndef AMG_CGC
#define AMG_CGC
#ifdef KR_USE_MPI
#include <mpi.h>
#endif
#include <math.h>
#include "submx.h"
#include "paint.h"
#include "linop.h"
#include "pdsqlinop.h"
#include "mx31linop.h"
//----------------------------------------------
// create a finer map from a coarse one:
static int *amg_create_fine_map(cmatrix31 *A, int *cmap, int nodesize)
{
int *fmap, *crossmap, *out;
int nc, nf;
int i, j, n = A->n;
if (nodesize == 0) nodesize = (int) sqrt((double) n);
// create fine map:
fmap = paint(A, nodesize);
if (cmap == NULL)
return fmap;
// perform map multiplication:
// count colors:
nc = map_color_count(n, cmap);
nf = map_color_count(n, fmap);
// let's see how fine colors are spread across coarse ones:
crossmap = calloct(nc*nf, int);
// subset s has color c <=> crossmap[nf*s + c] == 1
for (i = 0; i < n; i++)
crossmap[nf * cmap[i] + fmap[i]] = 1;
// convert crossmap to tell us the unique new color:
for (j = 0, i = 0; i < nc*nf; i++)
if (crossmap[i])
crossmap[i] = j++;
else
crossmap[i] = -1;
// create output map based on crossmap:
out = malloct(n, int);
for (i = 0; i < n; i++)
out[i] = crossmap[nf * cmap[i] + fmap[i]];
// debug: verify correctness:
for (i = 0; i < n; i++)
KR_TRAP(out[i] == -1);
free2(crossmap);
free2(fmap);
return out;
}
//----------------------------------------------
// create R from fine macrovertex map:
static cmatrix31 * amg_fine_map_to_R(int n, int nclr, int *map)
{
int *irow, *icol;
double *data;
int i;
// count no. row members:
irow = calloct(nclr + 1, int);
for (i = 0; i < n; i++)
irow[map[i]] += 1;
mx_sum_n_shift(nclr, irow);
// fill in icol and data:
icol = malloct(irow[nclr], int);
data = malloct(irow[nclr], double);
for (i = 0; i < n; i++)
{
icol[irow[map[i]]] = i;
data[irow[map[i]]] = 1;
irow[map[i]] += 1;
}
mx_irow_shr(nclr, irow);
return mx_assign(nclr, n, ST_CSR, irow, icol, data);
}
//----------------------------------------------
//----------------------------------------------
//----------------------------------------------
class smallamg : public qlinop
{
private:
linop *R, *P;
qlinop *C;
cmatrix31 *Rm, *Pm, *Cm;
double *Rx, *z;
public:
//----------------------------------------------
~smallamg()
{