From c000ca6818357246de5c3c25eb7e1b2451a51a1e Mon Sep 17 00:00:00 2001 From: Ravi Manumachu Date: Sat, 16 Sep 2017 19:04:47 +0100 Subject: [PATCH] hclmxv: Adding MXV MPI... --- INSTALL.txt | 2 +- Makefile | 25 +++++++++++++++++++++++-- absdevpowers.c | 9 +++++++++ absdevs.c | 16 ++++++++++++++++ absdevs.h | 27 +++++++++++++++++++++++++++ absdevs.lst | 34 ++++++++++++++++++++++++++++++++++ absdevsreader.cpp | 375 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ compile.sh | 33 +++++++++++++++++++++++++++++++++ cputopology.cpp | 578 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ cputopology.hpp | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/CMakeLists.txt | 44 ++++++++++++++++++++++++++++++++++++++++++++ hdpi/hdpi.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_err.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning.h | 33 +++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning_graphs.c | 1780 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning_graphs.h | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning_internal.h | 284 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning_matrices.c | 5190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning_matrices.h | 329 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning_sets.c | 3938 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning_sets.h | 96 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning_sets_speed_function_of_problem_size.c | 1194 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning_sets_speed_function_of_problem_size_weighted_elements.c | 2103 
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning_sets_speed_single_numbers_weighted_elements.c | 1709 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning_trees.h | 42 ++++++++++++++++++++++++++++++++++++++++++ hdpi/hmpi_partitioning_types.h 
| 27 +++++++++++++++++++++++++++ main.cpp | 728 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 27 files changed, 18943 insertions(+), 3 deletions(-) create mode 100644 absdevpowers.c create mode 100644 absdevs.c create mode 100644 absdevs.h create mode 100644 absdevs.lst create mode 100644 absdevsreader.cpp create mode 100755 compile.sh create mode 100755 cputopology.cpp create mode 100755 cputopology.hpp create mode 100644 hdpi/CMakeLists.txt create mode 100644 hdpi/hdpi.h create mode 100644 hdpi/hmpi_err.h create mode 100644 hdpi/hmpi_partitioning.h create mode 100644 hdpi/hmpi_partitioning_graphs.c create mode 100644 hdpi/hmpi_partitioning_graphs.h create mode 100644 hdpi/hmpi_partitioning_internal.h create mode 100644 hdpi/hmpi_partitioning_matrices.c create mode 100644 hdpi/hmpi_partitioning_matrices.h create mode 100644 hdpi/hmpi_partitioning_sets.c create mode 100644 hdpi/hmpi_partitioning_sets.h create mode 100644 hdpi/hmpi_partitioning_sets_speed_function_of_problem_size.c create mode 100644 hdpi/hmpi_partitioning_sets_speed_function_of_problem_size_weighted_elements.c create mode 100644 hdpi/hmpi_partitioning_sets_speed_single_numbers_weighted_elements.c create mode 100644 hdpi/hmpi_partitioning_trees.h create mode 100644 hdpi/hmpi_partitioning_types.h create mode 100644 main.cpp diff 
--git a/INSTALL.txt b/INSTALL.txt index 882814e..31d49cf 100644 --- a/INSTALL.txt +++ b/INSTALL.txt @@ -24,7 +24,7 @@ To build and install: 1). cd -2). make +2). ./compile.sh This will create the executables. diff --git a/Makefile b/Makefile index 6aed5a0..ca55a31 100644 --- a/Makefile +++ b/Makefile @@ -3,12 +3,15 @@ ############################################################### CC = icc +MPICC = mpiicc NVCC = nvcc OPTFLAGS = -O3 -fopenmp CPP11FLAGS = ${OPTFLAGS} -std=c++11 MKLFLAGS = ${OPTFLAGS} -mkl +MPIMKLFLAGS = ${OPTFLAGS} -mkl -mt_mpi +HDPIHOME = hdpi/installation_dir -all: dgemvcpu dgemvphi dgemvgpu +all: dgemvcpu dgemvphi dgemvgpu absdevsreader runabsdevreader mpimain dgemvcpu: ${CC} ${MKLFLAGS} -o dgemvcpu dgemvcpu.c -lm @@ -20,7 +23,25 @@ dgemvgpu: ${NVCC} -I/usr/local/cuda/include -o dgemvgpu dgemvgpu.c \ -L/usr/local/cuda/lib64 -lcublas +absdevsreader: + @echo "--------------------------------------------" + @echo "Now compiling abstract devices reader file..." + @echo "--------------------------------------------" + @echo + ${CC} ${OPTFLAGS} -c cputopology.cpp + ${CC} ${CPP11FLAGS} -o absdevsreader absdevsreader.cpp cputopology.o + +runabsdevreader: + @echo "--------------------------------------------" + @echo "Now running abstract devices reader to generate absdevs.c..." 
+ @echo "--------------------------------------------" + @echo + ./absdevsreader ./absdevs.lst 0 + +mpimain: + $(MPICC) -I${HDPIHOME}/include ${MKLFLAGS} -o main main.cpp -L${HDPIHOME}/lib -lhdpi -lm + clean: - rm -f dgemvcpu dgemvphi dgemvgpu + rm -f dgemvcpu dgemvphi dgemvgpu *.o absdevsreader main ############################################################### diff --git a/absdevpowers.c b/absdevpowers.c new file mode 100644 index 0000000..c50d090 --- /dev/null +++ b/absdevpowers.c @@ -0,0 +1,9 @@ + +/*----------------------------------------------------------------------------*/ + +const char* hcl_powerplatforms[] = { +"CPUPCM" +}; + +/*----------------------------------------------------------------------------*/ + diff --git a/absdevs.c b/absdevs.c new file mode 100644 index 0000000..4d30592 --- /dev/null +++ b/absdevs.c @@ -0,0 +1,16 @@ + +/*----------------------------------------------------------------------------*/ + +#include "absdevs.h" + +/*----------------------------------------------------------------------------*/ + +const unsigned int hcl_coreindex[] = { +0,6,12,18,24 +}; +const unsigned int hcl_corebindings[] = { +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23 +}; + +/*----------------------------------------------------------------------------*/ + diff --git a/absdevs.h b/absdevs.h new file mode 100644 index 0000000..9d265e5 --- /dev/null +++ b/absdevs.h @@ -0,0 +1,27 @@ + +/*----------------------------------------------------------------------------*/ + +#ifndef _ABSDEVS_HH +#define _ABSDEVS_HH + +/*----------------------------------------------------------------------------*/ + +typedef struct _hcl_abstractdevicestable_ { + int(*init)(const int, const int, const int, const int, + const unsigned int, const unsigned int); + int(*gemm)(const int, const int*, const int*, const int*, + double*, double*, double*, double*); + int(*destroy)(const int); + const unsigned int nompt; +} hcl_abstractdevicestable; + +extern 
hcl_abstractdevicestable hcl_absdevtable[]; +extern const unsigned int hcl_coreindex[]; +extern const unsigned int hcl_corebindings[]; + +/*----------------------------------------------------------------------------*/ + +#endif + +/*----------------------------------------------------------------------------*/ + diff --git a/absdevs.lst b/absdevs.lst new file mode 100644 index 0000000..aeb9f02 --- /dev/null +++ b/absdevs.lst @@ -0,0 +1,34 @@ +#cores DGEMM No. of MPI processes No. of OpenMP threads +#----- ----- --------------------- --------------------- +#0-3 CPU,MKL 1 0 +#4-7 CPU,MKL 1 0 +#8-11 CPU,MKL 1 0 +#24-27 CPU,MKL 1 0 +#28-31 CPU,MKL 1 0 +#32-35 CPU,MKL 1 0 +#12-15 CPU,MKL 1 0 +#16-19 CPU,MKL 1 0 +#20-23 CPU,MKL 1 0 +#36-39 CPU,MKL 1 0 +#40-43 CPU,MKL 1 0 +#44-47 CPU,MKL 1 0 +#0-2 CPU,MKL 1 0 +#3-5 CPU,MKL 1 0 +#6-8 CPU,MKL 1 0 +#9-11 CPU,MKL 1 0 +#24-26 CPU,MKL 1 0 +#27-29 CPU,MKL 1 0 +#30-32 CPU,MKL 1 0 +#33-35 CPU,MKL 1 0 +#12-14 CPU,MKL 1 0 +#15-17 CPU,MKL 1 0 +#18-20 CPU,MKL 1 0 +#21-23 CPU,MKL 1 0 +#36-38 CPU,MKL 1 0 +#39-41 CPU,MKL 1 0 +#42-44 CPU,MKL 1 0 +#45-47 CPU,MKL 1 0 +0-5 CPU,MKL 1 0 +6-11 CPU,MKL 1 0 +12-17 CPU,MKL 1 0 +18-23 CPU,MKL 1 0 diff --git a/absdevsreader.cpp b/absdevsreader.cpp new file mode 100644 index 0000000..b2dbbd4 --- /dev/null +++ b/absdevsreader.cpp @@ -0,0 +1,375 @@ + +/*-----------------------------------------------------------*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cputopology.hpp" + +/*-----------------------------------------------------------*/ + +int main(int argc, char** argv) +{ + if (argc != 3) + { + std::cerr << "Usage: " << argv[0] + << " " + << std::endl; + exit(EXIT_FAILURE); + } + + std::string abstractDevicesFile = argv[1]; + bool verbosity = atoi(argv[2]); + + std::ifstream absDevices(abstractDevicesFile.c_str()); + if (!absDevices.is_open()) + { + std::cerr << "Unable to open " + << abstractDevicesFile + << std::endl; + exit(EXIT_FAILURE); + } + 
+ unsigned int numLogicalCPUs, numPhysicalCPUs; + int rc = hcl::topology::getNumLogicalCpus(&numLogicalCPUs); + if (rc != 0) + { + std::cerr << "Error to get number of logical cores..." + << std::endl; + exit(EXIT_FAILURE); + } + + rc = hcl::topology::getNumPhysicalCpus(&numPhysicalCPUs); + if (rc != 0) + { + std::cerr << "Error to get number of physical cores..." + << std::endl; + exit(EXIT_FAILURE); + } + + std::cout << "Number of logical cores " << numLogicalCPUs + << std::endl; + std::cout << "Number of physical cores " << numPhysicalCPUs + << std::endl; + + std::vector nCoresList; + std::vector coresList; + std::vector abstractDevicesTable; + std::vector powerPlatforms; + + /* + * We always include the CPU + */ + powerPlatforms.push_back("CPUPCM"); + + unsigned int numCoresBound = 0; + std::string line; + nCoresList.push_back(numCoresBound); + while (std::getline(absDevices, line)) + { + /* + * Ignore comment line... + */ + if (line.find('#') != std::string::npos) + { + continue; + } + + std::stringstream ss(line); + std::string coreListing; + std::string gemmKernel; + unsigned int numMPIProcesses; + std::string numOMPThreads; + + ss >> coreListing + >> gemmKernel + >> numMPIProcesses + >> numOMPThreads; + + if (verbosity) + { + std::cout << coreListing << " " + << gemmKernel << " " + << numMPIProcesses << " " + << numOMPThreads << std::endl; + } + + if (coreListing.find('-') == std::string::npos) + { + /* + * Simplest case, just one core to bind. + */ + if (coreListing.find(',') == std::string::npos) + { + coresList.push_back(coreListing); + numCoresBound++; + nCoresList.push_back(numCoresBound); + } + else + { + /* + * There are comma separated list of cores... 
+ */ + char* cstr = new char[coreListing.length() + 1]; + strcpy(cstr, coreListing.c_str()); + char* tok = strtok(cstr, ","); + while (tok != NULL) + { + coresList.push_back(tok); + numCoresBound++; + tok = strtok(NULL, ","); + } + delete []cstr; + nCoresList.push_back(numCoresBound); + } + } + else + { + /* + * Just one range token... + */ + if (coreListing.find(',') == std::string::npos) + { + std::vector coreRange; + char* cstr = new char[coreListing.length() + 1]; + strcpy(cstr, coreListing.c_str()); + char* tok = strtok(cstr, "-"); + while (tok != NULL) + { + coreRange.push_back(atoi(tok)); + tok = strtok(NULL, ","); + } + delete []cstr; + + /* + * We expect just two elements in core range... + */ + unsigned int start = coreRange[0]; + unsigned int end = coreRange[1]; + + /* + * The MPI processes divide the cores equally amongst them... + */ + for (size_t e = 0; e < numMPIProcesses; e++) + { + numCoresBound += (end - start + 1) / numMPIProcesses; + nCoresList.push_back(numCoresBound); + } + + for (size_t e = start; e <= end; e++) + { + coresList.push_back(std::to_string(e)); + } + } + else + { + /* + * A mix of - and , + */ + char* cstr1 = (char*)coreListing.c_str(); + char* saveptr1, *saveptr2; + + char* tok = strtok_r(cstr1, ",", &saveptr1); + while (tok != NULL) + { + std::cout << tok << std::endl; + char* tok2 = strtok_r(tok, "-", &saveptr2); + std::vector coreRange; + while (tok2 != NULL) + { + std::cout << tok2 << std::endl; + coreRange.push_back(atoi(tok2)); + tok2 = strtok_r(NULL, "-", &saveptr2); + } + + /* + * We expect just two elements in core range... + */ + unsigned int start = coreRange[0]; + unsigned int end = coreRange[1]; + + /* + * The MPI processes divide the cores equally amongst them... 
+ */ + for (size_t e = 0; e < numMPIProcesses; e++) + { + numCoresBound += (end - start + 1) / numMPIProcesses; + nCoresList.push_back(numCoresBound); + } + + for (size_t e = start; e <= end; e++) + { + coresList.push_back(std::to_string(e)); + } + + tok = strtok_r(NULL, ",", &saveptr1); + } + } + } + + if (gemmKernel.find("CPU,GPU") != std::string::npos) + { + powerPlatforms.push_back("GPULITE"); + } + + if (gemmKernel.find("CPU,PHI") != std::string::npos) + { + powerPlatforms.push_back("PHILITE"); + } + + if (gemmKernel.find("CPU,FPGA") != std::string::npos) + { + powerPlatforms.push_back("FPGA"); + } + + for (size_t p = 0; p < numMPIProcesses; p++) + { + std::stringstream ssOut; + + if (gemmKernel.find("CPU") != std::string::npos) + { + ssOut << "cpuinit, cpudgemm, cpudestroy"; + } + + if (gemmKernel.find("GPU") != std::string::npos) + { + ssOut << "gpuinit, gpudgemm, gpudestroy"; + } + + if (gemmKernel.find("PHI") != std::string::npos) + { + ssOut << "phiinit, phidgemm, phidestroy"; + } + + if (gemmKernel.find("FPGA") != std::string::npos) + { + ssOut << "fpgainit, fpgadgemm, fpgadestroy"; + } + + if (numOMPThreads.find('-') != std::string::npos) + { + ssOut << ", 0"; + } + else + { + ssOut << ", " << numOMPThreads; + } + + abstractDevicesTable.push_back(ssOut.str()); + } + } + + if (numCoresBound > numLogicalCPUs) + { + std::cerr << "Number of cores bound " + << numCoresBound << " exceeded " + << "the allowed number of logical cores." + << std::endl; + exit(EXIT_FAILURE); + } + + if (numCoresBound > numPhysicalCPUs) + { + std::cerr << "Warning: Number of cores bound exceeded " + << "the allowed number of physical cores." + << std::endl; + } + + /* + * Spit out the abstract devices table... 
+ */ + std::ofstream abstractDevicesOFile; + abstractDevicesOFile.open("absdevs.c"); + + abstractDevicesOFile << "\n/*----------------------------------" + << "------------------------------------------*/\n" + << std::endl; + abstractDevicesOFile << "#include \"absdevs.h\"" << std::endl; + abstractDevicesOFile << "\n/*----------------------------------" + << "------------------------------------------*/\n" + << std::endl; + + /* + * Core bindings here... + */ + abstractDevicesOFile << "const unsigned int hcl_coreindex[] = {" + << std::endl; + size_t n = nCoresList.size(); + for (size_t e = 0; e < n; e++) + { + if (e == (n-1)) + { + abstractDevicesOFile << nCoresList[e]; + } + else + { + abstractDevicesOFile << nCoresList[e] << ","; + } + } + abstractDevicesOFile << "\n};" << std::endl; + + abstractDevicesOFile << "const unsigned int hcl_corebindings[] = {" + << std::endl; + n = coresList.size(); + for (size_t e = 0; e < n; e++) + { + if (e == (n-1)) + { + abstractDevicesOFile << coresList[e]; + } + else + { + abstractDevicesOFile << coresList[e] << ","; + } + } + abstractDevicesOFile << "\n};" << std::endl; + + abstractDevicesOFile << "\n/*----------------------------------" + << "------------------------------------------*/\n" + << std::endl; + abstractDevicesOFile.close(); + + std::ofstream abstractDevicesPowersOFile; + abstractDevicesPowersOFile.open("absdevpowers.c"); + + /* + * Compute Platforms here... 
+ */ + abstractDevicesPowersOFile << "\n/*----------------------------------" + << "------------------------------------------*/\n" + << std::endl; + + abstractDevicesPowersOFile << "const char* hcl_powerplatforms[] = {" + << std::endl; + n = powerPlatforms.size(); + for (size_t e = 0; e < n; e++) + { + if (e == (n-1)) + { + abstractDevicesPowersOFile << "\"" << powerPlatforms[e] << "\""; + } + else + { + abstractDevicesPowersOFile << "\"" << powerPlatforms[e] << "\","; + } + } + abstractDevicesPowersOFile << "\n};" << std::endl; + + abstractDevicesPowersOFile << "\n/*----------------------------------" + << "------------------------------------------*/\n" + << std::endl; + + abstractDevicesPowersOFile.close(); + + std::cout << "Abstract devices file successfully parsed" << std::endl; + + exit(EXIT_SUCCESS); +} + +/*-----------------------------------------------------------*/ + diff --git a/compile.sh b/compile.sh new file mode 100755 index 0000000..93157b7 --- /dev/null +++ b/compile.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +############################################################ + +DGEMVBASEDIR=`pwd` +HDPIBASEDIR=${DGEMVBASEDIR}/hdpi + +############################################################ +# HDPI Library +############################################################ + +echo "Building HDPI library..." + +(cd hdpi \ + && mkdir -p build \ + && cd build \ + && cmake -DCMAKE_INSTALL_PREFIX=${HDPIBASEDIR}/installation_dir .. \ + && make \ + && make install) + +############################################################ + +echo "Setting MKL variables..." +source /opt/intel/mkl/bin/mklvars.sh intel64 + +echo "Compiling heterogeneous dgemm..." 
+make clean all + +############################################################ + +exit 0 + +############################################################ diff --git a/cputopology.cpp b/cputopology.cpp new file mode 100755 index 0000000..2968fe7 --- /dev/null +++ b/cputopology.cpp @@ -0,0 +1,578 @@ + +/*--------------------------------------------------------*/ + +/* +@file +@author Ravi Reddy Manumachu +@version 1.0 +*/ + +/*-----------------------------------------------------------*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/*-----------------------------------------------------------*/ + +namespace hcl { + +namespace topology { + +/*-----------------------------------------------------------*/ + +/* + * One way to do this is to store the lines in a vector. + * Parse top-to-bottom and bottom-to-top to get the siblings. + */ +int +getSibling( + const unsigned int cpu, + std::vector& siblings +) +{ + FILE* commandFp = popen("lscpu -p", "r"); + if (commandFp == NULL) + { + std::cerr << "Error from execution of lscpu -p." + << std::endl; + return -1; + } + + char* line = NULL; + size_t len = 0; + std::vector lscpuLines; + + while (getline(&line, &len, commandFp) != -1) + { + if (line[0] == '#') + { + continue; + } + + lscpuLines.push_back(line); + } + + free(line); + + int status = pclose(commandFp); + if (status == -1) + { + std::cerr << "Error from execution of lscpu -p." 
+ << std::endl; + return -1; + } + + bool siblingsFound = false; + bool myCoreFound = false; + unsigned int myCore = UINT_MAX; + + for (unsigned int cpuLine = 0; + cpuLine < lscpuLines.size(); + cpuLine++) + { + std::stringstream cpuLineStream(lscpuLines[cpuLine]); + std::string token; + std::vector tokens; + while (std::getline( + cpuLineStream, token, ',' + ) + ) + { + tokens.push_back(token); + } + + unsigned int cpuParsed = atoi(tokens[0].c_str()); + + if (cpu == cpuParsed) + { + myCore = atoi(tokens[1].c_str()); + myCoreFound = true; + } + else + { + unsigned int core = atoi(tokens[1].c_str()); + + if (core == myCore) + { + siblings.push_back( + atoi(tokens[0].c_str()) + ); + siblingsFound = true; + } + } + } + + if (myCoreFound == false) + { + std::cerr << "Failed to find siblings." << std::endl; + return -1; + } + + /* + * If siblings are not found, then + * it is possible that we have to traverse in the + * reverse order to build the siblings. + */ + if (siblingsFound) + { + return 0; + } + + std::vector::reverse_iterator rit = + lscpuLines.rbegin(); + + for (; rit != lscpuLines.rend(); ++rit) + { + std::stringstream cpuLineStream(*rit); + std::string token; + std::vector tokens; + while (std::getline( + cpuLineStream, token, ',' + ) + ) + { + tokens.push_back(token); + } + + unsigned int cpuParsed = atoi(tokens[0].c_str()); + + if (cpu == cpuParsed) + { + myCore = atoi(tokens[1].c_str()); + } + else + { + unsigned int core = atoi(tokens[1].c_str()); + + if (core == myCore) + { + siblings.push_back( + atoi(tokens[0].c_str()) + ); + } + } + } + + std::reverse( + siblings.begin(), siblings.end()); + + return 0; +} + +/*-----------------------------------------------------------*/ + +int +getNumCpuNumaNodes( + unsigned int* numNUMAs +) +{ + FILE* commandFp = popen("lscpu -p", "r"); + if (commandFp == NULL) + { + std::cerr << "Error from execution of lscpu -p." 
+ << std::endl; + return -1; + } + + char* line = NULL; + size_t len = 0; + + *numNUMAs = 0; + + while (getline(&line, &len, commandFp) != -1) + { + if (line[0] == '#') + { + continue; + } + + char* token = strtok(line, ","); + + /* + * The core... + */ + if (token != NULL) + { + token = strtok(NULL, ","); + } + + /* + * The socket + */ + if (token != NULL) + { + token = strtok(NULL, ","); + unsigned int numaNodeParsed = atoi(token); + if (numaNodeParsed > *numNUMAs) + { + *numNUMAs = numaNodeParsed; + } + } + } + + free(line); + + int status = pclose(commandFp); + if (status == -1) + { + std::cerr << "Error from execution of lscpu -p." + << std::endl; + return -1; + } + + *numNUMAs = *numNUMAs + 1; + + return 0; +} + +/*-----------------------------------------------------------*/ + +int +getCpuNumaNode( + const unsigned int cpu, + unsigned int* numaNode +) +{ + FILE* commandFp = popen("lscpu -p", "r"); + if (commandFp == NULL) + { + std::cerr << "Error from execution of lscpu -p." + << std::endl; + return -1; + } + + char* line = NULL; + size_t len = 0; + + while (getline(&line, &len, commandFp) != -1) + { + if (line[0] == '#') + { + continue; + } + + char* token = strtok(line, ","); + unsigned int cpuParsed = atoi(token); + + if (cpuParsed == cpu) + { + ; + } + else + { + continue; + } + + /* + * The core... + */ + if (token != NULL) + { + token = strtok(NULL, ","); + } + + /* + * The socket + */ + if (token != NULL) + { + token = strtok(NULL, ","); + *numaNode = atoi(token); + break; + } + } + + free(line); + + int status = pclose(commandFp); + if (status == -1) + { + std::cerr << "Error from execution of lscpu -p." + << std::endl; + return -1; + } + + return 0; +} + +/*-----------------------------------------------------------*/ + +int +getNumThreadsPerCore( + unsigned int* numTPerCore +) +{ + FILE* commandFp = popen("lscpu -p", "r"); + if (commandFp == NULL) + { + std::cerr << "Error from execution of lscpu -p." 
+ << std::endl; + return -1; + } + + char* line = NULL; + size_t len = 0; + + *numTPerCore = 0; + + while (getline(&line, &len, commandFp) != -1) + { + if (line[0] == '#') + { + continue; + } + + char* token = strtok(line, ","); + + /* + * The core... + */ + if (token != NULL) + { + token = strtok(NULL, ","); + unsigned int coreParsed = atoi(token); + + /* + * We will just count for core 0... + */ + if (coreParsed == 0) + { + *numTPerCore = *numTPerCore + 1; + } + } + } + + free(line); + + int status = pclose(commandFp); + if (status == -1) + { + std::cerr << "Error from execution of lscpu -p." + << std::endl; + return -1; + } + + return 0; +} + +/*-------------------------------------------------------------------------*/ + +int +getNumLogicalCpus( + unsigned int* numCPUs +) +{ + FILE* commandFp = popen("lscpu -p", "r"); + if (commandFp == NULL) + { + std::cerr << "Error from execution of lscpu -p." + << std::endl; + return -1; + } + + char* line = NULL; + size_t len = 0; + + *numCPUs = 0; + + while (getline(&line, &len, commandFp) != -1) + { + if (line[0] == '#') + { + continue; + } + + *numCPUs = *numCPUs + 1; + } + + free(line); + + int status = pclose(commandFp); + if (status == -1) + { + std::cerr << "Error from execution of lscpu -p." 
+ << std::endl; + return -1; + } + + return 0; +} + +/*-------------------------------------------------------------------------*/ + +int +getNumPhysicalCpus( + unsigned int* numPhysicalCPUs +) +{ + unsigned int numLogicalCpus; + + int rc = getNumLogicalCpus( + &numLogicalCpus + ); + + if (rc != 0) + { + return rc; + } + + unsigned int threadsPerCore; + + rc = getNumThreadsPerCore(&threadsPerCore); + + if (rc != 0) + { + return rc; + } + + *numPhysicalCPUs = numLogicalCpus / threadsPerCore; + + return 0; +} + +/*-------------------------------------------------------------------------*/ + +int +getNumCpus( + const unsigned int numaNode, + unsigned int* numCPUs +) +{ + FILE* commandFp = popen("lscpu -p", "r"); + if (commandFp == NULL) + { + std::cerr << "Error from execution of lscpu -p." + << std::endl; + return -1; + } + + char* line = NULL; + size_t len = 0; + + *numCPUs = 0; + + while (getline(&line, &len, commandFp) != -1) + { + if (line[0] == '#') + { + continue; + } + + char* token = strtok(line, ","); + + /* + * The core... + */ + if (token != NULL) + { + token = strtok(NULL, ","); + } + + /* + * The socket + */ + if (token != NULL) + { + token = strtok(NULL, ","); + unsigned int numaNodeParsed = atoi(token); + + if (numaNodeParsed == numaNode) + { + *numCPUs = *numCPUs + 1; + } + } + } + + free(line); + + int status = pclose(commandFp); + if (status == -1) + { + std::cerr << "Error from execution of lscpu -p." + << std::endl; + return -1; + } + + return 0; +} + +/*-------------------------------------------------------------------------*/ + +int +getCpus( + const unsigned int numaNode, + std::vector& cpus +) +{ + FILE* commandFp = popen("lscpu -p", "r"); + if (commandFp == NULL) + { + std::cerr << "Error from execution of lscpu -p." 
+ << std::endl; + return -1; + } + + char* line = NULL; + size_t len = 0; + + while (getline(&line, &len, commandFp) != -1) + { + if (line[0] == '#') + { + continue; + } + + char* token = strtok(line, ","); + unsigned int cpuParsed = atoi(token); + + /* + * The core... + */ + if (token != NULL) + { + token = strtok(NULL, ","); + } + + /* + * The socket + */ + if (token != NULL) + { + token = strtok(NULL, ","); + unsigned int numaNodeParsed = atoi(token); + + if (numaNodeParsed == numaNode) + { + cpus.push_back(cpuParsed); + } + } + } + + free(line); + + int status = pclose(commandFp); + if (status == -1) + { + std::cerr << "Error from execution of lscpu -p." + << std::endl; + return -1; + } + + return 0; +} + +/*-----------------------------------------------------------*/ + +} + +} + +/*-----------------------------------------------------------*/ + diff --git a/cputopology.hpp b/cputopology.hpp new file mode 100755 index 0000000..73f9a79 --- /dev/null +++ b/cputopology.hpp @@ -0,0 +1,150 @@ +/*--------------------------------------------------------*/ + +/* +@file +@author Ravi Reddy Manumachu +@version 1.0 +*/ + +/*-----------------------------------------------------------*/ + +#ifndef _HCL_CPUTOPOLOGY_HPP_ +#define _HCL_CPUTOPOLOGY_HPP_ + +/*-----------------------------------------------------------*/ + +namespace hcl { + +namespace topology { + +/*-----------------------------------------------------------*/ + +/** + * Returns the sibling for a CPU. + * + * @param sibling Sibling for a CPU. + * + * @return HCL_SUCCESS if the query is successful. + */ +int +getSibling( + const unsigned int cpu, + std::vector& siblings +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the total number of threads per core. + * + * @param numTPerCore The number of threads per core. + * + * @return HCL_SUCCESS if the query is successful. 
+ */ +int +getNumThreadsPerCore( + unsigned int* numTPerCore +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the total number of logical cores. + * + * @param numLogicalCPUs The number of logical CPUs. + * + * @return HCL_SUCCESS if the query is successful. + */ +int +getNumLogicalCpus( + unsigned int* numLogicalCPUs +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the total number of physical cores. + * + * The value is the number of logical CPUs divided by the + * number of threads per core. + * + * @param numPhysicalCPUs The number of physical CPUs. + * + * @return HCL_SUCCESS if the query is successful. + */ +int +getNumPhysicalCpus( + unsigned int* numPhysicalCPUs +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the number of CPUs in a NUMA node. + * + * @param numaNode The NUMA node identifier. + * @param numCPUs The number of CPUs. + * + * @return HCL_SUCCESS if the query is successful. + */ +int +getNumCpus( + const unsigned int numaNode, + unsigned int* numCPUs +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the CPUs in a NUMA node. + * + * @param numaNode The NUMA node identifier. + * @param cpus The vector to which the CPUs of the NUMA node + * are appended. + * + * @return HCL_SUCCESS if the query is successful. + */ +int +getCpus( + const unsigned int numaNode, + std::vector& cpus +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the number of CPU NUMA nodes. + * + * @param numNUMAs The number of CPU NUMA nodes. + * + * @return HCL_SUCCESS if the query is successful. + */ +int +getNumCpuNumaNodes( + unsigned int* numNUMAs +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the CPU NUMA node that has the cpu. + * + * @param cpu The cpu. + * @param numaNode The numa node containing the cpu returned. + * + * @return HCL_SUCCESS if the query is successful.
+ */ +int +getCpuNumaNode( + const unsigned int cpu, + unsigned int* numaNode +); + +/*-----------------------------------------------------------*/ + +} + +} + +/*-----------------------------------------------------------*/ + +#endif /*_HCL_CPUTOPOLOGY_HPP_ */ + +/*-------------------------------------------------------------------------*/ + diff --git a/hdpi/CMakeLists.txt b/hdpi/CMakeLists.txt new file mode 100644 index 0000000..965aa97 --- /dev/null +++ b/hdpi/CMakeLists.txt @@ -0,0 +1,44 @@ + +#-----------------------------------------------------------# + +cmake_minimum_required(VERSION 2.8 FATAL_ERROR) + +#-----------------------------------------------------------# + +project(HDPI) + +#-----------------------------------------------------------# + +set (HEADERS hdpi.h + hmpi_err.h + hmpi_partitioning.h + hmpi_partitioning_internal.h + hmpi_partitioning_matrices.h + hmpi_partitioning_sets.h + hmpi_partitioning_graphs.h + hmpi_partitioning_trees.h + hmpi_partitioning_types.h) + +#-----------------------------------------------------------# + +set (SOURCES hmpi_partitioning_sets.c + hmpi_partitioning_sets_speed_function_of_problem_size.c + hmpi_partitioning_sets_speed_function_of_problem_size_weighted_elements.c + hmpi_partitioning_sets_speed_single_numbers_weighted_elements.c + hmpi_partitioning_graphs.c + hmpi_partitioning_matrices.c) + +#-----------------------------------------------------------# + +include_directories (${PROJECT_SOURCE_DIR}) + +#-----------------------------------------------------------# + +add_library(hdpi ${HEADERS} ${SOURCES}) + +#-----------------------------------------------------------# + +install(TARGETS hdpi DESTINATION lib) +install(FILES ${HEADERS} DESTINATION include) + +#-----------------------------------------------------------# diff --git a/hdpi/hdpi.h b/hdpi/hdpi.h new file mode 100644 index 0000000..2b3b594 --- /dev/null +++ b/hdpi/hdpi.h @@ -0,0 +1,64 @@ + + /**************************************************/ + /* 
HeteroDPI - Heterogeneous Data */ + /* Partitioning Interface */ + /* */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /**************************************************/ + + #ifndef __HDPI_HH + #define __HDPI_HH + + #ifdef __cplusplus + extern "C" { + #endif + + + #include "hmpi_err.h" + #include "hmpi_partitioning.h" + + /* + * sets + */ + #define Partition_unordered_set HMPI_Partition_unordered_set + #define Partition_ordered_set HMPI_Partition_ordered_set + #define Get_set_processor HMPI_Get_set_processor + #define Get_my_partition HMPI_Get_my_partition + + /* + * matrices + */ + #define Partition_matrix_2d HMPI_Partition_matrix_2d + #define Partition_matrix_1d_dp HMPI_Partition_matrix_1d_dp + #define Partition_matrix_1d_iterative HMPI_Partition_matrix_1d_iterative + #define Partition_matrix_1d_refining HMPI_Partition_matrix_1d_refining + #define Get_matrix_processor HMPI_Get_matrix_processor + #define Get_processor_2d HMPI_Get_processor_2d + #define Get_processor_1d HMPI_Get_processor_1d + #define Print_rectangle_1d HMPI_Print_rectangle_1d + #define Print_rectangle_2d HMPI_Print_rectangle_2d + #define Common_height HMPI_Common_height + #define Get_my_width HMPI_Get_my_width + #define Get_my_height HMPI_Get_my_height + #define Get_diagonal HMPI_Get_diagonal + #define Get_my_elements HMPI_Get_my_elements + #define Get_my_kk_elements HMPI_Get_my_kk_elements + + /* + * graphs + */ + #define Partition_graph HMPI_Partition_graph + #define Partition_bipartite_graph HMPI_Partition_bipartite_graph + #define Partition_hypergraph HMPI_Partition_hypergraph + + /* + * trees + */ + #define Partition_tree HMPI_Partition_tree + + #ifdef __cplusplus + } + #endif + + #endif /* __HDPI_HH */ diff --git a/hdpi/hmpi_err.h b/hdpi/hmpi_err.h new file mode 100644 index 0000000..54911c5 --- /dev/null +++ b/hdpi/hmpi_err.h @@ -0,0 +1,58 @@ + +/************************************************************************* +* * +* HeteroMPI Programming Environment * +* 
================================= * +* * +* Copyright (c) 2005 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. We assume no responsibility for the use * +* or reliability of our software. * +* * +*************************************************************************/ + + /************************************************/ + /* Error codes for the HeteroMPI Library */ + /* */ + /* Revision history */ + /* 01-02-2002 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_ERR_HH + #define __HMPI_ERR_HH + + #define MPC_OK 0 + #define MPC_ERR_NOMEM 17 + #define MPC_ERR_LAST 36 + + /* + * HMPI success and error codes + */ + #define HMPI_OK MPC_OK + #define HMPI_SUCCESS MPC_OK + #define HMPI_ERR_NOMEM MPC_ERR_NOMEM + #define HMPI_NOT_MEMBER (MPC_ERR_LAST + 1) + #define HMPI_NULL_GROUP (MPC_ERR_LAST + 2) + #define HMPI_ERR_GROUP_NOT_EXIST (MPC_ERR_LAST + 3) + #define HMPI_INVALID_GROUP (MPC_ERR_LAST + 4) + #define HMPI_INVALID_PARAMS (MPC_ERR_LAST + 5) + #define HMPI_INVALID_OPERAND_TYPE (MPC_ERR_LAST + 6) + #define HMPI_INVALID_OPERATOR (MPC_ERR_LAST + 7) + #define HMPI_ERROR_CONDITION (MPC_ERR_LAST + 8) + #define HMPI_NOT_HOST (MPC_ERR_LAST + 9) + #define HMPI_NOT_HOST_AND_NOT_FREE (MPC_ERR_LAST + 10) + #define HMPI_ERR_INTERNAL (MPC_ERR_LAST + 11) + #define HMPI_ERR_PARTITION_SET (MPC_ERR_LAST + 12) + #define HMPI_ERR_MLIMITS (MPC_ERR_LAST + 13) + #define HMPI_ERR_INVALID_DIMP (MPC_ERR_LAST + 14) + #define HMPI_ERR_PARTITION_MATRIX (MPC_ERR_LAST + 15) + #define HMPI_ERR_PARTITION_NOT_EXISTS (MPC_ERR_LAST + 16) + #define HMPI_ERR_PARTITION_GRAPH (MPC_ERR_LAST + 17) + #define HMPI_ERR_PARTITION_TREE (MPC_ERR_LAST + 19) + #define HMPI_ERR_METRIC (MPC_ERR_LAST + 20) + #define HMPI_LAST_ERROR (MPC_ERR_LAST + 21) + + #define HMPI_UNDEFINED -1 + + #endif /* __HMPI_ERR_HH */ diff --git a/hdpi/hmpi_partitioning.h b/hdpi/hmpi_partitioning.h new file mode 100644 index 0000000..553d6a8 --- 
/dev/null +++ b/hdpi/hmpi_partitioning.h @@ -0,0 +1,33 @@ + +/************************************************************************* +* * +* Heterogeneous Data Partitioning Interface * +* ========================================= * +* * +* Copyright (c) 2002 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. We assume no responsibility for the use * +* or reliability of our software. * +* * +*************************************************************************/ + + /************************************************/ + /* hmpi_partitioning - Partitioning interfaces */ + /* for the HMPI Library */ + /* */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_PARTITIONING_HH + #define __HMPI_PARTITIONING_HH + + #include "hmpi_partitioning_types.h" + #include "hmpi_partitioning_sets.h" + #include "hmpi_partitioning_internal.h" + #include "hmpi_partitioning_matrices.h" + #include "hmpi_partitioning_graphs.h" + #include "hmpi_partitioning_trees.h" + + #endif /* __HMPI_PARTITIONING_HH */ diff --git a/hdpi/hmpi_partitioning_graphs.c b/hdpi/hmpi_partitioning_graphs.c new file mode 100644 index 0000000..10b6644 --- /dev/null +++ b/hdpi/hmpi_partitioning_graphs.c @@ -0,0 +1,1780 @@ + + /************************************************/ + /* Partitioning interfaces for graphs */ + /* */ + /* Revision history */ + /* 22-04-2004 -- Initial version */ + /************************************************/ + + #include + + #include + #include + #include + + /*-----------------------------------------------------*/ + + int HMPI_Partition_graph( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + int m, + const int *vwgt, + const int *xadj, + const int *adjacency, + const int *adjwgt, + int *vp, + int *edgecut + ) + { + int i, j, rc; + + /* + * Use the partitioning interface for Set when + * edges have no 
weights. + */ + if ((vwgt == NULL) + && (xadj == NULL) + && (adjacency == NULL) + && (adjwgt == NULL + ) + ) + { + rc = HMPI_Partition_set( + p, + pn, + speeds, + psizes, + mlimits, + n, + NULL, + 0, + 1, + -1, + NULL, + NULL, + vp + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * No adjacency structure was supplied (xadj and + * adjacency are NULL in this branch), so there are + * no edges to cut. Walking xadj here would + * dereference a NULL pointer. + */ + *edgecut = 0; + + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor.
+ */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * Ideally all the partitions should be equally weighted. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * Ideally all the partitions should be equally weighted. + * There is a upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * Ideally all the partitions should be equally weighted. + * There is a upper bound on the number of elements + * stored by each processor. 
+ */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. Edgecut is the total number + * of edges that straddle partitions. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * Speeds of processors are functions of problem size. 
+ * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. 
+ */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. 
+ * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. 
+ */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * speeds are functions of problem size. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * speeds are functions of problem size. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + printf("Parameters are erroneous\n"); + return HMPI_ERR_PARTITION_GRAPH; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_bipartite_graph( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + int m, + const int *vtype, + const int *vwgt, + const int *xadj, + const int *adjacency, + const int *adjwgt, + int type_of_partitioning, + int *vp, + int *edgecut + ) + { + int i, j, rc; + + /* + * Use the partitioning interface for Set when + * edges have no weights. And there is no + * adjacency matrix. The two disjoint subsets are + * not considered separately. 
+ */ + if ((vwgt == NULL) + && (xadj == NULL) + && (adjacency == NULL) + && (adjwgt == NULL) + && (type_of_partitioning == PARTITION_OTHER + ) + ) + { + rc = HMPI_Partition_set( + p, + pn, + speeds, + psizes, + mlimits, + n, + NULL, + 0, + 1, + -1, + NULL, + NULL, + vp + ); + + if (rc != HMPI_OK) + { + return rc; + } + + *edgecut = 0; + + for (i = 0; i < n; i++) + { + int owner_processor = vp[i]; + + for (j = xadj[i]; j < xadj[i+1]; j++) + { + int neighbor = adjacency[j]; + + if (vp[neighbor] != owner_processor) + { + (*edgecut)++; + } + } + } + + return HMPI_OK; + } + + /* + * Use the partitioning interface for Set when + * edges have no weights. And there is no + * adjacency matrix. The two disjoint subsets are + * considered separately. + */ + if ((vwgt == NULL) + && (xadj == NULL) + && (adjacency == NULL) + && (adjwgt == NULL) + && (type_of_partitioning == PARTITION_SUBSET + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + if (type_of_partitioning == PARTITION_OTHER) + { + return HMPI_Partition_graph( + p, + pn, + speeds, + psizes, + mlimits, + n, + m, + vwgt, + xadj, + adjacency, + adjwgt, + vp, + edgecut + ); + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is no upper bound on the number of elements + * stored by each processor. + * The number of vertices in each partition in each subset + * should be the same. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is an upper bound on the number of elements + * stored by each processor. + * The number of vertices in each partition in each subset + * should be the same. 
+ */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + * The number of vertices in each partition in each subset + * should be the same. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + * The number of vertices in each partition in each subset + * should be the same. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * Ideally all the partitions in each subset should + * be equally weighted. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is a upper bound on the number of elements + * stored by each processor. + * Ideally all the partitions in each subset should + * be equally weighted. 
+ */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + * Ideally all the partitions in each subset should + * be equally weighted. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + * Ideally all the partitions in each subset should + * be equally weighted. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each subset + * is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the total number + * of edges that straddle partitions. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each subset + * is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the total number + * of edges that straddle partitions.
+ * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each subset + * is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the total number + * of edges that straddle partitions. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each subset + * is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the total number + * of edges that straddle partitions. + * Speeds of processors are functions of problem size. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each subset + * is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. 
+ */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * in each subset is proportional to the speed + * of the processor owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each + * subset is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each + * subset is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. 
+ */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition in each + * subset is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition + * in each subset is proportional to the speed of + * the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition in each + * subset is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition in each + * subset is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. 
+ * There is an upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning + * that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * speeds are functions of problem size. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. 
+ * (b) The edgecut is minimal. + * speeds are functions of problem size. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + printf("Parameters are erroneous\n"); + return HMPI_ERR_PARTITION_GRAPH; + } + + /*-----------------------------------------------------*/ + + /* + * There are two methods provided by hMETIS to partition + * a hypergraph. + * One using multilevel recursive bisection and the other + * using multilevel k-way partitioning + * hMETIS provides options to define the quality criteria + * that can be used for partitioning. + * We use default options for the present but however when + * hMETIS is integrated with HMPI, application programmers + * will be allowed to choose the options. + * + */ + int HMPI_Partition_hypergraph( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int nv, + int nedges, + const int *vwgt, + const int *hptr, + const int *hind, + const int *hwgt, + int *vp, + int *edgecut + ) + { + int i, j, rc; + + /* + * Use the partitioning interface for Set when + * edges have no weights. 
+ */ + if ((vwgt == NULL) + && (hptr == NULL) + && (hind == NULL) + && (hwgt == NULL + ) + ) + { + rc = HMPI_Partition_set( + p, + pn, + speeds, + psizes, + mlimits, + nv, + NULL, + 0, + 1, + -1, + NULL, + NULL, + vp + ); + + if (rc != HMPI_OK) + { + return rc; + } + + *edgecut = 0; + + for (i = 0; i < nedges; i++) + { + int owner_processor, ix = 0; + int *vertex = (int*)malloc( + sizeof(int) + * + (hptr[i+1] - hptr[i]) + ); + + if (vertex == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = hptr[i]; j < hptr[i+1]; j++) + { + vertex[ix++] = hind[j]; + } + + owner_processor = vp[vertex[0]]; + + for (j = 1; j < (hptr[i+1] - hptr[i]); j++) + { + if (vp[vertex[j]] != owner_processor) + { + (*edgecut)++; + } + } + + free(vertex); + } + + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt == NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt == NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. 
+ */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt == NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt == NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * Ideally all the partitions should be equally weighted. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt != NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * Ideally all the partitions should be equally weighted. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt != NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * Ideally all the partitions should be equally weighted. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt != NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges.
+ * Ideally all the partitions should be equally weighted. + * There is a upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt != NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. Edgecut is the total number + * of edges that straddle partitions. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. 
+ */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * Speeds of processors are functions of problem size. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. 
Edgecut is the sum + * of the weights of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. 
+ */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. 
+ * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * speeds are functions of problem size. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * speeds are functions of problem size. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + printf("Parameters are erroneous\n"); + return HMPI_ERR_PARTITION_GRAPH; + } + + /*-----------------------------------------------------*/ diff --git a/hdpi/hmpi_partitioning_graphs.h b/hdpi/hmpi_partitioning_graphs.h new file mode 100644 index 0000000..4b51900 --- /dev/null +++ b/hdpi/hmpi_partitioning_graphs.h @@ -0,0 +1,78 @@ + +/************************************************************************* +* * +* Heterogeneous Data Partitioning Interface * +* ========================================= * +* * +* Copyright (c) 2002 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. 
We assume no responsibility for the use * +* or reliability of our software. * +* * +*************************************************************************/ + + /************************************************/ + /* Partitioning interfaces for graphs */ + /* */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_PARTITIONING_GRAPHS_HH + #define __HMPI_PARTITIONING_GRAPHS_HH + + #define PARTITION_SUBSET 1 + #define PARTITION_OTHER 2 + + int HMPI_Partition_graph( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + int m, + const int *vwgt, + const int *xadj, + const int *adjacency, + const int *adjwgt, + int *vp, + int *edgecut + ); + + int HMPI_Partition_bipartite_graph( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + int m, + const int *vtype, + const int *vwgt, + const int *xadj, + const int *adjacency, + const int *adjwgt, + int type_of_partitioning, + int *vp, + int *edgecut + ); + + int HMPI_Partition_hypergraph( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int nv, + int nedges, + const int *vwgt, + const int *hptr, + const int *hind, + const int *hwgt, + int *vp, + int *edgecut + ); + + #endif /* __HMPI_PARTITIONING_GRAPHS_HH */ diff --git a/hdpi/hmpi_partitioning_internal.h b/hdpi/hmpi_partitioning_internal.h new file mode 100644 index 0000000..7e31075 --- /dev/null +++ b/hdpi/hmpi_partitioning_internal.h @@ -0,0 +1,284 @@ +/************************************************************************* +* * +* Heterogeneous Data Partitioning Interface * +* ========================================= * +* * +* Copyright (c) 2002 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. We assume no responsibility for the use * +* or reliability of our software. 
* +* * +*************************************************************************/ + + /************************************************/ + /* partitioning internal interfaces */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_PARTITIONING_INTERNAL_HH + #define __HMPI_PARTITIONING_INTERNAL_HH + + int __HMPI_Homogeneous_distribution_with_mlimits + ( + int p, + int n, + const int* mlimits, + int *np + ); + + int __HMPI_Homogeneous_distribution_with_mlimits_and_weights_ordered_sets + ( + int p, + int n, + const int* mlimits, + const int* w, + int *np + ); + + int __HMPI_Partition_set_homogeneous + ( + int p, + const int *mlimits, + int n, + const int *w, + int ordering, + int processor_ordering, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Speeds_are_single_numbers_with_mlimits + ( + int p, + const double *speeds, + const int *bounds, + int n, + int *np + ); + + int __HMPI_Number_of_elements_proportional_to_speed + ( + int p, + int n, + const double *speeds, + int *allocations + ); + + double __HMPI_System_defined_metric + ( + int p, + const double *speeds, + const int *actual, + const int *ideal + ); + + int __HMPI_Size_of_bins + ( + int p, + int n, + const double *speeds, + const int *w, + int *wallocations, + int *tsum + ); + + int __HMPI_Sum_of_weights_for_ordered_set + ( + int p, + int n, + const double *speeds, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Apply_mlimits_to_ordered_sum_of_weights + ( + int p, + int n, + const double *speeds, + const int *mlimits, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Sum_of_weights_for_nonordered_set + ( + int p, + int n, + const double *speeds, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int 
__HMPI_Apply_mlimits_to_unordered_sum_of_weights_algo_2 + ( + int p, + int n, + const double *speeds, + const int *mlimits, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Apply_mlimits_to_unordered_sum_of_weights + ( + int p, + int n, + const double *speeds, + const int *mlimits, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int + __HMPI_Distribute_with_single_number_for_speed + ( + int n, + int p, + const double *s, + double *npd + ); + + int __HMPI_Recursive_bisection_middle_region + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + double slopei, + double slopef, + double *speeds_opt, + double *npd + ); + + int __HMPI_Speed_function_of_problem_size_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *bounds, + int n, + double *speeds_opt, + int *np + ); + + int __HMPI_Speed_function_of_problem_size + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + double *speeds_opt, + int *np + ); + + int __HMPI_Sum_of_weights_for_nonordered_set_speed_functions + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Sum_of_weights_for_nonordered_set_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const 
int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + #endif diff --git a/hdpi/hmpi_partitioning_matrices.c b/hdpi/hmpi_partitioning_matrices.c new file mode 100644 index 0000000..66d2736 --- /dev/null +++ b/hdpi/hmpi_partitioning_matrices.c @@ -0,0 +1,5190 @@ + + /************************************************/ + /* Implementation of Partitioning interfaces */ + /* for matrices */ + /* */ + /* Revision history */ + /* 23-05-2003 -- Initial version */ + /************************************************/ + + #include + #include + #include + + #include + #include + + static int HMPI_Debug_flag = 0; + + #ifndef min + #define min(x, y) ((x < y) ? 
x : y) + #endif + + /*-----------------------------------------------------*/ + + int HMPI_Create_rectangles_1d_recursive( + int p, + const int *row_np, + const int *column_np, + int *w, + int *h, + int *trow, + int *tcolumn + ) + { + } + + /*-----------------------------------------------------*/ + + int HMPI_Create_rectangles_1d( + int p, + int q, + int m, + int n, + const int *row_np, + const int *column_np, + int *w, + int *h, + int *trow, + int *tcolumn, + int *ci + ) + { + int i, j, k, x, y; + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + w[HMPI_RECT_INDEX(i, j, i, j, p, q)] = column_np[i*q+j]; + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + h[HMPI_RECT_INDEX(i, j, i, j, p, q)] = row_np[i*q+j]; + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + trow[i*q+j] = 0; + for (k = 0; k < i; k++) + { + trow[i*q+j] += h[HMPI_RECT_INDEX(k, j, k, j, p, q)]; + } + } + } + + for (j = 0; j < q; j++) + { + for (i = 0; i < p; i++) + { + tcolumn[i*q+j] = 0; + for (k = 0; k < j; k++) + { + tcolumn[i*q+j] += w[HMPI_RECT_INDEX(i, k, i, k, p, q)]; + } + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (x = 0; x < p; x++) + { + for (y = 0; y < q; y++) + { + int width = HMPI_Common_height( + tcolumn[i*q+j], + (tcolumn[i*q+j] + + + w[HMPI_RECT_INDEX(i, j, i, j, p, q)] + ), + tcolumn[x*q+y], + (tcolumn[x*q+y] + + + w[HMPI_RECT_INDEX(x, y, x, y, p, q)] + ) + ); + + w[HMPI_RECT_INDEX(i, j, x, y, p, q)] = width; + } + } + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (x = 0; x < p; x++) + { + for (y = 0; y < q; y++) + { + int height = HMPI_Common_height( + trow[i*q+j], + (trow[i*q+j] + + + h[HMPI_RECT_INDEX(i, j, i, j, p, q)] + ), + trow[x*q+y], + (trow[x*q+y] + + + h[HMPI_RECT_INDEX(x, y, x, y, p, q)] + ) + ); + + h[HMPI_RECT_INDEX(i, j, x, y, p, q)] = height; + } + } + } + } + + if (ci == NULL) + { + return HMPI_OK; + } + + for (x = 0; x < p; x++) + { + for (y = 0; y < q; 
y++) + { + int hi = h[HMPI_RECT_INDEX(x, y, x, y, p, q)]; + int wi = w[HMPI_RECT_INDEX(x, y, x, y, p, q)]; + int tr = trow[x*q+y]; + int tc = tcolumn[x*q+y]; + + for (i = 0; i < hi; i++) + { + for (j = 0; j < wi; j++) + { + ci[(tr+i)*n+tc+j] = x*q+y; + } + } + } + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Create_rectangles_2d + ( + int p, + int q, + int m, + int n, + const int *row_np, + const int *column_np, + int *w, + int *h, + int *trow, + int *tcolumn, + int *ci, + int *cj + ) + { + int i, j, k, x, y; + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + w[HMPI_RECT_INDEX(i, j, i, j, p, q)] = column_np[i*q+j]; + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + h[HMPI_RECT_INDEX(i, j, i, j, p, q)] = row_np[i*q+j]; + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + trow[i*q+j] = 0; + for (k = 0; k < i; k++) + { + trow[i*q+j] += h[HMPI_RECT_INDEX(k, j, k, j, p, q)]; + } + } + } + + for (j = 0; j < q; j++) + { + for (i = 0; i < p; i++) + { + tcolumn[i*q+j] = 0; + for (k = 0; k < j; k++) + { + tcolumn[i*q+j] += w[HMPI_RECT_INDEX(i, k, i, k, p, q)]; + } + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (x = 0; x < p; x++) + { + for (y = 0; y < q; y++) + { + int width = HMPI_Common_height( + tcolumn[i*q+j], + (tcolumn[i*q+j] + + + w[HMPI_RECT_INDEX(i, j, i, j, p, q)] + ), + tcolumn[x*q+y], + (tcolumn[x*q+y] + + + w[HMPI_RECT_INDEX(x, y, x, y, p, q)] + ) + ); + + w[HMPI_RECT_INDEX(i, j, x, y, p, q)] = width; + } + } + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (x = 0; x < p; x++) + { + for (y = 0; y < q; y++) + { + int height = HMPI_Common_height( + trow[i*q+j], + (trow[i*q+j] + + + h[HMPI_RECT_INDEX(i, j, i, j, p, q)] + ), + trow[x*q+y], + (trow[x*q+y] + + + h[HMPI_RECT_INDEX(x, y, x, y, p, q)] + ) + ); + + h[HMPI_RECT_INDEX(i, j, x, y, p, q)] = height; + } + } + } + } + + if ((ci == NULL) + && (cj == 
NULL + ) + ) + { + return HMPI_OK; + } + + for (x = 0; x < p; x++) + { + for (y = 0; y < q; y++) + { + int hi = h[HMPI_RECT_INDEX(x, y, x, y, p, q)]; + int wi = w[HMPI_RECT_INDEX(x, y, x, y, p, q)]; + int tr = trow[x*q+y]; + int tc = tcolumn[x*q+y]; + + for (i = 0; i < hi; i++) + { + for (j = 0; j < wi; j++) + { + ci[(tr+i)*n+tc+j] = x; + cj[(tr+i)*n+tc+j] = y; + } + } + } + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int _HMPI_Factor( + int divisor, + int* quotient, + int **factors, + int *numf, + int *max_n_of_factors + ) + { + int i; + + while (((*quotient)%divisor) == 0) + { + (*factors)[(*numf)++] = divisor; + + /* + * Copy and enlarge the array + */ + if ((*numf) >= (*max_n_of_factors)) + { + int tempmax; + int *tempf = (int*)malloc( + sizeof(int) + * + (*max_n_of_factors) + ); + + if (tempf == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < (*max_n_of_factors); i++) + { + tempf[i] = (*factors)[i]; + } + + free(factors[0]); + + tempmax = (*max_n_of_factors); + (*max_n_of_factors) *= 2; + + factors[0] = (int*)malloc( + sizeof(int) + * + (*max_n_of_factors) + ); + + if (factors[0] == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < tempmax; i++) + { + (*factors)[i] = tempf[i]; + } + + free(tempf); + } + + (*quotient) /= divisor; + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int _HMPI_Get_factors( + int n, + int *numf, + int **result + ) + { + int rc; + int quotient = n; + int divisor, maxDivisor; + int max_n_of_factors = 2; + + *numf = 0; + + result[0] = (int*)malloc( + sizeof(int) + * + max_n_of_factors + ); + + if (result[0] == NULL) + { + return MPC_ERR_NOMEM; + } + + // + // Try special cases of 2 and 3 + rc = _HMPI_Factor( + 2, + "ient, + result, + numf, + &max_n_of_factors + ); + + if (rc != HMPI_OK) + { + return rc; + } + + rc = _HMPI_Factor( + 3, + "ient, + result, + numf, + &max_n_of_factors + ); + + if (rc != HMPI_OK) + { + return rc; + } 
+ + // + // Try pairs of the form 6m-1 and 6m+1 + // (i.e. 5, 7, 11, 13, 17, 19, . .) + maxDivisor = sqrt(quotient); + for (divisor = 5; divisor <= maxDivisor; divisor+=6) + { + rc = _HMPI_Factor( + divisor, + "ient, + result, + numf, + &max_n_of_factors + ); + + if (rc != HMPI_OK) + { + return rc; + } + + rc = _HMPI_Factor( + divisor+2, + "ient, + result, + numf, + &max_n_of_factors + ); + + if (rc != HMPI_OK) + { + return rc; + } + } + + // store final factor + if (quotient > 1) + { + (*result)[(*numf)++] = quotient; + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Homogeneous_matrix_1d_no_mlimits + ( + int p, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci + ) + { + int *row_np, *column_np; + + /* + * This could be a open problem. + * The first question is can we use a + * 1D array as a logical 2D array of 1*p + * processes or p*1 processes. + * If not, what grid to use. Does it matter. + * We find all factors of p and form a + * grid. May not be a optimal one. 
+ */ + int i, j; + int f1 = 1; + int f2 = 1; + + f1 = sqrt(p); + f2 = sqrt(p); + + if ((f1*f2) != p) + { + int numf, rc; + int **result = (int**)malloc( + sizeof(int*) + ); + + if (result == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = _HMPI_Get_factors( + p, + &numf, + result + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (f1 = 1, j = 0; j < numf; j+=2) + { + f1 *= (*result)[j]; + } + + for (f2 = 1, j = 1; j < numf; j+=2) + { + f2 *= (*result)[j]; + } + + free(result[0]); + free(result); + } + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> HMPI_Homogeneous_matrix_1d_no_mlimits: 1d array %d converted to 2d grid" + " (%d,%d)\n", + p, + f1, + f2 + ); + } + + row_np = (int*)malloc( + sizeof(int) + * + (f1*f2) + ); + + if (row_np == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np = (int*)malloc( + sizeof(int) + * + (f1*f2) + ); + + if (column_np == NULL) + { + return MPC_ERR_NOMEM; + } + + if ((m < f1) + && (n < f2 + ) + ) + { + for (i = 0; i < m; i++) + { + for (j = 0; j < f2; j++) + { + row_np[i*f2+ j] = 1; + } + } + + for (i = m; i < f1; i++) + { + for (j = 0; j < f2; j++) + { + row_np[i*f2+ j] = 0; + } + } + + for (i = 0; i < n; i++) + { + for (j = 0; j < f1; j++) + { + column_np[i*f1 + j] = 1; + } + } + + for (i = n; i < f2; i++) + { + for (j = 0; j < f1; j++) + { + column_np[i*f1 + j] = 0; + } + } + } + + if ((m < f1) + && (n >= f2 + ) + ) + { + for (i = 0; i < m; i++) + { + for (j = 0; j < f2; j++) + { + row_np[i*f2+ j] = 1; + } + } + + for (i = m; i < f1; i++) + { + for (j = 0; j < f2; j++) + { + row_np[i*f2+ j] = 0; + } + } + + for (i = 0; i < f2; i++) + { + for (j = 0; j < m; j++) + { + column_np[i + j*f2] = n/f2; + } + + for (j = m; j < f1; j++) + { + column_np[i + j*f2] = 0; + } + } + + for (i = 0; i < m; i++) + { + column_np[i*f2] += n%f2; + } + } + + if ((m >= f1) + && (n >= f2 + ) + ) + { + for (i = 0; i < f1; i++) + { + for (j = 0; j < f2; j++) + { + row_np[i*f2+ j] = m/f1; + } + } + + for (i = 0; i < f2; i++) + { + row_np[i] += m%f1; 
+ } + + for (i = 0; i < f2; i++) + { + for (j = 0; j < f1; j++) + { + column_np[i + j*f2] = n/f2; + } + } + + for (i = 0; i < f1; i++) + { + column_np[i*f2] += n%f2; + } + } + + if ((m >= f1) + && (n < f2 + ) + ) + { + for (i = 0; i < n; i++) + { + for (j = 0; j < f1; j++) + { + column_np[i*f1 + j] = 1; + } + } + + for (i = n; i < f2; i++) + { + for (j = 0; j < f1; j++) + { + column_np[i*f1 + j] = 0; + } + } + + for (i = 0; i < f1; i++) + { + for (j = 0; j < n; j++) + { + row_np[i*f2+ j] = m/f1; + } + + for (j = n; j < f2; j++) + { + row_np[i*f2+ j] = 0; + } + } + + for (i = 0; i < n; i++) + { + row_np[i] += m%f1; + } + } + + HMPI_Create_rectangles_1d( + f1, + f2, + m, + n, + row_np, + column_np, + w, + h, + trow, + tcol, + ci + ); + + free(row_np); + free(column_np); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Homogeneous_matrix_2d_no_mlimits + ( + int p, + int q, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + int i, j, rc; + int *row_np, *column_np; + + row_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (row_np == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (column_np == NULL) + { + return MPC_ERR_NOMEM; + } + + if ((m < p) + && (n < q + ) + ) + { + for (i = 0; i < m; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+ j] = 1; + } + } + + for (i = m; i < p; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+ j] = 0; + } + } + + for (i = 0; i < n; i++) + { + for (j = 0; j < p; j++) + { + column_np[i*p + j] = 1; + } + } + + for (i = n; i < q; i++) + { + for (j = 0; j < p; j++) + { + column_np[i*p + j] = 0; + } + } + } + + if ((m < p) + && (n >= q + ) + ) + { + for (i = 0; i < m; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+ j] = 1; + } + } + + for (i = m; i < p; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+ j] = 0; + } + } + + for (i = 0; i < q; i++) + { + for (j = 0; j < m; 
j++) + { + column_np[i + j*q] = n/q; + } + + for (j = m; j < p; j++) + { + column_np[i + j*q] = 0; + } + } + + for (i = 0; i < m; i++) + { + column_np[i*q] += n%q; + } + } + + if ((m >= p) + && (n >= q + ) + ) + { + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+ j] = m/p; + } + } + + for (i = 0; i < (m%p); i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+ j] += 1; + } + } + + for (i = 0; i < q; i++) + { + for (j = 0; j < p; j++) + { + column_np[i + j*q] = n/q; + } + } + + for (i = 0; i < (n%q); i++) + { + for (j = 0; j < p; j++) + { + column_np[i + j*q] += 1; + } + } + } + + if ((m >= p) + && (n < q + ) + ) + { + for (i = 0; i < n; i++) + { + for (j = 0; j < p; j++) + { + column_np[i*p + j] = 1; + } + } + + for (i = n; i < q; i++) + { + for (j = 0; j < p; j++) + { + column_np[i*p + j] = 0; + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < n; j++) + { + row_np[i*q+ j] = m/p; + } + + for (j = n; j < q; j++) + { + row_np[i*q+ j] = 0; + } + } + + for (i = 0; i < n; i++) + { + row_np[i] += m%p; + } + } + + HMPI_Create_rectangles_2d( + p, + q, + m, + n, + row_np, + column_np, + w, + h, + trow, + tcol, + ci, + cj + ); + + free(row_np); + free(column_np); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Matrix_cq_recursive_bisection + ( + int p, + const double *speeds, + const int *mlimits, + int dimension, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol + ) + { + int i, j, rc, dimension_size; + int total_limits = 0; + double tspeeds = 0.0; + double first_half_speeds = 0.0; + int tmlimits = 0; + int first_half_mlimits = 0.0; + int second_half_mlimits = 0.0; + double first_half_allocation; + double second_half_allocation; + int H = 0; + int V = 1; + + if (mlimits != NULL) + { + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits < (m*n)) + { + printf( + "==>HMPI: Problems applying the limits during " + " matrix partitioning using RECURSIVE " + 
"one-dimensional distribution\n" + ); + + return HMPI_ERR_MLIMITS; + } + } + + if (p == 1) + { + w[0] = n; + h[0] = m; + trow[0] = 0; + tcol[0] = 0; + + return HMPI_OK; + } + + for (i = 0; i < p; i++) + { + if (i < (p/2)) + { + first_half_speeds += speeds[i]; + first_half_mlimits += mlimits[i]; + } + + tspeeds += speeds[i]; + tmlimits += mlimits[i]; + } + + if (dimension == H) + { + dimension_size = m; + first_half_mlimits = (first_half_mlimits)/n; + second_half_mlimits = (tmlimits/n) - first_half_mlimits; + } + + if (dimension == V) + { + dimension_size = n; + first_half_mlimits = (first_half_mlimits)/m; + second_half_mlimits = (tmlimits/m) - first_half_mlimits; + } + + { + int allocations[2]; + double speedsr[] = { + first_half_speeds, + (tspeeds - first_half_speeds) + }; + int mlimitsr[] = { + first_half_mlimits, + second_half_mlimits + }; + + rc = HMPI_Partition_set( + 2, + 1, + speedsr, + NULL, + mlimitsr, + dimension_size, + NULL, + 1, + 0, + -1, + NULL, + NULL, + allocations + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + printf( + "==>HMPI: Problems partitioning the matrix " + " using RECURSIVE one-dimensional distribution\n" + ); + return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + printf( + "==>HMPI: Problems applying the limits during " + " matrix partitioning using RECURSIVE " + "one-dimensional distribution\n" + ); + return HMPI_ERR_MLIMITS; + } + + first_half_allocation = allocations[0]; + second_half_allocation = allocations[1]; + } + + if (p == 2) + { + if (dimension == H) + { + w[0] = n; + h[0] = first_half_allocation; + trow[0] = 0; + tcol[0] = 0; + + w[1] = n; + h[1] = second_half_allocation; + trow[0] = first_half_allocation; + tcol[0] = 0; + } + + if (dimension == V) + { + w[0] = first_half_allocation; + h[0] = m; + trow[0] = 0; + tcol[0] = 0; + + w[1] = second_half_allocation; + h[1] = m; + trow[0] = 0; + tcol[0] = first_half_allocation; + + } + + return HMPI_OK; + } + + { + int dimensionr; + int partitionm; + int 
partitionn; + + if (dimension == H) + { + dimensionr = V; + } + + if (dimension == V) + { + dimensionr = H; + } + + if (dimension == H) + { + partitionm = first_half_allocation; + partitionn = n; + } + + if (dimension == V) + { + partitionm = m; + partitionn = first_half_allocation; + } + + rc = HMPI_Matrix_cq_recursive_bisection( + (p/2), + speeds, + mlimits, + dimension, + partitionm, + partitionn, + w, + h, + trow, + tcol + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < (p/2); i++) + { + trow[i] = 0; + tcol[i] = 0; + + for (j = 0; j < i; j++) + { + trow[i] += h[j]; + tcol[i] += w[j]; + } + } + + if (dimension == H) + { + partitionm = second_half_allocation; + partitionn = n; + } + + if (dimension == V) + { + partitionm = m; + partitionn = second_half_allocation; + } + + rc = HMPI_Matrix_cq_recursive_bisection( + p - (p/2), + speeds + (p/2), + mlimits + (p/2), + dimension, + partitionm, + partitionn, + w + (p/2), + h + (p/2), + trow + (p/2), + tcol + (p/2) + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < (p - (p/2)); i++) + { + if (dimension == H) + { + trow[(p/2) + i] = first_half_allocation; + tcol[(p/2) + i] = 0; + } + + if (dimension == V) + { + trow[(p/2) + i] = 0; + tcol[(p/2) + i] = first_half_allocation; + } + + for (j = 0; j < i; j++) + { + trow[(p/2) + i] += h[j]; + tcol[(p/2) + i] += w[j]; + } + } + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Matrix_one_dimensional_recursive + ( + int p, + const double *speeds, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol + ) + { + int rc; + int H = 0; + int V = 1; + + /* + * Use the Orthogonal recursive bisection scheme of + * Crandall and Quinn. 
+ */ + int *wp, *hp; + + wp = (int*)malloc( + sizeof(int) + * + p + ); + + hp = (int*)malloc( + sizeof(int) + * + p + ); + + rc = HMPI_Matrix_cq_recursive_bisection( + p, + speeds, + mlimits, + V, + m, + n, + wp, + hp, + trow, + tcol + ); + + if (rc != HMPI_OK) + { + return rc; + } + + HMPI_Create_rectangles_1d_recursive( + p, + wp, + hp, + w, + h, + trow, + tcol + ); + + free(wp); + free(hp); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Common_height + ( + int top_row_1, + int bottom_row_1, + int top_row_2, + int bottom_row_2 + ) + { + if ((top_row_1 == 0) + && (bottom_row_1 == 0 + ) + ) + { + return 0; + } + + if ((top_row_2 == 0) + && (bottom_row_2 == 0 + ) + ) + { + return 0; + } + + /* + * One area contains the other + */ + if ((top_row_1 >= top_row_2) + && (bottom_row_1 <= bottom_row_2) + ) + { + return (bottom_row_1 - top_row_1); + } + + if ((top_row_1 <= top_row_2) + && (bottom_row_1 >= bottom_row_2) + ) + { + return (bottom_row_2 - top_row_2); + } + + /* + * One area is followed or preceded by another + * with an overlap + */ + if ((top_row_1 <= top_row_2) + && (bottom_row_1 >= top_row_2) + && (bottom_row_1 <= bottom_row_2) + ) + { + return (bottom_row_1 - top_row_2); + } + + if ((top_row_1 >= top_row_2) + && (top_row_1 <= bottom_row_2) + && (bottom_row_1 >= bottom_row_2) + ) + { + return (bottom_row_2 - top_row_1); + } + + /* + * There is no overlap + */ + if ((bottom_row_1 < top_row_2) + || (top_row_1 > bottom_row_2) + ) + { + return 0; + } + + if ((top_row_1 < top_row_2) + && (bottom_row_1 < bottom_row_2) + ) + { + return 0; + } + + if ((top_row_1 > top_row_2) + && (bottom_row_1 > bottom_row_2) + ) + { + return 0; + } + + return 0; + } + + /*-----------------------------------------------------*/ + + void HMPI_Create_rectangles_1d_column_based + ( + int p, + int number_of_columns, + int *rectangles_in_each_column, + const double *areas, + int m, + int n, + int *w, + int *h, + int *trow, + int 
*tcolumn + ) + { + int i, j, ix = 0; + + for (i = 0; i < number_of_columns; i++) + { + int width = 0; + + for (j = 0; j < rectangles_in_each_column[i]; j++) + { + width += areas[p - j - 1 - ix]; + } + + for (j = 0; j < rectangles_in_each_column[i]; j++) + { + w[p - j - 1 - ix] = width*n; + h[p - j - 1 - ix] = (areas[p - j - 1 - ix]/width)*m; + } + + ix += rectangles_in_each_column[i]; + } + + /* + * Adjust the parameters w & h + * TBD + */ + return; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_column_based_speeds_single_numbers_with_mlimits + ( + int p, + int q, + const double *speeds, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + /* + * TBD + */ + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_row_based_speeds_single_numbers_no_mlimits + ( + int p, + int q, + const double *speeds, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + int i, j, rc; + double *row_speed_sums; + int *row_np, *row_np_sub; + int *column_np; + + row_speed_sums = (double*)malloc( + sizeof(double) + * + p + ); + + if (row_speed_sums == NULL) + { + return MPC_ERR_NOMEM; + } + + row_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (row_np == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (column_np == NULL) + { + return MPC_ERR_NOMEM; + } + + row_np_sub = (int*)malloc( + sizeof(int) + * + p + ); + + if (row_np_sub == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + row_speed_sums[i] = 0.0; + for (j = 0; j < q; j++) + { + row_speed_sums[i] += speeds[i*q+j]; + } + } + + /* + * Partition of the row dimension among + * p processors + */ + rc = HMPI_Partition_set( + p, + 1, + row_speed_sums, + NULL, + NULL, + m, + NULL, + 0, + 0, + -1, + NULL, + NULL, + row_np_sub + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + 
return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + return HMPI_ERR_MLIMITS; + } + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+j] = row_np_sub[i]; + } + } + + free(row_speed_sums); + free(row_np_sub); + + for (i = 0; i < p; i++) + { + double *column_speed_sums; + int *column_np_sub; + + column_speed_sums = (double*)malloc( + sizeof(double) + * + q + ); + + if (column_speed_sums == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np_sub = (int*)malloc( + sizeof(int) + * + q + ); + + if (column_np_sub == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < q; j++) + { + column_speed_sums[j] = speeds[i*q + j]; + } + + /* + * Partition of the column dimension among + * q processors + */ + rc = HMPI_Partition_set( + q, + 1, + column_speed_sums, + NULL, + NULL, + n, + NULL, + 0, + 0, + -1, + NULL, + NULL, + column_np_sub + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + return HMPI_ERR_MLIMITS; + } + + if (rc != HMPI_OK) + { + return rc; + } + + for (j = 0; j < q; j++) + { + column_np[i*q + j] = column_np_sub[j]; + } + + free(column_np_sub); + free(column_speed_sums); + } + + HMPI_Create_rectangles_2d( + p, + q, + m, + n, + row_np, + column_np, + w, + h, + trow, + tcol, + ci, + cj + ); + + free(row_np); + free(column_np); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_column_based_speeds_single_numbers_no_mlimits + ( + int p, + int q, + const double *speeds, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + int i, j, rc; + double *column_speed_sums; + int *column_np, *column_np_sub; + int *row_np; + + column_speed_sums = (double*)malloc( + sizeof(double) + * + q + ); + + if (column_speed_sums == NULL) + { + return MPC_ERR_NOMEM; + } + + row_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if 
(row_np == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (column_np == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np_sub = (int*)malloc( + sizeof(int) + * + q + ); + + if (column_np_sub == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < q; i++) + { + column_speed_sums[i] = 0.0; + for (j = 0; j < p; j++) + { + column_speed_sums[i] += speeds[j*q+i]; + } + } + + /* + * Partition of the column dimension among + * q processors + */ + rc = HMPI_Partition_set( + q, + 1, + column_speed_sums, + NULL, + NULL, + n, + NULL, + 0, + 0, + -1, + NULL, + NULL, + column_np_sub + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + return HMPI_ERR_MLIMITS; + } + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + column_np[i*q+j] = column_np_sub[j]; + } + } + + free(column_speed_sums); + free(column_np_sub); + + for (i = 0; i < q; i++) + { + double *row_speed_sums; + int *row_np_sub; + + row_speed_sums = (double*)malloc( + sizeof(double) + * + p + ); + + if (row_speed_sums == NULL) + { + return MPC_ERR_NOMEM; + } + + row_np_sub = (int*)malloc( + sizeof(int) + * + p + ); + + if (row_np_sub == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < p; j++) + { + row_speed_sums[j] = speeds[j*q + i]; + } + + /* + * Partition of the row dimension among + * p processors + */ + rc = HMPI_Partition_set( + p, + 1, + row_speed_sums, + NULL, + NULL, + m, + NULL, + 0, + 0, + -1, + NULL, + NULL, + row_np_sub + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + return HMPI_ERR_MLIMITS; + } + + if (rc != HMPI_OK) + { + return rc; + } + + for (j = 0; j < p; j++) + { + row_np[j*q + i] = row_np_sub[j]; + } + + free(row_np_sub); + free(row_speed_sums); + } + + HMPI_Create_rectangles_2d( + p, + q, + m, + n, + row_np, + column_np, + w, 
+ h, + trow, + tcol, + ci, + cj + ); + + free(row_np); + free(column_np); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_column_based_speed_functions_no_mlimits + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_column_based_speed_functions_with_mlimits + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Matrix_two_dimensional_column_based + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + /* + * Homogeneous distribution + */ + if ((speeds == NULL) + && (mlimits == NULL + ) + ) + { + return HMPI_Homogeneous_matrix_2d_no_mlimits( + p, + q, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * TBD: Meaning of mlimits + */ + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + } + + if ((mlimits == NULL) + && (pn == 1 + ) + ) + { + return HMPI_2d_column_based_speeds_single_numbers_no_mlimits( + p, + q, + speeds, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + if ((mlimits != NULL) + && (pn == 1 + ) + ) + { + /* + * TBD: Meaning of mlimits + * No known results for this case + */ + return HMPI_2d_column_based_speeds_single_numbers_with_mlimits( + p, + q, + speeds, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + if ((mlimits == NULL) + && (pn > 1 + ) + ) + { + /* + * 
No known results for this case + */ + return HMPI_2d_column_based_speed_functions_no_mlimits( + p, + q, + pn, + speeds, + psizes, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + if ((mlimits != NULL) + && (pn > 1 + ) + ) + { + /* + * No known results for this case + */ + return HMPI_2d_column_based_speed_functions_with_mlimits( + p, + q, + pn, + speeds, + psizes, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_row_based_speed_functions_with_mlimits + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_row_based_speed_functions_no_mlimits + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_row_based_speeds_single_numbers_with_mlimits + ( + int p, + int q, + const double *speeds, + const int *psizes, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Matrix_two_dimensional_row_based + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + if ((speeds == NULL) + && (mlimits == NULL + ) + ) + { + return HMPI_Homogeneous_matrix_2d_no_mlimits( + p, + q, + m, 
+ n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + } + + if ((mlimits == NULL) + && (pn == 1 + ) + ) + { + return HMPI_2d_row_based_speeds_single_numbers_no_mlimits( + p, + q, + speeds, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn == 1 + ) + ) + { + return HMPI_2d_row_based_speeds_single_numbers_with_mlimits( + p, + q, + speeds, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((mlimits == NULL) + && (pn > 1 + ) + ) + { + return HMPI_2d_row_based_speed_functions_no_mlimits( + p, + q, + pn, + speeds, + psizes, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn > 1 + ) + ) + { + return HMPI_2d_row_based_speed_functions_with_mlimits( + p, + q, + pn, + speeds, + psizes, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_cartesian_speeds_single_numbers_no_mlimits + ( + int p, + int q, + const double *speeds, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + int i, j, rc; + double *row_speed_sums; + int *row_np, *row_np_sub; + double *column_speed_sums; + int *column_np, *column_np_sub; + + row_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (row_np == NULL) + { + return MPC_ERR_NOMEM; + } + + row_np_sub = (int*)malloc( + sizeof(int) + * + p + ); + + if (row_np_sub == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (column_np == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np_sub = (int*)malloc( + sizeof(int) + * + q + ); + + if (column_np_sub == NULL) + { + return MPC_ERR_NOMEM; + } + + 
row_speed_sums = (double*)malloc( + sizeof(double) + * + p + ); + + if (row_speed_sums == NULL) + { + return MPC_ERR_NOMEM; + } + + column_speed_sums = (double*)malloc( + sizeof(double) + * + q + ); + + if (column_speed_sums == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + row_speed_sums[i] = 0.0; + for (j = 0; j < q; j++) + { + row_speed_sums[i] += speeds[i*q+j]; + } + } + + for (i = 0; i < q; i++) + { + column_speed_sums[i] = 0.0; + for (j = 0; j < p; j++) + { + column_speed_sums[i] += speeds[j*q+i]; + } + } + + /* + * Partition of the row dimension among + * p processors + */ + rc = HMPI_Partition_set( + p, + 1, + row_speed_sums, + NULL, + NULL, + m, + NULL, + 0, + 0, + -1, + NULL, + NULL, + row_np_sub + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + printf( + "Problems partitioning the matrix " + " using HMPI_CARTESIAN two-dimensional distribution\n" + ); + + return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + printf( + "Problems applying the limits during " + " matrix partitioning using HMPI_CARTESIAN " + "two-dimensional distribution\n" + ); + + return HMPI_ERR_MLIMITS; + } + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * Partition of the column dimension among + * q processors + */ + rc = HMPI_Partition_set( + q, + 1, + column_speed_sums, + NULL, + NULL, + n, + NULL, + 0, + 0, + -1, + NULL, + NULL, + column_np_sub + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + printf( + "Problems partitioning the matrix" + " using HMPI_CARTESIAN two-dimensional distribution\n" + ); + return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + printf( + "Problems applying the limits during" + " matrix partitioning using HMPI_CARTESIAN " + "two-dimensional distribution\n" + ); + + return HMPI_ERR_MLIMITS; + } + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+j] = row_np_sub[i]; + column_np[i*q+j] = column_np_sub[j]; + } + } + + 
HMPI_Create_rectangles_2d( + p, + q, + m, + n, + row_np, + column_np, + w, + h, + trow, + tcol, + ci, + cj + ); + + free(row_np); + free(column_np); + free(row_np_sub); + free(column_np_sub); + free(row_speed_sums); + free(column_speed_sums); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_cartesian_speeds_single_numbers_with_mlimits + ( + int p, + int q, + const double *speeds, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_cartesian_speed_functions_with_mlimits + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_cartesian_speed_functions_no_mlimits + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Matrix_two_dimensional_cartesian + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + if ((speeds == NULL) + && (mlimits == NULL + ) + ) + { + return HMPI_Homogeneous_matrix_2d_no_mlimits( + p, + q, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + } + + if ((mlimits == NULL) + && (pn == 1 + ) + ) + { + return 
HMPI_2d_cartesian_speeds_single_numbers_no_mlimits( + p, + q, + speeds, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn == 1 + ) + ) + { + return HMPI_2d_cartesian_speeds_single_numbers_with_mlimits( + p, + q, + speeds, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((mlimits == NULL) + && (pn > 1 + ) + ) + { + return HMPI_2d_cartesian_speed_functions_no_mlimits( + p, + q, + pn, + speeds, + psizes, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn > 1 + ) + ) + { + return HMPI_2d_cartesian_speed_functions_with_mlimits( + p, + q, + pn, + speeds, + psizes, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_matrix_2d + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + switch (type_of_distribution) + { + case HMPI_COLUMN_BASED: + { + return HMPI_Matrix_two_dimensional_column_based( + p, + q, + pn, + speeds, + psizes, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + break; + case HMPI_ROW_BASED: + { + return HMPI_Matrix_two_dimensional_row_based( + p, + q, + pn, + speeds, + psizes, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + break; + case HMPI_CARTESIAN: + { + return HMPI_Matrix_two_dimensional_cartesian( + p, + q, + pn, + speeds, + psizes, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + break; + default: + { + printf( + "Invalid type of distribution provided" + " for two-dimensional processor arrangement\n" + ); + return HMPI_ERR_PARTITION_MATRIX; + break; + } + } + + 
printf( + "Invalid type of distribution provided" + " for two-dimensional processor arrangement\n" + ); + + return HMPI_ERR_PARTITION_MATRIX; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_dynamic_row_based_speeds_single_numbers_no_mlimits + ( + int p, + const double *speeds, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + return HMPI_1d_dynamic_column_based_speeds_single_numbers_no_mlimits( + p, + speeds, + n, + m, + lb, + dpf, + w, + h, + trow, + tcol + ); + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_dynamic_column_based_speeds_single_numbers_no_mlimits + ( + int p, + const double *speeds, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + int i, j, q, C, c_opt; + double S = 0.0, tspeed = 0.0, tarea = 0.0; + double **perimeter; + int **cumulative_r; + double *one_d_p; + int *one_d_r, *optimal_d; + double *areas, *rearranged_speeds; + int *rearrangedp; + double temp; + int temp_number; + + /* + * Sort the speeds in ascending order + */ + rearranged_speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + } + + for (i = 0; i < p; i++) + { + rearranged_speeds[i] = speeds[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_speeds[j-1] > rearranged_speeds[j]) + { + temp = rearranged_speeds[j-1]; + rearranged_speeds[j-1] = rearranged_speeds[j]; + rearranged_speeds[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + } + } + } + + /* + * Normalise to two decimal places + * Sum of the areas should be 1. 
+ */ + areas = (double*)malloc( + sizeof(double) + * + p + ); + + if (areas == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + tspeed += rearranged_speeds[i]; + } + + for (i = 0; i < p; i++) + { + int to_2_decimals; + areas[i] = (rearranged_speeds[i]/tspeed)*100; + to_2_decimals = areas[i]; + areas[i] = to_2_decimals; + areas[i] = (areas[i]/100); + tarea += areas[i]; + } + + areas[0] = areas[0] + (1 - tarea); + + /* + * perimeter and cumulative_r are Upper Triangular arrangements + * Study the paper 'Matrix Multiplication on Heterogeneous + * Platforms' by Beaumont et al to see the layout of the + * arrays perimeter and cumulative_r + */ + one_d_p = (double*)malloc( + sizeof(double) + * + (p*(p+1)/2) + ); + + if (one_d_p == NULL) + { + return MPC_ERR_NOMEM; + } + + one_d_r = (int*)malloc( + sizeof(int) + * + (p*(p+1)/2) + ); + + if (one_d_r == NULL) + { + return MPC_ERR_NOMEM; + } + + perimeter = (double**)malloc( + sizeof(double*) + * + p + ); + + if (perimeter == NULL) + { + return MPC_ERR_NOMEM; + } + + cumulative_r = (int**)malloc( + sizeof(int*) + * + p + ); + + if (cumulative_r == NULL) + { + return MPC_ERR_NOMEM; + } + + optimal_d = (int*)malloc( + sizeof(int) + * + p + ); + + if (optimal_d == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + perimeter[i] = one_d_p - i; + one_d_p+=(p-i); + + cumulative_r[i] = one_d_r - i; + one_d_r+=(p-i); + } + + for (q = 0; q < p; q++) + { + S += speeds[q]; + perimeter[0][q] = 1 + S*q; + cumulative_r[0][q] = 0; + } + + for (C = 1; C < p; C++) + { + for (q = C; q < p; q++) + { + int r_opt; + + perimeter[C][q] = (*dpf)( + C, + q, + p, + rearranged_speeds, + perimeter, + &r_opt + ); + + cumulative_r[C][q] = r_opt; + } + } + + q = p; + c_opt = p; + i = 0; + + while (c_opt >= 2) + { + int temp, c_optimal = 1; + + temp = perimeter[0][q]; + + for (C = 1; C < p; C++) + { + if ((perimeter[C][q-1]) < temp) + { + c_optimal = C+1; + temp = perimeter[C][q-1]; + } + } + + optimal_d[i++] = q 
- cumulative_r[c_optimal-1][q-1]; + q = cumulative_r[c_optimal-1][q-1]; + c_opt = c_optimal; + } + + optimal_d[i] = q; + + /* + * Fill the output parameters + */ + HMPI_Create_rectangles_1d_column_based( + p, + i+1, + optimal_d, + areas, + m, + n, + w, + h, + trow, + tcol + ); + + free(one_d_p); + free(one_d_r); + free(perimeter); + free(optimal_d); + free(cumulative_r); + free(rearranged_speeds); + free(rearrangedp); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + /* + * column-based partitioning/row-based partitioning + * of the matrix using the column-based heuristic approach + * proposed by Beaumont et al + */ + int HMPI_1d_dynamic_speeds_single_numbers_no_mlimits + ( + int p, + const double *speeds, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol + ) + { + switch (type_of_distribution) + { + case HMPI_ROW_BASED: + { + return + HMPI_1d_dynamic_row_based_speeds_single_numbers_no_mlimits( + p, + speeds, + m, + n, + lb, + dpf, + w, + h, + trow, + tcol + ); + } + break; + case HMPI_COLUMN_BASED: + { + return + HMPI_1d_dynamic_column_based_speeds_single_numbers_no_mlimits( + p, + speeds, + m, + n, + lb, + dpf, + w, + h, + trow, + tcol + ); + } + break; + case HMPI_GENERAL: + { + } + break; + default: + { + printf( + "Invalid type of distribution provided" + " for one-dimensional processor arrangement " + " with DYNAMIC formulation\n" + ); + + return HMPI_ERR_PARTITION_MATRIX; + + break; + } + } + + printf( + "Invalid type of distribution provided for one-dimensional " + "processor arrangement with DYNAMIC formulation\n" + ); + + return HMPI_ERR_PARTITION_MATRIX; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_dynamic_speeds_single_numbers_with_mlimits + ( + int p, + const double *speeds, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int type_of_distribution, + 
int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_dynamic_speed_functions_no_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_dynamic_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_matrix_1d_dp + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol, + int *ci + ) + { + if ((speeds == NULL) + && (mlimits == NULL + ) + ) + { + return HMPI_Homogeneous_matrix_1d_no_mlimits( + p, + m, + n, + w, + h, + trow, + tcol, + ci + ); + } + + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + } + + if ((mlimits == NULL) + && (pn == 1 + ) + ) + { + return HMPI_1d_dynamic_speeds_single_numbers_no_mlimits( + p, + speeds, + m, + n, + lb, + dpf, + type_of_distribution, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn == 1 + ) + ) + { + return HMPI_1d_dynamic_speeds_single_numbers_with_mlimits( + p, + speeds, + mlimits, + m, + n, + lb, + dpf, + type_of_distribution, + w, + h, + trow, + tcol + ); 
+ } + + /* + * No known results for this case + */ + if ((mlimits == NULL) + && (pn > 1 + ) + ) + { + return HMPI_1d_dynamic_speed_functions_no_mlimits( + p, + pn, + speeds, + psizes, + m, + n, + lb, + dpf, + type_of_distribution, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn > 1 + ) + ) + { + return HMPI_1d_dynamic_speed_functions_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + m, + n, + lb, + dpf, + type_of_distribution, + w, + h, + trow, + tcol + ); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + + int HMPI_Next_best_matrix_partition( + int m, + int n, + int* oldw, + int* oldh, + int* oldtrow, + int* oldtcol + ) + { + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_iterative_speeds_single_numbers_no_mlimits + ( + int p, + const double *speeds, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Iterative_function cf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + int i, rc; + int Lower_bound = (*lb)( + p, + speeds, + m, + n + ); + int *oldw, *oldh, *oldtrow, *oldtcol; + double cost; + + oldw = (int*)malloc( + sizeof(int) + * + p + ); + + if (oldw == NULL) + { + return MPC_ERR_NOMEM; + } + + oldh = (int*)malloc( + sizeof(int) + * + (p*p) + ); + + if (oldh == NULL) + { + return MPC_ERR_NOMEM; + } + + oldtrow = (int*)malloc( + sizeof(int) + * + p + ); + + if (oldtrow == NULL) + { + return MPC_ERR_NOMEM; + } + + oldtcol = (int*)malloc( + sizeof(int) + * + p + ); + + if (oldtcol == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + oldw[i] = w[i]; + } + + for (i = 0; i < (p*p); i++) + { + oldh[i] = h[i]; + } + + for (i = 0; i < p; i++) + { + oldtrow[i] = trow[i]; + } + + for (i = 0; i < p; i++) + { + oldtcol[i] = tcol[i]; + } + + do + { + cost = (*cf)( + p, + oldw, + oldh, + oldtrow, + oldtcol + ); + + if ((cost < 0) + || (cost <= Lower_bound + ) + ) + { + for (i = 0; i < p; i++) + { + 
w[i] = oldw[i]; + } + + for (i = 0; i < (p*p); i++) + { + h[i] = oldh[i]; + } + + for (i = 0; i < p; i++) + { + trow[i] = oldtrow[i]; + } + + for (i = 0; i < p; i++) + { + tcol[i] = oldtcol[i]; + } + } + else + { + /* + * TBD + */ + HMPI_Next_best_matrix_partition( + m, + n, + oldw, + oldh, + oldtrow, + oldtcol + ); + } + } while ((cost > 0) && (cost > Lower_bound)); + + free(oldw); + free(oldh); + free(oldtrow); + free(oldtcol); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_iterative_speeds_single_numbers_with_mlimits + ( + int p, + const double *speeds, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Iterative_function cf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_iterative_speed_functions_no_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Iterative_function cf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_iterative_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Iterative_function cf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_matrix_1d_iterative + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Iterative_function cf, + int *w, + int *h, + int *trow, + int *tcol, + int *ci + ) + { + if ((speeds == NULL) + && 
(mlimits == NULL + ) + ) + { + return HMPI_Homogeneous_matrix_1d_no_mlimits( + p, + m, + n, + w, + h, + trow, + tcol, + ci + ); + } + + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + } + + if ((mlimits == NULL) + && (pn == 1 + ) + ) + { + return HMPI_1d_iterative_speeds_single_numbers_no_mlimits( + p, + speeds, + m, + n, + lb, + cf, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn == 1 + ) + ) + { + return HMPI_1d_iterative_speeds_single_numbers_with_mlimits( + p, + speeds, + mlimits, + m, + n, + lb, + cf, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits == NULL) + && (pn > 1 + ) + ) + { + return HMPI_1d_iterative_speed_functions_no_mlimits( + p, + pn, + speeds, + psizes, + m, + n, + lb, + cf, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn > 1 + ) + ) + { + return HMPI_1d_iterative_speed_functions_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + m, + n, + lb, + cf, + w, + h, + trow, + tcol + ); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_refining_speeds_single_numbers_no_mlimits + ( + int p, + const double *speeds, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Refining_function rf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + int i, rc; + int *oldw, *oldh, *oldtrow, *oldtcol; + + oldw = (int*)malloc( + sizeof(int) + * + p + ); + + if (oldw == NULL) + { + return MPC_ERR_NOMEM; + } + + oldh = (int*)malloc( + sizeof(int) + * + (p*p) + ); + + if (oldh == NULL) + { + return MPC_ERR_NOMEM; + } + + oldtrow = (int*)malloc( + sizeof(int) + * + p + ); + + if (oldtrow == NULL) + { + return MPC_ERR_NOMEM; + } + + oldtcol = (int*)malloc( + sizeof(int) + * + p + ); + + if (oldtcol == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + oldw[i] = w[i]; + } + + for (i = 0; i < (p*p); i++) + { + 
oldh[i] = h[i]; + } + + for (i = 0; i < p; i++) + { + oldtrow[i] = trow[i]; + } + + for (i = 0; i < p; i++) + { + oldtcol[i] = tcol[i]; + } + + do + { + rc = (*rf)( + p, + speeds, + m, + n, + oldw, + oldh, + oldtrow, + oldtcol, + w, + h, + trow, + tcol + ); + + if (rc > 0) + { + for (i = 0; i < p; i++) + { + oldw[i] = w[i]; + } + + for (i = 0; i < (p*p); i++) + { + oldh[i] = h[i]; + } + + for (i = 0; i < p; i++) + { + oldtrow[i] = trow[i]; + } + + for (i = 0; i < p; i++) + { + oldtcol[i] = tcol[i]; + } + } + else + { + for (i = 0; i < p; i++) + { + w[i] = oldw[i]; + } + + for (i = 0; i < (p*p); i++) + { + h[i] = oldh[i]; + } + + for (i = 0; i < p; i++) + { + trow[i] = oldtrow[i]; + } + + for (i = 0; i < p; i++) + { + tcol[i] = oldtcol[i]; + } + } + } while (rc > 0); + + free(oldw); + free(oldh); + free(oldtrow); + free(oldtcol); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_refining_speeds_single_numbers_with_mlimits + ( + int p, + const double *speeds, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Refining_function rf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_refining_speed_functions_no_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Refining_function rf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_refining_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Refining_function rf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not 
available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_matrix_1d_refining + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Refining_function rf, + int *w, + int *h, + int *trow, + int *tcol, + int *ci + ) + { + if ((speeds == NULL) + && (mlimits == NULL + ) + ) + { + return HMPI_Homogeneous_matrix_1d_no_mlimits( + p, + m, + n, + w, + h, + trow, + tcol, + ci + ); + } + + /* + * Meaning of mlimits + */ + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + } + + if ((mlimits == NULL) + && (pn == 1 + ) + ) + { + return HMPI_1d_refining_speeds_single_numbers_no_mlimits( + p, + speeds, + m, + n, + lb, + rf, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn == 1 + ) + ) + { + return HMPI_1d_refining_speeds_single_numbers_with_mlimits( + p, + speeds, + mlimits, + m, + n, + lb, + rf, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits == NULL) + && (pn > 1 + ) + ) + { + return HMPI_1d_refining_speed_functions_no_mlimits( + p, + pn, + speeds, + psizes, + m, + n, + lb, + rf, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn > 1 + ) + ) + { + return HMPI_1d_refining_speed_functions_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + m, + n, + lb, + rf, + w, + h, + trow, + tcol + ); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Get_processor_2d + ( + int row, + int column, + int p, + int q, + const int *w, + const int *h, + const int *trow, + const int *tcol, + int *I, + int *J + ) + { + int x, y, i, j; + + for (x = 0; x < p; x++) + { + for (y = 0; y < q; y++) + { + int hi = h[HMPI_RECT_INDEX(x, y, x, y, p, q)]; + int wi = w[HMPI_RECT_INDEX(x, y, x, y, p, q)]; + int toprow = trow[x*q+y]; 
+ int topcol = tcol[x*q+y]; + + for (i = 0; i < hi; i++) + { + for (j = 0; j < wi; j++) + { + if (((row >= (toprow + i)) + && (row < (toprow + hi) + ) + ) + && + ((column >= (topcol + j)) + && (column < (topcol + wi) + ) + ) + ) + { + *I = i; + *J = j; + + return HMPI_OK; + } + } + } + } + } + + return HMPI_ERR_PARTITION_NOT_EXISTS; + } + + /*-----------------------------------------------------*/ + + int HMPI_Get_processor_1d + ( + int row, + int column, + int p, + const int *w, + const int *h, + const int *trow, + const int *tcol, + int *I + ) + { + int x, y, i, j; + + for (x = 0; x < p; x++) + { + int hi = h[HMPI_RECT_INDEX(x, 0, x, 0, p, 1)]; + int wi = w[HMPI_RECT_INDEX(x, 0, x, 0, p, 1)]; + int toprow = trow[x]; + int topcol = tcol[x]; + + for (i = 0; i < hi; i++) + { + for (j = 0; j < wi; j++) + { + if (((row >= (toprow + i)) + && (row < (toprow + hi) + ) + ) + && + ((column >= (topcol + j)) + && (column < (topcol + wi) + ) + ) + ) + { + *I = i; + return HMPI_OK; + } + } + } + } + + return HMPI_ERR_PARTITION_NOT_EXISTS; + } + + /*-----------------------------------------------------*/ + + int HMPI_Print_rectangle_1d( + int p, + int m, + int n, + const int *w, + const int *h, + const int *trow, + const int *tcol, + const int *ci + ) + { + int i, j, k, l; + int q = 1; + + if (ci != NULL) + { + printf("The processor allocation is :\n"); + + for (i = 0; i < m; i++) + { + for (j = 0; j < n; j++) + { + printf("(%d) ", ci[i*n+j]); + } + printf("\n"); + } + + printf("\n"); + } + + printf("The top row coordinates are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + printf("%d ", trow[i*q+j]); + } + printf("\n"); + } + + printf("\n"); + + printf("The top column coordinates are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + printf("%d ", tcol[i*q+j]); + } + printf("\n"); + } + + printf("\n"); + + printf("The common widths of rectangles are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (k = 0; k < 
p; k++) + { + for (l = 0; l < q; l++) + { + printf("%d ", w[HMPI_RECT_INDEX(i, j, k, l, p, q)]); + } + } + printf("\n"); + } + } + + printf("\n"); + + printf("The common heights of rectangles are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (k = 0; k < p; k++) + { + for (l = 0; l < q; l++) + { + printf("%d ", h[HMPI_RECT_INDEX(i, j, k, l, p, q)]); + } + } + printf("\n"); + } + } + + printf("\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Print_rectangle_2d( + int p, + int q, + int m, + int n, + const int *w, + const int *h, + const int *trow, + const int *tcol, + const int *ci, + const int *cj + ) + { + int i, j, k, l; + + if ((ci != NULL) + && (cj != NULL + ) + ) + { + printf("The processor allocation is :\n"); + + for (i = 0; i < m; i++) + { + for (j = 0; j < n; j++) + { + printf("(%d,%d) ", ci[i*n+j], cj[i*n+j]); + } + printf("\n"); + } + + printf("\n"); + } + + printf("The top row coordinates are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + printf("%d ", trow[i*q+j]); + } + printf("\n"); + } + + printf("\n"); + + printf("The top column coordinates are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + printf("%d ", tcol[i*q+j]); + } + printf("\n"); + } + + printf("\n"); + + printf("The common widths of rectangles are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (k = 0; k < p; k++) + { + for (l = 0; l < q; l++) + { + printf("%d ", w[HMPI_RECT_INDEX(i, j, k, l, p, q)]); + } + } + printf("\n"); + } + } + + printf("\n"); + + printf("The common heights of rectangles are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (k = 0; k < p; k++) + { + for (l = 0; l < q; l++) + { + printf("%d ", h[HMPI_RECT_INDEX(i, j, k, l, p, q)]); + } + } + printf("\n"); + } + } + + printf("\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + /* + * If the user set 
trow and tcol as NULL, then the + * type_of_distribution has to be two-dimensional. + */ + int HMPI_Get_matrix_processor ( + int r, + int c, + int p, + int q, + const int *w, + const int *h, + const int *trow, + const int *tcol, + HMPI_Processor *root + ) + { + int i, j, rc; + + if ((p==1) || (q==1)) + { + if (p == 1) + { + rc = HMPI_Get_processor_1d( + r, + c, + q, + w, + h, + trow, + tcol, + &c + ); + + if (rc != HMPI_OK) + { + return rc; + } + + root->I = 0; + root->J = c; + + return HMPI_OK; + } + + rc = HMPI_Get_processor_1d( + r, + c, + p, + w, + h, + trow, + tcol, + &c + ); + + if (rc != HMPI_OK) + { + return rc; + } + + root->I = c; + root->J = 0; + + return HMPI_OK; + } + + rc = HMPI_Get_processor_2d( + r, + c, + p, + q, + w, + h, + trow, + tcol, + &i, + &j + ); + + if (rc != HMPI_OK) + { + return rc; + } + + root->I = i; + root->J = j; + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Get_my_width + ( + int i, + int j, + int p, + int q, + const double *speeds, + int type_of_distribution, + int m, + int n + ) + { + int rc; + int *w, *h, *tcol, *trow; + int width; + + w = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (w == NULL) + { + return MPC_ERR_NOMEM; + } + + h = (int*)malloc( + sizeof(int) + * + (p*q*p*q) + ); + + if (h == NULL) + { + return MPC_ERR_NOMEM; + } + + trow = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (trow == NULL) + { + return MPC_ERR_NOMEM; + } + + tcol = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (tcol == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = HMPI_Partition_matrix_2d( + p, + q, + 1, + speeds, + NULL, + NULL, + m, + n, + type_of_distribution, + w, + h, + trow, + tcol, + NULL, + NULL + ); + + if (rc != HMPI_OK) + { + return rc; + } + + width = w[i*q+j]; + + free(w); + free(h); + free(trow); + free(tcol); + + return width; + } + + /*-----------------------------------------------------*/ + + int HMPI_Get_my_height + ( + int i, + int j, + int p, + int q, + 
const double *speeds, + int type_of_distribution, + int m, + int n + ) + { + int rc; + int *w, *h, *tcol, *trow; + int height; + + w = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (w == NULL) + { + return MPC_ERR_NOMEM; + } + + h = (int*)malloc( + sizeof(int) + * + (p*q*p*q) + ); + + if (h == NULL) + { + return MPC_ERR_NOMEM; + } + + trow = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (trow == NULL) + { + return MPC_ERR_NOMEM; + } + + tcol = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (tcol == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = HMPI_Partition_matrix_2d( + p, + q, + 1, + speeds, + NULL, + NULL, + m, + n, + type_of_distribution, + w, + h, + trow, + tcol, + NULL, + NULL + ); + + if (rc != HMPI_OK) + { + return rc; + } + + height = HMPI_RECT_INDEX(i, j, i, j, p, q); + + free(w); + free(h); + free(trow); + free(tcol); + + return height; + } + + /*-----------------------------------------------------*/ + + int HMPI_Get_diagonal + ( + int i, + int j, + int p, + int q, + const int *w, + const int *h, + const int *trow, + const int *tcol + ) + { + return min(w[i*q+j], HMPI_RECT_INDEX(i, j, i, j, p, q)); + } + + /*-----------------------------------------------------*/ + + int _HMPI_a22elements + ( + int k, + int n, + int l, + int w, + int h, + int trow, + int tcol + ) + { + if (((k+1)%l) >= (tcol+w)) + { + return (_HMPI_myelements(n, l, w, h, trow, tcol) - + (1/2)*((k+1)/l+1)*(2*(n/l) - (k+1)/l)*_HMPI_myelements_g(n, l, w, h, trow, tcol)); + } + + if ((((k+1)%l) < trow) + && (((k+1)%l) < tcol + ) + ) + { + return (_HMPI_myelements(n, l, w, h, trow, tcol) - + ((1/2)*(n/l)*(n/l+1)- (1/2)*(n/l - (k+1)/l)*(n/l - (k+1)/l + 1))*_HMPI_myelements_g(n, l, w, h, trow, tcol)); + } + + if (((((k+1)%l) >= trow)) + && ((((k+1)%l) < (trow+h)) + ) + && + ((((k+1)%l) >= (tcol)) + && (((k+1)%l) < (tcol+w)) + ) + ) + { + return (_HMPI_myelements(n, l, w, h, trow, tcol) - + ((1/2)*(n/l)*(n/l+1)- (1/2)*(n/l - (k+1)/l)*(n/l - (k+1)/l + 1))*_HMPI_myelements_g(n, 
l, w, h, trow, tcol) - + ((n/l - (k+1)/l)*_HMPI_myelements_g(n, l, w, h, trow, tcol) - (1/2)*(tcol+w - (k+1)%l)*(tcol+w - (k+1)%l + 1)) + ); + } + + if (((((k+1)%l) >= trow)) + && ((((k+1)%l) < (trow+h)) + ) + && + (((k+1)%l) < (tcol) + ) + ) + { + return ((1/2)*(k+1/l)*(2*(n/l) - (k+1)/l - 1)*_HMPI_myelements_g(n, l, w, h, trow, tcol)); + } + + if ((((k+1)%l) > (trow+h)) + && + (((k+1)%l) < tcol + ) + ) + { + return ((1/2)*(k+1/l)*(2*(n/l) - (k+1)/l - 1)*_HMPI_myelements_g(n, l, w, h, trow, tcol)); + } + + if ((((k+1)%l) > (trow+h)) + && + ((((k+1)%l) >= (tcol)) + && (((k+1)%l) < (tcol+w)) + ) + ) + { + return (_HMPI_myelements(n, l, w, h, trow, tcol) - + (1/2)*(k+1/l)*(2*(n/l) - (k+1)/l - 1)*_HMPI_myelements_g(n, l, w, h, trow, tcol) - + (n/l - (k+1)/l)*h*((k+1)%l- tcol)); + } + + if (((((k+1)%l) < trow)) + && + ((((k+1)%l) >= (tcol)) + && (((k+1)%l) < (tcol+w)) + ) + ) + { + return (_HMPI_myelements(n, l, w, h, trow, tcol) - + ((1/2)*(n/l)*(n/l+1)- (1/2)*(n/l - (k+1)/l)*(n/l - (k+1)/l + 1))*_HMPI_myelements_g(n, l, w, h, trow, tcol) - + (n/l - (k+1)/l)*h*((k+1)%l- tcol)); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int _HMPI_myelements_g + ( + int n, + int l, + int w, + int h, + int trow, + int tcol + ) + { + if ((w == h) + && ((trow+h) == (tcol+w) + ) + ) + { + return ((1/2)*w*(w+1)); + } + + if (trow >= (tcol+w)) + { + return w*h; + } + + if ((trow+h) <= tcol) + { + return 0; + } + + if ((trow == 0) + && (tcol == 0 + ) + ) + { + if ((trow < (tcol+w)) + && ((trow+h) > tcol + ) + ) + { + return 1/2*h*(h+1); + } + } + + if ((trow < (tcol+w)) + && ((trow+h) > tcol + ) + ) + { + if ((trow == tcol) + && ((trow+h) == (tcol+w) + ) + ) + { + return 1/2*w*(w+1); + } + if ((trow == tcol) + && ((trow+h) < (tcol+w) + ) + ) + { + return (1/2*((trow+h)-tcol)*((trow+h)-tcol+1)); + } + if ((trow == tcol) + && ((trow+h) > (tcol+w) + ) + ) + { + return (w*h - 1/2*((tcol+w)-trow)*((tcol+w)-trow+1)); + } + if (((trow+h) == 
(tcol+w)) + && (trow > tcol + ) + ) + { + return (w*h - 1/2*((tcol+w)-trow)*((tcol+w)-trow+1)); + } + if (((trow+h) == (tcol+w)) + && (trow < tcol + ) + ) + { + return (1/2*((trow+h)-tcol)*((trow+h)-tcol+1)); + } + if ((trow > tcol) + && ((trow+h) < (tcol+w) + ) + ) + { + return (1/2*h*(trow-tcol + trow+h-tcol)); + } + if ((trow < tcol) + && ((trow+h) > (tcol+w) + ) + ) + { + return (1/2*w*(tcol-trow + tcol+w-trow)); + } + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int _HMPI_myelements + ( + int n, + int l, + int w, + int h, + int trow, + int tcol + ) + { + if ((w == h) + && ((trow+h) == (tcol+w) + ) + ) + { + return (((((n/l)*(n/l+1))/2) - (n/l))*w*h + (n/l)*(1/2)*w*(w+1)); + } + + if (trow >= (tcol+w)) + { + return (((n/l)*(n/l+1))/2)*w*h; + } + + if ((trow+h) <= tcol) + { + return (((((n/l)*(n/l+1))/2) - (n/l))*w*h); + } + + if ((trow == 0) + && (tcol == 0 + ) + ) + { + if ((trow < (tcol+w)) + && ((trow+h) > tcol + ) + ) + { + return (((((n/l)*(n/l+1))/2) - (n/l))*w*h + 1/2*(n/l)*h*(h+1)); + } + } + + if ((trow < (tcol+w)) + && ((trow+h) > tcol + ) + ) + { + if ((trow == tcol) + && ((trow+h) == (tcol+w) + ) + ) + { + return (((n/l)*(n/l+1)/2 - n/l)*w*h + 1/2*w*(w+1)); + } + if ((trow == tcol) + && ((trow+h) < (tcol+w) + ) + ) + { + return (((n/l)*(n/l+1)/2 - n/l)*w*h + (1/2*((trow+h)-tcol)*((trow+h)-tcol+1))); + } + if ((trow == tcol) + && ((trow+h) > (tcol+w) + ) + ) + { + return ((((n/l)*(n/l+1)/2 - n/l)*w*h + w*h - 1/2*((tcol+w)-trow)*((tcol+w)-trow+1))); + } + if (((trow+h) == (tcol+w)) + && (trow > tcol + ) + ) + { + return (((n/l)*(n/l+1)/2 - n/l)*w*h + (w*h - 1/2*((tcol+w)-trow)*((tcol+w)-trow+1))); + } + if (((trow+h) == (tcol+w)) + && (trow < tcol + ) + ) + { + return (((n/l)*(n/l+1)/2 - n/l)*w*h + 1/2*(trow+h-tcol)*(trow+h-tcol+1)); + } + if ((trow > tcol) + && ((trow+h) < (tcol+w) + ) + ) + { + return (((n/l)*(n/l+1)/2 - n/l)*w*h + 1/2*h*(trow-tcol + trow+h-tcol)); + } + if ((trow < tcol) + && ((trow+h) 
/*
 * Currently only square dense matrices are assumed.
 *
 * Calls _HMPI_myelements(m, gm, width, height, trow, tcol) for rectangle
 * (i, j), where width = w[i*q+j] and
 * height = h[HMPI_RECT_INDEX(i, j, i, j, p, q)].
 * Returns that value when upper_or_lower is 'L', and n*n minus that value
 * for any other character.
 *
 * The previous version passed the flat index HMPI_RECT_INDEX(...) itself
 * as the height instead of the h[] entry stored at that index.
 *
 * gn and type_of_distribution are accepted for interface compatibility and
 * are not read.
 *
 * NOTE(review): the non-'L' result subtracts from n*n (the whole matrix),
 * not from the number of elements this rectangle holds; confirm intent.
 */
int HMPI_Get_my_elements
(
    int m,
    int n,
    int gm,
    int gn,
    int i,
    int j,
    int p,
    int q,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol,
    int type_of_distribution,
    char upper_or_lower
)
{
    const int cell = i*q+j;
    int lower;

    (void)gn;
    (void)type_of_distribution;

    lower = _HMPI_myelements(
                m,
                gm,
                w[cell],
                h[HMPI_RECT_INDEX(i, j, i, j, p, q)],
                trow[cell],
                tcol[cell]
    );

    if (upper_or_lower == 'L')
    {
        return lower;
    }

    return (n*n - lower);
}

/*-----------------------------------------------------*/

/*
 * Currently only square dense matrices are assumed.
 *
 * Calls _HMPI_a22elements(k, m, gm, width, height, trow, tcol) for
 * rectangle (i, j), where width = w[i*q+j] and
 * height = h[HMPI_RECT_INDEX(i, j, i, j, p, q)].
 * Returns that value when upper_or_lower is 'L', and (m-k)*(m-k) minus
 * that value for any other character.
 *
 * The previous version passed the flat index HMPI_RECT_INDEX(...) itself
 * as the height instead of the h[] entry stored at that index.
 *
 * n, gn and type_of_distribution are accepted for interface compatibility
 * and are not read.
 */
int HMPI_Get_my_kk_elements
(
    int k,
    int m,
    int n,
    int gm,
    int gn,
    int i,
    int j,
    int p,
    int q,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol,
    int type_of_distribution,
    char upper_or_lower
)
{
    const int cell = i*q+j;
    int lower;

    (void)n;
    (void)gn;
    (void)type_of_distribution;

    lower = _HMPI_a22elements(
                k,
                m,
                gm,
                w[cell],
                h[HMPI_RECT_INDEX(i, j, i, j, p, q)],
                trow[cell],
                tcol[cell]
    );

    if (upper_or_lower == 'L')
    {
        return lower;
    }

    return ((m-k)*(m-k) - lower);
}

/*-----------------------------------------------------*/
* +* * +*************************************************************************/ + + /************************************************/ + /* Partitioning interfaces for matrices */ + /* */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_PARTITIONING_MATRICES_HH + #define __HMPI_PARTITIONING_MATRICES_HH + + /* + * Two-dimensional distributions + */ + #define HMPI_ROW_BASED 1 + #define HMPI_COLUMN_BASED 2 + #define HMPI_CARTESIAN 3 + + /* + * General rectangular 1D distribution + */ + #define HMPI_GENERAL 4 + + /* + * Types of formulation for general one-dimensional + * rectangular distributions + */ + #define HMPI_DYNAMIC 1 + #define HMPI_ITERATIVE 2 + #define HMPI_REFINING 3 + + #define HMPI_RECT_INDEX(a, b, c, d, p, q) (a*p*q*q+b*p*q+c*q+d) + #define H(a, b, c, d, p, q) (a*p*q*q+b*p*q+c*q+d) + + typedef double (*HMPI_Lower_bound) ( + int p, + const double *speeds, + int m, + int n + ); + + typedef double (*HMPI_DP_function) ( + int rowsorcolumns, + int rectangles, + int p, + const double *speeds, + double **previous, + int *r + ); + + typedef double (*HMPI_Iterative_function) ( + int p, + const int *w, + const int *h, + const int *trow, + const int *tcol + ); + + typedef double (*HMPI_Refining_function) ( + int p, + const double *speeds, + int m, + int n, + const int *oldw, + const int *oldh, + const int *oldtrow, + const int *oldtcol, + const int *neww, + const int *newh, + const int *newtrow, + const int *newtcol + ); + + int HMPI_Partition_matrix_2d( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ); + + int HMPI_Partition_matrix_1d( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int formulation, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + 
HMPI_Iterative_function cf, + HMPI_Refining_function rf, + const int *iw, + const int *ih, + const int *itrow, + const int *itcol, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol, + int *ci + ); + + int HMPI_Partition_matrix_1d_dp( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol, + int *ci + ); + + int HMPI_Partition_matrix_1d_iterative( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Iterative_function cf, + int *w, + int *h, + int *trow, + int *tcol, + int *ci + ); + + int HMPI_Partition_matrix_1d_refining( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Refining_function rf, + int *w, + int *h, + int *trow, + int *tcol, + int *ci + ); + + int HMPI_Get_processor_2d( + int i, + int j, + int p, + int q, + const int *w, + const int *h, + const int *trow, + const int *tcol, + int *ci, + int *cj + ); + + int HMPI_Get_processor_1d ( + int i, + int j, + int p, + const int *w, + const int *h, + const int *trow, + const int *tcol, + int *c + ); + + typedef struct { + int I; + int J; + } HMPI_Processor; + + int HMPI_Get_matrix_processor ( + int r, + int c, + int p, + int q, + const int *w, + const int *h, + const int *trow, + const int *tcol, + HMPI_Processor *root + ); + + int HMPI_Print_rectangle_1d( + int p, + int m, + int n, + const int *w, + const int *h, + const int *trow, + const int *tcol, + const int *ci + ); + + int HMPI_Print_rectangle_2d( + int p, + int q, + int m, + int n, + const int *w, + const int *h, + const int *trow, + const int *tcol, + const int *ci, + const int *cj + ); + + int HMPI_Common_height + ( + int top_row_1, + int bottom_row_1, + int top_row_2, + int bottom_row_2 + ); + + int 
HMPI_Get_my_width + ( + int i, + int j, + int p, + int q, + const double *speeds, + int type_of_distribution, + int m, + int n + ); + + int HMPI_Get_my_height + ( + int i, + int j, + int p, + int q, + const double *speeds, + int type_of_distribution, + int m, + int n + ); + + int HMPI_Get_diagonal + ( + int i, + int j, + int p, + int q, + const int *w, + const int *h, + const int *trow, + const int *tcol + ); + + int HMPI_Get_my_elements + ( + int m, + int n, + int gm, + int gn, + int i, + int j, + int p, + int q, + const int *w, + const int *h, + const int *trow, + const int *tcol, + int type_of_distribution, + char upper_or_lower + ); + + int HMPI_Get_my_kk_elements + ( + int k, + int m, + int n, + int gm, + int gn, + int i, + int j, + int p, + int q, + const int *w, + const int *h, + const int *trow, + const int *tcol, + int type_of_distribution, + char upper_or_lower + ); + + #endif /* __HMPI_PARTITIONING_MATRICES_HH */ + diff --git a/hdpi/hmpi_partitioning_sets.c b/hdpi/hmpi_partitioning_sets.c new file mode 100644 index 0000000..610e5ef --- /dev/null +++ b/hdpi/hmpi_partitioning_sets.c @@ -0,0 +1,3938 @@ + + /************************************************/ + /* Implementation of Partitioning Interfaces of */ + /* Sets */ + /* */ + /* Revision history */ + /* 20-05-2003 -- Initial version */ + /************************************************/ + + #include + #include + #include + #include + + #include + #include + + /*-----------------------------------------------------*/ + + /* + * All the processors are homogeneous. That is they exhibit + * same speeds. However each processor has an upper bound on + * number of elements it can store. 
+ * This is of complexity O(p*p) + */ + int __HMPI_Homogeneous_distribution_with_mlimits + ( + int p, + int n, + const int* mlimits, + int *np + ) + { + int i, j, rc; + int sum = 0; + int bound_exceeded = 0; + + for (i = 0; i < p; i++) + { + np[i] = n/p; + } + + for (i = 0; i < (n%p); i++) + { + np[i] += 1; + } + + for (i = 0; i < p; i++) + { + if (np[i] > mlimits[i]) + { + np[i] = mlimits[i]; + bound_exceeded = 1; + break; + } + } + + /* + * For all the processors whose upper bounds + * are exceeded, we assign the number of elements + * equal to their upper bounds. However we proceed + * by one processor at the time. TBD + */ + if (bound_exceeded == 0) + { + return HMPI_OK; + } + + { + int *boundsm, *npm; + int nm = n - mlimits[i]; + int ind = 0; + + npm = (int*)malloc( + sizeof(int) + * + (p-1) + ); + + if (npm == NULL) + { + return MPC_ERR_NOMEM; + } + + boundsm = (int*)malloc( + sizeof(int) + * + (p-1) + ); + + if (boundsm == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < p; j++) + { + if (j == i) + { + continue; + } + + npm[ind] = np[j]; + boundsm[ind] = mlimits[j]; + ind++; + } + + rc = __HMPI_Homogeneous_distribution_with_mlimits( + p-1, + nm, + boundsm, + npm + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (ind = 0, j = 0; j < p; j++) + { + if (j == i) + { + continue; + } + + np[j] = npm[ind]; + ind++; + } + + free(boundsm); + free(npm); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + /* + * This is for ordered sets. + * All the processors are homogeneous. That is they exhibit + * same speeds. However each processor has an upper bound on + * number of elements it can store. The sum of weights in each + * partition should be the same. 
+ */ + int __HMPI_Homogeneous_distribution_with_mlimits_and_weights_ordered_sets + ( + int p, + int n, + const int* mlimits, + const int* w, + int *np + ) + { + int i, j, k, rc; + double sumw = 0; + double sumwcum = 0; + int *Size_of_bin; + double *wallocations; + double sumcum = 0; + + wallocations = (double*)malloc( + sizeof(double) + * + (p+1) + ); + + if (wallocations == NULL) + { + return MPC_ERR_NOMEM; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + for (i = 0; i < p; i++) + { + Size_of_bin[i] = sumw/p; + sumwcum += Size_of_bin[i]; + np[i] = 0; + } + + Size_of_bin[0] += sumw - sumwcum; + + wallocations[0] = 0.0; + for (i = 1; i <= p; i++) + { + wallocations[i] = wallocations[i-1] + Size_of_bin[i-1]; + } + + for (i = 0; i < n; i++) + { + sumcum += w[i]; + + for (j = 0; j < p; j++) + { + if ((sumcum > wallocations[j]) + && (sumcum <= wallocations[j+1] + ) + ) + { + if (j == (p-1)) + { + np[j] = n-i; + + free(wallocations); + free(Size_of_bin); + + if (np[j] > mlimits[j]) + { + return HMPI_ERR_PARTITION_SET; + } + + return HMPI_OK; + } + + np[j]++; + + if (np[j] > mlimits[j]) + { + np[j] = mlimits[j]; + + free(wallocations); + free(Size_of_bin); + + return __HMPI_Homogeneous_distribution_with_mlimits_and_weights_ordered_sets( + p-(j+1), + n-i, + mlimits+j+1, + w+i, + np+j+1 + ); + } + } + } + } + + free(wallocations); + free(Size_of_bin); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Partition_set_homogeneous + ( + int p, + const int *mlimits, + int n, + const int *w, + int ordering, + int processor_ordering, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, rc; + int sum = 0; + double sumd = 0; + + /* + * Homogeneous distribution for unordered sets. 
+ * + * The following criterion and restriction should + * be satisfied: + * The number of elements in each partition should be + * proportional to the speed of the processor + * owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. + */ + if ((w == NULL) + && (mlimits == NULL) + && (ordering == 0 + ) + ) + { + if (n < p) + { + for (i = 0; i < n; i++) + { + np[i] = 1; + } + + for (i = n; i < p; i++) + { + np[i] = 0; + } + + return HMPI_OK; + } + + for (i = 0; i < p; i++) + { + np[i] = n/p; + } + + for (i = 0; i < (n%p); i++) + { + np[i] += 1; + } + + return HMPI_OK; + } + + /* + * There is an upper bound on the number of elements + * that each processor can store. + */ + if ((w == NULL) + && (mlimits != NULL) + && (ordering == 0 + ) + ) + { + for (i = 0; i < p; i++) + { + sumd += mlimits[i]; + } + + if (sumd < n) + { + printf( + "This problem size %d cannot be solved, " + "sum of upper bounds on the number of elements exceeded\n", + n + ); + + return HMPI_ERR_MLIMITS; + } + + if (sumd == n) + { + for (i = 0; i < p; i++) + { + np[i] = mlimits[i]; + } + + return HMPI_OK; + } + + if (n < p) + { + for (i = 0; i < n; i++) + { + np[i] = 1; + } + + for (i = n; i < p; i++) + { + np[i] = 0; + } + + return HMPI_OK; + } + + rc = __HMPI_Homogeneous_distribution_with_mlimits( + p, + n, + mlimits, + np + ); + + if (rc != HMPI_OK) + { + printf( + "Problems with homogeneous partitioning of set " + "with upper bounds on the number of elements " + "that can be stored by each processor\n" + ); + + return HMPI_ERR_PARTITION_SET; + } + + return HMPI_OK; + } + + /* + * This is a NP-hard problem. + * The set should be split such that the sum + * of the weights in each subset is the same. + * A naive implementation is provided here + * This is of complexity O(n*n). 
+ * Total complexity = O(n*n) + O(n*p) + * ^^^^^^ + * sorting of weights + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 0 + ) + ) + { + int *Size_of_bin, *Current_bin_capacity; + double sumw = 0; + double sumwcum = 0; + int *rearranged_weights; + double *speeds; + int *rearrangedw; + int temp, temp_number, *allocations, *chosen; + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + for (i = 0; i < p; i++) + { + Size_of_bin[i] = sumw/p; + sumwcum += Size_of_bin[i]; + Current_bin_capacity[i] = 0; + } + + Size_of_bin[0] += sumw - sumwcum; + + /* + * We rearrange the element weights + * in descending order. + */ + { + rearranged_weights = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearranged_weights == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedw = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearrangedw == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + rearrangedw[i] = i; + rearranged_weights[i] = w[i]; + } + + for (i = 0; i < n; i++) + { + for (j = 1; j < n; j++) + { + if (rearranged_weights[j-1] < rearranged_weights[j]) + { + temp = rearranged_weights[j-1]; + rearranged_weights[j-1] = rearranged_weights[j]; + rearranged_weights[j] = temp; + + temp_number = rearrangedw[j-1]; + rearrangedw[j-1] = rearrangedw[j]; + rearrangedw[j] = temp_number; + } + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + n + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + chosen = (int*)malloc( + sizeof(int) + * + n + ); + + if (chosen == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + chosen[i] = 0; + } + + /* + * Fill into the bins until the bins are full + * or until any addition exceeds the capacity + */ + for (i = 
0; i < p; i++) + { + for (j = 0; j < n; j++) + { + if (chosen[j] == 1) + { + continue; + } + + if (Current_bin_capacity[i] == Size_of_bin[i]) + { + continue; + } + + if ((Current_bin_capacity[i] + rearranged_weights[j]) > Size_of_bin[i]) + { + continue; + } + + Current_bin_capacity[i] += rearranged_weights[j]; + allocations[j] = i; + chosen[j] = 1; + } + } + + /* + * Fill into the bin that causes the minumum + * waste + */ + for (i = 0; i < n; i++) + { + int temp = INT_MAX; + int optimal_bin; + + if (chosen[i] == 1) + { + continue; + } + + for (j = 0; j < p; j++) + { + int waste = Current_bin_capacity[j] + + + rearranged_weights[i] + - + Size_of_bin[j]; + + if (waste < temp) + { + temp = waste; + optimal_bin = j; + } + } + + Current_bin_capacity[optimal_bin] += rearranged_weights[i]; + allocations[i] = optimal_bin; + chosen[i] = 1; + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + free(rearranged_weights); + free(rearrangedw); + free(allocations); + free(chosen); + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + speeds[i] = 1.0; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds); + + return HMPI_OK; + } + + /* + * This is an NP-hard problem. 
+ * The set should be split such that the + * sum of the weights in each subset is the same and the + * number of elements assigned to each processor + * must not exceed the upper bound it can store. + * A naive implementation is provided here + */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 0 + ) + ) + { + int *Size_of_bin, *Current_bin_capacity; + double *speeds; + int suml = 0; + double sumw = 0; + double sumwcum = 0; + int *rearranged_weights; + int temp, temp_number, temp_mlimit, *allocations, *chosen, *Number_in_bin; + int *rearrangedw; + int *rearranged_mlimits, *rearrangedp; + + for (i = 0; i < p; i++) + { + sumd += mlimits[i]; + } + + if (sumd < n) + { + printf( + "This problem %d size cannot be solved, " + "sum of upper bounds exceeded\n" + , n + ); + + return HMPI_ERR_MLIMITS; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + Size_of_bin[i] = sumw/p; + sumwcum += Size_of_bin[i]; + } + + Size_of_bin[0] += sumw - sumwcum; + + /* + * We rearrange the element weights + * in descending order. 
+ */ + { + rearranged_weights = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearranged_weights == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedw = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearrangedw == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + rearrangedw[i] = i; + rearranged_weights[i] = w[i]; + } + + for (i = 0; i < n; i++) + { + for (j = 1; j < n; j++) + { + if (rearranged_weights[j-1] < rearranged_weights[j]) + { + temp = rearranged_weights[j-1]; + rearranged_weights[j-1] = rearranged_weights[j]; + rearranged_weights[j] = temp; + + temp_number = rearrangedw[j-1]; + rearrangedw[j-1] = rearrangedw[j]; + rearrangedw[j] = temp_number; + } + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + n + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + speeds[i] = 1.0; + } + + if (sumd == n) + { + int ind = 0; + + /* + * TBD: + * This looks like a NP-hard problem. + * We know the number of elements in each subset + * given by the upper bound. + * The sum of weights of the elements in each + * subset should be the same. + * We provide a naive implementation here. + * This is of complexity O(n*n). + * We arrange the processors in increasing + * order of their upper bounds and we arrange + * the weights in decreasing order. 
+ */ + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_mlimits[i] = mlimits[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_mlimits[j-1] > rearranged_mlimits[j]) + { + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_mlimit = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_mlimit; + } + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < rearranged_mlimits[i]; j++) + { + allocations[ind] = i; + Current_bin_capacity[rearrangedp[i]] += rearranged_weights[ind]; + ind++; + } + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = rearrangedp[allocations[i]]; + } + + free(rearranged_weights); + free(rearrangedw); + free(rearranged_mlimits); + free(rearrangedp); + free(allocations); + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds); + + return HMPI_OK; + } + + Number_in_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Number_in_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Number_in_bin[i] = 0; + } + + chosen = (int*)malloc( + sizeof(int) + * + n + ); + + if (chosen == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + chosen[i] = 0; + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < n; j++) + { + if (chosen[j] == 1) + { + continue; + } + + if (Current_bin_capacity[i] == Size_of_bin[i]) + { + continue; + } + + if ((Current_bin_capacity[i] + rearranged_weights[j]) > Size_of_bin[i]) + { + continue; + } + + if ((Number_in_bin[i] + 1) > mlimits[i]) + { + break; + } + + Number_in_bin[i]++; + Current_bin_capacity[i] += rearranged_weights[j]; + allocations[j] = i; + chosen[j] = 1; + } + } + + for (i = 0; i < n; i++) + { + int temp = INT_MAX; + int optimal_bin = -1; + + if (chosen[i] == 1) + { + continue; + } + + for (j = 0; j < p; j++) + { + int waste; + + if (Number_in_bin[j] >= mlimits[j]) + { + continue; + } + + waste = Current_bin_capacity[j] + + + rearranged_weights[i] + - + Size_of_bin[j]; + + if (waste < temp) + { + temp = waste; + optimal_bin = j; + } + } + + Current_bin_capacity[optimal_bin] += rearranged_weights[i]; + allocations[i] = optimal_bin; + chosen[i] = 1; + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + free(rearranged_weights); + free(rearrangedw); + free(allocations); + free(chosen); + free(Number_in_bin); + + if (metric == NULL) + { + free(Size_of_bin); + 
free(Current_bin_capacity); + free(speeds); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for + * array elements of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds); + + return HMPI_OK; + } + + /* + * Homogeneous distribution for ordered sets + * + * The following criterion and restriction should + * be satisfied: + * The number of elements in each partition should be + * proportional to the speed of the processor + * owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. 
+ * + */ + if ((w == NULL) + && (mlimits == NULL) + && (ordering == 1 + ) + ) + { + if (n < p) + { + np[0] = 0; + + for (i = 1; i <= n; i++) + { + np[i] = np[i-1] + 1; + } + + for (i = n+1; i <= p; i++) + { + np[i] = np[n]; + } + + return HMPI_OK; + } + + np[0] = 0; + np[1] = n/p + n%p; + + for (i = 2; i <= p; i++) + { + np[i] = np[i-1] + (n/p); + } + + return HMPI_OK; + } + + if ((w == NULL) + && (mlimits != NULL) + && (ordering == 1 + ) + ) + { + int *allocations; + + for (i = 0; i < p; i++) + { + sumd += mlimits[i]; + } + + if (sumd < n) + { + printf( + "This problem size %d cannot be solved, " + "sum of memory bounds exceeded\n", + n + ); + + return HMPI_ERR_MLIMITS; + } + + if (sumd == n) + { + np[0] = 0; + + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + mlimits[i-1]; + } + + return HMPI_OK; + } + + if (n < p) + { + np[0] = 0; + + for (i = 1; i <= n; i++) + { + np[i] = np[i-1] + 1; + } + + for (i = n+1; i <= p; i++) + { + np[i] = np[n]; + } + + return HMPI_OK; + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Homogeneous_distribution_with_mlimits( + p, + n, + mlimits, + allocations + ); + + if (rc != HMPI_OK) + { + printf( + "Problems with homogeneous partitioning of set " + "with memory bounds on the number of elements " + "that can be stored by each processor\n" + ); + + return HMPI_ERR_PARTITION_SET; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * The following criterion and restriction should + * be satisfied: + * The sum of weights of the elements in each + * partition should be proportional to the speeda + * of the processor owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. 
+ * + * This case is of complexity O(n*p) + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 1 + ) + ) + { + int *Size_of_bin; + double *wallocations; + int *allocations; + double sumcum = 0; + double sumwcum = 0; + double sumw = 0; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + wallocations = (double*)malloc( + sizeof(double) + * + (p+1) + ); + + if (wallocations == NULL) + { + return MPC_ERR_NOMEM; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + for (i = 0; i < p; i++) + { + Size_of_bin[i] = sumw/p; + sumwcum += Size_of_bin[i]; + allocations[i] = 0; + } + + Size_of_bin[0] += sumw - sumwcum; + + wallocations[0] = 0.0; + for (i = 1; i <= p; i++) + { + wallocations[i] = wallocations[i-1] + Size_of_bin[i-1]; + } + + for (i = 0; i < n; i++) + { + sumcum += w[i]; + + for (j = 0; j < p; j++) + { + if ((sumcum > wallocations[j]) + && (sumcum <= wallocations[j+1] + ) + ) + { + allocations[j]++; + break; + } + } + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + free(wallocations); + free(Size_of_bin); + + return HMPI_OK; + } + + /* + * This looks like a NP-hard problem. + * Processors cannot be reordered. + * We allocate the elements taking into + * account the upper bounds of the processors and + * also the proportionality of the speeds of the + * processors to the sum of weights of the elements. + * It could happen that at the final step, the upper + * upper bound of the final processor is exceeded in + * which case we just allocate the number of elements + * equal to their upper bounds right from the start. + * This is just an approximation. 
+ */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 1) + && (processor_ordering == 0 + ) + ) + { + int *Size_of_bin; + int *Current_bin_capacity; + int *allocations; + double sumw = 0; + double sumwcum = 0; + int ind = 0; + double *speeds; + + for (i = 0; i < p; i++) + { + sumd += mlimits[i]; + } + + if (sumd < n) + { + printf( + "This problem size %d cannot be solved, " + "sum of upper bounds exceeded\n", + n + ); + + return HMPI_ERR_MLIMITS; + } + + if (sumd == n) + { + np[0] = 0; + + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + mlimits[i-1]; + } + + return HMPI_OK; + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Homogeneous_distribution_with_mlimits_and_weights_ordered_sets( + p, + n, + mlimits, + w, + allocations + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + rc = __HMPI_Homogeneous_distribution_with_mlimits( + p, + n, + mlimits, + allocations + ); + + if (rc != HMPI_OK) + { + printf( + "Problems with homogeneous partitioning of set " + "with memory bounds on the number of elements " + "that can be stored by each processor\n" + ); + + return HMPI_ERR_PARTITION_SET; + } + } + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + if (metric == NULL) + { + free(allocations); + return HMPI_OK; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + Size_of_bin[i] = sumw/p; + sumwcum += Size_of_bin[i]; + } + + Size_of_bin[0] += sumw - sumwcum; + + for (i = 0; i < p; i++) + { + for (j = 0; j < allocations[i]; j++) + { + Current_bin_capacity[i] += w[ind++]; + } + } 
+ + speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + speeds[i] = 1.0; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds); + free(allocations); + + return HMPI_OK; + } + + /* + * This looks like a NP-hard problem. + * Processors can be reordered. + */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 1) + && (processor_ordering == 1 + ) + ) + { + int *Size_of_bin; + int *Current_bin_capacity; + int *allocations; + int ind = 0; + double sumw = 0; + double sumwcum = 0; + int *rearranged_mlimits, *rearrangedp; + int temp; + double *speeds; + + for (i = 0; i < p; i++) + { + sumd += mlimits[i]; + } + + if (sumd < n) + { + printf( + "This problem size %d cannot be solved, " + "sum of upper bounds exceeded\n", + n + ); + + return HMPI_ERR_MLIMITS; + } + + if (sumd == n) + { + for (i = 0; i < p; i++) + { + np[2*i] = i; + } + + for (i = 0; i < p; i++) + { + np[2*i+1] = mlimits[i]; + } + + return HMPI_OK; + } + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_mlimits[i] = mlimits[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if 
(rearranged_mlimits[j-1] > rearranged_mlimits[j]) + { + temp = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp; + + temp = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp; + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Homogeneous_distribution_with_mlimits_and_weights_ordered_sets( + p, + n, + rearranged_mlimits, + w, + allocations + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + rc = __HMPI_Homogeneous_distribution_with_mlimits( + p, + n, + rearranged_mlimits, + allocations + ); + + if (rc != HMPI_OK) + { + printf( + "Problems with homogeneous partitioning of set " + "with memory bounds on the number of elements " + "that can be stored by each processor\n" + ); + + return HMPI_ERR_PARTITION_SET; + } + } + + if (rc != HMPI_OK) + { + return rc; + } + + for (ind = 0, i = 0; i < p; i++) + { + np[ind++] = rearrangedp[i]; + np[ind++] = allocations[i]; + } + + if (metric == NULL) + { + free(allocations); + free(rearranged_mlimits); + free(rearrangedp); + + return HMPI_OK; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + Size_of_bin[rearrangedp[i]] = sumw/p; + sumwcum += Size_of_bin[i]; + } + + Size_of_bin[rearrangedp[0]] += sumw - sumwcum; + + for (i = 0; i < p; i++) + { + for (j = 0; j < allocations[i]; j++) + { + Current_bin_capacity[i] += w[ind++]; + } + } + + speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + speeds[i] = 1.0; + } + + /* + * The ideal sum of 
weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds); + free(allocations); + free(rearranged_mlimits); + free(rearrangedp); + + return HMPI_OK; + } + + printf("Parameters provided are invalid\n"); + return HMPI_ERR_PARTITION_SET; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Speeds_are_single_numbers_with_mlimits + ( + int p, + const double *speeds, + const int *bounds, + int n, + int *np + ) + { + int i, j, rc; + int bound_exceeded = 0; + int sum = 0; + double sumd = 0; + + for (i = 0; i < p; i++) + { + sumd += bounds[i]; + } + + if (sumd < n) + { + printf( + "The problem size %d cannot be solved because " + "memory bounds on the number of elements " + "that can be stored by each processor are exceeded\n", + n + ); + + return HMPI_ERR_PARTITION_SET; + } + + if (sumd == n) + { + for (i = 0; i < p; i++) + { + np[i] = bounds[i]; + } + + return HMPI_OK; + } + + rc = __HMPI_Number_of_elements_proportional_to_speed( + p, + n, + speeds, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + if (np[i] > bounds[i]) + { + np[i] = bounds[i]; + bound_exceeded = 1; + break; + } + } + + if (bound_exceeded == 1) + { + int k; + int ind = 0; + int sind = 0; + double *speedsm; + int *npm; + int *boundsm; + int nm = n - bounds[i]; + + speedsm = (double*)malloc( + sizeof(double) + * + (p-1) + ); + + if (speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + npm = (int*)malloc( + sizeof(int) + * + (p-1) + ); + + if 
(npm == NULL) + { + return MPC_ERR_NOMEM; + } + + boundsm = (int*)malloc( + sizeof(int) + * + (p-1) + ); + + if (boundsm == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < p; j++) + { + if (j == i) + { + continue; + } + + npm[ind] = np[j]; + boundsm[ind] = bounds[j]; + speedsm[sind++] = speeds[j]; + ind++; + } + + rc = __HMPI_Speeds_are_single_numbers_with_mlimits( + p-1, + speedsm, + boundsm, + nm, + npm + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (ind = 0, j = 0; j < p; j++) + { + if (j == i) + { + continue; + } + + np[j] = npm[ind]; + ind++; + } + + free(speedsm); + free(boundsm); + free(npm); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_set + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int ordering, + int processor_ordering, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, rc; + + /* + * Check the parameters provided by the user + * If w is not NULL and the set is well ordered, then the + * user has to specify if the implementations can reorder + * the processors before partitioning. 
+ */ + if ((w != NULL) + && (ordering == 1 + ) + ) + { + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + if ((processor_ordering != 0) + && (processor_ordering != 1 + ) + ) + { + printf( + "If weights of the elements are not NULL and the set is well ordered, " + "processor reordering has to be 0 or 1\n" + ); + return HMPI_ERR_PARTITION_SET; + } + } + + if (speeds != NULL) + { + if ((processor_ordering != 0) + && (processor_ordering != 1 + ) + ) + { + printf( + "If weights of the elements are not NULL and the set is well ordered, " + "processor reordering has to be 0 or 1\n" + ); + return HMPI_ERR_PARTITION_SET; + } + } + } + + /* + * Distribution of the set amongst processors + * that are homogeneous + */ + if (speeds == NULL) + { + return __HMPI_Partition_set_homogeneous( + p, + mlimits, + n, + w, + ordering, + processor_ordering, + type_of_metric, + umf, + metric, + np + ); + } + + /* + * Heterogeneous distribution for non-ordered sets. + * + * The following criterion and restriction should + * be satisfied: + * The number of elements in each partition should be + * proportional to the speed of the processor + * owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. 
+ * + * Speeds are single numbers, Set elements has + * no weights and no bounds on the number of + * elements that can be stored by each processor + */ + if ((w == NULL) + && (mlimits == NULL) + && (ordering == 0) + && (pn == 1 + ) + ) + { + return __HMPI_Number_of_elements_proportional_to_speed( + p, + n, + speeds, + np + ); + } + + /* + * Speeds are single numbers, set elements have no + * weights and there is a limit on the number of + * elements that can be stored by each processor + */ + if ((w == NULL) + && (mlimits != NULL) + && (ordering == 0) + && (pn == 1 + ) + ) + { + return __HMPI_Speeds_are_single_numbers_with_mlimits( + p, + speeds, + mlimits, + n, + np + ); + } + + /* + * Speeds are functions of problem size, set elements + * have no weights and no bounds on the number of + * elements that can be stored by each processor + */ + if ((w == NULL) + && (mlimits == NULL) + && (ordering == 0) + && (pn > 1 + ) + ) + { + double *speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + n, + speeds_opt, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(speeds_opt); + + return HMPI_OK; + } + + /* + * Speeds are functions of problem size, no weights and there + * is a limit on the number of elements that can + * be stored by each processor + */ + if ((w == NULL) + && (mlimits != NULL) + && (ordering == 0) + && (pn > 1 + ) + ) + { + double *speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Speed_function_of_problem_size_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + n, + speeds_opt, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(speeds_opt); + + return HMPI_OK; + } + + /* + * The following criterion and restriction should + * be satisfied: + * The sum of weights of the elements in each + * 
partition should be proportional to the speeds + * of the processor owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. + * + * Speeds are single numbers, set has weighted weights + * and no bounds on the number of elements + * This is a NP-hard problem. + * A naive implementation is provided here. + * This is of complexity O(n*n) + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 0) + && (pn == 1 + ) + ) + { + double *rearranged_speeds; + int *rearranged_weights; + int *rearrangedp; + int *rearrangedw; + double temp; + int temp_number; + int *allocations; + + rearranged_speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_speeds[i] = speeds[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_speeds[j-1] < rearranged_speeds[j]) + { + temp = rearranged_speeds[j-1]; + rearranged_speeds[j-1] = rearranged_speeds[j]; + rearranged_speeds[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + } + } + } + + /* + * We rearrange the element weights + * in descending order. 
+ */ + { + rearranged_weights = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearranged_weights == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedw = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearrangedw == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + rearrangedw[i] = i; + rearranged_weights[i] = w[i]; + } + + for (i = 0; i < n; i++) + { + for (j = 1; j < n; j++) + { + if (rearranged_weights[j-1] < rearranged_weights[j]) + { + temp_number = rearranged_weights[j-1]; + rearranged_weights[j-1] = rearranged_weights[j]; + rearranged_weights[j] = temp_number; + + temp_number = rearrangedw[j-1]; + rearrangedw[j-1] = rearrangedw[j]; + rearrangedw[j] = temp_number; + } + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + n + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Sum_of_weights_for_nonordered_set( + p, + n, + rearranged_speeds, + rearranged_weights, + type_of_metric, + umf, + metric, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = rearrangedp[allocations[i]]; + } + + free(rearranged_speeds); + free(rearrangedp); + free(rearrangedw); + free(rearranged_weights); + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are single numbers, set has weighted elements + * and there is a limit on the number of elements that can + * be stored by each processor. + * This is a NP-hard problem. + * A naive implementation is provided here. 
+ */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 0) + && (pn == 1 + ) + ) + { + double *rearranged_speeds; + int *rearranged_weights; + int *rearrangedp; + int *rearrangedw; + int *rearranged_mlimits; + double temp; + int temp_number; + int *allocations; + int temp_mlimit; + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_speeds[i] = speeds[i]; + rearranged_mlimits[i] = mlimits[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_speeds[j-1] < rearranged_speeds[j]) + { + temp = rearranged_speeds[j-1]; + rearranged_speeds[j-1] = rearranged_speeds[j]; + rearranged_speeds[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_mlimit = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_mlimit; + } + } + } + + /* + * We rearrange the element weights + * in descending order. 
+ */ + { + rearranged_weights = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearranged_weights == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedw = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearrangedw == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + rearrangedw[i] = i; + rearranged_weights[i] = w[i]; + } + + for (i = 0; i < n; i++) + { + for (j = 1; j < n; j++) + { + if (rearranged_weights[j-1] < rearranged_weights[j]) + { + temp = rearranged_weights[j-1]; + rearranged_weights[j-1] = rearranged_weights[j]; + rearranged_weights[j] = temp; + + temp_number = rearrangedw[j-1]; + rearrangedw[j-1] = rearrangedw[j]; + rearrangedw[j] = temp_number; + } + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + n + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Apply_mlimits_to_unordered_sum_of_weights ( + p, + n, + rearranged_speeds, + rearranged_mlimits, + rearranged_weights, + type_of_metric, + umf, + metric, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = rearrangedp[allocations[i]]; + } + + free(rearranged_speeds); + free(rearranged_mlimits); + free(rearranged_weights); + free(rearrangedp); + free(rearrangedw); + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are functions of problem size + * set with weighted elements and + * no bounds on the number of elements + * No known results + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 0) + && (pn > 1 + ) + ) + { + /* + * Naive implementation + */ + return __HMPI_Sum_of_weights_for_nonordered_set_speed_functions( + p, + pn, + speeds, + psizes, + n, + w, + type_of_metric, + umf, + metric, + np + ); + } + + /* + * Speeds are functions of problem size, + * set with weighted elements and there + * is a limit on the number of elements that can + * be stored by each processor. + * No known results. 
+ */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 0) + && (pn > 1 + ) + ) + { + /* + * Naive implementation + */ + return __HMPI_Sum_of_weights_for_nonordered_set_speed_functions_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + n, + w, + type_of_metric, + umf, + metric, + np + ); + } + + /* + * Heterogeneous distribution for ordered sets. + * + * The following criterion and restriction should + * be satisfied: + * The number of elements in each partition should be + * proportional to the speed of the processor + * owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. + * + * Speeds are single numbers, no weights and no + * bounds on the number of elements + */ + if ((w == NULL) + && (mlimits == NULL) + && (ordering == 1) + && (pn == 1 + ) + ) + { + int *allocations; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Number_of_elements_proportional_to_speed( + p, + n, + speeds, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are single numbers, no weights and there + * is a limit on the number of elements that can + * be stored by each processor + */ + if ((w == NULL) + && (mlimits != NULL) + && (ordering == 1) + && (pn == 1 + ) + ) + { + int *allocations; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Speeds_are_single_numbers_with_mlimits( + p, + speeds, + mlimits, + n, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are functions of problem size 
+ * no weights and no bounds on the number of elements + */ + if ((w == NULL) + && (mlimits == NULL) + && (ordering == 1) + && (pn > 1 + ) + ) + { + double *speeds_opt; + int *allocations; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + n, + speeds_opt, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(speeds_opt); + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are functions of problem size, no weights and there + * is a limit on the number of elements that can + * be stored by each processor + */ + if ((w == NULL) + && (mlimits != NULL) + && (ordering == 1) + && (pn > 1 + ) + ) + { + int *allocations; + double *speeds_opt; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Speed_function_of_problem_size_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + n, + speeds_opt, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(speeds_opt); + free(allocations); + + return HMPI_OK; + } + + /* + * The following criterion and restriction should + * be satisfied: + * The sum of weights of the elements in each + * partition should be proportional to the speeda + * of the processor owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. 
+ * + * Speeds are single numbers, set has weighted weights + * and no bounds on the number of elements + * Processors cannot be reordered. + * No known results. + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 1) + && (pn == 1) + && (processor_ordering == 0 + ) + ) + { + int *allocations; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Sum_of_weights_for_ordered_set( + p, + n, + speeds, + w, + type_of_metric, + umf, + metric, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * Processors can be reordered. + * We rearrange the processors in decreasing order + * of speeds + * No known results. + * A naive implementation is provided here. + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 1) + && (pn == 1) + && (processor_ordering == 1 + ) + ) + { + double *rearranged_speeds; + int *rearrangedp; + double temp; + int temp_number; + int *allocations; + int ind; + + rearranged_speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_speeds[i] = speeds[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_speeds[j-1] < rearranged_speeds[j]) + { + temp = rearranged_speeds[j-1]; + rearranged_speeds[j-1] = rearranged_speeds[j]; + rearranged_speeds[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = 
__HMPI_Sum_of_weights_for_ordered_set( + p, + n, + rearranged_speeds, + w, + type_of_metric, + umf, + metric, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (ind = 0, i = 0; i < p; i++) + { + np[ind++] = rearrangedp[i]; + np[ind++] = allocations[i]; + } + + free(rearranged_speeds); + free(rearrangedp); + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are single numbers, set has weighted elements and there + * is a limit on the number of elements that can + * be stored by each processor + * Processors cannot be reordered. + * No known results. + * A naive implementation is provided here + */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 1) + && (pn == 1) + && (processor_ordering == 0 + ) + ) + { + int *allocations; + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Apply_mlimits_to_ordered_sum_of_weights( + p, + n, + speeds, + mlimits, + w, + -1, + NULL, + NULL, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * Metric is not calculated before. + * Do it now. + */ + if (metric != NULL) + { + int i, ind, j, sumw; + int *Size_of_bin, *Current_bin_capacity; + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + for (i = 0, ind = 0; i < p; i++) + { + for (j = 0; j < allocations[i]; j++) + { + Current_bin_capacity[i] += w[ind++]; + } + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * Processors can be reordered. + * No known results. + * A naive implementation is provided here. + */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 1) + && (pn == 1) + && (processor_ordering == 1 + ) + ) + { + double *rearranged_speeds; + int *rearrangedp; + int *rearranged_mlimits; + double temp; + int temp_number, temp_mlimit; + int *allocations; + int ind; + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_speeds[i] = speeds[i]; + rearranged_mlimits[i] = mlimits[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_speeds[j-1] < rearranged_speeds[j]) + { + temp = rearranged_speeds[j-1]; + rearranged_speeds[j-1] = rearranged_speeds[j]; + rearranged_speeds[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_mlimit = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_mlimit; + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if 
(allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Apply_mlimits_to_ordered_sum_of_weights ( + p, + n, + rearranged_speeds, + rearranged_mlimits, + w, + -1, + NULL, + NULL, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * Metric is not calculated before. + * Do it now. + */ + if (metric != NULL) + { + int i, ind, j, sumw; + int *Size_of_bin, *Current_bin_capacity; + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + rearranged_speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + for (i = 0, ind = 0; i < p; i++) + { + for (j = 0; j < allocations[i]; j++) + { + Current_bin_capacity[i] += w[ind++]; + } + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + rearranged_speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + rearranged_speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + } + + for (ind = 0, i = 0; i < p; i++) + { + np[ind++] = rearrangedp[i]; + np[ind++] = allocations[i]; + } + + free(rearranged_speeds); + free(rearranged_mlimits); + free(rearrangedp); + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are functions of problem size + * set with weighted elements and + * no bounds on the number of elements + * Processors cannot be reordered. 
+ * No known results. + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 1) + && (pn > 1) + && (processor_ordering == 0 + ) + ) + { + int *allocations; + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Naive implementation + */ + rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions( + p, + pn, + speeds, + psizes, + n, + w, + type_of_metric, + umf, + metric, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * Processors can be reordered. + * No known results. + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 1) + && (pn > 1) + && (processor_ordering == 1 + ) + ) + { + /* + * Naive implementation + */ + return __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering( + p, + pn, + speeds, + psizes, + n, + w, + type_of_metric, + umf, + metric, + np + ); + } + + /* + * Speeds are functions of problem size, + * set with weighted elements and there + * is a limit on the number of elements that can + * be stored by each processor + * Processors cannot be reordered. + * No known results. + */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 1) + && (pn > 1) + && (processor_ordering == 0 + ) + ) + { + int *allocations; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Naive implementation + */ + rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + n, + w, + type_of_metric, + umf, + metric, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * Metric is not calculated before. + * Do it now. 
+ */ + if (metric != NULL) + { + int i, ind, j, sumw; + int *Size_of_bin, *Current_bin_capacity; + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + for (i = 0, ind = 0; i < p; i++) + { + for (j = 0; j < allocations[i]; j++) + { + Current_bin_capacity[i] += w[ind++]; + } + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + } + + np[0] = 0; + + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * Processors can be reordered. + * No known results. 
+ */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 1) + && (pn > 1) + && (processor_ordering == 1 + ) + ) + { + /* + * Naive implementation + */ + return __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + n, + w, + type_of_metric, + umf, + metric, + np + ); + } + + printf("Parameters provided are invalid\n"); + return HMPI_ERR_PARTITION_SET; + } + + /*-----------------------------------------------------*/ + + int HMPI_Get_set_processor( + int pos, + int n, + int p, + int processor_ordering, + const int *np + ) + { + int i, j; + + if (processor_ordering == 1) + { + int *cumnp = (int*)malloc( + sizeof(int) + * + (p+1) + ); + + if (cumnp == NULL) + { + printf("Can't allocate cumnp in Function HMPI_Get_set_processor\n"); + return MPC_ERR_NOMEM; + } + + cumnp[0] = 0; + + for (i = 1; i <= p; i++) + { + cumnp[i] = np[2*i - 1] + cumnp[i - 1]; + } + + for (i = 0; i < p; i++) + { + if ((pos >= cumnp[i]) + && (pos < cumnp[i+1] + ) + ) + { + free(cumnp); + return np[i]; + } + } + + free(cumnp); + return -1; + } + + { + int *cumnp = (int*)malloc( + sizeof(int) + * + (p+1) + ); + + if (cumnp == NULL) + { + printf("Can't allocate cumnp in Function HMPI_Get_set_processor\n"); + return MPC_ERR_NOMEM; + } + + for (i = 0; i <= p; i++) + { + cumnp[i] = 0; + for (j = 0; j < i; j++) + { + cumnp[i] += np[j]; + } + } + + for (i = 0; i < p; i++) + { + if ((pos >= cumnp[i]) + && (pos < cumnp[i+1] + ) + ) + { + free(cumnp); + return i; + } + } + + free(cumnp); + } + + return -1; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_unordered_set ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + return HMPI_Partition_set( + p, + pn, + speeds, + psizes, + mlimits, + n, + w, + 0, + -1, + type_of_metric, + umf, + 
metric, + np + ); + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_ordered_set ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int processor_reordering, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + return HMPI_Partition_set( + p, + pn, + speeds, + psizes, + mlimits, + n, + w, + 1, + processor_reordering, + type_of_metric, + umf, + metric, + np + ); + } + + /*-----------------------------------------------------*/ + + int + HMPI_Get_my_partition + ( + int i, + int p, + const int *speeds, + int n + ) + { + int ind; + int rc, myd; + double *perf; + int *d = (int*)malloc( + sizeof(int) + * + p + ); + + if (d == NULL) + { + printf("Can't allocate cumnp in Function HMPI_Get_my_partition\n"); + return MPC_ERR_NOMEM; + } + + perf = (double*)malloc( + sizeof(double) + * + p + ); + + if (perf == NULL) + { + printf("Can't allocate perf in Function HMPI_Get_my_partition\n"); + return MPC_ERR_NOMEM; + } + + for (ind = 0; ind < p; ind++) + { + perf[ind] = speeds[ind]; + } + + rc = HMPI_Partition_set( + p, + 1, + perf, + NULL, + NULL, + n, + NULL, + 0, + 0, + -1, + NULL, + NULL, + d + ); + + if (rc != HMPI_OK) + { + printf("Problems partitioning\n"); + return -1; + } + + myd = d[i]; + + free(d); + free(perf); + + return myd; + } + + /*-----------------------------------------------------*/ diff --git a/hdpi/hmpi_partitioning_sets.h b/hdpi/hmpi_partitioning_sets.h new file mode 100644 index 0000000..e847a40 --- /dev/null +++ b/hdpi/hmpi_partitioning_sets.h @@ -0,0 +1,96 @@ + +/************************************************************************* +* * +* Heterogeneous Data Partitioning Interface * +* ========================================= * +* * +* Copyright (c) 2002 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. 
We assume no responsibility for the use * +* or reliability of our software. * +* * +*************************************************************************/ + + /************************************************/ + /* Partitioning interfaces for sets */ + /* */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_PARTITIONING_SETS_HH + #define __HMPI_PARTITIONING_SETS_HH + + #define USER_SPECIFIED 1 + #define SYSTEM_DEFINED 2 + + typedef double (*User_defined_metric)( + int p, + const double *speeds, + const int *actual, + const int *ideal + ); + + int HMPI_Partition_unordered_set ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int HMPI_Partition_ordered_set ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int processor_reordering, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int HMPI_Partition_set( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int ordering, + int processor_ordering, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int HMPI_Get_set_processor( + int i, + int n, + int p, + int processor_ordering, + const int *np + ); + + int HMPI_Get_my_partition( + int i, + int p, + const int *speeds, + int n + ); + + #endif /* __HMPI_PARTITIONING_SETS_HH */ + diff --git a/hdpi/hmpi_partitioning_sets_speed_function_of_problem_size.c b/hdpi/hmpi_partitioning_sets_speed_function_of_problem_size.c new file mode 100644 index 0000000..9fdb3a4 --- /dev/null +++ b/hdpi/hmpi_partitioning_sets_speed_function_of_problem_size.c @@ -0,0 +1,1194 @@ + + /************************************************/ + /* Implementation of Partitioning 
Interfaces of */ + /* Sets using processor graphs with speed and */ + /* memory */ + /* */ + /* Revision history */ + /* 01-07-2003 -- Initial version */ + /************************************************/ + + #include + #include + #include + #include + #include + + #include + #include + + static int _HMPI_Bisection_count = 0; + static int HMPI_Debug_flag = 0; + + /*-----------------------------------------------------*/ + + int + __HMPI_Distribute_with_single_number_for_speed + ( + int n, + int p, + const double *s, + double *npd + ) + { + int i, left, rc, sum = 0; + int* npp = (int*)malloc( + sizeof(int) + * + p + ); + + if (npp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + sum += npd[i]; + } + + left = n - sum; + + if (left > 0) + { + rc = __HMPI_Number_of_elements_proportional_to_speed( + p, + left, + s, + npp + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + npd[i] += npp[i]; + } + } + else + { + rc = __HMPI_Number_of_elements_proportional_to_speed( + p, + sum - n, + s, + npp + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + npd[i] -= npp[i]; + } + } + + free(npp); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Recursive_bisection_middle_region + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + double slopei, + double slopef, + double *speeds_opt, + double *npd + ) + { + int i, j, rc; + double sumd; + double slope, slopeii, slopeff; + double slope_1_plus_2; + double *s = (double*)malloc( + sizeof(double) + * + p + ); + + if (s == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Initialize npd + */ + for (i = 0; i < p; i++) + { + npd[i] = 0.; + } + + /* + * Now use the straight line with half slope + */ + slope_1_plus_2 = (double)(slopei+slopef) + / + (double)(1 - slopei*slopef); + + if (slope_1_plus_2 > 0) + { + slope = (double)(sqrt(1+pow(slope_1_plus_2, 2)) - 1) + / + 
(double)slope_1_plus_2; + } + + if (slope_1_plus_2 < 0) + { + slope = (double)(sqrt(1+pow(slope_1_plus_2, 2)) + 1) + / + (double)(-slope_1_plus_2); + } + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> __HMPI_Recursive_bisection_middle_region: Slope is %0.10f, %0.10f, %0.10f\n", + slopei, slopef, slope + ); + } + + for (i = 0; i < p; i++) + { + double slopep; + double interceptp; + double x; + + int intersection_point_found = 0; + + for (j = 0; j < (pn - 1); j++) + { + /* + * Ignore problem sizes of 0 for the moment + */ + if (psizes[i*pn + j] == 0) + { + npd[i] = 0; + speeds_opt[i] = DBL_MAX; + s[i] = DBL_MAX; + continue; + } + + /* + * This is a extra/bad experimental point + * + * If this is the last point, assume a constant function + * for the jump + */ + if (psizes[i*pn + j + 1] == psizes[i*pn + j]) + { + if (j == (pn - 2)) + { + x = speeds[i*pn + j] + / + slope; + + if (x >= 0) + { + npd[i] = x; + speeds_opt[i] = s[i] = speeds[i*pn + j]; + } + + break; + } + + continue; + } + + slopep = (speeds[i*pn + j + 1] - speeds[i*pn + j]) + / + (double)(psizes[i*pn + j + 1] - psizes[i*pn + j]); + + interceptp = speeds[i*pn + j] - slopep*psizes[i*pn + j]; + + x = (double)interceptp + / + (double)(slope - slopep); + + /* + * The x-coordinate of the intersection is negative + * Proceed to the next straight line in the + * functional model + */ + if (x < 0) + { + continue; + } + + /* + * The intersection point lies beyond the end points + * of the straight line, so should we proceed to the next + * iteration? 
+ */ + if ((x < psizes[i*pn + j]) + || (x > psizes[i*pn + j + 1] + ) + ) + { + continue; + } + + intersection_point_found = 1; + npd[i] = x; + speeds_opt[i] = s[i] = x*slope; + break; + } + + if (intersection_point_found == 1) + { + continue; + } + + /* + * Start with a function with the same slope as the first line segment + */ + slopep = (speeds[i*pn + 1] - speeds[i*pn]) + / + (double)(psizes[i*pn + 1] - psizes[i*pn]); + + interceptp = speeds[i*pn] - slopep*psizes[i*pn]; + + x = (double)interceptp + / + (double)(slope - slopep); + + if ((x >= 0) + && (x <= psizes[i*pn] + ) + ) + { + npd[i] = x; + s[i] = x*slope; + speeds_opt[i] = s[i]; + continue; + } + + /* + * Assume a constant function in the beginning + * for a problem size of 0 and the first experimental + * point + */ + if (psizes[i*pn] != 0) + { + x = (double)speeds[i*pn] + / + (double)slope; + + if ((x >= 0) + && (x <= psizes[i*pn] + ) + ) + { + npd[i] = x; + speeds_opt[i] = s[i] = speeds[i*pn]; + continue; + } + } + + /* + * Try the function with the same slope as before in the end + */ + slopep = (speeds[i*pn + pn - 1] - speeds[i*pn + pn - 2]) + / + (double)(psizes[i*pn + pn - 1] - psizes[i*pn + pn - 2]); + interceptp = speeds[i*pn + pn - 2] - slopep*psizes[i*pn + pn - 2]; + x = (double)interceptp + / + (double)(slope - slopep); + + if (x >= 0) + { + npd[i] = x; + speeds_opt[i] = s[i] = x*slope; + continue; + } + + /* + * Now assume a constant function in the end + */ + x = (double)speeds[i*pn + pn - 1] + / + (double)slope; + + if (x >= 0) + { + npd[i] = x; + speeds_opt[i] = s[i] = speeds[i*pn + pn - 1]; + continue; + } + + if (x < 0) + { + printf("HMPI===> __HMPI_Recursive_bisection_middle_region: Panic, no intersection\n"); + } + } + + /* + * If the sum is equal to n, we have a + * perfect fit. 
+ */ + sumd = 0.0; + for (i = 0; i < p; i++) + { + sumd += npd[i]; + } + + if (((floor(sumd)) == n) + || ((ceil(sumd)) == n + ) + ) + { + int sum = 0; + + for (i = 0; i < p; i++) + { + sum += floor(npd[i]); + } + + if (sum == n) + { + return HMPI_OK; + } + + for (i = 0; i < p; i++) + { + if (npd[i] <= 1) + { + continue; + } + + npd[i] = npd[i] + 1; + + sum = 0; + + for (j = 0; j < p; j++) + { + sum += floor(npd[j]); + } + + if (sum == n) + { + break; + } + } + + free(s); + + return HMPI_OK; + } + + if (HMPI_Debug_flag) + { + printf("HMPI===> __HMPI_Recursive_bisection_middle_region: Sum is %0.6f\n", sumd); + } + + /* + * After HMPI_MAX_BISECTION_STEPS steps, we have not + * arrived at a solution. It is known that for bisection, no more + * than 53 iterations are needed to obtain full single precision. + * Distribute the rest of the elements of the set + * using the speeds at the current point. + */ + if (_HMPI_Bisection_count == HMPI_MAX_BISECTION_STEPS) + { + for (i = 0; i < p; i++) + { + speeds_opt[i] = s[i]; + } + + rc = __HMPI_Distribute_with_single_number_for_speed( + n, + p, + s, + npd + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(s); + + return HMPI_OK; + } + + free(s); + + _HMPI_Bisection_count++; + + if (sumd > n) + { + slopeii = slopei; + slopeff = slope; + + return __HMPI_Recursive_bisection_middle_region( + p, + pn, + speeds, + psizes, + n, + slopeii, + slopeff, + speeds_opt, + npd + ); + } + + slopeii = slope; + slopeff = slopef; + + return __HMPI_Recursive_bisection_middle_region( + p, + pn, + speeds, + psizes, + n, + slopeii, + slopeff, + speeds_opt, + npd + ); + } + + /*-----------------------------------------------------*/ + + int __HMPI_Speed_function_of_problem_size_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *bounds, + int n, + double *speeds_opt, + int *np + ) + { + int i, j, rc; + int bound_exceeded = 0; + double sumd = 0; + + _HMPI_Bisection_count = 0; + + for (i = 0; i < p; i++) + 
{ + sumd += bounds[i]; + } + + if (sumd < n) + { + printf( + "Problem size %d cannot be solved, memory bounds " + "on the number of elements that can be stored by " + "each processor exceeded\n", n); + return HMPI_ERR_PARTITION_SET; + } + + if (sumd == n) + { + for (i = 0; i < p; i++) + { + np[i] = bounds[i]; + } + + return HMPI_OK; + } + + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + n, + speeds_opt, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + if (np[i] > bounds[i]) + { + np[i] = bounds[i]; + + bound_exceeded = 1; + + break; + } + } + + if (bound_exceeded == 1) + { + int k; + int ind = 0; + int sind = 0; + int mind = 0; + int *psizesm; + double *speedsm; + double *speedsm_opt; + int *npm; + int *boundsm; + int nm = n - bounds[i]; + + speedsm = (double*)malloc( + sizeof(double) + * + (p-1) + * + pn + ); + + if (speedsm == NULL) + { + return MPC_ERR_NOMEM; + } + + speedsm_opt = (double*)malloc( + sizeof(double) + * + (p-1) + * + pn + ); + + if (speedsm_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + psizesm = (int*)malloc( + sizeof(int) + * + (p-1) + * + pn + ); + + if (psizesm == NULL) + { + return MPC_ERR_NOMEM; + } + + npm = (int*)malloc( + sizeof(int) + * + (p-1) + ); + + if (npm == NULL) + { + return MPC_ERR_NOMEM; + } + + boundsm = (int*)malloc( + sizeof(int) + * + (p-1) + ); + + if (boundsm == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < p; j++) + { + if (j == i) + { + continue; + } + + npm[ind] = np[j]; + boundsm[ind] = bounds[j]; + ind++; + + for (k = 0; k < pn; k++) + { + speedsm[sind++] = speeds[j*pn + k]; + psizesm[mind++] = psizes[j*pn + k]; + } + } + + rc = __HMPI_Speed_function_of_problem_size_with_mlimits( + p-1, + pn, + speedsm, + psizesm, + boundsm, + nm, + speedsm_opt, + npm + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (ind = 0, j = 0; j < p; j++) + { + if (j == i) + { + continue; + } + + np[j] = npm[ind]; + speeds_opt[j] = speedsm_opt[ind]; + 
ind++; + } + + free(speedsm); + free(speedsm_opt); + free(psizesm); + free(boundsm); + free(npm); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Speed_function_of_problem_size + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + double *speeds_opt, + int *np + ) + { + int istart = p; + int negative_x_intersection = 0; + int rc, ind, i, j; + double temp, sumd; + double slopei, slopef; + double *npd = (double*)malloc( + sizeof(double) + * + p + ); + + if (npd == NULL) + { + return MPC_ERR_NOMEM; + } + + _HMPI_Bisection_count = 0; + + /* + * Initialize npd + */ + for (i = 0; i < p; i++) + { + npd[i] = 0.; + } + + /* + * The functions may start from the problem size of 0 + */ + for (i = 0; i < p; i++) + { + if (psizes[i*pn] != 0) + { + istart = i; + break; + } + } + + if (istart != p) + { + temp = (double)speeds[istart*pn] + / + (double)psizes[istart*pn]; + ind = istart; + for (i = istart+1; i < p; i++) + { + double temps; + + /* + * Ignore the point where the problem size is 0 + */ + if (psizes[i*pn] == 0) + { + continue; + } + + temps = (double)speeds[i*pn] + / + (double)psizes[i*pn]; + + if (temp < temps) + { + temp = temps; + ind = i; + } + } + } + else + { + ind = 0; + } + + /* + * Solve the equations + * y = (maximum slope)*x and + * y0 = c0, y1 = c1, ... for points + * x0, x1, x2, ...,xp-1 + */ + npd[ind] = psizes[ind*pn]; + speeds_opt[ind] = speeds[ind*pn]; + for (i = 0; i < p; i++) + { + double xj; + + if (i == ind) + { + continue; + } + + /* + * Ignore problem sizes of 0 for the moment + */ + if (psizes[i*pn] == 0) + { + npd[i] = 0; + speeds_opt[i] = speeds[i*pn]; + break; + } + + xj = ( + (double)psizes[ind*pn] + / + (double)speeds[ind*pn] + ) + * + speeds[i*pn]; + + npd[i] = xj; + speeds_opt[i] = speeds[i*pn]; + } + + /* + * If the sum is equal to n, we have a + * perfect fit. 
+ */ + sumd = 0.0; + for (i = 0; i < p; i++) + { + sumd += npd[i]; + } + + if (((floor(sumd)) == n) + || ((ceil(sumd)) == n + ) + ) + { + int sum = 0; + + for (i = 0; i < p; i++) + { + sum += floor(npd[i]); + } + + if (sum == n) + { + for (i = 0; i < p; i++) + { + np[i] = floor(npd[i]); + } + + return HMPI_OK; + } + + for (i = 0; i < p; i++) + { + if (npd[i] <= 1) + { + continue; + } + + npd[i] = npd[i] + 1; + + sum = 0; + + for (j = 0; j < p; j++) + { + sum += floor(npd[j]); + } + + if (sum == n) + { + break; + } + } + + for (i = 0; i < p; i++) + { + np[i] = floor(npd[i]); + } + + free(npd); + + return HMPI_OK; + } + + /* + * The intersection points lie before the starting + * experimental point. So use the single number speeds + * This is the only thing we can do at this point. + */ + if (sumd > n) + { + double *s = (double*)malloc( + sizeof(double) + * + p + ); + + if (s == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + s[i] = speeds[i*pn]; + } + + rc = __HMPI_Number_of_elements_proportional_to_speed( + p, + n, + s, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(npd); + free(s); + + return HMPI_OK; + } + + if (istart != p) + { + slopei = (double)speeds[ind*pn] + / + (double)psizes[ind*pn]; + } + else + { + slopei = DBL_MAX; + } + + /* + * Start with a straight line passing + * with minimum slope at last point + * + * Actually this is ridiculous. The + * last point can't have the problem size 0. + * But we take care of this situation as best as we can. 
+ */ + istart = p; + for (i = 0; i < p; i++) + { + if (psizes[i*pn + pn - 1] != 0) + { + istart = i; + break; + } + } + + if (istart != p) + { + temp = (double)speeds[istart*pn + pn - 1] + / + (double)psizes[istart*pn + pn - 1]; + ind = istart; + for (i = istart+1; i < p; i++) + { + double temps = (double)speeds[i*pn + pn - 1] + / + (double)psizes[i*pn + pn - 1]; + if (temp > temps) + { + temp = temps; + ind = i; + } + } + } + else + { + ind = 0; + } + + /* + * Solve the equations for the last point + * y = (minimum slope)*x and for the processors + * y0 = b0*x+c0, y1 = b1*x1+c1, ... for points + * x0, x1, x2, ...,xp-1 + * + * Initialize npd + */ + for (i = 0; i < p; i++) + { + npd[i] = 0.; + } + + npd[ind] = psizes[ind*pn + pn - 1]; + speeds_opt[ind] = speeds[ind*pn + pn - 1]; + for (i = 0; i < p; i++) + { + if (i == ind) + { + continue; + } + + { + double slopep, slopeo; + double interceptp; + double x, y; + + /* + * Ignore problem sizes of 0 for the moment + */ + if (psizes[i*pn + pn - 2] == 0) + { + npd[i] = 0; + speeds_opt[i] = speeds[i*pn + pn - 2]; + continue; + } + + /* + * This is an extra or bad experimental point + */ + if (psizes[i*pn + pn - 1] == psizes[i*pn + pn - 2]) + { + /* + * Replace this jump by constant function + */ + slopeo = speeds_opt[ind] + / + (double)npd[ind]; + + x = speeds[i*pn + pn - 2] + / + slopeo; + + if (x >= 0) + { + npd[i] = x; + speeds_opt[i] = speeds[i*pn + pn - 2]; + } + + if (x < 0) + { + break; + } + + continue; + } + + slopep = (speeds[i*pn + pn - 1] - speeds[i*pn + pn - 2]) + / + (double)(psizes[i*pn + pn - 1] - psizes[i*pn + pn - 2]); + + interceptp = speeds[i*pn + pn - 2] - slopep*psizes[i*pn + pn - 2]; + + slopeo = speeds_opt[ind] + / + (double)npd[ind]; + + x = (double)interceptp + / + (double)(slopeo - slopep); + + /* + * The x-coordinate of the intersection is negative + */ + if (x < 0) + { + negative_x_intersection = 1; + break; + } + + npd[i] = x; + speeds_opt[i] = x*slopeo; + } + } + + if (negative_x_intersection 
== 0) + { + /* + * If the sum is equal to n, we have a + * perfect fit. + */ + sumd = 0.0; + for (i = 0; i < p; i++) + { + sumd += npd[i]; + } + + if (((floor(sumd)) == n) + || ((ceil(sumd)) == n + ) + ) + { + int sum = 0; + + for (i = 0; i < p; i++) + { + sum += floor(npd[i]); + } + + if (sum == n) + { + for (i = 0; i < p; i++) + { + np[i] = floor(npd[i]); + } + + return HMPI_OK; + } + + for (i = 0; i < p; i++) + { + if (npd[i] <= 1) + { + continue; + } + + npd[i] = npd[i] + 1; + + sum = 0; + + for (j = 0; j < p; j++) + { + sum += floor(npd[j]); + } + + if (sum == n) + { + break; + } + } + + for (i = 0; i < p; i++) + { + np[i] = floor(npd[i]); + } + + free(npd); + + return HMPI_OK; + } + + /* + * The functions constructed are inadequate. More + * experimental points are required. + * Should we alert the user? + */ + if (sumd < n) + { + double *s = (double*)malloc( + sizeof(double) + * + p + ); + + if (s == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + s[i] = ( + (double)speeds[ind*pn + pn - 1] + / + (double)npd[ind] + ) + * + npd[i]; + speeds_opt[i] = s[i]; + } + + rc = __HMPI_Number_of_elements_proportional_to_speed( + p, + n, + s, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(s); + free(npd); + + return HMPI_OK; + } + } + + slopef = (double)speeds[ind*pn + pn - 1] + / + (double)psizes[ind*pn + pn - 1]; + + /* + * Use recursive Bisection to get a perfect fit + */ + rc = __HMPI_Recursive_bisection_middle_region( + p, + pn, + speeds, + psizes, + n, + slopei, + slopef, + speeds_opt, + npd + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + np[i] = floor(npd[i]); + } + + free(npd); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + diff --git a/hdpi/hmpi_partitioning_sets_speed_function_of_problem_size_weighted_elements.c b/hdpi/hmpi_partitioning_sets_speed_function_of_problem_size_weighted_elements.c new file mode 100644 index 0000000..17ace6e --- /dev/null 
+++ b/hdpi/hmpi_partitioning_sets_speed_function_of_problem_size_weighted_elements.c @@ -0,0 +1,2103 @@ + + + /************************************************/ + /* Implementation of Partitioning Interfaces of */ + /* Sets using processor graphs with speed and */ + /* memory. The elements of the set have weights.*/ + /* */ + /* Revision history */ + /* 01-07-2003 -- Initial version */ + /************************************************/ + + #include + #include + #include + #include + + #include + #include + + static int HMPI_Debug_flag = 0; + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_nonordered_set_speed_functions + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int *rearranged_weights; + int *rearrangedw; + int *allocations; + int sumw = 0; + int i, j, rc; + int *Size_of_bin, *Current_bin_capacity; + double *speeds_opt; + int temp, temp_number; + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. + */ + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + sumw, + speeds_opt, + Size_of_bin + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * We rearrange the element weights + * in descending order. 
+ */ + { + rearranged_weights = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearranged_weights == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedw = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearrangedw == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + rearrangedw[i] = i; + rearranged_weights[i] = w[i]; + } + + for (i = 0; i < n; i++) + { + for (j = 1; j < n; j++) + { + if (rearranged_weights[j-1] < rearranged_weights[j]) + { + temp = rearranged_weights[j-1]; + rearranged_weights[j-1] = rearranged_weights[j]; + rearranged_weights[j] = temp; + + temp_number = rearrangedw[j-1]; + rearrangedw[j-1] = rearrangedw[j]; + rearrangedw[j] = temp_number; + } + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + n + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + for (i = 0; i < n; i++) + { + int waste = INT_MAX; + int chosen = -1; + + for (j = 0; j < p; j++) + { + if ((Current_bin_capacity[j] + w[i]) <= Size_of_bin[j]) + { + int wastej = ( + Size_of_bin[j] + - + ( + Current_bin_capacity[j] + + + w[i] + ) + ); + + if (wastej < waste) + { + chosen = j; + waste = wastej; + } + } + } + + if (chosen == -1) + { + waste = INT_MAX; + + for (j = 0; j < p; j++) + { + int wastej = fabs( + Size_of_bin[j] + - + ( + Current_bin_capacity[j] + + + w[i] + ) + ); + + if (wastej < waste) + { + chosen = j; + waste = wastej; + } + } + } + + allocations[i] = chosen; + Current_bin_capacity[chosen] += w[i]; + } + + if (metric == NULL) + { + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(rearrangedw); + free(rearranged_weights); + free(allocations); + free(speeds_opt); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * 
elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + { + free(Size_of_bin); + free(Current_bin_capacity); + free(rearrangedw); + free(rearranged_weights); + free(allocations); + free(speeds_opt); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_nonordered_set_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, k, rc; + int *rearranged_weights; + int *rearrangedw; + int temp; + int temp_number, temp_mlimit; + int *allocations; + int *Size_of_bin, *Current_bin_capacity; + int total_limits = 0; + int *Open, *Number_in_bin; + int sumw = 0; + double *speeds_opt; + int *shortlist; + + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits < n) + { + printf( + "The number of elements in the set" + " is greater than the sum of numbers of elements" + " the processors can hold or" + " Partitioning cannot be done with the restrictions" + " provided\n" + ); + + return HMPI_ERR_MLIMITS; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + 
* Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. + */ + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + sumw, + speeds_opt, + Size_of_bin + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * We rearrange the element weights + * in descending order. + */ + { + rearranged_weights = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearranged_weights == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedw = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearrangedw == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + rearrangedw[i] = i; + rearranged_weights[i] = w[i]; + } + + for (i = 0; i < n; i++) + { + for (j = 1; j < n; j++) + { + if (rearranged_weights[j-1] < rearranged_weights[j]) + { + temp = rearranged_weights[j-1]; + rearranged_weights[j-1] = rearranged_weights[j]; + rearranged_weights[j] = temp; + + temp_number = rearrangedw[j-1]; + rearrangedw[j-1] = rearrangedw[j]; + rearrangedw[j] = temp_number; + } + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + n + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + if (total_limits == n) + { + int ind = 0; + int *rearranged_mlimits, *rearrangedp; + double *rearranged_speeds_opt; + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_speeds_opt[i] = speeds_opt[i]; + 
rearranged_mlimits[i] = mlimits[i]; + + Current_bin_capacity[i] = 0; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_mlimits[j-1] > rearranged_mlimits[j]) + { + temp = rearranged_speeds_opt[j-1]; + rearranged_speeds_opt[j-1] = rearranged_speeds_opt[j]; + rearranged_speeds_opt[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_mlimit = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_mlimit; + } + } + } + + /* + * This looks like a NP-hard problem. + * We know the number of elements in each subset + * given by the upper bound. + * We provide a naive implementation here. + * This is of complexity O(n*n). + * We arrange the processors in increasing + * order of their upper bounds and we arrange + * the weights in decreasing order. + */ + for (i = 0; i < p; i++) + { + for (j = 0; j < rearranged_mlimits[i]; j++) + { + allocations[ind] = rearrangedp[i]; + Current_bin_capacity[rearrangedp[i]] += w[ind]; + ind++; + } + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_weights); + free(rearranged_mlimits); + free(rearrangedw); + free(rearrangedp); + free(allocations); + free(speeds_opt); + free(rearranged_speeds_opt); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_weights); + free(rearranged_mlimits); + free(rearrangedw); + free(rearrangedp); + free(allocations); + free(speeds_opt); + free(rearranged_speeds_opt); + + return HMPI_OK; + } + + Open = (int*)malloc( + sizeof(int) + * + p + ); + + if (Open == NULL) + { + return MPC_ERR_NOMEM; + } + + Number_in_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Number_in_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + Open[i] = 1; + Number_in_bin[i] = 0; + } + + shortlist = (int*)malloc( + sizeof(int) + * + p + ); + + if (shortlist == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + int nslist = 0; + int chosen = -1; + + for (j = 0; j < p; j++) + { + if (((Current_bin_capacity[j] + w[i]) <= Size_of_bin[j]) + && (Open[j] == 1 + ) + ) + { + shortlist[nslist++] = j; + } + } + + if (nslist > 0) + { + int temp = Size_of_bin[shortlist[0]] + - + Current_bin_capacity[shortlist[0]] + ; + chosen = shortlist[0]; + + for (k = 1; k < nslist; k++) + { + int tempk = Size_of_bin[shortlist[k]] + - + Current_bin_capacity[shortlist[k]] + ; + + if ((tempk >= temp) + && (Open[shortlist[k]] == 1 + ) + ) + { + temp = tempk; + chosen = shortlist[k]; + } + } + } + else + { + int waste = INT_MAX; + for (j = 0; j < p; j++) + { + if (Open[j] == 1) + { + int wastej = ( + Current_bin_capacity[j] + + + w[i] + - + Size_of_bin[j] + ); + + if (wastej < waste) + { + chosen = j; + waste = wastej; + } + } + } + } + + if ((Number_in_bin[chosen] + 1) == mlimits[chosen]) + { + allocations[i] = chosen; + 
Number_in_bin[chosen]++; + Current_bin_capacity[chosen] = Current_bin_capacity[chosen] + + + w[i] + ; + Open[chosen] = 0; + + continue; + } + + if ((Number_in_bin[chosen] + 1) > mlimits[chosen]) + { + printf("HMPI===> __HMPI_Sum_of_weights_for_nonordered_set_speed_functions_with_mlimits:" + " error in code, must not come into this part\n" + ); + + return HMPI_ERR_INTERNAL; + } + + allocations[i] = chosen; + Number_in_bin[chosen]++; + Current_bin_capacity[chosen] = Current_bin_capacity[chosen] + + + w[i] + ; + } + + free(shortlist); + free(Open); + free(Number_in_bin); + + if (metric == NULL) + { + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_weights); + free(rearrangedw); + free(allocations); + free(speeds_opt); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + if (HMPI_Debug_flag) + { + printf("Speeds opt are: \n"); + + for (i = 0; i < p; i++) + { + printf("%0.1f ", speeds_opt[i]); + } + + printf("\n"); + + printf("Current bin capacities are: \n"); + + for (i = 0; i < p; i++) + { + printf("%d ", Current_bin_capacity[i]); + } + + printf("\n"); + + printf("Sizes of bin are: \n"); + + for (i = 0; i < p; i++) + { + printf("%d ", Size_of_bin[i]); + } + + printf("\n"); + } + + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_weights); + free(rearrangedw); + free(allocations); + free(speeds_opt); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int sumw = 0; + int sumcum = 0; + int i, j, rc, prev_proc; + int *wallocationsc; + int *Size_of_bin, *Current_bin_capacity; + double *speeds_opt; + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. 
+ */ + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + sumw, + speeds_opt, + Size_of_bin + ); + + if (rc != HMPI_OK) + { + return rc; + } + + wallocationsc = (int*)malloc( + sizeof(int) + * + (p+1) + ); + + if (wallocationsc == NULL) + { + return MPC_ERR_NOMEM; + } + + wallocationsc[0] = 0; + for (i = 1; i <= p; i++) + { + wallocationsc[i] = wallocationsc[i-1] + Size_of_bin[i-1]; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + np[i] = 0; + } + + for (i = 0; i < n; i++) + { + prev_proc = 0; + sumcum += w[i]; + + for (j = 0; j < p; j++) + { + int Wastej_1, Wastej; + + if ((sumcum > wallocationsc[j]) + && (sumcum <= wallocationsc[j+1] + ) + ) + { + if (prev_proc == j) + { + np[j]++; + Current_bin_capacity[j] += w[i]; + break; + } + + /* + * The elements preceding the current one + * exactly fit into partition (j-1) + */ + if ((sumcum - w[i]) == wallocationsc[j]) + { + prev_proc = j; + np[j]++; + Current_bin_capacity[j] += w[i]; + break; + } + + /* + * This is a border element. + * The waste is calculated if this element + * goes to j-1 or to j. + */ + Wastej_1 = fabs( + Size_of_bin[j-1] + - + ( + Current_bin_capacity[j-1] + + + w[i] + ) + ); + + Wastej = fabs( ( + sumw - wallocationsc[j] + ) + - + ( + sumw - sumcum + w[i] + ) + ); + + if (Wastej_1 <= Wastej) + { + np[j-1]++; + Current_bin_capacity[j-1] += w[i]; + } + else + { + np[j]++; + Current_bin_capacity[j] += w[i]; + } + } + } + } + + if (metric == NULL) + { + free(wallocationsc); + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds_opt); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(wallocationsc); + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds_opt); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, k, rc, indl; + int total_limits = 0; + int mlimits_apply = 0; + int x, y, l, m, opt_start; + int wastei, sumtmp, wastef; + int sumw; + int *Size_of_bin; + int total_sub_mlimits; + double *speeds_opt; + + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits == n) + { + for (i = 0; i < p; i++) + { + np[i] = mlimits[i]; + } + + return HMPI_OK; + } + + if (total_limits < n) + { + printf( + "The number of elements in the set" + " is greater than the sum of numbers of elements" + " the processors can hold or" + " Partitioning cannot be done with the restrictions" + " provided\n" + ); + + return HMPI_ERR_MLIMITS; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. 
+ */ + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + sumw, + speeds_opt, + Size_of_bin + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. + */ + rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions( + p, + pn, + speeds, + psizes, + n, + w, + type_of_metric, + umf, + metric, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + if (np[i] > mlimits[i]) + { + mlimits_apply = 1; + break; + } + } + + if (mlimits_apply == 0) + { + return HMPI_OK; + } + + for (i = 0; i < p; i++) + { + if (np[i] <= mlimits[i]) + { + continue; + } + + /* + * We try to distribute the remaining + * elements to the processors following it + */ + if (i == 0) + { + int reduced_set_size; + np[i] = mlimits[i]; + reduced_set_size = n - np[i]; + + if (HMPI_Debug_flag) + { + printf("HMPI===> __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits: mlimits = %d, Reduced set size = %d\n", mlimits[i], reduced_set_size); + } + + free(Size_of_bin); + free(speeds_opt); + + return __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits( + p-1, + pn, + (speeds + pn), + (psizes + pn), + (mlimits + 1), + reduced_set_size, + (w + np[i]), + type_of_metric, + umf, + metric, + np + 1 + ); + } + + /* + * If this is the last processor, + * we try to distribute the remaining + * elements to the processors preceding it + */ + if (i == (p - 1)) + { + int reduced_set_size = 0; + + for (j = 0; j < i; j++) + { + reduced_set_size += np[j]; + } + + reduced_set_size += (np[i] - mlimits[i]); + np[i] = mlimits[i]; + + free(Size_of_bin); + free(speeds_opt); + + return __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits( + p-1, + pn, + speeds, + psizes, + mlimits, + reduced_set_size, + w, + type_of_metric, + umf, + metric, + np + ); + } + + if 
(HMPI_Debug_flag) + { + printf( + "HMPI===> " + "__HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits: " + "Processor %d has upper bound exceeded\n", + i + ); + + printf("HMPI===> Allocations are: \n"); + for (k = 0; k < p; k++) + { + printf("%d ", np[k]); + } + printf("\n"); + + printf("HMPI===> element limits are:\n"); + for (k = 0; k < p; k++) + { + printf("%d ", mlimits[k]); + } + printf("\n"); + } + + for (k = i+1, total_sub_mlimits = 0; k < p; k++) + { + total_sub_mlimits += mlimits[k]; + } + + /* + * Find the maximum subsequence of elements, the number + * of elements being equal to mlimits[i] and packing these + * element into bin i generates least amount of waste + */ + l = 0; + for (k = 0; k < i; k++) + { + l += np[k]; + } + + indl = l; + + do + { + wastei = INT_MAX; + + for (x = indl; x < (indl+np[i]); x++) + { + if (((indl+np[i]) - x) < mlimits[i]) + { + break; + } + + sumtmp = 0; + + for (y = 0; y < mlimits[i]; y++) + { + sumtmp += w[x+y]; + } + + wastef = fabs(sumtmp - Size_of_bin[i]); + + if (HMPI_Debug_flag) + { + printf("x=%d, wastef=%d, Size of bin=%d ", x, wastef, Size_of_bin[i]); + } + + if (wastef < wastei) + { + wastei = wastef; + opt_start = x; + } + } + + if (HMPI_Debug_flag) + { + printf("\n"); + } + + l = opt_start; + m = l + mlimits[i]; + + indl++; + } + while ((n-m) > total_sub_mlimits); + + np[i] = mlimits[i]; + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> " + "__HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits: " + "Total number of elements=%d," + "Number of elements to be redistributed before=%d," + " elements after the element %d to be redistributed\n", + n, + l, + m + ); + } + + /* + * spread the elements {0, 1, ..., l-1} + * amongst the processors before i + */ + rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits( + i, + pn, + speeds, + psizes, + mlimits, + l, + w, + type_of_metric, + umf, + metric, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * spread the 
elements {m+1, m+2, ..., n-1} + * amongst the processors following i + */ + rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits( + p-(i+1), + pn, + (speeds+(i+1)*pn), + (psizes+(i+1)*pn), + (mlimits+i+1), + (n-l-mlimits[i]), + (w+l+mlimits[i]), + type_of_metric, + umf, + metric, + (np+i+1) + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(Size_of_bin); + free(speeds_opt); + + break; + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int sumw = 0; + int sumcum = 0; + int ind, i, j, rc, prev_proc; + int *wallocationsc, *allocations; + int *Size_of_bin, *Current_bin_capacity; + double *speeds_opt, *rearranged_speeds_opt; + int *rearranged_size_of_bin; + int *rearrangedp; + int temp, temp_number; + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. 
+ */ + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + sumw, + speeds_opt, + Size_of_bin + ); + + if (rc != HMPI_OK) + { + return rc; + } + + rearranged_size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_size_of_bin[i] = Size_of_bin[i]; + rearranged_speeds_opt[i] = speeds_opt[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_size_of_bin[j-1] < rearranged_size_of_bin[j]) + { + temp = rearranged_size_of_bin[j-1]; + rearranged_size_of_bin[j-1] = rearranged_size_of_bin[j]; + rearranged_size_of_bin[j] = temp; + + temp = rearranged_speeds_opt[j-1]; + rearranged_speeds_opt[j-1] = rearranged_speeds_opt[j]; + rearranged_speeds_opt[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + } + } + } + + wallocationsc = (int*)malloc( + sizeof(int) + * + (p+1) + ); + + if (wallocationsc == NULL) + { + return MPC_ERR_NOMEM; + } + + wallocationsc[0] = 0; + for (i = 1; i <= p; i++) + { + wallocationsc[i] = wallocationsc[i-1] + rearranged_size_of_bin[i-1]; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + allocations[i] = 0; + } + + for (i = 0; i < n; i++) + { + prev_proc = 0; + sumcum += w[i]; + + for (j = 0; j < p; j++) + { + int Wastej_1, Wastej; + + if ((sumcum > wallocationsc[j]) + && (sumcum <= wallocationsc[j+1] + ) + ) + { + if (prev_proc == j) + { + allocations[j]++; + Current_bin_capacity[j] += w[i]; + break; + } + + 
/* + * The elements preceding the current one + * exactly fit into partition (j-1) + */ + if ((sumcum - w[i]) == wallocationsc[j]) + { + prev_proc = j; + allocations[j]++; + Current_bin_capacity[j] += w[i]; + break; + } + + /* + * This is a border element. + * The waste is calculated if this element + * goes to j-1 or to j. + */ + Wastej_1 = fabs( + rearranged_size_of_bin[j-1] + - + ( + Current_bin_capacity[j-1] + + + w[i] + ) + ); + + Wastej = fabs( ( + sumw - wallocationsc[j] + ) + - + ( + sumw - sumcum + w[i] + ) + ); + + if (Wastej_1 <= Wastej) + { + allocations[j-1]++; + Current_bin_capacity[j-1] += w[i]; + } + else + { + allocations[j]++; + Current_bin_capacity[j] += w[i]; + } + } + } + } + + if (metric == NULL) + { + for (ind = 0, i = 0; i < p; i++) + { + np[ind++] = rearrangedp[i]; + np[ind++] = allocations[i]; + } + + free(wallocationsc); + free(Size_of_bin); + free(rearranged_size_of_bin); + free(rearrangedp); + free(Current_bin_capacity); + free(speeds_opt); + free(rearranged_speeds_opt); + free(allocations); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + rearranged_speeds_opt, + Current_bin_capacity, + rearranged_size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + rearranged_speeds_opt, + Current_bin_capacity, + rearranged_size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + for (ind = 0, i = 0; i < p; i++) + { + np[ind++] = rearrangedp[i]; + np[ind++] = allocations[i]; + } + + free(wallocationsc); + free(Size_of_bin); + free(rearranged_size_of_bin); + free(rearrangedp); + free(Current_bin_capacity); + free(speeds_opt); + free(rearranged_speeds_opt); + free(allocations); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int sumw = 0; + int *allocations; + int *Size_of_bin, *rearranged_size_of_bin; + double *speeds_opt, *rearranged_speeds_opt; + int *rearrangedp; + int *rearranged_mlimits; + double temp; + int temp_number; + int i, j, rc, ind; + int total_limits = 0; + int mlimits_apply = 0; + + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits == n) + { + for (i = 0, ind = 0; i < p; i++) + { + np[ind++] = i; + np[ind++] = mlimits[i]; + } + + return HMPI_OK; + } + + if (total_limits < n) + { + printf( + "The number of elements in the set" + " is greater than the sum of numbers of elements" + " the processors can hold or" + " Partitioning cannot be done with the restrictions" + " provided\n" + ); + + return HMPI_ERR_MLIMITS; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return 
MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. + */ + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + sumw, + speeds_opt, + Size_of_bin + ); + + if (rc != HMPI_OK) + { + return rc; + } + + rearranged_speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_mlimits[i] = mlimits[i]; + rearranged_size_of_bin[i] = Size_of_bin[i]; + rearranged_speeds_opt[i] = speeds_opt[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_size_of_bin[j-1] < rearranged_size_of_bin[j]) + { + temp = rearranged_size_of_bin[j-1]; + rearranged_size_of_bin[j-1] = rearranged_size_of_bin[j]; + rearranged_size_of_bin[j] = temp; + + temp = rearranged_speeds_opt[j-1]; + rearranged_speeds_opt[j-1] = rearranged_speeds_opt[j]; + rearranged_speeds_opt[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_number = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_number; + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; 
i++) + { + allocations[i] = 0; + } + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> " + "__HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering_with_mlimits:" + " Optimal speeds are: \n" + ); + + for (i = 0; i < p; i++) + { + printf("%.1f ", rearranged_speeds_opt[i]); + } + + printf("\n"); + } + + rc = __HMPI_Apply_mlimits_to_ordered_sum_of_weights( + p, + n, + rearranged_speeds_opt, + rearranged_mlimits, + w, + -1, + NULL, + NULL, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * Calculate the metric + */ + if (metric != NULL) + { + int i, ind, j; + int *Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + for (i = 0, ind = 0; i < p; i++) + { + for (j = 0; j < allocations[i]; j++) + { + Current_bin_capacity[i] += w[ind++]; + } + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + rearranged_speeds_opt, + Current_bin_capacity, + rearranged_size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + rearranged_speeds_opt, + Current_bin_capacity, + rearranged_size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Current_bin_capacity); + } + + for (ind = 0, i = 0; i < p; i++) + { + np[ind++] = rearrangedp[i]; + np[ind++] = allocations[i]; + } + + free(Size_of_bin); + free(rearranged_size_of_bin); + free(rearrangedp); + free(speeds_opt); + free(rearranged_speeds_opt); + free(rearranged_mlimits); + free(allocations); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + diff --git a/hdpi/hmpi_partitioning_sets_speed_single_numbers_weighted_elements.c b/hdpi/hmpi_partitioning_sets_speed_single_numbers_weighted_elements.c new file mode 100644 index 0000000..fd3ed55 --- /dev/null +++ b/hdpi/hmpi_partitioning_sets_speed_single_numbers_weighted_elements.c @@ -0,0 +1,1709 @@ + + /************************************************/ + /* Helpers for Partitioning Interfaces of */ + /* */ + /* Revision history */ + /* 20-05-2003 -- Initial version */ + /************************************************/ + + #include + #include + #include + #include + + #include + #include + + static int HMPI_Debug_flag = 0; + + /*-----------------------------------------------------*/ + + int __HMPI_Number_of_elements_proportional_to_speed + ( + int p, + int n, + const double *speeds, + int *allocations + ) + { + int i, j; + int total = 0; + double sum = 0.0; + + for (i = 0; i < p; i++) + { + sum += speeds[i]; + } + + for (i = 0; i < p; i++) + { + allocations[i] = ( + (double)speeds[i] + / + (double)sum + ) + * + n; + } + + for (i = 0; i < p; i++) + { + total += allocations[i]; + } + + if (total == n) + { + return HMPI_OK; + } + + for (i = total; i < n; i++) + { + int 
optimal_p; + int *revised_allocations; + double *allocation_ratios; + double temp; + + revised_allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (revised_allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + allocation_ratios = (double*)malloc( + sizeof(double) + * + p + ); + + if (allocation_ratios == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < p; j++) + { + revised_allocations[j] = allocations[j] + 1; + allocation_ratios[j] = (double)revised_allocations[j] + / + (double)speeds[j]; + } + + temp = allocation_ratios[0]; + optimal_p = 0; + for (j = 1; j < p; j++) + { + if (temp > allocation_ratios[j]) + { + temp = allocation_ratios[j]; + optimal_p = j; + } + } + + allocations[optimal_p] = allocations[optimal_p] + 1; + + free(revised_allocations); + free(allocation_ratios); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + double __HMPI_System_defined_metric + ( + int p, + const double *speeds, + const int *actual, + const int *ideal + ) + { + int i; + double metric; + double sumd = 0.0; + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> __HMPI_System_defined_metric: " + "speeds are\n"); + + for (i = 0; i < p; i++) + { + printf("%.1f ", speeds[i]); + } + + printf("\n"); + + printf("HMPI===> __HMPI_System_defined_metric: cumulative sumd = \n"); + } + + for (i = 0; i < p; i++) + { + if ((int)speeds[i] == 0) + { + continue; + } + + sumd += ((actual[i] - ideal[i])*(actual[i] - ideal[i])) + / + speeds[i]; + + if (HMPI_Debug_flag) + { + printf("%.1f ", sumd); + } + } + + if (HMPI_Debug_flag) + { + printf("\n"); + } + + metric = sqrt(sumd); + + return metric; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Size_of_bins + ( + int p, + int n, + const double *speeds, + const int *w, + int *wallocations, + int *tsum + ) + { + int i, j, rc; + double sump = 0.0; + int sumw = 0; + int totalw = 0; + + for (i = 0; i < p; i++) + { + sump += speeds[i]; + } + + for (i = 0; i < n; 
i++) + { + sumw += w[i]; + } + + *tsum = sumw; + + for (i = 0; i < p; i++) + { + wallocations[i] = ( + (double)speeds[i] + / + (double)sump + ) + * + sumw; + } + + for (i = 0; i < p; i++) + { + totalw += wallocations[i]; + } + + for (i = totalw; i < sumw; i++) + { + int optimal_p; + int *revised_allocations; + double *allocation_ratios; + double temp; + + revised_allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (revised_allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + allocation_ratios = (double*)malloc( + sizeof(double) + * + p + ); + + if (allocation_ratios == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < p; j++) + { + revised_allocations[j] = wallocations[j] + 1; + allocation_ratios[j] = (double)revised_allocations[j] + / + (double)speeds[j]; + } + + temp = allocation_ratios[0]; + optimal_p = 0; + + for (j = 1; j < p; j++) + { + if (temp > allocation_ratios[j]) + { + temp = allocation_ratios[j]; + optimal_p = j; + } + } + + wallocations[optimal_p] = wallocations[optimal_p] + 1; + + free(revised_allocations); + free(allocation_ratios); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_ordered_set + ( + int p, + int n, + const double *speeds, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, rc, prev_proc; + int *Size_of_bin; + int *wallocationsc; + int sumw; + int sumcum = 0; + int *Current_bin_capacity; + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + if (HMPI_Debug_flag) + { + printf("HMPI===> __HMPI_Sum_of_weights_for_ordered_set: Sizes of bins are: \n"); + printf("HMPI===> "); + + for (i = 0; i < p; i++) + { + printf("%d ", Size_of_bin[i]); + } + + printf("\n"); + } + + wallocationsc = 
(int*)malloc( + sizeof(int) + * + (p+1) + ); + + if (wallocationsc == NULL) + { + return MPC_ERR_NOMEM; + } + + wallocationsc[0] = 0; + for (i = 1; i <= p; i++) + { + wallocationsc[i] = wallocationsc[i-1] + Size_of_bin[i-1]; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + np[i] = 0; + Current_bin_capacity[i] = 0; + } + + for (i = 0; i < n; i++) + { + prev_proc = 0; + sumcum += w[i]; + + for (j = 0; j < p; j++) + { + int Wastej_1, Wastej; + + if ((sumcum > wallocationsc[j]) + && (sumcum <= wallocationsc[j+1] + ) + ) + { + if (prev_proc == j) + { + np[j]++; + Current_bin_capacity[j] += w[i]; + break; + } + + /* + * The elements preceding the current one + * exactly fit into partition (j-1) + */ + if ((sumcum - w[i]) == wallocationsc[j]) + { + prev_proc = j; + np[j]++; + Current_bin_capacity[j] += w[i]; + break; + } + + /* + * This is a border element. + * The waste is calculated if this element + * goes to j-1 or to j. + */ + Wastej_1 = fabs( + Size_of_bin[j-1] + - + ( + Current_bin_capacity[j-1] + + + w[i] + ) + ); + + Wastej = fabs( ( + sumw - wallocationsc[j] + ) + - + ( + sumw - sumcum + w[i] + ) + ); + + if (Wastej_1 <= Wastej) + { + np[j-1]++; + Current_bin_capacity[j-1] += w[i]; + } + else + { + np[j]++; + Current_bin_capacity[j] += w[i]; + } + + break; + } + } + } + + if (metric == NULL) + { + free(wallocationsc); + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(wallocationsc); + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Apply_mlimits_to_ordered_sum_of_weights + ( + int p, + int n, + const double *speeds, + const int *mlimits, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, k, rc, indl; + int *Size_of_bin; + int sumw; + int total_limits = 0; + int mlimits_apply = 0; + int x, y, l, m, opt_start; + int total_sub_mlimits; + int wastei, sumtmp, wastef; + + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits == n) + { + for (i = 0; i < p; i++) + { + np[i] = mlimits[i]; + } + return HMPI_OK; + } + + if (total_limits < n) + { + printf( + "The number of elements in the set" + " exceed the upper bounds of the processors\n" + ); + + return HMPI_ERR_MLIMITS; + } + + rc = __HMPI_Sum_of_weights_for_ordered_set( + p, + n, + speeds, + w, + type_of_metric, + umf, + metric, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + if (np[i] > mlimits[i]) + { + mlimits_apply = 1; + break; + } + } + + if (mlimits_apply == 0) + { + return HMPI_OK; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + if (np[i] <= mlimits[i]) + { + continue; + } + + /* + * We try to distribute the remaining + * elements to the processors following it 
+ */ + if (i == 0) + { + int reduced_set_size; + np[i] = mlimits[i]; + reduced_set_size = n - np[i]; + + if (HMPI_Debug_flag) + { + printf("HMPI===> __HMPI_Apply_mlimits_to_ordered_sum_of_weights: mlimits = %d, Reduced set size = %d\n", mlimits[i], reduced_set_size); + } + + free(Size_of_bin); + + return __HMPI_Apply_mlimits_to_ordered_sum_of_weights( + p-1, + reduced_set_size, + (speeds + 1), + (mlimits + 1), + (w + np[i]), + type_of_metric, + umf, + metric, + np + 1 + ); + } + + /* + * If this is the last processor, + * we try to distribute the remaining + * elements to the processors preceding it + */ + if (i == (p - 1)) + { + int reduced_set_size = 0; + + for (j = 0; j < i; j++) + { + reduced_set_size += np[j]; + } + + reduced_set_size += (np[i] - mlimits[i]); + np[i] = mlimits[i]; + + free(Size_of_bin); + + return __HMPI_Apply_mlimits_to_ordered_sum_of_weights( + p-1, + reduced_set_size, + speeds, + mlimits, + w, + type_of_metric, + umf, + metric, + np + ); + } + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> " + "__HMPI_Apply_mlimits_to_ordered_sum_of_weights: " + "Processor %d has upper bound exceeded\n", + i + ); + + printf("HMPI===> Allocations are: \n"); + for (k = 0; k < p; k++) + { + printf("%d ", np[k]); + } + printf("\n"); + + printf("HMPI===> element limits are:\n"); + for (k = 0; k < p; k++) + { + printf("%d ", mlimits[k]); + } + printf("\n"); + } + + for (k = i+1, total_sub_mlimits = 0; k < p; k++) + { + total_sub_mlimits += mlimits[k]; + } + + /* + * Find the maximum subsequence of elements, the number + * of elements being equal to mlimits[i] and packing these + * elements into bin i generates least amount of waste + */ + l = 0; + for (k = 0; k < i; k++) + { + l += np[k]; + } + + indl = l; + + do + { + wastei = INT_MAX; + + for (x = indl; x < (indl+np[i]); x++) + { + if (((indl+np[i]) - x) < mlimits[i]) + { + break; + } + + sumtmp = 0; + + for (y = 0; y < mlimits[i]; y++) + { + sumtmp += w[x+y]; + } + + wastef = fabs(sumtmp - Size_of_bin[i]); + 
+ if (wastef < wastei) + { + wastei = wastef; + opt_start = x; + } + } + + if (HMPI_Debug_flag) + { + printf("x=%d ", x); + } + + l = opt_start; + m = l + mlimits[i]; + + indl++; + } + while ((n-m) > total_sub_mlimits); + + if (HMPI_Debug_flag) + { + printf("\n"); + } + + np[i] = mlimits[i]; + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> " + "__HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits: " + "Total number of elements=%d," + "Number of elements to be redistributed before=%d," + " elements after the element %d to be redistributed\n", + n, + l, + m + ); + } + + /* + * spread the elements {0, 1, ..., l-1} + * amongst the processors before i + */ + rc = __HMPI_Apply_mlimits_to_ordered_sum_of_weights( + i, + l, + speeds, + mlimits, + w, + type_of_metric, + umf, + metric, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * spread the elements {m, m+1, ..., n-1} + * amongst the processors following i + */ + rc = __HMPI_Apply_mlimits_to_ordered_sum_of_weights( + p-(i+1), + (n-l-mlimits[i]), + (speeds+i+1), + (mlimits+i+1), + (w+l+mlimits[i]), + type_of_metric, + umf, + metric, + (np+i+1) + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(Size_of_bin); + + break; + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_nonordered_set + ( + int p, + int n, + const double *speeds, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, rc; + int sumw; + int *Size_of_bin, *Current_bin_capacity; + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + 
Current_bin_capacity[i] = 0; + } + + for (i = 0; i < n; i++) + { + int waste = INT_MAX; + int chosen; + + for (j = 0; j < p; j++) + { + if (Current_bin_capacity[j] == Size_of_bin[j]) + { + continue; + } + + if ((Current_bin_capacity[j] + w[i]) <= Size_of_bin[j]) + { + np[i] = j; + Current_bin_capacity[j] += w[i]; + break; + } + } + + if (j == p) + { + for (j = 0; j < p; j++) + { + int wastej = fabs( + Size_of_bin[j] + - + ( + Current_bin_capacity[j] + + + w[i] + ) + ); + + if (wastej < waste) + { + chosen = j; + waste = wastej; + } + } + + np[i] = chosen; + Current_bin_capacity[chosen] += w[i]; + } + } + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Apply_mlimits_to_unordered_sum_of_weights_algo_2 + ( + int p, + int n, + const double *speeds, + const int *mlimits, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, k, rc; + int sumw; + int total_limits = 0; + int *Size_of_bin, *Current_bin_capacity; + int *Open, *Number_in_bin, *shortlist; + + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits < n) + { + printf( + "The number of elements in the set" + " is greater than the sum of numbers of elements" + " the processors can hold or" + " Partitioning 
cannot be done with the restrictions" + " provided\n" + ); + + return HMPI_ERR_MLIMITS; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + if (total_limits == n) + { + int ind = 0, temp_number, temp_mlimit; + int *rearranged_mlimits; + int *rearrangedp; + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_mlimits[i] = mlimits[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_mlimits[j-1] > rearranged_mlimits[j]) + { + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_mlimit = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_mlimit; + } + } + } + + /* + * TBD: + * This looks like a NP-hard problem. + * We know the number of elements in each subset + * given by the upper bound. + * We provide a naive implementation here. + * This is of complexity O(n*n). + * We arrange the processors in increasing + * order of their upper bounds and we arrange + * the weights in decreasing order. 
+ */ + for (i = 0; i < p; i++) + { + for (j = 0; j < rearranged_mlimits[i]; j++) + { + np[ind] = rearrangedp[i]; + Current_bin_capacity[rearrangedp[i]] += w[ind]; + ind++; + } + } + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_mlimits); + free(rearrangedp); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_mlimits); + free(rearrangedp); + + return HMPI_OK; + } + + Open = (int*)malloc( + sizeof(int) + * + p + ); + + if (Open == NULL) + { + return MPC_ERR_NOMEM; + } + + Number_in_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Number_in_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Open[i] = 1; + Number_in_bin[i] = 0; + } + + shortlist = (int*)malloc( + sizeof(int) + * + p + ); + + if (shortlist == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + int nslist = 0; + int chosen = -1; + + for (j = 0; j < p; j++) + { + if (Current_bin_capacity[j] == Size_of_bin[j]) + { + continue; + } + + if (((Current_bin_capacity[j] + w[i]) <= Size_of_bin[j]) + && (Open[j] == 1 + ) + ) + { + shortlist[nslist++] = j; + } + } + + if (nslist > 0) + { + int temp = Size_of_bin[shortlist[0]] + - + Current_bin_capacity[shortlist[0]] + ; + chosen = shortlist[0]; + + for (k = 1; k < nslist; k++) + { + int tempk = Size_of_bin[shortlist[k]] + - + Current_bin_capacity[shortlist[k]] + ; + + if ((tempk >= temp) + && 
(Open[shortlist[k]] == 1 + ) + ) + { + temp = tempk; + chosen = shortlist[k]; + } + } + } + else + { + int waste = INT_MAX; + for (j = 0; j < p; j++) + { + if (Open[j] == 1) + { + int wastej = ( + Current_bin_capacity[j] + + + w[i] + - + Size_of_bin[j] + ); + + if (wastej < waste) + { + chosen = j; + waste = wastej; + } + } + } + } + + np[i] = chosen; + Number_in_bin[chosen]++; + + if (Number_in_bin[chosen] >= mlimits[chosen]) + { + Open[chosen] = 0; + } + + Current_bin_capacity[chosen] = Current_bin_capacity[chosen] + + + w[i] + ; + } + + free(shortlist); + free(Open); + free(Number_in_bin); + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Apply_mlimits_to_unordered_sum_of_weights + ( + int p, + int n, + const double *speeds, + const int *mlimits, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, k, rc; + int sumw; + int total_limits = 0; + int *Size_of_bin, *Current_bin_capacity; + int *Open, *Number_in_bin, *shortlist; + + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits < n) + { + printf( + "The number of elements in the set" + " is greater than the sum of numbers of elements" + " the processors can hold or" + " Partitioning cannot be done 
with the restrictions" + " provided\n" + ); + + return HMPI_ERR_MLIMITS; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + if (total_limits == n) + { + int ind = 0, temp_number, temp_mlimit; + int *rearranged_mlimits; + int *rearrangedp; + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_mlimits[i] = mlimits[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_mlimits[j-1] > rearranged_mlimits[j]) + { + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_mlimit = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_mlimit; + } + } + } + + /* + * TBD: + * This looks like a NP-hard problem. + * We know the number of elements in each subset + * given by the upper bound. + * We provide a naive implementation here. + * This is of complexity O(n*n). + * We arrange the processors in increasing + * order of their upper bounds and we arrange + * the weights in decreasing order. 
+ */ + for (i = 0; i < p; i++) + { + for (j = 0; j < rearranged_mlimits[i]; j++) + { + np[ind] = rearrangedp[i]; + Current_bin_capacity[rearrangedp[i]] += w[ind]; + ind++; + } + } + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_mlimits); + free(rearrangedp); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_mlimits); + free(rearrangedp); + + return HMPI_OK; + } + + Open = (int*)malloc( + sizeof(int) + * + p + ); + + if (Open == NULL) + { + return MPC_ERR_NOMEM; + } + + Number_in_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Number_in_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Open[i] = 1; + Number_in_bin[i] = 0; + } + + shortlist = (int*)malloc( + sizeof(int) + * + p + ); + + if (shortlist == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + int nslist = 0; + int chosen = -1; + + for (j = 0; j < p; j++) + { + if (Current_bin_capacity[j] == Size_of_bin[j]) + { + continue; + } + + if (((Current_bin_capacity[j] + w[i]) <= Size_of_bin[j]) + && (Open[j] == 1 + ) + ) + { + chosen = j; + break; + } + } + + if (chosen == -1) + { + int waste = INT_MAX; + for (j = 0; j < p; j++) + { + if (Open[j] == 1) + { + int wastej = ( + Current_bin_capacity[j] + + + w[i] + - + Size_of_bin[j] + ); + + if (wastej < waste) + { + chosen = j; + waste = wastej; + } + } + } + } + + np[i] = chosen; + 
Number_in_bin[chosen]++; + + if (Number_in_bin[chosen] >= mlimits[chosen]) + { + Open[chosen] = 0; + } + + Current_bin_capacity[chosen] = Current_bin_capacity[chosen] + + + w[i] + ; + } + + free(shortlist); + free(Open); + free(Number_in_bin); + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ diff --git a/hdpi/hmpi_partitioning_trees.h b/hdpi/hmpi_partitioning_trees.h new file mode 100644 index 0000000..8b3793d --- /dev/null +++ b/hdpi/hmpi_partitioning_trees.h @@ -0,0 +1,42 @@ + +/************************************************************************* +* * +* Heterogeneous Data Partitioning Interface * +* ========================================= * +* * +* Copyright (c) 2002 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. We assume no responsibility for the use * +* or reliability of our software. 
* +* * +*************************************************************************/ + + /************************************************/ + /* Partitioning interfaces for trees */ + /* */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_PARTITIONING_TREES_HH + #define __HMPI_PARTITIONING_TREES_HH + + int HMPI_Partition_tree( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + int nedges, + const int *nwgt, + const int *xadj, + const int *adjacency, + const int *adjwgt, + int *vp, + int *edgecut + ); + + #endif /* __HMPI_PARTITIONING_TREES_HH */ + diff --git a/hdpi/hmpi_partitioning_types.h b/hdpi/hmpi_partitioning_types.h new file mode 100644 index 0000000..6d99b5d --- /dev/null +++ b/hdpi/hmpi_partitioning_types.h @@ -0,0 +1,27 @@ + +/************************************************************************* +* * +* Heterogeneous Data Partitioning Interface * +* ========================================= * +* * +* Copyright (c) 2002 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. We assume no responsibility for the use * +* or reliability of our software. 
* +* * +*************************************************************************/ + + /************************************************/ + /* Common typedefs used in partitioning */ + /* interfaces */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_PARTITIONING_TYPES_HH + #define __HMPI_PARTITIONING_TYPES_HH + + #define HMPI_MAX_BISECTION_STEPS 64 + + #endif diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..3b50909 --- /dev/null +++ b/main.cpp @@ -0,0 +1,728 @@ + +/*-----------------------------------------------------------*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +/*-----------------------------------------------------------*/ + +#include +#include +#include +#include + +/*-----------------------------------------------------------*/ + +#include "absdevs.c" + +/*-----------------------------------------------------------*/ + +#define MPI_MSG_TAG 0xff + +/*-----------------------------------------------------------*/ + +void rank2coord +( + int pnum, + const int *ppar, + int *pcoord +) +{ + int tmp; + tmp = * (ppar + 1); + * pcoord = pnum / tmp; + pnum = pnum % tmp; + * (pcoord + 1) = pnum; +} + +/*-----------------------------------------------------------*/ + +int print_stdin_2d_allocation_local( + int p, int q, + int m, int n, + const int *w, + const int *h, + const int *trow, + const int *tcol +) +{ + int i, j, k, l; + + printf("The widths of rectangles are:\n"); + + /* + * COLUMN BASED; HENCE ONLY widths of first row are enough + */ + for (j = 0; j < q; j++) + { + printf("%d ", w[HMPI_RECT_INDEX(0, j, 0, j, p, q)]); + } + printf("\n"); + + printf("The heights of rectangles are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (k = 0; k < p; k++) + { + for (l = 0; l < q; l++) + { + printf("%d ", h[HMPI_RECT_INDEX(i, j, k, l, p, q)]); + } + } + printf("\n"); + } + } + + printf("The trows 
of rectangles are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + printf("%d ", trow[i*q+j]); + } + printf("\n"); + } + + printf("The tcols of rectangles are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + printf("%d ", tcol[i*q+j]); + } + printf("\n"); + } + + printf("\n"); + + return 0; +} + +/*-----------------------------------------------------------*/ + +int initializeData( + const int m, const int n, + double *A, double *b +) +{ + int i; + for (i = 0; i < (m*n); i++) + { + A[i] = 1.5; + } + + for (i = 0; i < n; i++) + { + + b[i] = 2.5; + } + + return 0; +} + +/*-----------------------------------------------------------*/ + +int main(int argc, char **argv) +{ + int rc, rc1, rc2, rc3, rc4, rc5; + int *w, *h, *trow, *tcol, + gme, gsize, myi, myj; + + int hostnamelen; + char hostname[MPI_MAX_PROCESSOR_NAME]; + + rc = MPI_Init(&argc, &argv); + + if (rc != MPI_SUCCESS) + { + fprintf( + stderr, + "MAIN: Problems initializing MPI...Exiting...\n" + ); + MPI_Finalize(); + exit(EXIT_FAILURE); + } + + rc = MPI_Comm_rank( + MPI_COMM_WORLD, + &gme + ); + + if (rc != MPI_SUCCESS) + { + fprintf( + stderr, + "MAIN: Problems getting rank...Exiting...\n" + ); + MPI_Finalize(); + exit(EXIT_FAILURE); + } + + unsigned int inputsIncorrect = 0; + + if (gme == 0) + { + if (argc != 6) + { + fprintf( + stderr, + "Usage: mpirun -np

%s n p q communicate(0 | 1) verbosity(0 | 1).\n" + "If communicate is 1, process 0 communicates the matrix A, vector b to all the " + "other processes before the parallel computations...\n", argv[0]); + inputsIncorrect = 1; + } + } + + rc = MPI_Bcast( + &inputsIncorrect, + 1, MPI_UNSIGNED, + 0, MPI_COMM_WORLD); + + if (rc != MPI_SUCCESS) + { + printf("(%d):Problems broadcasting w\n", gme); + } + + if (inputsIncorrect) + { + MPI_Finalize(); + exit(EXIT_SUCCESS); + } + + int n, p, q, verbosity, communicate; + + if (gme == 0) + { + n = atoi(argv[1]); + p = atoi(argv[2]); + q = atoi(argv[3]); + communicate = atoi(argv[4]); + verbosity = atoi(argv[5]); + } + + rc1 = MPI_Bcast( + &n, + 1, MPI_INT, + 0, MPI_COMM_WORLD); + + rc2 = MPI_Bcast( + &p, + 1, MPI_INT, + 0, MPI_COMM_WORLD); + + rc3 = MPI_Bcast( + &q, + 1, MPI_INT, + 0, MPI_COMM_WORLD); + + rc4 = MPI_Bcast( + &verbosity, + 1, MPI_INT, + 0, MPI_COMM_WORLD); + + rc5 = MPI_Bcast( + &communicate, + 1, MPI_INT, + 0, MPI_COMM_WORLD); + + if ((rc1 != MPI_SUCCESS) + || (rc2 != MPI_SUCCESS) + || (rc3 != MPI_SUCCESS) + || (rc4 != MPI_SUCCESS) + || (rc5 != MPI_SUCCESS + ) + ) + { + fprintf( + stderr, + "(%d):Problems broadcasting n | p | q | communicate | verbosity\n", gme + ); + MPI_Finalize(); + exit(EXIT_SUCCESS); + } + + /* + * Now bind the processes using the abstract devices table... 
+ */ + cpu_set_t cpuSet; + CPU_ZERO(&cpuSet); + + if (verbosity) + { + printf( + "gme %d: start %d end %d.\n", + gme, hcl_coreindex[gme], hcl_coreindex[gme+1]); + printf( + "gme %d: Bound cores: ", gme); + } + + int cpu; + for (cpu = hcl_coreindex[gme]; cpu < hcl_coreindex[gme+1]; cpu++) + { + if (verbosity) + { + printf( + "%d ", hcl_corebindings[cpu]); + } + + CPU_SET(hcl_corebindings[cpu], &cpuSet); + } + + if (verbosity) + { + printf("\n"); + } + + int status = sched_setaffinity( + getpid(), + sizeof(cpu_set_t), + &cpuSet); + + if (status != 0) + { + fprintf( + stderr, + "%d: Problems setting sched_setaffinity.\n", + gme); + exit(EXIT_FAILURE); + } + + if (gme == 0) + { + printf("=======================================\n"); + printf("Executing Matrix-Vector Multiplication.\n"); + printf("=======================================\n"); + + printf( + "Inputs: n:%d, p:%d, q:%d, communicate %d.\n", + n, p, q, communicate); + } + + rc = MPI_Comm_size( + MPI_COMM_WORLD, + &gsize + ); + + if (rc != MPI_SUCCESS) + { + printf("MAIN:Problems getting size...Exiting...\n"); + } + + if (gsize != (p*q)) + { + if (gme == 0) + { + fprintf( + stderr, + "MAIN: MPI_COMM_WORLD size greater than p * q grid of processes...\n" + ); + } + MPI_Finalize(); + exit(EXIT_SUCCESS); + } + + w = (int*)malloc( + sizeof(int) + * + (p*q*p*q) + ); + + if (w == NULL) + { + fprintf(stderr, "(%d): Cannot allocate w\n", gme); + MPI_Finalize(); + exit(EXIT_FAILURE); + } + + h = (int*)malloc( + sizeof(int) + * + (p*q*p*q) + ); + + if (h == NULL) + { + printf("(%d): Cannot allocate h\n", gme); + MPI_Finalize(); + exit(EXIT_FAILURE); + } + + trow = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (trow == NULL) + { + printf("(%d): Cannot allocate trow\n", gme); + MPI_Finalize(); + exit(EXIT_FAILURE); + } + + tcol = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (tcol == NULL) + { + printf("(%d): Cannot allocate tcol\n", gme); + MPI_Finalize(); + exit(EXIT_FAILURE); + } + + if (gme == 0) + { + rc = 
HMPI_Partition_matrix_2d( + p, q, + 1, + NULL, + NULL, NULL, + n, n, + HMPI_COLUMN_BASED, + w, h, trow, tcol, + NULL, NULL + ); + + if (rc != HMPI_OK) + { + printf("(%d): Problems partitioning\n", gme); + } + } + + rc = MPI_Bcast( + w, p*q*p*q, MPI_INT, 0, MPI_COMM_WORLD + ); + + if (rc != MPI_SUCCESS) + { + printf("(%d):Problems broadcasting w\n", gme); + } + + rc = MPI_Bcast( + h, p*q*p*q, MPI_INT, 0, MPI_COMM_WORLD + ); + + if (rc != MPI_SUCCESS) + { + printf("(%d):Problems broadcasting h\n", gme); + } + + rc = MPI_Bcast( + trow, p*q, MPI_INT, 0, MPI_COMM_WORLD + ); + + if (rc != MPI_SUCCESS) + { + printf("(%d):Problems broadcasting trow\n", gme); + } + + rc = MPI_Bcast( + tcol, p*q, MPI_INT, 0, MPI_COMM_WORLD + ); + + if (rc != MPI_SUCCESS) + { + printf("(%d):Problems broadcasting tcol\n", gme); + } + + if ((verbosity > 0) && (gme == 0)) + { + printf("Partitioning of matrix is: \n"); + + print_stdin_2d_allocation_local( + p, q, n, n, + w, h, trow, tcol + ); + } + + /* + * Parallel Matrix-Vector Multiplication... 
+ */ + const int ppar[] = {p, q}; + int* mycoords = (int*)malloc(sizeof(int)* 2); + rank2coord(gme, ppar, mycoords); + myi = mycoords[0]; + myj = mycoords[1]; + free(mycoords); + + /* + * My local store is a matrix of size lda x ldb + */ + const double alpha = 1.0; + const double beta = 0.0; + + int myh = h[HMPI_RECT_INDEX(myi, myj, myi, myj, p, q)]; + int mm = myh; + int myw = w[HMPI_RECT_INDEX(myi, myj, myi, myj, p, q)]; + int nn = myw; + + double *WA = (double*)malloc( + sizeof(double) + * + (mm*nn)); + if (WA == NULL) + { + printf("me=%d: No memory to allocate my A buffer, WA\n", gme); + } + + double *Wb = (double*)malloc( + sizeof(double) + * + nn); + if (Wb == NULL) + { + printf("me=%d: No memory to allocate buffer b, Wb\n", gme); + } + + double *Wc = (double*)malloc( + sizeof(double) + * + mm); + if (Wc == NULL) + { + printf("me=%d: No memory to allocate buffer c, Wc\n", gme); + } + + initializeData(mm, nn, WA, Wb); + + struct timeval start, end; + gettimeofday(&start, NULL); + + if (communicate) + { + if (gme == 0) + { + int i, j, cIndex = 0; + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + if ((i == 0) && (j == 0)) + { + continue; + } + int mm = h[HMPI_RECT_INDEX(i, j, i, j, p, q)]; + int nn = w[HMPI_RECT_INDEX(i, j, i, j, p, q)]; + + double* tmpA = (double*)malloc( + sizeof(double)*mm*nn); + + if (tmpA == NULL) + { + printf("me=%d: No memory to allocate buffer tmpA\n", gme); + } + + double* tmpB = (double*)malloc( + sizeof(double)*nn); + + if (tmpB == NULL) + { + printf("me=%d: No memory to allocate buffer tmpB.\n", gme); + } + + initializeData(mm, nn, tmpA, tmpB); + + if (verbosity) + { + printf("Communicating A, b to process %d.\n", i*q + j); + } + + MPI_Send( + tmpA, mm*nn, MPI_DOUBLE, + i*q + j, + MPI_MSG_TAG, + MPI_COMM_WORLD); + free(tmpA); + MPI_Send( + tmpB, nn, MPI_DOUBLE, + i*q + j, + MPI_MSG_TAG, + MPI_COMM_WORLD); + free(tmpB); + } + } + initializeData(mm, nn, WA, Wb); + } + else + { + MPI_Recv( + WA, mm*nn, MPI_DOUBLE, + 0, + 
MPI_MSG_TAG, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv( + Wb, nn, MPI_DOUBLE, + 0, + MPI_MSG_TAG, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + else + { + initializeData(mm, nn, WA, Wb); + } + + cblas_dgemv(CblasRowMajor, CblasNoTrans, + mm, nn, alpha, WA, nn, Wb, 1, beta, Wc, 1); + + if (gme == 0) + { + double *c = (double*)malloc( + n*sizeof(double)); + if (c == NULL) + { + printf("me=%d: No memory to allocate buffer c, Wc\n", gme); + } + memcpy(c, Wc, sizeof(double)*mm); + + int i, j, cIndex = 0; + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + if ((i == 0) && (j == 0)) + { + continue; + } + + int myh = h[HMPI_RECT_INDEX(i, j, i, j, p, q)]; + int mm = myh; + double* tmpC = (double*)malloc( + sizeof(double)*mm); + + if (tmpC == NULL) + { + printf("me=%d: No memory to allocate buffer tmpC\n", gme); + } + + MPI_Recv( + tmpC, mm, MPI_DOUBLE, + i*q + j, + MPI_MSG_TAG, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + int elem; + for (elem = 0; elem < mm; elem++) + { + c[cIndex + elem] += tmpC[elem]; + } + + free(tmpC); + } + + cIndex += mm; + } + + if (verbosity > 1) + { + printf("Result c[]: "); + for (i = 0; i < n; i++) + { + printf("%lf ", c[i]); + } + printf("\n"); + } + + free(c); + } + else + { + if (verbosity > 1) + { + int i; + printf("Sending Wc[]: "); + for (i = 0; i < mm; i++) + { + printf("%lf ", Wc[i]); + } + printf("\n"); + } + + MPI_Send( + Wc, mm, MPI_DOUBLE, + 0, + MPI_MSG_TAG, + MPI_COMM_WORLD); + } + + gettimeofday(&end, NULL); + + double tstart = start.tv_sec + start.tv_usec/1000000.; + double tend = end.tv_sec + end.tv_usec/1000000.; + double myTime = (tend - tstart); + + free(WA); + free(Wb); + free(Wc); + + double avgSpeed, maxTime; + double myPsize = 2.0*mm*nn; + double mySpeed = (myPsize / myTime) * 1e-06; + + MPI_Reduce( + &mySpeed, + &avgSpeed, + 1, + MPI_DOUBLE, + MPI_SUM, + 0, + MPI_COMM_WORLD); + + MPI_Reduce( + &myTime, + &maxTime, + 1, + MPI_DOUBLE, + MPI_MAX, + 0, + MPI_COMM_WORLD); + + if (gme == 0) + { + double dN = n; 
+ double nxn = dN * dN * 0.001 * 0.001; + double pspeed = 2.0 * nxn / maxTime; + + printf( + "Parallel MxV successful: n=%d, " + "Average speed(MFLOPs)=%3f, " + "Parallel speed(MFLOPs)=%3f, " + "MxV execution time(sec)=%3f\n", + n, avgSpeed / (double)gsize, + pspeed, maxTime); + } + + printf( + "Me=%d: " + "Speed(MFLOPs)=%3f, " + "MxV execution time(sec)=%3f\n", + gme, + mySpeed, + myTime + ); + + free(w); + free(h); + free(trow); + free(tcol); + + MPI_Finalize(); + + exit(EXIT_SUCCESS); +} + +/*---------------------------------------------------------------*/ -- libgit2 0.22.2