diff --git a/INSTALL.txt b/INSTALL.txt index 882814e..31d49cf 100644 --- a/INSTALL.txt +++ b/INSTALL.txt @@ -24,7 +24,7 @@ To build and install: 1). cd -2). make +2). ./compile.sh This will create the executables. diff --git a/Makefile b/Makefile index 6aed5a0..ca55a31 100644 --- a/Makefile +++ b/Makefile @@ -3,12 +3,15 @@ ############################################################### CC = icc +MPICC = mpiicc NVCC = nvcc OPTFLAGS = -O3 -fopenmp CPP11FLAGS = ${OPTFLAGS} -std=c++11 MKLFLAGS = ${OPTFLAGS} -mkl +MPIMKLFLAGS = ${OPTFLAGS} -mkl -mt_mpi +HDPIHOME = hdpi/installation_dir -all: dgemvcpu dgemvphi dgemvgpu +all: dgemvcpu dgemvphi dgemvgpu absdevsreader runabsdevreader mpimain dgemvcpu: ${CC} ${MKLFLAGS} -o dgemvcpu dgemvcpu.c -lm @@ -20,7 +23,25 @@ dgemvgpu: ${NVCC} -I/usr/local/cuda/include -o dgemvgpu dgemvgpu.c \ -L/usr/local/cuda/lib64 -lcublas +absdevsreader: + @echo "--------------------------------------------" + @echo "Now compiling abstract devices reader file..." + @echo "--------------------------------------------" + @echo + ${CC} ${OPTFLAGS} -c cputopology.cpp + ${CC} ${CPP11FLAGS} -o absdevsreader absdevsreader.cpp cputopology.o + +runabsdevreader: + @echo "--------------------------------------------" + @echo "Now running abstract devices reader to generate absdevs.c..." 
+ @echo "--------------------------------------------" + @echo + ./absdevsreader ./absdevs.lst 0 + +mpimain: + $(MPICC) -I${HDPIHOME}/include ${MKLFLAGS} -o main main.cpp -L${HDPIHOME}/lib -lhdpi -lm + clean: - rm -f dgemvcpu dgemvphi dgemvgpu + rm -f dgemvcpu dgemvphi dgemvgpu *.o absdevsreader main ############################################################### diff --git a/absdevpowers.c b/absdevpowers.c new file mode 100644 index 0000000..c50d090 --- /dev/null +++ b/absdevpowers.c @@ -0,0 +1,9 @@ + +/*----------------------------------------------------------------------------*/ + +const char* hcl_powerplatforms[] = { +"CPUPCM" +}; + +/*----------------------------------------------------------------------------*/ + diff --git a/absdevs.c b/absdevs.c new file mode 100644 index 0000000..4d30592 --- /dev/null +++ b/absdevs.c @@ -0,0 +1,16 @@ + +/*----------------------------------------------------------------------------*/ + +#include "absdevs.h" + +/*----------------------------------------------------------------------------*/ + +const unsigned int hcl_coreindex[] = { +0,6,12,18,24 +}; +const unsigned int hcl_corebindings[] = { +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23 +}; + +/*----------------------------------------------------------------------------*/ + diff --git a/absdevs.h b/absdevs.h new file mode 100644 index 0000000..9d265e5 --- /dev/null +++ b/absdevs.h @@ -0,0 +1,27 @@ + +/*----------------------------------------------------------------------------*/ + +#ifndef _ABSDEVS_HH +#define _ABSDEVS_HH + +/*----------------------------------------------------------------------------*/ + +typedef struct _hcl_abstractdevicestable_ { + int(*init)(const int, const int, const int, const int, + const unsigned int, const unsigned int); + int(*gemm)(const int, const int*, const int*, const int*, + double*, double*, double*, double*); + int(*destroy)(const int); + const unsigned int nompt; +} hcl_abstractdevicestable; + +extern 
hcl_abstractdevicestable hcl_absdevtable[]; +extern const unsigned int hcl_coreindex[]; +extern const unsigned int hcl_corebindings[]; + +/*----------------------------------------------------------------------------*/ + +#endif + +/*----------------------------------------------------------------------------*/ + diff --git a/absdevs.lst b/absdevs.lst new file mode 100644 index 0000000..aeb9f02 --- /dev/null +++ b/absdevs.lst @@ -0,0 +1,34 @@ +#cores DGEMM No. of MPI processes No. of OpenMP threads +#----- ----- --------------------- --------------------- +#0-3 CPU,MKL 1 0 +#4-7 CPU,MKL 1 0 +#8-11 CPU,MKL 1 0 +#24-27 CPU,MKL 1 0 +#28-31 CPU,MKL 1 0 +#32-35 CPU,MKL 1 0 +#12-15 CPU,MKL 1 0 +#16-19 CPU,MKL 1 0 +#20-23 CPU,MKL 1 0 +#36-39 CPU,MKL 1 0 +#40-43 CPU,MKL 1 0 +#44-47 CPU,MKL 1 0 +#0-2 CPU,MKL 1 0 +#3-5 CPU,MKL 1 0 +#6-8 CPU,MKL 1 0 +#9-11 CPU,MKL 1 0 +#24-26 CPU,MKL 1 0 +#27-29 CPU,MKL 1 0 +#30-32 CPU,MKL 1 0 +#33-35 CPU,MKL 1 0 +#12-14 CPU,MKL 1 0 +#15-17 CPU,MKL 1 0 +#18-20 CPU,MKL 1 0 +#21-23 CPU,MKL 1 0 +#36-38 CPU,MKL 1 0 +#39-41 CPU,MKL 1 0 +#42-44 CPU,MKL 1 0 +#45-47 CPU,MKL 1 0 +0-5 CPU,MKL 1 0 +6-11 CPU,MKL 1 0 +12-17 CPU,MKL 1 0 +18-23 CPU,MKL 1 0 diff --git a/absdevsreader.cpp b/absdevsreader.cpp new file mode 100644 index 0000000..b2dbbd4 --- /dev/null +++ b/absdevsreader.cpp @@ -0,0 +1,375 @@ + +/*-----------------------------------------------------------*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "cputopology.hpp" + +/*-----------------------------------------------------------*/ + +int main(int argc, char** argv) +{ + if (argc != 3) + { + std::cerr << "Usage: " << argv[0] + << " " + << std::endl; + exit(EXIT_FAILURE); + } + + std::string abstractDevicesFile = argv[1]; + bool verbosity = atoi(argv[2]); + + std::ifstream absDevices(abstractDevicesFile.c_str()); + if (!absDevices.is_open()) + { + std::cerr << "Unable to open " + << abstractDevicesFile + << std::endl; + exit(EXIT_FAILURE); + } + 
+ unsigned int numLogicalCPUs, numPhysicalCPUs; + int rc = hcl::topology::getNumLogicalCpus(&numLogicalCPUs); + if (rc != 0) + { + std::cerr << "Error to get number of logical cores..." + << std::endl; + exit(EXIT_FAILURE); + } + + rc = hcl::topology::getNumPhysicalCpus(&numPhysicalCPUs); + if (rc != 0) + { + std::cerr << "Error to get number of physical cores..." + << std::endl; + exit(EXIT_FAILURE); + } + + std::cout << "Number of logical cores " << numLogicalCPUs + << std::endl; + std::cout << "Number of physical cores " << numPhysicalCPUs + << std::endl; + + std::vector nCoresList; + std::vector coresList; + std::vector abstractDevicesTable; + std::vector powerPlatforms; + + /* + * We always include the CPU + */ + powerPlatforms.push_back("CPUPCM"); + + unsigned int numCoresBound = 0; + std::string line; + nCoresList.push_back(numCoresBound); + while (std::getline(absDevices, line)) + { + /* + * Ignore comment line... + */ + if (line.find('#') != std::string::npos) + { + continue; + } + + std::stringstream ss(line); + std::string coreListing; + std::string gemmKernel; + unsigned int numMPIProcesses; + std::string numOMPThreads; + + ss >> coreListing + >> gemmKernel + >> numMPIProcesses + >> numOMPThreads; + + if (verbosity) + { + std::cout << coreListing << " " + << gemmKernel << " " + << numMPIProcesses << " " + << numOMPThreads << std::endl; + } + + if (coreListing.find('-') == std::string::npos) + { + /* + * Simplest case, just one core to bind. + */ + if (coreListing.find(',') == std::string::npos) + { + coresList.push_back(coreListing); + numCoresBound++; + nCoresList.push_back(numCoresBound); + } + else + { + /* + * There are comma separated list of cores... 
+ */ + char* cstr = new char[coreListing.length() + 1]; + strcpy(cstr, coreListing.c_str()); + char* tok = strtok(cstr, ","); + while (tok != NULL) + { + coresList.push_back(tok); + numCoresBound++; + tok = strtok(NULL, ","); + } + delete []cstr; + nCoresList.push_back(numCoresBound); + } + } + else + { + /* + * Just one range token... + */ + if (coreListing.find(',') == std::string::npos) + { + std::vector coreRange; + char* cstr = new char[coreListing.length() + 1]; + strcpy(cstr, coreListing.c_str()); + char* tok = strtok(cstr, "-"); + while (tok != NULL) + { + coreRange.push_back(atoi(tok)); + tok = strtok(NULL, ","); + } + delete []cstr; + + /* + * We expect just two elements in core range... + */ + unsigned int start = coreRange[0]; + unsigned int end = coreRange[1]; + + /* + * The MPI processes divide the cores equally amongst them... + */ + for (size_t e = 0; e < numMPIProcesses; e++) + { + numCoresBound += (end - start + 1) / numMPIProcesses; + nCoresList.push_back(numCoresBound); + } + + for (size_t e = start; e <= end; e++) + { + coresList.push_back(std::to_string(e)); + } + } + else + { + /* + * A mix of - and , + */ + char* cstr1 = (char*)coreListing.c_str(); + char* saveptr1, *saveptr2; + + char* tok = strtok_r(cstr1, ",", &saveptr1); + while (tok != NULL) + { + std::cout << tok << std::endl; + char* tok2 = strtok_r(tok, "-", &saveptr2); + std::vector coreRange; + while (tok2 != NULL) + { + std::cout << tok2 << std::endl; + coreRange.push_back(atoi(tok2)); + tok2 = strtok_r(NULL, "-", &saveptr2); + } + + /* + * We expect just two elements in core range... + */ + unsigned int start = coreRange[0]; + unsigned int end = coreRange[1]; + + /* + * The MPI processes divide the cores equally amongst them... 
+ */ + for (size_t e = 0; e < numMPIProcesses; e++) + { + numCoresBound += (end - start + 1) / numMPIProcesses; + nCoresList.push_back(numCoresBound); + } + + for (size_t e = start; e <= end; e++) + { + coresList.push_back(std::to_string(e)); + } + + tok = strtok_r(NULL, ",", &saveptr1); + } + } + } + + if (gemmKernel.find("CPU,GPU") != std::string::npos) + { + powerPlatforms.push_back("GPULITE"); + } + + if (gemmKernel.find("CPU,PHI") != std::string::npos) + { + powerPlatforms.push_back("PHILITE"); + } + + if (gemmKernel.find("CPU,FPGA") != std::string::npos) + { + powerPlatforms.push_back("FPGA"); + } + + for (size_t p = 0; p < numMPIProcesses; p++) + { + std::stringstream ssOut; + + if (gemmKernel.find("CPU") != std::string::npos) + { + ssOut << "cpuinit, cpudgemm, cpudestroy"; + } + + if (gemmKernel.find("GPU") != std::string::npos) + { + ssOut << "gpuinit, gpudgemm, gpudestroy"; + } + + if (gemmKernel.find("PHI") != std::string::npos) + { + ssOut << "phiinit, phidgemm, phidestroy"; + } + + if (gemmKernel.find("FPGA") != std::string::npos) + { + ssOut << "fpgainit, fpgadgemm, fpgadestroy"; + } + + if (numOMPThreads.find('-') != std::string::npos) + { + ssOut << ", 0"; + } + else + { + ssOut << ", " << numOMPThreads; + } + + abstractDevicesTable.push_back(ssOut.str()); + } + } + + if (numCoresBound > numLogicalCPUs) + { + std::cerr << "Number of cores bound " + << numCoresBound << " exceeded " + << "the allowed number of logical cores." + << std::endl; + exit(EXIT_FAILURE); + } + + if (numCoresBound > numPhysicalCPUs) + { + std::cerr << "Warning: Number of cores bound exceeded " + << "the allowed number of physical cores." + << std::endl; + } + + /* + * Spit out the abstract devices table... 
+ */ + std::ofstream abstractDevicesOFile; + abstractDevicesOFile.open("absdevs.c"); + + abstractDevicesOFile << "\n/*----------------------------------" + << "------------------------------------------*/\n" + << std::endl; + abstractDevicesOFile << "#include \"absdevs.h\"" << std::endl; + abstractDevicesOFile << "\n/*----------------------------------" + << "------------------------------------------*/\n" + << std::endl; + + /* + * Core bindings here... + */ + abstractDevicesOFile << "const unsigned int hcl_coreindex[] = {" + << std::endl; + size_t n = nCoresList.size(); + for (size_t e = 0; e < n; e++) + { + if (e == (n-1)) + { + abstractDevicesOFile << nCoresList[e]; + } + else + { + abstractDevicesOFile << nCoresList[e] << ","; + } + } + abstractDevicesOFile << "\n};" << std::endl; + + abstractDevicesOFile << "const unsigned int hcl_corebindings[] = {" + << std::endl; + n = coresList.size(); + for (size_t e = 0; e < n; e++) + { + if (e == (n-1)) + { + abstractDevicesOFile << coresList[e]; + } + else + { + abstractDevicesOFile << coresList[e] << ","; + } + } + abstractDevicesOFile << "\n};" << std::endl; + + abstractDevicesOFile << "\n/*----------------------------------" + << "------------------------------------------*/\n" + << std::endl; + abstractDevicesOFile.close(); + + std::ofstream abstractDevicesPowersOFile; + abstractDevicesPowersOFile.open("absdevpowers.c"); + + /* + * Compute Platforms here... 
+ */ + abstractDevicesPowersOFile << "\n/*----------------------------------" + << "------------------------------------------*/\n" + << std::endl; + + abstractDevicesPowersOFile << "const char* hcl_powerplatforms[] = {" + << std::endl; + n = powerPlatforms.size(); + for (size_t e = 0; e < n; e++) + { + if (e == (n-1)) + { + abstractDevicesPowersOFile << "\"" << powerPlatforms[e] << "\""; + } + else + { + abstractDevicesPowersOFile << "\"" << powerPlatforms[e] << "\","; + } + } + abstractDevicesPowersOFile << "\n};" << std::endl; + + abstractDevicesPowersOFile << "\n/*----------------------------------" + << "------------------------------------------*/\n" + << std::endl; + + abstractDevicesPowersOFile.close(); + + std::cout << "Abstract devices file successfully parsed" << std::endl; + + exit(EXIT_SUCCESS); +} + +/*-----------------------------------------------------------*/ + diff --git a/compile.sh b/compile.sh new file mode 100755 index 0000000..93157b7 --- /dev/null +++ b/compile.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +############################################################ + +DGEMVBASEDIR=`pwd` +HDPIBASEDIR=${DGEMVBASEDIR}/hdpi + +############################################################ +# HDPI Library +############################################################ + +echo "Building HDPI library..." + +(cd hdpi \ + && mkdir -p build \ + && cd build \ + && cmake -DCMAKE_INSTALL_PREFIX=${HDPIBASEDIR}/installation_dir .. \ + && make \ + && make install) + +############################################################ + +echo "Setting MKL variables..." +source /opt/intel/mkl/bin/mklvars.sh intel64 + +echo "Compiling heterogeneous dgemm..." 
namespace hcl {

namespace topology {

/*-----------------------------------------------------------*/

namespace {

/*
 * One data row of the "lscpu -p" output.
 *
 * Only the first three comma separated fields are kept. The third
 * field is what every query below uses as the NUMA node.
 *
 * NOTE(review): the original comments call the third field "the
 * socket"; confirm that socket and NUMA node coincide on the target
 * machines, or read the dedicated node column instead.
 */
struct LscpuRow
{
    unsigned int cpu;
    unsigned int core;
    unsigned int node;
    bool hasCore;
    bool hasNode;
};

/*
 * Parses a field that starts with a decimal digit.
 * Returns false (leaving *value untouched) otherwise.
 */
bool
parseUnsigned(
    const std::string& field,
    unsigned int* value
)
{
    if (field.empty() || field[0] < '0' || field[0] > '9')
    {
        return false;
    }

    *value = static_cast<unsigned int>(
        strtoul(field.c_str(), NULL, 10));
    return true;
}

/*
 * Splits one output line into a row. Returns false if the line does
 * not start with a CPU number; a missing or empty core/node field
 * only clears the matching has* flag.
 */
bool
parseRow(
    const char* line,
    LscpuRow* row
)
{
    row->cpu = 0;
    row->core = 0;
    row->node = 0;
    row->hasCore = false;
    row->hasNode = false;

    std::stringstream fields(line);
    std::string field;

    if (!std::getline(fields, field, ',')
        || !parseUnsigned(field, &row->cpu))
    {
        return false;
    }

    if (std::getline(fields, field, ','))
    {
        row->hasCore = parseUnsigned(field, &row->core);
    }

    if (std::getline(fields, field, ','))
    {
        row->hasNode = parseUnsigned(field, &row->node);
    }

    return true;
}

/*
 * Runs "lscpu -p" once and appends every data row to rows.
 * Lines starting with '#' are skipped.
 *
 * Returns 0 on success and -1 if the command cannot be run, cannot
 * be reaped, or prints no usable row.
 */
int
readLscpuRows(
    std::vector<LscpuRow>& rows
)
{
    FILE* commandFp = popen("lscpu -p", "r");
    if (commandFp == NULL)
    {
        std::cerr << "Error from execution of lscpu -p."
                  << std::endl;
        return -1;
    }

    char* line = NULL;
    size_t len = 0;

    while (getline(&line, &len, commandFp) != -1)
    {
        if (line[0] == '#')
        {
            continue;
        }

        LscpuRow row;
        if (parseRow(line, &row))
        {
            rows.push_back(row);
        }
    }

    free(line);

    int status = pclose(commandFp);
    if (status == -1)
    {
        std::cerr << "Error from execution of lscpu -p."
                  << std::endl;
        return -1;
    }

    if (rows.empty())
    {
        std::cerr << "No CPU entries found in the output of lscpu -p."
                  << std::endl;
        return -1;
    }

    return 0;
}

}

/*-----------------------------------------------------------*/

/*
 * Appends to siblings every other CPU that lscpu reports on the same
 * core as cpu, in the order lscpu lists them (wherever they appear
 * relative to cpu).
 *
 * Returns 0 on success (siblings may stay empty) and -1 if lscpu
 * fails or cpu is not listed.
 */
int
getSibling(
    const unsigned int cpu,
    std::vector<unsigned int>& siblings
)
{
    std::vector<LscpuRow> rows;
    if (readLscpuRows(rows) != 0)
    {
        return -1;
    }

    bool myCoreFound = false;
    unsigned int myCore = 0;

    for (size_t r = 0; r < rows.size(); r++)
    {
        if (rows[r].cpu == cpu && rows[r].hasCore)
        {
            myCore = rows[r].core;
            myCoreFound = true;
            break;
        }
    }

    if (myCoreFound == false)
    {
        std::cerr << "Failed to find siblings." << std::endl;
        return -1;
    }

    for (size_t r = 0; r < rows.size(); r++)
    {
        if (rows[r].cpu != cpu
            && rows[r].hasCore
            && rows[r].core == myCore)
        {
            siblings.push_back(rows[r].cpu);
        }
    }

    return 0;
}

/*-----------------------------------------------------------*/

/*
 * Stores in *numNUMAs the highest node number found plus one.
 *
 * Returns 0 on success and -1 if lscpu fails.
 */
int
getNumCpuNumaNodes(
    unsigned int* numNUMAs
)
{
    *numNUMAs = 0;

    std::vector<LscpuRow> rows;
    if (readLscpuRows(rows) != 0)
    {
        return -1;
    }

    for (size_t r = 0; r < rows.size(); r++)
    {
        if (rows[r].hasNode && rows[r].node > *numNUMAs)
        {
            *numNUMAs = rows[r].node;
        }
    }

    *numNUMAs = *numNUMAs + 1;

    return 0;
}

/*-----------------------------------------------------------*/

/*
 * Stores in *numaNode the node of the first row describing cpu.
 *
 * Returns 0 on success and -1 if lscpu fails or no row provides a
 * node for cpu (*numaNode is left untouched in that case).
 */
int
getCpuNumaNode(
    const unsigned int cpu,
    unsigned int* numaNode
)
{
    std::vector<LscpuRow> rows;
    if (readLscpuRows(rows) != 0)
    {
        return -1;
    }

    for (size_t r = 0; r < rows.size(); r++)
    {
        if (rows[r].cpu == cpu && rows[r].hasNode)
        {
            *numaNode = rows[r].node;
            return 0;
        }
    }

    std::cerr << "Failed to find the NUMA node of CPU " << cpu << "."
              << std::endl;
    return -1;
}

/*-----------------------------------------------------------*/

/*
 * Stores in *numTPerCore the number of CPUs listed for core 0.
 *
 * Returns 0 on success and -1 if lscpu fails.
 */
int
getNumThreadsPerCore(
    unsigned int* numTPerCore
)
{
    *numTPerCore = 0;

    std::vector<LscpuRow> rows;
    if (readLscpuRows(rows) != 0)
    {
        return -1;
    }

    /*
     * We will just count for core 0...
     */
    for (size_t r = 0; r < rows.size(); r++)
    {
        if (rows[r].hasCore && rows[r].core == 0)
        {
            *numTPerCore = *numTPerCore + 1;
        }
    }

    return 0;
}

/*-------------------------------------------------------------------------*/

/*
 * Stores in *numCPUs the number of CPU rows printed by lscpu.
 *
 * Returns 0 on success and -1 if lscpu fails.
 */
int
getNumLogicalCpus(
    unsigned int* numCPUs
)
{
    *numCPUs = 0;

    std::vector<LscpuRow> rows;
    if (readLscpuRows(rows) != 0)
    {
        return -1;
    }

    *numCPUs = static_cast<unsigned int>(rows.size());

    return 0;
}

/*-------------------------------------------------------------------------*/

/*
 * Stores in *numPhysicalCPUs the number of logical CPUs divided by
 * the number of CPUs listed for core 0.
 *
 * Returns 0 on success and -1 if lscpu fails or no CPU is listed for
 * core 0 (which would otherwise be a division by zero).
 */
int
getNumPhysicalCpus(
    unsigned int* numPhysicalCPUs
)
{
    std::vector<LscpuRow> rows;
    if (readLscpuRows(rows) != 0)
    {
        return -1;
    }

    unsigned int threadsPerCore = 0;
    for (size_t r = 0; r < rows.size(); r++)
    {
        if (rows[r].hasCore && rows[r].core == 0)
        {
            threadsPerCore++;
        }
    }

    if (threadsPerCore == 0)
    {
        std::cerr << "Failed to determine the number of threads per core."
                  << std::endl;
        return -1;
    }

    *numPhysicalCPUs =
        static_cast<unsigned int>(rows.size()) / threadsPerCore;

    return 0;
}

/*-------------------------------------------------------------------------*/

/*
 * Stores in *numCPUs the number of CPUs whose node equals numaNode.
 *
 * Returns 0 on success and -1 if lscpu fails.
 */
int
getNumCpus(
    const unsigned int numaNode,
    unsigned int* numCPUs
)
{
    *numCPUs = 0;

    std::vector<LscpuRow> rows;
    if (readLscpuRows(rows) != 0)
    {
        return -1;
    }

    for (size_t r = 0; r < rows.size(); r++)
    {
        if (rows[r].hasNode && rows[r].node == numaNode)
        {
            *numCPUs = *numCPUs + 1;
        }
    }

    return 0;
}

/*-------------------------------------------------------------------------*/

/*
 * Appends to cpus every CPU whose node equals numaNode, in the order
 * lscpu lists them.
 *
 * Returns 0 on success and -1 if lscpu fails.
 */
int
getCpus(
    const unsigned int numaNode,
    std::vector<unsigned int>& cpus
)
{
    std::vector<LscpuRow> rows;
    if (readLscpuRows(rows) != 0)
    {
        return -1;
    }

    for (size_t r = 0; r < rows.size(); r++)
    {
        if (rows[r].hasNode && rows[r].node == numaNode)
        {
            cpus.push_back(rows[r].cpu);
        }
    }

    return 0;
}

/*-----------------------------------------------------------*/

}

}

/*-----------------------------------------------------------*/
+ */ +int +getNumThreadsPerCore( + unsigned int* numTPerCore +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the total number of logical cores. + * + * @param numLogicalCPUs The number of logical CPUs. + * + * @return HCL_SUCCESS if the query is successful. + */ +int +getNumLogicalCpus( + unsigned int* numLogicalCPUs +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the total number of physical cores. + * + * @param numPhysicalCPUs The number of physical CPUs. + * + * @return HCL_SUCCESS if the query is successful. + */ +int +getNumPhysicalCpus( + unsigned int* numPhysicalCPUs +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the number of CPUs in a NUMA node. + * + * @param numaNode The NUMA node identifier. + * @param numCPUs The number of CPUs. + * + * @return HCL_SUCCESS if the query is successful. + */ +int +getNumCpus( + const unsigned int numaNode, + unsigned int* numCPUs +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the CPUs in a NUMA node. + * + * @param numaNode The NUMA node identifier. + * @param cpus The number of CPUs. + * + * @return HCL_SUCCESS if the query is successful. + */ +int +getCpus( + const unsigned int numaNode, + std::vector& cpus +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the number of CPU NUMA nodes. + * + * @param numNUMAs The number of CPU NUMA nodes. + * + * @return HCL_SUCCESS if the query is successful. + */ +int +getNumCpuNumaNodes( + unsigned int* numNUMAs +); + +/*-----------------------------------------------------------*/ + +/** + * Returns the CPU NUMA node that has the cpu. + * + * @param cpu The cpu. + * @param numaNode The numa node containing the cpu returned. + * + * @return HCL_SUCCESS if the query is successful. 
+ */ +int +getCpuNumaNode( + const unsigned int cpu, + unsigned int* numaNode +); + +/*-----------------------------------------------------------*/ + +} + +} + +/*-----------------------------------------------------------*/ + +#endif /*_HCL_CPUTOPOLOGY_HPP_ */ + +/*-------------------------------------------------------------------------*/ + diff --git a/hdpi/CMakeLists.txt b/hdpi/CMakeLists.txt new file mode 100644 index 0000000..965aa97 --- /dev/null +++ b/hdpi/CMakeLists.txt @@ -0,0 +1,44 @@ + +#-----------------------------------------------------------# + +cmake_minimum_required(VERSION 2.8 FATAL_ERROR) + +#-----------------------------------------------------------# + +project(HDPI) + +#-----------------------------------------------------------# + +set (HEADERS hdpi.h + hmpi_err.h + hmpi_partitioning.h + hmpi_partitioning_internal.h + hmpi_partitioning_matrices.h + hmpi_partitioning_sets.h + hmpi_partitioning_graphs.h + hmpi_partitioning_trees.h + hmpi_partitioning_types.h) + +#-----------------------------------------------------------# + +set (SOURCES hmpi_partitioning_sets.c + hmpi_partitioning_sets_speed_function_of_problem_size.c + hmpi_partitioning_sets_speed_function_of_problem_size_weighted_elements.c + hmpi_partitioning_sets_speed_single_numbers_weighted_elements.c + hmpi_partitioning_graphs.c + hmpi_partitioning_matrices.c) + +#-----------------------------------------------------------# + +include_directories (${PROJECT_SOURCE_DIR}) + +#-----------------------------------------------------------# + +add_library(hdpi ${HEADERS} ${SOURCES}) + +#-----------------------------------------------------------# + +install(TARGETS hdpi DESTINATION lib) +install(FILES ${HEADERS} DESTINATION include) + +#-----------------------------------------------------------# diff --git a/hdpi/hdpi.h b/hdpi/hdpi.h new file mode 100644 index 0000000..2b3b594 --- /dev/null +++ b/hdpi/hdpi.h @@ -0,0 +1,64 @@ + + /**************************************************/ + /* 
HeteroDPI - Heterogeneous Data */ + /* Partitioning Interface */ + /* */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /**************************************************/ + + #ifndef __HDPI_HH + #define __HDPI_HH + + #ifdef __cplusplus + extern "C" { + #endif + + + #include "hmpi_err.h" + #include "hmpi_partitioning.h" + + /* + * sets + */ + #define Partition_unordered_set HMPI_Partition_unordered_set + #define Partition_ordered_set HMPI_Partition_ordered_set + #define Get_set_processor HMPI_Get_set_processor + #define Get_my_partition HMPI_Get_my_partition + + /* + * matrices + */ + #define Partition_matrix_2d HMPI_Partition_matrix_2d + #define Partition_matrix_1d_dp HMPI_Partition_matrix_1d_dp + #define Partition_matrix_1d_iterative HMPI_Partition_matrix_1d_iterative + #define Partition_matrix_1d_refining HMPI_Partition_matrix_1d_refining + #define Get_matrix_processor HMPI_Get_matrix_processor + #define Get_processor_2d HMPI_Get_processor_2d + #define Get_processor_1d HMPI_Get_processor_1d + #define Print_rectangle_1d HMPI_Print_rectangle_1d + #define Print_rectangle_2d HMPI_Print_rectangle_2d + #define Common_height HMPI_Common_height + #define Get_my_width HMPI_Get_my_width + #define Get_my_height HMPI_Get_my_height + #define Get_diagonal HMPI_Get_diagonal + #define Get_my_elements HMPI_Get_my_elements + #define Get_my_kk_elements HMPI_Get_my_kk_elements + + /* + * graphs + */ + #define Partition_graph HMPI_Partition_graph + #define Partition_bipartite_graph HMPI_Partition_bipartite_graph + #define Partition_hypergraph HMPI_Partition_hypergraph + + /* + * trees + */ + #define Partition_tree HMPI_Partition_tree + + #ifdef __cplusplus + } + #endif + + #endif /* __HDPI_HH */ diff --git a/hdpi/hmpi_err.h b/hdpi/hmpi_err.h new file mode 100644 index 0000000..54911c5 --- /dev/null +++ b/hdpi/hmpi_err.h @@ -0,0 +1,58 @@ + +/************************************************************************* +* * +* HeteroMPI Programming Environment * +* 
================================= * +* * +* Copyright (c) 2005 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. We assume no responsibility for the use * +* or reliability of our software. * +* * +*************************************************************************/ + + /************************************************/ + /* Error codes for the HeteroMPI Library */ + /* */ + /* Revision history */ + /* 01-02-2002 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_ERR_HH + #define __HMPI_ERR_HH + + #define MPC_OK 0 + #define MPC_ERR_NOMEM 17 + #define MPC_ERR_LAST 36 + + /* + * HMPI success and error codes + */ + #define HMPI_OK MPC_OK + #define HMPI_SUCCESS MPC_OK + #define HMPI_ERR_NOMEM MPC_ERR_NOMEM + #define HMPI_NOT_MEMBER (MPC_ERR_LAST + 1) + #define HMPI_NULL_GROUP (MPC_ERR_LAST + 2) + #define HMPI_ERR_GROUP_NOT_EXIST (MPC_ERR_LAST + 3) + #define HMPI_INVALID_GROUP (MPC_ERR_LAST + 4) + #define HMPI_INVALID_PARAMS (MPC_ERR_LAST + 5) + #define HMPI_INVALID_OPERAND_TYPE (MPC_ERR_LAST + 6) + #define HMPI_INVALID_OPERATOR (MPC_ERR_LAST + 7) + #define HMPI_ERROR_CONDITION (MPC_ERR_LAST + 8) + #define HMPI_NOT_HOST (MPC_ERR_LAST + 9) + #define HMPI_NOT_HOST_AND_NOT_FREE (MPC_ERR_LAST + 10) + #define HMPI_ERR_INTERNAL (MPC_ERR_LAST + 11) + #define HMPI_ERR_PARTITION_SET (MPC_ERR_LAST + 12) + #define HMPI_ERR_MLIMITS (MPC_ERR_LAST + 13) + #define HMPI_ERR_INVALID_DIMP (MPC_ERR_LAST + 14) + #define HMPI_ERR_PARTITION_MATRIX (MPC_ERR_LAST + 15) + #define HMPI_ERR_PARTITION_NOT_EXISTS (MPC_ERR_LAST + 16) + #define HMPI_ERR_PARTITION_GRAPH (MPC_ERR_LAST + 17) + #define HMPI_ERR_PARTITION_TREE (MPC_ERR_LAST + 19) + #define HMPI_ERR_METRIC (MPC_ERR_LAST + 20) + #define HMPI_LAST_ERROR (MPC_ERR_LAST + 21) + + #define HMPI_UNDEFINED -1 + + #endif /* __HMPI_ERR_HH */ diff --git a/hdpi/hmpi_partitioning.h b/hdpi/hmpi_partitioning.h new file mode 100644 index 0000000..553d6a8 --- 
/dev/null +++ b/hdpi/hmpi_partitioning.h @@ -0,0 +1,33 @@ + +/************************************************************************* +* * +* Heterogeneous Data Partitioning Interface * +* ========================================= * +* * +* Copyright (c) 2002 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. We assume no responsibility for the use * +* or reliability of our software. * +* * +*************************************************************************/ + + /************************************************/ + /* hmpi_partitioning - Partitioning interfaces */ + /* for the HMPI Library */ + /* */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_PARTITIONING_HH + #define __HMPI_PARTITIONING_HH + + #include "hmpi_partitioning_types.h" + #include "hmpi_partitioning_sets.h" + #include "hmpi_partitioning_internal.h" + #include "hmpi_partitioning_matrices.h" + #include "hmpi_partitioning_graphs.h" + #include "hmpi_partitioning_trees.h" + + #endif /* __HMPI_PARTITIONING_HH */ diff --git a/hdpi/hmpi_partitioning_graphs.c b/hdpi/hmpi_partitioning_graphs.c new file mode 100644 index 0000000..10b6644 --- /dev/null +++ b/hdpi/hmpi_partitioning_graphs.c @@ -0,0 +1,1780 @@ + + /************************************************/ + /* Partitioning interfaces for graphs */ + /* */ + /* Revision history */ + /* 22-04-2004 -- Initial version */ + /************************************************/ + + #include + + #include + #include + #include + + /*-----------------------------------------------------*/ + + int HMPI_Partition_graph( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + int m, + const int *vwgt, + const int *xadj, + const int *adjacency, + const int *adjwgt, + int *vp, + int *edgecut + ) + { + int i, j, rc; + + /* + * Use the partitioning interface for Set when + * edges have no 
weights. + */ + if ((vwgt == NULL) + && (xadj == NULL) + && (adjacency == NULL) + && (adjwgt == NULL + ) + ) + { + rc = HMPI_Partition_set( + p, + pn, + speeds, + psizes, + mlimits, + n, + NULL, + 0, + 1, + -1, + NULL, + NULL, + vp + ); + + if (rc != HMPI_OK) + { + return rc; + } + + *edgecut = 0; + + for (i = 0; i < n; i++) + { + int owner_processor = vp[i]; + + for (j = xadj[i]; j < xadj[i+1]; j++) + { + int neighbor = adjacency[j]; + + if (vp[neighbor] != owner_processor) + { + (*edgecut)++; + } + } + } + + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. 
+ */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * Ideally all the partitions should be equally weighted. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * Ideally all the partitions should be equally weighted. + * There is a upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * Ideally all the partitions should be equally weighted. + * There is a upper bound on the number of elements + * stored by each processor. 
+ */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. Edgecut is the total number + * of edges that straddle partitions. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * Speeds of processors are functions of problem size. 
+ * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. 
+ */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. 
+ * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. 
+ */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * speeds are functions of problem size. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * speeds are functions of problem size. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + printf("Parameters are erroneous\n"); + return HMPI_ERR_PARTITION_GRAPH; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_bipartite_graph( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + int m, + const int *vtype, + const int *vwgt, + const int *xadj, + const int *adjacency, + const int *adjwgt, + int type_of_partitioning, + int *vp, + int *edgecut + ) + { + int i, j, rc; + + /* + * Use the partitioning interface for Set when + * edges have no weights. And there is no + * adjacency matrix. The two disjoint subsets are + * not considered separately. 
+ */ + if ((vwgt == NULL) + && (xadj == NULL) + && (adjacency == NULL) + && (adjwgt == NULL) + && (type_of_partitioning == PARTITION_OTHER + ) + ) + { + rc = HMPI_Partition_set( + p, + pn, + speeds, + psizes, + mlimits, + n, + NULL, + 0, + 1, + -1, + NULL, + NULL, + vp + ); + + if (rc != HMPI_OK) + { + return rc; + } + + *edgecut = 0; + + for (i = 0; i < n; i++) + { + int owner_processor = vp[i]; + + for (j = xadj[i]; j < xadj[i+1]; j++) + { + int neighbor = adjacency[j]; + + if (vp[neighbor] != owner_processor) + { + (*edgecut)++; + } + } + } + + return HMPI_OK; + } + + /* + * Use the partitioning interface for Set when + * edges have no weights. And there is no + * adjacency matrix. The two disjoint subsets are + * considered separately. + */ + if ((vwgt == NULL) + && (xadj == NULL) + && (adjacency == NULL) + && (adjwgt == NULL) + && (type_of_partitioning == PARTITION_SUBSET + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + if (type_of_partitioning == PARTITION_OTHER) + { + return HMPI_Partition_graph( + p, + pn, + speeds, + psizes, + mlimits, + n, + m, + vwgt, + xadj, + adjacency, + adjwgt, + vp, + edgecut + ); + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is no upper bound on the number of elements + * stored by each processor. + * The number of vertices in each partition in each subset + * should be the same. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is an upper bound on the number of elements + * stored by each processor. + * The number of vertices in each partition in each subset + * should be the same. 
+ */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + * The number of vertices in each partition in each subset + * should be the same. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + * The number of vertices in each partition in each subset + * should be the same. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * Ideally all the partitions in each subset should + * be equally weighted. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is a upper bound on the number of elements + * stored by each processor. + * Ideally all the partitions in each subset should + * be equally weighted. 
+ */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + * Ideally all the partitions in each subset should + * be equally weighted. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is a upper bound on the number of elements + * stored by each processor. + * Ideally all the partitions in each subset should + * be equally weighted. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each subset + * is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the total number + * of edges that straddle partitions. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each subset + * is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the total number + * of edges that straddle partitions. 
+ * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each subset + * is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the total number + * of edges that straddle partitions. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each subset + * is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the total number + * of edges that straddle partitions. + * Speeds of processors are functions of problem size. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each subset + * is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. 
+ */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * in each subset is proportional to the speed + * of the processor owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each + * subset is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition in each + * subset is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. 
+ */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition in each + * subset is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition + * in each subset is proportional to the speed of + * the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition in each + * subset is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition in each + * subset is proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. 
+ * There is an upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning + * that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * speeds are functions of problem size. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. 
+ * (b) The edgecut is minimal. + * speeds are functions of problem size. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (adjwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + printf("Parameters are erroneous\n"); + return HMPI_ERR_PARTITION_GRAPH; + } + + /*-----------------------------------------------------*/ + + /* + * There are two methods provided by hMETIS to partition + * a hypergraph. + * One using multilevel recursive bisection and the other + * using multilevel k-way partitioning + * hMETIS provides options to define the quality criteria + * that can be used for partitioning. + * We use default options for the present but however when + * hMETIS is integrated with HMPI, application programmers + * will be allowed to choose the options. + * + */ + int HMPI_Partition_hypergraph( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int nv, + int nedges, + const int *vwgt, + const int *hptr, + const int *hind, + const int *hwgt, + int *vp, + int *edgecut + ) + { + int i, j, rc; + + /* + * Use the partitioning interface for Set when + * edges have no weights. 
+ */ + if ((vwgt == NULL) + && (hptr == NULL) + && (hind == NULL) + && (hwgt == NULL + ) + ) + { + rc = HMPI_Partition_set( + p, + pn, + speeds, + psizes, + mlimits, + nv, + NULL, + 0, + 1, + -1, + NULL, + NULL, + vp + ); + + if (rc != HMPI_OK) + { + return rc; + } + + *edgecut = 0; + + for (i = 0; i < nedges; i++) + { + int owner_processor, ix = 0; + int *vertex = (int*)malloc( + sizeof(int) + * + (hptr[i+1] - hptr[i]) + ); + + if (vertex == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = hptr[i]; j < hptr[i+1]; j++) + { + vertex[ix++] = hind[j]; + } + + owner_processor = vp[vertex[0]]; + + for (j = 1; j < (hptr[i+1] - hptr[i]); j++) + { + if (vp[vertex[j]] != owner_processor) + { + (*edgecut)++; + } + } + + free(vertex); + } + + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt == NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt == NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. 
+ */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt == NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt == NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * Ideally all the partitions should be equally weighted. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt != NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the total number of edges that straddle + * partitions. + * Ideally all the partitions should be equally weighted. + * There is a upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt != NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits == NULL) + && (vwgt != NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that the edgecut is minimal + * Edgecut is the sum of the weight of the straddling edges. 
+ * Ideally all the partitions should be equally weighted. + * There is a upper bound on the number of elements + * stored by each processor. + */ + if ((speeds == NULL) + && (mlimits != NULL) + && (vwgt != NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. Edgecut is the total number + * of edges that straddle partitions. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. 
+ */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is proportional + * to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * Speeds of processors are functions of problem size. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. 
Edgecut is the sum + * of the weights of the straddling edges. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt == NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The number of vertices in each partition is + * proportional to the speed of the processor + * owning that partition. + * (b) The edgecut is minimal. Edgecut is the sum + * of the weights of the straddling edges. + * There is an upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt == NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. 
+ */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is an upper bound on the number of elements + * stored by each processor. + * Speeds of processors are functions of problem size. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (hwgt == NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. 
+ * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn == 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * speeds are functions of problem size. + * There is no upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits == NULL) + && (vwgt != NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + /* + * Partition the graph such that + * (a) The sum of weights of vertices in each partition is + * proportional to the speed of the processor owning that partition. + * (b) The edgecut is minimal. + * speeds are functions of problem size. + * There is an upper bound on the number of elements + * stored by each processor. + */ + if ((speeds != NULL) + && (pn > 1) + && (mlimits != NULL) + && (vwgt != NULL) + && (hwgt != NULL + ) + ) + { + printf("Implementation currently not available\n"); + return HMPI_OK; + } + + printf("Parameters are erroneous\n"); + return HMPI_ERR_PARTITION_GRAPH; + } + + /*-----------------------------------------------------*/ diff --git a/hdpi/hmpi_partitioning_graphs.h b/hdpi/hmpi_partitioning_graphs.h new file mode 100644 index 0000000..4b51900 --- /dev/null +++ b/hdpi/hmpi_partitioning_graphs.h @@ -0,0 +1,78 @@ + +/************************************************************************* +* * +* Heterogeneous Data Partitioning Interface * +* ========================================= * +* * +* Copyright (c) 2002 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. 
We assume no responsibility for the use * +* or reliability of our software. * +* * +*************************************************************************/ + + /************************************************/ + /* Partitioning interfaces for graphs */ + /* */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_PARTITIONING_GRAPHS_HH + #define __HMPI_PARTITIONING_GRAPHS_HH + + #define PARTITION_SUBSET 1 + #define PARTITION_OTHER 2 + + int HMPI_Partition_graph( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + int m, + const int *vwgt, + const int *xadj, + const int *adjacency, + const int *adjwgt, + int *vp, + int *edgecut + ); + + int HMPI_Partition_bipartite_graph( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + int m, + const int *vtype, + const int *vwgt, + const int *xadj, + const int *adjacency, + const int *adjwgt, + int type_of_partitioning, + int *vp, + int *edgecut + ); + + int HMPI_Partition_hypergraph( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int nv, + int nedges, + const int *vwgt, + const int *hptr, + const int *hind, + const int *hwgt, + int *vp, + int *edgecut + ); + + #endif /* __HMPI_PARTITIONING_GRAPHS_HH */ diff --git a/hdpi/hmpi_partitioning_internal.h b/hdpi/hmpi_partitioning_internal.h new file mode 100644 index 0000000..7e31075 --- /dev/null +++ b/hdpi/hmpi_partitioning_internal.h @@ -0,0 +1,284 @@ +/************************************************************************* +* * +* Heterogeneous Data Partitioning Interface * +* ========================================= * +* * +* Copyright (c) 2002 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. We assume no responsibility for the use * +* or reliability of our software. 
* +* * +*************************************************************************/ + + /************************************************/ + /* partitioning internal interfaces */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_PARTITIONING_INTERNAL_HH + #define __HMPI_PARTITIONING_INTERNAL_HH + + int __HMPI_Homogeneous_distribution_with_mlimits + ( + int p, + int n, + const int* mlimits, + int *np + ); + + int __HMPI_Homogeneous_distribution_with_mlimits_and_weights_ordered_sets + ( + int p, + int n, + const int* mlimits, + const int* w, + int *np + ); + + int __HMPI_Partition_set_homogeneous + ( + int p, + const int *mlimits, + int n, + const int *w, + int ordering, + int processor_ordering, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Speeds_are_single_numbers_with_mlimits + ( + int p, + const double *speeds, + const int *bounds, + int n, + int *np + ); + + int __HMPI_Number_of_elements_proportional_to_speed + ( + int p, + int n, + const double *speeds, + int *allocations + ); + + double __HMPI_System_defined_metric + ( + int p, + const double *speeds, + const int *actual, + const int *ideal + ); + + int __HMPI_Size_of_bins + ( + int p, + int n, + const double *speeds, + const int *w, + int *wallocations, + int *tsum + ); + + int __HMPI_Sum_of_weights_for_ordered_set + ( + int p, + int n, + const double *speeds, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Apply_mlimits_to_ordered_sum_of_weights + ( + int p, + int n, + const double *speeds, + const int *mlimits, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Sum_of_weights_for_nonordered_set + ( + int p, + int n, + const double *speeds, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int 
__HMPI_Apply_mlimits_to_unordered_sum_of_weights_algo_2 + ( + int p, + int n, + const double *speeds, + const int *mlimits, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Apply_mlimits_to_unordered_sum_of_weights + ( + int p, + int n, + const double *speeds, + const int *mlimits, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int + __HMPI_Distribute_with_single_number_for_speed + ( + int n, + int p, + const double *s, + double *npd + ); + + int __HMPI_Recursive_bisection_middle_region + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + double slopei, + double slopef, + double *speeds_opt, + double *npd + ); + + int __HMPI_Speed_function_of_problem_size_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *bounds, + int n, + double *speeds_opt, + int *np + ); + + int __HMPI_Speed_function_of_problem_size + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + double *speeds_opt, + int *np + ); + + int __HMPI_Sum_of_weights_for_nonordered_set_speed_functions + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Sum_of_weights_for_nonordered_set_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const 
int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + #endif diff --git a/hdpi/hmpi_partitioning_matrices.c b/hdpi/hmpi_partitioning_matrices.c new file mode 100644 index 0000000..66d2736 --- /dev/null +++ b/hdpi/hmpi_partitioning_matrices.c @@ -0,0 +1,5190 @@ + + /************************************************/ + /* Implementation of Partitioning interfaces */ + /* for matrices */ + /* */ + /* Revision history */ + /* 23-05-2003 -- Initial version */ + /************************************************/ + + #include + #include + #include + + #include + #include + + static int HMPI_Debug_flag = 0; + + #ifndef min + #define min(x, y) ((x < y) ? 
x : y) + #endif + + /*-----------------------------------------------------*/ + + int HMPI_Create_rectangles_1d_recursive( + int p, + const int *row_np, + const int *column_np, + int *w, + int *h, + int *trow, + int *tcolumn + ) + { + } + + /*-----------------------------------------------------*/ + + int HMPI_Create_rectangles_1d( + int p, + int q, + int m, + int n, + const int *row_np, + const int *column_np, + int *w, + int *h, + int *trow, + int *tcolumn, + int *ci + ) + { + int i, j, k, x, y; + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + w[HMPI_RECT_INDEX(i, j, i, j, p, q)] = column_np[i*q+j]; + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + h[HMPI_RECT_INDEX(i, j, i, j, p, q)] = row_np[i*q+j]; + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + trow[i*q+j] = 0; + for (k = 0; k < i; k++) + { + trow[i*q+j] += h[HMPI_RECT_INDEX(k, j, k, j, p, q)]; + } + } + } + + for (j = 0; j < q; j++) + { + for (i = 0; i < p; i++) + { + tcolumn[i*q+j] = 0; + for (k = 0; k < j; k++) + { + tcolumn[i*q+j] += w[HMPI_RECT_INDEX(i, k, i, k, p, q)]; + } + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (x = 0; x < p; x++) + { + for (y = 0; y < q; y++) + { + int width = HMPI_Common_height( + tcolumn[i*q+j], + (tcolumn[i*q+j] + + + w[HMPI_RECT_INDEX(i, j, i, j, p, q)] + ), + tcolumn[x*q+y], + (tcolumn[x*q+y] + + + w[HMPI_RECT_INDEX(x, y, x, y, p, q)] + ) + ); + + w[HMPI_RECT_INDEX(i, j, x, y, p, q)] = width; + } + } + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (x = 0; x < p; x++) + { + for (y = 0; y < q; y++) + { + int height = HMPI_Common_height( + trow[i*q+j], + (trow[i*q+j] + + + h[HMPI_RECT_INDEX(i, j, i, j, p, q)] + ), + trow[x*q+y], + (trow[x*q+y] + + + h[HMPI_RECT_INDEX(x, y, x, y, p, q)] + ) + ); + + h[HMPI_RECT_INDEX(i, j, x, y, p, q)] = height; + } + } + } + } + + if (ci == NULL) + { + return HMPI_OK; + } + + for (x = 0; x < p; x++) + { + for (y = 0; y < q; 
y++) + { + int hi = h[HMPI_RECT_INDEX(x, y, x, y, p, q)]; + int wi = w[HMPI_RECT_INDEX(x, y, x, y, p, q)]; + int tr = trow[x*q+y]; + int tc = tcolumn[x*q+y]; + + for (i = 0; i < hi; i++) + { + for (j = 0; j < wi; j++) + { + ci[(tr+i)*n+tc+j] = x*q+y; + } + } + } + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Create_rectangles_2d + ( + int p, + int q, + int m, + int n, + const int *row_np, + const int *column_np, + int *w, + int *h, + int *trow, + int *tcolumn, + int *ci, + int *cj + ) + { + int i, j, k, x, y; + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + w[HMPI_RECT_INDEX(i, j, i, j, p, q)] = column_np[i*q+j]; + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + h[HMPI_RECT_INDEX(i, j, i, j, p, q)] = row_np[i*q+j]; + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + trow[i*q+j] = 0; + for (k = 0; k < i; k++) + { + trow[i*q+j] += h[HMPI_RECT_INDEX(k, j, k, j, p, q)]; + } + } + } + + for (j = 0; j < q; j++) + { + for (i = 0; i < p; i++) + { + tcolumn[i*q+j] = 0; + for (k = 0; k < j; k++) + { + tcolumn[i*q+j] += w[HMPI_RECT_INDEX(i, k, i, k, p, q)]; + } + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (x = 0; x < p; x++) + { + for (y = 0; y < q; y++) + { + int width = HMPI_Common_height( + tcolumn[i*q+j], + (tcolumn[i*q+j] + + + w[HMPI_RECT_INDEX(i, j, i, j, p, q)] + ), + tcolumn[x*q+y], + (tcolumn[x*q+y] + + + w[HMPI_RECT_INDEX(x, y, x, y, p, q)] + ) + ); + + w[HMPI_RECT_INDEX(i, j, x, y, p, q)] = width; + } + } + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (x = 0; x < p; x++) + { + for (y = 0; y < q; y++) + { + int height = HMPI_Common_height( + trow[i*q+j], + (trow[i*q+j] + + + h[HMPI_RECT_INDEX(i, j, i, j, p, q)] + ), + trow[x*q+y], + (trow[x*q+y] + + + h[HMPI_RECT_INDEX(x, y, x, y, p, q)] + ) + ); + + h[HMPI_RECT_INDEX(i, j, x, y, p, q)] = height; + } + } + } + } + + if ((ci == NULL) + && (cj == 
NULL + ) + ) + { + return HMPI_OK; + } + + for (x = 0; x < p; x++) + { + for (y = 0; y < q; y++) + { + int hi = h[HMPI_RECT_INDEX(x, y, x, y, p, q)]; + int wi = w[HMPI_RECT_INDEX(x, y, x, y, p, q)]; + int tr = trow[x*q+y]; + int tc = tcolumn[x*q+y]; + + for (i = 0; i < hi; i++) + { + for (j = 0; j < wi; j++) + { + ci[(tr+i)*n+tc+j] = x; + cj[(tr+i)*n+tc+j] = y; + } + } + } + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int _HMPI_Factor( + int divisor, + int* quotient, + int **factors, + int *numf, + int *max_n_of_factors + ) + { + int i; + + while (((*quotient)%divisor) == 0) + { + (*factors)[(*numf)++] = divisor; + + /* + * Copy and enlarge the array + */ + if ((*numf) >= (*max_n_of_factors)) + { + int tempmax; + int *tempf = (int*)malloc( + sizeof(int) + * + (*max_n_of_factors) + ); + + if (tempf == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < (*max_n_of_factors); i++) + { + tempf[i] = (*factors)[i]; + } + + free(factors[0]); + + tempmax = (*max_n_of_factors); + (*max_n_of_factors) *= 2; + + factors[0] = (int*)malloc( + sizeof(int) + * + (*max_n_of_factors) + ); + + if (factors[0] == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < tempmax; i++) + { + (*factors)[i] = tempf[i]; + } + + free(tempf); + } + + (*quotient) /= divisor; + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int _HMPI_Get_factors( + int n, + int *numf, + int **result + ) + { + int rc; + int quotient = n; + int divisor, maxDivisor; + int max_n_of_factors = 2; + + *numf = 0; + + result[0] = (int*)malloc( + sizeof(int) + * + max_n_of_factors + ); + + if (result[0] == NULL) + { + return MPC_ERR_NOMEM; + } + + // + // Try special cases of 2 and 3 + rc = _HMPI_Factor( + 2, + "ient, + result, + numf, + &max_n_of_factors + ); + + if (rc != HMPI_OK) + { + return rc; + } + + rc = _HMPI_Factor( + 3, + "ient, + result, + numf, + &max_n_of_factors + ); + + if (rc != HMPI_OK) + { + return rc; + } 
+ + // + // Try pairs of the form 6m-1 and 6m+1 + // (i.e. 5, 7, 11, 13, 17, 19, . .) + maxDivisor = sqrt(quotient); + for (divisor = 5; divisor <= maxDivisor; divisor+=6) + { + rc = _HMPI_Factor( + divisor, + "ient, + result, + numf, + &max_n_of_factors + ); + + if (rc != HMPI_OK) + { + return rc; + } + + rc = _HMPI_Factor( + divisor+2, + "ient, + result, + numf, + &max_n_of_factors + ); + + if (rc != HMPI_OK) + { + return rc; + } + } + + // store final factor + if (quotient > 1) + { + (*result)[(*numf)++] = quotient; + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Homogeneous_matrix_1d_no_mlimits + ( + int p, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci + ) + { + int *row_np, *column_np; + + /* + * This could be a open problem. + * The first question is can we use a + * 1D array as a logical 2D array of 1*p + * processes or p*1 processes. + * If not, what grid to use. Does it matter. + * We find all factors of p and form a + * grid. May not be a optimal one. 
+ */ + int i, j; + int f1 = 1; + int f2 = 1; + + f1 = sqrt(p); + f2 = sqrt(p); + + if ((f1*f2) != p) + { + int numf, rc; + int **result = (int**)malloc( + sizeof(int*) + ); + + if (result == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = _HMPI_Get_factors( + p, + &numf, + result + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (f1 = 1, j = 0; j < numf; j+=2) + { + f1 *= (*result)[j]; + } + + for (f2 = 1, j = 1; j < numf; j+=2) + { + f2 *= (*result)[j]; + } + + free(result[0]); + free(result); + } + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> HMPI_Homogeneous_matrix_1d_no_mlimits: 1d array %d converted to 2d grid" + " (%d,%d)\n", + p, + f1, + f2 + ); + } + + row_np = (int*)malloc( + sizeof(int) + * + (f1*f2) + ); + + if (row_np == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np = (int*)malloc( + sizeof(int) + * + (f1*f2) + ); + + if (column_np == NULL) + { + return MPC_ERR_NOMEM; + } + + if ((m < f1) + && (n < f2 + ) + ) + { + for (i = 0; i < m; i++) + { + for (j = 0; j < f2; j++) + { + row_np[i*f2+ j] = 1; + } + } + + for (i = m; i < f1; i++) + { + for (j = 0; j < f2; j++) + { + row_np[i*f2+ j] = 0; + } + } + + for (i = 0; i < n; i++) + { + for (j = 0; j < f1; j++) + { + column_np[i*f1 + j] = 1; + } + } + + for (i = n; i < f2; i++) + { + for (j = 0; j < f1; j++) + { + column_np[i*f1 + j] = 0; + } + } + } + + if ((m < f1) + && (n >= f2 + ) + ) + { + for (i = 0; i < m; i++) + { + for (j = 0; j < f2; j++) + { + row_np[i*f2+ j] = 1; + } + } + + for (i = m; i < f1; i++) + { + for (j = 0; j < f2; j++) + { + row_np[i*f2+ j] = 0; + } + } + + for (i = 0; i < f2; i++) + { + for (j = 0; j < m; j++) + { + column_np[i + j*f2] = n/f2; + } + + for (j = m; j < f1; j++) + { + column_np[i + j*f2] = 0; + } + } + + for (i = 0; i < m; i++) + { + column_np[i*f2] += n%f2; + } + } + + if ((m >= f1) + && (n >= f2 + ) + ) + { + for (i = 0; i < f1; i++) + { + for (j = 0; j < f2; j++) + { + row_np[i*f2+ j] = m/f1; + } + } + + for (i = 0; i < f2; i++) + { + row_np[i] += m%f1; 
+ } + + for (i = 0; i < f2; i++) + { + for (j = 0; j < f1; j++) + { + column_np[i + j*f2] = n/f2; + } + } + + for (i = 0; i < f1; i++) + { + column_np[i*f2] += n%f2; + } + } + + if ((m >= f1) + && (n < f2 + ) + ) + { + for (i = 0; i < n; i++) + { + for (j = 0; j < f1; j++) + { + column_np[i*f1 + j] = 1; + } + } + + for (i = n; i < f2; i++) + { + for (j = 0; j < f1; j++) + { + column_np[i*f1 + j] = 0; + } + } + + for (i = 0; i < f1; i++) + { + for (j = 0; j < n; j++) + { + row_np[i*f2+ j] = m/f1; + } + + for (j = n; j < f2; j++) + { + row_np[i*f2+ j] = 0; + } + } + + for (i = 0; i < n; i++) + { + row_np[i] += m%f1; + } + } + + HMPI_Create_rectangles_1d( + f1, + f2, + m, + n, + row_np, + column_np, + w, + h, + trow, + tcol, + ci + ); + + free(row_np); + free(column_np); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Homogeneous_matrix_2d_no_mlimits + ( + int p, + int q, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + int i, j, rc; + int *row_np, *column_np; + + row_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (row_np == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (column_np == NULL) + { + return MPC_ERR_NOMEM; + } + + if ((m < p) + && (n < q + ) + ) + { + for (i = 0; i < m; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+ j] = 1; + } + } + + for (i = m; i < p; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+ j] = 0; + } + } + + for (i = 0; i < n; i++) + { + for (j = 0; j < p; j++) + { + column_np[i*p + j] = 1; + } + } + + for (i = n; i < q; i++) + { + for (j = 0; j < p; j++) + { + column_np[i*p + j] = 0; + } + } + } + + if ((m < p) + && (n >= q + ) + ) + { + for (i = 0; i < m; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+ j] = 1; + } + } + + for (i = m; i < p; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+ j] = 0; + } + } + + for (i = 0; i < q; i++) + { + for (j = 0; j < m; 
j++) + { + column_np[i + j*q] = n/q; + } + + for (j = m; j < p; j++) + { + column_np[i + j*q] = 0; + } + } + + for (i = 0; i < m; i++) + { + column_np[i*q] += n%q; + } + } + + if ((m >= p) + && (n >= q + ) + ) + { + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+ j] = m/p; + } + } + + for (i = 0; i < (m%p); i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+ j] += 1; + } + } + + for (i = 0; i < q; i++) + { + for (j = 0; j < p; j++) + { + column_np[i + j*q] = n/q; + } + } + + for (i = 0; i < (n%q); i++) + { + for (j = 0; j < p; j++) + { + column_np[i + j*q] += 1; + } + } + } + + if ((m >= p) + && (n < q + ) + ) + { + for (i = 0; i < n; i++) + { + for (j = 0; j < p; j++) + { + column_np[i*p + j] = 1; + } + } + + for (i = n; i < q; i++) + { + for (j = 0; j < p; j++) + { + column_np[i*p + j] = 0; + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < n; j++) + { + row_np[i*q+ j] = m/p; + } + + for (j = n; j < q; j++) + { + row_np[i*q+ j] = 0; + } + } + + for (i = 0; i < n; i++) + { + row_np[i] += m%p; + } + } + + HMPI_Create_rectangles_2d( + p, + q, + m, + n, + row_np, + column_np, + w, + h, + trow, + tcol, + ci, + cj + ); + + free(row_np); + free(column_np); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Matrix_cq_recursive_bisection + ( + int p, + const double *speeds, + const int *mlimits, + int dimension, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol + ) + { + int i, j, rc, dimension_size; + int total_limits = 0; + double tspeeds = 0.0; + double first_half_speeds = 0.0; + int tmlimits = 0; + int first_half_mlimits = 0.0; + int second_half_mlimits = 0.0; + double first_half_allocation; + double second_half_allocation; + int H = 0; + int V = 1; + + if (mlimits != NULL) + { + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits < (m*n)) + { + printf( + "==>HMPI: Problems applying the limits during " + " matrix partitioning using RECURSIVE " + 
"one-dimensional distribution\n" + ); + + return HMPI_ERR_MLIMITS; + } + } + + if (p == 1) + { + w[0] = n; + h[0] = m; + trow[0] = 0; + tcol[0] = 0; + + return HMPI_OK; + } + + for (i = 0; i < p; i++) + { + if (i < (p/2)) + { + first_half_speeds += speeds[i]; + first_half_mlimits += mlimits[i]; + } + + tspeeds += speeds[i]; + tmlimits += mlimits[i]; + } + + if (dimension == H) + { + dimension_size = m; + first_half_mlimits = (first_half_mlimits)/n; + second_half_mlimits = (tmlimits/n) - first_half_mlimits; + } + + if (dimension == V) + { + dimension_size = n; + first_half_mlimits = (first_half_mlimits)/m; + second_half_mlimits = (tmlimits/m) - first_half_mlimits; + } + + { + int allocations[2]; + double speedsr[] = { + first_half_speeds, + (tspeeds - first_half_speeds) + }; + int mlimitsr[] = { + first_half_mlimits, + second_half_mlimits + }; + + rc = HMPI_Partition_set( + 2, + 1, + speedsr, + NULL, + mlimitsr, + dimension_size, + NULL, + 1, + 0, + -1, + NULL, + NULL, + allocations + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + printf( + "==>HMPI: Problems partitioning the matrix " + " using RECURSIVE one-dimensional distribution\n" + ); + return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + printf( + "==>HMPI: Problems applying the limits during " + " matrix partitioning using RECURSIVE " + "one-dimensional distribution\n" + ); + return HMPI_ERR_MLIMITS; + } + + first_half_allocation = allocations[0]; + second_half_allocation = allocations[1]; + } + + if (p == 2) + { + if (dimension == H) + { + w[0] = n; + h[0] = first_half_allocation; + trow[0] = 0; + tcol[0] = 0; + + w[1] = n; + h[1] = second_half_allocation; + trow[0] = first_half_allocation; + tcol[0] = 0; + } + + if (dimension == V) + { + w[0] = first_half_allocation; + h[0] = m; + trow[0] = 0; + tcol[0] = 0; + + w[1] = second_half_allocation; + h[1] = m; + trow[0] = 0; + tcol[0] = first_half_allocation; + + } + + return HMPI_OK; + } + + { + int dimensionr; + int partitionm; + int 
partitionn; + + if (dimension == H) + { + dimensionr = V; + } + + if (dimension == V) + { + dimensionr = H; + } + + if (dimension == H) + { + partitionm = first_half_allocation; + partitionn = n; + } + + if (dimension == V) + { + partitionm = m; + partitionn = first_half_allocation; + } + + rc = HMPI_Matrix_cq_recursive_bisection( + (p/2), + speeds, + mlimits, + dimension, + partitionm, + partitionn, + w, + h, + trow, + tcol + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < (p/2); i++) + { + trow[i] = 0; + tcol[i] = 0; + + for (j = 0; j < i; j++) + { + trow[i] += h[j]; + tcol[i] += w[j]; + } + } + + if (dimension == H) + { + partitionm = second_half_allocation; + partitionn = n; + } + + if (dimension == V) + { + partitionm = m; + partitionn = second_half_allocation; + } + + rc = HMPI_Matrix_cq_recursive_bisection( + p - (p/2), + speeds + (p/2), + mlimits + (p/2), + dimension, + partitionm, + partitionn, + w + (p/2), + h + (p/2), + trow + (p/2), + tcol + (p/2) + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < (p - (p/2)); i++) + { + if (dimension == H) + { + trow[(p/2) + i] = first_half_allocation; + tcol[(p/2) + i] = 0; + } + + if (dimension == V) + { + trow[(p/2) + i] = 0; + tcol[(p/2) + i] = first_half_allocation; + } + + for (j = 0; j < i; j++) + { + trow[(p/2) + i] += h[j]; + tcol[(p/2) + i] += w[j]; + } + } + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Matrix_one_dimensional_recursive + ( + int p, + const double *speeds, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol + ) + { + int rc; + int H = 0; + int V = 1; + + /* + * Use the Orthogonal recursive bisection scheme of + * Crandall and Quinn. 
/*
 * Length shared by two ranges given as (top, bottom) pairs.
 *
 * A range whose top and bottom are both 0 is treated as absent and
 * yields 0.  Otherwise:
 *   - when one range lies inside the other, the length of the inner
 *     range (bottom - top) is returned;
 *   - when the ranges partially overlap, the length of the overlap is
 *     returned;
 *   - when they do not overlap, 0 is returned.
 *
 * Parameters
 *   top_row_1, bottom_row_1   first range
 *   top_row_2, bottom_row_2   second range
 */
int HMPI_Common_height
(
    int top_row_1,
    int bottom_row_1,
    int top_row_2,
    int bottom_row_2
)
{
    const int first_absent = (top_row_1 == 0) && (bottom_row_1 == 0);
    const int second_absent = (top_row_2 == 0) && (bottom_row_2 == 0);

    if (first_absent || second_absent)
    {
        return 0;
    }

    if (top_row_1 == top_row_2)
    {
        /* Same start: the shorter range is contained in the longer one. */
        return (bottom_row_1 <= bottom_row_2)
               ? (bottom_row_1 - top_row_1)
               : (bottom_row_2 - top_row_2);
    }

    if (top_row_1 > top_row_2)
    {
        /* Range 1 starts later. */
        if (bottom_row_1 <= bottom_row_2)
        {
            /* ... and ends no later: range 1 is contained in range 2. */
            return (bottom_row_1 - top_row_1);
        }

        /* ... and ends later: overlap only if it starts by the end of 2. */
        return (top_row_1 <= bottom_row_2)
               ? (bottom_row_2 - top_row_1)
               : 0;
    }

    /* Range 1 starts earlier. */
    if (bottom_row_1 >= bottom_row_2)
    {
        /* ... and ends no earlier: range 2 is contained in range 1. */
        return (bottom_row_2 - top_row_2);
    }

    /* ... and ends earlier: overlap only if it reaches the start of 2. */
    return (bottom_row_1 >= top_row_2)
           ? (bottom_row_1 - top_row_2)
           : 0;
}
*tcolumn + ) + { + int i, j, ix = 0; + + for (i = 0; i < number_of_columns; i++) + { + int width = 0; + + for (j = 0; j < rectangles_in_each_column[i]; j++) + { + width += areas[p - j - 1 - ix]; + } + + for (j = 0; j < rectangles_in_each_column[i]; j++) + { + w[p - j - 1 - ix] = width*n; + h[p - j - 1 - ix] = (areas[p - j - 1 - ix]/width)*m; + } + + ix += rectangles_in_each_column[i]; + } + + /* + * Adjust the parameters w & h + * TBD + */ + return; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_column_based_speeds_single_numbers_with_mlimits + ( + int p, + int q, + const double *speeds, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + /* + * TBD + */ + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_row_based_speeds_single_numbers_no_mlimits + ( + int p, + int q, + const double *speeds, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + int i, j, rc; + double *row_speed_sums; + int *row_np, *row_np_sub; + int *column_np; + + row_speed_sums = (double*)malloc( + sizeof(double) + * + p + ); + + if (row_speed_sums == NULL) + { + return MPC_ERR_NOMEM; + } + + row_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (row_np == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (column_np == NULL) + { + return MPC_ERR_NOMEM; + } + + row_np_sub = (int*)malloc( + sizeof(int) + * + p + ); + + if (row_np_sub == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + row_speed_sums[i] = 0.0; + for (j = 0; j < q; j++) + { + row_speed_sums[i] += speeds[i*q+j]; + } + } + + /* + * Partition of the row dimension among + * p processors + */ + rc = HMPI_Partition_set( + p, + 1, + row_speed_sums, + NULL, + NULL, + m, + NULL, + 0, + 0, + -1, + NULL, + NULL, + row_np_sub + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + 
return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + return HMPI_ERR_MLIMITS; + } + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+j] = row_np_sub[i]; + } + } + + free(row_speed_sums); + free(row_np_sub); + + for (i = 0; i < p; i++) + { + double *column_speed_sums; + int *column_np_sub; + + column_speed_sums = (double*)malloc( + sizeof(double) + * + q + ); + + if (column_speed_sums == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np_sub = (int*)malloc( + sizeof(int) + * + q + ); + + if (column_np_sub == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < q; j++) + { + column_speed_sums[j] = speeds[i*q + j]; + } + + /* + * Partition of the column dimension among + * q processors + */ + rc = HMPI_Partition_set( + q, + 1, + column_speed_sums, + NULL, + NULL, + n, + NULL, + 0, + 0, + -1, + NULL, + NULL, + column_np_sub + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + return HMPI_ERR_MLIMITS; + } + + if (rc != HMPI_OK) + { + return rc; + } + + for (j = 0; j < q; j++) + { + column_np[i*q + j] = column_np_sub[j]; + } + + free(column_np_sub); + free(column_speed_sums); + } + + HMPI_Create_rectangles_2d( + p, + q, + m, + n, + row_np, + column_np, + w, + h, + trow, + tcol, + ci, + cj + ); + + free(row_np); + free(column_np); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_column_based_speeds_single_numbers_no_mlimits + ( + int p, + int q, + const double *speeds, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + int i, j, rc; + double *column_speed_sums; + int *column_np, *column_np_sub; + int *row_np; + + column_speed_sums = (double*)malloc( + sizeof(double) + * + q + ); + + if (column_speed_sums == NULL) + { + return MPC_ERR_NOMEM; + } + + row_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if 
(row_np == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (column_np == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np_sub = (int*)malloc( + sizeof(int) + * + q + ); + + if (column_np_sub == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < q; i++) + { + column_speed_sums[i] = 0.0; + for (j = 0; j < p; j++) + { + column_speed_sums[i] += speeds[j*q+i]; + } + } + + /* + * Partition of the column dimension among + * q processors + */ + rc = HMPI_Partition_set( + q, + 1, + column_speed_sums, + NULL, + NULL, + n, + NULL, + 0, + 0, + -1, + NULL, + NULL, + column_np_sub + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + return HMPI_ERR_MLIMITS; + } + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + column_np[i*q+j] = column_np_sub[j]; + } + } + + free(column_speed_sums); + free(column_np_sub); + + for (i = 0; i < q; i++) + { + double *row_speed_sums; + int *row_np_sub; + + row_speed_sums = (double*)malloc( + sizeof(double) + * + p + ); + + if (row_speed_sums == NULL) + { + return MPC_ERR_NOMEM; + } + + row_np_sub = (int*)malloc( + sizeof(int) + * + p + ); + + if (row_np_sub == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < p; j++) + { + row_speed_sums[j] = speeds[j*q + i]; + } + + /* + * Partition of the row dimension among + * p processors + */ + rc = HMPI_Partition_set( + p, + 1, + row_speed_sums, + NULL, + NULL, + m, + NULL, + 0, + 0, + -1, + NULL, + NULL, + row_np_sub + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + return HMPI_ERR_MLIMITS; + } + + if (rc != HMPI_OK) + { + return rc; + } + + for (j = 0; j < p; j++) + { + row_np[j*q + i] = row_np_sub[j]; + } + + free(row_np_sub); + free(row_speed_sums); + } + + HMPI_Create_rectangles_2d( + p, + q, + m, + n, + row_np, + column_np, + w, 
+ h, + trow, + tcol, + ci, + cj + ); + + free(row_np); + free(column_np); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_column_based_speed_functions_no_mlimits + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_column_based_speed_functions_with_mlimits + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Matrix_two_dimensional_column_based + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + /* + * Homogeneous distribution + */ + if ((speeds == NULL) + && (mlimits == NULL + ) + ) + { + return HMPI_Homogeneous_matrix_2d_no_mlimits( + p, + q, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * TBD: Meaning of mlimits + */ + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + } + + if ((mlimits == NULL) + && (pn == 1 + ) + ) + { + return HMPI_2d_column_based_speeds_single_numbers_no_mlimits( + p, + q, + speeds, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + if ((mlimits != NULL) + && (pn == 1 + ) + ) + { + /* + * TBD: Meaning of mlimits + * No known results for this case + */ + return HMPI_2d_column_based_speeds_single_numbers_with_mlimits( + p, + q, + speeds, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + if ((mlimits == NULL) + && (pn > 1 + ) + ) + { + /* + * 
No known results for this case + */ + return HMPI_2d_column_based_speed_functions_no_mlimits( + p, + q, + pn, + speeds, + psizes, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + if ((mlimits != NULL) + && (pn > 1 + ) + ) + { + /* + * No known results for this case + */ + return HMPI_2d_column_based_speed_functions_with_mlimits( + p, + q, + pn, + speeds, + psizes, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_row_based_speed_functions_with_mlimits + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_row_based_speed_functions_no_mlimits + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_row_based_speeds_single_numbers_with_mlimits + ( + int p, + int q, + const double *speeds, + const int *psizes, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Matrix_two_dimensional_row_based + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + if ((speeds == NULL) + && (mlimits == NULL + ) + ) + { + return HMPI_Homogeneous_matrix_2d_no_mlimits( + p, + q, + m, 
+ n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + } + + if ((mlimits == NULL) + && (pn == 1 + ) + ) + { + return HMPI_2d_row_based_speeds_single_numbers_no_mlimits( + p, + q, + speeds, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn == 1 + ) + ) + { + return HMPI_2d_row_based_speeds_single_numbers_with_mlimits( + p, + q, + speeds, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((mlimits == NULL) + && (pn > 1 + ) + ) + { + return HMPI_2d_row_based_speed_functions_no_mlimits( + p, + q, + pn, + speeds, + psizes, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn > 1 + ) + ) + { + return HMPI_2d_row_based_speed_functions_with_mlimits( + p, + q, + pn, + speeds, + psizes, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_cartesian_speeds_single_numbers_no_mlimits + ( + int p, + int q, + const double *speeds, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + int i, j, rc; + double *row_speed_sums; + int *row_np, *row_np_sub; + double *column_speed_sums; + int *column_np, *column_np_sub; + + row_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (row_np == NULL) + { + return MPC_ERR_NOMEM; + } + + row_np_sub = (int*)malloc( + sizeof(int) + * + p + ); + + if (row_np_sub == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (column_np == NULL) + { + return MPC_ERR_NOMEM; + } + + column_np_sub = (int*)malloc( + sizeof(int) + * + q + ); + + if (column_np_sub == NULL) + { + return MPC_ERR_NOMEM; + } + + 
row_speed_sums = (double*)malloc( + sizeof(double) + * + p + ); + + if (row_speed_sums == NULL) + { + return MPC_ERR_NOMEM; + } + + column_speed_sums = (double*)malloc( + sizeof(double) + * + q + ); + + if (column_speed_sums == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + row_speed_sums[i] = 0.0; + for (j = 0; j < q; j++) + { + row_speed_sums[i] += speeds[i*q+j]; + } + } + + for (i = 0; i < q; i++) + { + column_speed_sums[i] = 0.0; + for (j = 0; j < p; j++) + { + column_speed_sums[i] += speeds[j*q+i]; + } + } + + /* + * Partition of the row dimension among + * p processors + */ + rc = HMPI_Partition_set( + p, + 1, + row_speed_sums, + NULL, + NULL, + m, + NULL, + 0, + 0, + -1, + NULL, + NULL, + row_np_sub + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + printf( + "Problems partitioning the matrix " + " using HMPI_CARTESIAN two-dimensional distribution\n" + ); + + return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + printf( + "Problems applying the limits during " + " matrix partitioning using HMPI_CARTESIAN " + "two-dimensional distribution\n" + ); + + return HMPI_ERR_MLIMITS; + } + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * Partition of the column dimension among + * q processors + */ + rc = HMPI_Partition_set( + q, + 1, + column_speed_sums, + NULL, + NULL, + n, + NULL, + 0, + 0, + -1, + NULL, + NULL, + column_np_sub + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + printf( + "Problems partitioning the matrix" + " using HMPI_CARTESIAN two-dimensional distribution\n" + ); + return HMPI_ERR_PARTITION_MATRIX; + } + + if (rc == HMPI_ERR_MLIMITS) + { + printf( + "Problems applying the limits during" + " matrix partitioning using HMPI_CARTESIAN " + "two-dimensional distribution\n" + ); + + return HMPI_ERR_MLIMITS; + } + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + row_np[i*q+j] = row_np_sub[i]; + column_np[i*q+j] = column_np_sub[j]; + } + } + + 
HMPI_Create_rectangles_2d( + p, + q, + m, + n, + row_np, + column_np, + w, + h, + trow, + tcol, + ci, + cj + ); + + free(row_np); + free(column_np); + free(row_np_sub); + free(column_np_sub); + free(row_speed_sums); + free(column_speed_sums); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_cartesian_speeds_single_numbers_with_mlimits + ( + int p, + int q, + const double *speeds, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_cartesian_speed_functions_with_mlimits + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_2d_cartesian_speed_functions_no_mlimits + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Matrix_two_dimensional_cartesian + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + if ((speeds == NULL) + && (mlimits == NULL + ) + ) + { + return HMPI_Homogeneous_matrix_2d_no_mlimits( + p, + q, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + } + + if ((mlimits == NULL) + && (pn == 1 + ) + ) + { + return 
HMPI_2d_cartesian_speeds_single_numbers_no_mlimits( + p, + q, + speeds, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn == 1 + ) + ) + { + return HMPI_2d_cartesian_speeds_single_numbers_with_mlimits( + p, + q, + speeds, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((mlimits == NULL) + && (pn > 1 + ) + ) + { + return HMPI_2d_cartesian_speed_functions_no_mlimits( + p, + q, + pn, + speeds, + psizes, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn > 1 + ) + ) + { + return HMPI_2d_cartesian_speed_functions_with_mlimits( + p, + q, + pn, + speeds, + psizes, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_matrix_2d + ( + int p, + int q, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol, + int *ci, + int *cj + ) + { + switch (type_of_distribution) + { + case HMPI_COLUMN_BASED: + { + return HMPI_Matrix_two_dimensional_column_based( + p, + q, + pn, + speeds, + psizes, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + break; + case HMPI_ROW_BASED: + { + return HMPI_Matrix_two_dimensional_row_based( + p, + q, + pn, + speeds, + psizes, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + break; + case HMPI_CARTESIAN: + { + return HMPI_Matrix_two_dimensional_cartesian( + p, + q, + pn, + speeds, + psizes, + mlimits, + m, + n, + w, + h, + trow, + tcol, + ci, + cj + ); + } + break; + default: + { + printf( + "Invalid type of distribution provided" + " for two-dimensional processor arrangement\n" + ); + return HMPI_ERR_PARTITION_MATRIX; + break; + } + } + + 
printf( + "Invalid type of distribution provided" + " for two-dimensional processor arrangement\n" + ); + + return HMPI_ERR_PARTITION_MATRIX; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_dynamic_row_based_speeds_single_numbers_no_mlimits + ( + int p, + const double *speeds, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + return HMPI_1d_dynamic_column_based_speeds_single_numbers_no_mlimits( + p, + speeds, + n, + m, + lb, + dpf, + w, + h, + trow, + tcol + ); + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_dynamic_column_based_speeds_single_numbers_no_mlimits + ( + int p, + const double *speeds, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + int i, j, q, C, c_opt; + double S = 0.0, tspeed = 0.0, tarea = 0.0; + double **perimeter; + int **cumulative_r; + double *one_d_p; + int *one_d_r, *optimal_d; + double *areas, *rearranged_speeds; + int *rearrangedp; + double temp; + int temp_number; + + /* + * Sort the speeds in ascending order + */ + rearranged_speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + } + + for (i = 0; i < p; i++) + { + rearranged_speeds[i] = speeds[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_speeds[j-1] > rearranged_speeds[j]) + { + temp = rearranged_speeds[j-1]; + rearranged_speeds[j-1] = rearranged_speeds[j]; + rearranged_speeds[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + } + } + } + + /* + * Normalise to two decimal places + * Sum of the areas should be 1. 
+ */ + areas = (double*)malloc( + sizeof(double) + * + p + ); + + if (areas == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + tspeed += rearranged_speeds[i]; + } + + for (i = 0; i < p; i++) + { + int to_2_decimals; + areas[i] = (rearranged_speeds[i]/tspeed)*100; + to_2_decimals = areas[i]; + areas[i] = to_2_decimals; + areas[i] = (areas[i]/100); + tarea += areas[i]; + } + + areas[0] = areas[0] + (1 - tarea); + + /* + * perimeter and cumulative_r are Upper Triangular arrangements + * Study the paper 'Matrix Multiplication on Heterogeneous + * Platforms' by Beaumont et al to see the layout of the + * arrays perimeter and cumulative_r + */ + one_d_p = (double*)malloc( + sizeof(double) + * + (p*(p+1)/2) + ); + + if (one_d_p == NULL) + { + return MPC_ERR_NOMEM; + } + + one_d_r = (int*)malloc( + sizeof(int) + * + (p*(p+1)/2) + ); + + if (one_d_r == NULL) + { + return MPC_ERR_NOMEM; + } + + perimeter = (double**)malloc( + sizeof(double*) + * + p + ); + + if (perimeter == NULL) + { + return MPC_ERR_NOMEM; + } + + cumulative_r = (int**)malloc( + sizeof(int*) + * + p + ); + + if (cumulative_r == NULL) + { + return MPC_ERR_NOMEM; + } + + optimal_d = (int*)malloc( + sizeof(int) + * + p + ); + + if (optimal_d == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + perimeter[i] = one_d_p - i; + one_d_p+=(p-i); + + cumulative_r[i] = one_d_r - i; + one_d_r+=(p-i); + } + + for (q = 0; q < p; q++) + { + S += speeds[q]; + perimeter[0][q] = 1 + S*q; + cumulative_r[0][q] = 0; + } + + for (C = 1; C < p; C++) + { + for (q = C; q < p; q++) + { + int r_opt; + + perimeter[C][q] = (*dpf)( + C, + q, + p, + rearranged_speeds, + perimeter, + &r_opt + ); + + cumulative_r[C][q] = r_opt; + } + } + + q = p; + c_opt = p; + i = 0; + + while (c_opt >= 2) + { + int temp, c_optimal = 1; + + temp = perimeter[0][q]; + + for (C = 1; C < p; C++) + { + if ((perimeter[C][q-1]) < temp) + { + c_optimal = C+1; + temp = perimeter[C][q-1]; + } + } + + optimal_d[i++] = q 
- cumulative_r[c_optimal-1][q-1]; + q = cumulative_r[c_optimal-1][q-1]; + c_opt = c_optimal; + } + + optimal_d[i] = q; + + /* + * Fill the output parameters + */ + HMPI_Create_rectangles_1d_column_based( + p, + i+1, + optimal_d, + areas, + m, + n, + w, + h, + trow, + tcol + ); + + free(one_d_p); + free(one_d_r); + free(perimeter); + free(optimal_d); + free(cumulative_r); + free(rearranged_speeds); + free(rearrangedp); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + /* + * column-based partitioning/row-based partitioning + * of the matrix using the column-based heuristic approach + * proposed by Beaumont et al + */ + int HMPI_1d_dynamic_speeds_single_numbers_no_mlimits + ( + int p, + const double *speeds, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol + ) + { + switch (type_of_distribution) + { + case HMPI_ROW_BASED: + { + return + HMPI_1d_dynamic_row_based_speeds_single_numbers_no_mlimits( + p, + speeds, + m, + n, + lb, + dpf, + w, + h, + trow, + tcol + ); + } + break; + case HMPI_COLUMN_BASED: + { + return + HMPI_1d_dynamic_column_based_speeds_single_numbers_no_mlimits( + p, + speeds, + m, + n, + lb, + dpf, + w, + h, + trow, + tcol + ); + } + break; + case HMPI_GENERAL: + { + } + break; + default: + { + printf( + "Invalid type of distribution provided" + " for one-dimensional processor arrangement " + " with DYNAMIC formulation\n" + ); + + return HMPI_ERR_PARTITION_MATRIX; + + break; + } + } + + printf( + "Invalid type of distribution provided for one-dimensional " + "processor arrangement with DYNAMIC formulation\n" + ); + + return HMPI_ERR_PARTITION_MATRIX; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_dynamic_speeds_single_numbers_with_mlimits + ( + int p, + const double *speeds, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int type_of_distribution, + 
int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_dynamic_speed_functions_no_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_dynamic_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_matrix_1d_dp + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_DP_function dpf, + int type_of_distribution, + int *w, + int *h, + int *trow, + int *tcol, + int *ci + ) + { + if ((speeds == NULL) + && (mlimits == NULL + ) + ) + { + return HMPI_Homogeneous_matrix_1d_no_mlimits( + p, + m, + n, + w, + h, + trow, + tcol, + ci + ); + } + + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + } + + if ((mlimits == NULL) + && (pn == 1 + ) + ) + { + return HMPI_1d_dynamic_speeds_single_numbers_no_mlimits( + p, + speeds, + m, + n, + lb, + dpf, + type_of_distribution, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn == 1 + ) + ) + { + return HMPI_1d_dynamic_speeds_single_numbers_with_mlimits( + p, + speeds, + mlimits, + m, + n, + lb, + dpf, + type_of_distribution, + w, + h, + trow, + tcol + ); 
+ } + + /* + * No known results for this case + */ + if ((mlimits == NULL) + && (pn > 1 + ) + ) + { + return HMPI_1d_dynamic_speed_functions_no_mlimits( + p, + pn, + speeds, + psizes, + m, + n, + lb, + dpf, + type_of_distribution, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn > 1 + ) + ) + { + return HMPI_1d_dynamic_speed_functions_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + m, + n, + lb, + dpf, + type_of_distribution, + w, + h, + trow, + tcol + ); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + + int HMPI_Next_best_matrix_partition( + int m, + int n, + int* oldw, + int* oldh, + int* oldtrow, + int* oldtcol + ) + { + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_iterative_speeds_single_numbers_no_mlimits + ( + int p, + const double *speeds, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Iterative_function cf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + int i, rc; + int Lower_bound = (*lb)( + p, + speeds, + m, + n + ); + int *oldw, *oldh, *oldtrow, *oldtcol; + double cost; + + oldw = (int*)malloc( + sizeof(int) + * + p + ); + + if (oldw == NULL) + { + return MPC_ERR_NOMEM; + } + + oldh = (int*)malloc( + sizeof(int) + * + (p*p) + ); + + if (oldh == NULL) + { + return MPC_ERR_NOMEM; + } + + oldtrow = (int*)malloc( + sizeof(int) + * + p + ); + + if (oldtrow == NULL) + { + return MPC_ERR_NOMEM; + } + + oldtcol = (int*)malloc( + sizeof(int) + * + p + ); + + if (oldtcol == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + oldw[i] = w[i]; + } + + for (i = 0; i < (p*p); i++) + { + oldh[i] = h[i]; + } + + for (i = 0; i < p; i++) + { + oldtrow[i] = trow[i]; + } + + for (i = 0; i < p; i++) + { + oldtcol[i] = tcol[i]; + } + + do + { + cost = (*cf)( + p, + oldw, + oldh, + oldtrow, + oldtcol + ); + + if ((cost < 0) + || (cost <= Lower_bound + ) + ) + { + for (i = 0; i < p; i++) + { + 
w[i] = oldw[i]; + } + + for (i = 0; i < (p*p); i++) + { + h[i] = oldh[i]; + } + + for (i = 0; i < p; i++) + { + trow[i] = oldtrow[i]; + } + + for (i = 0; i < p; i++) + { + tcol[i] = oldtcol[i]; + } + } + else + { + /* + * TBD + */ + HMPI_Next_best_matrix_partition( + m, + n, + oldw, + oldh, + oldtrow, + oldtcol + ); + } + } while ((cost > 0) && (cost > Lower_bound)); + + free(oldw); + free(oldh); + free(oldtrow); + free(oldtcol); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_iterative_speeds_single_numbers_with_mlimits + ( + int p, + const double *speeds, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Iterative_function cf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_iterative_speed_functions_no_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Iterative_function cf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_iterative_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Iterative_function cf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_matrix_1d_iterative + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Iterative_function cf, + int *w, + int *h, + int *trow, + int *tcol, + int *ci + ) + { + if ((speeds == NULL) + && 
(mlimits == NULL + ) + ) + { + return HMPI_Homogeneous_matrix_1d_no_mlimits( + p, + m, + n, + w, + h, + trow, + tcol, + ci + ); + } + + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + } + + if ((mlimits == NULL) + && (pn == 1 + ) + ) + { + return HMPI_1d_iterative_speeds_single_numbers_no_mlimits( + p, + speeds, + m, + n, + lb, + cf, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn == 1 + ) + ) + { + return HMPI_1d_iterative_speeds_single_numbers_with_mlimits( + p, + speeds, + mlimits, + m, + n, + lb, + cf, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits == NULL) + && (pn > 1 + ) + ) + { + return HMPI_1d_iterative_speed_functions_no_mlimits( + p, + pn, + speeds, + psizes, + m, + n, + lb, + cf, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn > 1 + ) + ) + { + return HMPI_1d_iterative_speed_functions_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + m, + n, + lb, + cf, + w, + h, + trow, + tcol + ); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_refining_speeds_single_numbers_no_mlimits + ( + int p, + const double *speeds, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Refining_function rf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + int i, rc; + int *oldw, *oldh, *oldtrow, *oldtcol; + + oldw = (int*)malloc( + sizeof(int) + * + p + ); + + if (oldw == NULL) + { + return MPC_ERR_NOMEM; + } + + oldh = (int*)malloc( + sizeof(int) + * + (p*p) + ); + + if (oldh == NULL) + { + return MPC_ERR_NOMEM; + } + + oldtrow = (int*)malloc( + sizeof(int) + * + p + ); + + if (oldtrow == NULL) + { + return MPC_ERR_NOMEM; + } + + oldtcol = (int*)malloc( + sizeof(int) + * + p + ); + + if (oldtcol == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + oldw[i] = w[i]; + } + + for (i = 0; i < (p*p); i++) + { + 
oldh[i] = h[i]; + } + + for (i = 0; i < p; i++) + { + oldtrow[i] = trow[i]; + } + + for (i = 0; i < p; i++) + { + oldtcol[i] = tcol[i]; + } + + do + { + rc = (*rf)( + p, + speeds, + m, + n, + oldw, + oldh, + oldtrow, + oldtcol, + w, + h, + trow, + tcol + ); + + if (rc > 0) + { + for (i = 0; i < p; i++) + { + oldw[i] = w[i]; + } + + for (i = 0; i < (p*p); i++) + { + oldh[i] = h[i]; + } + + for (i = 0; i < p; i++) + { + oldtrow[i] = trow[i]; + } + + for (i = 0; i < p; i++) + { + oldtcol[i] = tcol[i]; + } + } + else + { + for (i = 0; i < p; i++) + { + w[i] = oldw[i]; + } + + for (i = 0; i < (p*p); i++) + { + h[i] = oldh[i]; + } + + for (i = 0; i < p; i++) + { + trow[i] = oldtrow[i]; + } + + for (i = 0; i < p; i++) + { + tcol[i] = oldtcol[i]; + } + } + } while (rc > 0); + + free(oldw); + free(oldh); + free(oldtrow); + free(oldtcol); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_refining_speeds_single_numbers_with_mlimits + ( + int p, + const double *speeds, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Refining_function rf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_refining_speed_functions_no_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Refining_function rf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_1d_refining_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Refining_function rf, + int *w, + int *h, + int *trow, + int *tcol + ) + { + printf("Implementation currently not 
available\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_matrix_1d_refining + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int m, + int n, + HMPI_Lower_bound lb, + HMPI_Refining_function rf, + int *w, + int *h, + int *trow, + int *tcol, + int *ci + ) + { + if ((speeds == NULL) + && (mlimits == NULL + ) + ) + { + return HMPI_Homogeneous_matrix_1d_no_mlimits( + p, + m, + n, + w, + h, + trow, + tcol, + ci + ); + } + + /* + * Meaning of mlimits + */ + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + } + + if ((mlimits == NULL) + && (pn == 1 + ) + ) + { + return HMPI_1d_refining_speeds_single_numbers_no_mlimits( + p, + speeds, + m, + n, + lb, + rf, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn == 1 + ) + ) + { + return HMPI_1d_refining_speeds_single_numbers_with_mlimits( + p, + speeds, + mlimits, + m, + n, + lb, + rf, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits == NULL) + && (pn > 1 + ) + ) + { + return HMPI_1d_refining_speed_functions_no_mlimits( + p, + pn, + speeds, + psizes, + m, + n, + lb, + rf, + w, + h, + trow, + tcol + ); + } + + /* + * No known results for this case + */ + if ((mlimits != NULL) + && (pn > 1 + ) + ) + { + return HMPI_1d_refining_speed_functions_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + m, + n, + lb, + rf, + w, + h, + trow, + tcol + ); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Get_processor_2d + ( + int row, + int column, + int p, + int q, + const int *w, + const int *h, + const int *trow, + const int *tcol, + int *I, + int *J + ) + { + int x, y, i, j; + + for (x = 0; x < p; x++) + { + for (y = 0; y < q; y++) + { + int hi = h[HMPI_RECT_INDEX(x, y, x, y, p, q)]; + int wi = w[HMPI_RECT_INDEX(x, y, x, y, p, q)]; + int toprow = trow[x*q+y]; 
+ int topcol = tcol[x*q+y]; + + for (i = 0; i < hi; i++) + { + for (j = 0; j < wi; j++) + { + if (((row >= (toprow + i)) + && (row < (toprow + hi) + ) + ) + && + ((column >= (topcol + j)) + && (column < (topcol + wi) + ) + ) + ) + { + *I = i; + *J = j; + + return HMPI_OK; + } + } + } + } + } + + return HMPI_ERR_PARTITION_NOT_EXISTS; + } + + /*-----------------------------------------------------*/ + + int HMPI_Get_processor_1d + ( + int row, + int column, + int p, + const int *w, + const int *h, + const int *trow, + const int *tcol, + int *I + ) + { + int x, y, i, j; + + for (x = 0; x < p; x++) + { + int hi = h[HMPI_RECT_INDEX(x, 0, x, 0, p, 1)]; + int wi = w[HMPI_RECT_INDEX(x, 0, x, 0, p, 1)]; + int toprow = trow[x]; + int topcol = tcol[x]; + + for (i = 0; i < hi; i++) + { + for (j = 0; j < wi; j++) + { + if (((row >= (toprow + i)) + && (row < (toprow + hi) + ) + ) + && + ((column >= (topcol + j)) + && (column < (topcol + wi) + ) + ) + ) + { + *I = i; + return HMPI_OK; + } + } + } + } + + return HMPI_ERR_PARTITION_NOT_EXISTS; + } + + /*-----------------------------------------------------*/ + + int HMPI_Print_rectangle_1d( + int p, + int m, + int n, + const int *w, + const int *h, + const int *trow, + const int *tcol, + const int *ci + ) + { + int i, j, k, l; + int q = 1; + + if (ci != NULL) + { + printf("The processor allocation is :\n"); + + for (i = 0; i < m; i++) + { + for (j = 0; j < n; j++) + { + printf("(%d) ", ci[i*n+j]); + } + printf("\n"); + } + + printf("\n"); + } + + printf("The top row coordinates are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + printf("%d ", trow[i*q+j]); + } + printf("\n"); + } + + printf("\n"); + + printf("The top column coordinates are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + printf("%d ", tcol[i*q+j]); + } + printf("\n"); + } + + printf("\n"); + + printf("The common widths of rectangles are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (k = 0; k < 
p; k++) + { + for (l = 0; l < q; l++) + { + printf("%d ", w[HMPI_RECT_INDEX(i, j, k, l, p, q)]); + } + } + printf("\n"); + } + } + + printf("\n"); + + printf("The common heights of rectangles are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (k = 0; k < p; k++) + { + for (l = 0; l < q; l++) + { + printf("%d ", h[HMPI_RECT_INDEX(i, j, k, l, p, q)]); + } + } + printf("\n"); + } + } + + printf("\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Print_rectangle_2d( + int p, + int q, + int m, + int n, + const int *w, + const int *h, + const int *trow, + const int *tcol, + const int *ci, + const int *cj + ) + { + int i, j, k, l; + + if ((ci != NULL) + && (cj != NULL + ) + ) + { + printf("The processor allocation is :\n"); + + for (i = 0; i < m; i++) + { + for (j = 0; j < n; j++) + { + printf("(%d,%d) ", ci[i*n+j], cj[i*n+j]); + } + printf("\n"); + } + + printf("\n"); + } + + printf("The top row coordinates are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + printf("%d ", trow[i*q+j]); + } + printf("\n"); + } + + printf("\n"); + + printf("The top column coordinates are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + printf("%d ", tcol[i*q+j]); + } + printf("\n"); + } + + printf("\n"); + + printf("The common widths of rectangles are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (k = 0; k < p; k++) + { + for (l = 0; l < q; l++) + { + printf("%d ", w[HMPI_RECT_INDEX(i, j, k, l, p, q)]); + } + } + printf("\n"); + } + } + + printf("\n"); + + printf("The common heights of rectangles are:\n"); + + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + for (k = 0; k < p; k++) + { + for (l = 0; l < q; l++) + { + printf("%d ", h[HMPI_RECT_INDEX(i, j, k, l, p, q)]); + } + } + printf("\n"); + } + } + + printf("\n"); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + /* + * If the user set 
trow and tcol as NULL, then the + * type_of_distribution has to be two-dimensional. + */ + int HMPI_Get_matrix_processor ( + int r, + int c, + int p, + int q, + const int *w, + const int *h, + const int *trow, + const int *tcol, + HMPI_Processor *root + ) + { + int i, j, rc; + + if ((p==1) || (q==1)) + { + if (p == 1) + { + rc = HMPI_Get_processor_1d( + r, + c, + q, + w, + h, + trow, + tcol, + &c + ); + + if (rc != HMPI_OK) + { + return rc; + } + + root->I = 0; + root->J = c; + + return HMPI_OK; + } + + rc = HMPI_Get_processor_1d( + r, + c, + p, + w, + h, + trow, + tcol, + &c + ); + + if (rc != HMPI_OK) + { + return rc; + } + + root->I = c; + root->J = 0; + + return HMPI_OK; + } + + rc = HMPI_Get_processor_2d( + r, + c, + p, + q, + w, + h, + trow, + tcol, + &i, + &j + ); + + if (rc != HMPI_OK) + { + return rc; + } + + root->I = i; + root->J = j; + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int HMPI_Get_my_width + ( + int i, + int j, + int p, + int q, + const double *speeds, + int type_of_distribution, + int m, + int n + ) + { + int rc; + int *w, *h, *tcol, *trow; + int width; + + w = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (w == NULL) + { + return MPC_ERR_NOMEM; + } + + h = (int*)malloc( + sizeof(int) + * + (p*q*p*q) + ); + + if (h == NULL) + { + return MPC_ERR_NOMEM; + } + + trow = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (trow == NULL) + { + return MPC_ERR_NOMEM; + } + + tcol = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (tcol == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = HMPI_Partition_matrix_2d( + p, + q, + 1, + speeds, + NULL, + NULL, + m, + n, + type_of_distribution, + w, + h, + trow, + tcol, + NULL, + NULL + ); + + if (rc != HMPI_OK) + { + return rc; + } + + width = w[i*q+j]; + + free(w); + free(h); + free(trow); + free(tcol); + + return width; + } + + /*-----------------------------------------------------*/ + + int HMPI_Get_my_height + ( + int i, + int j, + int p, + int q, + 
const double *speeds, + int type_of_distribution, + int m, + int n + ) + { + int rc; + int *w, *h, *tcol, *trow; + int height; + + w = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (w == NULL) + { + return MPC_ERR_NOMEM; + } + + h = (int*)malloc( + sizeof(int) + * + (p*q*p*q) + ); + + if (h == NULL) + { + return MPC_ERR_NOMEM; + } + + trow = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (trow == NULL) + { + return MPC_ERR_NOMEM; + } + + tcol = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (tcol == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = HMPI_Partition_matrix_2d( + p, + q, + 1, + speeds, + NULL, + NULL, + m, + n, + type_of_distribution, + w, + h, + trow, + tcol, + NULL, + NULL + ); + + if (rc != HMPI_OK) + { + return rc; + } + + height = HMPI_RECT_INDEX(i, j, i, j, p, q); + + free(w); + free(h); + free(trow); + free(tcol); + + return height; + } + + /*-----------------------------------------------------*/ + + int HMPI_Get_diagonal + ( + int i, + int j, + int p, + int q, + const int *w, + const int *h, + const int *trow, + const int *tcol + ) + { + return min(w[i*q+j], HMPI_RECT_INDEX(i, j, i, j, p, q)); + } + + /*-----------------------------------------------------*/ + + int _HMPI_a22elements + ( + int k, + int n, + int l, + int w, + int h, + int trow, + int tcol + ) + { + if (((k+1)%l) >= (tcol+w)) + { + return (_HMPI_myelements(n, l, w, h, trow, tcol) - + (1/2)*((k+1)/l+1)*(2*(n/l) - (k+1)/l)*_HMPI_myelements_g(n, l, w, h, trow, tcol)); + } + + if ((((k+1)%l) < trow) + && (((k+1)%l) < tcol + ) + ) + { + return (_HMPI_myelements(n, l, w, h, trow, tcol) - + ((1/2)*(n/l)*(n/l+1)- (1/2)*(n/l - (k+1)/l)*(n/l - (k+1)/l + 1))*_HMPI_myelements_g(n, l, w, h, trow, tcol)); + } + + if (((((k+1)%l) >= trow)) + && ((((k+1)%l) < (trow+h)) + ) + && + ((((k+1)%l) >= (tcol)) + && (((k+1)%l) < (tcol+w)) + ) + ) + { + return (_HMPI_myelements(n, l, w, h, trow, tcol) - + ((1/2)*(n/l)*(n/l+1)- (1/2)*(n/l - (k+1)/l)*(n/l - (k+1)/l + 1))*_HMPI_myelements_g(n, 
l, w, h, trow, tcol) - + ((n/l - (k+1)/l)*_HMPI_myelements_g(n, l, w, h, trow, tcol) - (1/2)*(tcol+w - (k+1)%l)*(tcol+w - (k+1)%l + 1)) + ); + } + + if (((((k+1)%l) >= trow)) + && ((((k+1)%l) < (trow+h)) + ) + && + (((k+1)%l) < (tcol) + ) + ) + { + return ((1/2)*(k+1/l)*(2*(n/l) - (k+1)/l - 1)*_HMPI_myelements_g(n, l, w, h, trow, tcol)); + } + + if ((((k+1)%l) > (trow+h)) + && + (((k+1)%l) < tcol + ) + ) + { + return ((1/2)*(k+1/l)*(2*(n/l) - (k+1)/l - 1)*_HMPI_myelements_g(n, l, w, h, trow, tcol)); + } + + if ((((k+1)%l) > (trow+h)) + && + ((((k+1)%l) >= (tcol)) + && (((k+1)%l) < (tcol+w)) + ) + ) + { + return (_HMPI_myelements(n, l, w, h, trow, tcol) - + (1/2)*(k+1/l)*(2*(n/l) - (k+1)/l - 1)*_HMPI_myelements_g(n, l, w, h, trow, tcol) - + (n/l - (k+1)/l)*h*((k+1)%l- tcol)); + } + + if (((((k+1)%l) < trow)) + && + ((((k+1)%l) >= (tcol)) + && (((k+1)%l) < (tcol+w)) + ) + ) + { + return (_HMPI_myelements(n, l, w, h, trow, tcol) - + ((1/2)*(n/l)*(n/l+1)- (1/2)*(n/l - (k+1)/l)*(n/l - (k+1)/l + 1))*_HMPI_myelements_g(n, l, w, h, trow, tcol) - + (n/l - (k+1)/l)*h*((k+1)%l- tcol)); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int _HMPI_myelements_g + ( + int n, + int l, + int w, + int h, + int trow, + int tcol + ) + { + if ((w == h) + && ((trow+h) == (tcol+w) + ) + ) + { + return ((1/2)*w*(w+1)); + } + + if (trow >= (tcol+w)) + { + return w*h; + } + + if ((trow+h) <= tcol) + { + return 0; + } + + if ((trow == 0) + && (tcol == 0 + ) + ) + { + if ((trow < (tcol+w)) + && ((trow+h) > tcol + ) + ) + { + return 1/2*h*(h+1); + } + } + + if ((trow < (tcol+w)) + && ((trow+h) > tcol + ) + ) + { + if ((trow == tcol) + && ((trow+h) == (tcol+w) + ) + ) + { + return 1/2*w*(w+1); + } + if ((trow == tcol) + && ((trow+h) < (tcol+w) + ) + ) + { + return (1/2*((trow+h)-tcol)*((trow+h)-tcol+1)); + } + if ((trow == tcol) + && ((trow+h) > (tcol+w) + ) + ) + { + return (w*h - 1/2*((tcol+w)-trow)*((tcol+w)-trow+1)); + } + if (((trow+h) == 
(tcol+w)) + && (trow > tcol + ) + ) + { + return (w*h - 1/2*((tcol+w)-trow)*((tcol+w)-trow+1)); + } + if (((trow+h) == (tcol+w)) + && (trow < tcol + ) + ) + { + return (1/2*((trow+h)-tcol)*((trow+h)-tcol+1)); + } + if ((trow > tcol) + && ((trow+h) < (tcol+w) + ) + ) + { + return (1/2*h*(trow-tcol + trow+h-tcol)); + } + if ((trow < tcol) + && ((trow+h) > (tcol+w) + ) + ) + { + return (1/2*w*(tcol-trow + tcol+w-trow)); + } + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int _HMPI_myelements + ( + int n, + int l, + int w, + int h, + int trow, + int tcol + ) + { + if ((w == h) + && ((trow+h) == (tcol+w) + ) + ) + { + return (((((n/l)*(n/l+1))/2) - (n/l))*w*h + (n/l)*(1/2)*w*(w+1)); + } + + if (trow >= (tcol+w)) + { + return (((n/l)*(n/l+1))/2)*w*h; + } + + if ((trow+h) <= tcol) + { + return (((((n/l)*(n/l+1))/2) - (n/l))*w*h); + } + + if ((trow == 0) + && (tcol == 0 + ) + ) + { + if ((trow < (tcol+w)) + && ((trow+h) > tcol + ) + ) + { + return (((((n/l)*(n/l+1))/2) - (n/l))*w*h + 1/2*(n/l)*h*(h+1)); + } + } + + if ((trow < (tcol+w)) + && ((trow+h) > tcol + ) + ) + { + if ((trow == tcol) + && ((trow+h) == (tcol+w) + ) + ) + { + return (((n/l)*(n/l+1)/2 - n/l)*w*h + 1/2*w*(w+1)); + } + if ((trow == tcol) + && ((trow+h) < (tcol+w) + ) + ) + { + return (((n/l)*(n/l+1)/2 - n/l)*w*h + (1/2*((trow+h)-tcol)*((trow+h)-tcol+1))); + } + if ((trow == tcol) + && ((trow+h) > (tcol+w) + ) + ) + { + return ((((n/l)*(n/l+1)/2 - n/l)*w*h + w*h - 1/2*((tcol+w)-trow)*((tcol+w)-trow+1))); + } + if (((trow+h) == (tcol+w)) + && (trow > tcol + ) + ) + { + return (((n/l)*(n/l+1)/2 - n/l)*w*h + (w*h - 1/2*((tcol+w)-trow)*((tcol+w)-trow+1))); + } + if (((trow+h) == (tcol+w)) + && (trow < tcol + ) + ) + { + return (((n/l)*(n/l+1)/2 - n/l)*w*h + 1/2*(trow+h-tcol)*(trow+h-tcol+1)); + } + if ((trow > tcol) + && ((trow+h) < (tcol+w) + ) + ) + { + return (((n/l)*(n/l+1)/2 - n/l)*w*h + 1/2*h*(trow-tcol + trow+h-tcol)); + } + if ((trow < tcol) + && ((trow+h) 
/*
 * Currently only square dense matrices are assumed.
 *
 * Returns the element count of processor (i, j) of a p x q
 * arrangement: the value of _HMPI_myelements() for its rectangle
 * when upper_or_lower is 'L', and n*n minus that value for any
 * other upper_or_lower.
 *
 *   m, gm  passed to _HMPI_myelements as matrix size and block size
 *   w, trow, tcol
 *          read at i*q+j
 *   h      read at HMPI_RECT_INDEX(i, j, i, j, p, q)
 *   gn, type_of_distribution
 *          not used
 *
 * The previous implementation passed the value of HMPI_RECT_INDEX
 * itself as the height instead of the entry of h at that position.
 */
int HMPI_Get_my_elements
(
    int m,
    int n,
    int gm,
    int gn,
    int i,
    int j,
    int p,
    int q,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol,
    int type_of_distribution,
    char upper_or_lower
)
{
    int cell = i*q+j;
    int lower = _HMPI_myelements(
                    m,
                    gm,
                    w[cell],
                    h[HMPI_RECT_INDEX(i, j, i, j, p, q)],
                    trow[cell],
                    tcol[cell]
                );

    if (upper_or_lower == 'L')
    {
        return lower;
    }

    return (n*n - lower);
}

/*-----------------------------------------------------*/

/*
 * Currently only square dense matrices are assumed.
 *
 * Returns the element count of processor (i, j) at step k: the
 * value of _HMPI_a22elements() for its rectangle when
 * upper_or_lower is 'L', and (m-k)*(m-k) minus that value for any
 * other upper_or_lower.
 *
 *   k, m, gm  passed to _HMPI_a22elements as step, matrix size and
 *             block size
 *   w, trow, tcol
 *             read at i*q+j
 *   h         read at HMPI_RECT_INDEX(i, j, i, j, p, q)
 *   n, gn, type_of_distribution
 *             not used
 *
 * The previous implementation passed the value of HMPI_RECT_INDEX
 * itself as the height instead of the entry of h at that position.
 */
int HMPI_Get_my_kk_elements
(
    int k,
    int m,
    int n,
    int gm,
    int gn,
    int i,
    int j,
    int p,
    int q,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol,
    int type_of_distribution,
    char upper_or_lower
)
{
    int cell = i*q+j;
    int lower = _HMPI_a22elements(
                    k,
                    m,
                    gm,
                    w[cell],
                    h[HMPI_RECT_INDEX(i, j, i, j, p, q)],
                    trow[cell],
                    tcol[cell]
                );

    if (upper_or_lower == 'L')
    {
        return lower;
    }

    return ((m-k)*(m-k) - lower);
}

/*-----------------------------------------------------*/
/*************************************************************************
*                                                                        *
*                 Heterogeneous Data Partitioning Interface              *
*                 =========================================              *
*                                                                        *
*  Copyright (c) 2002          Department of Computer Science,           *
*                              University College Dublin.                *
*                                                                        *
*  All rights reserved. We assume no responsibility for the use          *
*  or reliability of our software.                                       *
*                                                                        *
*************************************************************************/

/************************************************/
/* Partitioning interfaces for matrices         */
/*                                              */
/* Revision history                             */
/* 19-05-2003  --      Initial version          */
/************************************************/

#ifndef __HMPI_PARTITIONING_MATRICES_HH
#define __HMPI_PARTITIONING_MATRICES_HH

/*
 * Two-dimensional distributions
 */
#define HMPI_ROW_BASED    1
#define HMPI_COLUMN_BASED 2
#define HMPI_CARTESIAN    3

/*
 * General rectangular 1D distribution
 */
#define HMPI_GENERAL      4

/*
 * Types of formulation for general one-dimensional
 * rectangular distributions
 */
#define HMPI_DYNAMIC      1
#define HMPI_ITERATIVE    2
#define HMPI_REFINING     3

/*
 * Flat position of entry (a, b, c, d) in an array that is indexed
 * by two processor coordinates (a, b) and (c, d) of a p x q
 * arrangement: a*p*q*q + b*p*q + c*q + d.
 *
 * Every argument and the whole expansion are parenthesised so that
 * expression arguments (for example i+1) and surrounding operators
 * keep their meaning. Arguments are evaluated more than once, so
 * they must not have side effects.
 *
 * H is a short alias with the same expansion.
 */
#define HMPI_RECT_INDEX(a, b, c, d, p, q) \
    ((a)*(p)*(q)*(q) + (b)*(p)*(q) + (c)*(q) + (d))
#define H(a, b, c, d, p, q) HMPI_RECT_INDEX(a, b, c, d, p, q)

/*
 * Callback types supplied by the caller of the 1D partitioning
 * routines.
 */
typedef double (*HMPI_Lower_bound) (
    int p,
    const double *speeds,
    int m,
    int n
);

typedef double (*HMPI_DP_function) (
    int rowsorcolumns,
    int rectangles,
    int p,
    const double *speeds,
    double **previous,
    int *r
);

typedef double (*HMPI_Iterative_function) (
    int p,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol
);

typedef double (*HMPI_Refining_function) (
    int p,
    const double *speeds,
    int m,
    int n,
    const int *oldw,
    const int *oldh,
    const int *oldtrow,
    const int *oldtcol,
    const int *neww,
    const int *newh,
    const int *newtrow,
    const int *newtcol
);

/*
 * Partitioning entry points
 */
int HMPI_Partition_matrix_2d(
    int p,
    int q,
    int pn,
    const double *speeds,
    const int *psizes,
    const int *mlimits,
    int m,
    int n,
    int type_of_distribution,
    int *w,
    int *h,
    int *trow,
    int *tcol,
    int *ci,
    int *cj
);

int HMPI_Partition_matrix_1d(
    int p,
    int pn,
    const double *speeds,
    const int *psizes,
    const int *mlimits,
    int m,
    int n,
    int formulation,
    HMPI_Lower_bound lb,
    HMPI_DP_function dpf,
    HMPI_Iterative_function cf,
    HMPI_Refining_function rf,
    const int *iw,
    const int *ih,
    const int *itrow,
    const int *itcol,
    int type_of_distribution,
    int *w,
    int *h,
    int *trow,
    int *tcol,
    int *ci
);

int HMPI_Partition_matrix_1d_dp(
    int p,
    int pn,
    const double *speeds,
    const int *psizes,
    const int *mlimits,
    int m,
    int n,
    HMPI_Lower_bound lb,
    HMPI_DP_function dpf,
    int type_of_distribution,
    int *w,
    int *h,
    int *trow,
    int *tcol,
    int *ci
);

int HMPI_Partition_matrix_1d_iterative(
    int p,
    int pn,
    const double *speeds,
    const int *psizes,
    const int *mlimits,
    int m,
    int n,
    HMPI_Lower_bound lb,
    HMPI_Iterative_function cf,
    int *w,
    int *h,
    int *trow,
    int *tcol,
    int *ci
);

int HMPI_Partition_matrix_1d_refining(
    int p,
    int pn,
    const double *speeds,
    const int *psizes,
    const int *mlimits,
    int m,
    int n,
    HMPI_Lower_bound lb,
    HMPI_Refining_function rf,
    int *w,
    int *h,
    int *trow,
    int *tcol,
    int *ci
);

/*
 * Owner lookup
 */
int HMPI_Get_processor_2d(
    int i,
    int j,
    int p,
    int q,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol,
    int *ci,
    int *cj
);

int HMPI_Get_processor_1d (
    int i,
    int j,
    int p,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol,
    int *c
);

/* Coordinates (I, J) of a processor in the arrangement. */
typedef struct {
    int I;
    int J;
} HMPI_Processor;

int HMPI_Get_matrix_processor (
    int r,
    int c,
    int p,
    int q,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol,
    HMPI_Processor *root
);

/*
 * Printing
 */
int HMPI_Print_rectangle_1d(
    int p,
    int m,
    int n,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol,
    const int *ci
);

int HMPI_Print_rectangle_2d(
    int p,
    int q,
    int m,
    int n,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol,
    const int *ci,
    const int *cj
);

/*
 * Queries on a partitioning
 */
int HMPI_Common_height
(
    int top_row_1,
    int bottom_row_1,
    int top_row_2,
    int bottom_row_2
);

int HMPI_Get_my_width
(
    int i,
    int j,
    int p,
    int q,
    const double *speeds,
    int type_of_distribution,
    int m,
    int n
);

int HMPI_Get_my_height
(
    int i,
    int j,
    int p,
    int q,
    const double *speeds,
    int type_of_distribution,
    int m,
    int n
);

int HMPI_Get_diagonal
(
    int i,
    int j,
    int p,
    int q,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol
);

int HMPI_Get_my_elements
(
    int m,
    int n,
    int gm,
    int gn,
    int i,
    int j,
    int p,
    int q,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol,
    int type_of_distribution,
    char upper_or_lower
);

int HMPI_Get_my_kk_elements
(
    int k,
    int m,
    int n,
    int gm,
    int gn,
    int i,
    int j,
    int p,
    int q,
    const int *w,
    const int *h,
    const int *trow,
    const int *tcol,
    int type_of_distribution,
    char upper_or_lower
);

#endif /* __HMPI_PARTITIONING_MATRICES_HH */
+ * This is of complexity O(p*p) + */ + int __HMPI_Homogeneous_distribution_with_mlimits + ( + int p, + int n, + const int* mlimits, + int *np + ) + { + int i, j, rc; + int sum = 0; + int bound_exceeded = 0; + + for (i = 0; i < p; i++) + { + np[i] = n/p; + } + + for (i = 0; i < (n%p); i++) + { + np[i] += 1; + } + + for (i = 0; i < p; i++) + { + if (np[i] > mlimits[i]) + { + np[i] = mlimits[i]; + bound_exceeded = 1; + break; + } + } + + /* + * For all the processors whose upper bounds + * are exceeded, we assign the number of elements + * equal to their upper bounds. However we proceed + * by one processor at the time. TBD + */ + if (bound_exceeded == 0) + { + return HMPI_OK; + } + + { + int *boundsm, *npm; + int nm = n - mlimits[i]; + int ind = 0; + + npm = (int*)malloc( + sizeof(int) + * + (p-1) + ); + + if (npm == NULL) + { + return MPC_ERR_NOMEM; + } + + boundsm = (int*)malloc( + sizeof(int) + * + (p-1) + ); + + if (boundsm == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < p; j++) + { + if (j == i) + { + continue; + } + + npm[ind] = np[j]; + boundsm[ind] = mlimits[j]; + ind++; + } + + rc = __HMPI_Homogeneous_distribution_with_mlimits( + p-1, + nm, + boundsm, + npm + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (ind = 0, j = 0; j < p; j++) + { + if (j == i) + { + continue; + } + + np[j] = npm[ind]; + ind++; + } + + free(boundsm); + free(npm); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + /* + * This is for ordered sets. + * All the processors are homogeneous. That is they exhibit + * same speeds. However each processor has an upper bound on + * number of elements it can store. The sum of weights in each + * partition should be the same. 
+ */ + int __HMPI_Homogeneous_distribution_with_mlimits_and_weights_ordered_sets + ( + int p, + int n, + const int* mlimits, + const int* w, + int *np + ) + { + int i, j, k, rc; + double sumw = 0; + double sumwcum = 0; + int *Size_of_bin; + double *wallocations; + double sumcum = 0; + + wallocations = (double*)malloc( + sizeof(double) + * + (p+1) + ); + + if (wallocations == NULL) + { + return MPC_ERR_NOMEM; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + for (i = 0; i < p; i++) + { + Size_of_bin[i] = sumw/p; + sumwcum += Size_of_bin[i]; + np[i] = 0; + } + + Size_of_bin[0] += sumw - sumwcum; + + wallocations[0] = 0.0; + for (i = 1; i <= p; i++) + { + wallocations[i] = wallocations[i-1] + Size_of_bin[i-1]; + } + + for (i = 0; i < n; i++) + { + sumcum += w[i]; + + for (j = 0; j < p; j++) + { + if ((sumcum > wallocations[j]) + && (sumcum <= wallocations[j+1] + ) + ) + { + if (j == (p-1)) + { + np[j] = n-i; + + free(wallocations); + free(Size_of_bin); + + if (np[j] > mlimits[j]) + { + return HMPI_ERR_PARTITION_SET; + } + + return HMPI_OK; + } + + np[j]++; + + if (np[j] > mlimits[j]) + { + np[j] = mlimits[j]; + + free(wallocations); + free(Size_of_bin); + + return __HMPI_Homogeneous_distribution_with_mlimits_and_weights_ordered_sets( + p-(j+1), + n-i, + mlimits+j+1, + w+i, + np+j+1 + ); + } + } + } + } + + free(wallocations); + free(Size_of_bin); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Partition_set_homogeneous + ( + int p, + const int *mlimits, + int n, + const int *w, + int ordering, + int processor_ordering, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, rc; + int sum = 0; + double sumd = 0; + + /* + * Homogeneous distribution for unordered sets. 
+ * + * The following criterion and restriction should + * be satisfied: + * The number of elements in each partition should be + * proportional to the speed of the processor + * owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. + */ + if ((w == NULL) + && (mlimits == NULL) + && (ordering == 0 + ) + ) + { + if (n < p) + { + for (i = 0; i < n; i++) + { + np[i] = 1; + } + + for (i = n; i < p; i++) + { + np[i] = 0; + } + + return HMPI_OK; + } + + for (i = 0; i < p; i++) + { + np[i] = n/p; + } + + for (i = 0; i < (n%p); i++) + { + np[i] += 1; + } + + return HMPI_OK; + } + + /* + * There is an upper bound on the number of elements + * that each processor can store. + */ + if ((w == NULL) + && (mlimits != NULL) + && (ordering == 0 + ) + ) + { + for (i = 0; i < p; i++) + { + sumd += mlimits[i]; + } + + if (sumd < n) + { + printf( + "This problem size %d cannot be solved, " + "sum of upper bounds on the number of elements exceeded\n", + n + ); + + return HMPI_ERR_MLIMITS; + } + + if (sumd == n) + { + for (i = 0; i < p; i++) + { + np[i] = mlimits[i]; + } + + return HMPI_OK; + } + + if (n < p) + { + for (i = 0; i < n; i++) + { + np[i] = 1; + } + + for (i = n; i < p; i++) + { + np[i] = 0; + } + + return HMPI_OK; + } + + rc = __HMPI_Homogeneous_distribution_with_mlimits( + p, + n, + mlimits, + np + ); + + if (rc != HMPI_OK) + { + printf( + "Problems with homogeneous partitioning of set " + "with upper bounds on the number of elements " + "that can be stored by each processor\n" + ); + + return HMPI_ERR_PARTITION_SET; + } + + return HMPI_OK; + } + + /* + * This is a NP-hard problem. + * The set should be split such that the sum + * of the weights in each subset is the same. + * A naive implementation is provided here + * This is of complexity O(n*n). 
+ * Total complexity = O(n*n) + O(n*p) + * ^^^^^^ + * sorting of weights + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 0 + ) + ) + { + int *Size_of_bin, *Current_bin_capacity; + double sumw = 0; + double sumwcum = 0; + int *rearranged_weights; + double *speeds; + int *rearrangedw; + int temp, temp_number, *allocations, *chosen; + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + for (i = 0; i < p; i++) + { + Size_of_bin[i] = sumw/p; + sumwcum += Size_of_bin[i]; + Current_bin_capacity[i] = 0; + } + + Size_of_bin[0] += sumw - sumwcum; + + /* + * We rearrange the element weights + * in descending order. + */ + { + rearranged_weights = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearranged_weights == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedw = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearrangedw == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + rearrangedw[i] = i; + rearranged_weights[i] = w[i]; + } + + for (i = 0; i < n; i++) + { + for (j = 1; j < n; j++) + { + if (rearranged_weights[j-1] < rearranged_weights[j]) + { + temp = rearranged_weights[j-1]; + rearranged_weights[j-1] = rearranged_weights[j]; + rearranged_weights[j] = temp; + + temp_number = rearrangedw[j-1]; + rearrangedw[j-1] = rearrangedw[j]; + rearrangedw[j] = temp_number; + } + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + n + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + chosen = (int*)malloc( + sizeof(int) + * + n + ); + + if (chosen == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + chosen[i] = 0; + } + + /* + * Fill into the bins until the bins are full + * or until any addition exceeds the capacity + */ + for (i = 
0; i < p; i++) + { + for (j = 0; j < n; j++) + { + if (chosen[j] == 1) + { + continue; + } + + if (Current_bin_capacity[i] == Size_of_bin[i]) + { + continue; + } + + if ((Current_bin_capacity[i] + rearranged_weights[j]) > Size_of_bin[i]) + { + continue; + } + + Current_bin_capacity[i] += rearranged_weights[j]; + allocations[j] = i; + chosen[j] = 1; + } + } + + /* + * Fill into the bin that causes the minumum + * waste + */ + for (i = 0; i < n; i++) + { + int temp = INT_MAX; + int optimal_bin; + + if (chosen[i] == 1) + { + continue; + } + + for (j = 0; j < p; j++) + { + int waste = Current_bin_capacity[j] + + + rearranged_weights[i] + - + Size_of_bin[j]; + + if (waste < temp) + { + temp = waste; + optimal_bin = j; + } + } + + Current_bin_capacity[optimal_bin] += rearranged_weights[i]; + allocations[i] = optimal_bin; + chosen[i] = 1; + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + free(rearranged_weights); + free(rearrangedw); + free(allocations); + free(chosen); + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + speeds[i] = 1.0; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds); + + return HMPI_OK; + } + + /* + * This is an NP-hard problem. 
+ * The set should be split such that the + * sum of the weights in each subset is the same and the + * number of elements assigned to each processor + * must not exceed the upper bound it can store. + * A naive implementation is provided here + */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 0 + ) + ) + { + int *Size_of_bin, *Current_bin_capacity; + double *speeds; + int suml = 0; + double sumw = 0; + double sumwcum = 0; + int *rearranged_weights; + int temp, temp_number, temp_mlimit, *allocations, *chosen, *Number_in_bin; + int *rearrangedw; + int *rearranged_mlimits, *rearrangedp; + + for (i = 0; i < p; i++) + { + sumd += mlimits[i]; + } + + if (sumd < n) + { + printf( + "This problem %d size cannot be solved, " + "sum of upper bounds exceeded\n" + , n + ); + + return HMPI_ERR_MLIMITS; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + Size_of_bin[i] = sumw/p; + sumwcum += Size_of_bin[i]; + } + + Size_of_bin[0] += sumw - sumwcum; + + /* + * We rearrange the element weights + * in descending order. 
+ */ + { + rearranged_weights = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearranged_weights == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedw = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearrangedw == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + rearrangedw[i] = i; + rearranged_weights[i] = w[i]; + } + + for (i = 0; i < n; i++) + { + for (j = 1; j < n; j++) + { + if (rearranged_weights[j-1] < rearranged_weights[j]) + { + temp = rearranged_weights[j-1]; + rearranged_weights[j-1] = rearranged_weights[j]; + rearranged_weights[j] = temp; + + temp_number = rearrangedw[j-1]; + rearrangedw[j-1] = rearrangedw[j]; + rearrangedw[j] = temp_number; + } + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + n + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + speeds[i] = 1.0; + } + + if (sumd == n) + { + int ind = 0; + + /* + * TBD: + * This looks like a NP-hard problem. + * We know the number of elements in each subset + * given by the upper bound. + * The sum of weights of the elements in each + * subset should be the same. + * We provide a naive implementation here. + * This is of complexity O(n*n). + * We arrange the processors in increasing + * order of their upper bounds and we arrange + * the weights in decreasing order. 
+ */ + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_mlimits[i] = mlimits[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_mlimits[j-1] > rearranged_mlimits[j]) + { + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_mlimit = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_mlimit; + } + } + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < rearranged_mlimits[i]; j++) + { + allocations[ind] = i; + Current_bin_capacity[rearrangedp[i]] += rearranged_weights[ind]; + ind++; + } + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = rearrangedp[allocations[i]]; + } + + free(rearranged_weights); + free(rearrangedw); + free(rearranged_mlimits); + free(rearrangedp); + free(allocations); + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds); + + return HMPI_OK; + } + + Number_in_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Number_in_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Number_in_bin[i] = 0; + } + + chosen = (int*)malloc( + sizeof(int) + * + n + ); + + if (chosen == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + chosen[i] = 0; + } + + for (i = 0; i < p; i++) + { + for (j = 0; j < n; j++) + { + if (chosen[j] == 1) + { + continue; + } + + if (Current_bin_capacity[i] == Size_of_bin[i]) + { + continue; + } + + if ((Current_bin_capacity[i] + rearranged_weights[j]) > Size_of_bin[i]) + { + continue; + } + + if ((Number_in_bin[i] + 1) > mlimits[i]) + { + break; + } + + Number_in_bin[i]++; + Current_bin_capacity[i] += rearranged_weights[j]; + allocations[j] = i; + chosen[j] = 1; + } + } + + for (i = 0; i < n; i++) + { + int temp = INT_MAX; + int optimal_bin = -1; + + if (chosen[i] == 1) + { + continue; + } + + for (j = 0; j < p; j++) + { + int waste; + + if (Number_in_bin[j] >= mlimits[j]) + { + continue; + } + + waste = Current_bin_capacity[j] + + + rearranged_weights[i] + - + Size_of_bin[j]; + + if (waste < temp) + { + temp = waste; + optimal_bin = j; + } + } + + Current_bin_capacity[optimal_bin] += rearranged_weights[i]; + allocations[i] = optimal_bin; + chosen[i] = 1; + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + free(rearranged_weights); + free(rearrangedw); + free(allocations); + free(chosen); + free(Number_in_bin); + + if (metric == NULL) + { + free(Size_of_bin); + 
free(Current_bin_capacity); + free(speeds); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for + * array elements of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds); + + return HMPI_OK; + } + + /* + * Homogeneous distribution for ordered sets + * + * The following criterion and restriction should + * be satisfied: + * The number of elements in each partition should be + * proportional to the speed of the processor + * owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. 
+ * + */ + if ((w == NULL) + && (mlimits == NULL) + && (ordering == 1 + ) + ) + { + if (n < p) + { + np[0] = 0; + + for (i = 1; i <= n; i++) + { + np[i] = np[i-1] + 1; + } + + for (i = n+1; i <= p; i++) + { + np[i] = np[n]; + } + + return HMPI_OK; + } + + np[0] = 0; + np[1] = n/p + n%p; + + for (i = 2; i <= p; i++) + { + np[i] = np[i-1] + (n/p); + } + + return HMPI_OK; + } + + if ((w == NULL) + && (mlimits != NULL) + && (ordering == 1 + ) + ) + { + int *allocations; + + for (i = 0; i < p; i++) + { + sumd += mlimits[i]; + } + + if (sumd < n) + { + printf( + "This problem size %d cannot be solved, " + "sum of memory bounds exceeded\n", + n + ); + + return HMPI_ERR_MLIMITS; + } + + if (sumd == n) + { + np[0] = 0; + + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + mlimits[i-1]; + } + + return HMPI_OK; + } + + if (n < p) + { + np[0] = 0; + + for (i = 1; i <= n; i++) + { + np[i] = np[i-1] + 1; + } + + for (i = n+1; i <= p; i++) + { + np[i] = np[n]; + } + + return HMPI_OK; + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Homogeneous_distribution_with_mlimits( + p, + n, + mlimits, + allocations + ); + + if (rc != HMPI_OK) + { + printf( + "Problems with homogeneous partitioning of set " + "with memory bounds on the number of elements " + "that can be stored by each processor\n" + ); + + return HMPI_ERR_PARTITION_SET; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * The following criterion and restriction should + * be satisfied: + * The sum of weights of the elements in each + * partition should be proportional to the speeda + * of the processor owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. 
+ * + * This case is of complexity O(n*p) + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 1 + ) + ) + { + int *Size_of_bin; + double *wallocations; + int *allocations; + double sumcum = 0; + double sumwcum = 0; + double sumw = 0; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + wallocations = (double*)malloc( + sizeof(double) + * + (p+1) + ); + + if (wallocations == NULL) + { + return MPC_ERR_NOMEM; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + for (i = 0; i < p; i++) + { + Size_of_bin[i] = sumw/p; + sumwcum += Size_of_bin[i]; + allocations[i] = 0; + } + + Size_of_bin[0] += sumw - sumwcum; + + wallocations[0] = 0.0; + for (i = 1; i <= p; i++) + { + wallocations[i] = wallocations[i-1] + Size_of_bin[i-1]; + } + + for (i = 0; i < n; i++) + { + sumcum += w[i]; + + for (j = 0; j < p; j++) + { + if ((sumcum > wallocations[j]) + && (sumcum <= wallocations[j+1] + ) + ) + { + allocations[j]++; + break; + } + } + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + free(wallocations); + free(Size_of_bin); + + return HMPI_OK; + } + + /* + * This looks like a NP-hard problem. + * Processors cannot be reordered. + * We allocate the elements taking into + * account the upper bounds of the processors and + * also the proportionality of the speeds of the + * processors to the sum of weights of the elements. + * It could happen that at the final step, the upper + * upper bound of the final processor is exceeded in + * which case we just allocate the number of elements + * equal to their upper bounds right from the start. + * This is just an approximation. 
+ */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 1) + && (processor_ordering == 0 + ) + ) + { + int *Size_of_bin; + int *Current_bin_capacity; + int *allocations; + double sumw = 0; + double sumwcum = 0; + int ind = 0; + double *speeds; + + for (i = 0; i < p; i++) + { + sumd += mlimits[i]; + } + + if (sumd < n) + { + printf( + "This problem size %d cannot be solved, " + "sum of upper bounds exceeded\n", + n + ); + + return HMPI_ERR_MLIMITS; + } + + if (sumd == n) + { + np[0] = 0; + + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + mlimits[i-1]; + } + + return HMPI_OK; + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Homogeneous_distribution_with_mlimits_and_weights_ordered_sets( + p, + n, + mlimits, + w, + allocations + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + rc = __HMPI_Homogeneous_distribution_with_mlimits( + p, + n, + mlimits, + allocations + ); + + if (rc != HMPI_OK) + { + printf( + "Problems with homogeneous partitioning of set " + "with memory bounds on the number of elements " + "that can be stored by each processor\n" + ); + + return HMPI_ERR_PARTITION_SET; + } + } + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + if (metric == NULL) + { + free(allocations); + return HMPI_OK; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + Size_of_bin[i] = sumw/p; + sumwcum += Size_of_bin[i]; + } + + Size_of_bin[0] += sumw - sumwcum; + + for (i = 0; i < p; i++) + { + for (j = 0; j < allocations[i]; j++) + { + Current_bin_capacity[i] += w[ind++]; + } + } 
+ + speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + speeds[i] = 1.0; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds); + free(allocations); + + return HMPI_OK; + } + + /* + * This looks like a NP-hard problem. + * Processors can be reordered. + */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 1) + && (processor_ordering == 1 + ) + ) + { + int *Size_of_bin; + int *Current_bin_capacity; + int *allocations; + int ind = 0; + double sumw = 0; + double sumwcum = 0; + int *rearranged_mlimits, *rearrangedp; + int temp; + double *speeds; + + for (i = 0; i < p; i++) + { + sumd += mlimits[i]; + } + + if (sumd < n) + { + printf( + "This problem size %d cannot be solved, " + "sum of upper bounds exceeded\n", + n + ); + + return HMPI_ERR_MLIMITS; + } + + if (sumd == n) + { + for (i = 0; i < p; i++) + { + np[2*i] = i; + } + + for (i = 0; i < p; i++) + { + np[2*i+1] = mlimits[i]; + } + + return HMPI_OK; + } + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_mlimits[i] = mlimits[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if 
(rearranged_mlimits[j-1] > rearranged_mlimits[j]) + { + temp = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp; + + temp = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp; + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Homogeneous_distribution_with_mlimits_and_weights_ordered_sets( + p, + n, + rearranged_mlimits, + w, + allocations + ); + + if (rc == HMPI_ERR_PARTITION_SET) + { + rc = __HMPI_Homogeneous_distribution_with_mlimits( + p, + n, + rearranged_mlimits, + allocations + ); + + if (rc != HMPI_OK) + { + printf( + "Problems with homogeneous partitioning of set " + "with memory bounds on the number of elements " + "that can be stored by each processor\n" + ); + + return HMPI_ERR_PARTITION_SET; + } + } + + if (rc != HMPI_OK) + { + return rc; + } + + for (ind = 0, i = 0; i < p; i++) + { + np[ind++] = rearrangedp[i]; + np[ind++] = allocations[i]; + } + + if (metric == NULL) + { + free(allocations); + free(rearranged_mlimits); + free(rearrangedp); + + return HMPI_OK; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + Size_of_bin[rearrangedp[i]] = sumw/p; + sumwcum += Size_of_bin[i]; + } + + Size_of_bin[rearrangedp[0]] += sumw - sumwcum; + + for (i = 0; i < p; i++) + { + for (j = 0; j < allocations[i]; j++) + { + Current_bin_capacity[i] += w[ind++]; + } + } + + speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + speeds[i] = 1.0; + } + + /* + * The ideal sum of 
weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds); + free(allocations); + free(rearranged_mlimits); + free(rearrangedp); + + return HMPI_OK; + } + + printf("Parameters provided are invalid\n"); + return HMPI_ERR_PARTITION_SET; + } + + /*-----------------------------------------------------*/ +
+ /*
+  * Distribute n unweighted elements among p processors whose speeds
+  * are single numbers, subject to an upper bound on the number of
+  * elements each processor can store.
+  *
+  * p      - number of processors
+  * speeds - speeds[i] is the speed of processor i
+  * bounds - bounds[i] is the upper bound on the number of elements
+  *          processor i can store
+  * n      - number of elements to distribute
+  * np     - output; np[i] receives the number of elements allocated
+  *          to processor i
+  *
+  * Returns HMPI_OK on success, HMPI_ERR_PARTITION_SET if the bounds
+  * sum to less than n, MPC_ERR_NOMEM if scratch memory cannot be
+  * allocated, or the error code reported by
+  * __HMPI_Number_of_elements_proportional_to_speed.
+  */
+ int __HMPI_Speeds_are_single_numbers_with_mlimits
+ (
+     int p,
+     const double *speeds,
+     const int *bounds,
+     int n,
+     int *np
+ )
+ {
+     int i, j, ind, rc;
+     int nm;
+     double sumd = 0;
+     double *speedsm;
+     int *npm;
+     int *boundsm;
+
+     for (i = 0; i < p; i++)
+     {
+         sumd += bounds[i];
+     }
+
+     if (sumd < n)
+     {
+         printf(
+             "The problem size %d cannot be solved because "
+             "memory bounds on the number of elements "
+             "that can be stored by each processor are exceeded\n",
+             n
+         );
+
+         return HMPI_ERR_PARTITION_SET;
+     }
+
+     /*
+      * The bounds leave no freedom: every processor is
+      * filled up to its upper bound.
+      */
+     if (sumd == n)
+     {
+         for (i = 0; i < p; i++)
+         {
+             np[i] = bounds[i];
+         }
+
+         return HMPI_OK;
+     }
+
+     rc = __HMPI_Number_of_elements_proportional_to_speed(
+              p,
+              n,
+              speeds,
+              np
+     );
+
+     if (rc != HMPI_OK)
+     {
+         return rc;
+     }
+
+     /*
+      * Clamp the first processor whose proportional share
+      * exceeds its upper bound.
+      */
+     for (i = 0; i < p; i++)
+     {
+         if (np[i] > bounds[i])
+         {
+             np[i] = bounds[i];
+             break;
+         }
+     }
+
+     if (i == p)
+     {
+         /* Every proportional share already fits its bound. */
+         return HMPI_OK;
+     }
+
+     /*
+      * Processor i is now full. Redistribute the remaining
+      * elements among the other p-1 processors by applying
+      * the same procedure recursively.
+      */
+     nm = n - bounds[i];
+
+     speedsm = (double*)malloc(sizeof(double) * (p-1));
+     npm = (int*)malloc(sizeof(int) * (p-1));
+     boundsm = (int*)malloc(sizeof(int) * (p-1));
+
+     /*
+      * Check the buffers that were just allocated (not the
+      * input array speeds) and release whatever was obtained;
+      * free(NULL) is a no-op.
+      */
+     if ((speedsm == NULL)
+         || (npm == NULL)
+         || (boundsm == NULL))
+     {
+         free(speedsm);
+         free(npm);
+         free(boundsm);
+
+         return MPC_ERR_NOMEM;
+     }
+
+     for (ind = 0, j = 0; j < p; j++)
+     {
+         if (j == i)
+         {
+             continue;
+         }
+
+         npm[ind] = np[j];
+         boundsm[ind] = bounds[j];
+         speedsm[ind] = speeds[j];
+         ind++;
+     }
+
+     rc = __HMPI_Speeds_are_single_numbers_with_mlimits(
+              p-1,
+              speedsm,
+              boundsm,
+              nm,
+              npm
+     );
+
+     /* Copy the sub-solution back only if it is valid. */
+     if (rc == HMPI_OK)
+     {
+         for (ind = 0, j = 0; j < p; j++)
+         {
+             if (j == i)
+             {
+                 continue;
+             }
+
+             np[j] = npm[ind];
+             ind++;
+         }
+     }
+
+     /* Single release point, reached on success and on failure. */
+     free(speedsm);
+     free(boundsm);
+     free(npm);
+
+     return rc;
+ }
+ + /*-----------------------------------------------------*/ + + int HMPI_Partition_set + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int ordering, + int processor_ordering, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, rc; + + /* + * Check the parameters provided by the user + * If w is not NULL and the set is well ordered, then the + * user has to specify if the implementations can reorder + * the processors before partitioning. 
+ */ + if ((w != NULL) + && (ordering == 1 + ) + ) + { + if ((speeds == NULL) + && (mlimits != NULL + ) + ) + { + if ((processor_ordering != 0) + && (processor_ordering != 1 + ) + ) + { + printf( + "If weights of the elements are not NULL and the set is well ordered, " + "processor reordering has to be 0 or 1\n" + ); + return HMPI_ERR_PARTITION_SET; + } + } + + if (speeds != NULL) + { + if ((processor_ordering != 0) + && (processor_ordering != 1 + ) + ) + { + printf( + "If weights of the elements are not NULL and the set is well ordered, " + "processor reordering has to be 0 or 1\n" + ); + return HMPI_ERR_PARTITION_SET; + } + } + } + + /* + * Distribution of the set amongst processors + * that are homogeneous + */ + if (speeds == NULL) + { + return __HMPI_Partition_set_homogeneous( + p, + mlimits, + n, + w, + ordering, + processor_ordering, + type_of_metric, + umf, + metric, + np + ); + } + + /* + * Heterogeneous distribution for non-ordered sets. + * + * The following criterion and restriction should + * be satisfied: + * The number of elements in each partition should be + * proportional to the speed of the processor + * owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. 
+ * + * Speeds are single numbers, Set elements has + * no weights and no bounds on the number of + * elements that can be stored by each processor + */ + if ((w == NULL) + && (mlimits == NULL) + && (ordering == 0) + && (pn == 1 + ) + ) + { + return __HMPI_Number_of_elements_proportional_to_speed( + p, + n, + speeds, + np + ); + } + + /* + * Speeds are single numbers, set elements have no + * weights and there is a limit on the number of + * elements that can be stored by each processor + */ + if ((w == NULL) + && (mlimits != NULL) + && (ordering == 0) + && (pn == 1 + ) + ) + { + return __HMPI_Speeds_are_single_numbers_with_mlimits( + p, + speeds, + mlimits, + n, + np + ); + } + + /* + * Speeds are functions of problem size, set elements + * have no weights and no bounds on the number of + * elements that can be stored by each processor + */ + if ((w == NULL) + && (mlimits == NULL) + && (ordering == 0) + && (pn > 1 + ) + ) + { + double *speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + n, + speeds_opt, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(speeds_opt); + + return HMPI_OK; + } + + /* + * Speeds are functions of problem size, no weights and there + * is a limit on the number of elements that can + * be stored by each processor + */ + if ((w == NULL) + && (mlimits != NULL) + && (ordering == 0) + && (pn > 1 + ) + ) + { + double *speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Speed_function_of_problem_size_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + n, + speeds_opt, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(speeds_opt); + + return HMPI_OK; + } + + /* + * The following criterion and restriction should + * be satisfied: + * The sum of weights of the elements in each + * 
partition should be proportional to the speeds + * of the processor owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. + * + * Speeds are single numbers, set has weighted weights + * and no bounds on the number of elements + * This is a NP-hard problem. + * A naive implementation is provided here. + * This is of complexity O(n*n) + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 0) + && (pn == 1 + ) + ) + { + double *rearranged_speeds; + int *rearranged_weights; + int *rearrangedp; + int *rearrangedw; + double temp; + int temp_number; + int *allocations; + + rearranged_speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_speeds[i] = speeds[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_speeds[j-1] < rearranged_speeds[j]) + { + temp = rearranged_speeds[j-1]; + rearranged_speeds[j-1] = rearranged_speeds[j]; + rearranged_speeds[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + } + } + } + + /* + * We rearrange the element weights + * in descending order. 
+ */ + { + rearranged_weights = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearranged_weights == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedw = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearrangedw == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + rearrangedw[i] = i; + rearranged_weights[i] = w[i]; + } + + for (i = 0; i < n; i++) + { + for (j = 1; j < n; j++) + { + if (rearranged_weights[j-1] < rearranged_weights[j]) + { + temp_number = rearranged_weights[j-1]; + rearranged_weights[j-1] = rearranged_weights[j]; + rearranged_weights[j] = temp_number; + + temp_number = rearrangedw[j-1]; + rearrangedw[j-1] = rearrangedw[j]; + rearrangedw[j] = temp_number; + } + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + n + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Sum_of_weights_for_nonordered_set( + p, + n, + rearranged_speeds, + rearranged_weights, + type_of_metric, + umf, + metric, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = rearrangedp[allocations[i]]; + } + + free(rearranged_speeds); + free(rearrangedp); + free(rearrangedw); + free(rearranged_weights); + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are single numbers, set has weighted elements + * and there is a limit on the number of elements that can + * be stored by each processor. + * This is a NP-hard problem. + * A naive implementation is provided here. 
+ */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 0) + && (pn == 1 + ) + ) + { + double *rearranged_speeds; + int *rearranged_weights; + int *rearrangedp; + int *rearrangedw; + int *rearranged_mlimits; + double temp; + int temp_number; + int *allocations; + int temp_mlimit; + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_speeds[i] = speeds[i]; + rearranged_mlimits[i] = mlimits[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_speeds[j-1] < rearranged_speeds[j]) + { + temp = rearranged_speeds[j-1]; + rearranged_speeds[j-1] = rearranged_speeds[j]; + rearranged_speeds[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_mlimit = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_mlimit; + } + } + } + + /* + * We rearrange the element weights + * in descending order. 
+ */ + { + rearranged_weights = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearranged_weights == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedw = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearrangedw == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + rearrangedw[i] = i; + rearranged_weights[i] = w[i]; + } + + for (i = 0; i < n; i++) + { + for (j = 1; j < n; j++) + { + if (rearranged_weights[j-1] < rearranged_weights[j]) + { + temp = rearranged_weights[j-1]; + rearranged_weights[j-1] = rearranged_weights[j]; + rearranged_weights[j] = temp; + + temp_number = rearrangedw[j-1]; + rearrangedw[j-1] = rearrangedw[j]; + rearrangedw[j] = temp_number; + } + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + n + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Apply_mlimits_to_unordered_sum_of_weights ( + p, + n, + rearranged_speeds, + rearranged_mlimits, + rearranged_weights, + type_of_metric, + umf, + metric, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = rearrangedp[allocations[i]]; + } + + free(rearranged_speeds); + free(rearranged_mlimits); + free(rearranged_weights); + free(rearrangedp); + free(rearrangedw); + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are functions of problem size + * set with weighted elements and + * no bounds on the number of elements + * No known results + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 0) + && (pn > 1 + ) + ) + { + /* + * Naive implementation + */ + return __HMPI_Sum_of_weights_for_nonordered_set_speed_functions( + p, + pn, + speeds, + psizes, + n, + w, + type_of_metric, + umf, + metric, + np + ); + } + + /* + * Speeds are functions of problem size, + * set with weighted elements and there + * is a limit on the number of elements that can + * be stored by each processor. + * No known results. 
+ */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 0) + && (pn > 1 + ) + ) + { + /* + * Naive implementation + */ + return __HMPI_Sum_of_weights_for_nonordered_set_speed_functions_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + n, + w, + type_of_metric, + umf, + metric, + np + ); + } + + /* + * Heterogeneous distribution for ordered sets. + * + * The following criterion and restriction should + * be satisfied: + * The number of elements in each partition should be + * proportional to the speed of the processor + * owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. + * + * Speeds are single numbers, no weights and no + * bounds on the number of elements + */ + if ((w == NULL) + && (mlimits == NULL) + && (ordering == 1) + && (pn == 1 + ) + ) + { + int *allocations; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Number_of_elements_proportional_to_speed( + p, + n, + speeds, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are single numbers, no weights and there + * is a limit on the number of elements that can + * be stored by each processor + */ + if ((w == NULL) + && (mlimits != NULL) + && (ordering == 1) + && (pn == 1 + ) + ) + { + int *allocations; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Speeds_are_single_numbers_with_mlimits( + p, + speeds, + mlimits, + n, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are functions of problem size 
+ * no weights and no bounds on the number of elements + */ + if ((w == NULL) + && (mlimits == NULL) + && (ordering == 1) + && (pn > 1 + ) + ) + { + double *speeds_opt; + int *allocations; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + n, + speeds_opt, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(speeds_opt); + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are functions of problem size, no weights and there + * is a limit on the number of elements that can + * be stored by each processor + */ + if ((w == NULL) + && (mlimits != NULL) + && (ordering == 1) + && (pn > 1 + ) + ) + { + int *allocations; + double *speeds_opt; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Speed_function_of_problem_size_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + n, + speeds_opt, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(speeds_opt); + free(allocations); + + return HMPI_OK; + } + + /* + * The following criterion and restriction should + * be satisfied: + * The sum of weights of the elements in each + * partition should be proportional to the speeda + * of the processor owning that partition. + * + * The number of elements in each partition must + * be less than the maximum number of elements a + * processor can hold. 
+ * + * Speeds are single numbers, set has weighted weights + * and no bounds on the number of elements + * Processors cannot be reordered. + * No known results. + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 1) + && (pn == 1) + && (processor_ordering == 0 + ) + ) + { + int *allocations; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Sum_of_weights_for_ordered_set( + p, + n, + speeds, + w, + type_of_metric, + umf, + metric, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * Processors can be reordered. + * We rearrange the processors in decreasing order + * of speeds + * No known results. + * A naive implementation is provided here. + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 1) + && (pn == 1) + && (processor_ordering == 1 + ) + ) + { + double *rearranged_speeds; + int *rearrangedp; + double temp; + int temp_number; + int *allocations; + int ind; + + rearranged_speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_speeds[i] = speeds[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_speeds[j-1] < rearranged_speeds[j]) + { + temp = rearranged_speeds[j-1]; + rearranged_speeds[j-1] = rearranged_speeds[j]; + rearranged_speeds[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = 
__HMPI_Sum_of_weights_for_ordered_set( + p, + n, + rearranged_speeds, + w, + type_of_metric, + umf, + metric, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (ind = 0, i = 0; i < p; i++) + { + np[ind++] = rearrangedp[i]; + np[ind++] = allocations[i]; + } + + free(rearranged_speeds); + free(rearrangedp); + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are single numbers, set has weighted elements and there + * is a limit on the number of elements that can + * be stored by each processor + * Processors cannot be reordered. + * No known results. + * A naive implementation is provided here + */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 1) + && (pn == 1) + && (processor_ordering == 0 + ) + ) + { + int *allocations; + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Apply_mlimits_to_ordered_sum_of_weights( + p, + n, + speeds, + mlimits, + w, + -1, + NULL, + NULL, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * Metric is not calculated before. + * Do it now. + */ + if (metric != NULL) + { + int i, ind, j, sumw; + int *Size_of_bin, *Current_bin_capacity; + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + for (i = 0, ind = 0; i < p; i++) + { + for (j = 0; j < allocations[i]; j++) + { + Current_bin_capacity[i] += w[ind++]; + } + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + } + + np[0] = 0; + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * Processors can be reordered. + * No known results. + * A naive implementation is provided here. + */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 1) + && (pn == 1) + && (processor_ordering == 1 + ) + ) + { + double *rearranged_speeds; + int *rearrangedp; + int *rearranged_mlimits; + double temp; + int temp_number, temp_mlimit; + int *allocations; + int ind; + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_speeds = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_speeds[i] = speeds[i]; + rearranged_mlimits[i] = mlimits[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_speeds[j-1] < rearranged_speeds[j]) + { + temp = rearranged_speeds[j-1]; + rearranged_speeds[j-1] = rearranged_speeds[j]; + rearranged_speeds[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_mlimit = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_mlimit; + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if 
(allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Apply_mlimits_to_ordered_sum_of_weights ( + p, + n, + rearranged_speeds, + rearranged_mlimits, + w, + -1, + NULL, + NULL, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * Metric is not calculated before. + * Do it now. + */ + if (metric != NULL) + { + int i, ind, j, sumw; + int *Size_of_bin, *Current_bin_capacity; + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + rearranged_speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + for (i = 0, ind = 0; i < p; i++) + { + for (j = 0; j < allocations[i]; j++) + { + Current_bin_capacity[i] += w[ind++]; + } + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + rearranged_speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + rearranged_speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + } + + for (ind = 0, i = 0; i < p; i++) + { + np[ind++] = rearrangedp[i]; + np[ind++] = allocations[i]; + } + + free(rearranged_speeds); + free(rearranged_mlimits); + free(rearrangedp); + free(allocations); + + return HMPI_OK; + } + + /* + * Speeds are functions of problem size + * set with weighted elements and + * no bounds on the number of elements + * Processors cannot be reordered. 
+ * No known results. + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 1) + && (pn > 1) + && (processor_ordering == 0 + ) + ) + { + int *allocations; + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Naive implementation + */ + rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions( + p, + pn, + speeds, + psizes, + n, + w, + type_of_metric, + umf, + metric, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + np[0] = 0; + + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * Processors can be reordered. + * No known results. + */ + if ((w != NULL) + && (mlimits == NULL) + && (ordering == 1) + && (pn > 1) + && (processor_ordering == 1 + ) + ) + { + /* + * Naive implementation + */ + return __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering( + p, + pn, + speeds, + psizes, + n, + w, + type_of_metric, + umf, + metric, + np + ); + } + + /* + * Speeds are functions of problem size, + * set with weighted elements and there + * is a limit on the number of elements that can + * be stored by each processor + * Processors cannot be reordered. + * No known results. + */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 1) + && (pn > 1) + && (processor_ordering == 0 + ) + ) + { + int *allocations; + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Naive implementation + */ + rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + n, + w, + type_of_metric, + umf, + metric, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * Metric is not calculated before. + * Do it now. 
+ */ + if (metric != NULL) + { + int i, ind, j, sumw; + int *Size_of_bin, *Current_bin_capacity; + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + for (i = 0, ind = 0; i < p; i++) + { + for (j = 0; j < allocations[i]; j++) + { + Current_bin_capacity[i] += w[ind++]; + } + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + } + + np[0] = 0; + + for (i = 1; i <= p; i++) + { + np[i] = np[i-1] + allocations[i-1]; + } + + free(allocations); + + return HMPI_OK; + } + + /* + * Processors can be reordered. + * No known results. 
+ */ + if ((w != NULL) + && (mlimits != NULL) + && (ordering == 1) + && (pn > 1) + && (processor_ordering == 1 + ) + ) + { + /* + * Naive implementation + */ + return __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering_with_mlimits( + p, + pn, + speeds, + psizes, + mlimits, + n, + w, + type_of_metric, + umf, + metric, + np + ); + } + + printf("Parameters provided are invalid\n"); + return HMPI_ERR_PARTITION_SET; + } + + /*-----------------------------------------------------*/ + /* Look up the processor that owns set element pos. With processor_ordering == 1, np is read as (processor, count) pairs and the processor id of the owning pair is returned; otherwise np is read as p per-processor counts and the owning index is returned. Returns -1 when pos falls outside every partition and MPC_ERR_NOMEM when the scratch array cannot be allocated. The parameter n is not used. */ + int HMPI_Get_set_processor( + int pos, + int n, + int p, + int processor_ordering, + const int *np + ) + { + int i, j; + + if (processor_ordering == 1) + { + int *cumnp = (int*)malloc( + sizeof(int) + * + (p+1) + ); + + if (cumnp == NULL) + { + printf("Can't allocate cumnp in Function HMPI_Get_set_processor\n"); + return MPC_ERR_NOMEM; + } + + cumnp[0] = 0; + + for (i = 1; i <= p; i++) + { + cumnp[i] = np[2*i - 1] + cumnp[i - 1]; + } + + for (i = 0; i < p; i++) + { + if ((pos >= cumnp[i]) + && (pos < cumnp[i+1] + ) + ) + { + free(cumnp); + return np[2*i]; /* counts are read from the odd slots above, so the processor id of pair i is in the even slot */ + } + } + + free(cumnp); + return -1; + } + + { + int *cumnp = (int*)malloc( + sizeof(int) + * + (p+1) + ); + + if (cumnp == NULL) + { + printf("Can't allocate cumnp in Function HMPI_Get_set_processor\n"); + return MPC_ERR_NOMEM; + } + + for (i = 0; i <= p; i++) + { + cumnp[i] = 0; + for (j = 0; j < i; j++) + { + cumnp[i] += np[j]; + } + } + + for (i = 0; i < p; i++) + { + if ((pos >= cumnp[i]) + && (pos < cumnp[i+1] + ) + ) + { + free(cumnp); + return i; + } + } + + free(cumnp); + } + + return -1; + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_unordered_set ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + return HMPI_Partition_set( + p, + pn, + speeds, + psizes, + mlimits, + n, + w, + 0, + -1, + type_of_metric, + umf, +
metric, + np + ); + } + + /*-----------------------------------------------------*/ + + int HMPI_Partition_ordered_set ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int processor_reordering, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + return HMPI_Partition_set( + p, + pn, + speeds, + psizes, + mlimits, + n, + w, + 1, + processor_reordering, + type_of_metric, + umf, + metric, + np + ); + } + + /*-----------------------------------------------------*/ + /* Copy the p integer speeds into a double array, call HMPI_Partition_set for n elements with single-number speeds, no weights and no limits, and return entry i of the result. Returns MPC_ERR_NOMEM when a scratch array cannot be allocated and -1 when partitioning fails; both scratch arrays are released on every path. */ + int + HMPI_Get_my_partition + ( + int i, + int p, + const int *speeds, + int n + ) + { + int ind; + int rc, myd; + double *perf; + int *d = (int*)malloc( + sizeof(int) + * + p + ); + + if (d == NULL) + { + printf("Can't allocate d in Function HMPI_Get_my_partition\n"); + return MPC_ERR_NOMEM; + } + + perf = (double*)malloc( + sizeof(double) + * + p + ); + + if (perf == NULL) + { + printf("Can't allocate perf in Function HMPI_Get_my_partition\n"); + free(d); return MPC_ERR_NOMEM; /* d was already allocated; do not leak it */ + } + + for (ind = 0; ind < p; ind++) + { + perf[ind] = speeds[ind]; + } + + rc = HMPI_Partition_set( + p, + 1, + perf, + NULL, + NULL, + n, + NULL, + 0, + 0, + -1, + NULL, + NULL, + d + ); + + if (rc != HMPI_OK) + { + printf("Problems partitioning\n"); + free(d); free(perf); return -1; /* release both scratch arrays on failure */ + } + + myd = d[i]; + + free(d); + free(perf); + + return myd; + } + + /*-----------------------------------------------------*/ diff --git a/hdpi/hmpi_partitioning_sets.h b/hdpi/hmpi_partitioning_sets.h new file mode 100644 index 0000000..e847a40 --- /dev/null +++ b/hdpi/hmpi_partitioning_sets.h @@ -0,0 +1,96 @@ + +/************************************************************************* +* * +* Heterogeneous Data Partitioning Interface * +* ========================================= * +* * +* Copyright (c) 2002 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved.
We assume no responsibility for the use * +* or reliability of our software. * +* * +*************************************************************************/ + + /************************************************/ + /* Partitioning interfaces for sets */ + /* */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_PARTITIONING_SETS_HH + #define __HMPI_PARTITIONING_SETS_HH + + #define USER_SPECIFIED 1 + #define SYSTEM_DEFINED 2 + + typedef double (*User_defined_metric)( + int p, + const double *speeds, + const int *actual, + const int *ideal + ); + + int HMPI_Partition_unordered_set ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int HMPI_Partition_ordered_set ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int processor_reordering, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int HMPI_Partition_set( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int ordering, + int processor_ordering, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ); + + int HMPI_Get_set_processor( + int i, + int n, + int p, + int processor_ordering, + const int *np + ); + + int HMPI_Get_my_partition( + int i, + int p, + const int *speeds, + int n + ); + + #endif /* __HMPI_PARTITIONING_SETS_HH */ + diff --git a/hdpi/hmpi_partitioning_sets_speed_function_of_problem_size.c b/hdpi/hmpi_partitioning_sets_speed_function_of_problem_size.c new file mode 100644 index 0000000..9fdb3a4 --- /dev/null +++ b/hdpi/hmpi_partitioning_sets_speed_function_of_problem_size.c @@ -0,0 +1,1194 @@ + + /************************************************/ + /* Implementation of Partitioning 
Interfaces of */ + /* Sets using processor graphs with speed and */ + /* memory */ + /* */ + /* Revision history */ + /* 01-07-2003 -- Initial version */ + /************************************************/ + + #include + #include + #include + #include + #include + + #include + #include + + static int _HMPI_Bisection_count = 0; + static int HMPI_Debug_flag = 0; + + /*-----------------------------------------------------*/ + /* Adjust the real-valued allocation npd so that its total matches n: the difference between n and the integer-accumulated sum of npd is split over the p processors by __HMPI_Number_of_elements_proportional_to_speed using speeds s, then added to npd (shortfall) or subtracted from it (surplus). Returns HMPI_OK, MPC_ERR_NOMEM if the scratch array cannot be allocated, or the helper's error code; the scratch array is released on every path. */ + int + __HMPI_Distribute_with_single_number_for_speed + ( + int n, + int p, + const double *s, + double *npd + ) + { + int i, left, rc, sum = 0; + int* npp = (int*)malloc( + sizeof(int) + * + p + ); + + if (npp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + sum += npd[i]; + } + + left = n - sum; + + if (left > 0) + { + rc = __HMPI_Number_of_elements_proportional_to_speed( + p, + left, + s, + npp + ); + + if (rc != HMPI_OK) + { + free(npp); return rc; /* do not leak the scratch array on failure */ + } + + for (i = 0; i < p; i++) + { + npd[i] += npp[i]; + } + } + else + { + rc = __HMPI_Number_of_elements_proportional_to_speed( + p, + sum - n, + s, + npp + ); + + if (rc != HMPI_OK) + { + free(npp); return rc; /* do not leak the scratch array on failure */ + } + + for (i = 0; i < p; i++) + { + npd[i] -= npp[i]; + } + } + + free(npp); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Recursive_bisection_middle_region + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + double slopei, + double slopef, + double *speeds_opt, + double *npd + ) + { + int i, j, rc; + double sumd; + double slope, slopeii, slopeff; + double slope_1_plus_2; + double *s = (double*)malloc( + sizeof(double) + * + p + ); + + if (s == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Initialize npd + */ + for (i = 0; i < p; i++) + { + npd[i] = 0.; + } + + /* + * Now use the straight line with half slope + */ + slope_1_plus_2 = (double)(slopei+slopef) + / + (double)(1 - slopei*slopef); + + if (slope_1_plus_2 > 0) + { + slope = (double)(sqrt(1+pow(slope_1_plus_2, 2)) - 1) + / +
(double)slope_1_plus_2; + } + + if (slope_1_plus_2 < 0) + { + slope = (double)(sqrt(1+pow(slope_1_plus_2, 2)) + 1) + / + (double)(-slope_1_plus_2); + } + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> __HMPI_Recursive_bisection_middle_region: Slope is %0.10f, %0.10f, %0.10f\n", + slopei, slopef, slope + ); + } + + for (i = 0; i < p; i++) + { + double slopep; + double interceptp; + double x; + + int intersection_point_found = 0; + + for (j = 0; j < (pn - 1); j++) + { + /* + * Ignore problem sizes of 0 for the moment + */ + if (psizes[i*pn + j] == 0) + { + npd[i] = 0; + speeds_opt[i] = DBL_MAX; + s[i] = DBL_MAX; + continue; + } + + /* + * This is a extra/bad experimental point + * + * If this is the last point, assume a constant function + * for the jump + */ + if (psizes[i*pn + j + 1] == psizes[i*pn + j]) + { + if (j == (pn - 2)) + { + x = speeds[i*pn + j] + / + slope; + + if (x >= 0) + { + npd[i] = x; + speeds_opt[i] = s[i] = speeds[i*pn + j]; + } + + break; + } + + continue; + } + + slopep = (speeds[i*pn + j + 1] - speeds[i*pn + j]) + / + (double)(psizes[i*pn + j + 1] - psizes[i*pn + j]); + + interceptp = speeds[i*pn + j] - slopep*psizes[i*pn + j]; + + x = (double)interceptp + / + (double)(slope - slopep); + + /* + * The x-coordinate of the intersection is negative + * Proceed to the next straight line in the + * functional model + */ + if (x < 0) + { + continue; + } + + /* + * The intersection point lies beyond the end points + * of the straight line, so should we proceed to the next + * iteration? 
+ */ + if ((x < psizes[i*pn + j]) + || (x > psizes[i*pn + j + 1] + ) + ) + { + continue; + } + + intersection_point_found = 1; + npd[i] = x; + speeds_opt[i] = s[i] = x*slope; + break; + } + + if (intersection_point_found == 1) + { + continue; + } + + /* + * Start with a function with the same slope as the first line segment + */ + slopep = (speeds[i*pn + 1] - speeds[i*pn]) + / + (double)(psizes[i*pn + 1] - psizes[i*pn]); + + interceptp = speeds[i*pn] - slopep*psizes[i*pn]; + + x = (double)interceptp + / + (double)(slope - slopep); + + if ((x >= 0) + && (x <= psizes[i*pn] + ) + ) + { + npd[i] = x; + s[i] = x*slope; + speeds_opt[i] = s[i]; + continue; + } + + /* + * Assume a constant function in the beginning + * for a problem size of 0 and the first experimental + * point + */ + if (psizes[i*pn] != 0) + { + x = (double)speeds[i*pn] + / + (double)slope; + + if ((x >= 0) + && (x <= psizes[i*pn] + ) + ) + { + npd[i] = x; + speeds_opt[i] = s[i] = speeds[i*pn]; + continue; + } + } + + /* + * Try the function with the same slope as before in the end + */ + slopep = (speeds[i*pn + pn - 1] - speeds[i*pn + pn - 2]) + / + (double)(psizes[i*pn + pn - 1] - psizes[i*pn + pn - 2]); + interceptp = speeds[i*pn + pn - 2] - slopep*psizes[i*pn + pn - 2]; + x = (double)interceptp + / + (double)(slope - slopep); + + if (x >= 0) + { + npd[i] = x; + speeds_opt[i] = s[i] = x*slope; + continue; + } + + /* + * Now assume a constant function in the end + */ + x = (double)speeds[i*pn + pn - 1] + / + (double)slope; + + if (x >= 0) + { + npd[i] = x; + speeds_opt[i] = s[i] = speeds[i*pn + pn - 1]; + continue; + } + + if (x < 0) + { + printf("HMPI===> __HMPI_Recursive_bisection_middle_region: Panic, no intersection\n"); + } + } + + /* + * If the sum is equal to n, we have a + * perfect fit. 
+ */ + sumd = 0.0; + for (i = 0; i < p; i++) + { + sumd += npd[i]; + } + + if (((floor(sumd)) == n) + || ((ceil(sumd)) == n + ) + ) + { + int sum = 0; + + for (i = 0; i < p; i++) + { + sum += floor(npd[i]); + } + + if (sum == n) + { + return HMPI_OK; + } + + for (i = 0; i < p; i++) + { + if (npd[i] <= 1) + { + continue; + } + + npd[i] = npd[i] + 1; + + sum = 0; + + for (j = 0; j < p; j++) + { + sum += floor(npd[j]); + } + + if (sum == n) + { + break; + } + } + + free(s); + + return HMPI_OK; + } + + if (HMPI_Debug_flag) + { + printf("HMPI===> __HMPI_Recursive_bisection_middle_region: Sum is %0.6f\n", sumd); + } + + /* + * After HMPI_MAX_BISECTION_STEPS steps, we have not + * arrived at a solution. It is known that for bisection, no more + * than 53 iterations are needed to obtain full single precision. + * Distribute the rest of the elements of the set + * using the speeds at the current point. + */ + if (_HMPI_Bisection_count == HMPI_MAX_BISECTION_STEPS) + { + for (i = 0; i < p; i++) + { + speeds_opt[i] = s[i]; + } + + rc = __HMPI_Distribute_with_single_number_for_speed( + n, + p, + s, + npd + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(s); + + return HMPI_OK; + } + + free(s); + + _HMPI_Bisection_count++; + + if (sumd > n) + { + slopeii = slopei; + slopeff = slope; + + return __HMPI_Recursive_bisection_middle_region( + p, + pn, + speeds, + psizes, + n, + slopeii, + slopeff, + speeds_opt, + npd + ); + } + + slopeii = slope; + slopeff = slopef; + + return __HMPI_Recursive_bisection_middle_region( + p, + pn, + speeds, + psizes, + n, + slopeii, + slopeff, + speeds_opt, + npd + ); + } + + /*-----------------------------------------------------*/ + + int __HMPI_Speed_function_of_problem_size_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *bounds, + int n, + double *speeds_opt, + int *np + ) + { + int i, j, rc; + int bound_exceeded = 0; + double sumd = 0; + + _HMPI_Bisection_count = 0; + + for (i = 0; i < p; i++) + 
{ + sumd += bounds[i]; + } + + if (sumd < n) + { + printf( + "Problem size %d cannot be solved, memory bounds " + "on the number of elements that can be stored by " + "each processor exceeded\n", n); + return HMPI_ERR_PARTITION_SET; + } + + if (sumd == n) + { + for (i = 0; i < p; i++) + { + np[i] = bounds[i]; + } + + return HMPI_OK; + } + + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + n, + speeds_opt, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + if (np[i] > bounds[i]) + { + np[i] = bounds[i]; + + bound_exceeded = 1; + + break; + } + } + + if (bound_exceeded == 1) + { + int k; + int ind = 0; + int sind = 0; + int mind = 0; + int *psizesm; + double *speedsm; + double *speedsm_opt; + int *npm; + int *boundsm; + int nm = n - bounds[i]; + + speedsm = (double*)malloc( + sizeof(double) + * + (p-1) + * + pn + ); + + if (speedsm == NULL) + { + return MPC_ERR_NOMEM; + } + + speedsm_opt = (double*)malloc( + sizeof(double) + * + (p-1) + * + pn + ); + + if (speedsm_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + psizesm = (int*)malloc( + sizeof(int) + * + (p-1) + * + pn + ); + + if (psizesm == NULL) + { + return MPC_ERR_NOMEM; + } + + npm = (int*)malloc( + sizeof(int) + * + (p-1) + ); + + if (npm == NULL) + { + return MPC_ERR_NOMEM; + } + + boundsm = (int*)malloc( + sizeof(int) + * + (p-1) + ); + + if (boundsm == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < p; j++) + { + if (j == i) + { + continue; + } + + npm[ind] = np[j]; + boundsm[ind] = bounds[j]; + ind++; + + for (k = 0; k < pn; k++) + { + speedsm[sind++] = speeds[j*pn + k]; + psizesm[mind++] = psizes[j*pn + k]; + } + } + + rc = __HMPI_Speed_function_of_problem_size_with_mlimits( + p-1, + pn, + speedsm, + psizesm, + boundsm, + nm, + speedsm_opt, + npm + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (ind = 0, j = 0; j < p; j++) + { + if (j == i) + { + continue; + } + + np[j] = npm[ind]; + speeds_opt[j] = speedsm_opt[ind]; + 
ind++; + } + + free(speedsm); + free(speedsm_opt); + free(psizesm); + free(boundsm); + free(npm); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Speed_function_of_problem_size + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + double *speeds_opt, + int *np + ) + { + int istart = p; + int negative_x_intersection = 0; + int rc, ind, i, j; + double temp, sumd; + double slopei, slopef; + double *npd = (double*)malloc( + sizeof(double) + * + p + ); + + if (npd == NULL) + { + return MPC_ERR_NOMEM; + } + + _HMPI_Bisection_count = 0; + + /* + * Initialize npd + */ + for (i = 0; i < p; i++) + { + npd[i] = 0.; + } + + /* + * The functions may start from the problem size of 0 + */ + for (i = 0; i < p; i++) + { + if (psizes[i*pn] != 0) + { + istart = i; + break; + } + } + + if (istart != p) + { + temp = (double)speeds[istart*pn] + / + (double)psizes[istart*pn]; + ind = istart; + for (i = istart+1; i < p; i++) + { + double temps; + + /* + * Ignore the point where the problem size is 0 + */ + if (psizes[i*pn] == 0) + { + continue; + } + + temps = (double)speeds[i*pn] + / + (double)psizes[i*pn]; + + if (temp < temps) + { + temp = temps; + ind = i; + } + } + } + else + { + ind = 0; + } + + /* + * Solve the equations + * y = (maximum slope)*x and + * y0 = c0, y1 = c1, ... for points + * x0, x1, x2, ...,xp-1 + */ + npd[ind] = psizes[ind*pn]; + speeds_opt[ind] = speeds[ind*pn]; + for (i = 0; i < p; i++) + { + double xj; + + if (i == ind) + { + continue; + } + + /* + * Ignore problem sizes of 0 for the moment + */ + if (psizes[i*pn] == 0) + { + npd[i] = 0; + speeds_opt[i] = speeds[i*pn]; + break; + } + + xj = ( + (double)psizes[ind*pn] + / + (double)speeds[ind*pn] + ) + * + speeds[i*pn]; + + npd[i] = xj; + speeds_opt[i] = speeds[i*pn]; + } + + /* + * If the sum is equal to n, we have a + * perfect fit. 
+ */ + sumd = 0.0; + for (i = 0; i < p; i++) + { + sumd += npd[i]; + } + + if (((floor(sumd)) == n) + || ((ceil(sumd)) == n + ) + ) + { + int sum = 0; + + for (i = 0; i < p; i++) + { + sum += floor(npd[i]); + } + + if (sum == n) + { + for (i = 0; i < p; i++) + { + np[i] = floor(npd[i]); + } + + return HMPI_OK; + } + + for (i = 0; i < p; i++) + { + if (npd[i] <= 1) + { + continue; + } + + npd[i] = npd[i] + 1; + + sum = 0; + + for (j = 0; j < p; j++) + { + sum += floor(npd[j]); + } + + if (sum == n) + { + break; + } + } + + for (i = 0; i < p; i++) + { + np[i] = floor(npd[i]); + } + + free(npd); + + return HMPI_OK; + } + + /* + * The intersection points lie before the starting + * experimental point. So use the single number speeds + * This is the only thing we can do at this point. + */ + if (sumd > n) + { + double *s = (double*)malloc( + sizeof(double) + * + p + ); + + if (s == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + s[i] = speeds[i*pn]; + } + + rc = __HMPI_Number_of_elements_proportional_to_speed( + p, + n, + s, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(npd); + free(s); + + return HMPI_OK; + } + + if (istart != p) + { + slopei = (double)speeds[ind*pn] + / + (double)psizes[ind*pn]; + } + else + { + slopei = DBL_MAX; + } + + /* + * Start with a straight line passing + * with minimum slope at last point + * + * Actually this is ridiculous. The + * last point can't have the problem size 0. + * But we take care of this situation as best as we can. 
+ */ + istart = p; + for (i = 0; i < p; i++) + { + if (psizes[i*pn + pn - 1] != 0) + { + istart = i; + break; + } + } + + if (istart != p) + { + temp = (double)speeds[istart*pn + pn - 1] + / + (double)psizes[istart*pn + pn - 1]; + ind = istart; + for (i = istart+1; i < p; i++) + { + double temps = (double)speeds[i*pn + pn - 1] + / + (double)psizes[i*pn + pn - 1]; + if (temp > temps) + { + temp = temps; + ind = i; + } + } + } + else + { + ind = 0; + } + + /* + * Solve the equations for the last point + * y = (minimum slope)*x and for the processors + * y0 = b0*x+c0, y1 = b1*x1+c1, ... for points + * x0, x1, x2, ...,xp-1 + * + * Initialize npd + */ + for (i = 0; i < p; i++) + { + npd[i] = 0.; + } + + npd[ind] = psizes[ind*pn + pn - 1]; + speeds_opt[ind] = speeds[ind*pn + pn - 1]; + for (i = 0; i < p; i++) + { + if (i == ind) + { + continue; + } + + { + double slopep, slopeo; + double interceptp; + double x, y; + + /* + * Ignore problem sizes of 0 for the moment + */ + if (psizes[i*pn + pn - 2] == 0) + { + npd[i] = 0; + speeds_opt[i] = speeds[i*pn + pn - 2]; + continue; + } + + /* + * This is an extra or bad experimental point + */ + if (psizes[i*pn + pn - 1] == psizes[i*pn + pn - 2]) + { + /* + * Replace this jump by constant function + */ + slopeo = speeds_opt[ind] + / + (double)npd[ind]; + + x = speeds[i*pn + pn - 2] + / + slopeo; + + if (x >= 0) + { + npd[i] = x; + speeds_opt[i] = speeds[i*pn + pn - 2]; + } + + if (x < 0) + { + break; + } + + continue; + } + + slopep = (speeds[i*pn + pn - 1] - speeds[i*pn + pn - 2]) + / + (double)(psizes[i*pn + pn - 1] - psizes[i*pn + pn - 2]); + + interceptp = speeds[i*pn + pn - 2] - slopep*psizes[i*pn + pn - 2]; + + slopeo = speeds_opt[ind] + / + (double)npd[ind]; + + x = (double)interceptp + / + (double)(slopeo - slopep); + + /* + * The x-coordinate of the intersection is negative + */ + if (x < 0) + { + negative_x_intersection = 1; + break; + } + + npd[i] = x; + speeds_opt[i] = x*slopeo; + } + } + + if (negative_x_intersection 
== 0) + { + /* + * If the sum is equal to n, we have a + * perfect fit. + */ + sumd = 0.0; + for (i = 0; i < p; i++) + { + sumd += npd[i]; + } + + if (((floor(sumd)) == n) + || ((ceil(sumd)) == n + ) + ) + { + int sum = 0; + + for (i = 0; i < p; i++) + { + sum += floor(npd[i]); + } + + if (sum == n) + { + for (i = 0; i < p; i++) + { + np[i] = floor(npd[i]); + } + + return HMPI_OK; + } + + for (i = 0; i < p; i++) + { + if (npd[i] <= 1) + { + continue; + } + + npd[i] = npd[i] + 1; + + sum = 0; + + for (j = 0; j < p; j++) + { + sum += floor(npd[j]); + } + + if (sum == n) + { + break; + } + } + + for (i = 0; i < p; i++) + { + np[i] = floor(npd[i]); + } + + free(npd); + + return HMPI_OK; + } + + /* + * The functions constructed are inadequate. More + * experimental points are required. + * Should we alert the user? + */ + if (sumd < n) + { + double *s = (double*)malloc( + sizeof(double) + * + p + ); + + if (s == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + s[i] = ( + (double)speeds[ind*pn + pn - 1] + / + (double)npd[ind] + ) + * + npd[i]; + speeds_opt[i] = s[i]; + } + + rc = __HMPI_Number_of_elements_proportional_to_speed( + p, + n, + s, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(s); + free(npd); + + return HMPI_OK; + } + } + + slopef = (double)speeds[ind*pn + pn - 1] + / + (double)psizes[ind*pn + pn - 1]; + + /* + * Use recursive Bisection to get a perfect fit + */ + rc = __HMPI_Recursive_bisection_middle_region( + p, + pn, + speeds, + psizes, + n, + slopei, + slopef, + speeds_opt, + npd + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + np[i] = floor(npd[i]); + } + + free(npd); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + diff --git a/hdpi/hmpi_partitioning_sets_speed_function_of_problem_size_weighted_elements.c b/hdpi/hmpi_partitioning_sets_speed_function_of_problem_size_weighted_elements.c new file mode 100644 index 0000000..17ace6e --- /dev/null 
+++ b/hdpi/hmpi_partitioning_sets_speed_function_of_problem_size_weighted_elements.c @@ -0,0 +1,2103 @@ + + + /************************************************/ + /* Implementation of Partitioning Interfaces of */ + /* Sets using processor graphs with speed and */ + /* memory. The elements of the set have weights.*/ + /* */ + /* Revision history */ + /* 01-07-2003 -- Initial version */ + /************************************************/ + + #include + #include + #include + #include + + #include + #include + + static int HMPI_Debug_flag = 0; + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_nonordered_set_speed_functions + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int *rearranged_weights; + int *rearrangedw; + int *allocations; + int sumw = 0; + int i, j, rc; + int *Size_of_bin, *Current_bin_capacity; + double *speeds_opt; + int temp, temp_number; + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. + */ + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + sumw, + speeds_opt, + Size_of_bin + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * We rearrange the element weights + * in descending order. 
+ */ + { + rearranged_weights = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearranged_weights == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedw = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearrangedw == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + rearrangedw[i] = i; + rearranged_weights[i] = w[i]; + } + + for (i = 0; i < n; i++) + { + for (j = 1; j < n; j++) + { + if (rearranged_weights[j-1] < rearranged_weights[j]) + { + temp = rearranged_weights[j-1]; + rearranged_weights[j-1] = rearranged_weights[j]; + rearranged_weights[j] = temp; + + temp_number = rearrangedw[j-1]; + rearrangedw[j-1] = rearrangedw[j]; + rearrangedw[j] = temp_number; + } + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + n + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + for (i = 0; i < n; i++) + { + int waste = INT_MAX; + int chosen = -1; + + for (j = 0; j < p; j++) + { + if ((Current_bin_capacity[j] + w[i]) <= Size_of_bin[j]) + { + int wastej = ( + Size_of_bin[j] + - + ( + Current_bin_capacity[j] + + + w[i] + ) + ); + + if (wastej < waste) + { + chosen = j; + waste = wastej; + } + } + } + + if (chosen == -1) + { + waste = INT_MAX; + + for (j = 0; j < p; j++) + { + int wastej = fabs( + Size_of_bin[j] + - + ( + Current_bin_capacity[j] + + + w[i] + ) + ); + + if (wastej < waste) + { + chosen = j; + waste = wastej; + } + } + } + + allocations[i] = chosen; + Current_bin_capacity[chosen] += w[i]; + } + + if (metric == NULL) + { + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(rearrangedw); + free(rearranged_weights); + free(allocations); + free(speeds_opt); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * 
elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + { + free(Size_of_bin); + free(Current_bin_capacity); + free(rearrangedw); + free(rearranged_weights); + free(allocations); + free(speeds_opt); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_nonordered_set_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, k, rc; + int *rearranged_weights; + int *rearrangedw; + int temp; + int temp_number, temp_mlimit; + int *allocations; + int *Size_of_bin, *Current_bin_capacity; + int total_limits = 0; + int *Open, *Number_in_bin; + int sumw = 0; + double *speeds_opt; + int *shortlist; + + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits < n) + { + printf( + "The number of elements in the set" + " is greater than the sum of numbers of elements" + " the processors can hold or" + " Partitioning cannot be done with the restrictions" + " provided\n" + ); + + return HMPI_ERR_MLIMITS; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + 
* Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. + */ + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + sumw, + speeds_opt, + Size_of_bin + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * We rearrange the element weights + * in descending order. + */ + { + rearranged_weights = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearranged_weights == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedw = (int*)malloc( + sizeof(int) + * + n + ); + + if (rearrangedw == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + rearrangedw[i] = i; + rearranged_weights[i] = w[i]; + } + + for (i = 0; i < n; i++) + { + for (j = 1; j < n; j++) + { + if (rearranged_weights[j-1] < rearranged_weights[j]) + { + temp = rearranged_weights[j-1]; + rearranged_weights[j-1] = rearranged_weights[j]; + rearranged_weights[j] = temp; + + temp_number = rearrangedw[j-1]; + rearrangedw[j-1] = rearrangedw[j]; + rearrangedw[j] = temp_number; + } + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + n + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + if (total_limits == n) + { + int ind = 0; + int *rearranged_mlimits, *rearrangedp; + double *rearranged_speeds_opt; + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_speeds_opt[i] = speeds_opt[i]; + 
rearranged_mlimits[i] = mlimits[i]; + + Current_bin_capacity[i] = 0; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_mlimits[j-1] > rearranged_mlimits[j]) + { + temp = rearranged_speeds_opt[j-1]; + rearranged_speeds_opt[j-1] = rearranged_speeds_opt[j]; + rearranged_speeds_opt[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_mlimit = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_mlimit; + } + } + } + + /* + * This looks like a NP-hard problem. + * We know the number of elements in each subset + * given by the upper bound. + * We provide a naive implementation here. + * This is of complexity O(n*n). + * We arrange the processors in increasing + * order of their upper bounds and we arrange + * the weights in decreasing order. + */ + for (i = 0; i < p; i++) + { + for (j = 0; j < rearranged_mlimits[i]; j++) + { + allocations[ind] = rearrangedp[i]; + Current_bin_capacity[rearrangedp[i]] += w[ind]; + ind++; + } + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_weights); + free(rearranged_mlimits); + free(rearrangedw); + free(rearrangedp); + free(allocations); + free(speeds_opt); + free(rearranged_speeds_opt); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_weights); + free(rearranged_mlimits); + free(rearrangedw); + free(rearrangedp); + free(allocations); + free(speeds_opt); + free(rearranged_speeds_opt); + + return HMPI_OK; + } + + Open = (int*)malloc( + sizeof(int) + * + p + ); + + if (Open == NULL) + { + return MPC_ERR_NOMEM; + } + + Number_in_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Number_in_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + Open[i] = 1; + Number_in_bin[i] = 0; + } + + shortlist = (int*)malloc( + sizeof(int) + * + p + ); + + if (shortlist == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + int nslist = 0; + int chosen = -1; + + for (j = 0; j < p; j++) + { + if (((Current_bin_capacity[j] + w[i]) <= Size_of_bin[j]) + && (Open[j] == 1 + ) + ) + { + shortlist[nslist++] = j; + } + } + + if (nslist > 0) + { + int temp = Size_of_bin[shortlist[0]] + - + Current_bin_capacity[shortlist[0]] + ; + chosen = shortlist[0]; + + for (k = 1; k < nslist; k++) + { + int tempk = Size_of_bin[shortlist[k]] + - + Current_bin_capacity[shortlist[k]] + ; + + if ((tempk >= temp) + && (Open[shortlist[k]] == 1 + ) + ) + { + temp = tempk; + chosen = shortlist[k]; + } + } + } + else + { + int waste = INT_MAX; + for (j = 0; j < p; j++) + { + if (Open[j] == 1) + { + int wastej = ( + Current_bin_capacity[j] + + + w[i] + - + Size_of_bin[j] + ); + + if (wastej < waste) + { + chosen = j; + waste = wastej; + } + } + } + } + + if ((Number_in_bin[chosen] + 1) == mlimits[chosen]) + { + allocations[i] = chosen; + 
Number_in_bin[chosen]++; + Current_bin_capacity[chosen] = Current_bin_capacity[chosen] + + + w[i] + ; + Open[chosen] = 0; + + continue; + } + + if ((Number_in_bin[chosen] + 1) > mlimits[chosen]) + { + printf("HMPI===> __HMPI_Sum_of_weights_for_nonordered_set_speed_functions_with_mlimits:" + " error in code, must not come into this part\n" + ); + + return HMPI_ERR_INTERNAL; + } + + allocations[i] = chosen; + Number_in_bin[chosen]++; + Current_bin_capacity[chosen] = Current_bin_capacity[chosen] + + + w[i] + ; + } + + free(shortlist); + free(Open); + free(Number_in_bin); + + if (metric == NULL) + { + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_weights); + free(rearrangedw); + free(allocations); + free(speeds_opt); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + if (HMPI_Debug_flag) + { + printf("Speeds opt are: \n"); + + for (i = 0; i < p; i++) + { + printf("%0.1f ", speeds_opt[i]); + } + + printf("\n"); + + printf("Current bin capacities are: \n"); + + for (i = 0; i < p; i++) + { + printf("%d ", Current_bin_capacity[i]); + } + + printf("\n"); + + printf("Sizes of bin are: \n"); + + for (i = 0; i < p; i++) + { + printf("%d ", Size_of_bin[i]); + } + + printf("\n"); + } + + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + for (i = 0; i < n; i++) + { + np[rearrangedw[i]] = allocations[i]; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_weights); + free(rearrangedw); + free(allocations); + free(speeds_opt); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int sumw = 0; + int sumcum = 0; + int i, j, rc, prev_proc; + int *wallocationsc; + int *Size_of_bin, *Current_bin_capacity; + double *speeds_opt; + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. 
+ */ + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + sumw, + speeds_opt, + Size_of_bin + ); + + if (rc != HMPI_OK) + { + return rc; + } + + wallocationsc = (int*)malloc( + sizeof(int) + * + (p+1) + ); + + if (wallocationsc == NULL) + { + return MPC_ERR_NOMEM; + } + + wallocationsc[0] = 0; + for (i = 1; i <= p; i++) + { + wallocationsc[i] = wallocationsc[i-1] + Size_of_bin[i-1]; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + np[i] = 0; + } + + for (i = 0; i < n; i++) + { + prev_proc = 0; + sumcum += w[i]; + + for (j = 0; j < p; j++) + { + int Wastej_1, Wastej; + + if ((sumcum > wallocationsc[j]) + && (sumcum <= wallocationsc[j+1] + ) + ) + { + if (prev_proc == j) + { + np[j]++; + Current_bin_capacity[j] += w[i]; + break; + } + + /* + * The elements preceding the current one + * exactly fit into partition (j-1) + */ + if ((sumcum - w[i]) == wallocationsc[j]) + { + prev_proc = j; + np[j]++; + Current_bin_capacity[j] += w[i]; + break; + } + + /* + * This is a border element. + * The waste is calculated if this element + * goes to j-1 or to j. + */ + Wastej_1 = fabs( + Size_of_bin[j-1] + - + ( + Current_bin_capacity[j-1] + + + w[i] + ) + ); + + Wastej = fabs( ( + sumw - wallocationsc[j] + ) + - + ( + sumw - sumcum + w[i] + ) + ); + + if (Wastej_1 <= Wastej) + { + np[j-1]++; + Current_bin_capacity[j-1] += w[i]; + } + else + { + np[j]++; + Current_bin_capacity[j] += w[i]; + } + } + } + } + + if (metric == NULL) + { + free(wallocationsc); + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds_opt); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds_opt, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(wallocationsc); + free(Size_of_bin); + free(Current_bin_capacity); + free(speeds_opt); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, k, rc, indl; + int total_limits = 0; + int mlimits_apply = 0; + int x, y, l, m, opt_start; + int wastei, sumtmp, wastef; + int sumw; + int *Size_of_bin; + int total_sub_mlimits; + double *speeds_opt; + + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits == n) + { + for (i = 0; i < p; i++) + { + np[i] = mlimits[i]; + } + + return HMPI_OK; + } + + if (total_limits < n) + { + printf( + "The number of elements in the set" + " is greater than the sum of numbers of elements" + " the processors can hold or" + " Partitioning cannot be done with the restrictions" + " provided\n" + ); + + return HMPI_ERR_MLIMITS; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. 
+ */ + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + sumw, + speeds_opt, + Size_of_bin + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. + */ + rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions( + p, + pn, + speeds, + psizes, + n, + w, + type_of_metric, + umf, + metric, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + if (np[i] > mlimits[i]) + { + mlimits_apply = 1; + break; + } + } + + if (mlimits_apply == 0) + { + return HMPI_OK; + } + + for (i = 0; i < p; i++) + { + if (np[i] <= mlimits[i]) + { + continue; + } + + /* + * We try to distribute the remaining + * elements to the processors following it + */ + if (i == 0) + { + int reduced_set_size; + np[i] = mlimits[i]; + reduced_set_size = n - np[i]; + + if (HMPI_Debug_flag) + { + printf("HMPI===> __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits: mlimits = %d, Reduced set size = %d\n", mlimits[i], reduced_set_size); + } + + free(Size_of_bin); + free(speeds_opt); + + return __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits( + p-1, + pn, + (speeds + pn), + (psizes + pn), + (mlimits + 1), + reduced_set_size, + (w + np[i]), + type_of_metric, + umf, + metric, + np + 1 + ); + } + + /* + * If this is the last processor, + * we try to distribute the remaining + * elements to the processors preceding it + */ + if (i == (p - 1)) + { + int reduced_set_size = 0; + + for (j = 0; j < i; j++) + { + reduced_set_size += np[j]; + } + + reduced_set_size += (np[i] - mlimits[i]); + np[i] = mlimits[i]; + + free(Size_of_bin); + free(speeds_opt); + + return __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits( + p-1, + pn, + speeds, + psizes, + mlimits, + reduced_set_size, + w, + type_of_metric, + umf, + metric, + np + ); + } + + if 
(HMPI_Debug_flag) + { + printf( + "HMPI===> " + "__HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits: " + "Processor %d has upper bound exceeded\n", + i + ); + + printf("HMPI===> Allocations are: \n"); + for (k = 0; k < p; k++) + { + printf("%d ", np[k]); + } + printf("\n"); + + printf("HMPI===> element limits are:\n"); + for (k = 0; k < p; k++) + { + printf("%d ", mlimits[k]); + } + printf("\n"); + } + + for (k = i+1, total_sub_mlimits = 0; k < p; k++) + { + total_sub_mlimits += mlimits[k]; + } + + /* + * Find the maximum subsequence of elements, the number + * of elements being equal to mlimits[i] and packing these + * element into bin i generates least amount of waste + */ + l = 0; + for (k = 0; k < i; k++) + { + l += np[k]; + } + + indl = l; + + do + { + wastei = INT_MAX; + + for (x = indl; x < (indl+np[i]); x++) + { + if (((indl+np[i]) - x) < mlimits[i]) + { + break; + } + + sumtmp = 0; + + for (y = 0; y < mlimits[i]; y++) + { + sumtmp += w[x+y]; + } + + wastef = fabs(sumtmp - Size_of_bin[i]); + + if (HMPI_Debug_flag) + { + printf("x=%d, wastef=%d, Size of bin=%d ", x, wastef, Size_of_bin[i]); + } + + if (wastef < wastei) + { + wastei = wastef; + opt_start = x; + } + } + + if (HMPI_Debug_flag) + { + printf("\n"); + } + + l = opt_start; + m = l + mlimits[i]; + + indl++; + } + while ((n-m) > total_sub_mlimits); + + np[i] = mlimits[i]; + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> " + "__HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits: " + "Total number of elements=%d," + "Number of elements to be redistributed before=%d," + " elements after the element %d to be redistributed\n", + n, + l, + m + ); + } + + /* + * spread the elements {0, 1, ..., l-1} + * amongst the processors before i + */ + rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits( + i, + pn, + speeds, + psizes, + mlimits, + l, + w, + type_of_metric, + umf, + metric, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * spread the 
elements {m+1, m+2, ..., n-1} + * amongst the processors following i + */ + rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits( + p-(i+1), + pn, + (speeds+(i+1)*pn), + (psizes+(i+1)*pn), + (mlimits+i+1), + (n-l-mlimits[i]), + (w+l+mlimits[i]), + type_of_metric, + umf, + metric, + (np+i+1) + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(Size_of_bin); + free(speeds_opt); + + break; + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering + ( + int p, + int pn, + const double *speeds, + const int *psizes, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int sumw = 0; + int sumcum = 0; + int ind, i, j, rc, prev_proc; + int *wallocationsc, *allocations; + int *Size_of_bin, *Current_bin_capacity; + double *speeds_opt, *rearranged_speeds_opt; + int *rearranged_size_of_bin; + int *rearrangedp; + int temp, temp_number; + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. 
+ */ + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + sumw, + speeds_opt, + Size_of_bin + ); + + if (rc != HMPI_OK) + { + return rc; + } + + rearranged_size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_size_of_bin[i] = Size_of_bin[i]; + rearranged_speeds_opt[i] = speeds_opt[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_size_of_bin[j-1] < rearranged_size_of_bin[j]) + { + temp = rearranged_size_of_bin[j-1]; + rearranged_size_of_bin[j-1] = rearranged_size_of_bin[j]; + rearranged_size_of_bin[j] = temp; + + temp = rearranged_speeds_opt[j-1]; + rearranged_speeds_opt[j-1] = rearranged_speeds_opt[j]; + rearranged_speeds_opt[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + } + } + } + + wallocationsc = (int*)malloc( + sizeof(int) + * + (p+1) + ); + + if (wallocationsc == NULL) + { + return MPC_ERR_NOMEM; + } + + wallocationsc[0] = 0; + for (i = 1; i <= p; i++) + { + wallocationsc[i] = wallocationsc[i-1] + rearranged_size_of_bin[i-1]; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + allocations[i] = 0; + } + + for (i = 0; i < n; i++) + { + prev_proc = 0; + sumcum += w[i]; + + for (j = 0; j < p; j++) + { + int Wastej_1, Wastej; + + if ((sumcum > wallocationsc[j]) + && (sumcum <= wallocationsc[j+1] + ) + ) + { + if (prev_proc == j) + { + allocations[j]++; + Current_bin_capacity[j] += w[i]; + break; + } + + 
/* + * The elements preceding the current one + * exactly fit into partition (j-1) + */ + if ((sumcum - w[i]) == wallocationsc[j]) + { + prev_proc = j; + allocations[j]++; + Current_bin_capacity[j] += w[i]; + break; + } + + /* + * This is a border element. + * The waste is calculated if this element + * goes to j-1 or to j. + */ + Wastej_1 = fabs( + rearranged_size_of_bin[j-1] + - + ( + Current_bin_capacity[j-1] + + + w[i] + ) + ); + + Wastej = fabs( ( + sumw - wallocationsc[j] + ) + - + ( + sumw - sumcum + w[i] + ) + ); + + if (Wastej_1 <= Wastej) + { + allocations[j-1]++; + Current_bin_capacity[j-1] += w[i]; + } + else + { + allocations[j]++; + Current_bin_capacity[j] += w[i]; + } + } + } + } + + if (metric == NULL) + { + for (ind = 0, i = 0; i < p; i++) + { + np[ind++] = rearrangedp[i]; + np[ind++] = allocations[i]; + } + + free(wallocationsc); + free(Size_of_bin); + free(rearranged_size_of_bin); + free(rearrangedp); + free(Current_bin_capacity); + free(speeds_opt); + free(rearranged_speeds_opt); + free(allocations); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + rearranged_speeds_opt, + Current_bin_capacity, + rearranged_size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + rearranged_speeds_opt, + Current_bin_capacity, + rearranged_size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + for (ind = 0, i = 0; i < p; i++) + { + np[ind++] = rearrangedp[i]; + np[ind++] = allocations[i]; + } + + free(wallocationsc); + free(Size_of_bin); + free(rearranged_size_of_bin); + free(rearrangedp); + free(Current_bin_capacity); + free(speeds_opt); + free(rearranged_speeds_opt); + free(allocations); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering_with_mlimits + ( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int sumw = 0; + int *allocations; + int *Size_of_bin, *rearranged_size_of_bin; + double *speeds_opt, *rearranged_speeds_opt; + int *rearrangedp; + int *rearranged_mlimits; + double temp; + int temp_number; + int i, j, rc, ind; + int total_limits = 0; + int mlimits_apply = 0; + + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits == n) + { + for (i = 0, ind = 0; i < p; i++) + { + np[ind++] = i; + np[ind++] = mlimits[i]; + } + + return HMPI_OK; + } + + if (total_limits < n) + { + printf( + "The number of elements in the set" + " is greater than the sum of numbers of elements" + " the processors can hold or" + " Partitioning cannot be done with the restrictions" + " provided\n" + ); + + return HMPI_ERR_MLIMITS; + } + + for (i = 0; i < n; i++) + { + sumw += w[i]; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return 
MPC_ERR_NOMEM; + } + + speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + /* + * Assume the application programmer represents + * speeds as function of problem size and + * problem size is measured in terms of the weights + * of the elements. + */ + rc = __HMPI_Speed_function_of_problem_size( + p, + pn, + speeds, + psizes, + sumw, + speeds_opt, + Size_of_bin + ); + + if (rc != HMPI_OK) + { + return rc; + } + + rearranged_speeds_opt = (double*)malloc( + sizeof(double) + * + p + ); + + if (rearranged_speeds_opt == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_mlimits[i] = mlimits[i]; + rearranged_size_of_bin[i] = Size_of_bin[i]; + rearranged_speeds_opt[i] = speeds_opt[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_size_of_bin[j-1] < rearranged_size_of_bin[j]) + { + temp = rearranged_size_of_bin[j-1]; + rearranged_size_of_bin[j-1] = rearranged_size_of_bin[j]; + rearranged_size_of_bin[j] = temp; + + temp = rearranged_speeds_opt[j-1]; + rearranged_speeds_opt[j-1] = rearranged_speeds_opt[j]; + rearranged_speeds_opt[j] = temp; + + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_number = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_number; + } + } + } + + allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; 
i++) + { + allocations[i] = 0; + } + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> " + "__HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering_with_mlimits:" + " Optimal speeds are: \n" + ); + + for (i = 0; i < p; i++) + { + printf("%.1f ", rearranged_speeds_opt[i]); + } + + printf("\n"); + } + + rc = __HMPI_Apply_mlimits_to_ordered_sum_of_weights( + p, + n, + rearranged_speeds_opt, + rearranged_mlimits, + w, + -1, + NULL, + NULL, + allocations + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * Calculate the metric + */ + if (metric != NULL) + { + int i, ind, j; + int *Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + for (i = 0, ind = 0; i < p; i++) + { + for (j = 0; j < allocations[i]; j++) + { + Current_bin_capacity[i] += w[ind++]; + } + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + rearranged_speeds_opt, + Current_bin_capacity, + rearranged_size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + rearranged_speeds_opt, + Current_bin_capacity, + rearranged_size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Current_bin_capacity); + } + + for (ind = 0, i = 0; i < p; i++) + { + np[ind++] = rearrangedp[i]; + np[ind++] = allocations[i]; + } + + free(Size_of_bin); + free(rearranged_size_of_bin); + free(rearrangedp); + free(speeds_opt); + free(rearranged_speeds_opt); + free(rearranged_mlimits); + free(allocations); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + diff --git a/hdpi/hmpi_partitioning_sets_speed_single_numbers_weighted_elements.c b/hdpi/hmpi_partitioning_sets_speed_single_numbers_weighted_elements.c new file mode 100644 index 0000000..fd3ed55 --- /dev/null +++ b/hdpi/hmpi_partitioning_sets_speed_single_numbers_weighted_elements.c @@ -0,0 +1,1709 @@ + + /************************************************/ + /* Helpers for Partitioning Interfaces of */ + /* */ + /* Revision history */ + /* 20-05-2003 -- Initial version */ + /************************************************/ + + #include + #include + #include + #include + + #include + #include + + static int HMPI_Debug_flag = 0; + + /*-----------------------------------------------------*/ + + int __HMPI_Number_of_elements_proportional_to_speed + ( + int p, + int n, + const double *speeds, + int *allocations + ) + { + int i, j; + int total = 0; + double sum = 0.0; + + for (i = 0; i < p; i++) + { + sum += speeds[i]; + } + + for (i = 0; i < p; i++) + { + allocations[i] = ( + (double)speeds[i] + / + (double)sum + ) + * + n; + } + + for (i = 0; i < p; i++) + { + total += allocations[i]; + } + + if (total == n) + { + return HMPI_OK; + } + + for (i = total; i < n; i++) + { + int 
optimal_p; + int *revised_allocations; + double *allocation_ratios; + double temp; + + revised_allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (revised_allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + allocation_ratios = (double*)malloc( + sizeof(double) + * + p + ); + + if (allocation_ratios == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < p; j++) + { + revised_allocations[j] = allocations[j] + 1; + allocation_ratios[j] = (double)revised_allocations[j] + / + (double)speeds[j]; + } + + temp = allocation_ratios[0]; + optimal_p = 0; + for (j = 1; j < p; j++) + { + if (temp > allocation_ratios[j]) + { + temp = allocation_ratios[j]; + optimal_p = j; + } + } + + allocations[optimal_p] = allocations[optimal_p] + 1; + + free(revised_allocations); + free(allocation_ratios); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + double __HMPI_System_defined_metric + ( + int p, + const double *speeds, + const int *actual, + const int *ideal + ) + { + int i; + double metric; + double sumd = 0.0; + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> __HMPI_System_defined_metric: " + "speeds are\n"); + + for (i = 0; i < p; i++) + { + printf("%.1f ", speeds[i]); + } + + printf("\n"); + + printf("HMPI===> __HMPI_System_defined_metric: cumulative sumd = \n"); + } + + for (i = 0; i < p; i++) + { + if ((int)speeds[i] == 0) + { + continue; + } + + sumd += ((actual[i] - ideal[i])*(actual[i] - ideal[i])) + / + speeds[i]; + + if (HMPI_Debug_flag) + { + printf("%.1f ", sumd); + } + } + + if (HMPI_Debug_flag) + { + printf("\n"); + } + + metric = sqrt(sumd); + + return metric; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Size_of_bins + ( + int p, + int n, + const double *speeds, + const int *w, + int *wallocations, + int *tsum + ) + { + int i, j, rc; + double sump = 0.0; + int sumw = 0; + int totalw = 0; + + for (i = 0; i < p; i++) + { + sump += speeds[i]; + } + + for (i = 0; i < n; 
i++) + { + sumw += w[i]; + } + + *tsum = sumw; + + for (i = 0; i < p; i++) + { + wallocations[i] = ( + (double)speeds[i] + / + (double)sump + ) + * + sumw; + } + + for (i = 0; i < p; i++) + { + totalw += wallocations[i]; + } + + for (i = totalw; i < sumw; i++) + { + int optimal_p; + int *revised_allocations; + double *allocation_ratios; + double temp; + + revised_allocations = (int*)malloc( + sizeof(int) + * + p + ); + + if (revised_allocations == NULL) + { + return MPC_ERR_NOMEM; + } + + allocation_ratios = (double*)malloc( + sizeof(double) + * + p + ); + + if (allocation_ratios == NULL) + { + return MPC_ERR_NOMEM; + } + + for (j = 0; j < p; j++) + { + revised_allocations[j] = wallocations[j] + 1; + allocation_ratios[j] = (double)revised_allocations[j] + / + (double)speeds[j]; + } + + temp = allocation_ratios[0]; + optimal_p = 0; + + for (j = 1; j < p; j++) + { + if (temp > allocation_ratios[j]) + { + temp = allocation_ratios[j]; + optimal_p = j; + } + } + + wallocations[optimal_p] = wallocations[optimal_p] + 1; + + free(revised_allocations); + free(allocation_ratios); + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_ordered_set + ( + int p, + int n, + const double *speeds, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, rc, prev_proc; + int *Size_of_bin; + int *wallocationsc; + int sumw; + int sumcum = 0; + int *Current_bin_capacity; + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + if (HMPI_Debug_flag) + { + printf("HMPI===> __HMPI_Sum_of_weights_for_ordered_set: Sizes of bins are: \n"); + printf("HMPI===> "); + + for (i = 0; i < p; i++) + { + printf("%d ", Size_of_bin[i]); + } + + printf("\n"); + } + + wallocationsc = 
(int*)malloc( + sizeof(int) + * + (p+1) + ); + + if (wallocationsc == NULL) + { + return MPC_ERR_NOMEM; + } + + wallocationsc[0] = 0; + for (i = 1; i <= p; i++) + { + wallocationsc[i] = wallocationsc[i-1] + Size_of_bin[i-1]; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + np[i] = 0; + Current_bin_capacity[i] = 0; + } + + for (i = 0; i < n; i++) + { + prev_proc = 0; + sumcum += w[i]; + + for (j = 0; j < p; j++) + { + int Wastej_1, Wastej; + + if ((sumcum > wallocationsc[j]) + && (sumcum <= wallocationsc[j+1] + ) + ) + { + if (prev_proc == j) + { + np[j]++; + Current_bin_capacity[j] += w[i]; + break; + } + + /* + * The elements preceding the current one + * exactly fit into partition (j-1) + */ + if ((sumcum - w[i]) == wallocationsc[j]) + { + prev_proc = j; + np[j]++; + Current_bin_capacity[j] += w[i]; + break; + } + + /* + * This is a border element. + * The waste is calculated if this element + * goes to j-1 or to j. + */ + Wastej_1 = fabs( + Size_of_bin[j-1] + - + ( + Current_bin_capacity[j-1] + + + w[i] + ) + ); + + Wastej = fabs( ( + sumw - wallocationsc[j] + ) + - + ( + sumw - sumcum + w[i] + ) + ); + + if (Wastej_1 <= Wastej) + { + np[j-1]++; + Current_bin_capacity[j-1] += w[i]; + } + else + { + np[j]++; + Current_bin_capacity[j] += w[i]; + } + + break; + } + } + } + + if (metric == NULL) + { + free(wallocationsc); + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. 
+ */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(wallocationsc); + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Apply_mlimits_to_ordered_sum_of_weights + ( + int p, + int n, + const double *speeds, + const int *mlimits, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, k, rc, indl; + int *Size_of_bin; + int sumw; + int total_limits = 0; + int mlimits_apply = 0; + int x, y, l, m, opt_start; + int total_sub_mlimits; + int wastei, sumtmp, wastef; + + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits == n) + { + for (i = 0; i < p; i++) + { + np[i] = mlimits[i]; + } + return HMPI_OK; + } + + if (total_limits < n) + { + printf( + "The number of elements in the set" + " exceed the upper bounds of the processors\n" + ); + + return HMPI_ERR_MLIMITS; + } + + rc = __HMPI_Sum_of_weights_for_ordered_set( + p, + n, + speeds, + w, + type_of_metric, + umf, + metric, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + if (np[i] > mlimits[i]) + { + mlimits_apply = 1; + break; + } + } + + if (mlimits_apply == 0) + { + return HMPI_OK; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + for (i = 0; i < p; i++) + { + if (np[i] <= mlimits[i]) + { + continue; + } + + /* + * We try to distribute the remaining + * elements to the processors following it 
+ */ + if (i == 0) + { + int reduced_set_size; + np[i] = mlimits[i]; + reduced_set_size = n - np[i]; + + if (HMPI_Debug_flag) + { + printf("HMPI===> __HMPI_Apply_mlimits_to_ordered_sum_of_weights: mlimits = %d, Reduced set size = %d\n", mlimits[i], reduced_set_size); + } + + free(Size_of_bin); + + return __HMPI_Apply_mlimits_to_ordered_sum_of_weights( + p-1, + reduced_set_size, + (speeds + 1), + (mlimits + 1), + (w + np[i]), + type_of_metric, + umf, + metric, + np + 1 + ); + } + + /* + * If this is the last processor, + * we try to distribute the remaining + * elements to the processors preceding it + */ + if (i == (p - 1)) + { + int reduced_set_size = 0; + + for (j = 0; j < i; j++) + { + reduced_set_size += np[j]; + } + + reduced_set_size += (np[i] - mlimits[i]); + np[i] = mlimits[i]; + + free(Size_of_bin); + + return __HMPI_Apply_mlimits_to_ordered_sum_of_weights( + p-1, + reduced_set_size, + speeds, + mlimits, + w, + type_of_metric, + umf, + metric, + np + ); + } + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> " + "__HMPI_Apply_mlimits_to_ordered_sum_of_weights: " + "Processor %d has upper bound exceeded\n", + i + ); + + printf("HMPI===> Allocations are: \n"); + for (k = 0; k < p; k++) + { + printf("%d ", np[k]); + } + printf("\n"); + + printf("HMPI===> element limits are:\n"); + for (k = 0; k < p; k++) + { + printf("%d ", mlimits[k]); + } + printf("\n"); + } + + for (k = i+1, total_sub_mlimits = 0; k < p; k++) + { + total_sub_mlimits += mlimits[k]; + } + + /* + * Find the maximum subsequence of elements, the number + * of elements being equal to mlimits[i] and packing these + * elements into bin i generates least amount of waste + */ + l = 0; + for (k = 0; k < i; k++) + { + l += np[k]; + } + + indl = l; + + do + { + wastei = INT_MAX; + + for (x = indl; x < (indl+np[i]); x++) + { + if (((indl+np[i]) - x) < mlimits[i]) + { + break; + } + + sumtmp = 0; + + for (y = 0; y < mlimits[i]; y++) + { + sumtmp += w[x+y]; + } + + wastef = fabs(sumtmp - Size_of_bin[i]); + 
+ if (wastef < wastei) + { + wastei = wastef; + opt_start = x; + } + } + + if (HMPI_Debug_flag) + { + printf("x=%d ", x); + } + + l = opt_start; + m = l + mlimits[i]; + + indl++; + } + while ((n-m) > total_sub_mlimits); + + if (HMPI_Debug_flag) + { + printf("\n"); + } + + np[i] = mlimits[i]; + + if (HMPI_Debug_flag) + { + printf( + "HMPI===> " + "__HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits: " + "Total number of elements=%d," + "Number of elements to be redistributed before=%d," + " elements after the element %d to be redistributed\n", + n, + l, + m + ); + } + + /* + * spread the elements {0, 1, ..., l-1} + * amongst the processors before i + */ + rc = __HMPI_Apply_mlimits_to_ordered_sum_of_weights( + i, + l, + speeds, + mlimits, + w, + type_of_metric, + umf, + metric, + np + ); + + if (rc != HMPI_OK) + { + return rc; + } + + /* + * spread the elements {m, m+1, ..., n-1} + * amongst the processors following i + */ + rc = __HMPI_Apply_mlimits_to_ordered_sum_of_weights( + p-(i+1), + (n-l-mlimits[i]), + (speeds+i+1), + (mlimits+i+1), + (w+l+mlimits[i]), + type_of_metric, + umf, + metric, + (np+i+1) + ); + + if (rc != HMPI_OK) + { + return rc; + } + + free(Size_of_bin); + + break; + } + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Sum_of_weights_for_nonordered_set + ( + int p, + int n, + const double *speeds, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, rc; + int sumw; + int *Size_of_bin, *Current_bin_capacity; + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + 
Current_bin_capacity[i] = 0; + } + + for (i = 0; i < n; i++) + { + int waste = INT_MAX; + int chosen; + + for (j = 0; j < p; j++) + { + if (Current_bin_capacity[j] == Size_of_bin[j]) + { + continue; + } + + if ((Current_bin_capacity[j] + w[i]) <= Size_of_bin[j]) + { + np[i] = j; + Current_bin_capacity[j] += w[i]; + break; + } + } + + if (j == p) + { + for (j = 0; j < p; j++) + { + int wastej = fabs( + Size_of_bin[j] + - + ( + Current_bin_capacity[j] + + + w[i] + ) + ); + + if (wastej < waste) + { + chosen = j; + waste = wastej; + } + } + + np[i] = chosen; + Current_bin_capacity[chosen] += w[i]; + } + } + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Apply_mlimits_to_unordered_sum_of_weights_algo_2 + ( + int p, + int n, + const double *speeds, + const int *mlimits, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, k, rc; + int sumw; + int total_limits = 0; + int *Size_of_bin, *Current_bin_capacity; + int *Open, *Number_in_bin, *shortlist; + + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits < n) + { + printf( + "The number of elements in the set" + " is greater than the sum of numbers of elements" + " the processors can hold or" + " Partitioning 
cannot be done with the restrictions" + " provided\n" + ); + + return HMPI_ERR_MLIMITS; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + if (total_limits == n) + { + int ind = 0, temp_number, temp_mlimit; + int *rearranged_mlimits; + int *rearrangedp; + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_mlimits[i] = mlimits[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_mlimits[j-1] > rearranged_mlimits[j]) + { + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_mlimit = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_mlimit; + } + } + } + + /* + * TBD: + * This looks like a NP-hard problem. + * We know the number of elements in each subset + * given by the upper bound. + * We provide a naive implementation here. + * This is of complexity O(n*n). + * We arrange the processors in increasing + * order of their upper bounds and we arrange + * the weights in decreasing order. 
+ */ + for (i = 0; i < p; i++) + { + for (j = 0; j < rearranged_mlimits[i]; j++) + { + np[ind] = rearrangedp[i]; + Current_bin_capacity[rearrangedp[i]] += w[ind]; + ind++; + } + } + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_mlimits); + free(rearrangedp); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_mlimits); + free(rearrangedp); + + return HMPI_OK; + } + + Open = (int*)malloc( + sizeof(int) + * + p + ); + + if (Open == NULL) + { + return MPC_ERR_NOMEM; + } + + Number_in_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Number_in_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Open[i] = 1; + Number_in_bin[i] = 0; + } + + shortlist = (int*)malloc( + sizeof(int) + * + p + ); + + if (shortlist == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + int nslist = 0; + int chosen = -1; + + for (j = 0; j < p; j++) + { + if (Current_bin_capacity[j] == Size_of_bin[j]) + { + continue; + } + + if (((Current_bin_capacity[j] + w[i]) <= Size_of_bin[j]) + && (Open[j] == 1 + ) + ) + { + shortlist[nslist++] = j; + } + } + + if (nslist > 0) + { + int temp = Size_of_bin[shortlist[0]] + - + Current_bin_capacity[shortlist[0]] + ; + chosen = shortlist[0]; + + for (k = 1; k < nslist; k++) + { + int tempk = Size_of_bin[shortlist[k]] + - + Current_bin_capacity[shortlist[k]] + ; + + if ((tempk >= temp) + && 
(Open[shortlist[k]] == 1 + ) + ) + { + temp = tempk; + chosen = shortlist[k]; + } + } + } + else + { + int waste = INT_MAX; + for (j = 0; j < p; j++) + { + if (Open[j] == 1) + { + int wastej = ( + Current_bin_capacity[j] + + + w[i] + - + Size_of_bin[j] + ); + + if (wastej < waste) + { + chosen = j; + waste = wastej; + } + } + } + } + + np[i] = chosen; + Number_in_bin[chosen]++; + + if (Number_in_bin[chosen] >= mlimits[chosen]) + { + Open[chosen] = 0; + } + + Current_bin_capacity[chosen] = Current_bin_capacity[chosen] + + + w[i] + ; + } + + free(shortlist); + free(Open); + free(Number_in_bin); + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ + + int __HMPI_Apply_mlimits_to_unordered_sum_of_weights + ( + int p, + int n, + const double *speeds, + const int *mlimits, + const int *w, + int type_of_metric, + User_defined_metric umf, + double *metric, + int *np + ) + { + int i, j, k, rc; + int sumw; + int total_limits = 0; + int *Size_of_bin, *Current_bin_capacity; + int *Open, *Number_in_bin, *shortlist; + + for (i = 0; i < p; i++) + { + total_limits += mlimits[i]; + } + + if (total_limits < n) + { + printf( + "The number of elements in the set" + " is greater than the sum of numbers of elements" + " the processors can hold or" + " Partitioning cannot be done 
with the restrictions" + " provided\n" + ); + + return HMPI_ERR_MLIMITS; + } + + Size_of_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Size_of_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + rc = __HMPI_Size_of_bins( + p, + n, + speeds, + w, + Size_of_bin, + &sumw + ); + + if (rc != HMPI_OK) + { + return rc; + } + + Current_bin_capacity = (int*)malloc( + sizeof(int) + * + p + ); + + if (Current_bin_capacity == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Current_bin_capacity[i] = 0; + } + + if (total_limits == n) + { + int ind = 0, temp_number, temp_mlimit; + int *rearranged_mlimits; + int *rearrangedp; + + rearranged_mlimits = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearranged_mlimits == NULL) + { + return MPC_ERR_NOMEM; + } + + rearrangedp = (int*)malloc( + sizeof(int) + * + p + ); + + if (rearrangedp == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + rearrangedp[i] = i; + rearranged_mlimits[i] = mlimits[i]; + } + + for (i = 0; i < p; i++) + { + for (j = 1; j < p; j++) + { + if (rearranged_mlimits[j-1] > rearranged_mlimits[j]) + { + temp_number = rearrangedp[j-1]; + rearrangedp[j-1] = rearrangedp[j]; + rearrangedp[j] = temp_number; + + temp_mlimit = rearranged_mlimits[j-1]; + rearranged_mlimits[j-1] = rearranged_mlimits[j]; + rearranged_mlimits[j] = temp_mlimit; + } + } + } + + /* + * TBD: + * This looks like a NP-hard problem. + * We know the number of elements in each subset + * given by the upper bound. + * We provide a naive implementation here. + * This is of complexity O(n*n). + * We arrange the processors in increasing + * order of their upper bounds and we arrange + * the weights in decreasing order. 
+ */ + for (i = 0; i < p; i++) + { + for (j = 0; j < rearranged_mlimits[i]; j++) + { + np[ind] = rearrangedp[i]; + Current_bin_capacity[rearrangedp[i]] += w[ind]; + ind++; + } + } + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_mlimits); + free(rearrangedp); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + free(rearranged_mlimits); + free(rearrangedp); + + return HMPI_OK; + } + + Open = (int*)malloc( + sizeof(int) + * + p + ); + + if (Open == NULL) + { + return MPC_ERR_NOMEM; + } + + Number_in_bin = (int*)malloc( + sizeof(int) + * + p + ); + + if (Number_in_bin == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < p; i++) + { + Open[i] = 1; + Number_in_bin[i] = 0; + } + + shortlist = (int*)malloc( + sizeof(int) + * + p + ); + + if (shortlist == NULL) + { + return MPC_ERR_NOMEM; + } + + for (i = 0; i < n; i++) + { + int nslist = 0; + int chosen = -1; + + for (j = 0; j < p; j++) + { + if (Current_bin_capacity[j] == Size_of_bin[j]) + { + continue; + } + + if (((Current_bin_capacity[j] + w[i]) <= Size_of_bin[j]) + && (Open[j] == 1 + ) + ) + { + chosen = j; + break; + } + } + + if (chosen == -1) + { + int waste = INT_MAX; + for (j = 0; j < p; j++) + { + if (Open[j] == 1) + { + int wastej = ( + Current_bin_capacity[j] + + + w[i] + - + Size_of_bin[j] + ); + + if (wastej < waste) + { + chosen = j; + waste = wastej; + } + } + } + } + + np[i] = chosen; + 
Number_in_bin[chosen]++; + + if (Number_in_bin[chosen] >= mlimits[chosen]) + { + Open[chosen] = 0; + } + + Current_bin_capacity[chosen] = Current_bin_capacity[chosen] + + + w[i] + ; + } + + free(shortlist); + free(Open); + free(Number_in_bin); + + if (metric == NULL) + { + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /* + * The ideal sum of weights is given by + * elements of array Size_of_bin and the + * actual sum of weights is calculated for array elements + * of Current_bin_capacity. + */ + switch (type_of_metric) + { + case USER_SPECIFIED: + { + *metric = (*umf)( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + case SYSTEM_DEFINED: + { + *metric = __HMPI_System_defined_metric( + p, + speeds, + Current_bin_capacity, + Size_of_bin + ); + } + break; + default: + { + return HMPI_ERR_METRIC; + } + break; + } + + free(Size_of_bin); + free(Current_bin_capacity); + + return HMPI_OK; + } + + /*-----------------------------------------------------*/ diff --git a/hdpi/hmpi_partitioning_trees.h b/hdpi/hmpi_partitioning_trees.h new file mode 100644 index 0000000..8b3793d --- /dev/null +++ b/hdpi/hmpi_partitioning_trees.h @@ -0,0 +1,42 @@ + +/************************************************************************* +* * +* Heterogeneous Data Partitioning Interface * +* ========================================= * +* * +* Copyright (c) 2002 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. We assume no responsibility for the use * +* or reliability of our software. 
* +* * +*************************************************************************/ + + /************************************************/ + /* Partitioning interfaces for trees */ + /* */ + /* Revision history */ + /* 19-05-2003 -- Initial version */ + /************************************************/ + + #ifndef __HMPI_PARTITIONING_TREES_HH + #define __HMPI_PARTITIONING_TREES_HH + + int HMPI_Partition_tree( + int p, + int pn, + const double *speeds, + const int *psizes, + const int *mlimits, + int n, + int nedges, + const int *nwgt, + const int *xadj, + const int *adjacency, + const int *adjwgt, + int *vp, + int *edgecut + ); + + #endif /* __HMPI_PARTITIONING_TREES_HH */ + diff --git a/hdpi/hmpi_partitioning_types.h b/hdpi/hmpi_partitioning_types.h new file mode 100644 index 0000000..6d99b5d --- /dev/null +++ b/hdpi/hmpi_partitioning_types.h @@ -0,0 +1,27 @@ + +/************************************************************************* +* * +* Heterogeneous Data Partitioning Interface * +* ========================================= * +* * +* Copyright (c) 2002 Department of Computer Science, * +* University College Dublin. * +* * +* All rights reserved. We assume no responsibility for the use * +* or reliability of our software. 
/*
 * Convert the linear process number pnum into two grid coordinates.
 *
 *   pnum    linear process number
 *   ppar    grid parameters; only ppar[1] is read and used as the divisor
 *   pcoord  output; pcoord[0] receives pnum / ppar[1] and pcoord[1]
 *           receives pnum % ppar[1]
 */
void rank2coord
(
    int pnum,
    const int *ppar,
    int *pcoord
)
{
    /* Read the divisor first so the result is the same if the arrays alias. */
    const int stride = ppar[1];

    pcoord[0] = pnum / stride;
    pcoord[1] = pnum % stride;
}
/*
 * Fill the input data with constants: the first m*n entries of A are set
 * to 1.5 and the first n entries of b are set to 2.5.
 *
 *   m, n  dimensions; A must hold at least m*n doubles, b at least n
 *   A     output array of m*n values
 *   b     output array of n values
 *
 * Always returns 0.
 */
int initializeData(
    const int m, const int n,
    double *A, double *b
)
{
    /*
     * Widen before multiplying: m*n evaluated in int overflows (undefined
     * behaviour) once the product exceeds INT_MAX, e.g. n = m = 50000.
     */
    const long long total = (long long)m * (long long)n;
    long long i;

    for (i = 0; i < total; i++)
    {
        A[i] = 1.5;
    }

    for (i = 0; i < n; i++)
    {
        b[i] = 2.5;
    }

    return 0;
}

%s n p q communicate(0 | 1) verbosity(0 | 1).\n" + "If communicate is 1, process 0 communicates the matrix A, vector b to all the " + "other processes before the parallel computations...\n", argv[0]); + inputsIncorrect = 1; + } + } + + rc = MPI_Bcast( + &inputsIncorrect, + 1, MPI_UNSIGNED, + 0, MPI_COMM_WORLD); + + if (rc != MPI_SUCCESS) + { + printf("(%d):Problems broadcasting w\n", gme); + } + + if (inputsIncorrect) + { + MPI_Finalize(); + exit(EXIT_SUCCESS); + } + + int n, p, q, verbosity, communicate; + + if (gme == 0) + { + n = atoi(argv[1]); + p = atoi(argv[2]); + q = atoi(argv[3]); + communicate = atoi(argv[4]); + verbosity = atoi(argv[5]); + } + + rc1 = MPI_Bcast( + &n, + 1, MPI_INT, + 0, MPI_COMM_WORLD); + + rc2 = MPI_Bcast( + &p, + 1, MPI_INT, + 0, MPI_COMM_WORLD); + + rc3 = MPI_Bcast( + &q, + 1, MPI_INT, + 0, MPI_COMM_WORLD); + + rc4 = MPI_Bcast( + &verbosity, + 1, MPI_INT, + 0, MPI_COMM_WORLD); + + rc5 = MPI_Bcast( + &communicate, + 1, MPI_INT, + 0, MPI_COMM_WORLD); + + if ((rc1 != MPI_SUCCESS) + || (rc2 != MPI_SUCCESS) + || (rc3 != MPI_SUCCESS) + || (rc4 != MPI_SUCCESS) + || (rc5 != MPI_SUCCESS + ) + ) + { + fprintf( + stderr, + "(%d):Problems broadcasting n | p | q | communicate | verbosity\n", gme + ); + MPI_Finalize(); + exit(EXIT_SUCCESS); + } + + /* + * Now bind the processes using the abstract devices table... 
+ */ + cpu_set_t cpuSet; + CPU_ZERO(&cpuSet); + + if (verbosity) + { + printf( + "gme %d: start %d end %d.\n", + gme, hcl_coreindex[gme], hcl_coreindex[gme+1]); + printf( + "gme %d: Bound cores: ", gme); + } + + int cpu; + for (cpu = hcl_coreindex[gme]; cpu < hcl_coreindex[gme+1]; cpu++) + { + if (verbosity) + { + printf( + "%d ", hcl_corebindings[cpu]); + } + + CPU_SET(hcl_corebindings[cpu], &cpuSet); + } + + if (verbosity) + { + printf("\n"); + } + + int status = sched_setaffinity( + getpid(), + sizeof(cpu_set_t), + &cpuSet); + + if (status != 0) + { + fprintf( + stderr, + "%d: Problems setting sched_setaffinity.\n", + gme); + exit(EXIT_FAILURE); + } + + if (gme == 0) + { + printf("=======================================\n"); + printf("Executing Matrix-Vector Multiplication.\n"); + printf("=======================================\n"); + + printf( + "Inputs: n:%d, p:%d, q:%d, communicate %d.\n", + n, p, q, communicate); + } + + rc = MPI_Comm_size( + MPI_COMM_WORLD, + &gsize + ); + + if (rc != MPI_SUCCESS) + { + printf("MAIN:Problems getting size...Exiting...\n"); + } + + if (gsize != (p*q)) + { + if (gme == 0) + { + fprintf( + stderr, + "MAIN: MPI_COMM_WORLD size greater than p * q grid of processes...\n" + ); + } + MPI_Finalize(); + exit(EXIT_SUCCESS); + } + + w = (int*)malloc( + sizeof(int) + * + (p*q*p*q) + ); + + if (w == NULL) + { + fprintf(stderr, "(%d): Cannot allocate w\n", gme); + MPI_Finalize(); + exit(EXIT_FAILURE); + } + + h = (int*)malloc( + sizeof(int) + * + (p*q*p*q) + ); + + if (h == NULL) + { + printf("(%d): Cannot allocate h\n", gme); + MPI_Finalize(); + exit(EXIT_FAILURE); + } + + trow = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (trow == NULL) + { + printf("(%d): Cannot allocate trow\n", gme); + MPI_Finalize(); + exit(EXIT_FAILURE); + } + + tcol = (int*)malloc( + sizeof(int) + * + (p*q) + ); + + if (tcol == NULL) + { + printf("(%d): Cannot allocate tcol\n", gme); + MPI_Finalize(); + exit(EXIT_FAILURE); + } + + if (gme == 0) + { + rc = 
HMPI_Partition_matrix_2d( + p, q, + 1, + NULL, + NULL, NULL, + n, n, + HMPI_COLUMN_BASED, + w, h, trow, tcol, + NULL, NULL + ); + + if (rc != HMPI_OK) + { + printf("(%d): Problems partitioning\n", gme); + } + } + + rc = MPI_Bcast( + w, p*q*p*q, MPI_INT, 0, MPI_COMM_WORLD + ); + + if (rc != MPI_SUCCESS) + { + printf("(%d):Problems broadcasting w\n", gme); + } + + rc = MPI_Bcast( + h, p*q*p*q, MPI_INT, 0, MPI_COMM_WORLD + ); + + if (rc != MPI_SUCCESS) + { + printf("(%d):Problems broadcasting h\n", gme); + } + + rc = MPI_Bcast( + trow, p*q, MPI_INT, 0, MPI_COMM_WORLD + ); + + if (rc != MPI_SUCCESS) + { + printf("(%d):Problems broadcasting trow\n", gme); + } + + rc = MPI_Bcast( + tcol, p*q, MPI_INT, 0, MPI_COMM_WORLD + ); + + if (rc != MPI_SUCCESS) + { + printf("(%d):Problems broadcasting tcol\n", gme); + } + + if ((verbosity > 0) && (gme == 0)) + { + printf("Partitioning of matrix is: \n"); + + print_stdin_2d_allocation_local( + p, q, n, n, + w, h, trow, tcol + ); + } + + /* + * Parallel Matrix-Vector Multiplication... 
+ */ + const int ppar[] = {p, q}; + int* mycoords = (int*)malloc(sizeof(int)* 2); + rank2coord(gme, ppar, mycoords); + myi = mycoords[0]; + myj = mycoords[1]; + free(mycoords); + + /* + * My local store is a matrix of size lda x ldb + */ + const double alpha = 1.0; + const double beta = 0.0; + + int myh = h[HMPI_RECT_INDEX(myi, myj, myi, myj, p, q)]; + int mm = myh; + int myw = w[HMPI_RECT_INDEX(myi, myj, myi, myj, p, q)]; + int nn = myw; + + double *WA = (double*)malloc( + sizeof(double) + * + (mm*nn)); + if (WA == NULL) + { + printf("me=%d: No memory to allocate my A buffer, WA\n", gme); + } + + double *Wb = (double*)malloc( + sizeof(double) + * + nn); + if (Wb == NULL) + { + printf("me=%d: No memory to allocate buffer b, Wb\n", gme); + } + + double *Wc = (double*)malloc( + sizeof(double) + * + mm); + if (Wc == NULL) + { + printf("me=%d: No memory to allocate buffer c, Wc\n", gme); + } + + initializeData(mm, nn, WA, Wb); + + struct timeval start, end; + gettimeofday(&start, NULL); + + if (communicate) + { + if (gme == 0) + { + int i, j, cIndex = 0; + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + if ((i == 0) && (j == 0)) + { + continue; + } + int mm = h[HMPI_RECT_INDEX(i, j, i, j, p, q)]; + int nn = w[HMPI_RECT_INDEX(i, j, i, j, p, q)]; + + double* tmpA = (double*)malloc( + sizeof(double)*mm*nn); + + if (tmpA == NULL) + { + printf("me=%d: No memory to allocate buffer tmpA\n", gme); + } + + double* tmpB = (double*)malloc( + sizeof(double)*nn); + + if (tmpB == NULL) + { + printf("me=%d: No memory to allocate buffer tmpB.\n", gme); + } + + initializeData(mm, nn, tmpA, tmpB); + + if (verbosity) + { + printf("Communicating A, b to process %d.\n", i*q + j); + } + + MPI_Send( + tmpA, mm*nn, MPI_DOUBLE, + i*q + j, + MPI_MSG_TAG, + MPI_COMM_WORLD); + free(tmpA); + MPI_Send( + tmpB, nn, MPI_DOUBLE, + i*q + j, + MPI_MSG_TAG, + MPI_COMM_WORLD); + free(tmpB); + } + } + initializeData(mm, nn, WA, Wb); + } + else + { + MPI_Recv( + WA, mm*nn, MPI_DOUBLE, + 0, + 
MPI_MSG_TAG, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv( + Wb, nn, MPI_DOUBLE, + 0, + MPI_MSG_TAG, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + } + } + else + { + initializeData(mm, nn, WA, Wb); + } + + cblas_dgemv(CblasRowMajor, CblasNoTrans, + mm, nn, alpha, WA, nn, Wb, 1, beta, Wc, 1); + + if (gme == 0) + { + double *c = (double*)malloc( + n*sizeof(double)); + if (c == NULL) + { + printf("me=%d: No memory to allocate buffer c, Wc\n", gme); + } + memcpy(c, Wc, sizeof(double)*mm); + + int i, j, cIndex = 0; + for (i = 0; i < p; i++) + { + for (j = 0; j < q; j++) + { + if ((i == 0) && (j == 0)) + { + continue; + } + + int myh = h[HMPI_RECT_INDEX(i, j, i, j, p, q)]; + int mm = myh; + double* tmpC = (double*)malloc( + sizeof(double)*mm); + + if (tmpC == NULL) + { + printf("me=%d: No memory to allocate buffer tmpC\n", gme); + } + + MPI_Recv( + tmpC, mm, MPI_DOUBLE, + i*q + j, + MPI_MSG_TAG, + MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + int elem; + for (elem = 0; elem < mm; elem++) + { + c[cIndex + elem] += tmpC[elem]; + } + + free(tmpC); + } + + cIndex += mm; + } + + if (verbosity > 1) + { + printf("Result c[]: "); + for (i = 0; i < n; i++) + { + printf("%lf ", c[i]); + } + printf("\n"); + } + + free(c); + } + else + { + if (verbosity > 1) + { + int i; + printf("Sending Wc[]: "); + for (i = 0; i < mm; i++) + { + printf("%lf ", Wc[i]); + } + printf("\n"); + } + + MPI_Send( + Wc, mm, MPI_DOUBLE, + 0, + MPI_MSG_TAG, + MPI_COMM_WORLD); + } + + gettimeofday(&end, NULL); + + double tstart = start.tv_sec + start.tv_usec/1000000.; + double tend = end.tv_sec + end.tv_usec/1000000.; + double myTime = (tend - tstart); + + free(WA); + free(Wb); + free(Wc); + + double avgSpeed, maxTime; + double myPsize = 2.0*mm*nn; + double mySpeed = (myPsize / myTime) * 1e-06; + + MPI_Reduce( + &mySpeed, + &avgSpeed, + 1, + MPI_DOUBLE, + MPI_SUM, + 0, + MPI_COMM_WORLD); + + MPI_Reduce( + &myTime, + &maxTime, + 1, + MPI_DOUBLE, + MPI_MAX, + 0, + MPI_COMM_WORLD); + + if (gme == 0) + { + double dN = n; 
+ double nxn = dN * dN * 0.001 * 0.001; + double pspeed = 2.0 * nxn / maxTime; + + printf( + "Parallel MxV successful: n=%d, " + "Average speed(MFLOPs)=%3f, " + "Parallel speed(MFLOPs)=%3f, " + "MxV execution time(sec)=%3f\n", + n, avgSpeed / (double)gsize, + pspeed, maxTime); + } + + printf( + "Me=%d: " + "Speed(MFLOPs)=%3f, " + "MxV execution time(sec)=%3f\n", + gme, + mySpeed, + myTime + ); + + free(w); + free(h); + free(trow); + free(tcol); + + MPI_Finalize(); + + exit(EXIT_SUCCESS); +} + +/*---------------------------------------------------------------*/