Commit c000ca6818357246de5c3c25eb7e1b2451a51a1e

Authored by Ravi
1 parent ea38e672

hclmxv: Adding MXV MPI...

Too many changes to show.

To preserve performance only 17 of 27 files are displayed.

INSTALL.txt
@@ -24,7 +24,7 @@ To build and install: @@ -24,7 +24,7 @@ To build and install:
24 24
25 1). cd <hclmxv root directory> 25 1). cd <hclmxv root directory>
26 26
27 -2). make 27 +2). ./compile.sh
28 28
29 This will create the executables. 29 This will create the executables.
30 30
Makefile
@@ -3,12 +3,15 @@ @@ -3,12 +3,15 @@
3 ############################################################### 3 ###############################################################
4 4
5 CC = icc 5 CC = icc
  6 +MPICC = mpiicc
6 NVCC = nvcc 7 NVCC = nvcc
7 OPTFLAGS = -O3 -fopenmp 8 OPTFLAGS = -O3 -fopenmp
8 CPP11FLAGS = ${OPTFLAGS} -std=c++11 9 CPP11FLAGS = ${OPTFLAGS} -std=c++11
9 MKLFLAGS = ${OPTFLAGS} -mkl 10 MKLFLAGS = ${OPTFLAGS} -mkl
  11 +MPIMKLFLAGS = ${OPTFLAGS} -mkl -mt_mpi
  12 +HDPIHOME = hdpi/installation_dir
10 13
11 -all: dgemvcpu dgemvphi dgemvgpu 14 +all: dgemvcpu dgemvphi dgemvgpu absdevsreader runabsdevreader mpimain
12 15
13 dgemvcpu: 16 dgemvcpu:
14 ${CC} ${MKLFLAGS} -o dgemvcpu dgemvcpu.c -lm 17 ${CC} ${MKLFLAGS} -o dgemvcpu dgemvcpu.c -lm
@@ -20,7 +23,25 @@ dgemvgpu: @@ -20,7 +23,25 @@ dgemvgpu:
20 ${NVCC} -I/usr/local/cuda/include -o dgemvgpu dgemvgpu.c \ 23 ${NVCC} -I/usr/local/cuda/include -o dgemvgpu dgemvgpu.c \
21 -L/usr/local/cuda/lib64 -lcublas 24 -L/usr/local/cuda/lib64 -lcublas
22 25
  26 +absdevsreader:
  27 + @echo "--------------------------------------------"
  28 + @echo "Now compiling abstract devices reader file..."
  29 + @echo "--------------------------------------------"
  30 + @echo
  31 + ${CC} ${OPTFLAGS} -c cputopology.cpp
  32 + ${CC} ${CPP11FLAGS} -o absdevsreader absdevsreader.cpp cputopology.o
  33 +
  34 +runabsdevreader:
  35 + @echo "--------------------------------------------"
  36 + @echo "Now running abstract devices reader to generate absdevs.c..."
  37 + @echo "--------------------------------------------"
  38 + @echo
  39 + ./absdevsreader ./absdevs.lst 0
  40 +
  41 +mpimain:
  42 + $(MPICC) -I${HDPIHOME}/include ${MKLFLAGS} -o main main.cpp -L${HDPIHOME}/lib -lhdpi -lm
  43 +
23 clean: 44 clean:
24 - rm -f dgemvcpu dgemvphi dgemvgpu 45 + rm -f dgemvcpu dgemvphi dgemvgpu *.o absdevsreader main
25 46
26 ############################################################### 47 ###############################################################
absdevpowers.c 0 → 100644
  1 +
  2 +/*----------------------------------------------------------------------------*/
  3 +
  4 +const char* hcl_powerplatforms[] = {
  5 +"CPUPCM"
  6 +};
  7 +
  8 +/*----------------------------------------------------------------------------*/
  9 +
absdevs.c 0 → 100644
  1 +
  2 +/*----------------------------------------------------------------------------*/
  3 +
  4 +#include "absdevs.h"
  5 +
  6 +/*----------------------------------------------------------------------------*/
  7 +
  8 +const unsigned int hcl_coreindex[] = {
  9 +0,6,12,18,24
  10 +};
  11 +const unsigned int hcl_corebindings[] = {
  12 +0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
  13 +};
  14 +
  15 +/*----------------------------------------------------------------------------*/
  16 +
absdevs.h 0 → 100644
  1 +
  2 +/*----------------------------------------------------------------------------*/
  3 +
  4 +#ifndef _ABSDEVS_HH
  5 +#define _ABSDEVS_HH
  6 +
  7 +/*----------------------------------------------------------------------------*/
  8 +
  9 +typedef struct _hcl_abstractdevicestable_ {
  10 + int(*init)(const int, const int, const int, const int,
  11 + const unsigned int, const unsigned int);
  12 + int(*gemm)(const int, const int*, const int*, const int*,
  13 + double*, double*, double*, double*);
  14 + int(*destroy)(const int);
  15 + const unsigned int nompt;
  16 +} hcl_abstractdevicestable;
  17 +
  18 +extern hcl_abstractdevicestable hcl_absdevtable[];
  19 +extern const unsigned int hcl_coreindex[];
  20 +extern const unsigned int hcl_corebindings[];
  21 +
  22 +/*----------------------------------------------------------------------------*/
  23 +
  24 +#endif
  25 +
  26 +/*----------------------------------------------------------------------------*/
  27 +
absdevs.lst 0 → 100644
  1 +#cores DGEMM No. of MPI processes No. of OpenMP threads
  2 +#----- ----- --------------------- ---------------------
  3 +#0-3 CPU,MKL 1 0
  4 +#4-7 CPU,MKL 1 0
  5 +#8-11 CPU,MKL 1 0
  6 +#24-27 CPU,MKL 1 0
  7 +#28-31 CPU,MKL 1 0
  8 +#32-35 CPU,MKL 1 0
  9 +#12-15 CPU,MKL 1 0
  10 +#16-19 CPU,MKL 1 0
  11 +#20-23 CPU,MKL 1 0
  12 +#36-39 CPU,MKL 1 0
  13 +#40-43 CPU,MKL 1 0
  14 +#44-47 CPU,MKL 1 0
  15 +#0-2 CPU,MKL 1 0
  16 +#3-5 CPU,MKL 1 0
  17 +#6-8 CPU,MKL 1 0
  18 +#9-11 CPU,MKL 1 0
  19 +#24-26 CPU,MKL 1 0
  20 +#27-29 CPU,MKL 1 0
  21 +#30-32 CPU,MKL 1 0
  22 +#33-35 CPU,MKL 1 0
  23 +#12-14 CPU,MKL 1 0
  24 +#15-17 CPU,MKL 1 0
  25 +#18-20 CPU,MKL 1 0
  26 +#21-23 CPU,MKL 1 0
  27 +#36-38 CPU,MKL 1 0
  28 +#39-41 CPU,MKL 1 0
  29 +#42-44 CPU,MKL 1 0
  30 +#45-47 CPU,MKL 1 0
  31 +0-5 CPU,MKL 1 0
  32 +6-11 CPU,MKL 1 0
  33 +12-17 CPU,MKL 1 0
  34 +18-23 CPU,MKL 1 0
absdevsreader.cpp 0 → 100644
  1 +
  2 +/*-----------------------------------------------------------*/
  3 +
  4 +#include <iostream>
  5 +#include <fstream>
  6 +#include <cstdlib>
  7 +#include <algorithm>
  8 +#include <string>
  9 +#include <sstream>
  10 +#include <vector>
  11 +#include <cstring>
  12 +#include "cputopology.hpp"
  13 +
  14 +/*-----------------------------------------------------------*/
  15 +
  16 +int main(int argc, char** argv)
  17 +{
  18 + if (argc != 3)
  19 + {
  20 + std::cerr << "Usage: " << argv[0]
  21 + << " <Abstract devices file> <verbosity (0|1)>"
  22 + << std::endl;
  23 + exit(EXIT_FAILURE);
  24 + }
  25 +
  26 + std::string abstractDevicesFile = argv[1];
  27 + bool verbosity = atoi(argv[2]);
  28 +
  29 + std::ifstream absDevices(abstractDevicesFile.c_str());
  30 + if (!absDevices.is_open())
  31 + {
  32 + std::cerr << "Unable to open "
  33 + << abstractDevicesFile
  34 + << std::endl;
  35 + exit(EXIT_FAILURE);
  36 + }
  37 +
  38 + unsigned int numLogicalCPUs, numPhysicalCPUs;
  39 + int rc = hcl::topology::getNumLogicalCpus(&numLogicalCPUs);
  40 + if (rc != 0)
  41 + {
  42 + std::cerr << "Error to get number of logical cores..."
  43 + << std::endl;
  44 + exit(EXIT_FAILURE);
  45 + }
  46 +
  47 + rc = hcl::topology::getNumPhysicalCpus(&numPhysicalCPUs);
  48 + if (rc != 0)
  49 + {
  50 + std::cerr << "Error to get number of physical cores..."
  51 + << std::endl;
  52 + exit(EXIT_FAILURE);
  53 + }
  54 +
  55 + std::cout << "Number of logical cores " << numLogicalCPUs
  56 + << std::endl;
  57 + std::cout << "Number of physical cores " << numPhysicalCPUs
  58 + << std::endl;
  59 +
  60 + std::vector<unsigned int> nCoresList;
  61 + std::vector<std::string> coresList;
  62 + std::vector<std::string> abstractDevicesTable;
  63 + std::vector<std::string> powerPlatforms;
  64 +
  65 + /*
  66 + * We always include the CPU
  67 + */
  68 + powerPlatforms.push_back("CPUPCM");
  69 +
  70 + unsigned int numCoresBound = 0;
  71 + std::string line;
  72 + nCoresList.push_back(numCoresBound);
  73 + while (std::getline(absDevices, line))
  74 + {
  75 + /*
  76 + * Ignore comment line...
  77 + */
  78 + if (line.find('#') != std::string::npos)
  79 + {
  80 + continue;
  81 + }
  82 +
  83 + std::stringstream ss(line);
  84 + std::string coreListing;
  85 + std::string gemmKernel;
  86 + unsigned int numMPIProcesses;
  87 + std::string numOMPThreads;
  88 +
  89 + ss >> coreListing
  90 + >> gemmKernel
  91 + >> numMPIProcesses
  92 + >> numOMPThreads;
  93 +
  94 + if (verbosity)
  95 + {
  96 + std::cout << coreListing << " "
  97 + << gemmKernel << " "
  98 + << numMPIProcesses << " "
  99 + << numOMPThreads << std::endl;
  100 + }
  101 +
  102 + if (coreListing.find('-') == std::string::npos)
  103 + {
  104 + /*
  105 + * Simplest case, just one core to bind.
  106 + */
  107 + if (coreListing.find(',') == std::string::npos)
  108 + {
  109 + coresList.push_back(coreListing);
  110 + numCoresBound++;
  111 + nCoresList.push_back(numCoresBound);
  112 + }
  113 + else
  114 + {
  115 + /*
  116 + * There is a comma-separated list of cores...
  117 + */
  118 + char* cstr = new char[coreListing.length() + 1];
  119 + strcpy(cstr, coreListing.c_str());
  120 + char* tok = strtok(cstr, ",");
  121 + while (tok != NULL)
  122 + {
  123 + coresList.push_back(tok);
  124 + numCoresBound++;
  125 + tok = strtok(NULL, ",");
  126 + }
  127 + delete []cstr;
  128 + nCoresList.push_back(numCoresBound);
  129 + }
  130 + }
  131 + else
  132 + {
  133 + /*
  134 + * Just one range token...
  135 + */
  136 + if (coreListing.find(',') == std::string::npos)
  137 + {
  138 + std::vector<unsigned int> coreRange;
  139 + char* cstr = new char[coreListing.length() + 1];
  140 + strcpy(cstr, coreListing.c_str());
  141 + char* tok = strtok(cstr, "-");
  142 + while (tok != NULL)
  143 + {
  144 + coreRange.push_back(atoi(tok));
  145 + tok = strtok(NULL, ",");
  146 + }
  147 + delete []cstr;
  148 +
  149 + /*
  150 + * We expect just two elements in core range...
  151 + */
  152 + unsigned int start = coreRange[0];
  153 + unsigned int end = coreRange[1];
  154 +
  155 + /*
  156 + * The MPI processes divide the cores equally amongst them...
  157 + */
  158 + for (size_t e = 0; e < numMPIProcesses; e++)
  159 + {
  160 + numCoresBound += (end - start + 1) / numMPIProcesses;
  161 + nCoresList.push_back(numCoresBound);
  162 + }
  163 +
  164 + for (size_t e = start; e <= end; e++)
  165 + {
  166 + coresList.push_back(std::to_string(e));
  167 + }
  168 + }
  169 + else
  170 + {
  171 + /*
  172 + * A mix of - and ,
  173 + */
  174 + char* cstr1 = (char*)coreListing.c_str();
  175 + char* saveptr1, *saveptr2;
  176 +
  177 + char* tok = strtok_r(cstr1, ",", &saveptr1);
  178 + while (tok != NULL)
  179 + {
  180 + std::cout << tok << std::endl;
  181 + char* tok2 = strtok_r(tok, "-", &saveptr2);
  182 + std::vector<unsigned int> coreRange;
  183 + while (tok2 != NULL)
  184 + {
  185 + std::cout << tok2 << std::endl;
  186 + coreRange.push_back(atoi(tok2));
  187 + tok2 = strtok_r(NULL, "-", &saveptr2);
  188 + }
  189 +
  190 + /*
  191 + * We expect just two elements in core range...
  192 + */
  193 + unsigned int start = coreRange[0];
  194 + unsigned int end = coreRange[1];
  195 +
  196 + /*
  197 + * The MPI processes divide the cores equally amongst them...
  198 + */
  199 + for (size_t e = 0; e < numMPIProcesses; e++)
  200 + {
  201 + numCoresBound += (end - start + 1) / numMPIProcesses;
  202 + nCoresList.push_back(numCoresBound);
  203 + }
  204 +
  205 + for (size_t e = start; e <= end; e++)
  206 + {
  207 + coresList.push_back(std::to_string(e));
  208 + }
  209 +
  210 + tok = strtok_r(NULL, ",", &saveptr1);
  211 + }
  212 + }
  213 + }
  214 +
  215 + if (gemmKernel.find("CPU,GPU") != std::string::npos)
  216 + {
  217 + powerPlatforms.push_back("GPULITE");
  218 + }
  219 +
  220 + if (gemmKernel.find("CPU,PHI") != std::string::npos)
  221 + {
  222 + powerPlatforms.push_back("PHILITE");
  223 + }
  224 +
  225 + if (gemmKernel.find("CPU,FPGA") != std::string::npos)
  226 + {
  227 + powerPlatforms.push_back("FPGA");
  228 + }
  229 +
  230 + for (size_t p = 0; p < numMPIProcesses; p++)
  231 + {
  232 + std::stringstream ssOut;
  233 +
  234 + if (gemmKernel.find("CPU") != std::string::npos)
  235 + {
  236 + ssOut << "cpuinit, cpudgemm, cpudestroy";
  237 + }
  238 +
  239 + if (gemmKernel.find("GPU") != std::string::npos)
  240 + {
  241 + ssOut << "gpuinit, gpudgemm, gpudestroy";
  242 + }
  243 +
  244 + if (gemmKernel.find("PHI") != std::string::npos)
  245 + {
  246 + ssOut << "phiinit, phidgemm, phidestroy";
  247 + }
  248 +
  249 + if (gemmKernel.find("FPGA") != std::string::npos)
  250 + {
  251 + ssOut << "fpgainit, fpgadgemm, fpgadestroy";
  252 + }
  253 +
  254 + if (numOMPThreads.find('-') != std::string::npos)
  255 + {
  256 + ssOut << ", 0";
  257 + }
  258 + else
  259 + {
  260 + ssOut << ", " << numOMPThreads;
  261 + }
  262 +
  263 + abstractDevicesTable.push_back(ssOut.str());
  264 + }
  265 + }
  266 +
  267 + if (numCoresBound > numLogicalCPUs)
  268 + {
  269 + std::cerr << "Number of cores bound "
  270 + << numCoresBound << " exceeded "
  271 + << "the allowed number of logical cores."
  272 + << std::endl;
  273 + exit(EXIT_FAILURE);
  274 + }
  275 +
  276 + if (numCoresBound > numPhysicalCPUs)
  277 + {
  278 + std::cerr << "Warning: Number of cores bound exceeded "
  279 + << "the allowed number of physical cores."
  280 + << std::endl;
  281 + }
  282 +
  283 + /*
  284 + * Spit out the abstract devices table...
  285 + */
  286 + std::ofstream abstractDevicesOFile;
  287 + abstractDevicesOFile.open("absdevs.c");
  288 +
  289 + abstractDevicesOFile << "\n/*----------------------------------"
  290 + << "------------------------------------------*/\n"
  291 + << std::endl;
  292 + abstractDevicesOFile << "#include \"absdevs.h\"" << std::endl;
  293 + abstractDevicesOFile << "\n/*----------------------------------"
  294 + << "------------------------------------------*/\n"
  295 + << std::endl;
  296 +
  297 + /*
  298 + * Core bindings here...
  299 + */
  300 + abstractDevicesOFile << "const unsigned int hcl_coreindex[] = {"
  301 + << std::endl;
  302 + size_t n = nCoresList.size();
  303 + for (size_t e = 0; e < n; e++)
  304 + {
  305 + if (e == (n-1))
  306 + {
  307 + abstractDevicesOFile << nCoresList[e];
  308 + }
  309 + else
  310 + {
  311 + abstractDevicesOFile << nCoresList[e] << ",";
  312 + }
  313 + }
  314 + abstractDevicesOFile << "\n};" << std::endl;
  315 +
  316 + abstractDevicesOFile << "const unsigned int hcl_corebindings[] = {"
  317 + << std::endl;
  318 + n = coresList.size();
  319 + for (size_t e = 0; e < n; e++)
  320 + {
  321 + if (e == (n-1))
  322 + {
  323 + abstractDevicesOFile << coresList[e];
  324 + }
  325 + else
  326 + {
  327 + abstractDevicesOFile << coresList[e] << ",";
  328 + }
  329 + }
  330 + abstractDevicesOFile << "\n};" << std::endl;
  331 +
  332 + abstractDevicesOFile << "\n/*----------------------------------"
  333 + << "------------------------------------------*/\n"
  334 + << std::endl;
  335 + abstractDevicesOFile.close();
  336 +
  337 + std::ofstream abstractDevicesPowersOFile;
  338 + abstractDevicesPowersOFile.open("absdevpowers.c");
  339 +
  340 + /*
  341 + * Compute Platforms here...
  342 + */
  343 + abstractDevicesPowersOFile << "\n/*----------------------------------"
  344 + << "------------------------------------------*/\n"
  345 + << std::endl;
  346 +
  347 + abstractDevicesPowersOFile << "const char* hcl_powerplatforms[] = {"
  348 + << std::endl;
  349 + n = powerPlatforms.size();
  350 + for (size_t e = 0; e < n; e++)
  351 + {
  352 + if (e == (n-1))
  353 + {
  354 + abstractDevicesPowersOFile << "\"" << powerPlatforms[e] << "\"";
  355 + }
  356 + else
  357 + {
  358 + abstractDevicesPowersOFile << "\"" << powerPlatforms[e] << "\",";
  359 + }
  360 + }
  361 + abstractDevicesPowersOFile << "\n};" << std::endl;
  362 +
  363 + abstractDevicesPowersOFile << "\n/*----------------------------------"
  364 + << "------------------------------------------*/\n"
  365 + << std::endl;
  366 +
  367 + abstractDevicesPowersOFile.close();
  368 +
  369 + std::cout << "Abstract devices file successfully parsed" << std::endl;
  370 +
  371 + exit(EXIT_SUCCESS);
  372 +}
  373 +
  374 +/*-----------------------------------------------------------*/
  375 +
compile.sh 0 → 100755
  1 +#!/bin/bash
  2 +
  3 +############################################################
  4 +
  5 +DGEMVBASEDIR=`pwd`
  6 +HDPIBASEDIR=${DGEMVBASEDIR}/hdpi
  7 +
  8 +############################################################
  9 +# HDPI Library
  10 +############################################################
  11 +
  12 +echo "Building HDPI library..."
  13 +
  14 +(cd hdpi \
  15 + && mkdir -p build \
  16 + && cd build \
  17 + && cmake -DCMAKE_INSTALL_PREFIX=${HDPIBASEDIR}/installation_dir .. \
  18 + && make \
  19 + && make install)
  20 +
  21 +############################################################
  22 +
  23 +echo "Setting MKL variables..."
  24 +source /opt/intel/mkl/bin/mklvars.sh intel64
  25 +
  26 +echo "Compiling heterogeneous dgemm..."
  27 +make clean all
  28 +
  29 +############################################################
  30 +
  31 +exit 0
  32 +
  33 +############################################################
cputopology.cpp 0 → 100755
  1 +
  2 +/*--------------------------------------------------------*/
  3 +
  4 +/*
  5 +@file
  6 +@author Ravi Reddy Manumachu <ravi.manumachu@ucd.ie>
  7 +@version 1.0
  8 +*/
  9 +
  10 +/*-----------------------------------------------------------*/
  11 +
  12 +#include <cstdio>
  13 +#include <cstdlib>
  14 +#include <iostream>
  15 +#include <cstring>
  16 +#include <algorithm>
  17 +#include <sstream>
  18 +#include <string>
  19 +#include <climits>
  20 +#include <vector>
  21 +
  22 +/*-----------------------------------------------------------*/
  23 +
  24 +namespace hcl {
  25 +
  26 +namespace topology {
  27 +
  28 +/*-----------------------------------------------------------*/
  29 +
  30 +/*
  31 + * One way to do this is to store the lines in a vector.
  32 + * Parse top-to-bottom and bottom-to-top to get the siblings.
  33 + */
  34 +int
  35 +getSibling(
  36 + const unsigned int cpu,
  37 + std::vector<unsigned int>& siblings
  38 +)
  39 +{
  40 + FILE* commandFp = popen("lscpu -p", "r");
  41 + if (commandFp == NULL)
  42 + {
  43 + std::cerr << "Error from execution of lscpu -p."
  44 + << std::endl;
  45 + return -1;
  46 + }
  47 +
  48 + char* line = NULL;
  49 + size_t len = 0;
  50 + std::vector<std::string> lscpuLines;
  51 +
  52 + while (getline(&line, &len, commandFp) != -1)
  53 + {
  54 + if (line[0] == '#')
  55 + {
  56 + continue;
  57 + }
  58 +
  59 + lscpuLines.push_back(line);
  60 + }
  61 +
  62 + free(line);
  63 +
  64 + int status = pclose(commandFp);
  65 + if (status == -1)
  66 + {
  67 + std::cerr << "Error from execution of lscpu -p."
  68 + << std::endl;
  69 + return -1;
  70 + }
  71 +
  72 + bool siblingsFound = false;
  73 + bool myCoreFound = false;
  74 + unsigned int myCore = UINT_MAX;
  75 +
  76 + for (unsigned int cpuLine = 0;
  77 + cpuLine < lscpuLines.size();
  78 + cpuLine++)
  79 + {
  80 + std::stringstream cpuLineStream(lscpuLines[cpuLine]);
  81 + std::string token;
  82 + std::vector<std::string> tokens;
  83 + while (std::getline(
  84 + cpuLineStream, token, ','
  85 + )
  86 + )
  87 + {
  88 + tokens.push_back(token);
  89 + }
  90 +
  91 + unsigned int cpuParsed = atoi(tokens[0].c_str());
  92 +
  93 + if (cpu == cpuParsed)
  94 + {
  95 + myCore = atoi(tokens[1].c_str());
  96 + myCoreFound = true;
  97 + }
  98 + else
  99 + {
  100 + unsigned int core = atoi(tokens[1].c_str());
  101 +
  102 + if (core == myCore)
  103 + {
  104 + siblings.push_back(
  105 + atoi(tokens[0].c_str())
  106 + );
  107 + siblingsFound = true;
  108 + }
  109 + }
  110 + }
  111 +
  112 + if (myCoreFound == false)
  113 + {
  114 + std::cerr << "Failed to find siblings." << std::endl;
  115 + return -1;
  116 + }
  117 +
  118 + /*
  119 + * If siblings are not found, then
  120 + * it is possible that we have to traverse in the
  121 + * reverse order to build the siblings.
  122 + */
  123 + if (siblingsFound)
  124 + {
  125 + return 0;
  126 + }
  127 +
  128 + std::vector<std::string>::reverse_iterator rit =
  129 + lscpuLines.rbegin();
  130 +
  131 + for (; rit != lscpuLines.rend(); ++rit)
  132 + {
  133 + std::stringstream cpuLineStream(*rit);
  134 + std::string token;
  135 + std::vector<std::string> tokens;
  136 + while (std::getline(
  137 + cpuLineStream, token, ','
  138 + )
  139 + )
  140 + {
  141 + tokens.push_back(token);
  142 + }
  143 +
  144 + unsigned int cpuParsed = atoi(tokens[0].c_str());
  145 +
  146 + if (cpu == cpuParsed)
  147 + {
  148 + myCore = atoi(tokens[1].c_str());
  149 + }
  150 + else
  151 + {
  152 + unsigned int core = atoi(tokens[1].c_str());
  153 +
  154 + if (core == myCore)
  155 + {
  156 + siblings.push_back(
  157 + atoi(tokens[0].c_str())
  158 + );
  159 + }
  160 + }
  161 + }
  162 +
  163 + std::reverse(
  164 + siblings.begin(), siblings.end());
  165 +
  166 + return 0;
  167 +}
  168 +
  169 +/*-----------------------------------------------------------*/
  170 +
  171 +int
  172 +getNumCpuNumaNodes(
  173 + unsigned int* numNUMAs
  174 +)
  175 +{
  176 + FILE* commandFp = popen("lscpu -p", "r");
  177 + if (commandFp == NULL)
  178 + {
  179 + std::cerr << "Error from execution of lscpu -p."
  180 + << std::endl;
  181 + return -1;
  182 + }
  183 +
  184 + char* line = NULL;
  185 + size_t len = 0;
  186 +
  187 + *numNUMAs = 0;
  188 +
  189 + while (getline(&line, &len, commandFp) != -1)
  190 + {
  191 + if (line[0] == '#')
  192 + {
  193 + continue;
  194 + }
  195 +
  196 + char* token = strtok(line, ",");
  197 +
  198 + /*
  199 + * The core...
  200 + */
  201 + if (token != NULL)
  202 + {
  203 + token = strtok(NULL, ",");
  204 + }
  205 +
  206 + /*
  207 + * The socket column — its parsed value is used as the NUMA node below.
  207 + * NOTE(review): `lscpu -p` columns are CPU,Core,Socket,Node; confirm the
  207 + * intended column is being read (socket == NUMA node only on some systems).
  208 + */
  209 + if (token != NULL)
  210 + {
  211 + token = strtok(NULL, ",");
  212 + unsigned int numaNodeParsed = atoi(token);
  213 + if (numaNodeParsed > *numNUMAs)
  214 + {
  215 + *numNUMAs = numaNodeParsed;
  216 + }
  217 + }
  218 + }
  219 +
  220 + free(line);
  221 +
  222 + int status = pclose(commandFp);
  223 + if (status == -1)
  224 + {
  225 + std::cerr << "Error from execution of lscpu -p."
  226 + << std::endl;
  227 + return -1;
  228 + }
  229 +
  230 + *numNUMAs = *numNUMAs + 1;
  231 +
  232 + return 0;
  233 +}
  234 +
  235 +/*-----------------------------------------------------------*/
  236 +
  237 +int
  238 +getCpuNumaNode(
  239 + const unsigned int cpu,
  240 + unsigned int* numaNode
  241 +)
  242 +{
  243 + FILE* commandFp = popen("lscpu -p", "r");
  244 + if (commandFp == NULL)
  245 + {
  246 + std::cerr << "Error from execution of lscpu -p."
  247 + << std::endl;
  248 + return -1;
  249 + }
  250 +
  251 + char* line = NULL;
  252 + size_t len = 0;
  253 +
  254 + while (getline(&line, &len, commandFp) != -1)
  255 + {
  256 + if (line[0] == '#')
  257 + {
  258 + continue;
  259 + }
  260 +
  261 + char* token = strtok(line, ",");
  262 + unsigned int cpuParsed = atoi(token);
  263 +
  264 + if (cpuParsed == cpu)
  265 + {
  266 + ;
  267 + }
  268 + else
  269 + {
  270 + continue;
  271 + }
  272 +
  273 + /*
  274 + * The core...
  275 + */
  276 + if (token != NULL)
  277 + {
  278 + token = strtok(NULL, ",");
  279 + }
  280 +
  281 + /*
  282 + * The socket
  283 + */
  284 + if (token != NULL)
  285 + {
  286 + token = strtok(NULL, ",");
  287 + *numaNode = atoi(token);
  288 + break;
  289 + }
  290 + }
  291 +
  292 + free(line);
  293 +
  294 + int status = pclose(commandFp);
  295 + if (status == -1)
  296 + {
  297 + std::cerr << "Error from execution of lscpu -p."
  298 + << std::endl;
  299 + return -1;
  300 + }
  301 +
  302 + return 0;
  303 +}
  304 +
  305 +/*-----------------------------------------------------------*/
  306 +
  307 +int
  308 +getNumThreadsPerCore(
  309 + unsigned int* numTPerCore
  310 +)
  311 +{
  312 + FILE* commandFp = popen("lscpu -p", "r");
  313 + if (commandFp == NULL)
  314 + {
  315 + std::cerr << "Error from execution of lscpu -p."
  316 + << std::endl;
  317 + return -1;
  318 + }
  319 +
  320 + char* line = NULL;
  321 + size_t len = 0;
  322 +
  323 + *numTPerCore = 0;
  324 +
  325 + while (getline(&line, &len, commandFp) != -1)
  326 + {
  327 + if (line[0] == '#')
  328 + {
  329 + continue;
  330 + }
  331 +
  332 + char* token = strtok(line, ",");
  333 +
  334 + /*
  335 + * The core...
  336 + */
  337 + if (token != NULL)
  338 + {
  339 + token = strtok(NULL, ",");
  340 + unsigned int coreParsed = atoi(token);
  341 +
  342 + /*
  343 + * We will just count for core 0...
  344 + */
  345 + if (coreParsed == 0)
  346 + {
  347 + *numTPerCore = *numTPerCore + 1;
  348 + }
  349 + }
  350 + }
  351 +
  352 + free(line);
  353 +
  354 + int status = pclose(commandFp);
  355 + if (status == -1)
  356 + {
  357 + std::cerr << "Error from execution of lscpu -p."
  358 + << std::endl;
  359 + return -1;
  360 + }
  361 +
  362 + return 0;
  363 +}
  364 +
  365 +/*-------------------------------------------------------------------------*/
  366 +
  367 +int
  368 +getNumLogicalCpus(
  369 + unsigned int* numCPUs
  370 +)
  371 +{
  372 + FILE* commandFp = popen("lscpu -p", "r");
  373 + if (commandFp == NULL)
  374 + {
  375 + std::cerr << "Error from execution of lscpu -p."
  376 + << std::endl;
  377 + return -1;
  378 + }
  379 +
  380 + char* line = NULL;
  381 + size_t len = 0;
  382 +
  383 + *numCPUs = 0;
  384 +
  385 + while (getline(&line, &len, commandFp) != -1)
  386 + {
  387 + if (line[0] == '#')
  388 + {
  389 + continue;
  390 + }
  391 +
  392 + *numCPUs = *numCPUs + 1;
  393 + }
  394 +
  395 + free(line);
  396 +
  397 + int status = pclose(commandFp);
  398 + if (status == -1)
  399 + {
  400 + std::cerr << "Error from execution of lscpu -p."
  401 + << std::endl;
  402 + return -1;
  403 + }
  404 +
  405 + return 0;
  406 +}
  407 +
  408 +/*-------------------------------------------------------------------------*/
  409 +
  410 +int
  411 +getNumPhysicalCpus(
  412 + unsigned int* numPhysicalCPUs
  413 +)
  414 +{
  415 + unsigned int numLogicalCpus;
  416 +
  417 + int rc = getNumLogicalCpus(
  418 + &numLogicalCpus
  419 + );
  420 +
  421 + if (rc != 0)
  422 + {
  423 + return rc;
  424 + }
  425 +
  426 + unsigned int threadsPerCore;
  427 +
  428 + rc = getNumThreadsPerCore(&threadsPerCore);
  429 +
  430 + if (rc != 0)
  431 + {
  432 + return rc;
  433 + }
  434 +
  435 + *numPhysicalCPUs = numLogicalCpus / threadsPerCore;
  436 +
  437 + return 0;
  438 +}
  439 +
  440 +/*-------------------------------------------------------------------------*/
  441 +
  442 +int
  443 +getNumCpus(
  444 + const unsigned int numaNode,
  445 + unsigned int* numCPUs
  446 +)
  447 +{
  448 + FILE* commandFp = popen("lscpu -p", "r");
  449 + if (commandFp == NULL)
  450 + {
  451 + std::cerr << "Error from execution of lscpu -p."
  452 + << std::endl;
  453 + return -1;
  454 + }
  455 +
  456 + char* line = NULL;
  457 + size_t len = 0;
  458 +
  459 + *numCPUs = 0;
  460 +
  461 + while (getline(&line, &len, commandFp) != -1)
  462 + {
  463 + if (line[0] == '#')
  464 + {
  465 + continue;
  466 + }
  467 +
  468 + char* token = strtok(line, ",");
  469 +
  470 + /*
  471 + * The core...
  472 + */
  473 + if (token != NULL)
  474 + {
  475 + token = strtok(NULL, ",");
  476 + }
  477 +
  478 + /*
  479 + * The socket
  480 + */
  481 + if (token != NULL)
  482 + {
  483 + token = strtok(NULL, ",");
  484 + unsigned int numaNodeParsed = atoi(token);
  485 +
  486 + if (numaNodeParsed == numaNode)
  487 + {
  488 + *numCPUs = *numCPUs + 1;
  489 + }
  490 + }
  491 + }
  492 +
  493 + free(line);
  494 +
  495 + int status = pclose(commandFp);
  496 + if (status == -1)
  497 + {
  498 + std::cerr << "Error from execution of lscpu -p."
  499 + << std::endl;
  500 + return -1;
  501 + }
  502 +
  503 + return 0;
  504 +}
  505 +
  506 +/*-------------------------------------------------------------------------*/
  507 +
  508 +int
  509 +getCpus(
  510 + const unsigned int numaNode,
  511 + std::vector<unsigned int>& cpus
  512 +)
  513 +{
  514 + FILE* commandFp = popen("lscpu -p", "r");
  515 + if (commandFp == NULL)
  516 + {
  517 + std::cerr << "Error from execution of lscpu -p."
  518 + << std::endl;
  519 + return -1;
  520 + }
  521 +
  522 + char* line = NULL;
  523 + size_t len = 0;
  524 +
  525 + while (getline(&line, &len, commandFp) != -1)
  526 + {
  527 + if (line[0] == '#')
  528 + {
  529 + continue;
  530 + }
  531 +
  532 + char* token = strtok(line, ",");
  533 + unsigned int cpuParsed = atoi(token);
  534 +
  535 + /*
  536 + * The core...
  537 + */
  538 + if (token != NULL)
  539 + {
  540 + token = strtok(NULL, ",");
  541 + }
  542 +
  543 + /*
  544 + * The socket
  545 + */
  546 + if (token != NULL)
  547 + {
  548 + token = strtok(NULL, ",");
  549 + unsigned int numaNodeParsed = atoi(token);
  550 +
  551 + if (numaNodeParsed == numaNode)
  552 + {
  553 + cpus.push_back(cpuParsed);
  554 + }
  555 + }
  556 + }
  557 +
  558 + free(line);
  559 +
  560 + int status = pclose(commandFp);
  561 + if (status == -1)
  562 + {
  563 + std::cerr << "Error from execution of lscpu -p."
  564 + << std::endl;
  565 + return -1;
  566 + }
  567 +
  568 + return 0;
  569 +}
  570 +
  571 +/*-----------------------------------------------------------*/
  572 +
  573 +}
  574 +
  575 +}
  576 +
  577 +/*-----------------------------------------------------------*/
  578 +
cputopology.hpp 0 → 100755
  1 +/*--------------------------------------------------------*/
  2 +
  3 +/*
  4 +@file
  5 +@author Ravi Reddy Manumachu <ravi.manumachu@ucd.ie>
  6 +@version 1.0
  7 +*/
  8 +
  9 +/*-----------------------------------------------------------*/
  10 +
  11 +#ifndef _HCL_CPUTOPOLOGY_HPP_
  12 +#define _HCL_CPUTOPOLOGY_HPP_
  13 +
  14 +/*-----------------------------------------------------------*/
  15 +
  16 +namespace hcl {
  17 +
  18 +namespace topology {
  19 +
  20 +/*-----------------------------------------------------------*/
  21 +
  22 +/**
  23 + * Returns the siblings for a CPU.
  24 + *
  25 + * @param cpu The CPU whose siblings are requested.
  25 + * @param siblings The sibling CPUs found for the given cpu.
  26 + *
  27 + * @return HCL_SUCCESS if the query is successful.
  28 + */
  29 +int
  30 +getSibling(
  31 + const unsigned int cpu,
  32 + std::vector<unsigned int>& siblings
  33 +);
  34 +
  35 +/*-----------------------------------------------------------*/
  36 +
  37 +/**
  38 + * Returns the total number of threads per core.
  39 + *
  40 + * @param numTPerCore The number of threads per core.
  41 + *
  42 + * @return HCL_SUCCESS if the query is successful.
  43 + */
  44 +int
  45 +getNumThreadsPerCore(
  46 + unsigned int* numTPerCore
  47 +);
  48 +
  49 +/*-----------------------------------------------------------*/
  50 +
  51 +/**
  52 + * Returns the total number of logical cores.
  53 + *
  54 + * @param numLogicalCPUs The number of logical CPUs.
  55 + *
  56 + * @return HCL_SUCCESS if the query is successful.
  57 + */
  58 +int
  59 +getNumLogicalCpus(
  60 + unsigned int* numLogicalCPUs
  61 +);
  62 +
  63 +/*-----------------------------------------------------------*/
  64 +
  65 +/**
  66 + * Returns the total number of physical cores.
  67 + *
  68 + * @param numPhysicalCPUs The number of physical CPUs.
  69 + *
  70 + * @return HCL_SUCCESS if the query is successful.
  71 + */
  72 +int
  73 +getNumPhysicalCpus(
  74 + unsigned int* numPhysicalCPUs
  75 +);
  76 +
  77 +/*-----------------------------------------------------------*/
  78 +
  79 +/**
  80 + * Returns the number of CPUs in a NUMA node.
  81 + *
  82 + * @param numaNode The NUMA node identifier.
  83 + * @param numCPUs The number of CPUs.
  84 + *
  85 + * @return HCL_SUCCESS if the query is successful.
  86 + */
  87 +int
  88 +getNumCpus(
  89 + const unsigned int numaNode,
  90 + unsigned int* numCPUs
  91 +);
  92 +
  93 +/*-----------------------------------------------------------*/
  94 +
  95 +/**
  96 + * Returns the CPUs in a NUMA node.
  97 + *
  98 + * @param numaNode The NUMA node identifier.
  99 + * @param cpus The CPUs belonging to the NUMA node.
  100 + *
  101 + * @return HCL_SUCCESS if the query is successful.
  102 + */
  103 +int
  104 +getCpus(
  105 + const unsigned int numaNode,
  106 + std::vector<unsigned int>& cpus
  107 +);
  108 +
  109 +/*-----------------------------------------------------------*/
  110 +
  111 +/**
  112 + * Returns the number of CPU NUMA nodes.
  113 + *
  114 + * @param numNUMAs The number of CPU NUMA nodes.
  115 + *
  116 + * @return HCL_SUCCESS if the query is successful.
  117 + */
  118 +int
  119 +getNumCpuNumaNodes(
  120 + unsigned int* numNUMAs
  121 +);
  122 +
  123 +/*-----------------------------------------------------------*/
  124 +
  125 +/**
  126 + * Returns the CPU NUMA node that has the cpu.
  127 + *
  128 + * @param cpu The cpu.
  129 + * @param numaNode The numa node containing the cpu returned.
  130 + *
  131 + * @return HCL_SUCCESS if the query is successful.
  132 + */
  133 +int
  134 +getCpuNumaNode(
  135 + const unsigned int cpu,
  136 + unsigned int* numaNode
  137 +);
  138 +
  139 +/*-----------------------------------------------------------*/
  140 +
  141 +}
  142 +
  143 +}
  144 +
  145 +/*-----------------------------------------------------------*/
  146 +
  147 +#endif /*_HCL_CPUTOPOLOGY_HPP_ */
  148 +
  149 +/*-------------------------------------------------------------------------*/
  150 +
hdpi/CMakeLists.txt 0 → 100644
  1 +
# Build script for libhdpi — the Heterogeneous Data Partitioning
# Interface static library (headers + C sources, installed to
# lib/ and include/ respectively).
  2 +#-----------------------------------------------------------#
  3 +
  4 +cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
  5 +
  6 +#-----------------------------------------------------------#
  7 +
  8 +project(HDPI)
  9 +
  10 +#-----------------------------------------------------------#
  11 +
  12 +set (HEADERS hdpi.h
  13 + hmpi_err.h
  14 + hmpi_partitioning.h
  15 + hmpi_partitioning_internal.h
  16 + hmpi_partitioning_matrices.h
  17 + hmpi_partitioning_sets.h
  18 + hmpi_partitioning_graphs.h
  19 + hmpi_partitioning_trees.h
  20 + hmpi_partitioning_types.h)
  21 +
  22 +#-----------------------------------------------------------#
  23 +
# NOTE(review): hmpi_partitioning_trees.h is listed above but no
# matching hmpi_partitioning_trees.c appears in SOURCES — confirm
# the trees interface is header-only or add the source file.
  24 +set (SOURCES hmpi_partitioning_sets.c
  25 + hmpi_partitioning_sets_speed_function_of_problem_size.c
  26 + hmpi_partitioning_sets_speed_function_of_problem_size_weighted_elements.c
  27 + hmpi_partitioning_sets_speed_single_numbers_weighted_elements.c
  28 + hmpi_partitioning_graphs.c
  29 + hmpi_partitioning_matrices.c)
  30 +
  31 +#-----------------------------------------------------------#
  32 +
  33 +include_directories (${PROJECT_SOURCE_DIR})
  34 +
  35 +#-----------------------------------------------------------#
  36 +
  37 +add_library(hdpi ${HEADERS} ${SOURCES})
  38 +
  39 +#-----------------------------------------------------------#
  40 +
# NOTE(review): this installs hmpi_partitioning_internal.h into the
# public include directory — verify that exposing the internal header
# is intended.
  41 +install(TARGETS hdpi DESTINATION lib)
  42 +install(FILES ${HEADERS} DESTINATION include)
  43 +
  44 +#-----------------------------------------------------------#
hdpi/hdpi.h 0 → 100644
  1 +
  2 + /**************************************************/
  3 + /* HeteroDPI - Heterogeneous Data */
  4 + /* Partitioning Interface */
  5 + /* */
  6 + /* Revision history */
  7 + /* 19-05-2003 -- Initial version */
  8 + /**************************************************/
  9 +
  10 + #ifndef __HDPI_HH
  11 + #define __HDPI_HH
 + /* NOTE(review): identifiers starting with a double underscore are
 +  * reserved to the implementation in C and C++ — consider renaming
 +  * the guard to HDPI_H (same applies to the other HMPI headers). */
  12 +
  13 + #ifdef __cplusplus
  14 + extern "C" {
  15 + #endif
  16 +
  17 +
  18 + #include "hmpi_err.h"
  19 + #include "hmpi_partitioning.h"
  20 +
 + /* The macros below give callers short, HMPI_-prefix-free aliases
 +  * for the HMPI partitioning API declared by the includes above. */
  21 + /*
  22 + * sets
  23 + */
  24 + #define Partition_unordered_set HMPI_Partition_unordered_set
  25 + #define Partition_ordered_set HMPI_Partition_ordered_set
  26 + #define Get_set_processor HMPI_Get_set_processor
  27 + #define Get_my_partition HMPI_Get_my_partition
  28 +
  29 + /*
  30 + * matrices
  31 + */
  32 + #define Partition_matrix_2d HMPI_Partition_matrix_2d
  33 + #define Partition_matrix_1d_dp HMPI_Partition_matrix_1d_dp
  34 + #define Partition_matrix_1d_iterative HMPI_Partition_matrix_1d_iterative
  35 + #define Partition_matrix_1d_refining HMPI_Partition_matrix_1d_refining
  36 + #define Get_matrix_processor HMPI_Get_matrix_processor
  37 + #define Get_processor_2d HMPI_Get_processor_2d
  38 + #define Get_processor_1d HMPI_Get_processor_1d
  39 + #define Print_rectangle_1d HMPI_Print_rectangle_1d
  40 + #define Print_rectangle_2d HMPI_Print_rectangle_2d
  41 + #define Common_height HMPI_Common_height
  42 + #define Get_my_width HMPI_Get_my_width
  43 + #define Get_my_height HMPI_Get_my_height
  44 + #define Get_diagonal HMPI_Get_diagonal
  45 + #define Get_my_elements HMPI_Get_my_elements
  46 + #define Get_my_kk_elements HMPI_Get_my_kk_elements
  47 +
  48 + /*
  49 + * graphs
  50 + */
  51 + #define Partition_graph HMPI_Partition_graph
  52 + #define Partition_bipartite_graph HMPI_Partition_bipartite_graph
  53 + #define Partition_hypergraph HMPI_Partition_hypergraph
  54 +
  55 + /*
  56 + * trees
  57 + */
  58 + #define Partition_tree HMPI_Partition_tree
  59 +
  60 + #ifdef __cplusplus
  61 + }
  62 + #endif
  63 +
  64 + #endif /* __HDPI_HH */
hdpi/hmpi_err.h 0 → 100644
  1 +
  2 +/*************************************************************************
  3 +* *
  4 +* HeteroMPI Programming Environment *
  5 +* ================================= *
  6 +* *
  7 +* Copyright (c) 2005 Department of Computer Science, *
  8 +* University College Dublin. *
  9 +* *
  10 +* All rights reserved. We assume no responsibility for the use *
  11 +* or reliability of our software. *
  12 +* *
  13 +*************************************************************************/
  14 +
  15 + /************************************************/
  16 + /* Error codes for the HeteroMPI Library */
  17 + /* */
  18 + /* Revision history */
  19 + /* 01-02-2002 -- Initial version */
  20 + /************************************************/
  21 +
  22 + #ifndef __HMPI_ERR_HH
  23 + #define __HMPI_ERR_HH
  24 +
 + /* Base codes inherited from the mpC runtime; HMPI codes are
 +  * allocated as offsets above MPC_ERR_LAST. */
  25 + #define MPC_OK 0
  26 + #define MPC_ERR_NOMEM 17
  27 + #define MPC_ERR_LAST 36
  28 +
  29 + /*
  30 + * HMPI success and error codes
  31 + */
  32 + #define HMPI_OK MPC_OK
  33 + #define HMPI_SUCCESS MPC_OK
  34 + #define HMPI_ERR_NOMEM MPC_ERR_NOMEM
  35 + #define HMPI_NOT_MEMBER (MPC_ERR_LAST + 1)
  36 + #define HMPI_NULL_GROUP (MPC_ERR_LAST + 2)
  37 + #define HMPI_ERR_GROUP_NOT_EXIST (MPC_ERR_LAST + 3)
  38 + #define HMPI_INVALID_GROUP (MPC_ERR_LAST + 4)
  39 + #define HMPI_INVALID_PARAMS (MPC_ERR_LAST + 5)
  40 + #define HMPI_INVALID_OPERAND_TYPE (MPC_ERR_LAST + 6)
  41 + #define HMPI_INVALID_OPERATOR (MPC_ERR_LAST + 7)
  42 + #define HMPI_ERROR_CONDITION (MPC_ERR_LAST + 8)
  43 + #define HMPI_NOT_HOST (MPC_ERR_LAST + 9)
  44 + #define HMPI_NOT_HOST_AND_NOT_FREE (MPC_ERR_LAST + 10)
  45 + #define HMPI_ERR_INTERNAL (MPC_ERR_LAST + 11)
  46 + #define HMPI_ERR_PARTITION_SET (MPC_ERR_LAST + 12)
  47 + #define HMPI_ERR_MLIMITS (MPC_ERR_LAST + 13)
  48 + #define HMPI_ERR_INVALID_DIMP (MPC_ERR_LAST + 14)
  49 + #define HMPI_ERR_PARTITION_MATRIX (MPC_ERR_LAST + 15)
  50 + #define HMPI_ERR_PARTITION_NOT_EXISTS (MPC_ERR_LAST + 16)
  51 + #define HMPI_ERR_PARTITION_GRAPH (MPC_ERR_LAST + 17)
 + /* NOTE(review): MPC_ERR_LAST + 18 is unassigned — the numbering jumps
 +  * from PARTITION_GRAPH (+17) to PARTITION_TREE (+19). Confirm whether
 +  * a code was removed intentionally or this is an off-by-one. */
  52 + #define HMPI_ERR_PARTITION_TREE (MPC_ERR_LAST + 19)
  53 + #define HMPI_ERR_METRIC (MPC_ERR_LAST + 20)
  54 + #define HMPI_LAST_ERROR (MPC_ERR_LAST + 21)
  55 +
  56 + #define HMPI_UNDEFINED -1
  57 +
  58 + #endif /* __HMPI_ERR_HH */
hdpi/hmpi_partitioning.h 0 → 100644
  1 +
  2 +/*************************************************************************
  3 +* *
  4 +* Heterogeneous Data Partitioning Interface *
  5 +* ========================================= *
  6 +* *
  7 +* Copyright (c) 2002 Department of Computer Science, *
  8 +* University College Dublin. *
  9 +* *
  10 +* All rights reserved. We assume no responsibility for the use *
  11 +* or reliability of our software. *
  12 +* *
  13 +*************************************************************************/
  14 +
  15 + /************************************************/
  16 + /* hmpi_partitioning - Partitioning interfaces */
  17 + /* for the HMPI Library */
  18 + /* */
  19 + /* Revision history */
  20 + /* 19-05-2003 -- Initial version */
  21 + /************************************************/
  22 +
  23 + #ifndef __HMPI_PARTITIONING_HH
  24 + #define __HMPI_PARTITIONING_HH
  25 +
 + /* Umbrella header: pulls in every HDPI partitioning interface
 +  * (types, sets, matrices, graphs, trees, plus internal helpers)
 +  * so callers only ever need this one include. */
  26 + #include "hmpi_partitioning_types.h"
  27 + #include "hmpi_partitioning_sets.h"
  28 + #include "hmpi_partitioning_internal.h"
  29 + #include "hmpi_partitioning_matrices.h"
  30 + #include "hmpi_partitioning_graphs.h"
  31 + #include "hmpi_partitioning_trees.h"
  32 +
  33 + #endif /* __HMPI_PARTITIONING_HH */
hdpi/hmpi_partitioning_graphs.c 0 → 100644
  1 +
  2 + /************************************************/
  3 + /* Partitioning interfaces for graphs */
  4 + /* */
  5 + /* Revision history */
  6 + /* 22-04-2004 -- Initial version */
  7 + /************************************************/
  8 +
  9 + #include <hdpi.h>
  10 +
  11 + #include <stdio.h>
  12 + #include <stdlib.h>
  13 + #include <string.h>
  14 +
  15 + /*-----------------------------------------------------*/
  16 +
  17 + int HMPI_Partition_graph(
  18 + int p,
  19 + int pn,
  20 + const double *speeds,
  21 + const int *psizes,
  22 + const int *mlimits,
  23 + int n,
  24 + int m,
  25 + const int *vwgt,
  26 + const int *xadj,
  27 + const int *adjacency,
  28 + const int *adjwgt,
  29 + int *vp,
  30 + int *edgecut
  31 + )
  32 + {
  33 + int i, j, rc;
  34 +
  35 + /*
  36 + * Use the partitioning interface for Set when
  37 + * edges have no weights.
  38 + */
  39 + if ((vwgt == NULL)
  40 + && (xadj == NULL)
  41 + && (adjacency == NULL)
  42 + && (adjwgt == NULL
  43 + )
  44 + )
  45 + {
  46 + rc = HMPI_Partition_set(
  47 + p,
  48 + pn,
  49 + speeds,
  50 + psizes,
  51 + mlimits,
  52 + n,
  53 + NULL,
  54 + 0,
  55 + 1,
  56 + -1,
  57 + NULL,
  58 + NULL,
  59 + vp
  60 + );
  61 +
  62 + if (rc != HMPI_OK)
  63 + {
  64 + return rc;
  65 + }
  66 +
  67 + *edgecut = 0;
  68 +
  69 + for (i = 0; i < n; i++)
  70 + {
  71 + int owner_processor = vp[i];
  72 +
  73 + for (j = xadj[i]; j < xadj[i+1]; j++)
  74 + {
  75 + int neighbor = adjacency[j];
  76 +
  77 + if (vp[neighbor] != owner_processor)
  78 + {
  79 + (*edgecut)++;
  80 + }
  81 + }
  82 + }
  83 +
  84 + return HMPI_OK;
  85 + }
  86 +
  87 + /*
  88 + * Partition the graph such that the edgecut is minimal
  89 + * Edgecut is the total number of edges that straddle
  90 + * partitions.
  91 + * There is no upper bound on the number of elements
  92 + * stored by each processor.
  93 + */
  94 + if ((speeds == NULL)
  95 + && (mlimits == NULL)
  96 + && (vwgt == NULL)
  97 + && (adjwgt == NULL
  98 + )
  99 + )
  100 + {
  101 + printf("Implementation currently not available\n");
  102 + return HMPI_OK;
  103 + }
  104 +
  105 + /*
  106 + * Partition the graph such that the edgecut is minimal
  107 + * Edgecut is the total number of edges that straddle
  108 + * partitions.
  109 + * There is an upper bound on the number of elements
  110 + * stored by each processor.
  111 + */
  112 + if ((speeds == NULL)
  113 + && (mlimits != NULL)
  114 + && (vwgt == NULL)
  115 + && (adjwgt == NULL
  116 + )
  117 + )
  118 + {
  119 + printf("Implementation currently not available\n");
  120 + return HMPI_OK;
  121 + }
  122 +
  123 + /*
  124 + * Partition the graph such that the edgecut is minimal
  125 + * Edgecut is the sum of the weight of the straddling edges.
  126 + * There is no upper bound on the number of elements
  127 + * stored by each processor.
  128 + */
  129 + if ((speeds == NULL)
  130 + && (mlimits == NULL)
  131 + && (vwgt == NULL)
  132 + && (adjwgt != NULL
  133 + )
  134 + )
  135 + {
  136 + printf("Implementation currently not available\n");
  137 + return HMPI_OK;
  138 + }
  139 +
  140 + /*
  141 + * Partition the graph such that the edgecut is minimal
  142 + * Edgecut is the sum of the weight of the straddling edges.
  143 + * There is an upper bound on the number of elements
  144 + * stored by each processor.
  145 + */
  146 + if ((speeds == NULL)
  147 + && (mlimits != NULL)
  148 + && (vwgt == NULL)
  149 + && (adjwgt != NULL
  150 + )
  151 + )
  152 + {
  153 + printf("Implementation currently not available\n");
  154 + return HMPI_OK;
  155 + }
  156 +
  157 + /*
  158 + * Partition the graph such that the edgecut is minimal
  159 + * Edgecut is the total number of edges that straddle
  160 + * partitions.
  161 + * Ideally all the partitions should be equally weighted.
  162 + * There is no upper bound on the number of elements
  163 + * stored by each processor.
  164 + */
  165 + if ((speeds == NULL)
  166 + && (mlimits == NULL)
  167 + && (vwgt != NULL)
  168 + && (adjwgt == NULL
  169 + )
  170 + )
  171 + {
  172 + printf("Implementation currently not available\n");
  173 + return HMPI_OK;
  174 + }
  175 +
  176 + /*
  177 + * Partition the graph such that the edgecut is minimal
  178 + * Edgecut is the total number of edges that straddle
  179 + * partitions.
  180 + * Ideally all the partitions should be equally weighted.
  181 + * There is a upper bound on the number of elements
  182 + * stored by each processor.
  183 + */
  184 + if ((speeds == NULL)
  185 + && (mlimits != NULL)
  186 + && (vwgt != NULL)
  187 + && (adjwgt == NULL
  188 + )
  189 + )
  190 + {
  191 + printf("Implementation currently not available\n");
  192 + return HMPI_OK;
  193 + }
  194 +
  195 + /*
  196 + * Partition the graph such that the edgecut is minimal
  197 + * Edgecut is the sum of the weight of the straddling edges.
  198 + * There is an upper bound on the number of elements
  199 + * stored by each processor.
  200 + */
  201 + if ((speeds == NULL)
  202 + && (mlimits == NULL)
  203 + && (vwgt != NULL)
  204 + && (adjwgt != NULL
  205 + )
  206 + )
  207 + {
  208 + printf("Implementation currently not available\n");
  209 + return HMPI_OK;
  210 + }
  211 +
  212 + /*
  213 + * Partition the graph such that the edgecut is minimal
  214 + * Edgecut is the sum of the weight of the straddling edges.
  215 + * Ideally all the partitions should be equally weighted.
  216 + * There is a upper bound on the number of elements
  217 + * stored by each processor.
  218 + */
  219 + if ((speeds == NULL)
  220 + && (mlimits != NULL)
  221 + && (vwgt != NULL)
  222 + && (adjwgt != NULL
  223 + )
  224 + )
  225 + {
  226 + printf("Implementation currently not available\n");
  227 + return HMPI_OK;
  228 + }
  229 +
  230 + /*
  231 + * Partition the graph such that
  232 + * (a) The number of vertices in each partition is proportional
  233 + * to the speed of the processor owning that partition.
  234 + * (b) The edgecut is minimal. Edgecut is the total number
  235 + * of edges that straddle partitions.
  236 + * There is no upper bound on the number of elements
  237 + * stored by each processor.
  238 + */
  239 + if ((speeds != NULL)
  240 + && (pn == 1)
  241 + && (mlimits == NULL)
  242 + && (vwgt == NULL)
  243 + && (adjwgt == NULL
  244 + )
  245 + )
  246 + {
  247 + printf("Implementation currently not available\n");
  248 + return HMPI_OK;
  249 + }
  250 +
  251 + /*
  252 + * Partition the graph such that
  253 + * (a) The number of vertices in each partition is proportional
  254 + * to the speed of the processor owning that partition.
  255 + * (b) The edgecut is minimal.
  256 + * There is an upper bound on the number of elements
  257 + * stored by each processor.
  258 + */
  259 + if ((speeds != NULL)
  260 + && (pn == 1)
  261 + && (mlimits != NULL)
  262 + && (vwgt == NULL)
  263 + && (adjwgt == NULL
  264 + )
  265 + )
  266 + {
  267 + printf("Implementation currently not available\n");
  268 + return HMPI_OK;
  269 + }
  270 +
  271 + /*
  272 + * Partition the graph such that
  273 + * (a) The number of vertices in each partition is proportional
  274 + * to the speed of the processor owning that partition.
  275 + * (b) The edgecut is minimal.
  276 + * There is no upper bound on the number of elements
  277 + * stored by each processor.
  278 + * Speeds of processors are functions of problem size.
  279 + */
  280 + if ((speeds != NULL)
  281 + && (pn > 1)
  282 + && (mlimits == NULL)
  283 + && (vwgt == NULL)
  284 + && (adjwgt == NULL
  285 + )
  286 + )
  287 + {
  288 + printf("Implementation currently not available\n");
  289 + return HMPI_OK;
  290 + }
  291 +
  292 + /*
  293 + * Partition the graph such that
  294 + * (a) The number of vertices in each partition is proportional
  295 + * to the speed of the processor owning that partition.
  296 + * (b) The edgecut is minimal.
  297 + * Speeds of processors are functions of problem size.
  298 + * There is an upper bound on the number of elements
  299 + * stored by each processor.
  300 + */
  301 + if ((speeds != NULL)
  302 + && (pn > 1)
  303 + && (mlimits != NULL)
  304 + && (vwgt == NULL)
  305 + && (adjwgt == NULL
  306 + )
  307 + )
  308 + {
  309 + printf("Implementation currently not available\n");
  310 + return HMPI_OK;
  311 + }
  312 +
  313 + /*
  314 + * Partition the graph such that
  315 + * (a) The number of vertices in each partition is
  316 + * proportional to the speed of the processor
  317 + * owning that partition.
  318 + * (b) The edgecut is minimal. Edgecut is the sum
  319 + * of the weights of the straddling edges.
  320 + * There is no upper bound on the number of elements
  321 + * stored by each processor.
  322 + */
  323 + if ((speeds != NULL)
  324 + && (pn == 1)
  325 + && (mlimits == NULL)
  326 + && (vwgt == NULL)
  327 + && (adjwgt != NULL
  328 + )
  329 + )
  330 + {
  331 + printf("Implementation currently not available\n");
  332 + return HMPI_OK;
  333 + }
  334 +
  335 + /*
  336 + * Partition the graph such that
  337 + * (a) The number of vertices in each partition is
  338 + * proportional to the speed of the processor
  339 + * owning that partition.
  340 + * (b) The edgecut is minimal. Edgecut is the sum
  341 + * of the weights of the straddling edges.
  342 + * There is an upper bound on the number of elements
  343 + * stored by each processor.
  344 + */
  345 + if ((speeds != NULL)
  346 + && (pn == 1)
  347 + && (mlimits != NULL)
  348 + && (vwgt == NULL)
  349 + && (adjwgt != NULL
  350 + )
  351 + )
  352 + {
  353 + printf("Implementation currently not available\n");
  354 + return HMPI_OK;
  355 + }
  356 +
  357 + /*
  358 + * Partition the graph such that
  359 + * (a) The number of vertices in each partition is
  360 + * proportional to the speed of the processor
  361 + * owning that partition.
  362 + * (b) The edgecut is minimal. Edgecut is the sum
  363 + * of the weights of the straddling edges.
  364 + * There is no upper bound on the number of elements
  365 + * stored by each processor.
  366 + * Speeds of processors are functions of problem size.
  367 + */
  368 + if ((speeds != NULL)
  369 + && (pn > 1)
  370 + && (mlimits == NULL)
  371 + && (vwgt == NULL)
  372 + && (adjwgt != NULL
  373 + )
  374 + )
  375 + {
  376 + printf("Implementation currently not available\n");
  377 + return HMPI_OK;
  378 + }
  379 +
  380 + /*
  381 + * Partition the graph such that
  382 + * (a) The number of vertices in each partition is
  383 + * proportional to the speed of the processor
  384 + * owning that partition.
  385 + * (b) The edgecut is minimal. Edgecut is the sum
  386 + * of the weights of the straddling edges.
  387 + * There is an upper bound on the number of elements
  388 + * stored by each processor.
  389 + * Speeds of processors are functions of problem size.
  390 + */
  391 + if ((speeds != NULL)
  392 + && (pn > 1)
  393 + && (mlimits != NULL)
  394 + && (vwgt == NULL)
  395 + && (adjwgt != NULL
  396 + )
  397 + )
  398 + {
  399 + printf("Implementation currently not available\n");
  400 + return HMPI_OK;
  401 + }
  402 +
  403 + /*
  404 + * Partition the graph such that
  405 + * (a) The sum of weights of vertices in each partition is
  406 + * proportional to the speed of the processor owning that partition.
  407 + * (b) The edgecut is minimal.
  408 + * There is no upper bound on the number of elements
  409 + * stored by each processor.
  410 + */
  411 + if ((speeds != NULL)
  412 + && (pn == 1)
  413 + && (mlimits == NULL)
  414 + && (vwgt != NULL)
  415 + && (adjwgt == NULL
  416 + )
  417 + )
  418 + {
  419 + printf("Implementation currently not available\n");
  420 + return HMPI_OK;
  421 + }
  422 +
  423 + /*
  424 + * Partition the graph such that
  425 + * (a) The sum of weights of vertices in each partition is
  426 + * proportional to the speed of the processor owning that partition.
  427 + * (b) The edgecut is minimal.
  428 + * There is an upper bound on the number of elements
  429 + * stored by each processor.
  430 + */
  431 + if ((speeds != NULL)
  432 + && (pn == 1)
  433 + && (mlimits != NULL)
  434 + && (vwgt != NULL)
  435 + && (adjwgt == NULL
  436 + )
  437 + )
  438 + {
  439 + printf("Implementation currently not available\n");
  440 + return HMPI_OK;
  441 + }
  442 +
  443 + /*
  444 + * Partition the graph such that
  445 + * (a) The sum of weights of vertices in each partition is
  446 + * proportional to the speed of the processor owning that partition.
  447 + * (b) The edgecut is minimal.
  448 + * There is no upper bound on the number of elements
  449 + * stored by each processor.
  450 + * Speeds of processors are functions of problem size.
  451 + */
  452 + if ((speeds != NULL)
  453 + && (pn > 1)
  454 + && (mlimits == NULL)
  455 + && (vwgt != NULL)
  456 + && (adjwgt == NULL
  457 + )
  458 + )
  459 + {
  460 + printf("Implementation currently not available\n");
  461 + return HMPI_OK;
  462 + }
  463 +
  464 + /*
  465 + * Partition the graph such that
  466 + * (a) The sum of weights of vertices in each partition is
  467 + * proportional to the speed of the processor owning that partition.
  468 + * (b) The edgecut is minimal.
  469 + * There is an upper bound on the number of elements
  470 + * stored by each processor.
  471 + * Speeds of processors are functions of problem size.
  472 + */
  473 + if ((speeds != NULL)
  474 + && (pn > 1)
  475 + && (mlimits != NULL)
  476 + && (vwgt != NULL)
  477 + && (adjwgt == NULL
  478 + )
  479 + )
  480 + {
  481 + printf("Implementation currently not available\n");
  482 + return HMPI_OK;
  483 + }
  484 +
  485 + /*
  486 + * Partition the graph such that
  487 + * (a) The sum of weights of vertices in each partition is
  488 + * proportional to the speed of the processor owning that partition.
  489 + * (b) The edgecut is minimal.
  490 + * There is no upper bound on the number of elements
  491 + * stored by each processor.
  492 + */
  493 + if ((speeds != NULL)
  494 + && (pn == 1)
  495 + && (mlimits == NULL)
  496 + && (vwgt != NULL)
  497 + && (adjwgt != NULL
  498 + )
  499 + )
  500 + {
  501 + printf("Implementation currently not available\n");
  502 + return HMPI_OK;
  503 + }
  504 +
  505 + /*
  506 + * Partition the graph such that
  507 + * (a) The sum of weights of vertices in each partition is
  508 + * proportional to the speed of the processor owning that partition.
  509 + * (b) The edgecut is minimal.
  510 + * There is an upper bound on the number of elements
  511 + * stored by each processor.
  512 + */
  513 + if ((speeds != NULL)
  514 + && (pn == 1)
  515 + && (mlimits != NULL)
  516 + && (vwgt != NULL)
  517 + && (adjwgt != NULL
  518 + )
  519 + )
  520 + {
  521 + printf("Implementation currently not available\n");
  522 + return HMPI_OK;
  523 + }
  524 +
  525 + /*
  526 + * Partition the graph such that
  527 + * (a) The sum of weights of vertices in each partition is
  528 + * proportional to the speed of the processor owning that partition.
  529 + * (b) The edgecut is minimal.
  530 + * speeds are functions of problem size.
  531 + * There is no upper bound on the number of elements
  532 + * stored by each processor.
  533 + */
  534 + if ((speeds != NULL)
  535 + && (pn > 1)
  536 + && (mlimits == NULL)
  537 + && (vwgt != NULL)
  538 + && (adjwgt != NULL
  539 + )
  540 + )
  541 + {
  542 + printf("Implementation currently not available\n");
  543 + return HMPI_OK;
  544 + }
  545 +
  546 + /*
  547 + * Partition the graph such that
  548 + * (a) The sum of weights of vertices in each partition is
  549 + * proportional to the speed of the processor owning that partition.
  550 + * (b) The edgecut is minimal.
  551 + * speeds are functions of problem size.
  552 + * There is an upper bound on the number of elements
  553 + * stored by each processor.
  554 + */
  555 + if ((speeds != NULL)
  556 + && (pn > 1)
  557 + && (mlimits != NULL)
  558 + && (vwgt != NULL)
  559 + && (adjwgt != NULL
  560 + )
  561 + )
  562 + {
  563 + printf("Implementation currently not available\n");
  564 + return HMPI_OK;
  565 + }
  566 +
  567 + printf("Parameters are erroneous\n");
  568 + return HMPI_ERR_PARTITION_GRAPH;
  569 + }
  570 +
  571 + /*-----------------------------------------------------*/
  572 +
  573 + int HMPI_Partition_bipartite_graph(
  574 + int p,
  575 + int pn,
  576 + const double *speeds,
  577 + const int *psizes,
  578 + const int *mlimits,
  579 + int n,
  580 + int m,
  581 + const int *vtype,
  582 + const int *vwgt,
  583 + const int *xadj,
  584 + const int *adjacency,
  585 + const int *adjwgt,
  586 + int type_of_partitioning,
  587 + int *vp,
  588 + int *edgecut
  589 + )
  590 + {
  591 + int i, j, rc;
  592 +
  593 + /*
  594 + * Use the partitioning interface for Set when
  595 + * edges have no weights. And there is no
  596 + * adjacency matrix. The two disjoint subsets are
  597 + * not considered separately.
  598 + */
  599 + if ((vwgt == NULL)
  600 + && (xadj == NULL)
  601 + && (adjacency == NULL)
  602 + && (adjwgt == NULL)
  603 + && (type_of_partitioning == PARTITION_OTHER
  604 + )
  605 + )
  606 + {
  607 + rc = HMPI_Partition_set(
  608 + p,
  609 + pn,
  610 + speeds,
  611 + psizes,
  612 + mlimits,
  613 + n,
  614 + NULL,
  615 + 0,
  616 + 1,
  617 + -1,
  618 + NULL,
  619 + NULL,
  620 + vp
  621 + );
  622 +
  623 + if (rc != HMPI_OK)
  624 + {
  625 + return rc;
  626 + }
  627 +
  628 + *edgecut = 0;
  629 +
  630 + for (i = 0; i < n; i++)
  631 + {
  632 + int owner_processor = vp[i];
  633 +
  634 + for (j = xadj[i]; j < xadj[i+1]; j++)
  635 + {
  636 + int neighbor = adjacency[j];
  637 +
  638 + if (vp[neighbor] != owner_processor)
  639 + {
  640 + (*edgecut)++;
  641 + }
  642 + }
  643 + }
  644 +
  645 + return HMPI_OK;
  646 + }
  647 +
  648 + /*
  649 + * Use the partitioning interface for Set when
  650 + * edges have no weights. And there is no
  651 + * adjacency matrix. The two disjoint subsets are
  652 + * considered separately.
  653 + */
  654 + if ((vwgt == NULL)
  655 + && (xadj == NULL)
  656 + && (adjacency == NULL)
  657 + && (adjwgt == NULL)
  658 + && (type_of_partitioning == PARTITION_SUBSET
  659 + )
  660 + )
  661 + {
  662 + printf("Implementation currently not available\n");
  663 + return HMPI_OK;
  664 + }
  665 +
  666 + if (type_of_partitioning == PARTITION_OTHER)
  667 + {
  668 + return HMPI_Partition_graph(
  669 + p,
  670 + pn,
  671 + speeds,
  672 + psizes,
  673 + mlimits,
  674 + n,
  675 + m,
  676 + vwgt,
  677 + xadj,
  678 + adjacency,
  679 + adjwgt,
  680 + vp,
  681 + edgecut
  682 + );
  683 + }
  684 +
  685 + /*
  686 + * Partition the graph such that the edgecut is minimal
  687 + * Edgecut is the total number of edges that straddle
  688 + * partitions.
  689 + * There is no upper bound on the number of elements
  690 + * stored by each processor.
  691 + * The number of vertices in each partition in each subset
  692 + * should be the same.
  693 + */
  694 + if ((speeds == NULL)
  695 + && (mlimits == NULL)
  696 + && (vwgt == NULL)
  697 + && (adjwgt == NULL
  698 + )
  699 + )
  700 + {
  701 + printf("Implementation currently not available\n");
  702 + return HMPI_OK;
  703 + }
  704 +
  705 + /*
  706 + * Partition the graph such that the edgecut is minimal
  707 + * Edgecut is the total number of edges that straddle
  708 + * partitions.
  709 + * There is an upper bound on the number of elements
  710 + * stored by each processor.
  711 + * The number of vertices in each partition in each subset
  712 + * should be the same.
  713 + */
  714 + if ((speeds == NULL)
  715 + && (mlimits != NULL)
  716 + && (vwgt == NULL)
  717 + && (adjwgt == NULL
  718 + )
  719 + )
  720 + {
  721 + printf("Implementation currently not available\n");
  722 + return HMPI_OK;
  723 + }
  724 +
  725 + /*
  726 + * Partition the graph such that the edgecut is minimal
  727 + * Edgecut is the sum of the weight of the straddling edges.
  728 + * There is no upper bound on the number of elements
  729 + * stored by each processor.
  730 + * The number of vertices in each partition in each subset
  731 + * should be the same.
  732 + */
  733 + if ((speeds == NULL)
  734 + && (mlimits == NULL)
  735 + && (vwgt == NULL)
  736 + && (adjwgt != NULL
  737 + )
  738 + )
  739 + {
  740 + printf("Implementation currently not available\n");
  741 + return HMPI_OK;
  742 + }
  743 +
  744 + /*
  745 + * Partition the graph such that the edgecut is minimal
  746 + * Edgecut is the sum of the weight of the straddling edges.
  747 + * There is an upper bound on the number of elements
  748 + * stored by each processor.
  749 + * The number of vertices in each partition in each subset
  750 + * should be the same.
  751 + */
  752 + if ((speeds == NULL)
  753 + && (mlimits != NULL)
  754 + && (vwgt == NULL)
  755 + && (adjwgt != NULL
  756 + )
  757 + )
  758 + {
  759 + printf("Implementation currently not available\n");
  760 + return HMPI_OK;
  761 + }
  762 +
  763 + /*
  764 + * Partition the graph such that the edgecut is minimal
  765 + * Edgecut is the total number of edges that straddle
  766 + * partitions.
  767 + * Ideally all the partitions in each subset should
  768 + * be equally weighted.
  769 + * There is no upper bound on the number of elements
  770 + * stored by each processor.
  771 + */
  772 + if ((speeds == NULL)
  773 + && (mlimits == NULL)
  774 + && (vwgt != NULL)
  775 + && (adjwgt == NULL
  776 + )
  777 + )
  778 + {
  779 + printf("Implementation currently not available\n");
  780 + return HMPI_OK;
  781 + }
  782 +
  783 + /*
  784 + * Partition the graph such that the edgecut is minimal
  785 + * Edgecut is the total number of edges that straddle
  786 + * partitions.
  787 + * There is a upper bound on the number of elements
  788 + * stored by each processor.
  789 + * Ideally all the partitions in each subset should
  790 + * be equally weighted.
  791 + */
  792 + if ((speeds == NULL)
  793 + && (mlimits != NULL)
  794 + && (vwgt != NULL)
  795 + && (adjwgt == NULL
  796 + )
  797 + )
  798 + {
  799 + printf("Implementation currently not available\n");
  800 + return HMPI_OK;
  801 + }
  802 +
  803 + /*
  804 + * Partition the graph such that the edgecut is minimal
  805 + * Edgecut is the sum of the weight of the straddling edges.
  806 + * There is an upper bound on the number of elements
  807 + * stored by each processor.
  808 + * Ideally all the partitions in each subset should
  809 + * be equally weighted.
  810 + */
  811 + if ((speeds == NULL)
  812 + && (mlimits == NULL)
  813 + && (vwgt != NULL)
  814 + && (adjwgt != NULL
  815 + )
  816 + )
  817 + {
  818 + printf("Implementation currently not available\n");
  819 + return HMPI_OK;
  820 + }
  821 +
  822 + /*
  823 + * Partition the graph such that the edgecut is minimal
  824 + * Edgecut is the sum of the weight of the straddling edges.
  825 + * There is a upper bound on the number of elements
  826 + * stored by each processor.
  827 + * Ideally all the partitions in each subset should
  828 + * be equally weighted.
  829 + */
  830 + if ((speeds == NULL)
  831 + && (mlimits != NULL)
  832 + && (vwgt != NULL)
  833 + && (adjwgt != NULL
  834 + )
  835 + )
  836 + {
  837 + printf("Implementation currently not available\n");
  838 + return HMPI_OK;
  839 + }
  840 +
  841 + /*
  842 + * Partition the graph such that
  843 + * (a) The number of vertices in each partition in each subset
  844 + * is proportional to the speed of the processor
  845 + * owning that partition.
  846 + * (b) The edgecut is minimal. Edgecut is the total number
  847 + * of edges that straddle partitions.
  848 + * There is no upper bound on the number of elements
  849 + * stored by each processor.
  850 + */
  851 + if ((speeds != NULL)
  852 + && (pn == 1)
  853 + && (mlimits == NULL)
  854 + && (vwgt == NULL)
  855 + && (adjwgt == NULL
  856 + )
  857 + )
  858 + {
  859 + printf("Implementation currently not available\n");
  860 + return HMPI_OK;
  861 + }
  862 +
  863 + /*
  864 + * Partition the graph such that
  865 + * (a) The number of vertices in each partition in each subset
  866 + * is proportional to the speed of the processor
  867 + * owning that partition.
  868 + * (b) The edgecut is minimal. Edgecut is the total number
  869 + * of edges that straddle partitions.
  870 + * There is an upper bound on the number of elements
  871 + * stored by each processor.
  872 + */
  873 + if ((speeds != NULL)
  874 + && (pn == 1)
  875 + && (mlimits != NULL)
  876 + && (vwgt == NULL)
  877 + && (adjwgt == NULL
  878 + )
  879 + )
  880 + {
  881 + printf("Implementation currently not available\n");
  882 + return HMPI_OK;
  883 + }
  884 +
  885 + /*
  886 + * Partition the graph such that
  887 + * (a) The number of vertices in each partition in each subset
  888 + * is proportional to the speed of the processor
  889 + * owning that partition.
  890 + * (b) The edgecut is minimal. Edgecut is the total number
  891 + * of edges that straddle partitions.
  892 + * There is no upper bound on the number of elements
  893 + * stored by each processor.
  894 + * Speeds of processors are functions of problem size.
  895 + */
  896 + if ((speeds != NULL)
  897 + && (pn > 1)
  898 + && (mlimits == NULL)
  899 + && (vwgt == NULL)
  900 + && (adjwgt == NULL
  901 + )
  902 + )
  903 + {
  904 + printf("Implementation currently not available\n");
  905 + return HMPI_OK;
  906 + }
  907 +
  908 + /*
  909 + * Partition the graph such that
  910 + * (a) The number of vertices in each partition in each subset
  911 + * is proportional to the speed of the processor
  912 + * owning that partition.
  913 + * (b) The edgecut is minimal. Edgecut is the total number
  914 + * of edges that straddle partitions.
  915 + * Speeds of processors are functions of problem size.
  916 + * There is an upper bound on the number of elements
  917 + * stored by each processor.
  918 + */
  919 + if ((speeds != NULL)
  920 + && (pn > 1)
  921 + && (mlimits != NULL)
  922 + && (vwgt == NULL)
  923 + && (adjwgt == NULL
  924 + )
  925 + )
  926 + {
  927 + printf("Implementation currently not available\n");
  928 + return HMPI_OK;
  929 + }
  930 +
  931 + /*
  932 + * Partition the graph such that
  933 + * (a) The number of vertices in each partition in each subset
  934 + * is proportional to the speed of the processor
  935 + * owning that partition.
  936 + * (b) The edgecut is minimal. Edgecut is the sum
  937 + * of the weights of the straddling edges.
  938 + * There is no upper bound on the number of elements
  939 + * stored by each processor.
  940 + */
  941 + if ((speeds != NULL)
  942 + && (pn == 1)
  943 + && (mlimits == NULL)
  944 + && (vwgt == NULL)
  945 + && (adjwgt != NULL
  946 + )
  947 + )
  948 + {
  949 + printf("Implementation currently not available\n");
  950 + return HMPI_OK;
  951 + }
  952 +
  953 + /*
  954 + * Partition the graph such that
  955 + * (a) The number of vertices in each partition
  956 + * in each subset is proportional to the speed
  957 + * of the processor owning that partition.
  958 + * (b) The edgecut is minimal. Edgecut is the sum
  959 + * of the weights of the straddling edges.
  960 + * There is an upper bound on the number of elements
  961 + * stored by each processor.
  962 + */
  963 + if ((speeds != NULL)
  964 + && (pn == 1)
  965 + && (mlimits != NULL)
  966 + && (vwgt == NULL)
  967 + && (adjwgt != NULL
  968 + )
  969 + )
  970 + {
  971 + printf("Implementation currently not available\n");
  972 + return HMPI_OK;
  973 + }
  974 +
  975 + /*
  976 + * Partition the graph such that
  977 + * (a) The number of vertices in each partition in each
  978 + * subset is proportional to the speed of the processor
  979 + * owning that partition.
  980 + * (b) The edgecut is minimal. Edgecut is the sum
  981 + * of the weights of the straddling edges.
  982 + * There is no upper bound on the number of elements
  983 + * stored by each processor.
  984 + * Speeds of processors are functions of problem size.
  985 + */
  986 + if ((speeds != NULL)
  987 + && (pn > 1)
  988 + && (mlimits == NULL)
  989 + && (vwgt == NULL)
  990 + && (adjwgt != NULL
  991 + )
  992 + )
  993 + {
  994 + printf("Implementation currently not available\n");
  995 + return HMPI_OK;
  996 + }
  997 +
  998 + /*
  999 + * Partition the graph such that
  1000 + * (a) The number of vertices in each partition in each
  1001 + * subset is proportional to the speed of the processor
  1002 + * owning that partition.
  1003 + * (b) The edgecut is minimal. Edgecut is the sum
  1004 + * of the weights of the straddling edges.
  1005 + * There is an upper bound on the number of elements
  1006 + * stored by each processor.
  1007 + * Speeds of processors are functions of problem size.
  1008 + */
  1009 + if ((speeds != NULL)
  1010 + && (pn > 1)
  1011 + && (mlimits != NULL)
  1012 + && (vwgt == NULL)
  1013 + && (adjwgt != NULL
  1014 + )
  1015 + )
  1016 + {
  1017 + printf("Implementation currently not available\n");
  1018 + return HMPI_OK;
  1019 + }
  1020 +
  1021 + /*
  1022 + * Partition the graph such that
  1023 + * (a) The sum of weights of vertices in each partition in each
  1024 + * subset is proportional to the speed of the processor
  1025 + * owning that partition.
  1026 + * (b) The edgecut is minimal.
  1027 + * There is no upper bound on the number of elements
  1028 + * stored by each processor.
  1029 + */
  1030 + if ((speeds != NULL)
  1031 + && (pn == 1)
  1032 + && (mlimits == NULL)
  1033 + && (vwgt != NULL)
  1034 + && (adjwgt == NULL
  1035 + )
  1036 + )
  1037 + {
  1038 + printf("Implementation currently not available\n");
  1039 + return HMPI_OK;
  1040 + }
  1041 +
  1042 + /*
  1043 + * Partition the graph such that
  1044 + * (a) The sum of weights of vertices in each partition
  1045 + * in each subset is proportional to the speed of
  1046 + * the processor owning that partition.
  1047 + * (b) The edgecut is minimal.
  1048 + * There is an upper bound on the number of elements
  1049 + * stored by each processor.
  1050 + */
  1051 + if ((speeds != NULL)
  1052 + && (pn == 1)
  1053 + && (mlimits != NULL)
  1054 + && (vwgt != NULL)
  1055 + && (adjwgt == NULL
  1056 + )
  1057 + )
  1058 + {
  1059 + printf("Implementation currently not available\n");
  1060 + return HMPI_OK;
  1061 + }
  1062 +
  1063 + /*
  1064 + * Partition the graph such that
  1065 + * (a) The sum of weights of vertices in each partition in each
  1066 + * subset is proportional to the speed of the processor
  1067 + * owning that partition.
  1068 + * (b) The edgecut is minimal.
  1069 + * There is no upper bound on the number of elements
  1070 + * stored by each processor.
  1071 + * Speeds of processors are functions of problem size.
  1072 + */
  1073 + if ((speeds != NULL)
  1074 + && (pn > 1)
  1075 + && (mlimits == NULL)
  1076 + && (vwgt != NULL)
  1077 + && (adjwgt == NULL
  1078 + )
  1079 + )
  1080 + {
  1081 + printf("Implementation currently not available\n");
  1082 + return HMPI_OK;
  1083 + }
  1084 +
  1085 + /*
  1086 + * Partition the graph such that
  1087 + * (a) The sum of weights of vertices in each partition in each
  1088 + * subset is proportional to the speed of the processor
  1089 + * owning that partition.
  1090 + * (b) The edgecut is minimal.
  1091 + * There is an upper bound on the number of elements
  1092 + * stored by each processor.
  1093 + * Speeds of processors are functions of problem size.
  1094 + */
  1095 + if ((speeds != NULL)
  1096 + && (pn > 1)
  1097 + && (mlimits != NULL)
  1098 + && (vwgt != NULL)
  1099 + && (adjwgt == NULL
  1100 + )
  1101 + )
  1102 + {
  1103 + printf("Implementation currently not available\n");
  1104 + return HMPI_OK;
  1105 + }
  1106 +
  1107 + /*
  1108 + * Partition the graph such that
  1109 + * (a) The sum of weights of vertices in each partition is
  1110 + * proportional to the speed of the processor owning
  1111 + * that partition.
  1112 + * (b) The edgecut is minimal.
  1113 + * There is no upper bound on the number of elements
  1114 + * stored by each processor.
  1115 + */
  1116 + if ((speeds != NULL)
  1117 + && (pn == 1)
  1118 + && (mlimits == NULL)
  1119 + && (vwgt != NULL)
  1120 + && (adjwgt != NULL
  1121 + )
  1122 + )
  1123 + {
  1124 + printf("Implementation currently not available\n");
  1125 + return HMPI_OK;
  1126 + }
  1127 +
  1128 + /*
  1129 + * Partition the graph such that
  1130 + * (a) The sum of weights of vertices in each partition is
  1131 + * proportional to the speed of the processor
  1132 + * owning that partition.
  1133 + * (b) The edgecut is minimal.
  1134 + * There is an upper bound on the number of elements
  1135 + * stored by each processor.
  1136 + */
  1137 + if ((speeds != NULL)
  1138 + && (pn == 1)
  1139 + && (mlimits != NULL)
  1140 + && (vwgt != NULL)
  1141 + && (adjwgt != NULL
  1142 + )
  1143 + )
  1144 + {
  1145 + printf("Implementation currently not available\n");
  1146 + return HMPI_OK;
  1147 + }
  1148 +
  1149 + /*
  1150 + * Partition the graph such that
  1151 + * (a) The sum of weights of vertices in each partition is
  1152 + * proportional to the speed of the processor owning that partition.
  1153 + * (b) The edgecut is minimal.
  1154 + * speeds are functions of problem size.
  1155 + * There is no upper bound on the number of elements
  1156 + * stored by each processor.
  1157 + */
  1158 + if ((speeds != NULL)
  1159 + && (pn > 1)
  1160 + && (mlimits == NULL)
  1161 + && (vwgt != NULL)
  1162 + && (adjwgt != NULL
  1163 + )
  1164 + )
  1165 + {
  1166 + printf("Implementation currently not available\n");
  1167 + return HMPI_OK;
  1168 + }
  1169 +
  1170 + /*
  1171 + * Partition the graph such that
  1172 + * (a) The sum of weights of vertices in each partition is
  1173 + * proportional to the speed of the processor owning that partition.
  1174 + * (b) The edgecut is minimal.
  1175 + * speeds are functions of problem size.
  1176 + * There is an upper bound on the number of elements
  1177 + * stored by each processor.
  1178 + */
  1179 + if ((speeds != NULL)
  1180 + && (pn > 1)
  1181 + && (mlimits != NULL)
  1182 + && (vwgt != NULL)
  1183 + && (adjwgt != NULL
  1184 + )
  1185 + )
  1186 + {
  1187 + printf("Implementation currently not available\n");
  1188 + return HMPI_OK;
  1189 + }
  1190 +
  1191 + printf("Parameters are erroneous\n");
  1192 + return HMPI_ERR_PARTITION_GRAPH;
  1193 + }
  1194 +
  1195 + /*-----------------------------------------------------*/
  1196 +
  1197 + /*
  1198 + * There are two methods provided by hMETIS to partition
  1199 + * a hypergraph.
  1200 + * One using multilevel recursive bisection and the other
  1201 + * using multilevel k-way partitioning
  1202 + * hMETIS provides options to define the quality criteria
  1203 + * that can be used for partitioning.
  1204 + * We use default options for the present but however when
  1205 + * hMETIS is integrated with HMPI, application programmers
  1206 + * will be allowed to choose the options.
  1207 + *
  1208 + */
  1209 + int HMPI_Partition_hypergraph(
  1210 + int p,
  1211 + int pn,
  1212 + const double *speeds,
  1213 + const int *psizes,
  1214 + const int *mlimits,
  1215 + int nv,
  1216 + int nedges,
  1217 + const int *vwgt,
  1218 + const int *hptr,
  1219 + const int *hind,
  1220 + const int *hwgt,
  1221 + int *vp,
  1222 + int *edgecut
  1223 + )
  1224 + {
  1225 + int i, j, rc;
  1226 +
  1227 + /*
  1228 + * Use the partitioning interface for Set when
  1229 + * edges have no weights.
  1230 + */
  1231 + if ((vwgt == NULL)
  1232 + && (hptr == NULL)
  1233 + && (hind == NULL)
  1234 + && (hwgt == NULL
  1235 + )
  1236 + )
  1237 + {
  1238 + rc = HMPI_Partition_set(
  1239 + p,
  1240 + pn,
  1241 + speeds,
  1242 + psizes,
  1243 + mlimits,
  1244 + nv,
  1245 + NULL,
  1246 + 0,
  1247 + 1,
  1248 + -1,
  1249 + NULL,
  1250 + NULL,
  1251 + vp
  1252 + );
  1253 +
  1254 + if (rc != HMPI_OK)
  1255 + {
  1256 + return rc;
  1257 + }
  1258 +
  1259 + *edgecut = 0;
  1260 +
  1261 + for (i = 0; i < nedges; i++)
  1262 + {
  1263 + int owner_processor, ix = 0;
  1264 + int *vertex = (int*)malloc(
  1265 + sizeof(int)
  1266 + *
  1267 + (hptr[i+1] - hptr[i])
  1268 + );
  1269 +
  1270 + if (vertex == NULL)
  1271 + {
  1272 + return MPC_ERR_NOMEM;
  1273 + }
  1274 +
  1275 + for (j = hptr[i]; j < hptr[i+1]; j++)
  1276 + {
  1277 + vertex[ix++] = hind[j];
  1278 + }
  1279 +
  1280 + owner_processor = vp[vertex[0]];
  1281 +
  1282 + for (j = 1; j < (hptr[i+1] - hptr[i]); j++)
  1283 + {
  1284 + if (vp[vertex[j]] != owner_processor)
  1285 + {
  1286 + (*edgecut)++;
  1287 + }
  1288 + }
  1289 +
  1290 + free(vertex);
  1291 + }
  1292 +
  1293 + return HMPI_OK;
  1294 + }
  1295 +
  1296 + /*
  1297 + * Partition the graph such that the edgecut is minimal
  1298 + * Edgecut is the total number of edges that straddle
  1299 + * partitions.
  1300 + * There is no upper bound on the number of elements
  1301 + * stored by each processor.
  1302 + */
  1303 + if ((speeds == NULL)
  1304 + && (mlimits == NULL)
  1305 + && (vwgt == NULL)
  1306 + && (hwgt == NULL
  1307 + )
  1308 + )
  1309 + {
  1310 + printf("Implementation currently not available\n");
  1311 + return HMPI_OK;
  1312 + }
  1313 +
  1314 + /*
  1315 + * Partition the graph such that the edgecut is minimal
  1316 + * Edgecut is the total number of edges that straddle
  1317 + * partitions.
  1318 + * There is an upper bound on the number of elements
  1319 + * stored by each processor.
  1320 + */
  1321 + if ((speeds == NULL)
  1322 + && (mlimits != NULL)
  1323 + && (vwgt == NULL)
  1324 + && (hwgt == NULL
  1325 + )
  1326 + )
  1327 + {
  1328 + printf("Implementation currently not available\n");
  1329 + return HMPI_OK;
  1330 + }
  1331 +
  1332 + /*
  1333 + * Partition the graph such that the edgecut is minimal
  1334 + * Edgecut is the sum of the weight of the straddling edges.
  1335 + * There is no upper bound on the number of elements
  1336 + * stored by each processor.
  1337 + */
  1338 + if ((speeds == NULL)
  1339 + && (mlimits == NULL)
  1340 + && (vwgt == NULL)
  1341 + && (hwgt != NULL
  1342 + )
  1343 + )
  1344 + {
  1345 + printf("Implementation currently not available\n");
  1346 + return HMPI_OK;
  1347 + }
  1348 +
  1349 + /*
  1350 + * Partition the graph such that the edgecut is minimal
  1351 + * Edgecut is the sum of the weight of the straddling edges.
  1352 + * There is an upper bound on the number of elements
  1353 + * stored by each processor.
  1354 + */
  1355 + if ((speeds == NULL)
  1356 + && (mlimits != NULL)
  1357 + && (vwgt == NULL)
  1358 + && (hwgt != NULL
  1359 + )
  1360 + )
  1361 + {
  1362 + printf("Implementation currently not available\n");
  1363 + return HMPI_OK;
  1364 + }
  1365 +
  1366 + /*
  1367 + * Partition the graph such that the edgecut is minimal
  1368 + * Edgecut is the total number of edges that straddle
  1369 + * partitions.
  1370 + * Ideally all the partitions should be equally weighted.
  1371 + * There is no upper bound on the number of elements
  1372 + * stored by each processor.
  1373 + */
  1374 + if ((speeds == NULL)
  1375 + && (mlimits == NULL)
  1376 + && (vwgt != NULL)
  1377 + && (hwgt == NULL
  1378 + )
  1379 + )
  1380 + {
  1381 + printf("Implementation currently not available\n");
  1382 + return HMPI_OK;
  1383 + }
  1384 +
  1385 + /*
  1386 + * Partition the graph such that the edgecut is minimal
  1387 + * Edgecut is the total number of edges that straddle
  1388 + * partitions.
  1389 + * Ideally all the partitions should be equally weighted.
  1390 + * There is a upper bound on the number of elements
  1391 + * stored by each processor.
  1392 + */
  1393 + if ((speeds == NULL)
  1394 + && (mlimits != NULL)
  1395 + && (vwgt != NULL)
  1396 + && (hwgt == NULL
  1397 + )
  1398 + )
  1399 + {
  1400 + printf("Implementation currently not available\n");
  1401 + return HMPI_OK;
  1402 + }
  1403 +
  1404 + /*
  1405 + * Partition the graph such that the edgecut is minimal
  1406 + * Edgecut is the sum of the weight of the straddling edges.
  1407 + * There is an upper bound on the number of elements
  1408 + * stored by each processor.
  1409 + */
  1410 + if ((speeds == NULL)
  1411 + && (mlimits == NULL)
  1412 + && (vwgt != NULL)
  1413 + && (hwgt != NULL
  1414 + )
  1415 + )
  1416 + {
  1417 + printf("Implementation currently not available\n");
  1418 + return HMPI_OK;
  1419 + }
  1420 +
  1421 + /*
  1422 + * Partition the graph such that the edgecut is minimal
  1423 + * Edgecut is the sum of the weight of the straddling edges.
  1424 + * Ideally all the partitions should be equally weighted.
  1425 + * There is a upper bound on the number of elements
  1426 + * stored by each processor.
  1427 + */
  1428 + if ((speeds == NULL)
  1429 + && (mlimits != NULL)
  1430 + && (vwgt != NULL)
  1431 + && (hwgt != NULL
  1432 + )
  1433 + )
  1434 + {
  1435 + printf("Implementation currently not available\n");
  1436 + return HMPI_OK;
  1437 + }
  1438 +
  1439 + /*
  1440 + * Partition the graph such that
  1441 + * (a) The number of vertices in each partition is proportional
  1442 + * to the speed of the processor owning that partition.
  1443 + * (b) The edgecut is minimal. Edgecut is the total number
  1444 + * of edges that straddle partitions.
  1445 + * There is no upper bound on the number of elements
  1446 + * stored by each processor.
  1447 + */
  1448 + if ((speeds != NULL)
  1449 + && (pn == 1)
  1450 + && (mlimits == NULL)
  1451 + && (vwgt == NULL)
  1452 + && (hwgt == NULL
  1453 + )
  1454 + )
  1455 + {
  1456 + printf("Implementation currently not available\n");
  1457 + return HMPI_OK;
  1458 + }
  1459 +
  1460 + /*
  1461 + * Partition the graph such that
  1462 + * (a) The number of vertices in each partition is proportional
  1463 + * to the speed of the processor owning that partition.
  1464 + * (b) The edgecut is minimal.
  1465 + * There is an upper bound on the number of elements
  1466 + * stored by each processor.
  1467 + */
  1468 + if ((speeds != NULL)
  1469 + && (pn == 1)
  1470 + && (mlimits != NULL)
  1471 + && (vwgt == NULL)
  1472 + && (hwgt == NULL
  1473 + )
  1474 + )
  1475 + {
  1476 + printf("Implementation currently not available\n");
  1477 + return HMPI_OK;
  1478 + }
  1479 +
  1480 + /*
  1481 + * Partition the graph such that
  1482 + * (a) The number of vertices in each partition is proportional
  1483 + * to the speed of the processor owning that partition.
  1484 + * (b) The edgecut is minimal.
  1485 + * There is no upper bound on the number of elements
  1486 + * stored by each processor.
  1487 + * Speeds of processors are functions of problem size.
  1488 + */
  1489 + if ((speeds != NULL)
  1490 + && (pn > 1)
  1491 + && (mlimits == NULL)
  1492 + && (vwgt == NULL)
  1493 + && (hwgt == NULL
  1494 + )
  1495 + )
  1496 + {
  1497 + printf("Implementation currently not available\n");
  1498 + return HMPI_OK;
  1499 + }
  1500 +
  1501 + /*
  1502 + * Partition the graph such that
  1503 + * (a) The number of vertices in each partition is proportional
  1504 + * to the speed of the processor owning that partition.
  1505 + * (b) The edgecut is minimal.
  1506 + * Speeds of processors are functions of problem size.
  1507 + * There is an upper bound on the number of elements
  1508 + * stored by each processor.
  1509 + */
  1510 + if ((speeds != NULL)
  1511 + && (pn > 1)
  1512 + && (mlimits != NULL)
  1513 + && (vwgt == NULL)
  1514 + && (hwgt == NULL
  1515 + )
  1516 + )
  1517 + {
  1518 + printf("Implementation currently not available\n");
  1519 + return HMPI_OK;
  1520 + }
  1521 +
  1522 + /*
  1523 + * Partition the graph such that
  1524 + * (a) The number of vertices in each partition is
  1525 + * proportional to the speed of the processor
  1526 + * owning that partition.
  1527 + * (b) The edgecut is minimal. Edgecut is the sum
  1528 + * of the weights of the straddling edges.
  1529 + * There is no upper bound on the number of elements
  1530 + * stored by each processor.
  1531 + */
  1532 + if ((speeds != NULL)
  1533 + && (pn == 1)
  1534 + && (mlimits == NULL)
  1535 + && (vwgt == NULL)
  1536 + && (hwgt != NULL
  1537 + )
  1538 + )
  1539 + {
  1540 + printf("Implementation currently not available\n");
  1541 + return HMPI_OK;
  1542 + }
  1543 +
  1544 + /*
  1545 + * Partition the graph such that
  1546 + * (a) The number of vertices in each partition is
  1547 + * proportional to the speed of the processor
  1548 + * owning that partition.
  1549 + * (b) The edgecut is minimal. Edgecut is the sum
  1550 + * of the weights of the straddling edges.
  1551 + * There is an upper bound on the number of elements
  1552 + * stored by each processor.
  1553 + */
  1554 + if ((speeds != NULL)
  1555 + && (pn == 1)
  1556 + && (mlimits != NULL)
  1557 + && (vwgt == NULL)
  1558 + && (hwgt != NULL
  1559 + )
  1560 + )
  1561 + {
  1562 + printf("Implementation currently not available\n");
  1563 + return HMPI_OK;
  1564 + }
  1565 +
  1566 + /*
  1567 + * Partition the graph such that
  1568 + * (a) The number of vertices in each partition is
  1569 + * proportional to the speed of the processor
  1570 + * owning that partition.
  1571 + * (b) The edgecut is minimal. Edgecut is the sum
  1572 + * of the weights of the straddling edges.
  1573 + * There is no upper bound on the number of elements
  1574 + * stored by each processor.
  1575 + * Speeds of processors are functions of problem size.
  1576 + */
  1577 + if ((speeds != NULL)
  1578 + && (pn > 1)
  1579 + && (mlimits == NULL)
  1580 + && (vwgt == NULL)
  1581 + && (hwgt != NULL
  1582 + )
  1583 + )
  1584 + {
  1585 + printf("Implementation currently not available\n");
  1586 + return HMPI_OK;
  1587 + }
  1588 +
  1589 + /*
  1590 + * Partition the graph such that
  1591 + * (a) The number of vertices in each partition is
  1592 + * proportional to the speed of the processor
  1593 + * owning that partition.
  1594 + * (b) The edgecut is minimal. Edgecut is the sum
  1595 + * of the weights of the straddling edges.
  1596 + * There is an upper bound on the number of elements
  1597 + * stored by each processor.
  1598 + * Speeds of processors are functions of problem size.
  1599 + */
  1600 + if ((speeds != NULL)
  1601 + && (pn > 1)
  1602 + && (mlimits != NULL)
  1603 + && (vwgt == NULL)
  1604 + && (hwgt != NULL
  1605 + )
  1606 + )
  1607 + {
  1608 + printf("Implementation currently not available\n");
  1609 + return HMPI_OK;
  1610 + }
  1611 +
  1612 + /*
  1613 + * Partition the graph such that
  1614 + * (a) The sum of weights of vertices in each partition is
  1615 + * proportional to the speed of the processor owning that partition.
  1616 + * (b) The edgecut is minimal.
  1617 + * There is no upper bound on the number of elements
  1618 + * stored by each processor.
  1619 + */
  1620 + if ((speeds != NULL)
  1621 + && (pn == 1)
  1622 + && (mlimits == NULL)
  1623 + && (vwgt != NULL)
  1624 + && (hwgt == NULL
  1625 + )
  1626 + )
  1627 + {
  1628 + printf("Implementation currently not available\n");
  1629 + return HMPI_OK;
  1630 + }
  1631 +
  1632 + /*
  1633 + * Partition the graph such that
  1634 + * (a) The sum of weights of vertices in each partition is
  1635 + * proportional to the speed of the processor owning that partition.
  1636 + * (b) The edgecut is minimal.
  1637 + * There is an upper bound on the number of elements
  1638 + * stored by each processor.
  1639 + */
  1640 + if ((speeds != NULL)
  1641 + && (pn == 1)
  1642 + && (mlimits != NULL)
  1643 + && (vwgt != NULL)
  1644 + && (hwgt == NULL
  1645 + )
  1646 + )
  1647 + {
  1648 + printf("Implementation currently not available\n");
  1649 + return HMPI_OK;
  1650 + }
  1651 +
  1652 + /*
  1653 + * Partition the graph such that
  1654 + * (a) The sum of weights of vertices in each partition is
  1655 + * proportional to the speed of the processor owning that partition.
  1656 + * (b) The edgecut is minimal.
  1657 + * There is no upper bound on the number of elements
  1658 + * stored by each processor.
  1659 + * Speeds of processors are functions of problem size.
  1660 + */
  1661 + if ((speeds != NULL)
  1662 + && (pn > 1)
  1663 + && (mlimits == NULL)
  1664 + && (vwgt != NULL)
  1665 + && (hwgt == NULL
  1666 + )
  1667 + )
  1668 + {
  1669 + printf("Implementation currently not available\n");
  1670 + return HMPI_OK;
  1671 + }
  1672 +
  1673 + /*
  1674 + * Partition the graph such that
  1675 + * (a) The sum of weights of vertices in each partition is
  1676 + * proportional to the speed of the processor owning that partition.
  1677 + * (b) The edgecut is minimal.
  1678 + * There is an upper bound on the number of elements
  1679 + * stored by each processor.
  1680 + * Speeds of processors are functions of problem size.
  1681 + */
  1682 + if ((speeds != NULL)
  1683 + && (pn > 1)
  1684 + && (mlimits != NULL)
  1685 + && (vwgt != NULL)
  1686 + && (hwgt == NULL
  1687 + )
  1688 + )
  1689 + {
  1690 + printf("Implementation currently not available\n");
  1691 + return HMPI_OK;
  1692 + }
  1693 +
  1694 + /*
  1695 + * Partition the graph such that
  1696 + * (a) The sum of weights of vertices in each partition is
  1697 + * proportional to the speed of the processor owning that partition.
  1698 + * (b) The edgecut is minimal.
  1699 + * There is no upper bound on the number of elements
  1700 + * stored by each processor.
  1701 + */
  1702 + if ((speeds != NULL)
  1703 + && (pn == 1)
  1704 + && (mlimits == NULL)
  1705 + && (vwgt != NULL)
  1706 + && (hwgt != NULL
  1707 + )
  1708 + )
  1709 + {
  1710 + printf("Implementation currently not available\n");
  1711 + return HMPI_OK;
  1712 + }
  1713 +
  1714 + /*
  1715 + * Partition the graph such that
  1716 + * (a) The sum of weights of vertices in each partition is
  1717 + * proportional to the speed of the processor owning that partition.
  1718 + * (b) The edgecut is minimal.
  1719 + * There is an upper bound on the number of elements
  1720 + * stored by each processor.
  1721 + */
  1722 + if ((speeds != NULL)
  1723 + && (pn == 1)
  1724 + && (mlimits != NULL)
  1725 + && (vwgt != NULL)
  1726 + && (hwgt != NULL
  1727 + )
  1728 + )
  1729 + {
  1730 + printf("Implementation currently not available\n");
  1731 + return HMPI_OK;
  1732 + }
  1733 +
  1734 + /*
  1735 + * Partition the graph such that
  1736 + * (a) The sum of weights of vertices in each partition is
  1737 + * proportional to the speed of the processor owning that partition.
  1738 + * (b) The edgecut is minimal.
  1739 + * speeds are functions of problem size.
  1740 + * There is no upper bound on the number of elements
  1741 + * stored by each processor.
  1742 + */
  1743 + if ((speeds != NULL)
  1744 + && (pn > 1)
  1745 + && (mlimits == NULL)
  1746 + && (vwgt != NULL)
  1747 + && (hwgt != NULL
  1748 + )
  1749 + )
  1750 + {
  1751 + printf("Implementation currently not available\n");
  1752 + return HMPI_OK;
  1753 + }
  1754 +
  1755 + /*
  1756 + * Partition the graph such that
  1757 + * (a) The sum of weights of vertices in each partition is
  1758 + * proportional to the speed of the processor owning that partition.
  1759 + * (b) The edgecut is minimal.
  1760 + * speeds are functions of problem size.
  1761 + * There is an upper bound on the number of elements
  1762 + * stored by each processor.
  1763 + */
  1764 + if ((speeds != NULL)
  1765 + && (pn > 1)
  1766 + && (mlimits != NULL)
  1767 + && (vwgt != NULL)
  1768 + && (hwgt != NULL
  1769 + )
  1770 + )
  1771 + {
  1772 + printf("Implementation currently not available\n");
  1773 + return HMPI_OK;
  1774 + }
  1775 +
  1776 + printf("Parameters are erroneous\n");
  1777 + return HMPI_ERR_PARTITION_GRAPH;
  1778 + }
  1779 +
  1780 + /*-----------------------------------------------------*/
hdpi/hmpi_partitioning_graphs.h 0 → 100644

/*************************************************************************
*                                                                        *
*           Heterogeneous Data Partitioning Interface                    *
*           =========================================                    *
*                                                                        *
*           Copyright (c) 2002 Department of Computer Science,           *
*           University College Dublin.                                   *
*                                                                        *
*           All rights reserved. We assume no responsibility for the use *
*           or reliability of our software.                              *
*                                                                        *
*************************************************************************/

 /************************************************/
 /* Partitioning interfaces for graphs           */
 /*                                              */
 /* Revision history                             */
 /* 19-05-2003 -- Initial version                */
 /************************************************/

 /* NOTE(review): identifiers beginning with a double underscore are
  * reserved for the C implementation; consider renaming the guard
  * (e.g. HMPI_PARTITIONING_GRAPHS_H) -- confirm no other file tests
  * this macro before changing it. */
 #ifndef __HMPI_PARTITIONING_GRAPHS_HH
 #define __HMPI_PARTITIONING_GRAPHS_HH

 /* Values accepted for the type_of_partitioning argument of
  * HMPI_Partition_bipartite_graph. */
 #define PARTITION_SUBSET 1
 #define PARTITION_OTHER 2

 /*
  * Partition a graph of n vertices and m edges among p processors.
  * The graph is given in CSR form (xadj / adjacency) with optional
  * vertex weights vwgt and edge weights adjwgt; speeds/psizes and
  * per-processor memory limits mlimits are optional (NULL means
  * homogeneous / unbounded).  On success the owning processor of
  * each vertex is written to vp and the resulting edge cut to
  * edgecut.
  */
 int HMPI_Partition_graph(
 int p,
 int pn,
 const double *speeds,
 const int *psizes,
 const int *mlimits,
 int n,
 int m,
 const int *vwgt,
 const int *xadj,
 const int *adjacency,
 const int *adjwgt,
 int *vp,
 int *edgecut
 );

 /*
  * As HMPI_Partition_graph, for bipartite graphs: vtype gives the
  * side of each vertex, and type_of_partitioning is one of
  * PARTITION_SUBSET / PARTITION_OTHER above.
  */
 int HMPI_Partition_bipartite_graph(
 int p,
 int pn,
 const double *speeds,
 const int *psizes,
 const int *mlimits,
 int n,
 int m,
 const int *vtype,
 const int *vwgt,
 const int *xadj,
 const int *adjacency,
 const int *adjwgt,
 int type_of_partitioning,
 int *vp,
 int *edgecut
 );

 /*
  * Partition a hypergraph of nv vertices and nedges hyperedges
  * among p processors.  Hyperedges are described by hptr/hind
  * (CSR-style pin lists) with optional hyperedge weights hwgt and
  * vertex weights vwgt.  Outputs the vertex-to-processor map vp
  * and the hyperedge cut edgecut.
  */
 int HMPI_Partition_hypergraph(
 int p,
 int pn,
 const double *speeds,
 const int *psizes,
 const int *mlimits,
 int nv,
 int nedges,
 const int *vwgt,
 const int *hptr,
 const int *hind,
 const int *hwgt,
 int *vp,
 int *edgecut
 );

 #endif /* __HMPI_PARTITIONING_GRAPHS_HH */
hdpi/hmpi_partitioning_internal.h 0 → 100644
  1 +/*************************************************************************
  2 +* *
  3 +* Heterogeneous Data Partitioning Interface *
  4 +* ========================================= *
  5 +* *
  6 +* Copyright (c) 2002 Department of Computer Science, *
  7 +* University College Dublin. *
  8 +* *
  9 +* All rights reserved. We assume no responsibility for the use *
  10 +* or reliability of our software. *
  11 +* *
  12 +*************************************************************************/
  13 +
  14 + /************************************************/
  15 + /* partitioning internal interfaces */
  16 + /* Revision history */
  17 + /* 19-05-2003 -- Initial version */
  18 + /************************************************/
  19 +
  20 + #ifndef __HMPI_PARTITIONING_INTERNAL_HH
  21 + #define __HMPI_PARTITIONING_INTERNAL_HH
  22 +
     + /* ------------------------------------------------------------------
     +  * Internal set-distribution helpers (declarations only; the bodies
     +  * are not visible in this header).
     +  * NOTE(review): identifiers beginning with a double underscore are
     +  * reserved for the implementation by the C standard (C99 7.1.3);
     +  * an HMPI_I_ / hmpi_internal_ prefix would be safer.
     +  * NOTE(review): pointer spelling is inconsistent below
     +  * (`const int* mlimits` vs `const int *mlimits`) -- cosmetic only.
     +  * Common conventions inferred from names (TODO confirm):
     +  *   p = number of processors, n = number of elements,
     +  *   mlimits = per-processor limits, w = per-element weights,
     +  *   np (out) = elements assigned to each processor.
     +  * ------------------------------------------------------------------ */
     + /* Even (homogeneous) distribution of n elements over p processors,
     +  * presumably capped by mlimits; result written to np. */
  23 + int __HMPI_Homogeneous_distribution_with_mlimits
  24 + (
  25 + int p,
  26 + int n,
  27 + const int* mlimits,
  28 + int *np
  29 + );
  30 +
     + /* As above, but the elements carry weights w and form ordered sets
     +  * (exact ordering semantics not shown here). */
  31 + int __HMPI_Homogeneous_distribution_with_mlimits_and_weights_ordered_sets
  32 + (
  33 + int p,
  34 + int n,
  35 + const int* mlimits,
  36 + const int* w,
  37 + int *np
  38 + );
  39 +
     + /* Homogeneous set partitioning with ordering / processor-ordering
     +  * flags and a selectable metric; umf is a user-supplied metric
     +  * callback used when type_of_metric requests it (presumably --
     +  * confirm), metric (out) receives the achieved metric value. */
  40 + int __HMPI_Partition_set_homogeneous
  41 + (
  42 + int p,
  43 + const int *mlimits,
  44 + int n,
  45 + const int *w,
  46 + int ordering,
  47 + int processor_ordering,
  48 + int type_of_metric,
  49 + User_defined_metric umf,
  50 + double *metric,
  51 + int *np
  52 + );
  53 +
     + /* Distribution for the case where each processor speed is a single
     +  * constant; bounds presumably plays the role of mlimits -- verify. */
  54 + int __HMPI_Speeds_are_single_numbers_with_mlimits
  55 + (
  56 + int p,
  57 + const double *speeds,
  58 + const int *bounds,
  59 + int n,
  60 + int *np
  61 + );
  62 +
     + /* Allocate n elements so that allocations[i] is proportional to
     +  * speeds[i] (inferred from the name -- confirm rounding rules). */
  63 + int __HMPI_Number_of_elements_proportional_to_speed
  64 + (
  65 + int p,
  66 + int n,
  67 + const double *speeds,
  68 + int *allocations
  69 + );
  70 +
     + /* Built-in quality metric comparing an actual allocation against an
     +  * ideal one for p processors of the given speeds; returns the metric
     +  * value (formula not visible here -- see implementation). */
  71 + double __HMPI_System_defined_metric
  72 + (
  73 + int p,
  74 + const double *speeds,
  75 + const int *actual,
  76 + const int *ideal
  77 + );
  78 +
     + /* Compute per-processor bin sizes from element weights w;
     +  * wallocations (out) presumably holds per-processor weight totals
     +  * and tsum (out) the overall total -- TODO confirm. */
  79 + int __HMPI_Size_of_bins
  80 + (
  81 + int p,
  82 + int n,
  83 + const double *speeds,
  84 + const int *w,
  85 + int *wallocations,
  86 + int *tsum
  87 + );
  88 +
     + /* Weighted distribution of an ORDERED set; type_of_metric selects
     +  * the built-in or user metric (umf), metric (out) its value,
     +  * np (out) the per-processor element counts. */
  89 + int __HMPI_Sum_of_weights_for_ordered_set
  90 + (
  91 + int p,
  92 + int n,
  93 + const double *speeds,
  94 + const int *w,
  95 + int type_of_metric,
  96 + User_defined_metric umf,
  97 + double *metric,
  98 + int *np
  99 + );
  100 +
     + /* Ordered-set variant that additionally enforces the per-processor
     +  * mlimits caps (inferred from the name -- confirm). */
  101 + int __HMPI_Apply_mlimits_to_ordered_sum_of_weights
  102 + (
  103 + int p,
  104 + int n,
  105 + const double *speeds,
  106 + const int *mlimits,
  107 + const int *w,
  108 + int type_of_metric,
  109 + User_defined_metric umf,
  110 + double *metric,
  111 + int *np
  112 + );
  113 +
     + /* Weighted distribution of a NON-ordered set; same metric/output
     +  * conventions as the ordered variant above. */
  114 + int __HMPI_Sum_of_weights_for_nonordered_set
  115 + (
  116 + int p,
  117 + int n,
  118 + const double *speeds,
  119 + const int *w,
  120 + int type_of_metric,
  121 + User_defined_metric umf,
  122 + double *metric,
  123 + int *np
  124 + );
  125 +
     + /* Alternative algorithm ("algo 2") for the unordered+mlimits case;
     +  * how it differs from the sibling below is not visible here. */
  126 + int __HMPI_Apply_mlimits_to_unordered_sum_of_weights_algo_2
  127 + (
  128 + int p,
  129 + int n,
  130 + const double *speeds,
  131 + const int *mlimits,
  132 + const int *w,
  133 + int type_of_metric,
  134 + User_defined_metric umf,
  135 + double *metric,
  136 + int *np
  137 + );
  138 +
     + /* Unordered-set weighted distribution with mlimits caps applied. */
  139 + int __HMPI_Apply_mlimits_to_unordered_sum_of_weights
  140 + (
  141 + int p,
  142 + int n,
  143 + const double *speeds,
  144 + const int *mlimits,
  145 + const int *w,
  146 + int type_of_metric,
  147 + User_defined_metric umf,
  148 + double *metric,
  149 + int *np
  150 + );
  151 +
     + /* Distribute n elements over p processors whose speed is a single
     +  * number s[i]; note npd is double* -- presumably a FRACTIONAL
     +  * (unrounded) distribution, unlike the int *np outputs elsewhere
     +  * in this header. TODO confirm. */
  152 + int
  153 + __HMPI_Distribute_with_single_number_for_speed
  154 + (
  155 + int n,
  156 + int p,
  157 + const double *s,
  158 + double *npd
  159 + );
  160 +
     + /* Recursive-bisection step over the middle region of the speed
     +  * functions; slopei/slopef are presumably initial/final slopes of
     +  * the region, speeds_opt (out) the speeds at the solution point and
     +  * npd (out) the fractional distribution -- verify in the .c file. */
  161 + int __HMPI_Recursive_bisection_middle_region
  162 + (
  163 + int p,
  164 + int pn,
  165 + const double *speeds,
  166 + const int *psizes,
  167 + int n,
  168 + double slopei,
  169 + double slopef,
  170 + double *speeds_opt,
  171 + double *npd
  172 + );
  173 +
     + /* Distribution when speed is a function of problem size (speeds has
     +  * pn points per processor at problem sizes psizes -- inferred),
     +  * subject to the per-processor bounds; np (out) integer counts,
     +  * speeds_opt (out) speeds at the chosen operating point. */
  174 + int __HMPI_Speed_function_of_problem_size_with_mlimits
  175 + (
  176 + int p,
  177 + int pn,
  178 + const double *speeds,
  179 + const int *psizes,
  180 + const int *bounds,
  181 + int n,
  182 + double *speeds_opt,
  183 + int *np
  184 + );
  185 +
     + /* Same as above without limit constraints. */
  186 + int __HMPI_Speed_function_of_problem_size
  187 + (
  188 + int p,
  189 + int pn,
  190 + const double *speeds,
  191 + const int *psizes,
  192 + int n,
  193 + double *speeds_opt,
  194 + int *np
  195 + );
  196 +
     + /* The six declarations below are the speed-FUNCTION counterparts of
     +  * the single-number sum-of-weights helpers earlier in this header:
     +  * {nonordered, ordered} x {with/without mlimits} x {with/without
     +  * processor reordering}. Shared (inferred) conventions:
     +  *   p, pn, speeds, psizes - processors and their speed functions
     +  *   n, w                  - element count and per-element weights
     +  *   type_of_metric, umf   - metric selector / user metric callback
     +  *   metric (out), np (out)- achieved metric and per-processor counts
     +  * Exact semantics are not visible here -- confirm in the .c files. */
     + /* Non-ordered set, no limits. */
  197 + int __HMPI_Sum_of_weights_for_nonordered_set_speed_functions
  198 + (
  199 + int p,
  200 + int pn,
  201 + const double *speeds,
  202 + const int *psizes,
  203 + int n,
  204 + const int *w,
  205 + int type_of_metric,
  206 + User_defined_metric umf,
  207 + double *metric,
  208 + int *np
  209 + );
  210 +
     + /* Non-ordered set with per-processor mlimits caps. */
  211 + int __HMPI_Sum_of_weights_for_nonordered_set_speed_functions_with_mlimits
  212 + (
  213 + int p,
  214 + int pn,
  215 + const double *speeds,
  216 + const int *psizes,
  217 + const int *mlimits,
  218 + int n,
  219 + const int *w,
  220 + int type_of_metric,
  221 + User_defined_metric umf,
  222 + double *metric,
  223 + int *np
  224 + );
  225 +
     + /* Ordered set, no limits. */
  226 + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions
  227 + (
  228 + int p,
  229 + int pn,
  230 + const double *speeds,
  231 + const int *psizes,
  232 + int n,
  233 + const int *w,
  234 + int type_of_metric,
  235 + User_defined_metric umf,
  236 + double *metric,
  237 + int *np
  238 + );
  239 +
     + /* Ordered set with mlimits caps. */
  240 + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits
  241 + (
  242 + int p,
  243 + int pn,
  244 + const double *speeds,
  245 + const int *psizes,
  246 + const int *mlimits,
  247 + int n,
  248 + const int *w,
  249 + int type_of_metric,
  250 + User_defined_metric umf,
  251 + double *metric,
  252 + int *np
  253 + );
  254 +
     + /* Ordered set, processors may be reordered (presumably to improve
     +  * the metric -- confirm), no limits. */
  255 + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering
  256 + (
  257 + int p,
  258 + int pn,
  259 + const double *speeds,
  260 + const int *psizes,
  261 + int n,
  262 + const int *w,
  263 + int type_of_metric,
  264 + User_defined_metric umf,
  265 + double *metric,
  266 + int *np
  267 + );
  268 +
     + /* Ordered set with processor reordering AND mlimits caps. */
  269 + int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering_with_mlimits
  270 + (
  271 + int p,
  272 + int pn,
  273 + const double *speeds,
  274 + const int *psizes,
  275 + const int *mlimits,
  276 + int n,
  277 + const int *w,
  278 + int type_of_metric,
  279 + User_defined_metric umf,
  280 + double *metric,
  281 + int *np
  282 + );
  283 +
  284 + #endif