Skip to content

Commit b0038e6

Browse files
committed
merged metis format parser by clstaudt (GraphChi/graphchi-cpp#2)
2 parents 4fab847 + ba7273a commit b0038e6

File tree

3 files changed

+103
-3
lines changed

3 files changed

+103
-3
lines changed

src/preprocessing/conversions.hpp

Lines changed: 103 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@
3434
#include <dirent.h>
3535
#include <sys/stat.h>
3636

37+
#include <fstream>
38+
#include <iostream>
39+
40+
41+
3742
#include "graphchi_types.hpp"
3843
#include "logger/logger.hpp"
3944
#include "preprocessing/sharder.hpp"
@@ -340,6 +345,98 @@ namespace graphchi {
340345
free(s);
341346
fclose(inf);
342347
}
348+
349+
350+
/**
351+
* Extract a vector of node indices from a line in the file.
352+
*
353+
* @param[in] line line from input file containing node indices
354+
* @param[out] adjacencies node indices extracted from line
355+
*/
356+
static std::vector<vid_t> parseLine(std::string line) {
357+
358+
std::stringstream stream(line);
359+
std::string token;
360+
char delim = ' ';
361+
std::vector<vid_t> adjacencies;
362+
363+
// split string and push adjacent nodes
364+
while (std::getline(stream, token, delim)) {
365+
if (token.size() != 0) {
366+
vid_t v = atoi(token.c_str());
367+
adjacencies.push_back(v);
368+
}
369+
}
370+
371+
return adjacencies;
372+
}
373+
374+
/**
375+
* Converts a graph from the METIS adjacency format.
376+
* See http://people.sc.fsu.edu/~jburkardt/data/metis_graph/metis_graph.html for format documentation.
377+
* NOTE: contributed by clstaudt
378+
*/
379+
template <typename EdgeDataType>
380+
void convert_metis(std::string inputPath, sharder<EdgeDataType> &sharderobj) {
381+
382+
std::cout << "[INFO] reading METIS graph file" << std::endl;
383+
384+
std::ifstream graphFile(inputPath.c_str());
385+
386+
if (! graphFile.good()) {
387+
logstream(LOG_FATAL) << "Could not load :" << inputPath << " error: " << strerror(errno) << std::endl;
388+
}
389+
390+
std::string line; // current line
391+
392+
// handle header line
393+
int n; // number of nodes
394+
int m; // number of edges
395+
int weighted; // indicates weight scheme:
396+
397+
if (std::getline(graphFile, line)) {
398+
while (line[0] == '%') { // skip comments
399+
std::getline(graphFile, line);
400+
}
401+
402+
std::vector<uint> tokens = parseLine(line);
403+
n = tokens[0];
404+
m = tokens[1];
405+
if (tokens.size() == 2) {
406+
weighted = 0;
407+
} if (tokens.size() == 3) {
408+
weighted = tokens[2];
409+
if (weighted != 0) {
410+
logstream(LOG_FATAL) << "node and edge weights currently not supported by parser" << std::endl;
411+
}
412+
}
413+
} else {
414+
logstream(LOG_FATAL) << "getting METIS file header failed" << std::endl;
415+
}
416+
417+
logstream(LOG_INFO) << "reading graph with n=" << n << ", m=" << m << std::endl;
418+
419+
vid_t u = 0; // starting node index
420+
421+
// handle content lines
422+
while (graphFile.good()) {
423+
do {
424+
std::getline(graphFile, line);
425+
} while (line[0] == '%'); // skip comments
426+
427+
// parse adjacency line
428+
std::vector<vid_t> adjacencies = parseLine(line);
429+
for (std::vector<vid_t>::iterator it=adjacencies.begin(); it != adjacencies.end(); ++it) {
430+
vid_t v = *it;
431+
if (u <= v) { // add edge only once; self-loops are allowed
432+
sharderobj.preprocessing_add_edge(u, v, EdgeDataType());
433+
}
434+
}
435+
436+
u++;
437+
}
438+
439+
}
343440

344441
/**
345442
* Converts a graph from cassovary's (Twitter) format. Edge values are not supported,
@@ -505,6 +602,7 @@ namespace graphchi {
505602

506603

507604

605+
508606
/**
509607
* Converts a graph input to shards. Preprocessing has several steps,
510608
* see sharder.hpp for more information.
@@ -513,10 +611,10 @@ namespace graphchi {
513611
int convert(std::string basefilename, std::string nshards_string) {
514612
sharder<EdgeDataType, FinalEdgeDataType> sharderobj(basefilename);
515613

516-
std::string file_type_str = get_option_string_interactive("filetype", "edgelist, adjlist");
614+
std::string file_type_str = get_option_string_interactive("filetype", "edgelist, adjlist, binedgelist, metis");
517615
if (file_type_str != "adjlist" && file_type_str != "edgelist" && file_type_str != "binedgelist" &&
518-
file_type_str != "multivalueedgelist") {
519-
logstream(LOG_ERROR) << "You need to specify filetype: 'edgelist' or 'adjlist'." << std::endl;
616+
file_type_str != "multivalueedgelist" && file_type_str != "metis") {
617+
logstream(LOG_ERROR) << "You need to specify filetype: 'edgelist', 'adjlist', 'binedgelist', or 'metis'." << std::endl;
520618
assert(false);
521619
}
522620

@@ -533,6 +631,8 @@ namespace graphchi {
533631
#endif
534632
} else if (file_type_str == "binedgelist") {
535633
convert_binedgelistval<EdgeDataType>(basefilename, sharderobj);
634+
} else if (file_type_str == "metis") {
635+
convert_metis<EdgeDataType>(basefilename, sharderobj);
536636
} else {
537637
assert(false);
538638
}

unittest.sh

100644100755
File mode changed.

unittest2.sh

100644100755
File mode changed.

0 commit comments

Comments
 (0)