34
34
#include < dirent.h>
35
35
#include < sys/stat.h>
36
36
37
+ #include < fstream>
38
+ #include < iostream>
39
+
40
+
41
+
37
42
#include " graphchi_types.hpp"
38
43
#include " logger/logger.hpp"
39
44
#include " preprocessing/sharder.hpp"
@@ -340,6 +345,98 @@ namespace graphchi {
340
345
free (s);
341
346
fclose (inf);
342
347
}
348
+
349
+
350
+ /* *
351
+ * Extract a vector of node indices from a line in the file.
352
+ *
353
+ * @param[in] line line from input file containing node indices
354
+ * @param[out] adjacencies node indices extracted from line
355
+ */
356
+ static std::vector<vid_t > parseLine (std::string line) {
357
+
358
+ std::stringstream stream (line);
359
+ std::string token;
360
+ char delim = ' ' ;
361
+ std::vector<vid_t > adjacencies;
362
+
363
+ // split string and push adjacent nodes
364
+ while (std::getline (stream, token, delim)) {
365
+ if (token.size () != 0 ) {
366
+ vid_t v = atoi (token.c_str ());
367
+ adjacencies.push_back (v);
368
+ }
369
+ }
370
+
371
+ return adjacencies;
372
+ }
373
+
374
+ /* *
375
+ * Converts a graph from the METIS adjacency format.
376
+ * See http://people.sc.fsu.edu/~jburkardt/data/metis_graph/metis_graph.html for format documentation.
377
+ * NOTE: contributed by clstaudt
378
+ */
379
+ template <typename EdgeDataType>
380
+ void convert_metis (std::string inputPath, sharder<EdgeDataType> &sharderobj) {
381
+
382
+ std::cout << " [INFO] reading METIS graph file" << std::endl;
383
+
384
+ std::ifstream graphFile (inputPath.c_str ());
385
+
386
+ if (! graphFile.good ()) {
387
+ logstream (LOG_FATAL) << " Could not load :" << inputPath << " error: " << strerror (errno) << std::endl;
388
+ }
389
+
390
+ std::string line; // current line
391
+
392
+ // handle header line
393
+ int n; // number of nodes
394
+ int m; // number of edges
395
+ int weighted; // indicates weight scheme:
396
+
397
+ if (std::getline (graphFile, line)) {
398
+ while (line[0 ] == ' %' ) { // skip comments
399
+ std::getline (graphFile, line);
400
+ }
401
+
402
+ std::vector<uint > tokens = parseLine (line);
403
+ n = tokens[0 ];
404
+ m = tokens[1 ];
405
+ if (tokens.size () == 2 ) {
406
+ weighted = 0 ;
407
+ } if (tokens.size () == 3 ) {
408
+ weighted = tokens[2 ];
409
+ if (weighted != 0 ) {
410
+ logstream (LOG_FATAL) << " node and edge weights currently not supported by parser" << std::endl;
411
+ }
412
+ }
413
+ } else {
414
+ logstream (LOG_FATAL) << " getting METIS file header failed" << std::endl;
415
+ }
416
+
417
+ logstream (LOG_INFO) << " reading graph with n=" << n << " , m=" << m << std::endl;
418
+
419
+ vid_t u = 0 ; // starting node index
420
+
421
+ // handle content lines
422
+ while (graphFile.good ()) {
423
+ do {
424
+ std::getline (graphFile, line);
425
+ } while (line[0 ] == ' %' ); // skip comments
426
+
427
+ // parse adjacency line
428
+ std::vector<vid_t > adjacencies = parseLine (line);
429
+ for (std::vector<vid_t >::iterator it=adjacencies.begin (); it != adjacencies.end (); ++it) {
430
+ vid_t v = *it;
431
+ if (u <= v) { // add edge only once; self-loops are allowed
432
+ sharderobj.preprocessing_add_edge (u, v, EdgeDataType ());
433
+ }
434
+ }
435
+
436
+ u++;
437
+ }
438
+
439
+ }
343
440
344
441
/* *
345
442
* Converts a graph from cassovary's (Twitter) format. Edge values are not supported,
@@ -505,6 +602,7 @@ namespace graphchi {
505
602
506
603
507
604
605
+
508
606
/* *
509
607
* Converts a graph input to shards. Preprocessing has several steps,
510
608
* see sharder.hpp for more information.
@@ -513,10 +611,10 @@ namespace graphchi {
513
611
int convert (std::string basefilename, std::string nshards_string) {
514
612
sharder<EdgeDataType, FinalEdgeDataType> sharderobj (basefilename);
515
613
516
- std::string file_type_str = get_option_string_interactive (" filetype" , " edgelist, adjlist" );
614
+ std::string file_type_str = get_option_string_interactive (" filetype" , " edgelist, adjlist, binedgelist, metis " );
517
615
if (file_type_str != " adjlist" && file_type_str != " edgelist" && file_type_str != " binedgelist" &&
518
- file_type_str != " multivalueedgelist" ) {
519
- logstream (LOG_ERROR) << " You need to specify filetype: 'edgelist' or 'adjlist'." << std::endl;
616
+ file_type_str != " multivalueedgelist" && file_type_str != " metis " ) {
617
+ logstream (LOG_ERROR) << " You need to specify filetype: 'edgelist', 'adjlist', 'binedgelist', or 'metis '." << std::endl;
520
618
assert (false );
521
619
}
522
620
@@ -533,6 +631,8 @@ namespace graphchi {
533
631
#endif
534
632
} else if (file_type_str == " binedgelist" ) {
535
633
convert_binedgelistval<EdgeDataType>(basefilename, sharderobj);
634
+ } else if (file_type_str == " metis" ) {
635
+ convert_metis<EdgeDataType>(basefilename, sharderobj);
536
636
} else {
537
637
assert (false );
538
638
}
0 commit comments