From 0787f03ef806831dde2e827bafe9fde7d9063724 Mon Sep 17 00:00:00 2001 From: Bastian Rieck Date: Tue, 2 Jul 2019 09:12:20 +0200 Subject: [PATCH 01/11] Added code for proper feature counting Currently, the main function contains a hard-coded list of data sets and their unique labels. It is easier to determine this automatically. Plus, the use of `PTC` in the code is misleading, because it comes in multiple versions. --- MLGkernel/MLGdataset.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/MLGkernel/MLGdataset.cpp b/MLGkernel/MLGdataset.cpp index 6ba63f8..f4e1136 100644 --- a/MLGkernel/MLGdataset.cpp +++ b/MLGkernel/MLGdataset.cpp @@ -29,7 +29,9 @@ #include "FLGkernel.hpp" #include "MatrixOF_ASCII.hpp" #include "params.hpp" + #include +#include <set> void MLGdataset::condense(const int nlevels, const int leaf_radius){ assert(nlevels>0); @@ -125,6 +127,32 @@ void MLGdataset::loadDiscreteFeatures(std::string filename, int numFeatures){ int label; int numGraphs; ifs >> numGraphs; + + // Store the position of the stream so that we can roll it back later + // on *after* having counted the number of features. + auto position = ifs.tellg(); + + // Stores the labels that are encountered. Since this is a set, the + // label count is guaranteed to be unique. + set<int> labels; + + while( ifs ) { + ifs >> numVertices; // ignore this for now as we do not need it for counting + + for( int i = 0; i < numVertices; i++ ) { + ifs >> label; + labels.insert( label ); + } + } + + numFeatures = static_cast<int>( labels.size() ); + + // Reset the stream and convert the labels now into their graph + // representation. 
+ + ifs.clear(); + ifs.seekg( position ); + while(ifs.good()){ ifs>>numVertices; if(!ifs.good()) break; From 649d7b9d75182c276038502c6e9180b311d2e5df Mon Sep 17 00:00:00 2001 From: Bastian Rieck Date: Tue, 2 Jul 2019 09:32:08 +0200 Subject: [PATCH 02/11] Fixed definitions for discrete feature loading --- MLGkernel/MLGdataset.cpp | 6 +++--- MLGkernel/MLGdataset.hpp | 2 +- MLGkernel/runMLG.cpp | 6 ++---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/MLGkernel/MLGdataset.cpp b/MLGkernel/MLGdataset.cpp index f4e1136..b7f478b 100644 --- a/MLGkernel/MLGdataset.cpp +++ b/MLGkernel/MLGdataset.cpp @@ -116,7 +116,7 @@ void MLGdataset::loadGraphs(std::string filename){ assert(numGraphs == graphs.size()); } -void MLGdataset::loadDiscreteFeatures(std::string filename, int numFeatures){ +void MLGdataset::loadDiscreteFeatures( std::string filename ) { ifstream ifs(filename); if(ifs.fail()){ cout << "Failed to open " << filename << "." << endl; @@ -145,11 +145,11 @@ void MLGdataset::loadDiscreteFeatures(std::string filename, int numFeatures){ } } - numFeatures = static_cast<int>( labels.size() ); + int numFeatures = static_cast<int>( labels.size() ); + cout << "Number of features: " << numFeatures << endl; // Reset the stream and convert the labels now into their graph // representation. 
- ifs.clear(); ifs.seekg( position ); diff --git a/MLGkernel/MLGdataset.hpp b/MLGkernel/MLGdataset.hpp index 224a642..59d3566 100644 --- a/MLGkernel/MLGdataset.hpp +++ b/MLGkernel/MLGdataset.hpp @@ -47,7 +47,7 @@ class MLGdataset{ public: void loadGraphs(std::string filename); - void loadDiscreteFeatures(std::string filename, int numFeatures); + void loadDiscreteFeatures(std::string filename); void loadFeatures(std::string filename); void saveGram(std::string filename); void fillGram(double *npmatrix, int rows, int cols); diff --git a/MLGkernel/runMLG.cpp b/MLGkernel/runMLG.cpp index da013e9..6d517a4 100644 --- a/MLGkernel/runMLG.cpp +++ b/MLGkernel/runMLG.cpp @@ -83,10 +83,8 @@ void runMLG(Params& p) { cout << "Computing degree features" << endl; for(auto g: dataset.graphs) g->computeDegreeFeatures(20); // all sample datasets have max degree < 20 } else { - cout << "Computing discrete features" << endl; - int num_features = get_num_features(p.features_path); - cout << "num features: " << num_features << endl; - dataset.loadDiscreteFeatures(p.features_path, num_features); + cout << "Loading discrete features" << endl; + dataset.loadDiscreteFeatures(p.features_path); } dataset.computeGram(p.levels, p.radius); From b0e71806d096b946bd6ba21f9f84fc9c38a19554 Mon Sep 17 00:00:00 2001 From: Bastian Rieck Date: Tue, 2 Jul 2019 09:49:30 +0200 Subject: [PATCH 03/11] Simplified file stream handling for feature loading --- MLGkernel/MLGdataset.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/MLGkernel/MLGdataset.cpp b/MLGkernel/MLGdataset.cpp index b7f478b..3e4c1a6 100644 --- a/MLGkernel/MLGdataset.cpp +++ b/MLGkernel/MLGdataset.cpp @@ -118,10 +118,12 @@ void MLGdataset::loadGraphs(std::string filename){ void MLGdataset::loadDiscreteFeatures( std::string filename ) { ifstream ifs(filename); - if(ifs.fail()){ + + if( !ifs ) { cout << "Failed to open " << filename << "." 
<< endl; - exit(0); + exit( -1 ); } + int numVertices = 0; int graphIndex = 0; int label; @@ -153,16 +155,22 @@ void MLGdataset::loadDiscreteFeatures( std::string filename ) { ifs.clear(); ifs.seekg( position ); - while(ifs.good()){ - ifs>>numVertices; - if(!ifs.good()) break; - for(int i=0; i<numVertices; i++){ + while( ifs ) { + ifs >> numVertices; + + if( !ifs ) + break; + + for( int i = 0; i < numVertices; i++ ) { ifs >> label; + graphs[graphIndex]->labels[i] = Cvector::Zero(numFeatures+1); graphs[graphIndex]->labels[i](label) = 1; } + graphIndex++; } + assert(graphs.size() == numGraphs); assert(graphIndex == graphs.size()); } From 6de0336dd5dab618dd5edf86df6a9d31981b4cd3 Mon Sep 17 00:00:00 2001 From: Bastian Rieck Date: Tue, 2 Jul 2019 09:49:43 +0200 Subject: [PATCH 04/11] Removed obsolete function with hard-coded feature lengths --- MLGkernel/runMLG.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/MLGkernel/runMLG.cpp b/MLGkernel/runMLG.cpp index 6d517a4..e0acfef 100644 --- a/MLGkernel/runMLG.cpp +++ b/MLGkernel/runMLG.cpp @@ -61,19 +61,6 @@ string genSaveName(string data, double eta, double gamma, int radius, int levels return ss.str(); } -// These are the number of discrete node labels for each of the benchmark datasets. -int get_num_features(string features){ - if(features.find("MUTAG") != string::npos) return 7; - if(features.find("PTC") != string::npos) return 22; - if(features.find("PROTEINS") != string::npos) return 3; - if(features.find("NCI109") != string::npos) return 38; - if(features.find("NCI1") != string::npos) return 37; - - cout << "Supplied dataset is not one of the sample datasets! You can manually change this code to use the correct number of discrete features of your dataset." 
<< endl; - exit(0); - return 0; -} - void runMLG(Params& p) { threadManager.maxthreads = p.num_threads; MLGdataset dataset(p.data_path, p.eta, p.gamma, p.grow_or_double); From 4475c2a1a05786b28c407af9f5db15bc0273d42a Mon Sep 17 00:00:00 2001 From: Bastian Rieck Date: Tue, 2 Jul 2019 09:51:05 +0200 Subject: [PATCH 05/11] Added simple `.gitignore` file --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3a774a3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*~ + +*.o +*.swp From 063c95ce0285fae3a2ca84fc0afe056ce2247c2f Mon Sep 17 00:00:00 2001 From: Bastian Rieck Date: Fri, 5 Jul 2019 13:36:08 +0200 Subject: [PATCH 06/11] Simplified graph reading function --- MLGkernel/MLGdataset.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/MLGkernel/MLGdataset.cpp b/MLGkernel/MLGdataset.cpp index 3e4c1a6..61a6b56 100644 --- a/MLGkernel/MLGdataset.cpp +++ b/MLGkernel/MLGdataset.cpp @@ -102,9 +102,16 @@ void MLGdataset::loadGraphs(std::string filename){ int i=0; int n; ifs >> numGraphs; - while(ifs.good()){ + + graphs.reserve( numGraphs ); + + while( ifs ){ ifs>>n; - if(!ifs.good()) break; + + // done with reading; skip! 
+ if( !ifs ) + break; + //cout<<"Reading graph "<<++i<<" (n="< Date: Fri, 5 Jul 2019 15:10:50 +0200 Subject: [PATCH 07/11] Fixed discrete label loading for non-contiguous sequences --- MLGkernel/MLGdataset.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/MLGkernel/MLGdataset.cpp b/MLGkernel/MLGdataset.cpp index 61a6b56..cf8e34a 100644 --- a/MLGkernel/MLGdataset.cpp +++ b/MLGkernel/MLGdataset.cpp @@ -31,6 +31,7 @@ #include "params.hpp" #include +#include <unordered_map> #include <set> void MLGdataset::condense(const int nlevels, const int leaf_radius){ @@ -132,7 +133,6 @@ void MLGdataset::loadDiscreteFeatures( std::string filename ) { } int numVertices = 0; - int graphIndex = 0; int label; int numGraphs; ifs >> numGraphs; @@ -157,11 +157,24 @@ void MLGdataset::loadDiscreteFeatures( std::string filename ) { int numFeatures = static_cast<int>( labels.size() ); cout << "Number of features: " << numFeatures << endl; + // Create a mapping of labels to indices. This makes it possible to + // load and handle graphs that have non-contiguous label sequences. + + unordered_map<int, int> label_to_index; + + { + int index = 0; + for( auto&& label : labels ) + label_to_index[label] = index++; + } + // Reset the stream and convert the labels now into their graph // representation. 
ifs.clear(); ifs.seekg( position ); + int graphIndex = 0; + while( ifs ) { ifs >> numVertices; @@ -172,7 +185,7 @@ void MLGdataset::loadDiscreteFeatures( std::string filename ) { ifs >> label; graphs[graphIndex]->labels[i] = Cvector::Zero(numFeatures+1); - graphs[graphIndex]->labels[i](label) = 1; + graphs[graphIndex]->labels[i](label_to_index[label]) = 1; } graphIndex++; From d61c8161ef75e202633a1c727135ec383ea80189 Mon Sep 17 00:00:00 2001 From: Bastian Rieck Date: Fri, 5 Jul 2019 15:13:06 +0200 Subject: [PATCH 08/11] Added debug assertions for vector access --- matrices/Cvector.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/matrices/Cvector.hpp b/matrices/Cvector.hpp index 2ac07ac..6ce4bfd 100644 --- a/matrices/Cvector.hpp +++ b/matrices/Cvector.hpp @@ -116,8 +116,8 @@ class Cvector: public DenseVector, public Serializable{ public: // element access - FIELD& operator()(const int i){return array[i];} - FIELD operator()(const int i) const {return array[i];} + FIELD& operator()(const int i){ assert( i < n ); return array[i];} + FIELD operator()(const int i) const { assert( i < n ); return array[i];} void (foreach)(std::function lambda) {for(int i=0; i lambda) const {for(int i=0; i Date: Fri, 5 Jul 2019 17:11:07 +0200 Subject: [PATCH 09/11] Ensuring that kernel values are finite --- MLGkernel/FLGkernel.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/MLGkernel/FLGkernel.cpp b/MLGkernel/FLGkernel.cpp index 1f4ee2f..410b7be 100644 --- a/MLGkernel/FLGkernel.cpp +++ b/MLGkernel/FLGkernel.cpp @@ -35,10 +35,18 @@ double FLGkernel::operator()(const FLGinstance& x1, const FLGinstance& x2) const Cvector lambda=(x1.Sinv+x2.Sinv).eigenvalues(); //double detS=1; for(int i=0; i Date: Thu, 21 Nov 2019 16:34:38 +0100 Subject: [PATCH 10/11] Extended `README.md` in preparation for `CMake` integration --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index 
b96522e..226bc3f 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,30 @@ R. Kondor, H. Pan, [The Multiscale Graph Laplacian](https://arxiv.org/abs/1603.0 * [Eigen](http://eigen.tuxfamily.org/index.php) ## Installation/Setup + +### Automated installation using `CMake` + +The project is configured to use `CMake` to provide a streamlined installation +experience. After installing `CMake` and `eigen3` using your favourite package +manager, the following commands are sufficient to compile the +executable: + +```bash +$ mkdir build +$ cd build +$ cmake ../ +$ make -j4 +``` + +When using Mac OS X, we recommend the [Homebrew](https://brew.sh) +package manager for installing the dependencies: + +```bash +$ brew install cmake eigen3 +``` + +### Manual installation using `make` + Change the EIGENDIR variable Makefile.options to the path to your installation of the Eigen library. Run the following command to create the runMLG executable in the MLGkernel directory. ```bash From d90127d48b3c288089391ce6311d0eabd3fd2bfe Mon Sep 17 00:00:00 2001 From: Bastian Rieck Date: Thu, 21 Nov 2019 16:36:36 +0100 Subject: [PATCH 11/11] Added `CMakeLists.txt` for proper integration The project can now be built as described in the manual. C++11 is automatically selected, but no optimisation flags are set yet. If this is required, the setup has to be adjusted. 
--- CMakeLists.txt | 10 ++++++++++ MLGkernel/CMakeLists.txt | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 MLGkernel/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..ecdd4da --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,10 @@ +CMAKE_MINIMUM_REQUIRED( VERSION 3.3.1 ) +PROJECT( MLGkernel CXX ) + +SET( CMAKE_CXX_STANDARD 11 ) +SET( CMAKE_CXX_STANDARD_REQUIRED ON ) + +FIND_PACKAGE( Eigen3 REQUIRED ) +FIND_PACKAGE( Threads REQUIRED ) + +ADD_SUBDIRECTORY( MLGkernel ) diff --git a/MLGkernel/CMakeLists.txt b/MLGkernel/CMakeLists.txt new file mode 100644 index 0000000..36b9055 --- /dev/null +++ b/MLGkernel/CMakeLists.txt @@ -0,0 +1,19 @@ +FILE(GLOB SOURCES + *.cpp + ../utility/*.cpp + ../matrices/matrices.cpp +) + +INCLUDE_DIRECTORIES( + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/matrices + ${CMAKE_SOURCE_DIR}/utility + ${CMAKE_SOURCE_DIR}/utility/filetypes +) + +ADD_EXECUTABLE( runMLG + ${SOURCES} +) + +TARGET_INCLUDE_DIRECTORIES( runMLG SYSTEM PUBLIC ${EIGEN3_INCLUDE_DIR} ) +TARGET_LINK_LIBRARIES( runMLG Threads::Threads )