diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3a774a3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*~ + +*.o +*.swp diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..ecdd4da --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,10 @@ +CMAKE_MINIMUM_REQUIRED( VERSION 3.3.1 ) +PROJECT( MLGkernel CXX ) + +SET( CMAKE_CXX_STANDARD 11 ) +SET( CMAKE_CXX_STANDARD_REQUIRED ON ) + +FIND_PACKAGE( Eigen3 REQUIRED ) +FIND_PACKAGE( Threads REQUIRED ) + +ADD_SUBDIRECTORY( MLGkernel ) diff --git a/MLGkernel/CMakeLists.txt b/MLGkernel/CMakeLists.txt new file mode 100644 index 0000000..36b9055 --- /dev/null +++ b/MLGkernel/CMakeLists.txt @@ -0,0 +1,19 @@ +FILE(GLOB SOURCES + *.cpp + ../utility/*.cpp + ../matrices/matrices.cpp +) + +INCLUDE_DIRECTORIES( + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/matrices + ${CMAKE_SOURCE_DIR}/utility + ${CMAKE_SOURCE_DIR}/utility/filetypes +) + +ADD_EXECUTABLE( runMLG + ${SOURCES} +) + +TARGET_INCLUDE_DIRECTORIES( runMLG SYSTEM PUBLIC ${EIGEN3_INCLUDE_DIR} ) +TARGET_LINK_LIBRARIES( runMLG Threads::Threads ) diff --git a/MLGkernel/FLGkernel.cpp b/MLGkernel/FLGkernel.cpp index 1f4ee2f..410b7be 100644 --- a/MLGkernel/FLGkernel.cpp +++ b/MLGkernel/FLGkernel.cpp @@ -35,10 +35,18 @@ double FLGkernel::operator()(const FLGinstance& x1, const FLGinstance& x2) const Cvector lambda=(x1.Sinv+x2.Sinv).eigenvalues(); //double detS=1; for(int i=0; i +#include +#include void MLGdataset::condense(const int nlevels, const int leaf_radius){ assert(nlevels>0); @@ -100,9 +103,16 @@ void MLGdataset::loadGraphs(std::string filename){ int i=0; int n; ifs >> numGraphs; - while(ifs.good()){ + + graphs.reserve( numGraphs ); + + while( ifs ){ ifs>>n; - if(!ifs.good()) break; + + // done with reading; skip! + if( !ifs ) + break; + //cout<<"Reading graph "<<++i<<" (n="<> numGraphs; - while(ifs.good()){ - ifs>>numVertices; - if(!ifs.good()) break; - for(int i=0; i labels; + + while( ifs ) { + ifs >> numVertices; // ignore this for now as we do not need it for counting + + for( int i = 0; i < numVertices; i++ ) { + ifs >> label; + labels.insert( label ); + } + } + + int numFeatures = static_cast( labels.size() ); + cout << "Number of features: " << numFeatures << endl; + + // Create a mapping of labels to indices. This makes it possible to + // load and handle graphs that have non-contiguous label sequences. + + unordered_map label_to_index; + + { + int index = 0; + for( auto&& label : labels ) + label_to_index[label] = index++; + } + + // Reset the stream and convert the labels now into their graph + // representation. + ifs.clear(); + ifs.seekg( position ); + + int graphIndex = 0; + + while( ifs ) { + ifs >> numVertices; + + if( !ifs ) + break; + + for( int i = 0; i < numVertices; i++ ) { ifs >> label; + graphs[graphIndex]->labels[i] = Cvector::Zero(numFeatures+1); - graphs[graphIndex]->labels[i](label) = 1; + graphs[graphIndex]->labels[i](label_to_index[label]) = 1; } + graphIndex++; } + assert(graphs.size() == numGraphs); assert(graphIndex == graphs.size()); } diff --git a/MLGkernel/MLGdataset.hpp b/MLGkernel/MLGdataset.hpp index 224a642..59d3566 100644 --- a/MLGkernel/MLGdataset.hpp +++ b/MLGkernel/MLGdataset.hpp @@ -47,7 +47,7 @@ class MLGdataset{ public: void loadGraphs(std::string filename); - void loadDiscreteFeatures(std::string filename, int numFeatures); + void loadDiscreteFeatures(std::string filename); void loadFeatures(std::string filename); void saveGram(std::string filename); void fillGram(double *npmatrix, int rows, int cols); diff --git a/MLGkernel/runMLG.cpp b/MLGkernel/runMLG.cpp index da013e9..e0acfef 100644 --- a/MLGkernel/runMLG.cpp +++ b/MLGkernel/runMLG.cpp @@ -61,19 +61,6 @@ string genSaveName(string data, double eta, double gamma, int radius, int levels return ss.str(); } -// These are the number of discrete node labels for each of the benchmark datasets. -int get_num_features(string features){ - if(features.find("MUTAG") != string::npos) return 7; - if(features.find("PTC") != string::npos) return 22; - if(features.find("PROTEINS") != string::npos) return 3; - if(features.find("NCI109") != string::npos) return 38; - if(features.find("NCI1") != string::npos) return 37; - - cout << "Supplied dataset is not one of the sample datasets! You can manually change this code to use the correct number of discrete features of your dataset." << endl; - exit(0); - return 0; -} - void runMLG(Params& p) { threadManager.maxthreads = p.num_threads; MLGdataset dataset(p.data_path, p.eta, p.gamma, p.grow_or_double); @@ -83,10 +70,8 @@ void runMLG(Params& p) { cout << "Computing degree features" << endl; for(auto g: dataset.graphs) g->computeDegreeFeatures(20); // all sample datasets have max degree < 20 } else { - cout << "Computing discrete features" << endl; - int num_features = get_num_features(p.features_path); - cout << "num features: " << num_features << endl; - dataset.loadDiscreteFeatures(p.features_path, num_features); + cout << "Loading discrete features" << endl; + dataset.loadDiscreteFeatures(p.features_path); } dataset.computeGram(p.levels, p.radius); diff --git a/README.md b/README.md index b96522e..226bc3f 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,30 @@ R. Kondor, H. Pan, [The Multiscale Graph Laplacian](https://arxiv.org/abs/1603.0 * [Eigen](http://eigen.tuxfamily.org/index.php) ## Installation/Setup + +### Automated installation using `CMake` + +The project is configured to use `CMake` to provide a streamlined installation +experience. After installing `CMake` and `eigen3` using your favourite package +manager, the following commands are sufficient to compile the +executable: + +```bash +$ mkdir build +$ cd build +$ cmake ../ +$ make -j4 +``` + +When using Mac OS X, we recommend the [Homebrew](https://brew.sh) +package manager for installing the dependencies: + +```bash +$ brew install cmake eigen3 +``` + +### Manual installation using `make` + Change the EIGENDIR variable Makefile.options to the path to your installation of the Eigen library. Run the following command to create the runMLG executable in the MLGkernel directory. ```bash diff --git a/matrices/Cvector.hpp b/matrices/Cvector.hpp index 2ac07ac..6ce4bfd 100644 --- a/matrices/Cvector.hpp +++ b/matrices/Cvector.hpp @@ -116,8 +116,8 @@ class Cvector: public DenseVector, public Serializable{ public: // element access - FIELD& operator()(const int i){return array[i];} - FIELD operator()(const int i) const {return array[i];} + FIELD& operator()(const int i){ assert( i < n ); return array[i];} + FIELD operator()(const int i) const { assert( i < n ); return array[i];} void (foreach)(std::function lambda) {for(int i=0; i lambda) const {for(int i=0; i