diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..2c318f4 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,37 @@ +CMAKE_MINIMUM_REQUIRED(VERSION 2.8) +PROJECT(rcm) +INCLUDE(cmake/SBELUtils.cmake) + +SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) + +enable_cuda_support() +MESSAGE(STATUS "Cuda version: ${CUDA_VERSION}") + +SET(RCM_HEADERS + rcm/common.h + rcm/rcm_um.h + rcm/exception.h + rcm/timer.h + ) + +SET(RCM_CUHEADERS + rcm/device/kernels.cuh + ) + +SET(MMIO_FILES + mm_io/mm_io.h + mm_io/mm_io.c + ) + +SOURCE_GROUP("Headers" FILES ${MC64_HEADERS}) +SOURCE_GROUP("CUDA Headers" FILES ${MC64_CUHEADERS}) +SOURCE_GROUP("MM_IO" FILES ${MMIO_FILES}) + +INCLUDE_DIRECTORIES( + ${CMAKE_SOURCE_DIR} + ) + +IF(NOT (${CUDA_VERSION} VERSION_LESS "6.0")) + cuda_add_executable(driver_um driver_um.cu ${MC64_HEADERS} ${MC64_CUHEADERS} ${MMIO_FILES}) + cuda_add_executable(testing testing.cu ${MC64_HEADERS} ${MC64_CUHEADERS} ${MMIO_FILES}) +ENDIF() diff --git a/SBELUtils.cmake b/SBELUtils.cmake new file mode 100644 index 0000000..c2e36c5 --- /dev/null +++ b/SBELUtils.cmake @@ -0,0 +1,166 @@ +#################################################### +## Only modify if you know what you're doing. ## +#################################################### + + +# Helps Eclipse/CDT find our include directories +set(CMAKE_VERBOSE_MAKEFILE on) + +# Detect the bitness of our machine (eg 32- or 64-bit) +# C-equiv: sizeof(void*) +# Alt: 8*sizeof(void*) +math(EXPR CMAKE_ARCH_BITNESS 8*${CMAKE_SIZEOF_VOID_P}) + +# For non-multi-configuration generators (eg, make, Eclipse) +# The Visual Studio generator creates a single project with all these +set(CMAKE_BUILD_TYPE "Release" CACHE STRING "For single-configuration generators (e.g. make) set the type of build: Release, Debug, RelWithDebugInfo, MinSizeRel") +SET_PROPERTY(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebugInfo" "MinSizeRel") + + +#################################################### +## ---------------------------------------------- ## +## - - ## +## - Enable MPI Support - ## +## - - ## +## ---------------------------------------------- ## +#################################################### + +# Begin configuring MPI options +macro(enable_mpi_support) + + find_package("MPI" REQUIRED) + + # Add the MPI-specific compiler and linker flags + # Also, search for #includes in MPI's paths + + list(APPEND CMAKE_C_COMPILE_FLAGS ${MPI_C_COMPILE_FLAGS}) + list(APPEND CMAKE_C_LINK_FLAGS ${MPI_C_LINK_FLAGS}) + include_directories(${MPI_C_INCLUDE_PATH}) + + list(APPEND CMAKE_CXX_COMPILE_FLAGS ${MPI_CXX_COMPILE_FLAGS}) + list(APPEND CMAKE_CXX_LINK_FLAGS ${MPI_CXX_LINK_FLAGS}) + include_directories(${MPI_CXX_INCLUDE_PATH}) + +endmacro(enable_mpi_support) +# Done configuring MPI Options + + +#################################################### +## ---------------------------------------------- ## +## - - ## +## - Enable OpenMP Support - ## +## - - ## +## ---------------------------------------------- ## +#################################################### + +# Begin configuring OpenMP options +macro(enable_openmp_support) + + find_package("OpenMP" REQUIRED) + + # Add the OpenMP-specific compiler and linker flags + list(APPEND CMAKE_CXX_FLAGS ${OpenMP_CXX_FLAGS}) + list(APPEND CMAKE_C_FLAGS ${OpenMP_C_FLAGS}) + +endmacro(enable_openmp_support) +# Done configuring OpenMP Options + + +#################################################### +## ---------------------------------------------- ## +## - - ## +## - Enable CUDA Support - 
## +## - - ## +## ---------------------------------------------- ## +#################################################### + +# Begin configuring CUDA options +# This is ugly... +macro(enable_cuda_support) + + # Hide a number of options from the default CMake screen + mark_as_advanced(CLEAR CUDA_BUILD_CUBIN) + mark_as_advanced(CLEAR CUDA_SDK_ROOT_DIR) + mark_as_advanced(CLEAR CUDA_TOOLKIT_ROOT_DIR) + mark_as_advanced(CLEAR CUDA_VERBOSE_BUILD) + mark_as_advanced(CLEAR CUDA_FAST_MATH) + mark_as_advanced(CLEAR CUDA_USE_CUSTOM_COMPILER) + mark_as_advanced(CLEAR CUDA_VERBOSE_PTX) + mark_as_advanced(CLEAR CUDA_DEVICE_VERSION) + + # select Compute Capability + # This needs to be manually updated when devices with new CCs come out + set(CUDA_DEVICE_VERSION "20" CACHE STRING "CUDA Device Version") + set_property(CACHE CUDA_DEVICE_VERSION PROPERTY STRINGS "10" "11" "12" "13" "20" "21" "30" "35") + + # Enable fast-math for CUDA (_not_ GCC) + set(CUDA_FAST_MATH TRUE CACHE BOOL "Use Fast Math Operations") + + # Tell nvcc to use a separate compiler for non-CUDA code. + # This is useful if you need to use an older of GCC than comes by default + set(CUDA_USE_CUSTOM_COMPILER FALSE CACHE BOOL "Use Custom Compiler") + set(CUDA_CUSTOM_COMPILER "" CACHE STRING "Custom C++ Compiler for CUDA If Needed") + + # Shows register usage, etc + set(CUDA_VERBOSE_PTX TRUE CACHE BOOL "Show Verbose Kernel Info During Compilation") + + + # Let's get going... + find_package("CUDA" REQUIRED) + + # Frequently used in the examples + cuda_include_directories(${CUDA_SDK_ROOT_DIR}/common/inc) + cuda_include_directories(${CUDA_SDK_ROOT_DIR}/../shared/inc) + + set(CUDA_SDK_LIB_DIR ${CUDA_SDK_ROOT_DIR}/common/lib + ${CUDA_SDK_ROOT_DIR}/lib ${CUDA_SDK_ROOT_DIR}/../shared/lib) + + # these are no longer needed + # # Find path to shrutil libs, from CUDA SDK + # find_library(LIBSHRUTIL + # NAMES shrUtils${CMAKE_ARCH_BITNESS} shrutil_${CMAKE_SYSTEM_PROCESSOR} + # PATHS ${CUDA_SDK_LIB_DIR}) + # find_library(LIBSHRUTIL_DBG + # NAMES shrUtils${CMAKE_ARCH_BITNESS}D shrutil_${CMAKE_SYSTEM_PROCESSOR}D + # PATHS ${CUDA_SDK_LIB_DIR}) + # + # # Find path to cutil libs, from CUDA SDK + # find_library(LIBCUTIL + # NAMES cutil${CMAKE_ARCH_BITNESS} cutil_${CMAKE_SYSTEM_PROCESSOR} + # PATHS ${CUDA_SDK_LIB_DIR}) + # find_library(LIBCUTIL_DBG + # NAMES cutil${arch}D cutil_${CMAKE_SYSTEM_PROCESSOR}D + # PATHS ${CUDA_SDK_LIB_DIR}) + + # Set custom compiler flags + set(CUDA_NVCC_FLAGS "" CACHE STRING "" FORCE) + + if(CUDA_USE_CUSTOM_COMPILER) + mark_as_advanced(CLEAR CUDA_CUSTOM_COMPILER) + list(APPEND CUDA_NVCC_FLAGS "-ccbin=${CUDA_CUSTOM_COMPILER}") + else() + mark_as_advanced(FORCE CUDA_CUSTOM_COMPILER) + endif() + + # Macro for setting the Compute Capability + macro(set_compute_capability cc) + list(APPEND CUDA_NVCC_FLAGS "-gencode=arch=compute_${cc},code=sm_${cc}") + list(APPEND CUDA_NVCC_FLAGS "-gencode=arch=compute_${cc},code=compute_${cc}") + endmacro(set_compute_capability) + + # Tell nvcc to compile for the selected Compute Capability + # This can also be called from the main CMakeLists.txt to enable + # support for additional CCs + set_compute_capability(${CUDA_DEVICE_VERSION}) + + # Enable fast-math if selected + if(CUDA_FAST_MATH) + list(APPEND CUDA_NVCC_FLAGS "-use_fast_math") + endif() + + # Enable verbose compile if selected + if(CUDA_VERBOSE_PTX) + list(APPEND CUDA_NVCC_FLAGS "--ptxas-options=-v") + endif() +endmacro(enable_cuda_support) +# Done configuring CUDA options diff --git a/cmake/SBELUtils.cmake b/cmake/SBELUtils.cmake new file mode 
100644 index 0000000..4df6ea1 --- /dev/null +++ b/cmake/SBELUtils.cmake @@ -0,0 +1,166 @@ +#################################################### +## Only modify if you know what you're doing. ## +#################################################### + + +# Helps Eclipse/CDT find our include directories +set(CMAKE_VERBOSE_MAKEFILE on) + +# Detect the bitness of our machine (eg 32- or 64-bit) +# C-equiv: sizeof(void*) +# Alt: 8*sizeof(void*) +math(EXPR CMAKE_ARCH_BITNESS 8*${CMAKE_SIZEOF_VOID_P}) + +# For non-multi-configuration generators (eg, make, Eclipse) +# The Visual Studio generator creates a single project with all these +set(CMAKE_BUILD_TYPE "Release" CACHE STRING "For single-configuration generators (e.g. make) set the type of build: Release, Debug, RelWithDebugInfo, MinSizeRel") +SET_PROPERTY(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebugInfo" "MinSizeRel") + + +#################################################### +## ---------------------------------------------- ## +## - - ## +## - Enable MPI Support - ## +## - - ## +## ---------------------------------------------- ## +#################################################### + +# Begin configuring MPI options +macro(enable_mpi_support) + + find_package("MPI" REQUIRED) + + # Add the MPI-specific compiler and linker flags + # Also, search for #includes in MPI's paths + + list(APPEND CMAKE_C_COMPILE_FLAGS ${MPI_C_COMPILE_FLAGS}) + list(APPEND CMAKE_C_LINK_FLAGS ${MPI_C_LINK_FLAGS}) + include_directories(${MPI_C_INCLUDE_PATH}) + + list(APPEND CMAKE_CXX_COMPILE_FLAGS ${MPI_CXX_COMPILE_FLAGS}) + list(APPEND CMAKE_CXX_LINK_FLAGS ${MPI_CXX_LINK_FLAGS}) + include_directories(${MPI_CXX_INCLUDE_PATH}) + +endmacro(enable_mpi_support) +# Done configuring MPI Options + + +#################################################### +## ---------------------------------------------- ## +## - - ## +## - Enable OpenMP Support - ## +## - - ## +## ---------------------------------------------- ## +#################################################### + +# Begin configuring OpenMP options +macro(enable_openmp_support) + + find_package("OpenMP" REQUIRED) + + # Add the OpenMP-specific compiler and linker flags + list(APPEND CMAKE_CXX_FLAGS ${OpenMP_CXX_FLAGS}) + list(APPEND CMAKE_C_FLAGS ${OpenMP_C_FLAGS}) + +endmacro(enable_openmp_support) +# Done configuring OpenMP Options + + +#################################################### +## ---------------------------------------------- ## +## - - ## +## - Enable CUDA Support - ## +## - - ## +## ---------------------------------------------- ## +#################################################### + +# Begin configuring CUDA options +# This is ugly... 
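# With the defaults chosen below (CUDA_DEVICE_VERSION "20", CUDA_FAST_MATH and
# CUDA_VERBOSE_PTX both ON), enable_cuda_support() leaves CUDA_NVCC_FLAGS set
# to roughly the following (a sketch of the expected expansion, not output
# captured from an actual configure run):
#
#   -gencode=arch=compute_20,code=sm_20
#   -gencode=arch=compute_20,code=compute_20
#   -use_fast_math
#   --ptxas-options=-v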
+macro(enable_cuda_support) + + # Hide a number of options from the default CMake screen + mark_as_advanced(CLEAR CUDA_BUILD_CUBIN) + mark_as_advanced(CLEAR CUDA_SDK_ROOT_DIR) + mark_as_advanced(CLEAR CUDA_TOOLKIT_ROOT_DIR) + mark_as_advanced(CLEAR CUDA_VERBOSE_BUILD) + mark_as_advanced(CLEAR CUDA_FAST_MATH) + mark_as_advanced(CLEAR CUDA_USE_CUSTOM_COMPILER) + mark_as_advanced(CLEAR CUDA_VERBOSE_PTX) + mark_as_advanced(CLEAR CUDA_DEVICE_VERSION) + + # select Compute Capability + # This needs to be manually updated when devices with new CCs come out + set(CUDA_DEVICE_VERSION "20" CACHE STRING "CUDA Device Version") + set_property(CACHE CUDA_DEVICE_VERSION PROPERTY STRINGS "10" "11" "12" "13" "20" "21" "30" "35") + + # Enable fast-math for CUDA (_not_ GCC) + set(CUDA_FAST_MATH TRUE CACHE BOOL "Use Fast Math Operations") + + # Tell nvcc to use a separate compiler for non-CUDA code. + # This is useful if you need to use an older of GCC than comes by default + set(CUDA_USE_CUSTOM_COMPILER FALSE CACHE BOOL "Use Custom Compiler") + set(CUDA_CUSTOM_COMPILER "" CACHE STRING "Custom C++ Compiler for CUDA If Needed") + + # Shows register usage, etc + set(CUDA_VERBOSE_PTX TRUE CACHE BOOL "Show Verbose Kernel Info During Compilation") + + + # Let's get going... + find_package("CUDA" REQUIRED) + + # Frequently used in the examples + cuda_include_directories(${CUDA_SDK_ROOT_DIR}/common/inc) + cuda_include_directories(${CUDA_SDK_ROOT_DIR}/../shared/inc) + + set(CUDA_SDK_LIB_DIR ${CUDA_SDK_ROOT_DIR}/common/lib + ${CUDA_SDK_ROOT_DIR}/lib ${CUDA_SDK_ROOT_DIR}/../shared/lib) + + # these are no longer needed + # # Find path to shrutil libs, from CUDA SDK + # find_library(LIBSHRUTIL + # NAMES shrUtils${CMAKE_ARCH_BITNESS} shrutil_${CMAKE_SYSTEM_PROCESSOR} + # PATHS ${CUDA_SDK_LIB_DIR}) + # find_library(LIBSHRUTIL_DBG + # NAMES shrUtils${CMAKE_ARCH_BITNESS}D shrutil_${CMAKE_SYSTEM_PROCESSOR}D + # PATHS ${CUDA_SDK_LIB_DIR}) + # + # # Find path to cutil libs, from CUDA SDK + # find_library(LIBCUTIL + # NAMES cutil${CMAKE_ARCH_BITNESS} cutil_${CMAKE_SYSTEM_PROCESSOR} + # PATHS ${CUDA_SDK_LIB_DIR}) + # find_library(LIBCUTIL_DBG + # NAMES cutil${arch}D cutil_${CMAKE_SYSTEM_PROCESSOR}D + # PATHS ${CUDA_SDK_LIB_DIR}) + + # Set custom compiler flags + set(CUDA_NVCC_FLAGS "" CACHE STRING "" FORCE) + + if(CUDA_USE_CUSTOM_COMPILER) + mark_as_advanced(CLEAR CUDA_CUSTOM_COMPILER) + list(APPEND CUDA_NVCC_FLAGS "-ccbin=${CUDA_CUSTOM_COMPILER}") + else() + mark_as_advanced(FORCE CUDA_CUSTOM_COMPILER) + endif() + + # Macro for setting the Compute Capability + macro(set_compute_capability cc) + list(APPEND CUDA_NVCC_FLAGS "-gencode=arch=compute_${cc},code=sm_${cc}") + list(APPEND CUDA_NVCC_FLAGS "-gencode=arch=compute_${cc},code=compute_${cc}") + endmacro(set_compute_capability) + + # Tell nvcc to compile for the selected Compute Capability + # This can also be called from the main CMakeLists.txt to enable + # support for additional CCs + set_compute_capability(${CUDA_DEVICE_VERSION}) + + # Enable fast-math if selected + if(CUDA_FAST_MATH) + list(APPEND CUDA_NVCC_FLAGS "-use_fast_math") + endif() + + # Enable verbose compile if selected + if(CUDA_VERBOSE_PTX) + list(APPEND CUDA_NVCC_FLAGS "--ptxas-options=-v") + endif() +endmacro(enable_cuda_support) +# Done configuring CUDA options diff --git a/driver_um.cu b/driver_um.cu new file mode 100644 index 0000000..30db9ea --- /dev/null +++ b/driver_um.cu @@ -0,0 +1,80 @@ +#include +#include +#include +#include + +extern "C" { +#include "mm_io/mm_io.h" +} + +#include +#include + 
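// Worked example of the COO -> CSR conversion that rcm::coo2csr performs in
// main() below (0-based indices, 3x3 matrix with nonzeros (0,0)=2, (1,1)=2,
// (2,0)=-1 and (2,2)=2):
//
//   COO input:   row_i = {0, 1, 2, 2}, col_j = {0, 1, 0, 2}, vals = {2, 2, -1, 2}
//   CSR output:  row_offsets    = {0, 1, 2, 4}
//                column_indices = {0, 1, 0, 2}
//                values         = {2, 2, -1, 2}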
+typedef typename rcm::ManagedVector IntVector; +typedef typename rcm::ManagedVector DoubleVector; + +int main(int argc, char **argv) +{ + if (argc < 3) { + std::cout << "Usage:\n driver input_file output_file" << std::endl; + return 1; + } + + // Read matrix from file (COO format) + MM_typecode matcode; + int M, N, nnz; + int* row_i; + int* col_j; + double* vals; + + std::cout << "Read MTX file... "; + int err = mm_read_mtx_crd(argv[1], &M, &N, &nnz, &row_i, &col_j, &vals, &matcode); + if (err != 0) { + std::cout << "error: " << err << std::endl; + return 1; + } + std::cout << "M = " << M << " N = " << N << " nnz = " << nnz << std::endl; + + // Switch to 0-based indices + for (int i = 0; i < nnz; i++) { + row_i[i]--; + col_j[i]--; + } + // Convert to CSR format and load into thrust vectors. + IntVector row_offsets(N + 1); + IntVector column_indices(nnz); + DoubleVector values(nnz); + + std::cout << "Convert to CSR" << std::endl; + rcm::coo2csr(M, N, nnz, row_i, col_j, vals, row_offsets, column_indices, values); + + // Print thrust vectors + /* + std::cout << "Row offsets\n"; + thrust::copy(row_offsets.begin(), row_offsets.end(), std::ostream_iterator(std::cout, "\n")); + std::cout << "Column indices\n"; + thrust::copy(column_indices.begin(), column_indices.end(), std::ostream_iterator(std::cout, "\n")); + std::cout << "Values\n"; + thrust::copy(values.begin(), values.end(), std::ostream_iterator(std::cout, "\n")); + */ + + // Run the RCM algorithm + rcm::RCM_UM algo(row_offsets, column_indices, values); + + std::cout << "Run RCM... "; + try { + algo.execute(); + } catch (const rcm::system_error& se) { + std::cout << "error " << se.reason() << std::endl; + return 1; + } + std::cout << "success" << std::endl; + + std::cout << "Write output file " << argv[2] << std::endl; + std::ofstream fout; + fout.open(argv[2]); + //// algo.print(fout); + fout.close(); + + return 0; +} diff --git a/mm_io/mm_io.c b/mm_io/mm_io.c new file mode 100644 index 0000000..6999991 --- /dev/null +++ b/mm_io/mm_io.c @@ -0,0 +1,844 @@ +# include +# include +# include +# include +# include + +# include "mm_io.h" + +/******************************************************************************/ + +int mm_is_valid ( MM_typecode matcode ) + +/******************************************************************************/ +/* + Purpose: + + MM_IS_VALID checks whether the MM header information is valid. + + Modified: + + 31 October 2008 + + Parameters: + + Input, MM_typecode MATCODE, the header information. + + Output, int MM_IS_VALID, is TRUE if the matrix code is valid. +*/ +{ + if ( !mm_is_matrix ( matcode ) ) + { + return 0; + } + + if ( mm_is_dense ( matcode ) && mm_is_pattern ( matcode ) ) + { + return 0; + } + + if ( mm_is_real ( matcode ) && mm_is_hermitian ( matcode ) ) + { + return 0; + } + + if ( mm_is_pattern ( matcode ) && + ( mm_is_hermitian ( matcode ) || mm_is_skew ( matcode ) ) ) + { + return 0; + } + return 1; +} +/******************************************************************************/ + +int mm_read_banner ( FILE *f, MM_typecode *matcode ) + +/******************************************************************************/ +/* + Purpose: + + MM_READ_BANNER reads the header line of an MM file. + + Modified: + + 31 October 2008 + + Parameters: + + Input, FILE *F, a pointer to the input file. + + Output, MM_typecode *MATCODE, the header information. 
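  Example:

    The banner must be the first line of the file; for a sparse real matrix
    with no symmetry it reads

      %%MatrixMarket matrix coordinate real general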
+*/ +{ + char line[MM_MAX_LINE_LENGTH]; + char banner[MM_MAX_TOKEN_LENGTH]; + char mtx[MM_MAX_TOKEN_LENGTH]; + char crd[MM_MAX_TOKEN_LENGTH]; + char data_type[MM_MAX_TOKEN_LENGTH]; + char storage_scheme[MM_MAX_TOKEN_LENGTH]; + char *p; + + mm_clear_typecode ( matcode ); + + if ( fgets ( line, MM_MAX_LINE_LENGTH, f ) == NULL ) + { + return MM_PREMATURE_EOF; + } + + if ( sscanf ( line, "%s %s %s %s %s", banner, mtx, crd, data_type, + storage_scheme ) != 5 ) + { + return MM_PREMATURE_EOF; + } + + for (p=mtx; *p!='\0'; *p=tolower(*p),p++); /* convert to lower case */ + for (p=crd; *p!='\0'; *p=tolower(*p),p++); + for (p=data_type; *p!='\0'; *p=tolower(*p),p++); + for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++); +/* + check for banner +*/ + if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0) + return MM_NO_HEADER; + +/* + first field should be "mtx" +*/ + if (strcmp(mtx, MM_MTX_STR) != 0) + return MM_UNSUPPORTED_TYPE; + mm_set_matrix(matcode); + +/* + second field describes whether this is a sparse matrix (in coordinate + storgae) or a dense array +*/ + if (strcmp(crd, MM_SPARSE_STR) == 0) + mm_set_sparse(matcode); + else + if (strcmp(crd, MM_DENSE_STR) == 0) + mm_set_dense(matcode); + else + return MM_UNSUPPORTED_TYPE; + +/* + third field +*/ + if (strcmp(data_type, MM_REAL_STR) == 0) + { + mm_set_real(matcode); + } + else if (strcmp(data_type, MM_COMPLEX_STR) == 0) + { + mm_set_complex(matcode); + } + else if (strcmp(data_type, MM_PATTERN_STR) == 0) + { + mm_set_pattern(matcode); + } + else if (strcmp(data_type, MM_INT_STR) == 0) + { + mm_set_integer(matcode); + } + else + { + return MM_UNSUPPORTED_TYPE; + } +/* + fourth field +*/ + + if (strcmp(storage_scheme, MM_GENERAL_STR) == 0) + mm_set_general(matcode); + else + if (strcmp(storage_scheme, MM_SYMM_STR) == 0) + mm_set_symmetric(matcode); + else + if (strcmp(storage_scheme, MM_HERM_STR) == 0) + mm_set_hermitian(matcode); + else + if (strcmp(storage_scheme, MM_SKEW_STR) == 0) + mm_set_skew(matcode); + else + return MM_UNSUPPORTED_TYPE; + + + return 0; +} +/******************************************************************************/ + +int mm_read_mtx_array_size ( FILE *f, int *M, int *N ) + +/******************************************************************************/ +/* + Purpose: + + MM_READ_MTX_ARRAY_SIZE reads the size line of an MM array file. + + Modified: + + 03 November 2008 + + Parameters: + + Input, FILE *F, a pointer to the input file. + + Output, int *M, the number of rows, as read from the file. + + Output, int *N, the number of columns, as read from the file. + + Output, MM_READ_MTX_ARRAY_SIZE, an error flag. + 0, no error. 
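  Example:

    After the banner and any '%' comment lines, an array file carries a size
    line of the form "M N", for instance "10 5" for a 10 by 5 dense array.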
+*/ +{ + char line[MM_MAX_LINE_LENGTH]; + int num_items_read; +/* + set return null parameter values, in case we exit with errors +*/ + *M = 0; + *N = 0; +/* + now continue scanning until you reach the end-of-comments +*/ + do + { + if ( fgets ( line, MM_MAX_LINE_LENGTH, f ) == NULL ) + { + return MM_PREMATURE_EOF; + } + } while ( line[0] == '%' ); +/* + line[] is either blank or has M,N, nz +*/ + if ( sscanf ( line, "%d %d", M, N ) == 2 ) + { + return 0; + } + else + { + do + { + num_items_read = fscanf ( f, "%d %d", M, N ); + if ( num_items_read == EOF ) + { + return MM_PREMATURE_EOF; + } + } + while ( num_items_read != 2 ); + } + return 0; +} +/******************************************************************************/ + +int mm_read_mtx_crd(char *fname, int *M, int *N, int *nz, int **I, int **J, + double **val, MM_typecode *matcode) + +/******************************************************************************/ +/* + Purpose: + + MM_READ_MTX_CRD reads the values in an MM coordinate file. + + Discussion: + + This function allocates the storage for the arrays. + + mm_read_mtx_crd() fills M, N, nz, array of values, and return + type code, e.g. 'MCRS' + + if matrix is complex, values[] is of size 2*nz, + (nz pairs of real/imaginary values) + + Modified: + + 31 October 2008 + + Parameters: + +*/ +{ + int ret_code; + FILE *f; + + if (strcmp(fname, "stdin") == 0) f=stdin; + else + if ((f = fopen(fname, "r")) == NULL) + return MM_COULD_NOT_READ_FILE; + + + if ((ret_code = mm_read_banner(f, matcode)) != 0) + return ret_code; + + if (!(mm_is_valid(*matcode) && mm_is_sparse(*matcode) && + mm_is_matrix(*matcode))) + return MM_UNSUPPORTED_TYPE; + + if ((ret_code = mm_read_mtx_crd_size(f, M, N, nz)) != 0) + return ret_code; + + + *I = (int *) malloc(*nz * sizeof(int)); + *J = (int *) malloc(*nz * sizeof(int)); + *val = NULL; + + if (mm_is_complex(*matcode)) + { + *val = (double *) malloc(*nz * 2 * sizeof(double)); + ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, + *matcode); + if (ret_code != 0) return ret_code; + } + else if (mm_is_real(*matcode)) + { + *val = (double *) malloc(*nz * sizeof(double)); + ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, + *matcode); + if (ret_code != 0) return ret_code; + } + + else if (mm_is_pattern(*matcode)) + { + ret_code = mm_read_mtx_crd_data(f, *M, *N, *nz, *I, *J, *val, + *matcode); + if (ret_code != 0) return ret_code; + } + + if (f != stdin) fclose(f); + return 0; +} +/******************************************************************************/ + +int mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, int I[], int J[], + double val[], MM_typecode matcode) + +/******************************************************************************/ +/* + Purpose: + + MM_READ_MTX_CRD_DATA reads the values in an MM coordinate file. + + Discussion: + + This function assumes the array storage has already been allocated. + + Modified: + + 31 October 2008 + + Parameters: + + Input, FILE *F, a pointer to the input file. +*/ +{ + int i; + + if (mm_is_complex(matcode)) + { + for (i=0; i +// Restore NDEBUG mode if it was active. +# ifdef NDEBUG_ACTIVE +# define NDEBUG +# undef NDEBUG_ACTIVE +# endif +#else +// Include the assert.h header file using whatever the +// current definition of NDEBUG is. 
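// For reference, the complete idiom used by this header keeps assert() active
// even in builds that define NDEBUG. Sketched in full (the opening half of the
// #ifdef block is assumed to follow the standard pattern):
//
//   #ifdef NDEBUG               // asserts disabled (e.g. a Release build)
//   #  undef  NDEBUG            // re-enable them just for <assert.h>
//   #  define NDEBUG_ACTIVE
//   #  include <assert.h>
//      (restore NDEBUG, as done in the block above)
//   #else
//   #  include <assert.h>       // NDEBUG not set; nothing to juggle
//   #endif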
+# include +#endif + +# include +# include +# include + + +// ---------------------------------------------------------------------------- + + +namespace rcm { + +const unsigned int BLOCK_SIZE = 512; + +const unsigned int MAX_GRID_DIMENSION = 32768; + +inline +void kernelConfigAdjust(int &numThreads, int &numBlockX, const int numThreadsMax) { + if (numThreads > numThreadsMax) { + numBlockX = (numThreads + numThreadsMax - 1) / numThreadsMax; + numThreads = numThreadsMax; + } +} + +inline +void kernelConfigAdjust(int &numThreads, int &numBlockX, int &numBlockY, const int numThreadsMax, const int numBlockXMax) { + kernelConfigAdjust(numThreads, numBlockX, numThreadsMax); + if (numBlockX > numBlockXMax) { + numBlockY = (numBlockX + numBlockXMax - 1) / numBlockXMax; + numBlockX = numBlockXMax; + } +} + +template +class ManagedVector { + T* m_p_array; + size_t m_size; + +public: + typedef typename thrust::device_ptr iterator; + + ManagedVector(): m_p_array(0), m_size(0) {} + ManagedVector(size_t n): m_size(n) { + cudaMallocManaged(&m_p_array, sizeof(T) * n); + } + ManagedVector(size_t n, T val): m_size(n) { + cudaMallocManaged(&m_p_array, sizeof(T) * n); + thrust::fill(thrust::cuda::par, m_p_array, m_p_array + n, val); + cudaDeviceSynchronize(); + } + ManagedVector(const ManagedVector &a): m_size(a.m_size) { + cudaMallocManaged(&m_p_array, sizeof(T) * a.m_size); + thrust::copy(thrust::cuda::par, a.m_p_array, a.m_p_array + a.m_size, m_p_array); + cudaDeviceSynchronize(); + } + ~ManagedVector() {cudaFree(m_p_array);} + + ManagedVector& operator=(const ManagedVector &a) { + if (m_size < a.m_size) { + m_size = a.m_size; + cudaFree(m_p_array); + cudaMallocManaged(&m_p_array, sizeof(T) * a.m_size); + thrust::copy(thrust::cuda::par, a.m_p_array, a.m_p_array + a.m_size, m_p_array); + cudaDeviceSynchronize(); + } else { + m_size = a.m_size; + thrust::copy(thrust::cuda::par, a.m_p_array, a.m_p_array + a.m_size, m_p_array); + cudaDeviceSynchronize(); + } + + return *this; + } + + thrust::device_ptr begin() const {return thrust::device_pointer_cast(&m_p_array[0]);} + thrust::device_ptr end() const {return thrust::device_pointer_cast(&m_p_array[m_size]);} + + T& operator[](size_t n) {return m_p_array[n];} + const T& operator[](size_t n) const {return m_p_array[n];} + + size_t size() const {return m_size;} + + void resize(size_t n) { + if (m_size >= n) m_size = n; + else { + T *p_tmp; + cudaMallocManaged(&p_tmp, sizeof(T) * n); + + if (m_size > 0) { + thrust::copy(thrust::cuda::par, m_p_array, m_p_array + m_size, p_tmp); + cudaDeviceSynchronize(); + } + + m_size = n; + cudaFree(m_p_array); + + m_p_array = p_tmp; + } + } + +}; + +// ----------------------------------------------------------------------------- +// Convert matrix from COO to CSR format +// ----------------------------------------------------------------------------- +template +void coo2csr(const int n_row, + const int n_col, + const int nnz, + const int Ai[], + const int Aj[], + const double Ax[], + Ivec& Bp, + Ivec& Bj, + Rvec& Bx) +{ + //compute number of non-zero entries per row of A + for (int i = 0; i <= n_row; i++) + Bp[i] = 0; + + for (int i = 0; i < nnz; i++) + Bp[Ai[i]]++; + + //cumsum the nnz per row to get Bp[] + for(int i = 0, cumsum = 0; i < n_row; i++) { + int temp = Bp[i]; + Bp[i] = cumsum; + cumsum += temp; + } + Bp[n_row] = nnz; + + //write Aj,Ax into Bj,Bx + for(int i = 0; i < nnz; i++) { + int row = Ai[i]; + int dest = Bp[row]; + + Bj[dest] = Aj[i]; + Bx[dest] = Ax[i]; + + Bp[row]++; + } + + for(int i = 0, last = 0; i <= 
n_row; i++) { + int temp = Bp[i]; + Bp[i] = last; + last = temp; + } +} + + +} // namespace rcm + + +#endif diff --git a/rcm/device/kernels.cuh b/rcm/device/kernels.cuh new file mode 100644 index 0000000..8866945 --- /dev/null +++ b/rcm/device/kernels.cuh @@ -0,0 +1,57 @@ +#ifndef KERNELS_CUH +#define KERNELS_CUH + +namespace rcm { + +namespace device { + +__global__ void generalToSymmetric(int nnz, + const int * row_indices, + const int * column_indices, + int * new_row_indices, + int * new_column_indices) +{ + int tid = threadIdx.x + (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x; + + if (tid >= nnz) return; + + new_row_indices[tid << 1] = row_indices[tid]; + new_column_indices[tid << 1] = column_indices[tid]; + new_row_indices[(tid << 1) + 1] = column_indices[tid]; + new_column_indices[(tid << 1) + 1] = row_indices[tid]; +} + +__global__ void achieveLevels(int N, + const int* row_offsets, + const int* column_indices, + bool* frontier, + int* visited, + int* updated_by, + int* levels) +{ + int bid = blockIdx.x + blockIdx.y * gridDim.x; + + if (bid >= N) return; + + if (!frontier[bid]) return; + + if (threadIdx.x == 0) + frontier[bid] = false; + + int start_idx = row_offsets[bid], end_idx = row_offsets[bid + 1]; + int cur_cost = levels[bid]; + + for (int tid = start_idx + threadIdx.x; tid < end_idx; tid += blockDim.x) { + int column = column_indices[tid]; + if (visited[column]) continue; + visited[column] = true; + frontier[column] = true; + updated_by[column] = bid + 1; + levels[column] = cur_cost + 1; + } +} + +} // namespace device +} // namespace rcm + +#endif diff --git a/rcm/exception.h b/rcm/exception.h new file mode 100644 index 0000000..3454a00 --- /dev/null +++ b/rcm/exception.h @@ -0,0 +1,41 @@ +#ifndef EXCEPTION_H +#define EXCEPTION_H + +#include +#include + +namespace rcm { + +class system_error : public std::runtime_error +{ +public: + enum Reason + { + Negative_MC64_weight = -1, + Matrix_singular = -2 + }; + + system_error(Reason reason, + const std::string& what_arg) + : std::runtime_error(what_arg), + m_reason(reason) + {} + + system_error(Reason reason, + const char* what_arg) + : std::runtime_error(what_arg), + m_reason(reason) + {} + + virtual ~system_error() throw() {} + + Reason reason() const {return m_reason;} + +private: + Reason m_reason; +}; + +} // namespace rcm + + +#endif diff --git a/rcm/rcm_um.h b/rcm/rcm_um.h new file mode 100644 index 0000000..14d2f5f --- /dev/null +++ b/rcm/rcm_um.h @@ -0,0 +1,266 @@ +#ifndef RCM_UM_H +#define RCM_UM_H + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern "C" { +#include "mm_io/mm_io.h" +} + +namespace rcm { + +class RCM_UM +{ +private: + typedef ManagedVector IntVector; + typedef ManagedVector DoubleVector; + typedef ManagedVector BoolVector; + + typedef typename thrust::tuple IntTuple; + typedef typename thrust::zip_iterator NodeIterator; + + IntVector m_row_offsets; + IntVector m_column_indices; + DoubleVector m_values; + + IntVector m_perm; + int m_half_bandwidth; + int m_half_bandwidth_original; + + size_t m_n; + size_t m_nnz; + + + template + static void offsets_to_indices(const IVector& offsets, IVector& indices) + { + // convert compressed row offsets into uncompressed row indices + thrust::fill(indices.begin(), indices.end(), 0); + thrust::scatter( thrust::counting_iterator(0), + thrust::counting_iterator(offsets.size()-1), + offsets.begin(), + indices.begin()); + 
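		// At this point 'indices' holds each row's index only at that row's
		// first slot (everything else is still zero); the inclusive max-scan
		// below propagates those markers forward, so every nonzero ends up
		// labelled with its row index (CSR offsets expanded back to COO rows).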
thrust::inclusive_scan(indices.begin(), indices.end(), indices.begin(), thrust::maximum()); + } + + template + static void indices_to_offsets(const IVector& indices, IVector& offsets) + { + // convert uncompressed row indices into compressed row offsets + thrust::lower_bound(indices.begin(), + indices.end(), + thrust::counting_iterator(0), + thrust::counting_iterator(offsets.size()), + offsets.begin()); + } + +public: + RCM_UM(const IntVector& row_offsets, + const IntVector& column_indices, + const DoubleVector& values) + : m_row_offsets(row_offsets), + m_column_indices(column_indices), + m_values(values) + { + size_t n = row_offsets.size() - 1; + m_perm.resize(n); + m_n = n; + m_nnz = m_values.size(); + } + + ~RCM_UM() {} + + void execute(); + + int getHalfBandwidth() const {return m_half_bandwidth;} + int getHalfBandwidthOriginal() const {return m_half_bandwidth_original;} + + struct Difference: public thrust::binary_function + { + inline + __host__ __device__ + int operator() (const int &a, const int &b) const { + return abs(a-b); + } + }; + + struct ExtendedDifference: public thrust::binary_function + { + int *m_perm; + + ExtendedDifference(int *perm): m_perm(perm) {} + inline + __host__ __device__ + int operator() (const int &a, const int &b) const { + return abs(m_perm[a]-m_perm[b]); + } + }; + + struct TupleCompare + { + inline + __host__ __device__ + bool operator() (IntTuple a, IntTuple b) const + { + int a_level = thrust::get<0>(a), b_level = thrust::get<0>(b); + if (a_level != b_level) return a_level < b_level; + int a_updated_by = thrust::get<1>(a), b_updated_by = thrust::get<1>(b); + if (a_updated_by != b_updated_by) return a_updated_by < b_updated_by; + return thrust::get<2>(a) < thrust::get<2>(b); + } + }; +}; + +void +RCM_UM::execute() +{ + BoolVector frontier(m_n); + IntVector visited(m_n); + BoolVector tried(m_n, false); + + IntVector updated_by(m_n); + IntVector levels(m_n); + + const int ITER_COUNT = 5; + + IntVector row_indices(m_nnz); + + IntVector tmp_row_offsets(m_n + 1); + IntVector tmp_row_indices(m_nnz << 1); + IntVector tmp_column_indices(m_nnz << 1); + IntVector degrees(m_n + 1); + IntVector tmp_degrees(m_n); + + offsets_to_indices(m_row_offsets, row_indices); + + { + int numThreads = m_nnz, numBlockX = 1, numBlockY = 1; + + kernelConfigAdjust(numThreads, numBlockX, numBlockY, BLOCK_SIZE, MAX_GRID_DIMENSION); + dim3 grids(numBlockX, numBlockY); + + const int* p_row_indices = thrust::raw_pointer_cast(&row_indices[0]); + const int* p_column_indices = thrust::raw_pointer_cast(&m_column_indices[0]); + int * p_new_row_indices = thrust::raw_pointer_cast(&tmp_row_indices[0]); + int * p_new_column_indices = thrust::raw_pointer_cast(&tmp_column_indices[0]); + device::generalToSymmetric<<>>(m_nnz, p_row_indices, p_column_indices, p_new_row_indices, p_new_column_indices); + + thrust::sort_by_key(tmp_row_indices.begin(), tmp_row_indices.end(), tmp_column_indices.begin()); + indices_to_offsets(tmp_row_indices, tmp_row_offsets); + thrust::adjacent_difference(tmp_row_offsets.begin(), tmp_row_offsets.end(), degrees.begin()); + + m_half_bandwidth = m_half_bandwidth_original = thrust::inner_product(row_indices.begin(), row_indices.end(), m_column_indices.begin(), 0, thrust::maximum(), Difference()); + + thrust::sequence(m_perm.begin(), m_perm.end()); + cudaDeviceSynchronize(); + } + + IntVector tmp_reordering(m_n); + IntVector tmp_perm(m_n); + + for (int i = 0; i < ITER_COUNT; i++) { + int S = 0; + + if (i > 0) { + while(tried[S]) + S = rand() % m_n; + } + + tried[S] = true; + + 
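		// One reordering attempt, rooted at node S:
		//   1. A level-synchronous BFS (device::achieveLevels) assigns every
		//      node a level and records the node that discovered it
		//      (updated_by); if the frontier empties before all nodes are
		//      visited, the search restarts from the first unvisited node.
		//   2. Nodes are then sorted by (level, updated_by, degree), i.e. the
		//      Cuthill-McKee ordering, with tmp_reordering carrying node ids.
		//   3. Scattering a counting sequence through tmp_reordering inverts
		//      the ordering into a permutation, whose half-bandwidth is
		//      measured; the best permutation over ITER_COUNT starts is kept.
		//      (Reversing the ordering would not change the half-bandwidth,
		//      so the classic "reverse" step is skipped.)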
+ thrust::fill(frontier.begin(), frontier.end(), false); + thrust::fill(visited.begin(), visited.end(), 0); + thrust::sequence(tmp_reordering.begin(), tmp_reordering.end()); + cudaDeviceSynchronize(); + + frontier[S] = true; + visited[S] = 1; + levels[S] = 0; + updated_by[S] = -1; + + int last = 0; + + + for (int l = 0; l < m_n; l ++) + { + int sum = thrust::reduce(visited.begin(), visited.end()); + + if (sum >= m_n) break; + + sum = thrust::reduce(frontier.begin(), frontier.end()); + + if (sum == 0) { + for (int j = last; j < m_n; j++) + if (!visited[j]) { + visited[j] = 1; + frontier[j] = true; + levels[j] = l; + updated_by[j] = -1; + last = j; + tried[j] = true; + break; + } + } + + int numBlockX = m_n, numBlockY = 1; + kernelConfigAdjust(numBlockX, numBlockY, MAX_GRID_DIMENSION); + dim3 grids(numBlockX, numBlockY); + + const int *p_row_offsets = thrust::raw_pointer_cast(&tmp_row_offsets[0]); + const int *p_column_indices = thrust::raw_pointer_cast(&tmp_column_indices[0]); + bool * p_frontier = thrust::raw_pointer_cast(&frontier[0]); + int * p_visited = thrust::raw_pointer_cast(&visited[0]); + int * p_updated_by = thrust::raw_pointer_cast(&updated_by[0]); + int * p_levels = thrust::raw_pointer_cast(&levels[0]); + + device::achieveLevels<<>>(m_n, p_row_offsets, p_column_indices, p_frontier, p_visited, p_updated_by, p_levels); + cudaDeviceSynchronize(); + } + + thrust::copy(degrees.begin() + 1, degrees.end(), tmp_degrees.begin()); + + + thrust::sort(thrust::make_zip_iterator(thrust::make_tuple(levels.begin(), updated_by.begin(), tmp_degrees.begin(), tmp_reordering.begin() )), + thrust::make_zip_iterator(thrust::make_tuple(levels.end(), updated_by.end(), tmp_degrees.end(), tmp_reordering.end())), + TupleCompare() + ); + + + thrust::scatter(thrust::make_counting_iterator(0), + thrust::make_counting_iterator((int)(m_n)), + tmp_reordering.begin(), + tmp_perm.begin()); + + int *p_perm = thrust::raw_pointer_cast(&tmp_perm[0]); + int tmp_half_bandwidth = thrust::inner_product(row_indices.begin(), row_indices.end(), m_column_indices.begin(), 0, thrust::maximum(), ExtendedDifference(p_perm)); + + if (tmp_half_bandwidth < m_half_bandwidth) { + m_half_bandwidth = tmp_half_bandwidth; + m_perm = tmp_perm; + } + + cudaDeviceSynchronize(); + + } + +} + +} // end namespace rcm + +#endif diff --git a/rcm/timer.h b/rcm/timer.h new file mode 100644 index 0000000..153fc46 --- /dev/null +++ b/rcm/timer.h @@ -0,0 +1,126 @@ +/** \file timer.h + * \brief CPU and GPU timer classes. + */ + +#ifndef TIMER_H +#define TIMER_H + +#include +#include + +#ifdef WIN32 +#include +#else +#include +#endif + + +namespace rcm { + + +/// Base timer class. +class Timer { +public: + virtual ~Timer() {} + virtual void Start()=0; + virtual void Stop()=0; + virtual double getElapsed()=0; +}; + + +/// GPU timer. +/** + * CUDA-based GPU timer. + */ +class GPUTimer : public Timer { +protected: + int gpu_idx; + cudaEvent_t timeStart; + cudaEvent_t timeEnd; +public: + GPUTimer(int g_idx = 0) { + gpu_idx = g_idx; + + cudaEventCreate(&timeStart); + cudaEventCreate(&timeEnd); + } + + virtual ~GPUTimer() { + cudaEventDestroy(timeStart); + cudaEventDestroy(timeEnd); + } + + virtual void Start() { + cudaEventRecord(timeStart, 0); + } + + virtual void Stop() { + cudaEventRecord(timeEnd, 0); + cudaEventSynchronize(timeEnd); + } + + virtual double getElapsed() { + float elapsed; + cudaEventElapsedTime(&elapsed, timeStart, timeEnd); + return elapsed; + } +}; + + +/// CPU timer. 
+/** + * CPU timer using the performance counter for WIN32 and + * gettimeofday() for Linux. + */ +#ifdef WIN32 + +class CPUTimer : public Timer +{ +public: + CPUTimer() + { + QueryPerformanceFrequency(&m_frequency); + } + ~CPUTimer() {} + + virtual void Start() {QueryPerformanceCounter(&m_start);} + virtual void Stop() {QueryPerformanceCounter(&m_stop);} + + virtual double getElapsed() { + return (m_stop.QuadPart - m_start.QuadPart) * 1000.0 / m_frequency.QuadPart; + } + +private: + LARGE_INTEGER m_frequency; + LARGE_INTEGER m_start; + LARGE_INTEGER m_stop; +}; + +#else // WIN32 + +class CPUTimer : public Timer { +protected: + timeval timeStart; + timeval timeEnd; +public: + virtual ~CPUTimer() {} + + virtual void Start() { + gettimeofday(&timeStart, 0); + } + + virtual void Stop() { + gettimeofday(&timeEnd, 0); + } + + virtual double getElapsed() { + return 1000.0 * (timeEnd.tv_sec - timeStart.tv_sec) + (timeEnd.tv_usec - timeStart.tv_usec) / 1000.0; + } +}; + +#endif // WIN32 + +} // namespace mc64 + + +#endif diff --git a/testing.cu b/testing.cu new file mode 100644 index 0000000..9d313fb --- /dev/null +++ b/testing.cu @@ -0,0 +1,126 @@ +#include +#include +#include +#include + +extern "C" { +#include "mm_io/mm_io.h" +} + +#include +#include +#include + +typedef typename rcm::ManagedVector IntVector; +typedef typename rcm::ManagedVector DoubleVector; + +enum TestColor { + COLOR_NO = 0, + COLOR_RED, + COLOR_GREEN +}; + +// ----------------------------------------------------------------------------- + +class OutputItem +{ + public: + OutputItem(std::ostream &o) : m_o(o), m_additional_item_count(19) {} + + int m_additional_item_count; + + template + void operator() (T item, TestColor c = COLOR_NO) + { + m_o << "\n"; + switch (c) + { + case COLOR_RED: + m_o << "

" << item << "

\n"; + break; + case COLOR_GREEN: + m_o << "

" << item << "

\n"; + break; + default: + m_o << "

" << item << "

\n"; + break; + } + m_o << "\n"; + } + + private: + std::ostream &m_o; +}; + +int main(int argc, char **argv) +{ + if (argc < 2) { + std::cout << "Usage:\n driver input_file" << std::endl; + return 1; + } + + // Read matrix from file (COO format) + MM_typecode matcode; + int M, N, nnz; + int* row_i; + int* col_j; + double* vals; + + int err = mm_read_mtx_crd(argv[1], &M, &N, &nnz, &row_i, &col_j, &vals, &matcode); + if (err != 0) { + std::cout << "error: " << err << std::endl; + return 1; + } + + // Switch to 0-based indices + for (int i = 0; i < nnz; i++) { + row_i[i]--; + col_j[i]--; + } + // Convert to CSR format and load into thrust vectors. + IntVector row_offsets(N + 1); + IntVector column_indices(nnz); + DoubleVector values(nnz); + + rcm::coo2csr(M, N, nnz, row_i, col_j, vals, row_offsets, column_indices, values); + + // Run the RCM algorithm + rcm::RCM_UM algo(row_offsets, column_indices, values); + OutputItem outputItem(std::cout); + + std::cout << "" << std::endl; + { + std::string fileMat(argv[1]); + int i; + for (i = fileMat.size()-1; i>=0 && fileMat[i] != '/' && fileMat[i] != '\\'; i--); + i++; + fileMat = fileMat.substr(i); + + size_t j = fileMat.rfind(".mtx"); + if (j != std::string::npos) + outputItem( fileMat.substr(0, j)); + else + outputItem( fileMat); + } + outputItem(N); + outputItem(nnz); + + rcm::CPUTimer cpu_timer; + try { + cpu_timer.Start(); + algo.execute(); + cpu_timer.Stop(); + } catch (const rcm::system_error& se) { + outputItem(""); + outputItem(""); + outputItem(""); + std::cout << "" << std::endl; + return 1; + } + outputItem(algo.getHalfBandwidthOriginal()); + outputItem(algo.getHalfBandwidth()); + outputItem(cpu_timer.getElapsed()); + std::cout << "" << std::endl; + + return 0; +}