diff --git a/doc/specs/stdlib_sparse.md b/doc/specs/stdlib_sparse.md
new file mode 100644
index 000000000..fc9d0a0c0
--- /dev/null
+++ b/doc/specs/stdlib_sparse.md
@@ -0,0 +1,364 @@
+---
+title: sparse
+---
+
+# The `stdlib_sparse` module
+
+[TOC]
+
+## Introduction
+
+The `stdlib_sparse` module provides derived types for standard sparse matrix data structures. It also provides math kernels such as sparse matrix-vector product and conversion between matrix types.
+
+## Sparse matrix derived types
+
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+### The `sparse_type` abstract derived type
+#### Status
+
+Experimental
+
+#### Description
+The parent `sparse_type` is as an abstract derived type holding the basic common meta data needed to define a sparse matrix, as well as shared APIs. All sparse matrix flavors are extended from the `sparse_type`.
+
+```Fortran
+type, public, abstract :: sparse_type
+    integer :: nrows   !! number of rows
+    integer :: ncols   !! number of columns
+    integer :: nnz     !! number of non-zero values
+    integer :: storage !! assumed storage symmetry
+end type
+```
+
+The storage integer label should be assigned from the module's internal enumerator containing the following three enums:
+
+```Fortran
+enum, bind(C)
+    enumerator :: sparse_full  !! Full Sparse matrix (no symmetry considerations)
+    enumerator :: sparse_lower !! Symmetric Sparse matrix with triangular inferior storage
+    enumerator :: sparse_upper !! Symmetric Sparse matrix with triangular supperior storage
+end enum
+```
+In the following, all sparse kinds will be presented in two main flavors: a data-less type `<matrix>_type` useful for topological graph operations. And real/complex valued types `<matrix>_<kind>_type` containing the `data` buffer for the matrix values. The following rectangular matrix will be used to showcase how each sparse matrix holds the data internally:
+
+$$ M = \begin{bmatrix} 
+    9 & 0 & 0  & 0 & -3 \\
+    4 & 7 & 0  & 0 & 0 \\
+    0 & 8 & -1 & 8 & 0 \\
+    4 & 0 & 5  & 6 & 0 \\
+  \end{bmatrix} $$
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+### `COO`: The COOrdinates compressed sparse format
+#### Status
+
+Experimental
+
+#### Description
+The `COO`, triplet or `ijv` format defines all non-zero elements of the matrix by explicitly allocating the `i,j` index and the value of the matrix. While some implementations use separate `row` and `col` arrays for the index, here we use a 2D array in order to promote fast memory acces to `ij`.
+
+```Fortran
+type(COO_sp_type) :: COO
+call COO%malloc(4,5,10)
+COO%data(:)   = real([9,-3,4,7,8,-1,8,4,5,6])
+COO%index(1:2,1)  = [1,1]
+COO%index(1:2,2)  = [1,5]
+COO%index(1:2,3)  = [2,1]
+COO%index(1:2,4)  = [2,2]
+COO%index(1:2,5)  = [3,2]
+COO%index(1:2,6)  = [3,3]
+COO%index(1:2,7)  = [3,4]
+COO%index(1:2,8)  = [4,1]
+COO%index(1:2,9)  = [4,3]
+COO%index(1:2,10) = [4,4]
+```
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+### `CSR`: The Compressed Sparse Row or Yale format
+#### Status
+
+Experimental
+
+#### Description
+The Compressed Sparse Row or Yale format `CSR` stores the matrix structure by compressing the row indices with a counter pointer `rowptr` enabling to know the first and last non-zero column index `col` of the given row. 
+
+```Fortran
+type(CSR_sp_type) :: CSR
+call CSR%malloc(4,5,10)
+CSR%data(:)   = real([9,-3,4,7,8,-1,8,4,5,6])
+CSR%col(:)    = [1,5,1,2,2,3,4,1,3,4]
+CSR%rowptr(:) = [1,3,5,8,11]
+```
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+### `CSC`: The Compressed Sparse Column format
+#### Status
+
+Experimental
+
+#### Description
+The Compressed Sparse Colum `CSC` is similar to the `CSR` format but values are accesed first by column, thus an index counter is given by `colptr` which enables to know the first and last non-zero row index of a given colum. 
+
+```Fortran
+type(CSC_sp_type) :: CSC
+call CSC%malloc(4,5,10)
+CSC%data(:)   = real([9,4,4,7,8,-1,5,8,6,-3])
+CSC%row(:)    = [1,2,4,2,3,3,4,3,4,1]
+CSC%colptr(:) = [1,4,6,8,10,11]
+```
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+### `ELLPACK`: ELL-pack storage format
+#### Status
+
+Experimental
+
+#### Description
+The `ELL` format stores data in a dense matrix of $nrows \times K$ in column major order. By imposing a constant number of elements per row $K$, this format will incur in additional zeros being stored, but it enables efficient vectorization as memory acces is carried out by constant sized strides. 
+
+```Fortran
+type(ELL_sp_type) :: ELL
+call ELL%malloc(num_rows=4,num_cols=5,num_nz_row=3)
+ELL%data(1,1:3)   = real([9,-3,0])
+ELL%data(2,1:3)   = real([4,7,0])
+ELL%data(3,1:3)   = real([8,-1,8])
+ELL%data(4,1:3)   = real([4,5,6])
+
+ELL%index(1,1:3) = [1,5,0]
+ELL%index(2,1:3) = [1,2,0]
+ELL%index(3,1:3) = [2,3,4]
+ELL%index(4,1:3) = [1,3,4]
+```
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+### `SELL-C`: The Sliced ELLPACK with Constant blocks format
+#### Status
+
+Experimental
+
+#### Description
+The Sliced ELLPACK format `SELLC` is a variation of the `ELLPACK` format. This modification reduces the storage size compared to the `ELLPACK` format but maintaining its efficient data access scheme. It can be seen as an intermediate format between `CSR` and `ELLPACK`. For more details read [the reference](https://arxiv.org/pdf/1307.6209v1)
+
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+## `add`- sparse matrix data accessors
+
+### Status
+
+Experimental
+
+### Description
+Type-bound procedures to enable adding data in a sparse matrix.
+
+### Syntax
+
+`call matrix%add(i,j,v)` or
+`call matrix%add(i(:),j(:),v(:,:))`
+
+### Arguments
+
+`i`: Shall be an integer value or rank-1 array. It is an `intent(in)` argument.
+
+`j`: Shall be an integer value or rank-1 array. It is an `intent(in)` argument.
+
+`v`: Shall be a `real` or `complex` value or rank-2 array. The type shall be in accordance to the declared sparse matrix object. It is an `intent(in)` argument.
+
+## `at`- sparse matrix data accessors
+
+### Status
+
+Experimental
+
+### Description
+Type-bound procedures to enable requesting data from a sparse matrix.
+
+### Syntax
+
+`v = matrix%at(i,j)`
+
+### Arguments
+
+`i` : Shall be an integer value. It is an `intent(in)` argument.
+
+`j` : Shall be an integer value. It is an `intent(in)` argument.
+
+`v` : Shall be a `real` or `complex` value in accordance to the declared sparse matrix object. If the `ij` tuple is within the sparse pattern, `v` contains the value in the data buffer. If the `ij` tuple is outside the sparse pattern, `v` is equal `0`. If the `ij` tuple is outside the matrix pattern `(nrows,ncols)`, `v` is `NaN`.
+
+## Example
+```fortran
+{!example/linalg/example_sparse_data_accessors.f90!}
+```
+
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+## `spmv` - Sparse Matrix-Vector product
+
+### Status
+
+Experimental
+
+### Description
+
+Provide sparse matrix-vector product kernels for the current supported sparse matrix types.
+
+$$y=\alpha*op(M)*x+\beta*y$$
+
+### Syntax
+
+`call ` [[stdlib_sparse_spmv(module):spmv(interface)]] `(matrix,vec_x,vec_y [,alpha,beta,op])`
+
+### Arguments
+
+`matrix`: Shall be a `real` or `complex` sparse type matrix. It is an `intent(in)` argument.
+
+`vec_x`: Shall be a rank-1 or rank-2 array of `real` or `complex` type array. It is an `intent(in)` argument.
+
+`vec_y`: Shall be a rank-1 or rank-2 array of `real` or `complex` type array. . It is an `intent(inout)` argument.
+
+`alpha`, `optional` : Shall be a scalar value of the same type as `vec_x`. Default value `alpha=1`. It is an `intent(in)` argument.
+
+`beta`, `optional` : Shall be a scalar value of the same type as `vec_x`. Default value `beta=0`. It is an `intent(in)` argument.
+
+`op`, `optional`: In-place operator identifier. Shall be a `character(1)` argument. It can have any of the following values: `N`: no transpose, `T`: transpose, `H`: hermitian or complex transpose. These values are provided as constants by the `stdlib_sparse` module: `sparse_op_none`, `sparse_op_transpose`, `sparse_op_hermitian`
+
+<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
+## Sparse matrix to matrix conversions
+
+### Status
+
+Experimental
+
+### Description
+
+This module provides facility functions for converting between storage formats.
+
+### Syntax
+
+`call ` [[stdlib_sparse_conversion(module):coo2ordered(interface)]] `(coo[,sort_data])`
+
+### Arguments
+
+`COO` : Shall be any `COO` type. The same object will be returned with the arrays reallocated to the correct size after removing duplicates. It is an `intent(inout)` argument.
+
+`sort_data`, `optional` : Shall be a `logical` argument to determine whether data in the COO graph should be sorted while sorting the index array, default `.false.`. It is an `intent(in)` argument.
+
+### Syntax
+
+`call ` [[stdlib_sparse_conversion(module):from_ijv(interface)]] `(sparse,row,col[,data,nrows,ncols,num_nz_rows,chunk])`
+
+### Arguments
+
+`sparse` : Shall be a `COO`, `CSR`, `ELL` or `SELLC` type. The graph object will be returned with a canonical shape after sorting and removing duplicates from the `(row,col,data)` triplet. If the graph is `COO_type` no data buffer is allowed. It is an `intent(inout)` argument.
+
+`row` : rows index array. It is an `intent(in)` argument.
+
+`col` : columns index array. It is an `intent(in)` argument.
+
+`data`, `optional`: `real` or `complex` data array. It is an `intent(in)` argument.
+
+`nrows`, `optional`: number of rows, if not given it will be computed from the `row` array. It is an `intent(in)` argument.
+
+`ncols`, `optional`: number of columns, if not given it will be computed from the `col` array. It is an `intent(in)` argument.
+
+`num_nz_rows`, `optional`: number of non zeros per row, only valid in the case of an `ELL` matrix, by default it will computed from the largest row. It is an `intent(in)` argument.
+
+`chunk`, `optional`: chunk size, only valid in the case of a `SELLC` matrix, by default it will be taken from the `SELLC` default attribute chunk size. It is an `intent(in)` argument.
+
+## Example
+```fortran
+{!example/linalg/example_sparse_from_ijv.f90!}
+```
+### Syntax
+
+`call ` [[stdlib_sparse_conversion(module):diag(interface)]] `(matrix,diagonal)`
+
+### Arguments
+
+`matrix` : Shall be a `dense`, `COO`, `CSR` or `ELL` type. It is an `intent(in)` argument.
+
+`diagonal` : A rank-1 array of the same type as the `matrix`. It is an `intent(inout)` and `allocatable` argument.
+
+#### Note
+If the `diagonal` array has not been previously allocated, the `diag` subroutine will allocate it using the `nrows` of the `matrix`.
+
+### Syntax
+
+`call ` [[stdlib_sparse_conversion(module):dense2coo(interface)]] `(dense,coo)`
+
+### Arguments
+
+`dense` : Shall be a rank-2 array of `real` or `complex` type. It is an `intent(in)` argument.
+
+`coo` : Shall be a `COO` type of `real` or `complex` type. It is an `intent(out)` argument.
+
+### Syntax
+
+`call ` [[stdlib_sparse_conversion(module):coo2dense(interface)]] `(coo,dense)`
+
+### Arguments
+
+`coo` : Shall be a `COO` type of `real` or `complex` type. It is an `intent(in)` argument.
+
+`dense` : Shall be a rank-2 array of `real` or `complex` type. It is an `intent(out)` argument.
+
+### Syntax
+
+`call ` [[stdlib_sparse_conversion(module):coo2csr(interface)]] `(coo,csr)`
+
+### Arguments
+
+`coo` : Shall be a `COO` type of `real` or `complex` type. It is an `intent(in)` argument.
+
+`csr` : Shall be a `CSR` type of `real` or `complex` type. It is an `intent(out)` argument.
+
+### Syntax
+
+`call ` [[stdlib_sparse_conversion(module):coo2csc(interface)]] `(coo,csc)`
+
+### Arguments
+
+`coo` : Shall be a `COO` type of `real` or `complex` type. It is an `intent(in)` argument.
+
+`csc` : Shall be a `CSC` type of `real` or `complex` type. It is an `intent(out)` argument.
+
+### Syntax
+
+`call ` [[stdlib_sparse_conversion(module):csr2coo(interface)]] `(csr,coo)`
+
+### Arguments
+
+`csr` : Shall be a `CSR` type of `real` or `complex` type. It is an `intent(in)` argument.
+
+`coo` : Shall be a `COO` type of `real` or `complex` type. It is an `intent(out)` argument.
+
+### Syntax
+
+`call ` [[stdlib_sparse_conversion(module):csr2sellc(interface)]] `(csr,sellc[,chunk])`
+
+### Arguments
+
+`csr` : Shall be a `CSR` type of `real` or `complex` type. It is an `intent(in)` argument.
+
+`sellc` : Shall be a `SELLC` type of `real` or `complex` type. It is an `intent(out)` argument.
+
+`chunk`, `optional`: chunk size for the Sliced ELLPACK format. It is an `intent(in)` argument.
+
+### Syntax
+
+`call ` [[stdlib_sparse_conversion(module):csr2sellc(interface)]] `(csr,ell[,num_nz_rows])`
+
+### Arguments
+
+`csr` : Shall be a `CSR` type of `real` or `complex` type. It is an `intent(in)` argument.
+
+`ell` : Shall be a `ELL` type of `real` or `complex` type. It is an `intent(out)` argument.
+
+`num_nz_rows`, `optional`: number of non zeros per row. If not give, it will correspond to the size of the longest row in the `CSR` matrix. It is an `intent(in)` argument.
+
+### Syntax
+
+`call ` [[stdlib_sparse_conversion(module):csc2coo(interface)]] `(csc,coo)`
+
+### Arguments
+
+`csc` : Shall be a `CSC` type of `real` or `complex` type. It is an `intent(in)` argument.
+
+`coo` : Shall be a `COO` type of `real` or `complex` type. It is an `intent(out)` argument.
+
+## Example
+```fortran
+{!example/linalg/example_sparse_spmv.f90!}
+```
\ No newline at end of file
diff --git a/example/linalg/CMakeLists.txt b/example/linalg/CMakeLists.txt
index e12236375..fc3f823d9 100644
--- a/example/linalg/CMakeLists.txt
+++ b/example/linalg/CMakeLists.txt
@@ -33,6 +33,9 @@ ADD_EXAMPLE(get_norm)
 ADD_EXAMPLE(solve1)
 ADD_EXAMPLE(solve2)
 ADD_EXAMPLE(solve3)
+ADD_EXAMPLE(sparse_from_ijv)
+ADD_EXAMPLE(sparse_data_accessors)
+ADD_EXAMPLE(sparse_spmv)
 ADD_EXAMPLE(svd)
 ADD_EXAMPLE(svdvals)
 ADD_EXAMPLE(determinant)
diff --git a/example/linalg/example_sparse_data_accessors.f90 b/example/linalg/example_sparse_data_accessors.f90
new file mode 100644
index 000000000..e23164524
--- /dev/null
+++ b/example/linalg/example_sparse_data_accessors.f90
@@ -0,0 +1,49 @@
+program example_sparse_data_accessors
+    use stdlib_linalg_constants, only: dp
+    use stdlib_sparse
+    implicit none
+
+    real(dp) :: mat(2,2)
+    real(dp), allocatable :: dense(:,:)
+    type(CSR_dp_type) :: CSR
+    type(COO_dp_type) :: COO
+    integer :: i, j, locdof(2)
+
+    ! Initial data
+    mat(:,1) = [1._dp,2._dp]
+    mat(:,2) = [2._dp,1._dp]
+    allocate(dense(5,5) , source = 0._dp)
+    do i = 0, 3
+        dense(1+i:2+i,1+i:2+i) = dense(1+i:2+i,1+i:2+i) + mat
+    end do
+
+    print *, 'Original Matrix'
+    do j = 1 , 5
+        print '(5f8.1)',dense(j,:)
+    end do
+
+    ! Initialize CSR data and reset dense reference matrix
+    call dense2coo(dense,COO)
+    call coo2csr(COO,CSR)
+    CSR%data = 0._dp
+    dense = 0._dp
+
+    ! Iteratively add blocks of data
+    do i = 0, 3
+        locdof(1:2) = [1+i,2+i] 
+        call CSR%add(locdof,locdof,mat)
+        ! lets print a dense view of every step
+        call csr2dense(CSR,dense)
+        print '(A,I2)', 'Add block :', i+1
+        do j = 1 , 5
+            print '(5f8.1)',dense(j,:)
+        end do
+    end do
+
+    ! Request values from the matrix
+    print *, ''
+    print *, 'within sparse pattern  :',CSR%at(2,1)
+    print *, 'outside sparse pattern :',CSR%at(5,2)
+    print *, 'outside matrix pattern :',CSR%at(7,7)
+  
+end program example_sparse_data_accessors
\ No newline at end of file
diff --git a/example/linalg/example_sparse_from_ijv.f90 b/example/linalg/example_sparse_from_ijv.f90
new file mode 100644
index 000000000..628955361
--- /dev/null
+++ b/example/linalg/example_sparse_from_ijv.f90
@@ -0,0 +1,41 @@
+program example_sparse_from_ijv
+    use stdlib_linalg_constants, only: dp
+    use stdlib_sparse
+    implicit none
+
+    integer :: row(10), col(10)
+    real(dp) :: data(10)
+    type(COO_dp_type) :: COO
+    type(CSR_dp_type) :: CSR
+    type(ELL_dp_type) :: ELL
+    integer :: i, j
+
+    ! Initial data
+    row = [1,1,2,2,3,3,3,4,4,4]
+    col = [1,5,1,2,2,3,4,1,3,4]
+    data = real([9,-3,4,7,8,-1,8,4,5,6] , kind = dp )
+
+    ! Create a COO matrix from triplet
+    call from_ijv(COO,row,col,data)
+    print *, 'COO'
+    print *, '  i,  j,    v'
+    do i = 1, COO%nnz
+        print '(2I4,f8.1)', COO%index(:,i), COO%data(i)
+    end do
+
+    ! Create a CSR matrix from triplet
+    call from_ijv(CSR,row,col,data)
+    print *, 'CSR'
+    print '(A,5I8)',    'rowptr :', CSR%rowptr
+    print '(A,10I8)',   'col    :', CSR%col 
+    print '(A,10f8.1)', 'data   :', CSR%data
+    
+    ! Create an ELL matrix from triplet
+    call from_ijv(ELL,row,col,data)
+    print *, 'ELL'
+    print *, ' index        |         data'
+    do i = 1, ELL%nrows
+        print '(3I4,x,3f8.1)', ELL%index(i,:) , ELL%data(i,:)
+    end do
+  
+end program example_sparse_from_ijv
\ No newline at end of file
diff --git a/example/linalg/example_sparse_spmv.f90 b/example/linalg/example_sparse_spmv.f90
new file mode 100644
index 000000000..27b27b8db
--- /dev/null
+++ b/example/linalg/example_sparse_spmv.f90
@@ -0,0 +1,34 @@
+program example_sparse_spmv
+    use stdlib_linalg_constants, only: dp
+    use stdlib_sparse
+    implicit none
+
+    integer, parameter :: m = 4, n = 2
+    real(dp) :: A(m,n), x(n)
+    real(dp) :: y_dense(m), y_coo(m), y_csr(m)
+    real(dp) :: alpha, beta
+    type(COO_dp_type) :: COO
+    type(CSR_dp_type) :: CSR
+
+    call random_number(A)
+    ! Convert from dense to COO and CSR matrices
+    call dense2coo( A , COO )
+    call coo2csr( COO , CSR )
+    
+    ! Initialize vectors
+    x       = 1._dp
+    y_dense = 2._dp
+    y_coo   = y_dense
+    y_csr   = y_dense
+
+    ! Perform matrix-vector product
+    alpha = 3._dp; beta = 2._dp
+    y_dense = alpha * matmul(A,x) + beta * y_dense
+    call spmv( COO , x , y_coo , alpha = alpha, beta = beta )
+    call spmv( CSR , x , y_csr , alpha = alpha, beta = beta )
+    
+    print *, 'dense :', y_dense
+    print *, 'coo   :', y_coo
+    print *, 'csr   :', y_csr
+  
+end program example_sparse_spmv
\ No newline at end of file
diff --git a/include/common.fypp b/include/common.fypp
index b1169fcdb..451eebc8a 100644
--- a/include/common.fypp
+++ b/include/common.fypp
@@ -38,6 +38,7 @@
 
 #! Real types to be considered during templating
 #:set REAL_TYPES = ["real({})".format(k) for k in REAL_KINDS]
+#:set REAL_SUFFIX = REAL_KINDS
 
 #! Collected (kind, type) tuples for real types
 #:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_INIT))
@@ -75,6 +76,7 @@ $:"s" if cmplx=="c" else "d" if cmplx=="z" else "x" if cmplx=="y" else "q" if cm
 
 #! Complex types to be considered during templating
 #:set CMPLX_TYPES = ["complex({})".format(k) for k in CMPLX_KINDS]
+#:set CMPLX_SUFFIX = ["c{}".format(k) for k in CMPLX_KINDS]
 
 #! Collected (kind, type, initial) tuples for complex types
 #:set CMPLX_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_INIT))
@@ -115,6 +117,9 @@ $:"s" if cmplx=="c" else "d" if cmplx=="z" else "x" if cmplx=="y" else "q" if cm
 #! Bitset types to be considered during templating
 #:set BITSET_TYPES = ["type({})".format(k) for k in BITSET_KINDS]
 
+#! Sparse types to be considered during templating
+#:set SPARSE_KINDS = ["COO", "CSR", "CSC", "ELL"]
+
 #! Whether Fortran 90 compatible code should be generated
 #:set VERSION90 = defined('VERSION90')
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b31e6e807..ff9f39417 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -42,6 +42,10 @@ set(fppFiles
     stdlib_sorting_ord_sort.fypp
     stdlib_sorting_sort.fypp
     stdlib_sorting_sort_index.fypp
+    stdlib_sparse_constants.fypp
+    stdlib_sparse_conversion.fypp
+    stdlib_sparse_kinds.fypp
+    stdlib_sparse_spmv.fypp
     stdlib_specialfunctions_gamma.fypp
     stdlib_stats.fypp
     stdlib_stats_corr.fypp
@@ -114,6 +118,7 @@ set(SRC
     stdlib_logger.f90
     stdlib_sorting_radix_sort.f90
     stdlib_system.F90
+    stdlib_sparse.f90
     stdlib_specialfunctions.f90
     stdlib_specialfunctions_legendre.f90
     stdlib_quadrature_gauss.f90
diff --git a/src/stdlib_sparse.f90 b/src/stdlib_sparse.f90
new file mode 100644
index 000000000..4c1cbcf26
--- /dev/null
+++ b/src/stdlib_sparse.f90
@@ -0,0 +1,6 @@
+!! public API
+module stdlib_sparse
+    use stdlib_sparse_kinds
+    use stdlib_sparse_conversion
+    use stdlib_sparse_spmv
+end module stdlib_sparse
\ No newline at end of file
diff --git a/src/stdlib_sparse_constants.fypp b/src/stdlib_sparse_constants.fypp
new file mode 100644
index 000000000..1e8374bd9
--- /dev/null
+++ b/src/stdlib_sparse_constants.fypp
@@ -0,0 +1,32 @@
+#:include "common.fypp"
+#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
+#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
+module stdlib_sparse_constants
+    use stdlib_kinds, only: int8, int16, int32, int64, sp, dp, xdp, qp
+
+    implicit none
+    public
+
+    enum, bind(C)
+        enumerator :: sparse_full  !! Full Sparse matrix (no symmetry considerations)
+        enumerator :: sparse_lower !! Symmetric Sparse matrix with triangular inferior storage
+        enumerator :: sparse_upper !! Symmetric Sparse matrix with triangular supperior storage
+    end enum
+
+    character(1), parameter :: sparse_op_none = 'N' !! no transpose
+    character(1), parameter :: sparse_op_transpose = 'T' !! transpose
+    character(1), parameter :: sparse_op_hermitian = 'H' !! conjugate or hermitian transpose
+    
+    ! Integer size support for ILP64 builds should be done here
+    integer, parameter :: ilp = int32
+
+    #:for k1, t1, s1 in (R_KINDS_TYPES)
+    ${t1}$, parameter :: zero_${s1}$ = 0._${k1}$
+    ${t1}$, parameter :: one_${s1}$ = 1._${k1}$
+    #:endfor
+    #:for k1, t1, s1 in (C_KINDS_TYPES)
+    ${t1}$, parameter :: zero_${s1}$ = (0._${k1}$,0._${k1}$)
+    ${t1}$, parameter :: one_${s1}$ = (1._${k1}$,1._${k1}$)
+    #:endfor
+
+end module stdlib_sparse_constants
diff --git a/src/stdlib_sparse_conversion.fypp b/src/stdlib_sparse_conversion.fypp
new file mode 100644
index 000000000..d2f4936c5
--- /dev/null
+++ b/src/stdlib_sparse_conversion.fypp
@@ -0,0 +1,938 @@
+#:include "common.fypp"
+#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
+#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
+#:set KINDS_TYPES = R_KINDS_TYPES+C_KINDS_TYPES
+!! The `stdlib_sparse_conversion` submodule provides sparse to sparse matrix conversion utilities.
+!!
+! This code was modified from https://github.com/jalvesz/FSPARSE by its author: Alves Jose
+module stdlib_sparse_conversion
+    use stdlib_sorting, only: sort
+    use stdlib_sparse_constants
+    use stdlib_sparse_kinds
+    implicit none
+    private
+    !! Sort arrays of a COO matrix
+    !! 
+    interface sort_coo
+        module procedure :: sort_coo_unique
+    #:for k1, t1, s1 in (KINDS_TYPES)
+        module procedure :: sort_coo_unique_${s1}$
+    #:endfor
+    end interface
+
+    !! version: experimental
+    !!
+    !! Conversion from dense to coo
+    !! Enables extracting the non-zero elements of a dense 2D matrix and
+    !! storing those values in a COO format. The coo matrix is (re)allocated on the fly.
+    !! [Specifications](../page/specs/stdlib_sparse.html#sparse_conversion)
+    interface dense2coo
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        module procedure :: dense2coo_${s1}$
+        #:endfor
+    end interface
+    public :: dense2coo
+
+    !! version: experimental
+    !!
+    !! Conversion from coo to dense
+    !! Enables creating a dense 2D matrix from the non-zero values stored in a COO format
+    !! The dense matrix can be allocated on the fly if not pre-allocated by the user.
+    !! [Specifications](../page/specs/stdlib_sparse.html#sparse_conversion)
+    interface coo2dense
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        module procedure :: coo2dense_${s1}$
+        #:endfor
+    end interface
+    public :: coo2dense
+
+    !! version: experimental
+    !!
+    !! Conversion from coo to csr
+    !! Enables transferring data from a COO matrix to a CSR matrix
+    !! under the hypothesis that the COO is already ordered.
+    !! [Specifications](../page/specs/stdlib_sparse.html#sparse_conversion)
+    interface coo2csr
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        module procedure :: coo2csr_${s1}$
+        #:endfor
+    end interface
+    public :: coo2csr
+
+    !! version: experimental
+    !!
+    !! Conversion from coo to csc
+    !! Enables transferring data from a COO matrix to a CSC matrix
+    !! under the hypothesis that the COO is already ordered.
+    !! [Specifications](../page/specs/stdlib_sparse.html#sparse_conversion)
+    interface coo2csc
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        module procedure :: coo2csc_${s1}$
+        #:endfor
+    end interface
+    public :: coo2csc
+    
+    !! version: experimental
+    !!
+    !! Conversion from csr to dense
+    !! Enables creating a dense 2D matrix from the non-zero values stored in a CSR format
+    !! The dense matrix can be allocated on the fly if not pre-allocated by the user.
+    !! [Specifications](../page/specs/stdlib_sparse.html#sparse_conversion)
+    interface csr2dense
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        module procedure :: csr2dense_${s1}$
+        #:endfor
+    end interface
+    public :: csr2dense
+
+    !! version: experimental
+    !!
+    !! Conversion from csr to coo
+    !! Enables transferring data from a CSR matrix to a COO matrix
+    !! under the hypothesis that the CSR is already ordered.
+    !! [Specifications](../page/specs/stdlib_sparse.html#sparse_conversion)
+    interface csr2coo
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        module procedure :: csr2coo_${s1}$
+        #:endfor
+    end interface
+    public :: csr2coo
+
+    !! version: experimental
+    !!
+    !! Conversion from csr to ell
+    !! Enables transferring data from a CSR matrix to a ELL matrix
+    !! under the hypothesis that the CSR is already ordered.
+    !! [Specifications](../page/specs/stdlib_sparse.html#sparse_conversion)
+    interface csr2ell
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        module procedure :: csr2ell_${s1}$
+        #:endfor
+    end interface
+    public :: csr2ell
+
+    !! version: experimental
+    !!
+    !! Conversion from csr to SELL-C
+    !! Enables transferring data from a CSR matrix to a SELL-C matrix
+    !! It takes an optional parameter to decide the chunck size 4, 8 or 16
+    !! [Specifications](../page/specs/stdlib_sparse.html#sparse_conversion)
+    interface csr2sellc
+    #:for k1, t1, s1 in (KINDS_TYPES)
+        module procedure :: csr2sellc_${s1}$
+    #:endfor
+    end interface
+    public :: csr2sellc
+
+    !! version: experimental
+    !!
+    !! Conversion from csc to coo
+    !! Enables transferring data from a CSC matrix to a COO matrix
+    !! under the hypothesis that the CSC is already ordered.
+    !! [Specifications](../page/specs/stdlib_sparse.html#sparse_conversion)
+    interface csc2coo
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        module procedure :: csc2coo_${s1}$
+        #:endfor
+    end interface
+    public :: csc2coo
+
+    !! version: experimental
+    !!
+    !! Extraction of diagonal values
+    !! [Specifications](../page/specs/stdlib_sparse.html#sparse_conversion)
+    interface diag
+    #:for k1, t1, s1 in (KINDS_TYPES)
+        module procedure :: dense2diagonal_${s1}$
+        module procedure :: coo2diagonal_${s1}$
+        module procedure :: csr2diagonal_${s1}$
+        module procedure :: csc2diagonal_${s1}$
+        module procedure :: ell2diagonal_${s1}$
+    #:endfor
+    end interface
+    public :: diag
+
+    !! version: experimental
+    !!
+    !! Enable creating a sparse matrix from ijv (row,col,data) triplet
+    !! [Specifications](../page/specs/stdlib_sparse.html#sparse_conversion)
+    interface from_ijv
+        module procedure :: coo_from_ijv_type
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        module procedure :: coo_from_ijv_${s1}$
+        module procedure :: csr_from_ijv_${s1}$
+        module procedure :: ell_from_ijv_${s1}$
+        module procedure :: sellc_from_ijv_${s1}$
+        #:endfor
+    end interface
+    public :: from_ijv
+
+    public :: coo2ordered
+
+contains
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine dense2coo_${s1}$(dense,COO)
+        ${t1}$, intent(in) :: dense(:,:)
+        type(COO_${s1}$_type), intent(out) :: COO
+        integer(ilp) :: num_rows, num_cols, nnz
+        integer(ilp) :: i, j, idx
+
+        num_rows = size(dense,dim=1)
+        num_cols = size(dense,dim=2)
+        nnz      = count( abs(dense) > tiny(1._${k1}$) )
+
+        call COO%malloc(num_rows,num_cols,nnz)
+
+        idx = 1
+        do i = 1, num_rows
+            do j = 1, num_cols
+                if(abs(dense(i,j)) < tiny(1._${k1}$)) cycle
+                COO%index(1,idx) = i
+                COO%index(2,idx) = j
+                COO%data(idx) = dense(i,j)
+                idx = idx + 1
+            end do
+        end do
+        COO%is_sorted = .true.
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine coo2dense_${s1}$(COO,dense)
+        type(COO_${s1}$_type), intent(in) :: COO
+        ${t1}$, allocatable, intent(out) :: dense(:,:)
+        integer(ilp) :: idx
+
+        if(.not.allocated(dense)) allocate(dense(COO%nrows,COO%nrows),source=zero_${s1}$)
+        do concurrent(idx = 1:COO%nnz)
+            dense( COO%index(1,idx) , COO%index(2,idx) ) = COO%data(idx)
+        end do
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine coo2csr_${s1}$(COO,CSR)
+        type(COO_${s1}$_type), intent(in)  :: COO
+        type(CSR_${s1}$_type), intent(out) :: CSR
+        integer(ilp) :: i
+
+        CSR%nnz = COO%nnz; CSR%nrows = COO%nrows; CSR%ncols = COO%ncols
+        CSR%storage = COO%storage
+
+        if( allocated(CSR%col) ) then
+            CSR%col(1:COO%nnz)  = COO%index(2,1:COO%nnz)
+            CSR%rowptr(1:CSR%nrows) = 0
+            CSR%data(1:CSR%nnz) = COO%data(1:COO%nnz)
+        else 
+            allocate( CSR%col(CSR%nnz)  , source = COO%index(2,1:COO%nnz) )
+            allocate( CSR%rowptr(CSR%nrows+1) , source = 0 )
+            allocate( CSR%data(CSR%nnz) , source = COO%data(1:COO%nnz) )
+        end if
+
+        CSR%rowptr(1) = 1
+        do i = 1, COO%nnz
+            CSR%rowptr( COO%index(1,i)+1 ) = CSR%rowptr( COO%index(1,i)+1 ) + 1
+        end do
+        do i = 1, CSR%nrows
+            CSR%rowptr( i+1 ) = CSR%rowptr( i+1 ) + CSR%rowptr( i )
+        end do
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine coo2csc_${s1}$(COO,CSC)
+        type(COO_${s1}$_type), intent(in)  :: COO
+        type(CSC_${s1}$_type), intent(out) :: CSC
+        ${t1}$, allocatable :: data(:)
+        integer(ilp), allocatable :: temp(:,:)
+        integer(ilp) :: i, nnz
+
+        CSC%nnz = COO%nnz; CSC%nrows = COO%nrows; CSC%ncols = COO%ncols
+        CSC%storage = COO%storage
+
+        allocate(temp(2,COO%nnz))
+        temp(1,1:COO%nnz) = COO%index(2,1:COO%nnz)
+        temp(2,1:COO%nnz) = COO%index(1,1:COO%nnz)
+        allocate(data, source = COO%data )
+        nnz = COO%nnz
+        call sort_coo_unique_${s1}$( temp, data, nnz, COO%nrows, COO%ncols )
+
+        if( allocated(CSC%row) ) then
+            CSC%row(1:COO%nnz)  = temp(2,1:COO%nnz)
+            CSC%colptr(1:CSC%ncols) = 0
+            CSC%data(1:CSC%nnz) = data(1:COO%nnz)
+        else 
+            allocate( CSC%row(CSC%nnz)  , source = temp(2,1:COO%nnz) )
+            allocate( CSC%colptr(CSC%ncols+1) , source = 0 )
+            allocate( CSC%data(CSC%nnz) , source = data(1:COO%nnz) )
+        end if
+
+        CSC%colptr(1) = 1
+        do i = 1, COO%nnz
+            CSC%colptr( temp(1,i)+1 ) = CSC%colptr( temp(1,i)+1 ) + 1
+        end do
+        do i = 1, CSC%ncols
+            CSC%colptr( i+1 ) = CSC%colptr( i+1 ) + CSC%colptr( i )
+        end do
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine csr2dense_${s1}$(CSR,dense)
+        type(CSR_${s1}$_type), intent(in) :: CSR
+        ${t1}$, allocatable, intent(out) :: dense(:,:)
+        integer(ilp) :: i, j
+
+        if(.not.allocated(dense)) allocate(dense(CSR%nrows,CSR%nrows),source=zero_${s1}$)
+        if( CSR%storage == sparse_full) then
+            do i = 1, CSR%nrows
+                do j = CSR%rowptr(i), CSR%rowptr(i+1)-1
+                    dense(i,CSR%col(j)) = CSR%data(j)
+                end do
+            end do
+        else
+            do i = 1, CSR%nrows
+                do j = CSR%rowptr(i), CSR%rowptr(i+1)-1
+                    dense(i,CSR%col(j)) = CSR%data(j)
+                    if( i == CSR%col(j) ) cycle
+                    dense(CSR%col(j),i) = CSR%data(j)
+                end do
+            end do
+        end if
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine csr2coo_${s1}$(CSR,COO)
+        type(CSR_${s1}$_type), intent(in)  :: CSR
+        type(COO_${s1}$_type), intent(out) :: COO
+        integer(ilp) :: i, j
+
+        COO%nnz = CSR%nnz; COO%nrows = CSR%nrows; COO%ncols = CSR%ncols
+        COO%storage = CSR%storage
+
+        if( .not.allocated(COO%data) ) then
+            allocate( COO%data(CSR%nnz) , source = CSR%data(1:CSR%nnz) )
+        else 
+            COO%data(1:CSR%nnz) = CSR%data(1:CSR%nnz)
+        end if
+
+        if( .not.allocated(COO%index) ) allocate( COO%index(2,CSR%nnz) )
+        
+        do i = 1, CSR%nrows
+            do j = CSR%rowptr(i), CSR%rowptr(i+1)-1
+                COO%index(1:2,j) = [i,CSR%col(j)]
+            end do
+        end do
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine csc2coo_${s1}$(CSC,COO)
+        type(CSC_${s1}$_type), intent(in)  :: CSC
+        type(COO_${s1}$_type), intent(out) :: COO
+        integer(ilp) :: i, j
+
+        COO%nnz = CSC%nnz; COO%nrows = CSC%nrows; COO%ncols = CSC%ncols
+        COO%storage = CSC%storage
+
+        if( .not.allocated(COO%data) ) then
+            allocate( COO%data(CSC%nnz) , source = CSC%data(1:CSC%nnz) )
+        else 
+            COO%data(1:CSC%nnz) = CSC%data(1:CSC%nnz)
+        end if
+
+        if( .not.allocated(COO%index) ) allocate( COO%index(2,CSC%nnz) )
+        
+        do j = 1, CSC%ncols
+            do i = CSC%colptr(j), CSC%colptr(j+1)-1
+                COO%index(1:2,i) = [CSC%row(i),j]
+            end do
+        end do
+        call sort_coo_unique_${s1}$( COO%index, COO%data, COO%nnz, COO%nrows, COO%ncols )
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine csr2ell_${s1}$(CSR,ELL,num_nz_rows)
+        type(CSR_${s1}$_type), intent(in)  :: CSR
+        type(ELL_${s1}$_type), intent(out) :: ELL
+        integer, intent(in), optional :: num_nz_rows !! number of non zeros per row
+
+        integer(ilp) :: i, j, num_nz_rows_, adr1, adr2
+        !-------------------------------------------
+        num_nz_rows_ = 0
+        if(present(num_nz_rows)) then
+            num_nz_rows_ = num_nz_rows
+        else 
+            do i = 1, CSR%nrows
+                num_nz_rows_ = max(num_nz_rows_, CSR%rowptr( i+1 ) - CSR%rowptr( i ) )
+            end do
+        end if
+        call ELL%malloc(CSR%nrows,CSR%ncols,num_nz_rows_)
+        ELL%storage = CSR%storage
+        !-------------------------------------------
+        do i = 1, CSR%nrows
+            adr1 = CSR%rowptr(i)
+            adr2 = min( adr1+num_nz_rows_ , CSR%rowptr(i+1)-1)
+            do j = adr1, adr2
+                ELL%index(i,j-adr1+1) = CSR%col(j)
+                ELL%data(i,j-adr1+1)  = CSR%data(j)
+            end do
+        end do
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine csr2sellc_${s1}$(CSR,SELLC,chunk)
+        !! csr2sellc: This function enables transfering data from a CSR matrix to a SELL-C matrix
+        !! This algorithm was gracefully provided by Ivan Privec and adapted by Jose Alves
+        type(CSR_${s1}$_type), intent(in)    :: CSR
+        type(SELLC_${s1}$_type), intent(out) :: SELLC
+        integer, intent(in), optional :: chunk
+        ${t1}$, parameter :: zero = zero_${s1}$
+        integer(ilp) :: i, j, num_chunks
+
+        if(present(chunk)) SELLC%chunk_size = chunk
+
+        SELLC%nrows = CSR%nrows; SELLC%ncols = CSR%ncols
+        SELLC%storage = CSR%storage
+        associate( nrows=>SELLC%nrows, ncols=>SELLC%ncols, nnz=>SELLC%nnz, &
+        &         chunk_size=>SELLC%chunk_size     )
+        !-------------------------------------------
+        ! csr rowptr to SELL-C chunked rowptr
+        num_chunks = (nrows + chunk_size - 1)/chunk_size
+        allocate( SELLC%rowptr(num_chunks+1) )
+        block
+            integer :: cidx, rownnz, chunknnz
+            SELLC%rowptr(1) = 1
+            cidx = 1
+            do i = 1, nrows, chunk_size
+                chunknnz = 0
+                ! Iterate over rows in a given chunk
+                do j = i, min(i+chunk_size-1,nrows)
+                    rownnz = CSR%rowptr(j+1) - CSR%rowptr(j)
+                    chunknnz = max(chunknnz,rownnz)
+                end do
+                SELLC%rowptr(cidx+1) = SELLC%rowptr(cidx) + chunknnz
+                cidx = cidx + 1
+            end do
+            nnz = SELLC%rowptr(num_chunks+1) - 1
+        end block
+        !-------------------------------------------
+        ! copy values and colum index
+        allocate(SELLC%col(chunk_size,nnz), source = -1)
+        allocate(SELLC%data(chunk_size,nnz), source = zero )
+        block
+            integer :: lb, ri, iaa, iab, rownnz
+            do i = 1, num_chunks
+
+                lb = SELLC%rowptr(i)
+
+                ! Loop over rows of a chunk
+                do j = (i-1)*chunk_size + 1, min(i*chunk_size,nrows)
+    
+                    ri = j - (i - 1)*chunk_size
+                    
+                    rownnz = CSR%rowptr(j+1) - CSR%rowptr(j) - 1
+                    iaa    = CSR%rowptr(j)
+                    iab    = CSR%rowptr(j+1) - 1
+                    
+                    SELLC%col(ri,lb:lb+rownnz)  = CSR%col(iaa:iab)
+                    SELLC%data(ri,lb:lb+rownnz) = CSR%data(iaa:iab)
+                
+                end do
+            end do
+         end block
+        end associate
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    recursive subroutine quicksort_i_${s1}$(a, b, first, last)
+        integer, parameter :: wp = sp
+        integer(ilp), intent(inout) :: a(*) !! reference table to sort
+        ${t1}$, intent(inout)  :: b(*) !! secondary real data to sort w.r.t. a
+        integer(ilp), intent(in)     :: first, last
+        integer(ilp)  :: i, j, x, t
+        ${t1}$ :: d
+
+        x = a( (first+last) / 2 )
+        i = first
+        j = last
+        do
+            do while (a(i) < x)
+                i=i+1
+            end do
+            do while (x < a(j))
+                j=j-1
+            end do
+            if (i >= j) exit
+            t = a(i);  a(i) = a(j);  a(j) = t
+            d = b(i);  b(i) = b(j);  b(j) = d
+            i=i+1
+            j=j-1
+        end do
+        if (first < i-1) call quicksort_i_${s1}$(a, b, first, i-1)
+        if (j+1 < last)  call quicksort_i_${s1}$(a, b, j+1, last)
+    end subroutine 
+
+    #:endfor
+
+    subroutine sort_coo_unique( a, n, num_rows, num_cols )
+        !! Sort a 2d array in increasing order first by index 1 and then by index 2
+        integer(ilp), intent(inout) :: a(2,*)
+        integer(ilp), intent(inout) :: n
+        integer(ilp), intent(in) :: num_rows
+        integer(ilp), intent(in) :: num_cols
+
+        integer(ilp) :: stride, adr0, adr1, dd
+        integer(ilp) :: n_i, pos, ed
+        integer(ilp), allocatable :: count_i(:), count_i_aux(:), rows_(:), cols_(:)
+        !---------------------------------------------------------
+        ! Sort a first time with respect to first index using count sort
+        allocate( count_i( 0:num_rows ) , source = 0 )
+        do ed = 1, n
+            count_i( a(1,ed) ) = count_i( a(1,ed) ) + 1
+        end do
+        do n_i = 2, num_rows
+            count_i(n_i) = count_i(n_i) + count_i(n_i-1)
+        end do
+        allocate( count_i_aux( 0:num_rows ) , source = count_i )
+
+        allocate( rows_(n), cols_(n) )
+        do ed = n, 1, -1
+            n_i = a(1,ed)
+            pos = count_i(n_i)
+            rows_(pos) = a(1,ed)
+            cols_(pos) = a(2,ed)
+            count_i(n_i) = count_i(n_i) - 1
+        end do
+        !---------------------------------------------------------
+        ! Sort with respect to second column
+        do n_i = 1, num_rows
+            adr0 = count_i_aux(n_i-1)+1
+            adr1 = count_i_aux(n_i)
+            dd = adr1-adr0+1
+            if(dd>0) call sort(cols_(adr0:adr1))
+        end do
+        !---------------------------------------------------------
+        ! Remove duplicates
+        do ed = 1,n
+            a(1:2,ed) = [rows_(ed),cols_(ed)]
+        end do
+        stride = 0
+        do ed = 2, n
+            if( a(1,ed) == a(1,ed-1) .and. a(2,ed) == a(2,ed-1) ) then
+                stride = stride + 1
+            else
+                a(1:2,ed-stride) = a(1:2,ed)
+            end if
+        end do
+        n = n - stride
+    end subroutine
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine sort_coo_unique_${s1}$( a, data, n, num_rows, num_cols )
+        !! Sort a 2d array in increasing order first by index 1 and then by index 2
+        ${t1}$, intent(inout) :: data(*)
+        integer(ilp), intent(inout) :: a(2,*)
+        integer(ilp), intent(inout) :: n
+        integer(ilp), intent(in) :: num_rows
+        integer(ilp), intent(in) :: num_cols
+
+        integer(ilp) :: stride, adr0, adr1, dd
+        integer(ilp) :: n_i, pos, ed
+        integer(ilp), allocatable :: count_i(:), count_i_aux(:), rows_(:), cols_(:)
+        ${t1}$, allocatable :: temp(:)
+        !---------------------------------------------------------
+        ! Sort a first time with respect to first index using Count sort
+        allocate( count_i( 0:num_rows ) , source = 0 )
+        do ed = 1, n
+            count_i( a(1,ed) ) = count_i( a(1,ed) ) + 1
+        end do
+        do n_i = 2, num_rows
+            count_i(n_i) = count_i(n_i) + count_i(n_i-1)
+        end do
+        allocate( count_i_aux( 0:num_rows ) , source = count_i )
+
+        allocate( rows_(n), cols_(n), temp(n) )
+        do ed = n, 1, -1
+            n_i = a(1,ed)
+            pos = count_i(n_i)
+            rows_(pos) = a(1,ed)
+            cols_(pos) = a(2,ed)
+            temp(pos)  = data(ed)
+            count_i(n_i) = count_i(n_i) - 1
+        end do
+        !---------------------------------------------------------
+        ! Sort with respect to second colum using a quicksort
+        do n_i = 1, num_rows
+            adr0 = count_i_aux(n_i-1)+1
+            adr1 = count_i_aux(n_i)
+            dd = adr1-adr0+1
+            if(dd>0) call quicksort_i_${s1}$(cols_(adr0),temp(adr0),1,dd)
+        end do
+        !---------------------------------------------------------
+        ! Remove duplicates
+        do ed = 1,n
+            a(1:2,ed) = [rows_(ed),cols_(ed)]
+        end do
+        data(1:n) = temp(1:n)
+        stride = 0
+        do ed = 2, n
+            if( a(1,ed) == a(1,ed-1) .and. a(2,ed) == a(2,ed-1) ) then
+                data(ed-1-stride) = data(ed-1-stride) + data(ed)
+                data(ed) = data(ed-1-stride)
+                stride = stride + 1
+            else
+                a(1:2,ed-stride) = a(1:2,ed)
+                data(ed-stride) = data(ed)
+            end if
+        end do
+        n = n - stride
+    end subroutine
+
+    #:endfor
+
+    !! version: experimental
+    !!
+    !! Transform COO matrix to canonical form with ordered and unique entries
+    !! [Specifications](../page/specs/stdlib_sparse.html#sparse_conversion)
+    subroutine coo2ordered(COO,sort_data)
+        class(COO_type), intent(inout) :: COO
+        logical, intent(in), optional :: sort_data
+        integer(ilp), allocatable :: itemp(:,:)
+        logical :: sort_data_
+        
+        if(COO%is_sorted) return
+
+        sort_data_ = .false.
+        if(present(sort_data)) sort_data_ = sort_data
+
+        select type (COO)
+            type is( COO_type )
+                call sort_coo(COO%index, COO%nnz, COO%nrows, COO%ncols)
+            #:for k1, t1, s1 in (KINDS_TYPES)
+            type is( COO_${s1}$_type )
+                block
+                ${t1}$, allocatable :: temp(:)
+                if( sort_data_ ) then
+                    call sort_coo(COO%index, COO%data, COO%nnz, COO%nrows, COO%ncols)
+                    allocate( temp(COO%nnz) , source=COO%data(1:COO%nnz) )
+                else 
+                    call sort_coo(COO%index, COO%nnz, COO%nrows, COO%ncols)
+                    allocate( temp(COO%nnz) )
+                end if
+                call move_alloc( temp , COO%data )
+                end block
+            #:endfor
+        end select
+        
+        allocate( itemp(2,COO%nnz) , source=COO%index(1:2,1:COO%nnz) )
+        call move_alloc( itemp , COO%index )
+
+        COO%is_sorted = .true.
+    end subroutine
+
+    subroutine coo_from_ijv_type(COO,row,col,nrows,ncols)
+        type(COO_type), intent(inout) :: COO
+        integer(ilp), intent(in) :: row(:)
+        integer(ilp), intent(in) :: col(:)
+        integer(ilp), intent(in), optional :: nrows
+        integer(ilp), intent(in), optional :: ncols
+
+        integer(ilp) :: nrows_, ncols_, nnz, ed
+        !---------------------------------------------------------
+        if(present(nrows)) then
+            nrows_ = nrows
+        else 
+            nrows_ = size(row)
+        end if
+        if(present(ncols)) then
+            ncols_ = ncols
+        else 
+            ncols_ = size(col)
+        end if
+        nnz = size(row)
+        !---------------------------------------------------------
+        call COO%malloc(nrows_,ncols_,nnz)
+        do ed = 1, nnz 
+            COO%index(1:2,ed) = [row(ed),col(ed)]
+        end do
+
+        call coo2ordered(COO,.true.)
+    end subroutine
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine coo_from_ijv_${s1}$(COO,row,col,data,nrows,ncols)
+        type(COO_${s1}$_type), intent(inout) :: COO
+        integer(ilp), intent(in) :: row(:)
+        integer(ilp), intent(in) :: col(:)
+        ${t1}$, intent(in), optional :: data(:)
+        integer(ilp), intent(in), optional :: nrows
+        integer(ilp), intent(in), optional :: ncols
+
+        integer(ilp) :: nrows_, ncols_, nnz, ed
+        !---------------------------------------------------------
+        if(present(nrows)) then
+            nrows_ = nrows
+        else 
+            nrows_ = maxval(row)
+        end if
+        if(present(ncols)) then
+            ncols_ = ncols
+        else 
+            ncols_ = maxval(col)
+        end if
+        nnz = size(row)
+        !---------------------------------------------------------
+        call COO%malloc(nrows_,ncols_,nnz)
+        do ed = 1, nnz 
+            COO%index(1:2,ed) = [row(ed),col(ed)]
+        end do
+        if(present(data)) COO%data = data
+
+        call coo2ordered(COO,.true.)
+    end subroutine
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine csr_from_ijv_${s1}$(CSR,row,col,data,nrows,ncols)
+        type(CSR_${s1}$_type), intent(inout) :: CSR
+        integer(ilp), intent(in) :: row(:)
+        integer(ilp), intent(in) :: col(:)
+        ${t1}$, intent(in), optional :: data(:)
+        integer(ilp), intent(in), optional :: nrows
+        integer(ilp), intent(in), optional :: ncols
+
+        integer(ilp) :: nrows_, ncols_
+        !---------------------------------------------------------
+        if(present(nrows)) then
+            nrows_ = nrows
+        else 
+            nrows_ = maxval(row)
+        end if
+        if(present(ncols)) then
+            ncols_ = ncols
+        else 
+            ncols_ = maxval(col)
+        end if
+        !---------------------------------------------------------
+        block
+            type(COO_${s1}$_type) :: COO
+            if(present(data)) then
+                call from_ijv(COO,row,col,data=data,nrows=nrows_,ncols=ncols_)
+            else 
+                call from_ijv(COO,row,col,nrows=nrows_,ncols=ncols_)
+            end if
+            call coo2csr(COO,CSR)
+        end block
+    end subroutine
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine ell_from_ijv_${s1}$(ELL,row,col,data,nrows,ncols,num_nz_rows)
+        type(ELL_${s1}$_type), intent(inout) :: ELL
+        integer(ilp), intent(in) :: row(:)
+        integer(ilp), intent(in) :: col(:)
+        ${t1}$, intent(in), optional :: data(:)
+        integer(ilp), intent(in), optional :: nrows
+        integer(ilp), intent(in), optional :: ncols
+        integer, intent(in), optional :: num_nz_rows
+
+        integer(ilp) :: nrows_, ncols_
+        !---------------------------------------------------------
+        if(present(nrows)) then
+            nrows_ = nrows
+        else 
+            nrows_ = maxval(row)
+        end if
+        if(present(ncols)) then
+            ncols_ = ncols
+        else 
+            ncols_ = maxval(col)
+        end if
+        !---------------------------------------------------------
+        block
+            type(COO_${s1}$_type) :: COO
+            type(CSR_${s1}$_type) :: CSR
+            if(present(data)) then
+                call from_ijv(COO,row,col,data=data,nrows=nrows_,ncols=ncols_)
+            else 
+                call from_ijv(COO,row,col,nrows=nrows_,ncols=ncols_)
+            end if
+            call coo2csr(COO,CSR)
+            if(present(num_nz_rows)) then
+                call csr2ell(CSR,ELL,num_nz_rows)
+            else 
+                call csr2ell(CSR,ELL)
+            end if
+        end block
+    end subroutine
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine sellc_from_ijv_${s1}$(SELLC,row,col,data,nrows,ncols,chunk)
+        type(SELLC_${s1}$_type), intent(inout) :: SELLC
+        integer(ilp), intent(in) :: row(:)
+        integer(ilp), intent(in) :: col(:)
+        ${t1}$, intent(in), optional :: data(:)
+        integer(ilp), intent(in), optional :: nrows
+        integer(ilp), intent(in), optional :: ncols
+        integer, intent(in), optional :: chunk
+
+        integer(ilp) :: nrows_, ncols_
+        !---------------------------------------------------------
+        if(present(nrows)) then
+            nrows_ = nrows
+        else 
+            nrows_ = maxval(row)
+        end if
+        if(present(ncols)) then
+            ncols_ = ncols
+        else 
+            ncols_ = maxval(col)
+        end if
+        if(present(chunk)) SELLC%chunk_size = chunk
+        !---------------------------------------------------------
+        block
+            type(COO_${s1}$_type) :: COO
+            type(CSR_${s1}$_type) :: CSR
+            if(present(data)) then
+                call from_ijv(COO,row,col,data=data,nrows=nrows_,ncols=ncols_)
+            else 
+                call from_ijv(COO,row,col,nrows=nrows_,ncols=ncols_)
+            end if
+            call coo2csr(COO,CSR)
+            call csr2sellc(CSR,SELLC)
+        end block
+    end subroutine
+    #:endfor
+
+    !! Diagonal extraction
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine dense2diagonal_${s1}$(dense,diagonal)
+        ${t1}$, intent(in) :: dense(:,:)
+        ${t1}$, intent(inout), allocatable :: diagonal(:)
+        integer :: num_rows
+        integer :: i
+
+        num_rows = size(dense,dim=1)
+        if(.not.allocated(diagonal)) allocate(diagonal(num_rows))
+
+        do i = 1, num_rows
+            diagonal(i) = dense(i,i)
+        end do
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine coo2diagonal_${s1}$(COO,diagonal)
+        type(COO_${s1}$_type), intent(in) :: COO
+        ${t1}$, intent(inout), allocatable :: diagonal(:)
+        integer :: idx
+
+        if(.not.allocated(diagonal)) allocate(diagonal(COO%nrows))
+
+        do concurrent(idx = 1:COO%nnz)
+            if(COO%index(1,idx)==COO%index(2,idx)) &
+            & diagonal( COO%index(1,idx) ) = COO%data(idx)
+        end do
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine csr2diagonal_${s1}$(CSR,diagonal)
+        type(CSR_${s1}$_type), intent(in) :: CSR
+        ${t1}$, intent(inout), allocatable :: diagonal(:)
+        integer :: i, j
+
+        if(.not.allocated(diagonal)) allocate(diagonal(CSR%nrows))
+
+        select case(CSR%storage)
+        case(sparse_lower)
+            do i = 1, CSR%nrows
+                diagonal(i) = CSR%data( CSR%rowptr(i+1)-1 )
+            end do
+        case(sparse_upper)
+            do i = 1, CSR%nrows
+                diagonal(i) = CSR%data( CSR%rowptr(i) )
+            end do
+        case(sparse_full)
+            do i = 1, CSR%nrows
+                do j = CSR%rowptr(i), CSR%rowptr(i+1)-1
+                    if( CSR%col(j) == i ) then
+                        diagonal(i) = CSR%data(j)
+                        exit
+                    end if
+                end do
+            end do
+        end select
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine csc2diagonal_${s1}$(CSC,diagonal)
+        type(CSC_${s1}$_type), intent(in) :: CSC
+        ${t1}$, intent(inout), allocatable :: diagonal(:)
+        integer :: i, j
+
+        if(.not.allocated(diagonal)) allocate(diagonal(CSC%nrows))
+
+        select case(CSC%storage)
+        case(sparse_lower)
+            do i = 1, CSC%ncols
+                diagonal(i) = CSC%data( CSC%colptr(i+1)-1 )
+            end do
+        case(sparse_upper)
+            do i = 1, CSC%ncols
+                diagonal(i) = CSC%data( CSC%colptr(i) )
+            end do
+        case(sparse_full)
+            do i = 1, CSC%ncols
+                do j = CSC%colptr(i), CSC%colptr(i+1)-1
+                    if( CSC%row(j) == i ) then
+                        diagonal(i) = CSC%data(j)
+                        exit
+                    end if
+                end do
+            end do
+        end select
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine ell2diagonal_${s1}$(ELL,diagonal)
+        type(ELL_${s1}$_type), intent(in) :: ELL
+        ${t1}$, intent(inout), allocatable :: diagonal(:)
+        integer :: i, k
+
+        if(.not.allocated(diagonal)) allocate(diagonal(ELL%nrows))
+        if( ELL%storage == sparse_full) then
+            do i = 1, ELL%nrows
+                do k = 1, ELL%K
+                    if(ELL%index(i,k)==i) diagonal(i) = ELL%data(i,k)
+                end do
+            end do
+        end if
+    end subroutine
+
+    #:endfor
+
+end module
\ No newline at end of file
diff --git a/src/stdlib_sparse_kinds.fypp b/src/stdlib_sparse_kinds.fypp
new file mode 100644
index 000000000..ceba2a62d
--- /dev/null
+++ b/src/stdlib_sparse_kinds.fypp
@@ -0,0 +1,593 @@
+#:include "common.fypp"
+#:set RANKS = range(1, 2+1)
+#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
+#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
+#:set KINDS_TYPES = R_KINDS_TYPES+C_KINDS_TYPES
+!! The `stdlib_sparse_kinds` module provides derived type definitions for different sparse matrices
+!!
+! This code was modified from https://github.com/jalvesz/FSPARSE by its author: Alves Jose
+module stdlib_sparse_kinds
+    use ieee_arithmetic
+    use stdlib_sparse_constants
+    implicit none
+    private
+    public :: sparse_full, sparse_lower, sparse_upper
+    public :: sparse_op_none, sparse_op_transpose, sparse_op_hermitian
+    !! version: experimental
+    !!
+    !! Base sparse type holding the meta data related to the storage capacity of a matrix.
+    type, public, abstract :: sparse_type
+      integer(ilp) :: nrows = 0 !! number of rows
+      integer(ilp) :: ncols = 0 !! number of columns
+      integer(ilp) :: nnz   = 0 !! number of non-zero values
+      integer :: storage = sparse_full !! assumed storage symmetry
+    end type
+
+    !! version: experimental
+    !!
+    !! COO: COOrdinates compresed format 
+    type, public, extends(sparse_type) :: COO_type
+      logical               :: is_sorted = .false. !! whether the matrix is sorted or not
+      integer(ilp), allocatable  :: index(:,:) !! Matrix coordinates index(2,nnz)
+    contains
+      procedure :: malloc => malloc_coo
+    end type
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    type, public, extends(COO_type) :: COO_${s1}$_type
+        ${t1}$, allocatable :: data(:) 
+    contains
+        procedure, non_overridable :: at => at_value_coo_${s1}$
+        procedure, non_overridable :: add_value => add_value_coo_${s1}$
+        procedure, non_overridable :: add_block => add_block_coo_${s1}$
+        generic :: add => add_value, add_block
+    end type
+    #:endfor
+
+    !! version: experimental
+    !!
+    !! CSR: Compressed sparse row or Yale format
+    type, public, extends(sparse_type) :: CSR_type  
+      integer(ilp), allocatable  :: col(:)    !! matrix column pointer
+      integer(ilp), allocatable  :: rowptr(:) !! matrix row pointer
+    contains
+      procedure :: malloc => malloc_csr
+    end type
+  
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    type, public, extends(CSR_type) :: CSR_${s1}$_type
+        ${t1}$, allocatable :: data(:) 
+    contains
+        procedure, non_overridable :: at => at_value_csr_${s1}$
+        procedure, non_overridable :: add_value => add_value_csr_${s1}$
+        procedure, non_overridable :: add_block => add_block_csr_${s1}$
+        generic :: add => add_value, add_block
+    end type
+    #:endfor
+
+    !! version: experimental
+    !!
+    !! CSC: Compressed sparse column
+    type, public, extends(sparse_type) :: CSC_type  
+      integer(ilp), allocatable  :: colptr(:) !! matrix column pointer
+      integer(ilp), allocatable  :: row(:)    !! matrix row pointer
+    contains
+      procedure :: malloc => malloc_csc
+    end type
+  
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    type, public, extends(CSC_type) :: CSC_${s1}$_type
+        ${t1}$, allocatable :: data(:) 
+    contains
+        procedure, non_overridable :: at => at_value_csc_${s1}$
+        procedure, non_overridable :: add_value => add_value_csc_${s1}$
+        procedure, non_overridable :: add_block => add_block_csc_${s1}$
+        generic :: add => add_value, add_block
+    end type
+    #:endfor
+
+    !! version: experimental
+    !!
+    !! Compressed ELLPACK
+    type, public, extends(sparse_type) :: ELL_type 
+      integer               :: K = 0 !! maximum number of nonzeros per row
+      integer(ilp), allocatable :: index(:,:) !! column indices
+    contains
+      procedure :: malloc => malloc_ell
+    end type
+  
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    type, public, extends(ELL_type) :: ELL_${s1}$_type
+        ${t1}$, allocatable :: data(:,:) 
+    contains
+        procedure, non_overridable :: at => at_value_ell_${s1}$
+        procedure, non_overridable :: add_value => add_value_ell_${s1}$
+        procedure, non_overridable :: add_block => add_block_ell_${s1}$
+        generic :: add => add_value, add_block
+    end type
+    #:endfor
+
+    !! version: experimental
+    !!
+    !! Compressed SELL-C 
+    !! Reference : https://library.eecs.utk.edu/storage/files/ut-eecs-14-727.pdf
+    type, public, extends(sparse_type) :: SELLC_type 
+      integer               :: chunk_size = 8 !!  default chunk size
+      integer(ilp), allocatable  :: rowptr(:) !! row pointer
+      integer(ilp), allocatable  :: col(:,:)  !! column indices
+    end type
+  
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    type, public, extends(SELLC_type) :: SELLC_${s1}$_type
+        ${t1}$, allocatable :: data(:,:)
+    contains
+        procedure, non_overridable :: at => at_value_sellc_${s1}$
+        procedure, non_overridable :: add_value => add_value_sellc_${s1}$
+        procedure, non_overridable :: add_block => add_block_sellc_${s1}$
+        generic :: add => add_value, add_block
+    end type
+    #:endfor
+
+contains
+
+    !! (re)Allocate matrix memory for the COO type
+    subroutine malloc_coo(self,num_rows,num_cols,nnz)
+        class(COO_type) :: self
+        integer(ilp), intent(in) :: num_rows !! number of rows
+        integer(ilp), intent(in) :: num_cols !! number of columns
+        integer(ilp), intent(in) :: nnz      !! number of non zeros
+
+        integer(ilp),  allocatable :: temp_idx(:,:)
+        !-----------------------------------------------------
+
+        self%nrows = num_rows
+        self%ncols = num_cols
+        self%nnz   = nnz
+
+        if(.not.allocated(self%index)) then
+            allocate(temp_idx(2,nnz) , source = 0 )
+        else
+            allocate(temp_idx(2,nnz) , source = self%index )
+        end if
+        call move_alloc(from=temp_idx,to=self%index)
+
+        select type(self)
+            #:for k1, t1, s1 in (KINDS_TYPES)
+            type is(COO_${s1}$_type)
+                block
+                ${t1}$, allocatable :: temp_data_${s1}$(:)
+                if(.not.allocated(self%data)) then
+                    allocate(temp_data_${s1}$(nnz) , source = zero_${s1}$ )
+                else
+                    allocate(temp_data_${s1}$(nnz) , source = self%data )
+                end if
+                call move_alloc(from=temp_data_${s1}$,to=self%data)
+                end block
+            #:endfor
+        end select
+    end subroutine
+
+    !! (re)Allocate matrix memory for the CSR type
+    subroutine malloc_csr(self,num_rows,num_cols,nnz)
+        class(CSR_type) :: self
+        integer(ilp), intent(in) :: num_rows !! number of rows
+        integer(ilp), intent(in) :: num_cols !! number of columns
+        integer(ilp), intent(in) :: nnz      !! number of non zeros
+
+        integer(ilp),  allocatable :: temp_idx(:)
+        !-----------------------------------------------------
+
+        self%nrows = num_rows
+        self%ncols = num_cols
+        self%nnz   = nnz
+
+        if(.not.allocated(self%col)) then
+            allocate(temp_idx(nnz) , source = 0 )
+        else
+            allocate(temp_idx(nnz) , source = self%col )
+        end if
+        call move_alloc(from=temp_idx,to=self%col)
+
+        if(.not.allocated(self%rowptr)) then
+            allocate(temp_idx(num_rows+1) , source = 0 )
+        else
+            allocate(temp_idx(num_rows+1) , source = self%rowptr )
+        end if
+        call move_alloc(from=temp_idx,to=self%rowptr)
+
+        select type(self)
+            #:for k1, t1, s1 in (KINDS_TYPES)
+            type is(CSR_${s1}$_type)
+                block
+                ${t1}$, allocatable :: temp_data_${s1}$(:)
+                if(.not.allocated(self%data)) then
+                    allocate(temp_data_${s1}$(nnz) , source = zero_${s1}$ )
+                else
+                    allocate(temp_data_${s1}$(nnz) , source = self%data )
+                end if
+                call move_alloc(from=temp_data_${s1}$,to=self%data)
+                end block
+            #:endfor
+        end select
+    end subroutine
+
+    !! (re)Allocate matrix memory for the CSC type
+    subroutine malloc_csc(self,num_rows,num_cols,nnz)
+        class(CSC_type) :: self
+        integer(ilp), intent(in) :: num_rows !! number of rows
+        integer(ilp), intent(in) :: num_cols !! number of columns
+        integer(ilp), intent(in) :: nnz      !! number of non zeros
+
+        integer(ilp),  allocatable :: temp_idx(:)
+        !-----------------------------------------------------
+
+        self%nrows = num_rows
+        self%ncols = num_cols
+        self%nnz   = nnz
+
+        if(.not.allocated(self%row)) then
+            allocate(temp_idx(nnz) , source = 0 )
+        else
+            allocate(temp_idx(nnz) , source = self%row )
+        end if
+        call move_alloc(from=temp_idx,to=self%row)
+
+        if(.not.allocated(self%colptr)) then
+            allocate(temp_idx(num_cols+1) , source = 0 )
+        else
+            allocate(temp_idx(num_cols+1) , source = self%colptr )
+        end if
+        call move_alloc(from=temp_idx,to=self%colptr)
+
+        select type(self)
+            #:for k1, t1, s1 in (KINDS_TYPES)
+            type is(CSC_${s1}$_type)
+                block
+                ${t1}$, allocatable :: temp_data_${s1}$(:)
+                if(.not.allocated(self%data)) then
+                    allocate(temp_data_${s1}$(nnz) , source = zero_${s1}$ )
+                else
+                    allocate(temp_data_${s1}$(nnz) , source = self%data )
+                end if
+                call move_alloc(from=temp_data_${s1}$,to=self%data)
+                end block
+            #:endfor
+        end select
+    end subroutine
+
+    !! (re)Allocate matrix memory for the ELLPACK type
+    subroutine malloc_ell(self,num_rows,num_cols,num_nz_rows)
+        class(ELL_type) :: self
+        integer(ilp), intent(in) :: num_rows    !! number of rows
+        integer(ilp), intent(in) :: num_cols    !! number of columns
+        integer(ilp), intent(in) :: num_nz_rows !! number of non zeros per row
+
+        integer(ilp),  allocatable :: temp_idx(:,:)
+        !-----------------------------------------------------
+
+        self%nrows = num_rows
+        self%ncols = num_cols
+        self%K     = num_nz_rows
+
+        if(.not.allocated(self%index)) then
+            allocate(temp_idx(num_rows,num_nz_rows) , source = 0 )
+        else
+            allocate(temp_idx(num_rows,num_nz_rows) , source = self%index )
+        end if
+        call move_alloc(from=temp_idx,to=self%index)
+
+        select type(self)
+            #:for k1, t1, s1 in (KINDS_TYPES)
+            type is(ELL_${s1}$_type)
+                block
+                ${t1}$, allocatable :: temp_data_${s1}$(:,:)
+                if(.not.allocated(self%data)) then
+                    allocate(temp_data_${s1}$(num_rows,num_nz_rows) , source = zero_${s1}$ )
+                else
+                    allocate(temp_data_${s1}$(num_rows,num_nz_rows) , source = self%data )
+                end if
+                call move_alloc(from=temp_data_${s1}$,to=self%data)
+                end block
+            #:endfor
+        end select
+    end subroutine
+
+    !==================================================================
+    ! data accessors
+    !==================================================================
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    pure ${t1}$ function at_value_coo_${s1}$(self,ik,jk) result(val)
+        class(COO_${s1}$_type), intent(in) :: self
+        integer(ilp), intent(in) :: ik, jk
+        integer(ilp) :: k, ik_, jk_
+        logical :: transpose
+        ! naive implementation
+        if( (ik<1 .or. ik>self%nrows) .or. (jk<1 .or. jk>self%ncols) ) then
+            val = ieee_value( 0._${k1}$ , ieee_quiet_nan)
+            return
+        end if
+        ik_ = ik; jk_ = jk
+        transpose = (self%storage == sparse_lower .and. ik > jk) .or. (self%storage == sparse_upper .and. ik < jk)
+        if(transpose) then ! allow extraction of symmetric elements
+            ik_ = jk; jk_ = ik
+        end if
+        do k = 1, self%nnz
+            if( ik_ == self%index(1,k) .and. jk_ == self%index(2,k) ) then
+                val = self%data(k)
+                return
+            end if
+        end do
+        val = zero_${s1}$
+    end function
+
+    subroutine add_value_coo_${s1}$(self,ik,jk,val)
+        class(COO_${s1}$_type), intent(inout) :: self
+        ${t1}$, intent(in) :: val
+        integer(ilp), intent(in) :: ik, jk
+        integer(ilp) :: k
+        ! naive implementation
+        do k = 1,self%nnz
+            if( ik == self%index(1,k) .and. jk == self%index(2,k) ) then
+                self%data(k) = self%data(k) + val
+                return
+            end if
+        end do
+    end subroutine
+
+    subroutine add_block_coo_${s1}$(self,ik,jk,val)
+        class(COO_${s1}$_type), intent(inout) :: self
+        ${t1}$, intent(in) :: val(:,:)
+        integer(ilp), intent(in) :: ik(:), jk(:)
+        integer(ilp) :: k, i, j
+        ! naive implementation
+        do k = 1, self%nnz
+            do i = 1, size(ik)
+                if( ik(i) /= self%index(1,k) ) cycle
+                do j = 1, size(jk)
+                    if( jk(j) /= self%index(2,k) ) cycle
+                    self%data(k) = self%data(k) + val(i,j)
+                end do
+            end do
+        end do
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    pure ${t1}$ function at_value_csr_${s1}$(self,ik,jk) result(val)
+        class(CSR_${s1}$_type), intent(in) :: self
+        integer(ilp), intent(in) :: ik, jk
+        integer(ilp) :: k, ik_, jk_
+        logical :: transpose
+        ! naive implementation
+        if( (ik<1 .or. ik>self%nrows) .or. (jk<1 .or. jk>self%ncols) ) then
+            val = ieee_value( 0._${k1}$ , ieee_quiet_nan)
+            return
+        end if
+        ik_ = ik; jk_ = jk
+        transpose = (self%storage == sparse_lower .and. ik > jk) .or. (self%storage == sparse_upper .and. ik < jk)
+        if(transpose) then ! allow extraction of symmetric elements
+            ik_ = jk; jk_ = ik
+        end if
+        do k = self%rowptr(ik_), self%rowptr(ik_+1)-1
+            if( jk_ == self%col(k) ) then
+                val = self%data(k)
+                return
+            end if
+        end do
+        val = zero_${s1}$
+    end function
+
+    subroutine add_value_csr_${s1}$(self,ik,jk,val)
+        class(CSR_${s1}$_type), intent(inout) :: self
+        ${t1}$, intent(in) :: val
+        integer(ilp), intent(in) :: ik, jk
+        integer(ilp) :: k
+        ! naive implementation
+        do k = self%rowptr(ik), self%rowptr(ik+1)-1
+            if( jk == self%col(k) ) then
+                self%data(k) = self%data(k) + val
+                return
+            end if
+        end do
+    end subroutine
+
+    subroutine add_block_csr_${s1}$(self,ik,jk,val)
+        class(CSR_${s1}$_type), intent(inout) :: self
+        ${t1}$, intent(in) :: val(:,:)
+        integer(ilp), intent(in) :: ik(:), jk(:)
+        integer(ilp) :: k, i, j
+        ! naive implementation
+        do i = 1, size(ik)
+            do k = self%rowptr(ik(i)), self%rowptr(ik(i)+1)-1
+                do j = 1, size(jk)
+                    if( jk(j) == self%col(k) ) then
+                        self%data(k) = self%data(k) + val(i,j)
+                    end if
+                end do
+            end do
+        end do
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    pure ${t1}$ function at_value_csc_${s1}$(self,ik,jk) result(val)
+        class(CSC_${s1}$_type), intent(in) :: self
+        integer(ilp), intent(in) :: ik, jk
+        integer(ilp) :: k, ik_, jk_
+        logical :: transpose
+        ! naive implementation
+        if( (ik<1 .or. ik>self%nrows) .or. (jk<1 .or. jk>self%ncols) ) then
+            val = ieee_value( 0._${k1}$ , ieee_quiet_nan)
+            return
+        end if
+        ik_ = ik; jk_ = jk
+        transpose = (self%storage == sparse_lower .and. ik > jk) .or. (self%storage == sparse_upper .and. ik < jk)
+        if(transpose) then ! allow extraction of symmetric elements
+            ik_ = jk; jk_ = ik
+        end if
+        do k = self%colptr(jk_), self%colptr(jk_+1)-1
+            if( ik_ == self%row(k) ) then
+                val = self%data(k)
+                return
+            end if
+        end do
+        val = zero_${s1}$
+    end function
+
+    subroutine add_value_csc_${s1}$(self,ik,jk,val)
+        class(CSC_${s1}$_type), intent(inout) :: self
+        ${t1}$, intent(in) :: val
+        integer(ilp), intent(in)  :: ik, jk
+        integer(ilp) :: k
+        ! naive implementation
+        do k = self%colptr(jk), self%colptr(jk+1)-1
+            if( ik == self%row(k) ) then
+                self%data(k) = self%data(k) + val
+                return
+            end if
+        end do
+    end subroutine
+
+    subroutine add_block_csc_${s1}$(self,ik,jk,val)
+        class(CSC_${s1}$_type), intent(inout) :: self
+        ${t1}$, intent(in) :: val(:,:)
+        integer(ilp), intent(in)  :: ik(:), jk(:)
+        integer(ilp) :: k, i, j
+        ! naive implementation
+        do j = 1, size(jk)
+            do k = self%colptr(jk(j)), self%colptr(jk(j)+1)-1
+                do i = 1, size(ik)
+                    if( ik(i) == self%row(k) ) then
+                        self%data(k) = self%data(k) + val(i,j)
+                    end if
+                end do
+            end do
+        end do
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    pure ${t1}$ function at_value_ell_${s1}$(self,ik,jk) result(val)
+        class(ELL_${s1}$_type), intent(in) :: self
+        integer(ilp), intent(in) :: ik, jk
+        integer(ilp) :: k, ik_, jk_
+        logical :: transpose
+        ! naive implementation
+        if( (ik<1 .or. ik>self%nrows) .or. (jk<1 .or. jk>self%ncols) ) then
+            val = ieee_value( 0._${k1}$ , ieee_quiet_nan)
+            return
+        end if
+        ik_ = ik; jk_ = jk
+        transpose = (self%storage == sparse_lower .and. ik > jk) .or. (self%storage == sparse_upper .and. ik < jk)
+        if(transpose) then ! allow extraction of symmetric elements
+            ik_ = jk; jk_ = ik
+        end if
+        do k = 1 , self%K
+            if( jk_ == self%index(ik_,k) ) then
+                val = self%data(ik_,k)
+                return
+            end if
+        end do
+        val = zero_${s1}$
+    end function
+
+    subroutine add_value_ell_${s1}$(self,ik,jk,val)
+        class(ELL_${s1}$_type), intent(inout) :: self
+        ${t1}$, intent(in) :: val
+        integer(ilp), intent(in)  :: ik, jk
+        integer(ilp) :: k
+        ! naive implementation
+        do k = 1 , self%K
+            if( jk == self%index(ik,k) ) then
+                self%data(ik,k) = self%data(ik,k) + val
+                return
+            end if
+        end do
+    end subroutine
+
+    subroutine add_block_ell_${s1}$(self,ik,jk,val)
+        class(ELL_${s1}$_type), intent(inout) :: self
+        ${t1}$, intent(in) :: val(:,:)
+        integer(ilp), intent(in)  :: ik(:), jk(:)
+        integer(ilp) :: k, i, j
+        ! naive implementation
+        do k = 1 , self%K
+            do j = 1, size(jk)
+                do i = 1, size(ik)
+                    if( jk(j) == self%index(ik(i),k) ) then
+                        self%data(ik(i),k) = self%data(ik(i),k) + val(i,j)
+                    end if
+                end do
+            end do
+        end do
+    end subroutine
+
+    #:endfor
+
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    pure ${t1}$ function at_value_sellc_${s1}$(self,ik,jk) result(val)
+        class(SELLC_${s1}$_type), intent(in) :: self
+        integer(ilp), intent(in) :: ik, jk
+        integer(ilp) :: k, ik_, jk_, idx
+        logical :: transpose
+        ! naive implementation
+        if( (ik<1 .or. ik>self%nrows) .or. (jk<1 .or. jk>self%ncols) ) then
+            val = ieee_value( 0._${k1}$ , ieee_quiet_nan)
+            return
+        end if
+        ik_ = ik; jk_ = jk
+        transpose = (self%storage == sparse_lower .and. ik > jk) .or. (self%storage == sparse_upper .and. ik < jk)
+        if(transpose) then ! allow extraction of symmetric elements
+            ik_ = jk; jk_ = ik
+        end if
+
+        idx = self%rowptr((ik_ - 1)/self%chunk_size + 1)
+        do k = 1, self%chunk_size
+            if ( jk_ == self%col(k,idx) )then
+                val = self%data(k,idx)
+                return
+            endif
+        end do
+        val = zero_${s1}$
+    end function
+
+    subroutine add_value_sellc_${s1}$(self,ik,jk,val)
+        class(SELLC_${s1}$_type), intent(inout) :: self
+        ${t1}$, intent(in) :: val
+        integer(ilp), intent(in)  :: ik, jk
+        integer(ilp) :: k, idx
+        ! naive implementation
+        idx = self%rowptr((ik - 1)/self%chunk_size + 1)
+        do k = 1, self%chunk_size
+            if ( jk == self%col(k,idx) )then
+                self%data(k,idx) = self%data(k,idx) + val
+                return
+            endif
+        end do
+    end subroutine
+
+    subroutine add_block_sellc_${s1}$(self,ik,jk,val)
+        class(SELLC_${s1}$_type), intent(inout) :: self
+        ${t1}$, intent(in) :: val(:,:)
+        integer(ilp), intent(in)  :: ik(:), jk(:)
+        integer(ilp) :: k, i, j, idx
+        ! naive implementation
+        do k = 1 , self%chunk_size
+            do j = 1, size(jk)
+                do i = 1, size(ik)
+                    idx = self%rowptr((ik(i) - 1)/self%chunk_size + 1)
+                    if( jk(j) == self%col(k,idx) ) then
+                        self%data(k,idx) = self%data(k,idx) + val(i,j)
+                    end if
+                end do
+            end do
+        end do
+    end subroutine
+
+    #:endfor
+    
+end module stdlib_sparse_kinds
\ No newline at end of file
diff --git a/src/stdlib_sparse_spmv.fypp b/src/stdlib_sparse_spmv.fypp
new file mode 100644
index 000000000..841c064e1
--- /dev/null
+++ b/src/stdlib_sparse_spmv.fypp
@@ -0,0 +1,527 @@
+#:include "common.fypp"
+#:set RANKS = range(1, 2+1)
+#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
+#:set C_KINDS_TYPES = list(zip(CMPLX_KINDS, CMPLX_TYPES, CMPLX_SUFFIX))
+#:set KINDS_TYPES = R_KINDS_TYPES+C_KINDS_TYPES
+#! define ranks without parentheses
+#:def rksfx2(rank)
+#{if rank > 0}#${":," + ":," * (rank - 1)}$#{endif}#
+#:enddef
+!! The `stdlib_sparse_spmv` submodule provides matrix-vector product kernels.
+!!
+! This code was modified from https://github.com/jalvesz/FSPARSE by its author: Alves Jose
+module stdlib_sparse_spmv
+    use stdlib_sparse_constants
+    use stdlib_sparse_kinds
+    implicit none
+    private
+
+    !! Version experimental
+    !!
+    !! Applay the sparse matrix-vector product $$y = \alpha * op(M) * x + \beta * y $$
+    !! [Specifications](../page/specs/stdlib_sparse.html#spmv)
+    interface spmv
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        #:for rank in RANKS
+        module procedure :: spmv_coo_${rank}$d_${s1}$
+        module procedure :: spmv_csr_${rank}$d_${s1}$
+        module procedure :: spmv_csc_${rank}$d_${s1}$
+        module procedure :: spmv_ell_${rank}$d_${s1}$
+        #:endfor
+        module procedure :: spmv_sellc_${s1}$
+        #:endfor
+    end interface
+    public :: spmv
+
+contains
+
+    !! spmv_coo
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    #:for rank in RANKS
+    subroutine spmv_coo_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op)
+        type(COO_${s1}$_type), intent(in) :: matrix
+        ${t1}$, intent(in)    :: vec_x${ranksuffix(rank)}$
+        ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$
+        ${t1}$, intent(in), optional :: alpha
+        ${t1}$, intent(in), optional :: beta
+        character(1), intent(in), optional :: op
+        ${t1}$ :: alpha_, beta_
+        character(1) :: op_
+        integer(ilp) :: k, ik, jk
+
+        op_ = sparse_op_none; if(present(op)) op_ = op
+        alpha_ = one_${k1}$
+        if(present(alpha)) alpha_ = alpha
+        if(present(beta)) then
+            vec_y = beta * vec_y
+        else 
+            vec_y = zero_${s1}$
+        endif
+        associate( data => matrix%data, index => matrix%index, storage => matrix%storage, nnz => matrix%nnz )
+        select case(op_)
+        case(sparse_op_none)
+            if(storage == sparse_full) then
+                do concurrent (k = 1:nnz)
+                    ik = index(1,k)
+                    jk = index(2,k)
+                    vec_y(${rksfx2(rank-1)}$ik) = vec_y(${rksfx2(rank-1)}$ik) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$jk)
+                end do
+
+            else 
+                do concurrent (k = 1:nnz)
+                    ik = index(1,k)
+                    jk = index(2,k)
+                    vec_y(${rksfx2(rank-1)}$ik) = vec_y(${rksfx2(rank-1)}$ik) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$jk)
+                    if( ik==jk ) cycle
+                    vec_y(${rksfx2(rank-1)}$jk) = vec_y(${rksfx2(rank-1)}$jk) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$ik)
+                end do
+
+            end if
+        case(sparse_op_transpose)
+            if(storage == sparse_full) then
+                do concurrent (k = 1:nnz)
+                    jk = index(1,k)
+                    ik = index(2,k)
+                    vec_y(${rksfx2(rank-1)}$ik) = vec_y(${rksfx2(rank-1)}$ik) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$jk)
+                end do
+
+            else 
+                do concurrent (k = 1:nnz)
+                    jk = index(1,k)
+                    ik = index(2,k)
+                    vec_y(${rksfx2(rank-1)}$ik) = vec_y(${rksfx2(rank-1)}$ik) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$jk)
+                    if( ik==jk ) cycle
+                    vec_y(${rksfx2(rank-1)}$jk) = vec_y(${rksfx2(rank-1)}$jk) + alpha_*data(k) * vec_x(${rksfx2(rank-1)}$ik)
+                end do
+
+            end if
+        #:if t1.startswith('complex') 
+        case(sparse_op_hermitian)
+            if(storage == sparse_full) then
+                do concurrent (k = 1:nnz)
+                    jk = index(1,k)
+                    ik = index(2,k)
+                    vec_y(${rksfx2(rank-1)}$ik) = vec_y(${rksfx2(rank-1)}$ik) + alpha_*conjg(data(k)) * vec_x(${rksfx2(rank-1)}$jk)
+                end do
+
+            else 
+                do concurrent (k = 1:nnz)
+                    jk = index(1,k)
+                    ik = index(2,k)
+                    vec_y(${rksfx2(rank-1)}$ik) = vec_y(${rksfx2(rank-1)}$ik) + alpha_*conjg(data(k)) * vec_x(${rksfx2(rank-1)}$jk)
+                    if( ik==jk ) cycle
+                    vec_y(${rksfx2(rank-1)}$jk) = vec_y(${rksfx2(rank-1)}$jk) + alpha_*conjg(data(k)) * vec_x(${rksfx2(rank-1)}$ik)
+                end do
+
+            end if
+        #:endif
+        end select
+        end associate
+    end subroutine
+
+    #:endfor
+    #:endfor
+
+    !! spmv_csr
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    #:for rank in RANKS
+    subroutine spmv_csr_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op)
+        type(CSR_${s1}$_type), intent(in) :: matrix
+        ${t1}$, intent(in)    :: vec_x${ranksuffix(rank)}$
+        ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$
+        ${t1}$, intent(in), optional :: alpha
+        ${t1}$, intent(in), optional :: beta
+        character(1), intent(in), optional :: op
+        ${t1}$ :: alpha_, beta_
+        character(1) :: op_
+        integer(ilp) :: i, j
+        #:if rank == 1
+        ${t1}$ :: aux, aux2
+        #:else
+        ${t1}$ :: aux(size(vec_x,dim=1)), aux2(size(vec_x,dim=1))
+        #:endif
+        
+        op_ = sparse_op_none; if(present(op)) op_ = op
+        alpha_ = one_${k1}$
+        if(present(alpha)) alpha_ = alpha
+        beta_ = zero_${k1}$
+        if(present(beta)) beta_ = beta
+
+        associate( data => matrix%data, col => matrix%col, rowptr => matrix%rowptr, &
+            & nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage )
+    
+            if( storage == sparse_full .and. op_==sparse_op_none ) then
+                do i = 1, nrows
+                    aux = zero_${k1}$
+                    do j = rowptr(i), rowptr(i+1)-1
+                        aux = aux + data(j) * vec_x(${rksfx2(rank-1)}$col(j))
+                    end do
+                    if(present(beta)) then
+                        vec_y(${rksfx2(rank-1)}$i) = beta_ * vec_y(${rksfx2(rank-1)}$i) + alpha_ * aux
+                    else
+                        vec_y(${rksfx2(rank-1)}$i) = alpha_ * aux
+                    end if
+                end do
+
+            else if( storage == sparse_full .and. op_==sparse_op_transpose ) then
+                if(present(beta)) then
+                    vec_y = beta * vec_y
+                else 
+                    vec_y = zero_${s1}$
+                endif
+                do i = 1, nrows
+                    aux = alpha_ * vec_x(${rksfx2(rank-1)}$i)
+                    do j = rowptr(i), rowptr(i+1)-1
+                        vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + data(j) * aux
+                    end do
+                end do
+                
+            else if( storage == sparse_lower .and. op_/=sparse_op_hermitian )then
+                do i = 1 , nrows
+                    aux  = zero_${s1}$
+                    aux2 = alpha_ * vec_x(${rksfx2(rank-1)}$i)
+                    do j = rowptr(i), rowptr(i+1)-2
+                        aux = aux + data(j) * vec_x(${rksfx2(rank-1)}$col(j))
+                        vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + data(j) * aux2
+                    end do
+                    aux = alpha_ * aux + data(j) * aux2
+
+                    if(present(beta)) then
+                        vec_y(${rksfx2(rank-1)}$i) = beta_ * vec_y(${rksfx2(rank-1)}$i) + aux
+                    else
+                        vec_y(${rksfx2(rank-1)}$i) = aux
+                    end if
+                end do
+
+            else if( storage == sparse_upper .and. op_/=sparse_op_hermitian )then
+                do i = 1 , nrows
+                    aux  = vec_x(${rksfx2(rank-1)}$i) * data(rowptr(i))
+                    aux2 = alpha_ * vec_x(${rksfx2(rank-1)}$i)
+                    do j = rowptr(i)+1, rowptr(i+1)-1
+                        aux = aux + data(j) * vec_x(${rksfx2(rank-1)}$col(j))
+                    end do
+                    if(present(beta)) then
+                        do j = rowptr(i)+1, rowptr(i+1)-1
+                            vec_y(${rksfx2(rank-1)}$col(j)) = beta_ * vec_y(${rksfx2(rank-1)}$col(j)) + data(j) * aux2
+                        end do
+                    else
+                        do j = rowptr(i)+1, rowptr(i+1)-1
+                            vec_y(${rksfx2(rank-1)}$col(j)) = data(j) * aux2
+                        end do
+                    end if
+                    vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * aux
+                end do
+                
+            #:if t1.startswith('complex')
+            else if( storage == sparse_full .and. op_==sparse_op_hermitian) then
+                if(present(beta)) then
+                    vec_y = beta * vec_y
+                else 
+                    vec_y = zero_${s1}$
+                endif
+                do i = 1, nrows
+                    aux = alpha_ * vec_x(${rksfx2(rank-1)}$i)
+                    do j = rowptr(i), rowptr(i+1)-1
+                        vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + conjg(data(j)) * aux
+                    end do
+                end do
+
+            else if( storage == sparse_lower .and. op_==sparse_op_hermitian )then
+                do i = 1 , nrows
+                    aux  = zero_${s1}$
+                    aux2 = alpha_ * vec_x(${rksfx2(rank-1)}$i)
+                    do j = rowptr(i), rowptr(i+1)-2
+                        aux = aux + conjg(data(j)) * vec_x(${rksfx2(rank-1)}$col(j))
+                        vec_y(${rksfx2(rank-1)}$col(j)) = vec_y(${rksfx2(rank-1)}$col(j)) + conjg(data(j)) * aux2
+                    end do
+                    aux = alpha_ * aux + conjg(data(j)) * aux2
+
+                    if(present(beta)) then
+                        vec_y(${rksfx2(rank-1)}$i) = beta_ * vec_y(${rksfx2(rank-1)}$i) + aux
+                    else
+                        vec_y(${rksfx2(rank-1)}$i) = aux
+                    end if
+                end do
+
+            else if( storage == sparse_upper .and. op_==sparse_op_hermitian )then
+                do i = 1 , nrows
+                    aux  = vec_x(${rksfx2(rank-1)}$i) * conjg(data(rowptr(i)))
+                    aux2 = alpha_ * vec_x(${rksfx2(rank-1)}$i)
+                    do j = rowptr(i)+1, rowptr(i+1)-1
+                        aux = aux + conjg(data(j)) * vec_x(${rksfx2(rank-1)}$col(j))
+                    end do
+                    if(present(beta)) then
+                        do j = rowptr(i)+1, rowptr(i+1)-1
+                            vec_y(${rksfx2(rank-1)}$col(j)) = beta_ * vec_y(${rksfx2(rank-1)}$col(j)) + conjg(data(j)) * aux2
+                        end do
+                    else
+                        do j = rowptr(i)+1, rowptr(i+1)-1
+                            vec_y(${rksfx2(rank-1)}$col(j)) = conjg(data(j)) * aux2
+                        end do
+                    end if
+                    vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * aux
+                end do
+            #:endif
+            end if
+        end associate
+    end subroutine
+    
+    #:endfor
+    #:endfor
+
+    !! spmv_csc
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    #:for rank in RANKS
+    subroutine spmv_csc_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op)
+        type(CSC_${s1}$_type), intent(in) :: matrix
+        ${t1}$, intent(in)    :: vec_x${ranksuffix(rank)}$
+        ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$
+        ${t1}$, intent(in), optional :: alpha
+        ${t1}$, intent(in), optional :: beta
+        character(1), intent(in), optional :: op
+        ${t1}$ :: alpha_, beta_
+        character(1) :: op_
+        integer(ilp) :: i, j
+        #:if rank == 1
+        ${t1}$ :: aux
+        #:else
+        ${t1}$ :: aux(size(vec_x,dim=1))
+        #:endif
+
+        op_ = sparse_op_none; if(present(op)) op_ = op
+        alpha_ = one_${k1}$
+        if(present(alpha)) alpha_ = alpha
+        if(present(beta)) then
+            vec_y = beta * vec_y
+        else 
+            vec_y = zero_${s1}$
+        endif
+
+        associate( data => matrix%data, colptr => matrix%colptr, row => matrix%row, &
+            & nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage )
+            if( storage == sparse_full .and. op_==sparse_op_none ) then
+                do concurrent(j=1:ncols)
+                    aux = alpha_ * vec_x(${rksfx2(rank-1)}$j)
+                    do i = colptr(j), colptr(j+1)-1
+                        vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + data(i) * aux
+                    end do
+                end do
+
+            else if( storage == sparse_full .and. op_==sparse_op_transpose ) then
+                do concurrent(j=1:ncols)
+                    aux = zero_${k1}$
+                    do i = colptr(j), colptr(j+1)-1
+                        aux = aux + data(i) * vec_x(${rksfx2(rank-1)}$row(i))
+                    end do
+                    vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux
+                end do
+
+            else if( storage == sparse_lower .and. op_/=sparse_op_hermitian )then
+                do j = 1 , ncols
+                    aux  = vec_x(${rksfx2(rank-1)}$j) * data(colptr(j))
+                    do i = colptr(j)+1, colptr(j+1)-1
+                        aux = aux + data(i) * vec_x(${rksfx2(rank-1)}$row(i))
+                        vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + alpha_ * data(i) * vec_x(${rksfx2(rank-1)}$j)
+                    end do
+                    vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux
+                end do
+
+            else if( storage == sparse_upper .and. op_/=sparse_op_hermitian )then
+                do j = 1 , ncols
+                    aux  = zero_${s1}$
+                    do i = colptr(j), colptr(i+1)-2
+                        aux = aux + data(i) * vec_x(${rksfx2(rank-1)}$j)
+                        vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * data(i) * vec_x(${rksfx2(rank-1)}$row(i))
+                    end do
+                    aux = aux + data(colptr(j)) * vec_x(${rksfx2(rank-1)}$j)
+                    vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux
+                end do
+
+            #:if t1.startswith('complex')
+            else if( storage == sparse_full .and. op_==sparse_op_hermitian ) then
+                do concurrent(j=1:ncols)
+                    aux = zero_${k1}$
+                    do i = colptr(j), colptr(j+1)-1
+                        aux = aux + conjg(data(i)) * vec_x(${rksfx2(rank-1)}$row(i))
+                    end do
+                    vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux
+                end do
+
+            else if( storage == sparse_lower .and. op_==sparse_op_hermitian )then
+                do j = 1 , ncols
+                    aux  = vec_x(${rksfx2(rank-1)}$j) * conjg(data(colptr(j)))
+                    do i = colptr(j)+1, colptr(j+1)-1
+                        aux = aux + conjg(data(i)) * vec_x(${rksfx2(rank-1)}$row(i))
+                        vec_y(${rksfx2(rank-1)}$row(i)) = vec_y(${rksfx2(rank-1)}$row(i)) + alpha_ * conjg(data(i)) * vec_x(${rksfx2(rank-1)}$j)
+                    end do
+                    vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux
+                end do
+
+            else if( storage == sparse_upper .and. op_==sparse_op_hermitian )then
+                do j = 1 , ncols
+                    aux  = zero_${s1}$
+                    do i = colptr(j), colptr(i+1)-2
+                        aux = aux + conjg(data(i)) * vec_x(${rksfx2(rank-1)}$j)
+                        vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_ * conjg(data(i)) * vec_x(${rksfx2(rank-1)}$row(i))
+                    end do
+                    aux = aux + conjg(data(colptr(j))) * vec_x(${rksfx2(rank-1)}$j)
+                    vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_ * aux
+                end do
+            #:endif
+            end if
+        end associate
+    end subroutine
+    
+    #:endfor
+    #:endfor
+
+    !! spmv_ell
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    #:for rank in RANKS
+    subroutine spmv_ell_${rank}$d_${s1}$(matrix,vec_x,vec_y,alpha,beta,op)
+        type(ELL_${s1}$_type), intent(in) :: matrix
+        ${t1}$, intent(in)    :: vec_x${ranksuffix(rank)}$
+        ${t1}$, intent(inout) :: vec_y${ranksuffix(rank)}$
+        ${t1}$, intent(in), optional :: alpha
+        ${t1}$, intent(in), optional :: beta
+        character(1), intent(in), optional :: op
+        ${t1}$ :: alpha_, beta_
+        character(1) :: op_
+        integer(ilp) :: i, j, k
+        
+        op_ = sparse_op_none; if(present(op)) op_ = op
+        alpha_ = one_${k1}$
+        if(present(alpha)) alpha_ = alpha
+        if(present(beta)) then
+            vec_y = beta * vec_y
+        else 
+            vec_y = zero_${s1}$
+        endif
+        associate( data => matrix%data, index => matrix%index, MNZ_P_ROW => matrix%K, &
+            & nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage )
+            if( storage == sparse_full .and. op_==sparse_op_none ) then
+                do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW)
+                    j = index(i,k)
+                    if(j>0) vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$j)
+                end do
+            else if( storage == sparse_full .and. op_==sparse_op_transpose ) then
+                do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW)
+                    j = index(i,k)
+                    if(j>0) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$i)
+                end do
+            else if( storage /= sparse_full .and. op_/=sparse_op_hermitian ) then
+                do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW)
+                    j = index(i,k)
+                    if(j>0) then
+                        vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$j)
+                        if(i==j) cycle 
+                        vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*data(i,k) * vec_x(${rksfx2(rank-1)}$i)
+                    end if
+                end do
+            #:if t1.startswith('complex')
+            else if( storage == sparse_full .and. op_==sparse_op_hermitian ) then
+                do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW)
+                    j = index(i,k)
+                    if(j>0) vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$i)
+                end do
+            else if( storage /= sparse_full .and. op_==sparse_op_hermitian ) then
+                do concurrent (i = 1:nrows, k = 1:MNZ_P_ROW)
+                    j = index(i,k)
+                    if(j>0) then
+                        vec_y(${rksfx2(rank-1)}$i) = vec_y(${rksfx2(rank-1)}$i) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$j)
+                        if(i==j) cycle 
+                        vec_y(${rksfx2(rank-1)}$j) = vec_y(${rksfx2(rank-1)}$j) + alpha_*conjg(data(i,k)) * vec_x(${rksfx2(rank-1)}$i)
+                    end if
+                end do
+            #:endif
+            end if
+        end associate
+    end subroutine
+    
+    #:endfor
+    #:endfor
+
+    !! spmv_sellc
+    #:set CHUNKS = [4,8,16]
+    #:for k1, t1, s1 in (KINDS_TYPES)
+    subroutine spmv_sellc_${s1}$(matrix,vec_x,vec_y,alpha,beta,op)
+        !! This algorithm was gracefully provided by Ivan Privec and adapted by Jose Alves
+        type(SELLC_${s1}$_type), intent(in) :: matrix
+        ${t1}$, intent(in)    :: vec_x(:)
+        ${t1}$, intent(inout) :: vec_y(:)
+        ${t1}$, intent(in), optional :: alpha
+        ${t1}$, intent(in), optional :: beta
+        character(1), intent(in), optional :: op
+        ${t1}$ :: alpha_, beta_
+        character(1) :: op_
+        integer(ilp) :: i, nz, rowidx, num_chunks, rm
+
+        op_ = sparse_op_none; if(present(op)) op_ = op
+        alpha_ = one_${s1}$
+        if(present(alpha)) alpha_ = alpha
+        if(present(beta)) then
+            vec_y = beta * vec_y
+        else 
+            vec_y = zero_${s1}$
+        endif
+        associate( data => matrix%data, ia => matrix%rowptr , ja => matrix%col, cs => matrix%chunk_size, &
+        &   nnz => matrix%nnz, nrows => matrix%nrows, ncols => matrix%ncols, storage => matrix%storage  )
+        num_chunks = nrows / cs
+        rm = nrows - num_chunks * cs
+        if( storage == sparse_full) then
+
+            select case(cs)
+            #:for chunk in CHUNKS
+            case(${chunk}$)
+                do i = 1, num_chunks
+                    nz = ia(i+1) - ia(i)
+                    rowidx = (i - 1)*${chunk}$ + 1 
+                    call chunk_kernel_${chunk}$(nz,data(:,ia(i)),ja(:,ia(i)),vec_x,vec_y(rowidx:))
+                end do
+            #:endfor
+            case default
+                print *, "error: chunk size not supported."
+                return
+            end select
+            
+            ! remainder
+            if(rm>0)then 
+                i = num_chunks + 1 
+                nz = ia(i+1) - ia(i)
+                rowidx = (i - 1)*cs + 1
+                call chunk_kernel_remainder(nz,cs,rm,data(:,ia(i)),ja(:,ia(i)),vec_x,vec_y(rowidx:))
+            end if
+
+        end if
+        end associate
+
+    contains
+    #:for chunk in CHUNKS
+    pure subroutine chunk_kernel_${chunk}$(nz,a,ja,x,y)
+        integer, value      :: nz
+        ${t1}$, intent(in)  :: a(${chunk}$,nz), x(*)
+        integer(ilp), intent(in) :: ja(${chunk}$,nz)
+        ${t1}$, intent(inout) :: y(${chunk}$)
+        integer :: j
+        do j = 1, nz
+            where(ja(:,j) > 0) y = y + alpha_ * a(:,j) * x(ja(:,j))               
+        end do
+    end subroutine
+    #:endfor
+
+    pure subroutine chunk_kernel_remainder(nz,cs,rm,a,ja,x,y)
+        integer, value      :: nz, cs, rm
+        ${t1}$, intent(in)  :: a(cs,nz), x(*)
+        integer(ilp), intent(in) :: ja(cs,nz)
+        ${t1}$, intent(inout) :: y(rm)
+        integer :: j
+        do j = 1, nz
+            where(ja(1:rm,j) > 0) y = y + alpha_ * a(1:rm,j) * x(ja(1:rm,j))               
+        end do
+    end subroutine  
+
+    end subroutine
+    
+    #:endfor
+    
+end module
\ No newline at end of file
diff --git a/test/linalg/CMakeLists.txt b/test/linalg/CMakeLists.txt
index f835872df..c3b3a721f 100644
--- a/test/linalg/CMakeLists.txt
+++ b/test/linalg/CMakeLists.txt
@@ -12,6 +12,7 @@ set(
   "test_linalg_qr.fypp"
   "test_linalg_svd.fypp"
   "test_linalg_matrix_property_checks.fypp"
+  "test_sparse_spmv.fypp"
 )
 fypp_f90("${fyppFlags}" "${fppFiles}" outFiles)
 
diff --git a/test/linalg/test_sparse_spmv.fypp b/test/linalg/test_sparse_spmv.fypp
new file mode 100644
index 000000000..838243505
--- /dev/null
+++ b/test/linalg/test_sparse_spmv.fypp
@@ -0,0 +1,404 @@
+#:include "common.fypp"
+#:set R_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES, REAL_SUFFIX))
+#:set KINDS_TYPES = R_KINDS_TYPES
+module test_sparse_spmv
+    use testdrive, only : new_unittest, unittest_type, error_type, check, skip_test
+    use stdlib_kinds, only: sp, dp, xdp, qp, int8, int16, int32, int64
+    use stdlib_sparse
+
+    implicit none
+
+contains
+
+
+    !> Collect all exported unit tests
+    subroutine collect_suite(testsuite)
+        !> Collection of tests
+        type(unittest_type), allocatable, intent(out) :: testsuite(:)
+
+        testsuite = [ &
+            new_unittest('coo', test_coo), &
+            new_unittest('coo2ordered', test_coo2ordered), &
+            new_unittest('csr', test_csr), &
+            new_unittest('csc', test_csc), &
+            new_unittest('ell', test_ell),  &
+            new_unittest('sellc', test_sellc),  &
+            new_unittest('symmetries', test_symmetries), &
+            new_unittest('diagonal', test_diagonal), &
+            new_unittest('add_get_values', test_add_get_values) &
+        ]
+    end subroutine
+
+    subroutine test_coo(error)
+        !> Error handling
+        type(error_type), allocatable, intent(out) :: error
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        block
+            integer, parameter :: wp = ${k1}$
+            type(COO_${s1}$_type) :: COO
+            ${t1}$, allocatable :: dense(:,:)
+            ${t1}$, allocatable :: vec_x(:)
+            ${t1}$, allocatable :: vec_y1(:), vec_y2(:)
+
+            allocate( dense(4,5) , source = &
+                    reshape(real([9,4, 0,4, &
+                                  0,7, 8,0, &
+                                  0,0,-1,5, &
+                                  0,0, 8,6, &
+                                 -3,0, 0,0],kind=wp),[4,5]) )
+            
+            call dense2coo( dense , COO )
+            
+            allocate( vec_x(5)  , source = 1._wp )
+            allocate( vec_y1(4) , source = 0._wp )
+            allocate( vec_y2(4) , source = 0._wp )
+
+            vec_y1 = matmul( dense, vec_x )
+            
+            call check(error, all(vec_y1 == real([6,11,15,15],kind=wp)) )
+            if (allocated(error)) return
+
+            call spmv( COO, vec_x, vec_y2 )
+            call check(error, all(vec_y1 == vec_y2) )
+            if (allocated(error)) return
+
+            ! Test in-place transpose
+            vec_y1 = 1._wp
+            call spmv( COO, vec_y1, vec_x, op=sparse_op_transpose )
+            call check(error, all(vec_x == real([17,15,4,14,-3],kind=wp)) )
+            if (allocated(error)) return
+        end block
+        #:endfor
+    end subroutine
+
+    subroutine test_coo2ordered(error)
+        !> Error handling
+        type(error_type), allocatable, intent(out) :: error
+        type(COO_sp_type) :: COO
+        integer :: row(12), col(12)
+        real    :: data(12)
+
+        row = [1,1,1,2,2,3,2,2,2,3,3,4]
+        col = [2,3,4,3,4,4,3,4,5,4,5,5]
+        data = 1.0
+
+        call from_ijv(COO,row,col,data)
+
+        call coo2ordered(COO,sort_data=.true.)
+        call check(error, COO%nnz < 12 .and. COO%nnz == 9 )
+        if (allocated(error)) return
+
+        call check(error, all(COO%data==[1,1,1,2,2,1,2,1,1])  )
+        if (allocated(error)) return
+
+        call check(error, all(COO%index(1,:)==[1,1,1,2,2,2,3,3,4])  )
+        if (allocated(error)) return
+
+        call check(error, all(COO%index(2,:)==[2,3,4,3,4,5,4,5,5])  )
+        if (allocated(error)) return
+
+    end subroutine 
+
+    subroutine test_csr(error)
+        !> Error handling
+        type(error_type), allocatable, intent(out) :: error
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        block
+            integer, parameter :: wp = ${k1}$
+            type(CSR_${s1}$_type) :: CSR
+            ${t1}$, allocatable :: vec_x(:)
+            ${t1}$, allocatable :: vec_y(:)
+
+            call CSR%malloc(4,5,10)
+            CSR%data(:)   = real([9,-3,4,7,8,-1,8,4,5,6],kind=wp)
+            CSR%col(:)    = [1,5,1,2,2,3,4,1,3,4]
+            CSR%rowptr(:) = [1,3,5,8,11]
+            
+            allocate( vec_x(5) , source = 1._wp )
+            allocate( vec_y(4) , source = 0._wp )
+            call spmv( CSR, vec_x, vec_y )
+            
+            call check(error, all(vec_y == real([6,11,15,15],kind=wp)) )
+            if (allocated(error)) return
+
+            ! Test in-place transpose
+            vec_y = 1._wp
+            call spmv( CSR, vec_y, vec_x, op=sparse_op_transpose )
+            call check(error, all(vec_x == real([17,15,4,14,-3],kind=wp)) )
+            if (allocated(error)) return
+        end block
+        #:endfor
+    end subroutine
+
+    subroutine test_csc(error)
+        !> Error handling
+        type(error_type), allocatable, intent(out) :: error
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        block
+            integer, parameter :: wp = ${k1}$
+            type(CSC_${s1}$_type) :: CSC
+            ${t1}$, allocatable :: vec_x(:)
+            ${t1}$, allocatable :: vec_y(:)
+
+            call CSC%malloc(4,5,10)
+            CSC%data(:)   = real([9,4,4,7,8,-1,5,8,6,-3],kind=wp)
+            CSC%row(:)    = [1,2,4,2,3,3,4,3,4,1]
+            CSC%colptr(:) = [1,4,6,8,10,11]
+            
+            allocate( vec_x(5) , source = 1._wp )
+            allocate( vec_y(4) , source = 0._wp )
+            call spmv( CSC, vec_x, vec_y )
+            
+            call check(error, all(vec_y == real([6,11,15,15],kind=wp)) )
+            if (allocated(error)) return
+
+            ! Test in-place transpose
+            vec_y = 1._wp
+            call spmv( CSC, vec_y, vec_x, op=sparse_op_transpose )
+            call check(error, all(vec_x == real([17,15,4,14,-3],kind=wp)) )
+            if (allocated(error)) return
+        end block
+        #:endfor
+    end subroutine
+
+    subroutine test_ell(error)
+        !> Error handling
+        type(error_type), allocatable, intent(out) :: error
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        block
+            integer, parameter :: wp = ${k1}$
+            type(ELL_${s1}$_type) :: ELL
+            ${t1}$, allocatable :: vec_x(:)
+            ${t1}$, allocatable :: vec_y(:)
+
+            call ELL%malloc(4,5,3)
+            ELL%data(1,1:3)   = real([9,-3,0],kind=wp)
+            ELL%data(2,1:3)   = real([4,7,0],kind=wp)
+            ELL%data(3,1:3)   = real([8,-1,8],kind=wp)
+            ELL%data(4,1:3)   = real([4,5,6],kind=wp)
+
+            ELL%index(1,1:3) = [1,5,0]
+            ELL%index(2,1:3) = [1,2,0]
+            ELL%index(3,1:3) = [2,3,4]
+            ELL%index(4,1:3) = [1,3,4]
+            
+            allocate( vec_x(5) , source = 1._wp )
+            allocate( vec_y(4) , source = 0._wp )
+            call spmv( ELL, vec_x, vec_y )
+            
+            call check(error, all(vec_y == real([6,11,15,15],kind=wp)) )
+            if (allocated(error)) return
+
+            ! Test in-place transpose
+            vec_y = 1._wp
+            call spmv( ELL, vec_y, vec_x, op=sparse_op_transpose )
+            call check(error, all(vec_x == real([17,15,4,14,-3],kind=wp)) )
+            if (allocated(error)) return
+        end block
+        #:endfor
+        
+    end subroutine
+
+    subroutine test_sellc(error)
+        !> Error handling
+        type(error_type), allocatable, intent(out) :: error
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        block
+            integer, parameter :: wp = ${k1}$
+            type(SELLC_${s1}$_type) :: SELLC
+            type(CSR_${s1}$_type)   :: CSR
+            ${t1}$, allocatable :: vec_x(:)
+            ${t1}$, allocatable :: vec_y(:)
+            integer :: i
+
+            call CSR%malloc(6,6,17)
+            !           1   2   3   4   5   6 
+            CSR%col = [ 1,      3,  4,         &
+                            2,  3,      5,  6, &
+                        1,  2,  3,             &
+                                        5,  6, &
+                                    4,  5,     &
+                            2,          5,  6]
+            CSR%rowptr = [1,4,8,11,13,15,18]
+            CSR%data = [(real(i,kind=wp),i=1,CSR%nnz)] 
+            
+            call csr2sellc(CSR,SELLC,4)
+
+            allocate( vec_x(6) , source = 1._wp )
+            allocate( vec_y(6) , source = 0._wp )
+
+            call spmv( SELLC, vec_x, vec_y )
+            
+            call check(error, all(vec_y == real([6,22,27,23,27,48],kind=wp)) )
+            if (allocated(error)) return
+        end block
+        #:endfor
+    end subroutine
+
+    subroutine test_symmetries(error)
+        !> Error handling
+        type(error_type), allocatable, intent(out) :: error
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        block
+            integer, parameter :: wp = ${k1}$
+            type(COO_${s1}$_type) :: COO
+            type(CSR_${s1}$_type) :: CSR
+            type(ELL_${s1}$_type) :: ELL
+            ${t1}$, allocatable :: dense(:,:)
+            ${t1}$, allocatable :: vec_x(:)
+            ${t1}$, allocatable :: vec_y1(:), vec_y2(:), vec_y3(:), vec_y4(:)
+
+            allocate( vec_x(4)  , source = 1._wp )
+            allocate( vec_y1(4) , source = 0._wp )
+            allocate( vec_y2(4) , source = 0._wp )
+            allocate( vec_y3(4) , source = 0._wp )
+            allocate( vec_y4(4) , source = 0._wp )
+
+            allocate( dense(4,4) , source = &
+                    reshape(real([1,0,0,0, &
+                                  2,1,0,0, &
+                                  0,2,1,0,&
+                                  0,0,2,1],kind=wp),[4,4]) )
+
+            call dense2coo( dense , COO )
+            COO%storage = sparse_upper
+            call coo2csr(COO, CSR)
+            call csr2ell(CSR, ELL)
+
+            dense(2,1) = 2._wp; dense(3,2) = 2._wp; dense(4,3) = 2._wp
+            vec_y1 = matmul( dense, vec_x )
+            call check(error, all(vec_y1 == [3,5,5,3]) )
+            if (allocated(error)) return
+
+            call spmv( COO , vec_x, vec_y2 )
+            call check(error, all(vec_y1 == vec_y2) )
+            if (allocated(error)) return
+
+            call spmv( CSR , vec_x, vec_y3 )
+            call check(error, all(vec_y1 == vec_y3) )
+            if (allocated(error)) return
+
+            call spmv( ELL , vec_x, vec_y4 )
+            call check(error, all(vec_y1 == vec_y4) )
+            if (allocated(error)) return
+        end block
+        #:endfor
+    end subroutine
+
+    subroutine test_diagonal(error)
+        !> Error handling
+        type(error_type), allocatable, intent(out) :: error
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        block
+            integer, parameter :: wp = ${k1}$
+            ${t1}$, allocatable :: dense(:,:)
+            type(COO_${s1}$_type) :: COO
+            type(CSR_${s1}$_type) :: CSR
+            type(CSC_${s1}$_type) :: CSC
+            type(ELL_${s1}$_type) :: ELL
+            ${t1}$, allocatable :: diagonal(:)
+
+            allocate( dense(4,4) , source = &
+                    reshape(real([1,0,0,5, &
+                                  0,2,0,0, &
+                                  0,6,3,0,&
+                                  0,0,7,4],kind=wp),[4,4]) )
+
+            call diag(dense,diagonal)
+            call check(error, all(diagonal == [1,2,3,4]) )
+            if (allocated(error)) return
+
+            diagonal = 0.0
+            call dense2coo( dense , COO )
+            call diag( COO , diagonal )
+            call check(error, all(diagonal == [1,2,3,4]) )
+            if (allocated(error)) return
+
+            diagonal = 0.0
+            call coo2csr( COO, CSR )
+            call diag( CSR , diagonal )
+            call check(error, all(diagonal == [1,2,3,4]) )
+            if (allocated(error)) return
+
+            diagonal = 0.0
+            call coo2csc( COO, CSC )
+            call diag( CSC , diagonal )
+            call check(error, all(diagonal == [1,2,3,4]) )
+            if (allocated(error)) return
+        end block
+        #:endfor
+    end subroutine
+
+    subroutine test_add_get_values(error)
+        !> Error handling
+        type(error_type), allocatable, intent(out) :: error
+        #:for k1, t1, s1 in (KINDS_TYPES)
+        block
+            integer, parameter :: wp = ${k1}$
+            real(wp) :: dense(5,5), mat(2,2)
+            type(COO_${s1}$_type) :: COO
+            type(CSR_${s1}$_type) :: CSR
+            ${t1}$:: err
+            integer :: i, j, locdof(2)
+
+            mat(:,1) = [1,2]; mat(:,2) = [2,1]
+            dense = 0._wp
+            do i = 0, 3
+                dense(1+i:2+i,1+i:2+i) = dense(1+i:2+i,1+i:2+i) + mat
+            end do
+
+            call dense2coo(dense,COO)
+            call coo2csr(COO,CSR)
+
+            CSR%data = 0._wp
+
+            do i = 0, 3
+                locdof(1:2) = [1+i,2+i] 
+                call CSR%add(locdof,locdof,mat)
+            end do
+
+            call check(error, all(CSR%data == COO%data) )
+            if (allocated(error)) return
+
+            err = 0._wp
+            do i = 1, 5
+                do j = 1, 5
+                    err = err + abs(dense(i,j) - CSR%at(i,j))
+                end do
+            end do
+            err = err / 5*5
+
+            call check(error, err <= epsilon(0._wp) )
+            if (allocated(error)) return
+        end block
+        #:endfor
+    end subroutine
+
+end module
+
+
+program tester
+    use, intrinsic :: iso_fortran_env, only : error_unit
+    use testdrive, only : run_testsuite, new_testsuite, testsuite_type
+    use test_sparse_spmv, only : collect_suite
+    implicit none
+    integer :: stat, is
+    type(testsuite_type), allocatable :: testsuites(:)
+    character(len=*), parameter :: fmt = '("#", *(1x, a))'
+
+    stat = 0
+
+    testsuites = [ &
+        new_testsuite("sparse", collect_suite) &
+        ]
+
+    do is = 1, size(testsuites)
+        write(error_unit, fmt) "Testing:", testsuites(is)%name
+        call run_testsuite(testsuites(is)%collect, error_unit, stat)
+    end do
+
+    if (stat > 0) then
+        write(error_unit, '(i0, 1x, a)') stat, "test(s) failed!"
+        error stop
+    end if
+end program