diff --git a/benchmarks/src/Makefile b/benchmarks/src/Makefile
deleted file mode 100644
index ce403d231..000000000
--- a/benchmarks/src/Makefile
+++ /dev/null
@@ -1,103 +0,0 @@
-# USE G++-10 for baremetal testing, G++-12 for Docker use
-# Check if running within a Docker container
-IS_DOCKER := $(shell test -f /.dockerenv && echo 1)
-
-ifeq ($(IS_DOCKER),1)
-    # If running within a Docker container, use g++-12
-    CC := g++-12
-else
-    # If not running within a Docker container, use g++-10
-    CC := g++-10
-endif
-
-CCSTD = -std=c++20 -march=native -lpthread
-CCFLGS = -pg -g -Wall -Wextra -pedantic -Wno-unused-result -Wparentheses -Wsign-compare
-
-NVCC = nvcc
-
-PROJDIR = $(realpath $(CURDIR))
-SRCDIR = $(PROJDIR)/src
-CPP = $(shell find $(PROJDIR)/src -name '*.cpp')
-MAIN = benchmark.cpp
-BIN = benchsys
-OBJDIR = $(PROJDIR)/obj
-
-# check if nvcc (CUDA compiler) is available
-ifeq ($(shell command -v nvcc -V 2> /dev/null),)
-    HAS_NVCC =
-    OBJ_CUDA =
-
-# has NVCC
-else
-    # set CUDA defs
-    CUDA = $(shell find $(PROJDIR)/src -name '*.cu')
-    HAS_NVCC = -D__HAS_NVCC__
-    NVCC_FLGS = -pg -g -Wno-deprecated-gpu-targets
-    OBJ_CUDA = $(patsubst $(SRCDIR)/%.cu,$(OBJDIR)/%.o,$(CUDA))
-
-$(OBJDIR)/%.o: $(SRCDIR)/%.cu
-	@mkdir -p $(@D)
-	$(NVCC) -c ${HAS_NVCC} $(NVCC_FLGS) $< -o $@
-
-endif
-OBJ_CPP = $(patsubst $(SRCDIR)/%.cpp,$(OBJDIR)/%.o,$(CPP))
-OBJ = $(OBJ_CPP) $(OBJ_CUDA)
-
-$(OBJDIR)/%.o: $(SRCDIR)/%.cpp
-	@mkdir -p $(@D)
-	$(CC) -c $(HAS_NVCC) $(CCSTD) $(CCFLGS) $< -o $@
-
-$(BIN): $(OBJ)
ifeq ($(HAS_NVCC), -D__HAS_NVCC__)
-	$(NVCC) $(MAIN) $(HAS_NVCC) $(NVCC_FLGS) $^ -o $@
-else
-	$(CC) $(MAIN) $(HAS_NVCC) $(CCSTD) $(CCFLGS) $^ -o $@
-endif
-
-run_bench:
-	./${BIN} -b
-
-run_daemon:
-	./${BIN} -d
-
-gprof:
-	gprof ${BIN} gmon.out > gprof.txt
-
-callgrind:
-	valgrind --tool=callgrind ./${BIN}
-	#callgrind_annotate callgrind.out
-
-flamegraph:
-	sudo perf record -g ./${BIN}
-	sudo perf script | sudo ../FlameGraph/stackcollapse-perf.pl | sudo ../FlameGraph/flamegraph.pl > rpi.svg
-
-docker_img:
-	sudo docker build . -t benchmarks
-
-docker_run:
-	sudo docker run --privileged -it benchmarks:latest /bin/bash
-
-gpu_docker_img:
-	sudo nvidia-docker build . -t benchmarks
-
-gpu_docker_run:
-	sudo nvidia-docker run --privileged -it benchmarks:latest /bin/bash
-
-avail_macros:
-	gcc -dM -E -
diff --git a/benchmarks/src/lib/montecarlo.cuh b/benchmarks/src/lib/montecarlo.cuh
deleted file mode 100644
--- a/benchmarks/src/lib/montecarlo.cuh
+++ /dev/null
-#include <curand_kernel.h>
-
-/**
- * @brief Driver for CUDA Monte Carlo method
- * @param dev
- * @param devStates
- */
-void run_gpu_monte_carlo(float *dev, curandState *devStates);
-
-#endif
diff --git a/benchmarks/src/lib/montecarlo.hpp b/benchmarks/src/lib/montecarlo.hpp
deleted file mode 100644
index 206d53a75..000000000
--- a/benchmarks/src/lib/montecarlo.hpp
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef MONTECARLO_HPP
-#define MONTECARLO_HPP
-#define PI 3.1415926535
-
-/**
- * @brief Monte Carlo method for predicting the number representing pi
- * TODO
- */
-double monte_carlo(int total_trials);
-
-#endif
diff --git a/benchmarks/src/lib/primes.cuh b/benchmarks/src/lib/primes.cuh
deleted file mode 100644
index 71de36061..000000000
--- a/benchmarks/src/lib/primes.cuh
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * Primality test GPU methods header
- */
-#ifndef __PRIMES_CUH__
-#define __PRIMES_CUH__
-
-#define TRIALS_PER_THREAD 4096
-#define BLOCKS 256
-#define THREADS 256
-#define PI 3.1415926535 // known value of pi
-
-#include <cstdint>
-#include
-
-/**
- * @brief Driver for CUDA Miller Rabin method
- * @param
- * @param
- */
-void run_gpu_miller_rabin(const uint32_t *input,
-                          bool *output,
-                          int iters,
-                          int threads,
-                          int blocks);
-
-#endif
diff --git a/benchmarks/src/lib/primes.hpp b/benchmarks/src/lib/primes.hpp
deleted file mode 100644
index 38dfac6e9..000000000
--- a/benchmarks/src/lib/primes.hpp
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef PRIMES_HPP
-#define PRIMES_HPP
-#include <cstdint>
-
-/**
- * @brief Utility modulo multiply function finding (a * b) % m
- *
- * @param a first operand of the multiplication
- * @param b second operand of the multiplication
- * @param m modulus used to limit the result
- *
- * @return result of (a * b) % m
- */
-uint32_t mod_mul(uint32_t a, uint32_t b, uint32_t m);
-
-/**
- * @brief Utility modulo power function finding (a ^ b) % m
- *
- * @param a base for exponentiation
- * @param b exponent
- * @param m modulus used to limit the result
- *
- * @return result of (a^b) % m
- */
-uint32_t mod_pow(uint32_t a, uint32_t b, uint32_t m);
-
-/**
- * @brief witness utility function for miller_rabin checks if a given witness
- * 'a' indicates that 'n' is composite
- *
- * @param n number being tested for primality
- * @param d value 'd' computed from 'n' during the primality test
- * @param a witness value being tested
- * @param s number of times 'd' can be divided by 2 (s = log2(d))
- *
- * @return true/false (bool)
- */
-bool witness(uint32_t n, uint32_t d, uint32_t a, uint32_t s);
-
-/**
- * @brief Modified primes algorithm
- *
- * @param n target number (uint64_t)
- * @param iters iterations determine accuracy (uint64_t)
- *
- * return true/false (bool)
- */
-bool miller_rabin(uint32_t n, uint32_t iters);
-
-#endif
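
For context, one way the primality API deleted above can be exercised from the CPU side — a minimal, hypothetical usage sketch (the include path, bound, and iteration count are illustrative assumptions, not taken from the repository):

    // Hypothetical usage sketch of the deleted primes API (not repository code).
    // Counts probable primes below a small bound with the Miller-Rabin test.
    #include "lib/primes.hpp"
    #include <cstdint>
    #include <iostream>

    int main() {
        const uint32_t limit = 100000; // illustrative bound
        const uint32_t iters = 10;     // accuracy/work trade-off, mirroring the default in primes.cpp
        uint32_t count = 0;
        for (uint32_t n = 2; n < limit; ++n) {
            count += miller_rabin(n, iters); // bool converts to 0/1
        }
        std::cout << "probable primes below " << limit << ": " << count << std::endl;
        return 0;
    }
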
diff --git a/benchmarks/src/lib/threadpool.hpp b/benchmarks/src/lib/threadpool.hpp
deleted file mode 100644
index 89254f52c..000000000
--- a/benchmarks/src/lib/threadpool.hpp
+++ /dev/null
@@ -1,181 +0,0 @@
-#ifndef THREADS_HPP
-#define THREADS_HPP
-
-#include <condition_variable>
-#include <functional>
-#include <future>
-#include <mutex>
-#include <queue>
-#include <thread>
-#include <vector>
-
-class ThreadPool {
-  private:
-    // VECTOR of threads to execute tasks
-    std::vector<std::thread> workers;
-    // QUEUE of tasks to be executed
-    std::queue<std::function<void()>> tasks;
-    // MUTEX synchronizing access to the QUEUE of tasks
-    std::mutex queue_mutex;
-    // CONDITIONAL to notify waiting threads when queue gets populated
-    std::condition_variable condition;
-    // BOOL indicating if ThreadPool should stop execution
-    bool stop;
-
-  public:
-    /**
-     * @brief Default constructor that creates a ThreadPool with the number
-     * of threads
-     */
-    ThreadPool() : ThreadPool(std::thread::hardware_concurrency()) {
-    }
-
-    /**
-     * @brief Constructs a ThreadPool with a given number of worker
-     * threads to dispatch functions.
-     * @param numThreads The number of worker threads to be created.
-     * @details Constructs a ThreadPool object with the specified number of
-     * worker threads.
-     */
-    explicit ThreadPool(int numThreads) : stop(false) {
-
-        // traverse through the number of threads specified
-        for (int i = 0; i < numThreads; ++i) {
-            // add a new thread to the vector storing workers using lambda
-            // function
-            workers.emplace_back([this] {
-                for (;;) {
-                    // worker thread creates task object that holds next task to
-                    // be executed
-                    std::function<void()> task_obj;
-
-                    // this "symbolizes" the critical section of the TheadPool
-                    // class
-                    {
-                        // worker thread locks queue_mutex
-                        std::unique_lock<std::mutex> lock(this->queue_mutex);
-                        // wait on conditional_variable (ThreadPool stop OR
-                        // queued task), wait() locks/unlocks based on condition
-                        // result
-                        this->condition.wait(lock, [this] {
-                            return this->stop || !this->tasks.empty();
-                        });
-                        // based on stop OR awaiting tasks, return from the
-                        // thread
-                        if (this->stop && this->tasks.empty()) {
-                            return;
-                        }
-
-                        // if above isnt met, move first task in TASKS queue to
-                        // the task object to transfer ownership
-                        task_obj = std::move(this->tasks.front());
-
-                        // pop the handed off task to make room for a new one.
-                        // only ONE thread should remove a task from the queue
-                        // at a time
-                        this->tasks.pop();
-                    }
-
-                    // EXECUTE THE HANDED OFF TASK
-                    task_obj();
-                }
-            });
-        }
-    }
-
-    /**
-     * @brief Enqueues a task to the thread pool.
-     * @tparam F Type of the function to be enqueued.
-     * @tparam Args Variadic template parameter pack of the arguments
-     * passed to the function.
-     * @param f Function to be enqueued.
-     * @param args Arguments passed to the function
-     * @return std::future<typename std::result_of<F(Args...)>::type> A
-     * future object that will contain the result of the function
-     * execution.
-     * @throw std::runtime_error If the ThreadPool has already been
-     * stopped.
-     */
-    template <class F, class... Args>
-    auto enqueue(F &&f, Args &&...args)
-        -> std::future<typename std::result_of<F(Args...)>::type> {

-        // this is the return type of the passed in function
-        using return_type = typename std::result_of<F(Args...)>::type;
-        // * SHARED POINTER to PACKAGED TASK used to store the passed in i
-        // function + its arguments
-        // * std::bind used to create function object binded to the
-        // function `f` + its args to the packaged tasks
-        // * std::forward used for forwarding an argument to another
-        // function
-        auto task = std::make_shared<std::packaged_task<return_type()>>(
-            std::bind(std::forward<F>(f), std::forward<Args>(args)...));
-
-        // the FUTURE obj retrieves the return value of the function passed in
-        std::future<return_type> res = task->get_future();
-        {
-            // aquire lock on queue_mutex for synchronization
-            std::unique_lock<std::mutex> lock(queue_mutex);
-            // check if threadpool stop is initiated
-            if (stop) {
-                throw std::runtime_error("enqueue on stopped ThreadPool");
-            }
-            // add a task using emplace to the queue as a lambda that calls the
-            // packaged task
-            tasks.emplace([task]() { (*task)(); });
-        } // once this is hit, unique_lock is out of scope & mutex is
-          // automatically unlocked
-        // notify one waiting thread of one new task added to the queue
-        condition.notify_one();
-        // the return is the future object
-        return res;
-    }
-
-    ~ThreadPool() {
-        {
-            // lock queue_mutex & set stop to true
-            std::unique_lock<std::mutex> lock(queue_mutex);
-            stop = true;
-        }
-        // unblock all threads
-        condition.notify_all();
-        // treaverse threads and join
-        for (std::thread &worker : workers) {
-            worker.join();
-        }
-    }
-};
-
-/**
- * @brief A class that provides a function to dispatch a function call to a
- * thread pool and return a future object for obtaining the result.
- */
-class ThreadDispatch {
-  public:
-    /**
-     * @brief Dispatches a function call to a ThreadPool and returns a
-     * future object for obtaining the result.
-     * @tparam Function The type of the function to be dispatched.
-     * @tparam Args The types of the arguments to be passed to the
-     * function.
-     * @param pool The ThreadPool object to which the function call is
-     * dispatched.
-     * @param func The function to be dispatched.
-     * @param args The arguments to be passed to the function.
-     * @return A future object for obtaining the result of the dispatched
-     * function call.
-     */
-    template <typename Function, typename... Args>
-    auto dispatch(ThreadPool &pool, Function &&func, Args &&...args)
-        -> std::future<typename std::result_of<Function(Args...)>::type> {
-
-        // enqueue the function call to the thread pool
-        auto result = pool.enqueue(std::forward<Function>(func),
-                                   std::forward<Args>(args)...);
-
-        // return the future object to get the result later
-        return result;
-    }
-};
-
-#endif
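
The ThreadPool/ThreadDispatch pair above is self-contained, so a short usage sketch may help; this is a hypothetical example (task, thread count, and values are illustrative, not from the repository):

    // Hypothetical usage sketch of the deleted ThreadPool API (illustrative only).
    #include "lib/threadpool.hpp"
    #include <future>
    #include <iostream>
    #include <vector>

    int main() {
        ThreadPool pool(4);        // four worker threads
        ThreadDispatch dispatcher; // thin forwarding wrapper around ThreadPool::enqueue

        // Enqueue a few tasks; each call returns a std::future holding the result.
        std::vector<std::future<int>> results;
        for (int i = 0; i < 8; ++i) {
            results.push_back(dispatcher.dispatch(pool, [](int x) { return x * x; }, i));
        }

        // Block on each future; the pool joins its workers when it goes out of scope.
        for (auto &f : results) {
            std::cout << f.get() << ' ';
        }
        std::cout << std::endl;
        return 0;
    }
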
diff --git a/benchmarks/src/src/fourier.cpp b/benchmarks/src/src/fourier.cpp
deleted file mode 100644
index 769d5ede5..000000000
--- a/benchmarks/src/src/fourier.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Fast & Discrete Fourier Transforms
- */
-
-#include <cmath>
-#include <complex>
-#include <iostream>
-#include <vector>
-
-// compute the Discrete Fourier Transform (DFT) of a sequence
-std::vector<std::complex<double>>
-DFT(const std::vector<std::complex<double>> &x) {
-    int N = x.size(); // Size of the input sequence
-    std::vector<std::complex<double>> X(N);
-
-    for (int k = 0; k < N; k++) {
-        X[k] = 0;
-        for (int n = 0; n < N; n++) {
-            std::complex<double> exp_term =
-                std::polar(1.0, -2 * M_PI * k * n / N);
-            X[k] += x[n] * exp_term;
-        }
-    }
-
-    return X;
-}
-
-/*
-int main() {
-    std::vector<std::complex<double>> input_signal =
-        {1.0, 2.0, 3.0, 4.0, 9.0, 1.0, 20.0, 11.0};
-    std::vector<std::complex<double>> result = DFT(input_signal);
-
-    // Print the DFT result
-    for (int k = 0; k < result.size(); k++) {
-        std::cout << "X[" << k << "] = " << result[k] << std::endl;
-    }
-
-    return 0;
-}*/
diff --git a/benchmarks/src/src/montecarlo.cpp b/benchmarks/src/src/montecarlo.cpp
deleted file mode 100644
index 014394962..000000000
--- a/benchmarks/src/src/montecarlo.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * A Monte Carlo method is really a problem dealing with random distribution
- * and random sampling related technique. In this case and within this file
- * we use a "Monte Carlo Method" from what is talked about in the video below
- * specifically when the author talks about predicting the number pi π using
- * random sampling by "dropping marbles" into a square and determining how
- * many are dropped within 1/4 of the square. The number of successes divided
- * by the number of total attempts will our prediction of pi π
- *
- * https://www.youtube.com/watch?v=7ESK5SaP-bc
- */
-#include "../lib/montecarlo.hpp"
-#include <random>
-#include
-#include
-#include
-
-#define PI 3.1415926535
-
-double monte_carlo(int total_trials) {
-    int successes = 0;
-    double x, y;
-
-    // Initialize a random number generator
-    // TODO set a seed to be used for this PRNG and in montecarlo.cu
-    std::random_device rd;
-    std::mt19937 gen(rd());
-    std::uniform_real_distribution<double> dis(0.0, 1.0);
-
-    for (int i = 0; i < total_trials; i++) {
-        x = dis(gen);
-        y = dis(gen);
-        // Check if the point is inside the unit circle
-        successes += (x * x + y * y <= 1.0);
-    }
-
-    // Estimate pi
-    double predicted_pi = 4.0 * static_cast<double>(successes) / total_trials;
-
-    return predicted_pi;
-}
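
The TODO in monte_carlo() above asks for an explicit seed shared with the GPU path; a minimal sketch of what a seeded variant could look like (the function name and seed parameter are hypothetical, not part of the deleted code):

    // Hypothetical seeded variant of monte_carlo(); sketch only.
    #include <cstdint>
    #include <random>

    double monte_carlo_seeded(int total_trials, uint64_t seed) {
        std::mt19937_64 gen(seed); // fixed seed -> reproducible estimate
        std::uniform_real_distribution<double> dis(0.0, 1.0);

        int successes = 0;
        for (int i = 0; i < total_trials; i++) {
            double x = dis(gen);
            double y = dis(gen);
            successes += (x * x + y * y <= 1.0); // point falls inside the quarter circle
        }
        return 4.0 * static_cast<double>(successes) / total_trials;
    }

The same seed value could then be passed as the first argument of curand_init() in the GPU kernel below (which currently hard-codes 1234) to keep the two estimators comparable.
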
diff --git a/benchmarks/src/src/montecarlo_gpu.cu b/benchmarks/src/src/montecarlo_gpu.cu
deleted file mode 100644
index 0681563d1..000000000
--- a/benchmarks/src/src/montecarlo_gpu.cu
+++ /dev/null
@@ -1,40 +0,0 @@
-#include "../lib/montecarlo.cuh"
-#include <cstdlib>
-#include <curand_kernel.h>
-#include
-#include
-#include
-#include
-
-__global__ void gpu_monte_carlo(float *estimate, curandState *states) {
-    unsigned int thread_id = threadIdx.x + blockDim.x * blockIdx.x;
-    int points_in_circle = 0;
-    float x, y;
-    // Initialize CURAND
-    curand_init(1234, thread_id, 0, &states[thread_id]);
-
-    for (int i = 0; i < TRIALS_PER_THREAD; i++) {
-        x = curand_uniform(&states[thread_id]);
-        y = curand_uniform(&states[thread_id]);
-        points_in_circle +=
-            (x * x + y * y <= 1.0f); // count if x & y is in the circle.
-    }
-    estimate[thread_id] = 4.0f * points_in_circle /
-                          (float)TRIALS_PER_THREAD; // return estimate of pi
-}
-
-float host_monte_carlo(long trials) {
-    float x, y;
-    long points_in_circle = 0;
-    for (long i = 0; i < trials; i++) {
-        x = rand() / (float)RAND_MAX;
-        y = rand() / (float)RAND_MAX;
-        points_in_circle += (x * x + y * y <= 1.0f);
-    }
-    return 4.0f * points_in_circle / trials;
-}
-
-void run_gpu_monte_carlo(float *dev, curandState *devStates) {
-
-    gpu_monte_carlo<<<BLOCKS, THREADS>>>(dev, devStates);
-}
diff --git a/benchmarks/src/src/mtx.cpp b/benchmarks/src/src/mtx.cpp
deleted file mode 100644
index e69de29bb..000000000
diff --git a/benchmarks/src/src/mtx.cu b/benchmarks/src/src/mtx.cu
deleted file mode 100644
index e69de29bb..000000000
diff --git a/benchmarks/src/src/primes.cpp b/benchmarks/src/src/primes.cpp
deleted file mode 100644
index 128d6383a..000000000
--- a/benchmarks/src/src/primes.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-#include "../lib/primes.hpp"
-#include <cstdint>
-#include <cstdlib>
-#include
-
-uint32_t mod_mul(uint32_t a, uint32_t b, uint32_t m) {
-    uint32_t res = 0;
-    while (b > 0) {
-        if (b & 1) {
-            res = (res + a) % m;
-        }
-        a = (2 * a) % m;
-        b >>= 1;
-    }
-    return res;
-}
-
-uint32_t mod_pow(uint32_t a, uint32_t b, uint32_t m) {
-    uint32_t res = 1;
-
-    a %= m;
-    while (b > 0) {
-        if (b & 1) {
-            res = mod_mul(res, a, m);
-        }
-        a = mod_mul(a, a, m);
-        b >>= 1;
-    }
-    return res;
-}
-
-bool witness(uint32_t n, uint32_t d, uint32_t a, uint32_t s) {
-    uint32_t x = mod_pow(a, d, n);
-
-    // likely prime, return false
-    if (x == 1 || x == n - 1) {
-        return false;
-    }
-
-    for (uint32_t r = 1; r < s; r++) {
-        x = mod_mul(x, x, n);
-        if (x == n - 1) {
-            return false;
-        }
-    }
-    return true;
-}
-
-bool miller_rabin(uint32_t n, uint32_t iters = 10) {
-    if (n < 2) {
-        return false;
-    }
-    if (n == 2 || n == 3) {
-        return true;
-    }
-    if (n % 2 == 0) {
-        return false;
-    }
-    uint32_t d = n - 1, s = 0;
-    while (d % 2 == 0) {
-        d /= 2;
-        s++;
-    }
-    for (uint32_t i = 0; i < iters; i++) {
-        uint32_t a = rand() % (n - 3) + 2;
-        if (witness(n, d, a, s)) {
-            return false;
-        }
-    }
-    return true;
-}
diff --git a/benchmarks/src/src/primes_gpu.cu b/benchmarks/src/src/primes_gpu.cu
deleted file mode 100644
index aac8c329d..000000000
--- a/benchmarks/src/src/primes_gpu.cu
+++ /dev/null
@@ -1,96 +0,0 @@
-#include "../lib/primes.cuh"
-#include <curand_kernel.h>
-#include
-#include
-#include
-#include
-
-__device__ uint32_t gpu_mod_mul(uint32_t a, uint32_t b, uint32_t m) {
-    uint32_t res = 0;
-    while (b > 0) {
-        if (b & 1) {
-            res = (res + a) % m;
-        }
-        a = (2 * a) % m;
-        b >>= 1;
-    }
-    return res;
-}
-
-__device__ uint32_t gpu_mod_pow(uint32_t a, uint32_t b, uint32_t m) {
-    uint32_t res = 1;
-
-    a %= m;
-    while (b > 0) {
-        if (b & 1) {
-            res = gpu_mod_mul(res, a, m);
-        }
-        a = gpu_mod_mul(a, a, m);
-        b >>= 1;
-    }
-    return res;
-}
-
-__device__ bool gpu_witness(uint32_t n, uint32_t d, uint32_t a, uint32_t s) {
-    uint32_t x = gpu_mod_pow(a, d, n);
-
-    if (x == 1 || x == n - 1) {
-        return false;
-    }
-
-    for (uint32_t r = 1; r < s; r++) {
-        x = gpu_mod_mul(x, x, n);
-        if (x == n - 1) {
-            return false;
-        }
-    }
-    return true;
-}
-
-__global__ void
-miller_rabin_kernel(const uint32_t *input, bool *output, int iters) {
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
-    uint32_t num = input[idx];
-    uint32_t d = num - 1, s = 0;
-
-    if (num < 2) {
-        output[idx] = false;
-        return;
-    }
-    if (num == 2 || num == 3) {
-        output[idx] = true;
-        return;
-    }
-    if (num % 2 == 0) {
-        output[idx] = false;
-        return;
-    }
-
-    while (d % 2 == 0) {
-        d /= 2;
-        s++;
-    }
-
-    curandState state;
-    curand_init(clock64(), idx, 0, &state);
-
-    for (int i = 0; i < iters; i++) {
-        uint32_t a = curand(&state) % (num - 3) + 2;
-        if (gpu_witness(num, d, a, s)) {
-            output[idx] = false;
-            return;
-        }
-    }
-
-    output[idx] = true;
-}
-
-void run_gpu_miller_rabin(const uint32_t *input,
-                          bool *output,
-                          int iters,
-                          int threads,
-                          int blocks) {
-
-    miller_rabin_kernel<<<blocks, threads>>>(input, output, iters);
-}
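
For completeness, the GPU wrapper above can be driven from the host roughly as follows — a hypothetical sketch assuming the BLOCKS/THREADS defaults from primes.cuh and the CUDA runtime API; the include path and candidate values are illustrative and error checking is omitted:

    // Hypothetical host-side driver for the deleted run_gpu_miller_rabin().
    #include "lib/primes.cuh"
    #include <cstdint>
    #include <cstdio>
    #include <cuda_runtime.h>

    int main() {
        const int n = BLOCKS * THREADS; // one candidate per launched thread
        uint32_t *h_in = new uint32_t[n];
        bool *h_out = new bool[n];
        for (int i = 0; i < n; ++i) {
            h_in[i] = 3 + 2 * i; // odd candidates only
        }

        uint32_t *d_in;
        bool *d_out;
        cudaMalloc((void **)&d_in, n * sizeof(uint32_t));
        cudaMalloc((void **)&d_out, n * sizeof(bool));
        cudaMemcpy(d_in, h_in, n * sizeof(uint32_t), cudaMemcpyHostToDevice);

        run_gpu_miller_rabin(d_in, d_out, /*iters=*/10, THREADS, BLOCKS);
        cudaMemcpy(h_out, d_out, n * sizeof(bool), cudaMemcpyDeviceToHost);

        int primes = 0;
        for (int i = 0; i < n; ++i) {
            primes += h_out[i];
        }
        std::printf("%d probable primes out of %d candidates\n", primes, n);

        cudaFree(d_in);
        cudaFree(d_out);
        delete[] h_in;
        delete[] h_out;
        return 0;
    }
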
diff --git a/benchmarks/src/src/sys.cpp b/benchmarks/src/sys.cpp
similarity index 100%
rename from benchmarks/src/src/sys.cpp
rename to benchmarks/src/sys.cpp
diff --git a/benchmarks/src/lib/sys.hpp b/benchmarks/src/sys.hpp
similarity index 100%
rename from benchmarks/src/lib/sys.hpp
rename to benchmarks/src/sys.hpp
diff --git a/experiment/blas1.c b/experiment/blas/blas1.c
similarity index 100%
rename from experiment/blas1.c
rename to experiment/blas/blas1.c
diff --git a/experiment/blas2.c b/experiment/blas/blas2.c
similarity index 100%
rename from experiment/blas2.c
rename to experiment/blas/blas2.c
diff --git a/experiment/csv.cpp b/experiment/csv/csv.cpp
similarity index 100%
rename from experiment/csv.cpp
rename to experiment/csv/csv.cpp
diff --git a/experiment/csv2.cpp b/experiment/csv/csv2.cpp
similarity index 100%
rename from experiment/csv2.cpp
rename to experiment/csv/csv2.cpp
diff --git a/experiment/csv3.cpp b/experiment/csv/csv3.cpp
similarity index 100%
rename from experiment/csv3.cpp
rename to experiment/csv/csv3.cpp
diff --git a/experiment/csv_intrin.cpp b/experiment/csv/csv_intrin.cpp
similarity index 100%
rename from experiment/csv_intrin.cpp
rename to experiment/csv/csv_intrin.cpp
diff --git a/experiment/sigproc.cpp b/experiment/sigproc/sigproc.cpp
similarity index 100%
rename from experiment/sigproc.cpp
rename to experiment/sigproc/sigproc.cpp
diff --git a/experiment/sigproc.hpp b/experiment/sigproc/sigproc.hpp
similarity index 100%
rename from experiment/sigproc.hpp
rename to experiment/sigproc/sigproc.hpp
diff --git a/experiment/t_sigproc.cpp b/experiment/sigproc/t_sigproc.cpp
similarity index 100%
rename from experiment/t_sigproc.cpp
rename to experiment/sigproc/t_sigproc.cpp