Add files via upload

vanislekahuna · Aug 19, 2019 · 9a1f73f · 9a1f73f
1 parent 89e7a0a
commit 9a1f73f
Show file tree

Hide file tree

Showing 13 changed files with 782 additions and 2 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1,2 @@
+# Auto detect text files and perform LF normalization
+* text=auto
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+
+class.csv
+output/Figure_1.png
+output/Figure_2.png
+output/Figure_3.png
diff --git a/README.md b/README.md
@@ -1,2 +1,33 @@
-# bcws-psu-research
-research software for image data integration and analysis developed in collaboration between CITZ DSAB and BCWS PSU, for wildfire managment decision-making support 
+# bcws
+ bcws image analysis project
+
+multispectral image viewer and clustering algorithm included
+
+## requirements:
+python 2 (for image viewer) and gnu/g++ (for discretization algorithm)
+tested on ubuntu
+
+## how to:
+1) view the sample input data:
+
+    python read_multispectral.py sentinel2_cut.bin
+
+2) compile and run the clustering algorithm:
+
+    ./run.sh
+
+## Results
+### r,g,b <- bands 12, 9, 3
+Taking K, the number of k-nearest neighbours to be: 222, 444 and 666 resp.:
+![alt text](output/b_12-9-3_k222-444-666.gif)
+
+### y = log(n_segments), x = number of k-nearest neighbours 
+![alt text](output/plot.png)
+
+Hypothetically for a one-level analysis (non-hierarchical) taking K=100 is highly information-preserving choice, as the curve seems to depart strongly from monotonicity after K=200..
+
+..hence K=200 or so provides efficiency without excessive info. loss
+### output formats
+The clustering algorithm output is provided in two formats:
+    Cluster labels in IEEE 32-bit Floating-point format (0 is unlabelled, labels start at 1)
+    Image where the pixels are colored according to the cluster "centres" to which they're assigned
diff --git a/ansicolor.h b/ansicolor.h
@@ -0,0 +1,27 @@
+#pragma once
+#ifndef HEADER_ANSICOLOR_H
+#define HEADER_ANSICOLOR_H
+#define KNRM "\x1B[0m" //normal
+#define KBLK "\x1B[30m" //black
+#define KRED "\x1B[31m" //red
+#define KGRN "\x1B[32m" //green
+#define KYEL "\x1B[33m" //yellow
+#define KBLU "\x1B[34m" //blue
+#define KMAG "\x1B[35m" //magenta
+#define KCYN "\x1B[36m" //cyan
+#define KWHT "\x1B[37m" //white
+#define KBLD "\x1B[1m" //bold
+#define KRES "\x1B[0m" //reset
+#define KITA "\x1B[3m" //italics
+#define KUND "\x1B[4m" //underline
+#define KSTR "\x1B[9m" //strikethrough
+#define KINV "\x1B[7m" //inverseON
+#define BBLK "\x1B[30m" //black
+#define BRED "\x1B[31m" //red
+#define BGRN "\x1B[32m" //green
+#define BYEL "\x1B[33m" //yellow
+#define BBLU "\x1B[34m" //blue
+#define BMAG "\x1B[35m" //magenta
+#define BCYN "\x1B[36m" //cyan
+#define BWHT "\x1B[37m" //white
+#endif
diff --git a/ansicolor.py b/ansicolor.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+'''  ansi color codes for posix terminal'''
+
+color = {"KNRM": "\\x1B[0m",  #normal
+         "KBLK": "\\x1B[30m", #black
+         "KRED": "\\x1B[31m", #red
+         "KGRN": "\\x1B[32m", #green
+         "KYEL": "\\x1B[33m", #yellow
+         "KBLU": "\\x1B[34m", #blue
+         "KMAG": "\\x1B[35m", #magenta
+         "KCYN": "\\x1B[36m", #cyan
+         "KWHT": "\\x1B[37m", #white
+         "KBLD": "\\x1B[1m",  #bold
+         "KRES": "\\x1B[0m",  #reset
+         "KITA": "\\x1B[3m",  #italics
+         "KUND": "\\x1B[4m",  #underline
+         "KSTR": "\\x1B[9m",  #strikethrough
+         "KINV": "\\x1B[7m",  #inverseON
+         "BBLK": "\\x1B[30m", #black
+         "BRED": "\\x1B[31m", #red
+         "BGRN": "\\x1B[32m", #green
+         "BYEL": "\\x1B[33m", #yellow
+         "BBLU": "\\x1B[34m", #blue
+         "BMAG": "\\x1B[35m", #magenta
+         "BCYN": "\\x1B[36m", #cyan
+         "BWHT": "\\x1B[37m"} #white
+
+for c in color:
+  exec(c + ' = "'  + color[c] + '"')
+
+if __name__ == '__main__':
+  print KMAG + "ansicolor" + KYEL + "." + KGRN + "py" + KNRM
diff --git a/cluster.cpp b/cluster.cpp
@@ -0,0 +1,286 @@
+/* 20190818 ultra-minimalist reinterpretation of kgc-2010 algorithm (IGARSS 2010) */
+#include"misc.h"
+#include<cmath>
+#include<pthread.h>
+
+class f_idx{
+  public: // float, index tuple object
+  float d;
+  unsigned int idx;
+  f_idx(float d_ = 0., unsigned int idx_ = 0){
+    d = d_;
+    idx = idx_;
+  }
+  f_idx(const f_idx &a){
+    d = a.d;
+    idx = a.idx;
+  }
+};
+
+bool operator<(const f_idx& a, const f_idx&b){
+  return a.d > b.d; // priority_queue max first: we want min first
+}
+
+// read header file
+void hread(str hfn, int & nrow, int & ncol, int & nband){
+  str line;
+  vector<str> words;
+  ifstream hf(hfn);
+  nrow = ncol = nband = 0;
+  if(!hf.is_open()) err(str("failed to open header file: ") + hfn);
+  while(getline(hf, line)){
+    words = split(line, '=');
+    if(words.size() == 2){
+      strip(words[0]);
+      str w(words[0]);
+      int n = atoi(words[1].c_str());
+      if(w == str("samples")) ncol = n;
+      if(w == str("lines")) nrow = n;
+      if(w == str("bands")) nband = n;
+    }
+  }
+  hf.close();
+}
+
+float * falloc(size_t nf){
+  return (float *) alloc(nf * (size_t)sizeof(float));
+}
+
+// read binary file
+float * bread(str bfn, int nrow, int ncol, int nband){
+  FILE * f = fopen(bfn.c_str(), "rb");
+  size_t nf = (size_t)nrow * (size_t)ncol * (size_t)nband;
+  float * dat = falloc(nf);
+  size_t nr = fread(dat, nf * (size_t)sizeof(float), 1, f);
+  if(nr != 1) err("failed to read data");
+  fclose(f);
+  return dat;
+}
+
+pthread_mutex_t print_mutex;
+
+void cprint(str s){
+  pthread_mutex_lock(&print_mutex);
+  cout << s << endl;
+  pthread_mutex_unlock(&print_mutex);
+}
+
+size_t kmax, k_use;
+pthread_attr_t attr; // specify threads joinable
+pthread_mutex_t next_j_mutex;
+size_t next_j;
+size_t np;
+int nrow, ncol, nband;
+float * dat;
+
+float * dmat_d;// = falloc(np * kmax);
+unsigned int * dmat_i;// = (unsigned int *)alloc(np * (size_t)kmax * (size_t)sizeof(unsigned int));
+vector<unsigned int> top_i;
+
+float * rho;
+unsigned int * label;
+unsigned int next_label;
+
+// thread worker function, incl. sorted distance matrix row calculation
+void * dmat_threadfun(void * arg){
+  unsigned int i;
+  long unsigned int my_next_j;
+  long k = (long)arg;
+  cprint(str("dmat_threadfun(") + std::to_string(k) + str(")"));
+  float d, df;
+  unsigned int pi;
+  unsigned int my_i = 0;
+  unsigned int u;
+
+  while(1){
+    // try to pick up a job
+    pthread_mutex_lock(&next_j_mutex);
+    my_next_j = next_j ++; // index of data this thread should pick up if it can
+    pthread_mutex_unlock(&next_j_mutex);
+
+    if(my_next_j > np -1){
+      cprint(str("\texit thread ") + to_string(k));
+      return(NULL);
+    }
+
+    if(my_next_j % 37 == 0){
+      float pct = 100. * (float)next_j / (float) np;
+      cprint(str(" worker: ") + to_string(k) + str(" pickup: ") + to_string(my_next_j)
+      + str(" %") + to_string(pct));
+    }
+    priority_queue<f_idx> pq;
+    for0(i, np){
+      if(i != my_next_j){
+        d = 0.;
+        for(u = 0; u < nband; u++){
+          pi = np * u;
+          df = dat[pi + i] - dat[pi + my_next_j];
+          d += df * df;
+        }
+        pq.push(f_idx(d, i)); //my_next_j));
+      }
+    }
+    //dmat row is sorted
+    for0(i, kmax){
+      f_idx x(pq.top());
+      unsigned int ki = (my_next_j * kmax) + i;
+      dmat_d[ki] = x.d;
+      dmat_i[ki] = x.idx;
+      pq.pop();
+    }
+    while(pq.size() != 0) pq.pop();
+    my_i ++;
+  }
+}
+
+unsigned int top(unsigned int j){
+  unsigned int i, ki, ni;
+  if(label[j] > 0) return label[j];
+  else{
+    float rho_max = rho[j];
+    unsigned int max_i = j;
+
+    for0(i, k_use){
+      ki = (j * kmax) + i;
+      ni = dmat_i[ki];
+      if(rho[ni] > rho_max){
+        rho_max = rho[ni];
+        max_i = ni;
+      }
+    }
+    if(max_i != j){
+      return top(max_i);
+    }
+    else{
+      label[j] = next_label ++;
+      top_i.push_back(j);
+      // printf("next_label %ld np %ld\n", (long int)next_label, (long int)np);
+      return label[j];
+    }
+  }
+}
+
+int main(int argc, char** argv){
+
+  kmax = 2222;
+
+  if(argc < 2) err("cluster [bin file name. hdr file must also be present");
+  str bfn(argv[1]);
+  //int nrow, ncol, nband;
+  str hfn(split(bfn, '.')[0] + str(".hdr"));
+  hread(hfn, nrow, ncol, nband);
+  printf("nrow %d ncol %d nband %d\n", nrow, ncol, nband);
+
+  dat = bread(bfn, nrow, ncol, nband);
+
+  np = nrow * ncol;
+  printf("np %d\n", np);
+  printf("(np^2 - n) / 2=%f\n", (((float)np * (float)np) - (float)np) / 2.);
+
+  // scale the data first?
+
+  dmat_d = falloc(np * kmax);
+  dmat_i = (unsigned int *)alloc(np * (size_t)kmax * (size_t)sizeof(unsigned int));
+
+  if(fsize("dmat.d") < nrow * ncol * kmax * sizeof(float)){
+
+    //----------------------------------------
+    next_j = 0; // put a lock on this variable
+
+    int numCPU = sysconf(_SC_NPROCESSORS_ONLN);
+    cout << "Number of cores: " << numCPU << endl;
+
+    // mutex setup
+    pthread_mutex_init(&print_mutex, NULL);
+    pthread_mutex_init(&next_j_mutex, NULL);
+
+    // make the threads joinable
+    pthread_attr_init(&attr);
+    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+
+    // allocate threads
+    pthread_t * my_pthread = new pthread_t[numCPU];
+    unsigned int j;
+    for0(j, numCPU){
+      long k = j;
+      pthread_create(&my_pthread[j], &attr, dmat_threadfun, (void *) k);
+    }
+
+    // wait for threads to finish
+    for0(j, numCPU) pthread_join(my_pthread[j], NULL);
+    cout << "end dmat_calc()" << endl;
+
+    FILE * f;
+    f = fopen("dmat.d", "wb");
+    fwrite(dmat_d, np * kmax * sizeof(float), 1, f);
+    fclose(f);
+    f = fopen("dmat.i", "wb");
+    fwrite(dmat_i, np * kmax * sizeof(unsigned int), 1, f);
+    fclose(f);
+
+  }
+  else{
+    FILE * f = fopen("dmat.d", "rb");
+    fread(dmat_d, np * kmax * sizeof(float), 1, f);
+    fclose(f);
+    f = fopen("dmat.i", "rb");
+    fread(dmat_i, np * kmax * sizeof(unsigned int), 1, f);
+    fclose(f);
+  }
+
+  rho = (float *)alloc(np * kmax * sizeof(float));
+  label = (unsigned int *) alloc(np * sizeof(unsigned int));
+
+  FILE * f = fopen("class.csv", "wb");
+  fprintf(f, "k_use,n_classes");
+  fclose(f);
+
+  for(k_use = 1; k_use <= kmax; k_use++){
+    top_i.clear();
+    if(k_use > kmax) err("kuse > kmax");
+    //printf("density estimation..\n");
+    unsigned int i, j;
+    for0(i, np){
+      float d_avg = 0.;
+      for0(j, k_use){
+        d_avg += dmat_d[i * kmax + j];
+      }
+      rho[i] = 1. / d_avg;
+    }
+
+    //printf("hill climbing..\n");
+    top_i.push_back(0); // null is self-top
+    next_label = 1;
+    for0(i, np) label[i] = 0; // default label: unlabeled
+
+    // do the clustering
+    for0(i, np) label[i] = top(i);
+
+    printf("%ld %ld\n", (long int)k_use, (long int)(next_label-1));
+    f = fopen("nclass.csv", "ab");
+    fprintf(f, "\n%ld,%ld", (long int)k_use, (long int)(next_label-1));
+    fclose(f);
+
+    // outputs
+    f = fopen((str("label_") + to_string(k_use)).c_str(), "wb");
+    float * label_float = falloc(np);
+    for0(i, np) label_float[i] = (float)label[i];
+    fwrite(label_float, np *sizeof(float), 1, f);
+    free(label_float);
+    fclose(f);
+
+    f = fopen((str("out_") + to_string(k_use) + str(".bin")).c_str(), "wb");
+    int u;
+    float df;
+    for0(u, nband){
+      for0(i, np){
+        unsigned int ti = top_i[label[i]];
+        unsigned int pi = np * u;
+        df = dat[pi + ti];
+        fwrite(&df, sizeof(float), 1, f);
+      }
+    }
+    fclose(f);
+  }
+  return 0;
+}
diff --git a/compile.sh b/compile.sh
@@ -0,0 +1,2 @@
+# compile the clustering method
+g++ -w -O3 -march=native -o cluster.exe cluster.cpp misc.cpp -lpthread
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# Auto detect text files and perform LF normalization
		* text=auto
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# compile the clustering method
		g++ -w -O3 -march=native -o cluster.exe cluster.cpp misc.cpp -lpthread