From 1cb4f1e217442b7b88dba654087d1d949c679e1d Mon Sep 17 00:00:00 2001 From: David Frantz Date: Fri, 26 Jan 2024 14:54:08 +0100 Subject: [PATCH 01/23] 1st draft sample size computation --- rstats/force-sample-size.r | 98 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 rstats/force-sample-size.r diff --git a/rstats/force-sample-size.r b/rstats/force-sample-size.r new file mode 100644 index 00000000..a368b3e0 --- /dev/null +++ b/rstats/force-sample-size.r @@ -0,0 +1,98 @@ +#!/usr/bin/env Rscript + +# load libraries #################################################### +library(dplyr) + + +# input ############################################################# +args <- commandArgs(trailingOnly = TRUE) +n_args <- 3 + +if (length(args) != n_args) { + c( + "\nWrong input!\n", + " 1: path_hist\n", + " 2: file_output\n" + ) %>% + stop() +} + + +library(dplyr) + +standard_error <- 0.01 +minsize <- 175 + +pixel_counts <- data.frame( + value = 0:2, + count = c( + 1868242749, + 496472240, + 502851998 + ) +) + +users_accuracy <- data.frame( + value = 0:2, + UA = c( + 0.9, + 0.9, + 0.9 + ) +) + +df <- users_accuracy %>% + inner_join( + pixel_counts, + by = "value" + ) + +if (nrow(df) != nrow(pixel_counts)){ + "input tables do not match\n" + stop() +} + +df <- df %>% + mutate(proportion = count / sum(count)) %>% + mutate(stdev = sqrt(UA*(1-UA))) %>% + mutate(WS = proportion * stdev) + +number <- (sum(df$WS)/standard_error)**2 +number + +df <- df %>% + mutate(equal = round(number / nrow(df))) %>% + mutate(proportional = round(number * proportion)) %>% + mutate(compromise = NA) + +if (min(df$proportional) < minsize){ + + rare <- df %>% + filter(proportional < minsize) %>% + mutate(compromise = minsize) + + n_rare <- sum(rare$compromise) + + big <- df %>% + filter(proportional >= minsize) %>% + mutate(compromise = (number-n_rare) * proportion) + + if (any(big$compromise < minsize)){ + c( + "compromising the sample allocation failed...\n", + "either decrease minsize or adjust input parameters to \n", + "have more samples overall." + ) + } + + df <- rbind(rare, big) + +} + +df %>% + mutate(deviation = (compromise - proportional) / proportional * 100) + +write.csv(df, file_output, row.names = FALSE, quote = FALSE) + + + From 93e99bf9733ffe1a2f1e1ee1289d1803fefe143a Mon Sep 17 00:00:00 2001 From: David Frantz Date: Sat, 27 Jan 2024 12:42:44 +0100 Subject: [PATCH 02/23] added license --- src/aux-level/_hist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aux-level/_hist.c b/src/aux-level/_hist.c index 802fc967..c4e4d992 100644 --- a/src/aux-level/_hist.c +++ b/src/aux-level/_hist.c @@ -3,7 +3,7 @@ This file is part of FORCE - Framework for Operational Radiometric Correction for Environmental monitoring. 
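For reference, the sample size computed by the draft script in the patch above follows the standard stratified-design formula used in the map-accuracy literature (cf. Olofsson et al. 2014). In the script's own notation, with W_i the mapped proportion of class i (proportion), U_i the expected user's accuracy (UA) and S(O) the target standard error of overall accuracy (standard_error), the computation amounts to, in LaTeX notation:

    S_i = \sqrt{U_i \, (1 - U_i)}, \qquad
    n   = \left( \frac{\sum_i W_i S_i}{S(\hat{O})} \right)^2

which is exactly what number <- (sum(df$WS)/standard_error)**2 evaluates, given stdev = sqrt(UA*(1-UA)) and WS = proportion * stdev.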
-Copyright (C) 2013-2022 David Frantz +Copyright (C) 2013-2024 David Frantz FORCE is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by From b198df75096398c64f4262641ed7153faeed3878 Mon Sep 17 00:00:00 2001 From: David Frantz Date: Sat, 27 Jan 2024 12:42:59 +0100 Subject: [PATCH 03/23] getopt --- rstats/force-sample-size.r | 132 ++++++++++++++++++++++++++++++++++--- 1 file changed, 122 insertions(+), 10 deletions(-) diff --git a/rstats/force-sample-size.r b/rstats/force-sample-size.r index a368b3e0..60a795a7 100644 --- a/rstats/force-sample-size.r +++ b/rstats/force-sample-size.r @@ -1,24 +1,136 @@ #!/usr/bin/env Rscript +# This file is part of FORCE - Framework for Operational Radiometric +# Correction for Environmental monitoring. +# +# Copyright (C) 2013-2024 David Frantz +# +# FORCE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# FORCE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with FORCE. If not, see . + + # load libraries #################################################### library(dplyr) +library(getopt) # input ############################################################# -args <- commandArgs(trailingOnly = TRUE) -n_args <- 3 -if (length(args) != n_args) { - c( - "\nWrong input!\n", - " 1: path_hist\n", - " 2: file_output\n" - ) %>% - stop() +# usage function in same style as other FORCE tools, +# do not use getop's built-in for consistency +usage <- function(exit){ + + message <- c( + sprintf( + "Usage: %s [-h] [-v] [-i] [-e error] [-s min_size] [-o output-file] -c count-file -u user_acc-file\n", + get_Rscript_filename() + ), + "\n", + " -h = show this help\n", + " -v = show version\n", + " -i = show program's purpose\n", + "\n", + " -e error = standard error, defaults to 0.01\n", + "\n", + " -s min_size = minimum sample size per class\n", + "\n", + " -o output-file = output file path with extension,\n", + " defaults to './sample-size.csv'\n", + "\n", + " -c count-file = csv table with pixel counts per class\n", + "\n", + " -u user_acc-file = csv table with expected user accuracy per class\n", + "\n", + + ) + + cat( + message, + file = if (exit == 0) stdout else stderr() + ) + + quit( + save = "no", + status = exit + ) + } +version <- function() { + cat("Printing function not implemented yet. 
Sorry.\n") + quit( + save = "no", + status = exit + ) +} + +info <- function() { + cat("Suggest sample size for validation of a classification.\n") + quit( + save = "no", + status = exit + ) +} + +mandatory_missing <- function(argument) { + cat( + sprintf("%s is missing!\n", argument), + file = stderr() + ) + usage(1) +} + +file_existing <- function(path) { + if (!file.exist(path)){ + cat( + sprintf("file %s does not exist\n", path), + file = stderr() + ) + usage(1) + } +} + +spec <- matrix( + c( + "help", "h", 0, "logical", + "version", "v", 0, "logical", + "info", "i", 0, "logical", + "error", "e", 2, "numeric", + "min_size", "s", 2, "integer", + "counts", "c", 1, "character", + "user_acc", "u", 1, "character", + "output", "o", 1, "character" + ), + byrow = TRUE, + ncol = 4 +) + +opt <- getopt(spec) + +if (!is.null(opt$help)) usage() +if (!is.null(opt$info)) info() +if (!is.null(opt$version)) version() + +if (is.null(opt$counts)) mandatory_missing("count-file") +if (is.null(opt$user_acc)) mandatory_missing("user_acc-file") + +file_existing(opt$counts) +file_existing(opt$user_acc) + +if (is.null(opt$error)) opt$error <- 0.01 +if (is.null(opt$min_size)) opt$min_size <- 50 +if (is.null(opt$output)) opt$output <- "sample-size.csv" -library(dplyr) standard_error <- 0.01 minsize <- 175 From 0c90f65832e731f23888de802974fa6d56b6aba5 Mon Sep 17 00:00:00 2001 From: David Frantz Date: Sat, 27 Jan 2024 16:26:59 +0100 Subject: [PATCH 04/23] tidied up --- rstats/force-sample-size.r | 123 ++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 63 deletions(-) diff --git a/rstats/force-sample-size.r b/rstats/force-sample-size.r index 60a795a7..3be99b91 100644 --- a/rstats/force-sample-size.r +++ b/rstats/force-sample-size.r @@ -27,7 +27,7 @@ library(getopt) # input ############################################################# # usage function in same style as other FORCE tools, -# do not use getop's built-in for consistency +# do not use getop's built-in usage function for consistency usage <- function(exit){ message <- c( @@ -48,8 +48,10 @@ usage <- function(exit){ " defaults to './sample-size.csv'\n", "\n", " -c count-file = csv table with pixel counts per class\n", + " 2 columns named value and count", "\n", " -u user_acc-file = csv table with expected user accuracy per class\n", + " 2 columns named value and UA", "\n", ) @@ -66,25 +68,19 @@ usage <- function(exit){ } -version <- function() { - cat("Printing function not implemented yet. Sorry.\n") - quit( - save = "no", - status = exit +exit_normal <- function(argument) { + cat( + sprintf("%s\n", argument) ) -} - -info <- function() { - cat("Suggest sample size for validation of a classification.\n") quit( save = "no", - status = exit + status = 0 ) } -mandatory_missing <- function(argument) { +exit_with_error <- function(argument) { cat( - sprintf("%s is missing!\n", argument), + sprintf("%s\n", argument), file = stderr() ) usage(1) @@ -118,11 +114,11 @@ spec <- matrix( opt <- getopt(spec) if (!is.null(opt$help)) usage() -if (!is.null(opt$info)) info() -if (!is.null(opt$version)) version() +if (!is.null(opt$info)) exit_normal("Suggest sample size for validation of a classification.\n") +if (!is.null(opt$version)) exit_normal("Printing function not implemented yet. 
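With the getopt interface introduced in this patch, a typical call of the script could look like the following line (the two csv file names are placeholders; -e, -s and -o fall back to their defaults of 0.01, 50 and ./sample-size.csv when omitted):

    Rscript force-sample-size.r -c class-counts.csv -u expected-ua.csv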
Sorry.\n") -if (is.null(opt$counts)) mandatory_missing("count-file") -if (is.null(opt$user_acc)) mandatory_missing("user_acc-file") +if (is.null(opt$counts)) exit_with_error("count-file is missing!") +if (is.null(opt$user_acc)) exit_with_error("user_acc-file is missing!") file_existing(opt$counts) file_existing(opt$user_acc) @@ -132,79 +128,80 @@ if (is.null(opt$min_size)) opt$min_size <- 50 if (is.null(opt$output)) opt$output <- "sample-size.csv" -standard_error <- 0.01 -minsize <- 175 +pixel_counts <- read.csv(opt$counts) +users_accuracy <- read.csv(opt$user_acc) -pixel_counts <- data.frame( - value = 0:2, - count = c( - 1868242749, - 496472240, - 502851998 - ) -) -users_accuracy <- data.frame( - value = 0:2, - UA = c( - 0.9, - 0.9, - 0.9 - ) -) +# main thing ######################################################## -df <- users_accuracy %>% +# join input tables +table <- users_accuracy %>% inner_join( pixel_counts, by = "value" ) -if (nrow(df) != nrow(pixel_counts)){ - "input tables do not match\n" - stop() +# join worked? +if (nrow(table) != nrow(pixel_counts)){ + exit_with_error("count and user_acc could not be joined") } -df <- df %>% - mutate(proportion = count / sum(count)) %>% +# compute proportional area, and standard deviation of UA +table <- table %>% + mutate(area = count / sum(count)) %>% mutate(stdev = sqrt(UA*(1-UA))) %>% - mutate(WS = proportion * stdev) + mutate(areaXstdev = area * stdev) -number <- (sum(df$WS)/standard_error)**2 -number +# number of recommended samples +number <- (sum(table$areaXstdev)/standard_error)**2 -df <- df %>% - mutate(equal = round(number / nrow(df))) %>% - mutate(proportional = round(number * proportion)) %>% +sprintf("%d samples are suggested.\n", number) %>% + cat() + +# compute class-wise sample size for equal and proportional allocation +table <- table %>% + mutate(equal = round(number / nrow(table))) %>% + mutate(proportional = round(number * area)) %>% mutate(compromise = NA) -if (min(df$proportional) < minsize){ - - rare <- df %>% + +# do we have enough samples in proportional allocation? +if (min(table$proportional) < minsize){ + + cat("Proportional allocation yields too few samples.\n") + cat("A compromise between equal and proportional allocation is recommended.\n") + + # first, assign minimum sample size to small classes + rare <- table %>% filter(proportional < minsize) %>% mutate(compromise = minsize) n_rare <- sum(rare$compromise) - big <- df %>% + # allocate remaining samples to big classes proportionally + big <- table %>% filter(proportional >= minsize) %>% - mutate(compromise = (number-n_rare) * proportion) + mutate(compromise = (number-n_rare) * area) + # check if proportionally allocated classes are big enough if (any(big$compromise < minsize)){ - c( - "compromising the sample allocation failed...\n", - "either decrease minsize or adjust input parameters to \n", - "have more samples overall." - ) + exit_with_error("Compromising failed. 
Adjust input parameters.") } - df <- rbind(rare, big) + table <- rbind(rare, big) +} else { + cat("Proportional allocation recommended.\n") } -df %>% +# compute deviation of compromised allocation from proportional in percent +table <- table %>% mutate(deviation = (compromise - proportional) / proportional * 100) -write.csv(df, file_output, row.names = FALSE, quote = FALSE) - - - +# write output +write.csv( + table, + opt$output, + row.names = FALSE, + quote = FALSE +) From 9ff118457c81dfff3f33ec07b44853080f9b1b8c Mon Sep 17 00:00:00 2001 From: David Frantz Date: Sun, 28 Jan 2024 13:30:23 +0100 Subject: [PATCH 05/23] worked on map accuracy script --- rstats/force-map-accuracy.r | 262 ++++++++++++++++++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 rstats/force-map-accuracy.r diff --git a/rstats/force-map-accuracy.r b/rstats/force-map-accuracy.r new file mode 100644 index 00000000..6ee1dd16 --- /dev/null +++ b/rstats/force-map-accuracy.r @@ -0,0 +1,262 @@ +#!/usr/bin/env Rscript + +# This file is part of FORCE - Framework for Operational Radiometric +# Correction for Environmental monitoring. +# +# Copyright (C) 2013-2024 David Frantz +# +# FORCE is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# FORCE is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with FORCE. If not, see . + +info <- "Compute map accuracy and area statistics.\n" + +# load libraries #################################################### +library(dplyr) +library(getopt) + + +# input ############################################################# + +# usage function in same style as other FORCE tools, +# do not use getop's built-in usage function for consistency +usage <- function(exit){ + + message <- c( + sprintf( + "Usage: %s [-h] [-v] [-i] [-o output-file] -c count-file -m map-file -r reference-file\n", + get_Rscript_filename() + ), + "\n", + " -h = show this help\n", + " -v = show version\n", + " -i = show program's purpose\n", + "\n", + " -o output-file = output file path with extension,\n", + " defaults to './accuracy-assessment.txt'\n", + "\n", + " -c count-file = csv table with pixel counts per class\n", + " 2 columns named class and count", + "\n", + " -m map-file = csv table with predicted class labels\n", + " 2 columns named ID and map", + "\n", + " -r reference-file = csv table with reference class labels\n", + " 2 columns named ID and reference", + "\n", + + ) + + cat( + message, + file = if (exit == 0) stdout else stderr() + ) + + quit( + save = "no", + status = exit + ) + +} + +exit_normal <- function(argument) { + cat( + sprintf("%s\n", argument) + ) + quit( + save = "no", + status = 0 + ) +} + +exit_with_error <- function(argument) { + cat( + sprintf("%s\n", argument), + file = stderr() + ) + usage(1) +} + +file_existing <- function(path) { + if (!file.exist(path)){ + cat( + sprintf("file %s does not exist\n", path), + file = stderr() + ) + usage(1) + } +} + +spec <- matrix( + c( + "help", "h", 0, "logical", + "version", "v", 0, "logical", + "info", "i", 0, "logical", + "output", "o", 2, "character", + "counts", "c", 1, "character", + "map", 
"m", 1, "character", + "reference", "r", 1, "character" + ), + byrow = TRUE, + ncol = 4 +) + +opt <- getopt(spec) + +if (!is.null(opt$help)) usage() +if (!is.null(opt$info)) exit_normal(info) +if (!is.null(opt$version)) exit_normal("Printing function not implemented yet. Sorry.\n") + +if (is.null(opt$counts)) exit_with_error("count-file is missing!") +if (is.null(opt$map)) exit_with_error("map-file is missing!") +if (is.null(opt$reference)) exit_with_error("reference-file is missing!") + +file_existing(opt$counts) +file_existing(opt$map) +file_existing(opt$reference) + +if (is.null(opt$output)) opt$output <- "accuracy-assessment.txt" + + +cnt <- read.csv(opt$counts) +map <- read.csv(opt$map) +ref <- read.csv(opt$reference) + +# columns OK? +if (!"ID" %in% colnames(map)) exit_with_error("ID column in map-file missing") +if (!"ID" %in% colnames(ref)) exit_with_error("ID column in reference-file missing") +if (!"map" %in% colnames(map)) exit_with_error("map column in map-file missing") +if (!"reference" %in% colnames(ref)) exit_with_error("reference column in reference-file missing") +if (!"class" %in% colnames(cnt)) exit_with_error("class column in count-file missing") +if (!"count" %in% colnames(cnt)) exit_with_error("count column in count-file missing") + + +# main thing ######################################################## + +# join input tables +table <- reference %>% + inner_join( + map, + by = "ID" + ) + +# join worked? +if (nrow(table) != nrow(reference)){ + exit_with_error("map and reference files could not be joined") +} + +table <- data.frame(ID = 1:1000, map = round(runif(1000, 8, 12)), reference = round(runif(1000, 8, 12))) + +# get all classes +classes <- c( + table$map, + table$reference + ) %>% + unique() %>% + sort() +n_classes <- length(classes) + + +# initialize confusion matrix +confusion_counts <- matrix( + NA, + n_classes, + n_classes, + dimnames = list( + map = classes, + reference = classes + ) +) + +# populate confusion matrix +for (m in 1:n_classes){ + for (r in 1:n_classes){ + + confusion_counts[m, r] <- + table %>% + filter( + map == classes[m] & + reference == classes[r] + ) %>% + nrow() + + } +} + + +acc_metrics <- function(confusion_matrix) { + + sum_all <- sum(confusion_matrix) + sum_map_class <- rowSums(confusion_matrix) + sum_ref_class <- colSums(confusion_matrix) + + oa <- confusion_matrix %>% + diag() %>% + sum() %>% + `/`(sum_all) + oa + + pa <- confusion_matrix %>% + diag() %>% + `/`(sum_ref_class) + pa + + ua <- confusion_matrix %>% + diag() %>% + `/`(sum_map_class) + ua + + oe <- 1 - pa + ce <- 1 - ua + + + list( + oa = oa, + pa = pa, + ua = ua, + oe = oe, + ce = ce + ) %>% + return() + +} + + + +cnt <- data.frame(class = 12:8, count = runif(5, 1e4, 1e6)) + +cnt <- cnt %>% + arrange(class) %>% + mutate(weight = count / sum(count)) + +if (!any(cnt$class == classes)) exit_with_error("classes in file-count do not match with map or reference classes") + +confusion_adjusted <- + confusion_counts / + matrix( + rowSums(confusion_counts), + n_classes, + n_classes, + byrow = FALSE + ) * + matrix( + cnt$weight, + n_classes, + n_classes, + byrow = FALSE + ) + +confusion_counts +confusion_adjusted + +acc_metrics(confusion_counts) +acc_metrics(confusion_adjusted) From dfa8793697da1bb4bc44f68c1b596a2495341f73 Mon Sep 17 00:00:00 2001 From: David Frantz Date: Sun, 28 Jan 2024 13:30:47 +0100 Subject: [PATCH 06/23] some refinement --- rstats/force-sample-size.r | 30 +++++++++++++++++++----------- src/aux-level/_hist.c | 2 +- 2 files changed, 20 insertions(+), 12 
deletions(-) diff --git a/rstats/force-sample-size.r b/rstats/force-sample-size.r index 3be99b91..f271f350 100644 --- a/rstats/force-sample-size.r +++ b/rstats/force-sample-size.r @@ -18,6 +18,7 @@ # You should have received a copy of the GNU General Public License # along with FORCE. If not, see . +info <- "Suggest sample size for estimating area and accuracy of a classification.\n" # load libraries #################################################### library(dplyr) @@ -40,18 +41,18 @@ usage <- function(exit){ " -v = show version\n", " -i = show program's purpose\n", "\n", - " -e error = standard error, defaults to 0.01\n", + " -e error = standard error of the estimated overall accuracy (default: 0.01)\n", "\n", - " -s min_size = minimum sample size per class\n", + " -s min_size = minimum sample size per class (default: 50)\n", "\n", " -o output-file = output file path with extension,\n", " defaults to './sample-size.csv'\n", "\n", " -c count-file = csv table with pixel counts per class\n", - " 2 columns named value and count", + " 2 columns named class and count", "\n", " -u user_acc-file = csv table with expected user accuracy per class\n", - " 2 columns named value and UA", + " 2 columns named class and UA", "\n", ) @@ -103,9 +104,9 @@ spec <- matrix( "info", "i", 0, "logical", "error", "e", 2, "numeric", "min_size", "s", 2, "integer", + "output", "o", 2, "character", "counts", "c", 1, "character", - "user_acc", "u", 1, "character", - "output", "o", 1, "character" + "user_acc", "u", 1, "character" ), byrow = TRUE, ncol = 4 @@ -114,7 +115,7 @@ spec <- matrix( opt <- getopt(spec) if (!is.null(opt$help)) usage() -if (!is.null(opt$info)) exit_normal("Suggest sample size for validation of a classification.\n") +if (!is.null(opt$info)) exit_normal(info) if (!is.null(opt$version)) exit_normal("Printing function not implemented yet. Sorry.\n") if (is.null(opt$counts)) exit_with_error("count-file is missing!") @@ -131,6 +132,12 @@ if (is.null(opt$output)) opt$output <- "sample-size.csv" pixel_counts <- read.csv(opt$counts) users_accuracy <- read.csv(opt$user_acc) +# columns OK? +if (!"class" %in% colnames(pixel_counts)) exit_with_error("class column in count-file missing") +if (!"class" %in% colnames(users_accuracy)) exit_with_error("class column in user_acc-file missing") +if (!"count" %in% colnames(pixel_counts)) exit_with_error("count column in count-file missing") +if (!"UA" %in% colnames(user_acc)) exit_with_error("UA column in user_acc-file missing") + # main thing ######################################################## @@ -138,7 +145,7 @@ users_accuracy <- read.csv(opt$user_acc) table <- users_accuracy %>% inner_join( pixel_counts, - by = "value" + by = "class" ) # join worked? @@ -155,7 +162,7 @@ table <- table %>% # number of recommended samples number <- (sum(table$areaXstdev)/standard_error)**2 -sprintf("%d samples are suggested.\n", number) %>% +sprintf("Suggested sample size: %d\n", number) %>% cat() # compute class-wise sample size for equal and proportional allocation @@ -183,13 +190,14 @@ if (min(table$proportional) < minsize){ filter(proportional >= minsize) %>% mutate(compromise = (number-n_rare) * area) + table <- rbind(rare, big) + # check if proportionally allocated classes are big enough if (any(big$compromise < minsize)){ + print(table) exit_with_error("Compromising failed. 
Adjust input parameters.") } - table <- rbind(rare, big) - } else { cat("Proportional allocation recommended.\n") } diff --git a/src/aux-level/_hist.c b/src/aux-level/_hist.c index c4e4d992..61c9e00b 100644 --- a/src/aux-level/_hist.c +++ b/src/aux-level/_hist.c @@ -203,7 +203,7 @@ FILE *fout = NULL; fprintf(stderr, "Unable to open output file %s\n", args.file_output); return FAILURE;} - fprintf(fout, "value,count\n"); + fprintf(fout, "class,count\n"); for (i=0; i Date: Sun, 28 Jan 2024 16:33:35 +0100 Subject: [PATCH 07/23] cont. implementation --- rstats/force-map-accuracy.r | 136 ++++++++++++++++++++++++++++-------- 1 file changed, 106 insertions(+), 30 deletions(-) diff --git a/rstats/force-map-accuracy.r b/rstats/force-map-accuracy.r index 6ee1dd16..20bb8342 100644 --- a/rstats/force-map-accuracy.r +++ b/rstats/force-map-accuracy.r @@ -33,9 +33,10 @@ usage <- function(exit){ message <- c( sprintf( - "Usage: %s [-h] [-v] [-i] [-o output-file] -c count-file -m map-file -r reference-file\n", + "Usage: %s [-h] [-v] [-i] [-o output-file] -c count-file \n", get_Rscript_filename() ), + " -m map-file -r reference-file -a pixel-area\n", "\n", " -h = show this help\n", " -v = show version\n", @@ -53,6 +54,10 @@ usage <- function(exit){ " -r reference-file = csv table with reference class labels\n", " 2 columns named ID and reference", "\n", + " -a pixel-area = area of one pixel in desired reporting unit, e.g.\n", + " 100 for a Sentinel-2 based map to be reported in m², or\n", + " 0.01 for a Sentinel-2 based map to be reported in ha\n", + "\n", ) @@ -98,13 +103,14 @@ file_existing <- function(path) { spec <- matrix( c( - "help", "h", 0, "logical", - "version", "v", 0, "logical", - "info", "i", 0, "logical", - "output", "o", 2, "character", - "counts", "c", 1, "character", - "map", "m", 1, "character", - "reference", "r", 1, "character" + "help", "h", 0, "logical", + "version", "v", 0, "logical", + "info", "i", 0, "logical", + "output", "o", 2, "character", + "counts", "c", 1, "character", + "map", "m", 1, "character", + "reference", "r", 1, "character", + "pixel_area", "a", 1, "numeric" ), byrow = TRUE, ncol = 4 @@ -119,6 +125,7 @@ if (!is.null(opt$version)) exit_normal("Printing function not implemented yet. 
S if (is.null(opt$counts)) exit_with_error("count-file is missing!") if (is.null(opt$map)) exit_with_error("map-file is missing!") if (is.null(opt$reference)) exit_with_error("reference-file is missing!") +if (is.null(opt$area)) exit_with_error("pixel-area is missing!") file_existing(opt$counts) file_existing(opt$map) @@ -143,7 +150,8 @@ if (!"count" %in% colnames(cnt)) exit_with_error("count column in count-file mis # main thing ######################################################## # join input tables -table <- reference %>% +table <- + reference %>% inner_join( map, by = "ID" @@ -157,7 +165,8 @@ if (nrow(table) != nrow(reference)){ table <- data.frame(ID = 1:1000, map = round(runif(1000, 8, 12)), reference = round(runif(1000, 8, 12))) # get all classes -classes <- c( +classes <- + c( table$map, table$reference ) %>% @@ -167,15 +176,16 @@ n_classes <- length(classes) # initialize confusion matrix -confusion_counts <- matrix( - NA, - n_classes, - n_classes, - dimnames = list( - map = classes, - reference = classes +confusion_counts <- + matrix( + NA, + n_classes, + n_classes, + dimnames = list( + map = classes, + reference = classes + ) ) -) # populate confusion matrix for (m in 1:n_classes){ @@ -195,26 +205,30 @@ for (m in 1:n_classes){ acc_metrics <- function(confusion_matrix) { - sum_all <- sum(confusion_matrix) + sum_all <- sum(confusion_matrix) sum_map_class <- rowSums(confusion_matrix) sum_ref_class <- colSums(confusion_matrix) - oa <- confusion_matrix %>% + # overall accuracy + oa <- + confusion_matrix %>% diag() %>% sum() %>% `/`(sum_all) - oa - pa <- confusion_matrix %>% + # producer's accuracy + pa <- + confusion_matrix %>% diag() %>% `/`(sum_ref_class) - pa - ua <- confusion_matrix %>% + # user's accuracy + ua <- + confusion_matrix %>% diag() %>% `/`(sum_map_class) - ua + # error of omission and commission oe <- 1 - pa ce <- 1 - ua @@ -234,12 +248,18 @@ acc_metrics <- function(confusion_matrix) { cnt <- data.frame(class = 12:8, count = runif(5, 1e4, 1e6)) -cnt <- cnt %>% +# compute propoertional area per class, area in reporting unit, and sort the classes +cnt <- + cnt %>% arrange(class) %>% - mutate(weight = count / sum(count)) + mutate(weight = count / sum(count)) %>% + mutate(area = count * opt$pixel_area) -if (!any(cnt$class == classes)) exit_with_error("classes in file-count do not match with map or reference classes") +# classes should now align with map/reference dataset +if (!any(cnt$class == classes)) + exit_with_error("classes in file-count do not match with map or reference classes") +# Olofsson et al. 2013, eq. 1 confusion_adjusted <- confusion_counts / matrix( @@ -258,5 +278,61 @@ confusion_adjusted <- confusion_counts confusion_adjusted -acc_metrics(confusion_counts) -acc_metrics(confusion_adjusted) + +# Olofsson et al. 2013, eq. 2 +area_adjusted <- + colSums(confusion_adjusted) * sum(cnt$area) + + + +proportions <- + confusion_counts / + matrix( + rowSums(confusion_counts), + n_classes, + n_classes, + byrow = FALSE + ) + + +# Olofsson et al. 2013, eq. 3-5 +confidence_area_adjusted <- +{ +proportions * +(1 - proportions) / +matrix( + rowSums(confusion_counts) - 1, + n_classes, + n_classes, + byrow = FALSE +) * +matrix( + cnt$weight**2, + n_classes, + n_classes, + byrow = FALSE +) + } %>% + rowSums() %>% + sqrt() %>% + `*`(sum(cnt$area)) %>% + `*`(1.96) +confidence_area_adjusted + + +# Olofsson et al. 2013, eq. 6-8 +acc_traditional <- acc_metrics(confusion_counts) +acc_adjusted <- acc_metrics(confusion_adjusted) + +# Olofsson et al. 2014, eq. 
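The two estimators implemented above can be stated compactly. With rows of the confusion matrix being map classes i, columns reference classes j, n_ij the sample counts, n_i. the row totals, W_i the mapped area proportions (weight) and A_tot the total map area (sum(cnt$area)), the code computes, in LaTeX notation:

    \hat{p}_{ij} = W_i \, \frac{n_{ij}}{n_{i\cdot}}              % Olofsson et al. 2013, eq. 1
    \hat{A}_j    = A_{tot} \, \sum_i \hat{p}_{ij}                % Olofsson et al. 2013, eq. 2

The factor 1.96 that multiplies the standard errors throughout corresponds to an approximate 95 % confidence interval.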
5 +oa_se <- sum(cnt$weight**2 * acc_adjusted$ua * (1 - acc_adjusted$ua) / (rowSums(confusion_counts) - 1)) %>% +sqrt() %>% +`*`(1.96) + +# Olofsson et al. 2014, eq. 6 +ua_se <- (acc_adjusted$ua * (1 - acc_adjusted$ua) / (rowSums(confusion_counts) - 1)) %>% +sqrt() %>% +`*`(1.96) + +# Olofsson et al. 2014, eq. 7 +pa_se <- # todo, looks comlicated From a854843b5f6bdeb25003f43c0735141d1f715e30 Mon Sep 17 00:00:00 2001 From: David Frantz Date: Tue, 30 Jan 2024 09:19:30 +0100 Subject: [PATCH 08/23] area adjusted accuracies implemented --- rstats/force-map-accuracy.r | 104 ++++++++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 5 deletions(-) diff --git a/rstats/force-map-accuracy.r b/rstats/force-map-accuracy.r index 20bb8342..21e88637 100644 --- a/rstats/force-map-accuracy.r +++ b/rstats/force-map-accuracy.r @@ -60,12 +60,12 @@ usage <- function(exit){ "\n", ) - + cat( message, file = if (exit == 0) stdout else stderr() ) - + quit( save = "no", status = exit @@ -202,6 +202,22 @@ for (m in 1:n_classes){ } } +classes <- 1:4 +n_classes <- 4 +confusion_counts <- matrix( + c( + 150, 12, 1, 2, + 32, 100, 21, 3, + 0, 32, 120, 0, + 0, 0, 5, 130 + ), byrow = TRUE, + n_classes, + n_classes, + dimnames = list( + map = classes, + reference = classes + ) +) acc_metrics <- function(confusion_matrix) { @@ -246,7 +262,8 @@ acc_metrics <- function(confusion_matrix) { -cnt <- data.frame(class = 12:8, count = runif(5, 1e4, 1e6)) +cnt <- data.frame(class = 1:4, count = c(20000,200000,300000,350000)) +opt <- list(pixel_area = 30^2/10000) # ha # compute propoertional area per class, area in reporting unit, and sort the classes cnt <- @@ -313,7 +330,7 @@ matrix( byrow = FALSE ) } %>% - rowSums() %>% + colSums() %>% sqrt() %>% `*`(sum(cnt$area)) %>% `*`(1.96) @@ -335,4 +352,81 @@ sqrt() %>% `*`(1.96) # Olofsson et al. 2014, eq. 
7 -pa_se <- # todo, looks comlicated + + +Nj <- + { + matrix( + cnt$count, + n_classes, + n_classes, + byrow = FALSE + ) / + matrix( + rowSums(confusion_counts), + n_classes, + n_classes, + byrow = FALSE + ) * + confusion_counts + } %>% + colSums() + + +term1 <- cnt$count**2 * +(1 - acc_adjusted$pa)**2 * +acc_adjusted$ua * +(1 - acc_adjusted$ua) / +(colSums(confusion_counts) - 1) + +leave_class_out <- +matrix( + 1, n_classes, + n_classes +) +diag(leave_class_out) <- 0 + +term2 <- { +matrix( + cnt$count**2, + n_classes, + n_classes, + byrow = FALSE +) * +confusion_counts / +matrix( + rowSums(confusion_counts), + n_classes, + n_classes, + byrow = FALSE +) * +( + 1 - + confusion_counts / + matrix( + rowSums(confusion_counts), + n_classes, + n_classes, + byrow = FALSE + ) +) / +matrix( +(rowSums(confusion_counts) - 1), + n_classes, + n_classes, + byrow = FALSE +) * +leave_class_out +} %>% +colSums() %>% +`*`(acc_adjusted$pa**2) + + + +pa_se <- +{ + (1/Nj**2) * + (term1 + term2) +} %>% +sqrt() %>% +`*`(1.96) From 19feb7f8996cc10e5b8203dd0ff674bb5d86f1e6 Mon Sep 17 00:00:00 2001 From: David Frantz Date: Wed, 31 Jan 2024 08:17:29 +0100 Subject: [PATCH 09/23] further development, u+x in rstats/* --- rstats/force-level2-report.Rmd | 0 rstats/force-map-accuracy.r | 9 +- rstats/force-sample-size.r | 61 ++++---- src/aux-level/_stratified_sample.c | 230 +++++++++++++++++++++++++++++ src/cross-level/read-cl.c | 2 +- 5 files changed, 272 insertions(+), 30 deletions(-) mode change 100644 => 100755 rstats/force-level2-report.Rmd mode change 100644 => 100755 rstats/force-map-accuracy.r mode change 100644 => 100755 rstats/force-sample-size.r create mode 100644 src/aux-level/_stratified_sample.c diff --git a/rstats/force-level2-report.Rmd b/rstats/force-level2-report.Rmd old mode 100644 new mode 100755 diff --git a/rstats/force-map-accuracy.r b/rstats/force-map-accuracy.r old mode 100644 new mode 100755 index 21e88637..06b5e4f3 --- a/rstats/force-map-accuracy.r +++ b/rstats/force-map-accuracy.r @@ -263,7 +263,7 @@ acc_metrics <- function(confusion_matrix) { cnt <- data.frame(class = 1:4, count = c(20000,200000,300000,350000)) -opt <- list(pixel_area = 30^2/10000) # ha +opt <- list(pixel_area = 30^2/10000, output = "accuracy-assessment.txt") # ha # compute propoertional area per class, area in reporting unit, and sort the classes cnt <- @@ -430,3 +430,10 @@ pa_se <- } %>% sqrt() %>% `*`(1.96) + +fo <- file(opt$output, "w") + +cat("# Accuracy assessment", file = fo) + +close(fo) + diff --git a/rstats/force-sample-size.r b/rstats/force-sample-size.r old mode 100644 new mode 100755 index f271f350..3ede6d5f --- a/rstats/force-sample-size.r +++ b/rstats/force-sample-size.r @@ -25,8 +25,13 @@ library(dplyr) library(getopt) -# input ############################################################# +# program name ###################################################### +progname <<- + get_Rscript_filename() %>% + basename() + +# input ############################################################# # usage function in same style as other FORCE tools, # do not use getop's built-in usage function for consistency usage <- function(exit){ @@ -34,7 +39,7 @@ usage <- function(exit){ message <- c( sprintf( "Usage: %s [-h] [-v] [-i] [-e error] [-s min_size] [-o output-file] -c count-file -u user_acc-file\n", - get_Rscript_filename() + progname ), "\n", " -h = show this help\n", @@ -53,15 +58,14 @@ usage <- function(exit){ "\n", " -u user_acc-file = csv table with expected user accuracy per class\n", " 2 columns named class and UA", - 
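The producer's-accuracy interval implemented in patch 08 above (the Nj / term1 / term2 block, the step marked as todo in patch 07) boils down to the following structure, stated here only in the code's own terms: N_i are the mapped pixel counts (cnt$count), n_ij the confusion-matrix counts, and Nj holds \hat{N}_j = \sum_i (N_i / n_{i\cdot}) \, n_{ij}, the estimated pixel total of reference class j. term1 carries the contribution of map class j itself, term2 sums the contributions of all other map classes (the leave_class_out matrix zeroes the diagonal), and the reported value is the half-width of an approximate 95 % confidence interval:

    \widehat{SE}_{95\%}(\hat{P}_j) = \frac{1.96}{\hat{N}_j} \, \sqrt{\mathrm{term1}_j + \mathrm{term2}_j}

As the code comment indicates, this corresponds to Olofsson et al. (2014), eq. 7.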
"\n", - + "\n" ) - + cat( message, - file = if (exit == 0) stdout else stderr() + file = if (exit == 0) stdout() else stderr() ) - + quit( save = "no", status = exit @@ -81,14 +85,14 @@ exit_normal <- function(argument) { exit_with_error <- function(argument) { cat( - sprintf("%s\n", argument), + sprintf("%s\n", argument), file = stderr() ) usage(1) } file_existing <- function(path) { - if (!file.exist(path)){ + if (!file.exists(path)) { cat( sprintf("file %s does not exist\n", path), file = stderr() @@ -107,14 +111,14 @@ spec <- matrix( "output", "o", 2, "character", "counts", "c", 1, "character", "user_acc", "u", 1, "character" - ), - byrow = TRUE, + ), + byrow = TRUE, ncol = 4 ) opt <- getopt(spec) -if (!is.null(opt$help)) usage() +if (!is.null(opt$help)) usage(0) if (!is.null(opt$info)) exit_normal(info) if (!is.null(opt$version)) exit_normal("Printing function not implemented yet. Sorry.\n") @@ -136,7 +140,7 @@ users_accuracy <- read.csv(opt$user_acc) if (!"class" %in% colnames(pixel_counts)) exit_with_error("class column in count-file missing") if (!"class" %in% colnames(users_accuracy)) exit_with_error("class column in user_acc-file missing") if (!"count" %in% colnames(pixel_counts)) exit_with_error("count column in count-file missing") -if (!"UA" %in% colnames(user_acc)) exit_with_error("UA column in user_acc-file missing") +if (!"UA" %in% colnames(users_accuracy)) exit_with_error("UA column in user_acc-file missing") # main thing ######################################################## @@ -154,46 +158,47 @@ if (nrow(table) != nrow(pixel_counts)){ } # compute proportional area, and standard deviation of UA -table <- table %>% +table <- table %>% mutate(area = count / sum(count)) %>% - mutate(stdev = sqrt(UA*(1-UA))) %>% + mutate(stdev = sqrt(UA * (1 - UA))) %>% mutate(areaXstdev = area * stdev) # number of recommended samples -number <- (sum(table$areaXstdev)/standard_error)**2 +number <- (sum(table$areaXstdev) / opt$error)**2 %>% + as.integer() sprintf("Suggested sample size: %d\n", number) %>% cat() # compute class-wise sample size for equal and proportional allocation -table <- table %>% +table <- table %>% mutate(equal = round(number / nrow(table))) %>% mutate(proportional = round(number * area)) %>% mutate(compromise = NA) # do we have enough samples in proportional allocation? -if (min(table$proportional) < minsize){ +if (min(table$proportional) < opt$min_size) { cat("Proportional allocation yields too few samples.\n") cat("A compromise between equal and proportional allocation is recommended.\n") # first, assign minimum sample size to small classes rare <- table %>% - filter(proportional < minsize) %>% - mutate(compromise = minsize) - + filter(proportional < opt$min_size) %>% + mutate(compromise = opt$min_size) + n_rare <- sum(rare$compromise) - + # allocate remaining samples to big classes proportionally big <- table %>% - filter(proportional >= minsize) %>% - mutate(compromise = (number-n_rare) * area) + filter(proportional >= opt$min_size) %>% + mutate(compromise = (number - n_rare) * area) table <- rbind(rare, big) # check if proportionally allocated classes are big enough - if (any(big$compromise < minsize)){ + if (any(big$compromise < opt$min_size)) { print(table) exit_with_error("Compromising failed. 
Adjust input parameters.") } @@ -208,8 +213,8 @@ table <- table %>% # write output write.csv( - table, - opt$output, - row.names = FALSE, + table, + opt$output, + row.names = FALSE, quote = FALSE ) diff --git a/src/aux-level/_stratified_sample.c b/src/aux-level/_stratified_sample.c new file mode 100644 index 00000000..777733ca --- /dev/null +++ b/src/aux-level/_stratified_sample.c @@ -0,0 +1,230 @@ +/**+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +This file is part of FORCE - Framework for Operational Radiometric +Correction for Environmental monitoring. + +Copyright (C) 2013-2024 David Frantz + +FORCE is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +FORCE is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with FORCE. If not, see . + ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++**/ + +/**+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +This program computes a histogram of the given image ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++**/ + + +#include // core input and output functions +#include // standard general utilities library + +#include // testing and mapping characters +#include // standard symbolic constants and types + +#include "../cross-level/const-cl.h" +#include "../cross-level/konami-cl.h" +#include "../cross-level/string-cl.h" + +/** Geospatial Data Abstraction Library (GDAL) **/ +#include "gdal.h" // public (C callable) GDAL entry points + + +typedef struct { + int n; + int band; + char file_input[NPOW_10]; + char file_output[NPOW_10]; +} args_t; + + +void usage(char *exe, int exit_code){ + + + printf("Usage: %s [-h] [-v] [-i] [-b band] [-o output-file] input-image\n", exe); + printf("\n"); + printf(" -h = show this help\n"); + printf(" -v = show version\n"); + printf(" -i = show program's purpose\n"); + printf("\n"); + printf(" -b band = band to use,\n"); + printf(" defaults to 1\n"); + printf("\n"); + printf(" -o output-file = output file path with extension,\n"); + printf(" defaults to './histogram.csv'\n"); + printf("\n"); + printf(" Positional arguments:\n"); + printf(" - 'input-image': image for computing the histogram\n"); + printf("\n"); + + exit(exit_code); + return; +} + + +void parse_args(int argc, char *argv[], args_t *args){ +int opt; + + + opterr = 0; + + // default parameters + copy_string(args->file_output, NPOW_10, "histogram.csv"); + args->band = 1; + + // optional parameters + while ((opt = getopt(argc, argv, "hvio:b:")) != -1){ + switch(opt){ + case 'h': + usage(argv[0], SUCCESS); + case 'v': + printf("FORCE version: %s\n", _VERSION_); + exit(SUCCESS); + case 'i': + printf("Compute image histogram\n"); + exit(SUCCESS); + case 'o': + copy_string(args->file_output, NPOW_10, optarg); + break; + case 'b': + args->band = atoi(optarg); + if (args->band < 0){ + fprintf(stderr, "Band must be >= 1\n"); + usage(argv[0], FAILURE); + } + break; + case '?': + if (isprint(optopt)){ + fprintf(stderr, "Unknown option `-%c'.\n", optopt); + } else { + fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt); + } + usage(argv[0], 
FAILURE); + default: + fprintf(stderr, "Error parsing arguments.\n"); + usage(argv[0], FAILURE); + } + } + + // non-optional parameters + args->n = 1; + + if (optind < argc){ + konami_args(argv[optind]); + if (argc-optind == args->n){ + copy_string(args->file_input, NPOW_10, argv[optind++]); + } else if (argc-optind < args->n){ + fprintf(stderr, "some non-optional arguments are missing.\n"); + usage(argv[0], FAILURE); + } else if (argc-optind > args->n){ + fprintf(stderr, "too many non-optional arguments.\n"); + usage(argv[0], FAILURE); + } + } else { + fprintf(stderr, "non-optional arguments are missing.\n"); + usage(argv[0], FAILURE); + } + + return; +} + + +int main(int argc, char *argv[]){ +args_t args; +GDALDatasetH fp; +GDALRasterBandH band; +int i, j, nx, ny; +short *line = NULL; +short nodata; +int has_nodata; + +int offset = SHRT_MAX+1; +int length = USHRT_MAX+1; +off_t counts[length]; +FILE *fout = NULL; + + +double **sample_size = NULL +int n_class, n_column; + + parse_args(argc, argv, &args); + + sample_size = read_table(args.file_size, &n_class, &n_column); + if (n_column != 3){ + fprintf(stderr, "input-size does not have 3 columns %s.\n", args.file_input); exit(1);} + + GDALAllRegister(); + if ((fp = GDALOpen(args.file_input, GA_ReadOnly)) == NULL){ + fprintf(stderr, "could not open %s.\n", args.file_input); exit(1);} + + nx = GDALGetRasterXSize(fp); + ny = GDALGetRasterYSize(fp); + + alloc((void**)&line, nx, sizeof(short)); + + band = GDALGetRasterBand(fp, args.band); + + nodata = (short)GDALGetRasterNoDataValue(band, &has_nodata); + if (!has_nodata){ + fprintf(stderr, "input image has no nodata value.\n"); + exit(1); + } + + + + memset(counts, 0, sizeof(off_t)*length); + + for (i=0; i Date: Wed, 31 Jan 2024 09:11:42 +0100 Subject: [PATCH 10/23] collected vars at end --- rstats/force-map-accuracy.r | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/rstats/force-map-accuracy.r b/rstats/force-map-accuracy.r index 06b5e4f3..f0e54a95 100755 --- a/rstats/force-map-accuracy.r +++ b/rstats/force-map-accuracy.r @@ -292,8 +292,6 @@ confusion_adjusted <- byrow = FALSE ) -confusion_counts -confusion_adjusted # Olofsson et al. 2013, eq. 
2 @@ -431,6 +429,23 @@ pa_se <- sqrt() %>% `*`(1.96) + + +confusion_counts +confusion_adjusted + +acc_traditional +acc_adjusted + +cnt$area +area_adjusted +confidence_area_adjusted + +oa_se +pa_se +ua_se + + fo <- file(opt$output, "w") cat("# Accuracy assessment", file = fo) From c203ffde9f5df50fa6299cbad6e93b6eddbc1453 Mon Sep 17 00:00:00 2001 From: David Frantz Date: Wed, 31 Jan 2024 13:00:58 +0100 Subject: [PATCH 11/23] skip lines when writing table --- src/cross-level/table-cl.c | 4 +++- src/cross-level/table-cl.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cross-level/table-cl.c b/src/cross-level/table-cl.c index 74d49265..cad59b02 100755 --- a/src/cross-level/table-cl.c +++ b/src/cross-level/table-cl.c @@ -347,7 +347,7 @@ int width, *max_width = NULL; --- separator: column separator +++ Return: void +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++**/ -void write_table(table_t *table, char *fname, char *separator){ +void write_table(table_t *table, char *fname, char *separator, bool skip_rows){ int row, col; FILE *fp = NULL; @@ -369,6 +369,8 @@ FILE *fp = NULL; for (row=0; rownrow; row++){ + if (skip_rows && !table->row_mask[row]) continue; + if (table->has_row_names) printf("%s%s", table->row_names[row], separator); for (col=0; col<(table->ncol-1); col++) printf("%.2f%s", table->data[row][col], separator); printf("%.2f\n", table->data[row][col]); diff --git a/src/cross-level/table-cl.h b/src/cross-level/table-cl.h index f7340278..a349acfa 100755 --- a/src/cross-level/table-cl.h +++ b/src/cross-level/table-cl.h @@ -63,7 +63,7 @@ table_t read_table(char *fname, bool has_row_names, bool has_col_names); table_t allocate_table(int nrow, int ncol, bool has_row_names, bool has_col_names); void init_table(table_t *table); void print_table(table_t *table, bool truncate); -void write_table(table_t *table, char *fname, char *separator); +void write_table(table_t *table, char *fname, char *separator, bool skip_rows); void free_table(table_t *table); #ifdef __cplusplus From cd7830d81e23976a4f32848fec0a79f0c6a12830 Mon Sep 17 00:00:00 2001 From: David Frantz Date: Wed, 31 Jan 2024 13:18:42 +0100 Subject: [PATCH 12/23] used gerneric table in force-hist --- src/aux-level/_hist.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/src/aux-level/_hist.c b/src/aux-level/_hist.c index 61c9e00b..08e64631 100644 --- a/src/aux-level/_hist.c +++ b/src/aux-level/_hist.c @@ -34,6 +34,7 @@ This program computes a histogram of the given image #include "../cross-level/const-cl.h" #include "../cross-level/konami-cl.h" #include "../cross-level/string-cl.h" +#include "../cross-level/table-cl.h" /** Geospatial Data Abstraction Library (GDAL) **/ #include "gdal.h" // public (C callable) GDAL entry points @@ -146,11 +147,10 @@ int i, j, nx, ny; short *line = NULL; short nodata; int has_nodata; - int offset = SHRT_MAX+1; int length = USHRT_MAX+1; -off_t counts[length]; -FILE *fout = NULL; +table_t counts; +int row; parse_args(argc, argv, &args); @@ -173,8 +173,10 @@ FILE *fout = NULL; } - - memset(counts, 0, sizeof(off_t)*length); + counts = allocate_table(length, 2, false, true); + copy_string(counts.col_names[0], NPOW_10, "class"); + copy_string(counts.col_names[1], NPOW_10, "count"); + for (row=0; row Date: Wed, 31 Jan 2024 15:34:21 +0100 Subject: [PATCH 13/23] added new program force-stratified-sample --- Makefile | 7 ++- ...ratified_sample.c => _stratified-sample.c} | 52 ++++++------------- 2 files changed, 21 
insertions(+), 38 deletions(-) rename src/aux-level/{_stratified_sample.c => _stratified-sample.c} (82%) diff --git a/Makefile b/Makefile index 6c1ad4d9..83b2fcb8 100755 --- a/Makefile +++ b/Makefile @@ -78,7 +78,7 @@ FORCE_EXE = force force-cube force-higher-level force-import-modis \ force-procmask force-pyramid force-qai-inflate force-stack \ force-synthmix force-tabulate-grid force-tile-extent \ force-tile-finder force-train force-level2-report force-cube-init \ - force-init force-datacube-size force-hist + force-init force-datacube-size force-hist force-stratified-sample FORCE_MISC = force-level2-report.Rmd @@ -118,7 +118,7 @@ cross: string_cl enum_cl cite_cl utils_cl alloc_cl brick_cl imagefuns_cl param_c lower: table_ll param_ll meta_ll cube_ll equi7_ll glance7_ll atc_ll sunview_ll read_ll radtran_ll topo_ll cloud_ll gas_ll brdf_ll atmo_ll aod_ll resmerge_ll coreg_ll coregfuns_ll acix_ll modwvp_ll higher: param_hl progress_hl tasks_hl read-aux_hl read-ard_hl quality_hl bap_hl level3_hl cso_hl tsa_hl index_hl interpolate_hl stm_hl fold_hl standardize_hl pheno_hl polar_hl trend_hl ml_hl texture_hl lsm_hl lib_hl sample_hl imp_hl cfimp_hl l2imp_hl spec-adjust_hl pyp_hl rsp_hl udf_hl aux: param_aux param_train_aux train_aux -exe: force force-parameter force-qai-inflate force-tile-finder force-tabulate-grid force-l2ps force-higher-level force-train force-lut-modis force-mdcp force-stack force-import-modis force-cube-init force-hist +exe: force force-parameter force-qai-inflate force-tile-finder force-tabulate-grid force-l2ps force-higher-level force-train force-lut-modis force-mdcp force-stack force-import-modis force-cube-init force-hist force-stratified-sample .PHONY: temp all install install_ bash python external clean build check ### TEMP @@ -427,6 +427,9 @@ force-cube-init: temp cross lower $(DA)/_init-cube.c force-hist: temp cross $(DA)/_hist.c $(G11) $(CFLAGS) $(GDAL) $(GSL) $(CURL) -o $(TB)/force-hist $(DA)/_hist.c $(TC)/*.o $(TL)/*.o $(LDGDAL) $(LDGSL) $(LDCURL) +force-stratified-sample: temp cross $(DA)/_stratified-sample.c + $(G11) $(CFLAGS) $(GDAL) $(GSL) $(CURL) -o $(TB)/force-stratified-sample $(DA)/_stratified-sample.c $(TC)/*.o $(TL)/*.o $(LDGDAL) $(LDGSL) $(LDCURL) + ### dummy code for testing stuff dummy: temp cross aux higher src/dummy.c diff --git a/src/aux-level/_stratified_sample.c b/src/aux-level/_stratified-sample.c similarity index 82% rename from src/aux-level/_stratified_sample.c rename to src/aux-level/_stratified-sample.c index 777733ca..40c847ba 100644 --- a/src/aux-level/_stratified_sample.c +++ b/src/aux-level/_stratified-sample.c @@ -34,6 +34,7 @@ This program computes a histogram of the given image #include "../cross-level/const-cl.h" #include "../cross-level/konami-cl.h" #include "../cross-level/string-cl.h" +#include "../cross-level/table-cl.h" /** Geospatial Data Abstraction Library (GDAL) **/ #include "gdal.h" // public (C callable) GDAL entry points @@ -42,8 +43,10 @@ This program computes a histogram of the given image typedef struct { int n; int band; - char file_input[NPOW_10]; + char file_input_image[NPOW_10]; + char file_input_sample_size[NPOW_10]; char file_output[NPOW_10]; + char column_sample_size[NPOW_10]; } args_t; @@ -116,12 +119,14 @@ int opt; } // non-optional parameters - args->n = 1; + args->n = 3; if (optind < argc){ konami_args(argv[optind]); if (argc-optind == args->n){ - copy_string(args->file_input, NPOW_10, argv[optind++]); + copy_string(args->file_input_image, NPOW_10, argv[optind++]); + copy_string(args->file_input_sample_size, 
NPOW_10, argv[optind++]); + copy_string(args->column_sample_size, NPOW_10, argv[optind++]); } else if (argc-optind < args->n){ fprintf(stderr, "some non-optional arguments are missing.\n"); usage(argv[0], FAILURE); @@ -147,24 +152,20 @@ short *line = NULL; short nodata; int has_nodata; -int offset = SHRT_MAX+1; -int length = USHRT_MAX+1; -off_t counts[length]; -FILE *fout = NULL; +table_t sample_size; -double **sample_size = NULL -int n_class, n_column; parse_args(argc, argv, &args); - sample_size = read_table(args.file_size, &n_class, &n_column); - if (n_column != 3){ - fprintf(stderr, "input-size does not have 3 columns %s.\n", args.file_input); exit(1);} + sample_size = read_table(args.file_input_sample_size, false, true); + print_table(&sample_size, false); +// if (n_column != 3){ +// fprintf(stderr, "input-size does not have 3 columns %s.\n", args.file_input); exit(1);} GDALAllRegister(); - if ((fp = GDALOpen(args.file_input, GA_ReadOnly)) == NULL){ - fprintf(stderr, "could not open %s.\n", args.file_input); exit(1);} + if ((fp = GDALOpen(args.file_input_image, GA_ReadOnly)) == NULL){ + fprintf(stderr, "could not open %s.\n", args.file_input_image); exit(1);} nx = GDALGetRasterXSize(fp); ny = GDALGetRasterYSize(fp); @@ -180,9 +181,6 @@ int n_class, n_column; } - - memset(counts, 0, sizeof(off_t)*length); - for (i=0; i Date: Wed, 31 Jan 2024 16:30:54 +0100 Subject: [PATCH 14/23] find correct columns --- src/aux-level/_stratified-sample.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/aux-level/_stratified-sample.c b/src/aux-level/_stratified-sample.c index 40c847ba..285375a4 100644 --- a/src/aux-level/_stratified-sample.c +++ b/src/aux-level/_stratified-sample.c @@ -151,15 +151,26 @@ int i, j, nx, ny; short *line = NULL; short nodata; int has_nodata; - - table_t sample_size; +int col_size, col_class; parse_args(argc, argv, &args); sample_size = read_table(args.file_input_sample_size, false, true); + if ((col_size = find_table_col(&sample_size, args.column_sample_size)) < 0){ + printf("could not find column name in file-sample-size\n"); exit(FAILURE);} + if ((col_class = find_table_col(&sample_size, "class")) < 0){ + printf("could not find column name in file-sample-size\n"); exit(FAILURE);} + print_table(&sample_size, false); + printf("column %s in column %d\n", "class", col_class); + printf("column %s in column %d\n", args.column_sample_size, col_size); + + print("min/max class: %d/%d\n", table.min[col_class], table.max[col_class]); + + + // if (n_column != 3){ // fprintf(stderr, "input-size does not have 3 columns %s.\n", args.file_input); exit(1);} From ecf8d82f164d3e64feb4bd906637e97d94190880 Mon Sep 17 00:00:00 2001 From: David Frantz Date: Wed, 31 Jan 2024 16:51:03 +0100 Subject: [PATCH 15/23] dictionary for faster indexing --- src/aux-level/_stratified-sample.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/aux-level/_stratified-sample.c b/src/aux-level/_stratified-sample.c index 285375a4..698e62ef 100644 --- a/src/aux-level/_stratified-sample.c +++ b/src/aux-level/_stratified-sample.c @@ -154,6 +154,9 @@ int has_nodata; table_t sample_size; int col_size, col_class; +int offset = SHRT_MAX+1; +int length = USHRT_MAX+1; +enum { _CLASS_, _SIZE_, _PROBABILITY_, _DICT_LENGTH_ }; parse_args(argc, argv, &args); @@ -167,12 +170,22 @@ int col_size, col_class; printf("column %s in column %d\n", "class", col_class); printf("column %s in column %d\n", args.column_sample_size, col_size); - print("min/max 
class: %d/%d\n", table.min[col_class], table.max[col_class]); + printf("min/max class: %d/%d\n", (int)sample_size.min[col_class], (int)sample_size.max[col_class]); + + + + alloc_2D((void**)&dictionary, length, _DICT_LENGTH_, sizeof(double)); + + for (row=0; row Date: Wed, 31 Jan 2024 20:54:03 +0100 Subject: [PATCH 16/23] random number generator pretty close to target --- src/aux-level/_stratified-sample.c | 68 +++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/src/aux-level/_stratified-sample.c b/src/aux-level/_stratified-sample.c index 698e62ef..34f01b3f 100644 --- a/src/aux-level/_stratified-sample.c +++ b/src/aux-level/_stratified-sample.c @@ -31,6 +31,8 @@ This program computes a histogram of the given image #include // testing and mapping characters #include // standard symbolic constants and types +#include + #include "../cross-level/const-cl.h" #include "../cross-level/konami-cl.h" #include "../cross-level/string-cl.h" @@ -152,39 +154,52 @@ short *line = NULL; short nodata; int has_nodata; table_t sample_size; -int col_size, col_class; +int col_size, col_class, col_count, row; int offset = SHRT_MAX+1; int length = USHRT_MAX+1; -enum { _CLASS_, _SIZE_, _PROBABILITY_, _DICT_LENGTH_ }; +double **dictionary = NULL; +enum { _CLASS_, _SIZE_, _PROBABILITY_, _COUNT_, _DICT_LENGTH_ }; parse_args(argc, argv, &args); sample_size = read_table(args.file_input_sample_size, false, true); if ((col_size = find_table_col(&sample_size, args.column_sample_size)) < 0){ - printf("could not find column name in file-sample-size\n"); exit(FAILURE);} + printf("could not find column name %s in file-sample-size\n", args.column_sample_size); exit(FAILURE);} if ((col_class = find_table_col(&sample_size, "class")) < 0){ - printf("could not find column name in file-sample-size\n"); exit(FAILURE);} + printf("could not find column name %s in file-sample-size\n", "class"); exit(FAILURE);} + if ((col_count = find_table_col(&sample_size, "count")) < 0){ + printf("could not find column name %s in file-sample-size\n", "count"); exit(FAILURE);} print_table(&sample_size, false); printf("column %s in column %d\n", "class", col_class); + printf("column %s in column %d\n", "count", col_count); printf("column %s in column %d\n", args.column_sample_size, col_size); printf("min/max class: %d/%d\n", (int)sample_size.min[col_class], (int)sample_size.max[col_class]); - alloc_2D((void**)&dictionary, length, _DICT_LENGTH_, sizeof(double)); + alloc_2D((void***)&dictionary, length, _DICT_LENGTH_, sizeof(double)); for (row=0; row= probability){ + dictionary[line[j] + offset][_COUNT_]++; + printf("%d samples selected (row %d, col %d)\n", cnt++, i, j); + } + + } } + + for (row=0; row Date: Fri, 2 Feb 2024 15:34:43 +0100 Subject: [PATCH 17/23] option to skip rows when printing table --- src/cross-level/table-cl.c | 4 +++- src/cross-level/table-cl.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cross-level/table-cl.c b/src/cross-level/table-cl.c index 0a821e34..6b7b387d 100755 --- a/src/cross-level/table-cl.c +++ b/src/cross-level/table-cl.c @@ -338,7 +338,7 @@ int row; --- table: table +++ Return: void +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++**/ -void print_table(table_t *table, bool truncate){ +void print_table(table_t *table, bool truncate, bool skip_rows){ int row, col; int nrow_print = 6, ncol_print = 6; int width, *max_width = NULL; @@ -383,6 +383,8 @@ int width, *max_width = NULL; for (row=0; rowrow_mask[row]) continue; + if 
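A note on the pixel selection added in patch 16 above: as far as the diff shows, each pixel of a class is tested independently against a per-class probability derived from the requested sample size and the class pixel count, so the realised per-class sample sizes only match the targets in expectation, which is what the commit message ("pretty close to target") describes. If exact per-class sizes are required, the classic sequential selection rule, accepting a pixel with probability (samples still needed) / (pixels still remaining), guarantees them (cf. Knuth's Algorithm S). A minimal R sketch with hypothetical numbers, independent of the C implementation:

    set.seed(1)
    N_c <- 1000                      # hypothetical pixel count of one class
    n_c <- 50                        # hypothetical target sample size
    needed    <- n_c
    remaining <- N_c
    selected  <- logical(N_c)
    for (i in seq_len(N_c)) {
      if (runif(1) < needed / remaining) {   # accept with probability needed/remaining
        selected[i] <- TRUE
        needed <- needed - 1
      }
      remaining <- remaining - 1
    }
    sum(selected)                    # exactly n_c = 50 in every run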
(table->has_row_names) printf("%*s ", max_width[0], table->row_names[row]); for (col=0; coldata[row][col]); if (truncate && col < table->ncol) printf("..."); diff --git a/src/cross-level/table-cl.h b/src/cross-level/table-cl.h index da7fe84d..60efc4bb 100755 --- a/src/cross-level/table-cl.h +++ b/src/cross-level/table-cl.h @@ -67,7 +67,7 @@ table_t allocate_table(int nrow, int ncol, bool has_row_names, bool has_col_name int find_table_col(table_t *table, const char *name); int find_table_row(table_t *table, const char *name); void init_table(table_t *table); -void print_table(table_t *table, bool truncate); +void print_table(table_t *table, bool truncate, bool skip_rows); void write_table(table_t *table, char *fname, const char *separator, bool skip_rows); void free_table(table_t *table); From 6e9b29ed8553df30438ed5b3a60f187253c9db7b Mon Sep 17 00:00:00 2001 From: David Frantz Date: Fri, 2 Feb 2024 15:35:20 +0100 Subject: [PATCH 18/23] fixed band request --- src/aux-level/_hist.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/aux-level/_hist.c b/src/aux-level/_hist.c index 08e64631..899c4368 100644 --- a/src/aux-level/_hist.c +++ b/src/aux-level/_hist.c @@ -98,7 +98,7 @@ int opt; break; case 'b': args->band = atoi(optarg); - if (args->band < 0){ + if (args->band < 1){ fprintf(stderr, "Band must be >= 1\n"); usage(argv[0], FAILURE); } @@ -143,7 +143,7 @@ int main(int argc, char *argv[]){ args_t args; GDALDatasetH fp; GDALRasterBandH band; -int i, j, nx, ny; +int i, j, nx, ny, nbands; short *line = NULL; short nodata; int has_nodata; @@ -159,11 +159,14 @@ int row; if ((fp = GDALOpen(args.file_input, GA_ReadOnly)) == NULL){ fprintf(stderr, "could not open %s.\n", args.file_input); exit(1);} - nx = GDALGetRasterXSize(fp); - ny = GDALGetRasterYSize(fp); - + nx = GDALGetRasterXSize(fp); + ny = GDALGetRasterYSize(fp); + nbands = GDALGetRasterCount(fp); + alloc((void**)&line, nx, sizeof(short)); + if (args.band > nbands){ + fprintf(stderr, "Input image has %d band(s), band %d was requested.\n", nbands, args.band); exit(1);} band = GDALGetRasterBand(fp, args.band); nodata = (short)GDALGetRasterNoDataValue(band, &has_nodata); From b9ea0a5f22006d243a46b1986e9ffe9a75338528 Mon Sep 17 00:00:00 2001 From: David Frantz Date: Fri, 2 Feb 2024 15:35:37 +0100 Subject: [PATCH 19/23] usage format --- rstats/force-sample-size.r | 4 ---- 1 file changed, 4 deletions(-) diff --git a/rstats/force-sample-size.r b/rstats/force-sample-size.r index 3ede6d5f..ef8cbfe1 100755 --- a/rstats/force-sample-size.r +++ b/rstats/force-sample-size.r @@ -47,15 +47,11 @@ usage <- function(exit){ " -i = show program's purpose\n", "\n", " -e error = standard error of the estimated overall accuracy (default: 0.01)\n", - "\n", " -s min_size = minimum sample size per class (default: 50)\n", - "\n", " -o output-file = output file path with extension,\n", " defaults to './sample-size.csv'\n", - "\n", " -c count-file = csv table with pixel counts per class\n", " 2 columns named class and count", - "\n", " -u user_acc-file = csv table with expected user accuracy per class\n", " 2 columns named class and UA", "\n" From 611c0ec841ee55f9f045e7a439c31fa6191dda70 Mon Sep 17 00:00:00 2001 From: David Frantz Date: Fri, 2 Feb 2024 15:36:14 +0100 Subject: [PATCH 20/23] finalized map_accuracy script, commenting still to do --- rstats/force-map-accuracy.r | 210 +++++++++++++++++------------------- 1 file changed, 101 insertions(+), 109 deletions(-) diff --git a/rstats/force-map-accuracy.r 
b/rstats/force-map-accuracy.r index f0e54a95..238d9915 100755 --- a/rstats/force-map-accuracy.r +++ b/rstats/force-map-accuracy.r @@ -22,9 +22,16 @@ info <- "Compute map accuracy and area statistics.\n" # load libraries #################################################### library(dplyr) +library(sf) library(getopt) +# program name ###################################################### +progname <<- + get_Rscript_filename() %>% + basename() + + # input ############################################################# # usage function in same style as other FORCE tools, @@ -33,10 +40,10 @@ usage <- function(exit){ message <- c( sprintf( - "Usage: %s [-h] [-v] [-i] [-o output-file] -c count-file \n", - get_Rscript_filename() + "Usage: %s [-h] [-v] [-i] [-o output-file] \n", + progname ), - " -m map-file -r reference-file -a pixel-area\n", + " -c count-file -s sample-file -a pixel-area\n", "\n", " -h = show this help\n", " -v = show version\n", @@ -44,21 +51,14 @@ usage <- function(exit){ "\n", " -o output-file = output file path with extension,\n", " defaults to './accuracy-assessment.txt'\n", - "\n", " -c count-file = csv table with pixel counts per class\n", - " 2 columns named class and count", - "\n", - " -m map-file = csv table with predicted class labels\n", - " 2 columns named ID and map", - "\n", - " -r reference-file = csv table with reference class labels\n", - " 2 columns named ID and reference", - "\n", + " 2 columns named class and count\n", + " -s sample-file = vector file with predicted and reference class labels\n", + " 2 columns named label_map and label_reference\n", " -a pixel-area = area of one pixel in desired reporting unit, e.g.\n", " 100 for a Sentinel-2 based map to be reported in m², or\n", " 0.01 for a Sentinel-2 based map to be reported in ha\n", - "\n", - + "\n" ) cat( @@ -92,7 +92,7 @@ exit_with_error <- function(argument) { } file_existing <- function(path) { - if (!file.exist(path)){ + if (!file.exists(path)) { cat( sprintf("file %s does not exist\n", path), file = stderr() @@ -108,11 +108,10 @@ spec <- matrix( "info", "i", 0, "logical", "output", "o", 2, "character", "counts", "c", 1, "character", - "map", "m", 1, "character", - "reference", "r", 1, "character", + "sample", "s", 1, "character", "pixel_area", "a", 1, "numeric" - ), - byrow = TRUE, + ), + byrow = TRUE, ncol = 4 ) @@ -120,55 +119,36 @@ opt <- getopt(spec) if (!is.null(opt$help)) usage() if (!is.null(opt$info)) exit_normal(info) -if (!is.null(opt$version)) exit_normal("Printing function not implemented yet. Sorry.\n") +if (!is.null(opt$version)) exit_normal("Printing function not implemented yet. Sorry.") if (is.null(opt$counts)) exit_with_error("count-file is missing!") -if (is.null(opt$map)) exit_with_error("map-file is missing!") -if (is.null(opt$reference)) exit_with_error("reference-file is missing!") -if (is.null(opt$area)) exit_with_error("pixel-area is missing!") +if (is.null(opt$sample)) exit_with_error("sample-file is missing!") file_existing(opt$counts) -file_existing(opt$map) -file_existing(opt$reference) +file_existing(opt$sample) if (is.null(opt$output)) opt$output <- "accuracy-assessment.txt" -cnt <- read.csv(opt$counts) -map <- read.csv(opt$map) -ref <- read.csv(opt$reference) +# read data +count <- read.csv(opt$counts) +sample <- read_sf(opt$sample) + # columns OK? 
-if (!"ID" %in% colnames(map)) exit_with_error("ID column in map-file missing")
-if (!"ID" %in% colnames(ref)) exit_with_error("ID column in reference-file missing")
-if (!"map" %in% colnames(map)) exit_with_error("map column in map-file missing")
-if (!"reference" %in% colnames(ref)) exit_with_error("reference column in reference-file missing")
-if (!"class" %in% colnames(cnt)) exit_with_error("class column in count-file missing")
-if (!"count" %in% colnames(cnt)) exit_with_error("count column in count-file missing")
+if (!"label_map" %in% colnames(sample)) exit_with_error("label_map column in sample-file missing")
+if (!"label_reference" %in% colnames(sample)) exit_with_error("label_reference column in sample-file missing")
+if (!"class" %in% colnames(count)) exit_with_error("class column in count-file missing")
+if (!"count" %in% colnames(count)) exit_with_error("count column in count-file missing")
 
 
 # main thing ########################################################
 
-# join input tables
-table <-
-  reference %>%
-  inner_join(
-    map,
-    by = "ID"
-  )
-
-# join worked?
-if (nrow(table) != nrow(reference)){
-  exit_with_error("map and reference files could not be joined")
-}
-
-table <- data.frame(ID = 1:1000, map = round(runif(1000, 8, 12)), reference = round(runif(1000, 8, 12)))
-
 # get all classes
-classes <- 
+classes <-
   c(
-    table$map,
-    table$reference
+    sample$label_map,
+    sample$label_reference
   ) %>%
   unique() %>%
   sort()
@@ -176,13 +156,13 @@ n_classes <- length(classes)
 
 
 # initialize confusion matrix
-confusion_counts <- 
+confusion_counts <-
   matrix(
-    NA, 
-    n_classes, 
-    n_classes, 
+    NA,
+    n_classes,
+    n_classes,
     dimnames = list(
-      map = classes, 
+      map = classes,
       reference = classes
     )
   )
@@ -191,33 +171,17 @@ confusion_counts <-
 
 
 for (m in 1:n_classes){
   for (r in 1:n_classes){
-    confusion_counts[m, r] <- 
-      table %>%
+    confusion_counts[m, r] <-
+      sample %>%
       filter(
-        map == classes[m] &
-        reference == classes[r]
+        label_map == classes[m] &
+        label_reference == classes[r]
       ) %>%
       nrow()
 
   }
 }
 
-classes <- 1:4
-n_classes <- 4
-confusion_counts <- matrix(
-  c(
-    150, 12, 1, 2,
-    32, 100, 21, 3,
-    0, 32, 120, 0,
-    0, 0, 5, 130
-  ), byrow = TRUE,
-  n_classes,
-  n_classes,
-  dimnames = list(
-    map = classes,
-    reference = classes
-  )
-)
 
 acc_metrics <- function(confusion_matrix) {
 
@@ -261,19 +225,15 @@ acc_metrics <- function(confusion_matrix) {
 
 }
 
-
-cnt <- data.frame(class = 1:4, count = c(20000,200000,300000,350000))
-opt <- list(pixel_area = 30^2/10000, output = "accuracy-assessment.txt") # ha
-
 # compute proportional area per class, area in reporting unit, and sort the classes
-cnt <-
-  cnt %>%
+count <-
+  count %>%
   arrange(class) %>%
   mutate(weight = count / sum(count)) %>%
   mutate(area = count * opt$pixel_area)
 
 # classes should now align with map/reference dataset
-if (!any(cnt$class == classes))
+if (!all(count$class == classes))
   exit_with_error("classes in file-count do not match with map or reference classes")
 
 # Olofsson et al. 2013, eq. 1
 confusion_adjusted <-
@@ -286,7 +246,7 @@ confusion_adjusted <-
     byrow = FALSE
   ) *
   matrix(
-    cnt$weight,
+    count$weight,
     n_classes,
     n_classes,
     byrow = FALSE
@@ -296,7 +256,7 @@ confusion_adjusted <-
 
 # Olofsson et al. 2013, eq.
2 area_adjusted <- - colSums(confusion_adjusted) * sum(cnt$area) + colSums(confusion_adjusted) * sum(count$area) @@ -322,7 +282,7 @@ matrix( byrow = FALSE ) * matrix( - cnt$weight**2, + count$weight**2, n_classes, n_classes, byrow = FALSE @@ -330,9 +290,8 @@ matrix( } %>% colSums() %>% sqrt() %>% - `*`(sum(cnt$area)) %>% + `*`(sum(count$area)) %>% `*`(1.96) -confidence_area_adjusted # Olofsson et al. 2013, eq. 6-8 @@ -340,7 +299,7 @@ acc_traditional <- acc_metrics(confusion_counts) acc_adjusted <- acc_metrics(confusion_adjusted) # Olofsson et al. 2014, eq. 5 -oa_se <- sum(cnt$weight**2 * acc_adjusted$ua * (1 - acc_adjusted$ua) / (rowSums(confusion_counts) - 1)) %>% +oa_se <- sum(count$weight**2 * acc_adjusted$ua * (1 - acc_adjusted$ua) / (rowSums(confusion_counts) - 1)) %>% sqrt() %>% `*`(1.96) @@ -355,7 +314,7 @@ sqrt() %>% Nj <- { matrix( - cnt$count, + count$count, n_classes, n_classes, byrow = FALSE @@ -371,7 +330,7 @@ Nj <- colSums() -term1 <- cnt$count**2 * +term1 <- count$count**2 * (1 - acc_adjusted$pa)**2 * acc_adjusted$ua * (1 - acc_adjusted$ua) / @@ -386,7 +345,7 @@ diag(leave_class_out) <- 0 term2 <- { matrix( - cnt$count**2, + count$count**2, n_classes, n_classes, byrow = FALSE @@ -431,24 +390,57 @@ sqrt() %>% -confusion_counts -confusion_adjusted - -acc_traditional -acc_adjusted - -cnt$area -area_adjusted -confidence_area_adjusted - -oa_se -pa_se -ua_se - fo <- file(opt$output, "w") +sink(file = fo, append = TRUE, type = "output") + +cat("Traditional Accuracy assessment\n") +cat("-----------------------------------------------------------------\n") +cat("\n") +cat("Traditional confusion matrix, expressed in terms of pixel counts:\n") +cat("\n") +print(confusion_counts) +cat("\n") +cat(sprintf("Overall Accuracy (OA): %.2f\n", acc_traditional$oa)) +cat("\n") +print_stats <- cbind( + sprintf("%.2f", acc_traditional$pa), + sprintf("%.2f", acc_traditional$ua), + sprintf("%.2f", acc_traditional$oe), + sprintf("%.2f", acc_traditional$ce) +) +colnames(print_stats) <- c("Producer's Accuracy", "User's Accuracy", "Error of Omission", "Error of Commission") +rownames(print_stats) <- classes +print(print_stats, quote = FALSE) +cat("\n") +cat("\n") +cat("Area-Adjusted Accuracy\n") +cat("-----------------------------------------------------------------\n") +cat("\n") +cat("Confusion matrix, expressed in terms of proportion of area:\n") +cat("\n") +print(confusion_adjusted) +cat("\n") +cat(sprintf("Overall Accuracy (OA): %.2f \u00b1 %.2f\n", acc_adjusted$oa, oa_se)) +cat("\n") +print_stats <- cbind( + sprintf("%.2f \u00b1 %.2f", acc_adjusted$pa, pa_se), + sprintf("%.2f \u00b1 %.2f", acc_adjusted$ua, ua_se), + sprintf("%.2f \u00b1 %.2f", acc_adjusted$oe, pa_se), + sprintf("%.2f \u00b1 %.2f", acc_adjusted$ce, ua_se) +) +colnames(print_stats) <- c("Producer's Accuracy", "User's Accuracy", "Error of Omission", "Error of Commission") +rownames(print_stats) <- classes +print(print_stats, quote = FALSE) +cat("\n") +print_area <- cbind( + sprintf("%.2f \u00b1 %.2f", area_adjusted, confidence_area_adjusted), + sprintf("%.2f", count$area) +) +colnames(print_area) <- c("Estimated Area", "Mapped Area") +rownames(print_area) <- classes +print(print_area, quote = FALSE) -cat("# Accuracy assessment", file = fo) - +sink(file = NULL) close(fo) From 7a36cb02e967c237ea8565af4fd24027580b2b1f Mon Sep 17 00:00:00 2001 From: David Frantz Date: Fri, 2 Feb 2024 15:36:47 +0100 Subject: [PATCH 21/23] finalized stratified sampling, code still messy, commenting still necessary --- src/aux-level/_stratified-sample.c | 
262 +++++++++++++++++++++++------
 1 file changed, 212 insertions(+), 50 deletions(-)

diff --git a/src/aux-level/_stratified-sample.c b/src/aux-level/_stratified-sample.c
index 34f01b3f..70145974 100644
--- a/src/aux-level/_stratified-sample.c
+++ b/src/aux-level/_stratified-sample.c
@@ -37,9 +37,12 @@ This program computes a histogram of the given image
 #include "../cross-level/konami-cl.h"
 #include "../cross-level/string-cl.h"
 #include "../cross-level/table-cl.h"
+#include "../cross-level/dir-cl.h"
 
 /** Geospatial Data Abstraction Library (GDAL) **/
 #include "gdal.h"           // public (C callable) GDAL entry points
+#include "ogr_spatialref.h" // coordinate systems services
+#include "ogr_api.h"        // OGR geometry and feature definition
 
 
 typedef struct {
@@ -49,26 +52,35 @@ typedef struct {
   char file_input_sample_size[NPOW_10];
   char file_output[NPOW_10];
   char column_sample_size[NPOW_10];
+  char format[NPOW_10];
 } args_t;
 
 
 void usage(char *exe, int exit_code){
 
-  printf("Usage: %s [-h] [-v] [-i] [-b band] [-o output-file] input-image\n", exe);
+  printf("Usage: %s [-h] [-v] [-i] [-b band] [-o output-file] [-f format] \n", exe);
+  printf("       classification sample-size-table sample-size-column \n");
   printf("\n");
   printf("  -h  = show this help\n");
   printf("  -v  = show version\n");
   printf("  -i  = show program's purpose\n");
   printf("\n");
-  printf("  -b band  = band to use,\n");
+  printf("  -b band  = band to use in classification image,\n");
  printf("             defaults to 1\n");
-  printf("\n");
   printf("  -o output-file  = output file path with extension,\n");
-  printf("                    defaults to './histogram.csv'\n");
+  printf("                    defaults to './sample.gpkg'\n");
+  printf("  -f format  = output format (GDAL vector driver short name)\n");
+  printf("               defaults to GPKG\n");
   printf("\n");
   printf("  Positional arguments:\n");
-  printf("  - 'input-image': image for computing the histogram\n");
+  printf("  - 'classification': classification image\n");
+  printf("  - 'sample-size-table': table with sample size per class with at least 3 named columns\n");
+  printf("        class: class ID, needs to match classes in classification\n");
+  printf("        count: number of pixels per class\n");
+  printf("        ***:   number of sample points per class;\n");
+  printf("               column name is variable (see next argument)\n");
+  printf("  - 'sample-size-column': column name of sample size\n");
   printf("\n");
 
   exit(exit_code);
@@ -78,16 +90,19 @@ void usage(char *exe, int exit_code){
 void parse_args(int argc, char *argv[], args_t *args){
 int opt;
+bool o, f;
 
 
   opterr = 0;
 
   // default parameters
-  copy_string(args->file_output, NPOW_10, "histogram.csv");
+  copy_string(args->file_output, NPOW_10, "sample.gpkg");
+  copy_string(args->format,      NPOW_10, "GPKG");
   args->band = 1;
+  o = f = false;
 
   // optional parameters
-  while ((opt = getopt(argc, argv, "hvio:b:")) != -1){
+  while ((opt = getopt(argc, argv, "hvio:f:b:")) != -1){
     switch(opt){
       case 'h':
         usage(argv[0], SUCCESS);
@@ -100,9 +115,13 @@ int opt;
       case 'o':
         copy_string(args->file_output, NPOW_10, optarg);
+        o = true;
         break;
+      case 'f':
+        copy_string(args->format, NPOW_10, optarg);
+        f = true;
+        break;
       case 'b':
         args->band = atoi(optarg);
-        if (args->band < 0){
+        if (args->band < 1){
           fprintf(stderr, "Band must be >= 1\n");
           usage(argv[0], FAILURE);
         }
@@ -141,6 +160,14 @@ int opt;
     usage(argv[0], FAILURE);
   }
 
+  if ((!o && f) || (!f && o)){
+    fprintf(stderr, "If -f is given, -o needs to be given, too.\n"); usage(argv[0], FAILURE);
+  }
+
+  if (fileexist(args->file_output)){
+    fprintf(stderr, "sample already exists: %s.\n", args->file_output); usage(argv[0], FAILURE);
+  }
+
return; } @@ -149,17 +176,39 @@ int main(int argc, char *argv[]){ args_t args; GDALDatasetH fp; GDALRasterBandH band; -int i, j, nx, ny; +int i, j, nx, ny, nbands; short *line = NULL; short nodata; int has_nodata; table_t sample_size; int col_size, col_class, col_count, row; +char *wkt = NULL; +char projection[NPOW_10]; +double geotran[6]; int offset = SHRT_MAX+1; int length = USHRT_MAX+1; double **dictionary = NULL; -enum { _CLASS_, _SIZE_, _PROBABILITY_, _COUNT_, _DICT_LENGTH_ }; +enum { _DICT_CLASS_, _DICT_SIZE_, _DICT_PROBABILITY_, _DICT_COUNT_, _DICT_LENGTH_ }; + +table_t sample; +int count = 0; +enum { _SAMPLE_CLASS_, _SAMPLE_X_, _SAMPLE_Y_, _SAMPLE_PROBABILITY_, _SAMPLE_LENGTH_ }; + +double probability; + + +int class_ID, size_target, size_sampled; +int row2, row_max; +double max; +OGRSFDriverH driver; +OGRSpatialReferenceH srs; +GDALDatasetH fp_out; +OGRLayerH layer; +OGRFieldDefnH field; +OGRFeatureH feature; +OGRGeometryH point; + parse_args(argc, argv, &args); @@ -171,44 +220,51 @@ enum { _CLASS_, _SIZE_, _PROBABILITY_, _COUNT_, _DICT_LENGTH_ }; if ((col_count = find_table_col(&sample_size, "count")) < 0){ printf("could not find column name %s in file-sample-size\n", "count"); exit(FAILURE);} - print_table(&sample_size, false); + #ifdef FORCE_DEBUG + print_table(&sample_size, false, false); printf("column %s in column %d\n", "class", col_class); printf("column %s in column %d\n", "count", col_count); printf("column %s in column %d\n", args.column_sample_size, col_size); - printf("min/max class: %d/%d\n", (int)sample_size.min[col_class], (int)sample_size.max[col_class]); - - + #endif alloc_2D((void***)&dictionary, length, _DICT_LENGTH_, sizeof(double)); for (row=0; row nbands){ + fprintf(stderr, "Input image has %d band(s), band %d was requested.\n", nbands, args.band); exit(1);} alloc((void**)&line, nx, sizeof(short)); band = GDALGetRasterBand(fp, args.band); @@ -219,17 +275,9 @@ enum { _CLASS_, _SIZE_, _PROBABILITY_, _COUNT_, _DICT_LENGTH_ }; exit(1); } - int cnt = 0; -double probability; - /** FILE *fp_ = NULL; - fp_ = fopen("test.txt", "w"); - for (i=0; i<346589861; i++){ - probability = (double)rand()/(double)RAND_MAX; - fprintf(fp_, "%f\n", probability); - } - fclose(fp_); - exit(0); - **/ + + srand(time(NULL)); + for (i=0; i= probability){ + dictionary[line[j] + offset][_DICT_COUNT_]++; + //printf("%d samples pre-selected (row %d, col %d)\n", count, i, j); + + if (count < sample.nrow){ + sample.data[count][_SAMPLE_CLASS_] = line[j]; + sample.data[count][_SAMPLE_X_] = geotran[0] + j * geotran[1] + geotran[1] * 0.5; + sample.data[count][_SAMPLE_Y_] = geotran[3] + i * geotran[5] + geotran[5] * 0.5; + sample.data[count][_SAMPLE_PROBABILITY_] = probability; + sample.row_mask[count] = true; + count++; + } - if (dictionary[line[j] + offset][_PROBABILITY_] >= probability){ - dictionary[line[j] + offset][_COUNT_]++; - printf("%d samples selected (row %d, col %d)\n", cnt++, i, j); } - } } - for (row=0; row max){ + max = sample.data[row2][_SAMPLE_PROBABILITY_]; + row_max = row2; + } + + } + + printf("class %d, size %d <-> %d, row_max %d, max_prob %f\n", + class_ID, size_target, size_sampled, row_max, max); + + sample.row_mask[row_max] = false; + + size_sampled--; + } + + } + + + + OGRRegisterAll(); + + // get driver + if ((driver = OGRGetDriverByName(args.format)) == NULL){ + fprintf(stderr, "%s driver is not available.\n", args.format); usage(argv[0], FAILURE); + } + + + // create file + if ((fp_out = OGR_Dr_CreateDataSource(driver, args.file_output, NULL)) == NULL){ + printf("Error 
creating output file.\n"); return FAILURE;} + + + // set output projection + srs = OSRNewSpatialReference(NULL); + OSRImportFromWkt(srs, &wkt); + + // create layer + if ((layer = OGR_DS_CreateLayer(fp_out, "sample", srs, + wkbPoint, NULL)) == NULL){ + printf("Error creating layer.\n"); return FAILURE;} + + // add field + field = OGR_Fld_Create("FID", OFTInteger); + OGR_Fld_SetWidth(field, 4); + if (OGR_L_CreateField(layer, field, TRUE) != OGRERR_NONE){ + printf("Error creating field.\n"); return FAILURE;} + OGR_Fld_Destroy(field); + + // add field + field = OGR_Fld_Create("label_map", OFTInteger); + OGR_Fld_SetWidth(field, 4); + if (OGR_L_CreateField(layer, field, TRUE) != OGRERR_NONE){ + printf("Error creating field.\n"); return FAILURE;} + OGR_Fld_Destroy(field); + + // add field + field = OGR_Fld_Create("label_reference", OFTInteger); + OGR_Fld_SetWidth(field, 4); + if (OGR_L_CreateField(layer, field, TRUE) != OGRERR_NONE){ + printf("Error creating field.\n"); return FAILURE;} + OGR_Fld_Destroy(field); + + count = 0; + + for (row=0; row Date: Fri, 2 Feb 2024 15:39:32 +0100 Subject: [PATCH 22/23] install R validation scripts --- Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 83b2fcb8..aa844159 100755 --- a/Makefile +++ b/Makefile @@ -119,7 +119,7 @@ lower: table_ll param_ll meta_ll cube_ll equi7_ll glance7_ll atc_ll sunview_ll r higher: param_hl progress_hl tasks_hl read-aux_hl read-ard_hl quality_hl bap_hl level3_hl cso_hl tsa_hl index_hl interpolate_hl stm_hl fold_hl standardize_hl pheno_hl polar_hl trend_hl ml_hl texture_hl lsm_hl lib_hl sample_hl imp_hl cfimp_hl l2imp_hl spec-adjust_hl pyp_hl rsp_hl udf_hl aux: param_aux param_train_aux train_aux exe: force force-parameter force-qai-inflate force-tile-finder force-tabulate-grid force-l2ps force-higher-level force-train force-lut-modis force-mdcp force-stack force-import-modis force-cube-init force-hist force-stratified-sample -.PHONY: temp all install install_ bash python external clean build check +.PHONY: temp all install install_ bash python rstats external clean build check ### TEMP @@ -478,6 +478,8 @@ python: temp rstats: temp cp $(DR)/force-level2-report.Rmd $(TM)/force-level2-report.Rmd + cp $(DR)/force-sample-size.r $(TB)/force-sample-size + cp $(DR)/force-map-accuracy.r $(TB)/force-map-accuracy install: bash python rstats external install_ clean check From 43c5e6553053b2c32d9d2e1de091e634d85ffdbd Mon Sep 17 00:00:00 2001 From: David Frantz Date: Fri, 2 Feb 2024 15:43:31 +0100 Subject: [PATCH 23/23] updated docs --- docs/source/history/vdev.rst | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/source/history/vdev.rst b/docs/source/history/vdev.rst index a54dc04f..460cc63b 100755 --- a/docs/source/history/vdev.rst +++ b/docs/source/history/vdev.rst @@ -68,7 +68,20 @@ Develop version - new auxilliary program `force-hist`. This program computes the histogram of image values (can be vrt) and writes a csv table. - This is intended to be used in a validation workflow (planned feature). + This is intended to be used in a validation workflow. + + - new auxilliary program `force-sample-size`. + This program computes the required sample size following Olofsson et al. 2013/2014, + which should be used to properly validate a classification map. + This is intended to be used in a validation workflow. + + - new auxilliary program `force-stratified-sample`. 
+    This program draws a stratified random sample based on a classification map and sample size computations.
+    This is intended to be used in a validation workflow.
+
+  - new auxiliary program `force-map-accuracy`.
+    This program computes area-adjusted accuracies following Olofsson et al. 2013/2014.
+    This is intended to be used in a validation workflow.
 
   - ``force-tabulate-grid`` has been updated to produce properly named output files.
     The default output file name is ``grid.kml``, created in the current directory, using the ``KML`` format.
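
For orientation, the area-adjusted numbers produced by force-map-accuracy follow Olofsson et al. (2013), eq. 1 and 2. Below is a minimal R sketch of that adjustment on an invented 3-class example; the confusion matrix, pixel counts, and pixel area are made up for illustration only and are not FORCE outputs, and the snippet is not the tool itself:

  # minimal sketch of the Olofsson et al. (2013) area adjustment (eq. 1 and 2);
  # all numbers below are invented for illustration only
  library(dplyr)

  # sample-based confusion matrix (rows = map label, columns = reference label)
  confusion_counts <- matrix(
    c(90,  5,  5,
       8, 80, 12,
       2,  3, 95),
    nrow = 3, byrow = TRUE,
    dimnames = list(map = 1:3, reference = 1:3)
  )

  # mapped pixel counts per class and the area of one pixel
  # (here: 0.01 ha for a 10 m x 10 m pixel)
  count <- data.frame(class = 1:3, count = c(1500000, 300000, 200000))
  pixel_area <- 0.01

  count <- count %>%
    mutate(weight = count / sum(count)) %>%  # proportion of mapped area per class
    mutate(area = count * pixel_area)        # mapped area in reporting units

  # eq. 1: estimated area proportions p_ij = W_i * n_ij / n_i. (row total)
  confusion_adjusted <- confusion_counts / rowSums(confusion_counts) * count$weight

  # eq. 2: adjusted area per reference class
  area_adjusted <- colSums(confusion_adjusted) * sum(count$area)

  # accuracies derived from the adjusted matrix
  oa <- sum(diag(confusion_adjusted))                           # overall accuracy
  ua <- diag(confusion_adjusted) / rowSums(confusion_adjusted)  # user's accuracy
  pa <- diag(confusion_adjusted) / colSums(confusion_adjusted)  # producer's accuracy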