Merge pull request #7 from madgik/dev_exareme_v19

Changes to the algorithms for exareme v19
madgik · Apr 1, 2019 · 8a7ac57 · 8a7ac57
2 parents c31f690 + 5b1a5a3
commit 8a7ac57
Show file tree

Hide file tree

Showing 48 changed files with 2,638 additions and 780 deletions.
diff --git a/WP_LINEAR_REGRESSION/1/global.template.sql → LINEAR_REGRESSION/1/global.template.sql b/WP_LINEAR_REGRESSION/1/global.template.sql → LINEAR_REGRESSION/1/global.template.sql
diff --git a/WP_LINEAR_REGRESSION/1/local.template.sql → LINEAR_REGRESSION/1/local.template.sql b/WP_LINEAR_REGRESSION/1/local.template.sql → LINEAR_REGRESSION/1/local.template.sql
@@ -1,19 +1,27 @@
 requirevars 'defaultDB' 'input_local_tbl' 'x' 'y' 'dataset';
 attach database '%{defaultDB}' as defaultDB;
 
+-- The commented-out statements below are used for testing only
+--drop table if exists mydata;
+--create table mydata as select * from (file header:t 'epfl_flattable.csv');
+--var 'input_local_tbl' 'mydata';
+--var 'y' 'av45';
+--var 'x' 'adnicategory*apoe4+subjectage+minimentalstate+gender';
+--var 'dataset' 'adni';
+-------------------------------
+
 drop table if exists datasets;
 create table datasets as
 select strsplitv('%{dataset}','delimiter:,') as d;
 
 drop table if exists xvariables;
 create table xvariables as
-select strsplitv(regexpr("\+|\:|\*|\-",'%{x}',"+") ,'delimiter:+') as xname;		
+select strsplitv(regexpr("\+|\:|\*|\-",'%{x}',"+") ,'delimiter:+') as xname;
 
---1. Keep only the correct colnames
-drop table if exists localinputtbl_1; 
-create table localinputtbl_1 as
-select __rid as rid,__colname as colname, tonumber(__val) as val
-from %{input_local_tbl};
+--1. Keep only the correct colnames from a flat table
+drop table if exists localinputtbl_1a;
+create table localinputtbl_1a as
+select rid,colname, val from (toeav select * from %{input_local_tbl});
 
 --Check if x is empty
 var 'empty' from select case when (select '%{x}')='' then 0 else 1 end;
@@ -27,39 +35,44 @@ emptyfield '%{empty}';
 var 'empty' from select case when (select '%{dataset}')='' then 0 else 1 end;
 emptyset '%{empty}';
 ------------------
-create table columnexist as setschema 'colname' select distinct(colname) from (postgresraw);
+create table columnexist as setschema 'colname' select distinct(colname) as colname2 from localinputtbl_1a;
 --Check if x exist in dataset
 var 'counts' from select count(distinct(colname)) from columnexist where colname in (select xname from xvariables);
 var 'result' from select count(xname) from xvariables;
-var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;			
+var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;
 vars '%{valExists}';
 --Check if y exist in dataset
 var 'valExists' from select case when (select exists (select colname from columnexist where colname='%{y}'))=0 then 0 else 1 end;
 vars '%{valExists}';
 ----------
+--1. Keep only the correct colnames from a flat table
+drop table if exists localinputtbl_1;
+create table localinputtbl_1 as
+select rid,colname, tonumber(val) as val from localinputtbl_1a
+where colname in (select * from xvariables) or colname  ='%{y}' or colname ='dataset';
 
 --2. Keep only patients of the correct dataset
-drop table if exists localinputtbl_2; 
+drop table if exists localinputtbl_2;
 create table localinputtbl_2 as
 select rid, colname, val
 from localinputtbl_1
-where rid in (select distinct rid  
-              from localinputtbl_1 
+where rid in (select distinct rid
+              from localinputtbl_1
               where colname ='dataset' and val in (select d from datasets));
 
 delete from localinputtbl_2
 where colname = 'dataset';
 
---3.  Delete patients with null values 
-drop table if exists localinputtbl; 
+--3.  Delete patients with null values
+drop table if exists localinputtbl;
 create table localinputtbl as
 select rid, colname, val
 from localinputtbl_2
-where rid not in (select distinct rid from localinputtbl_2 
+where rid not in (select distinct rid from localinputtbl_2
                   where val is null or val = '' or val = 'NA')
 order by rid, colname, val;
 
---y value:Real,Float or Integer.   
+--y value:Real,Float or Integer.
 --Some values could be null (type: Text). We want to make sure that if a "rid-colname('%{y}')-val" row exists in a node, the colname type is not "Text". That is why
 --we previously deleted patients with null values.
 var 'type' from select case when (select distinct(typeof(tonumber(val))) as val from localinputtbl where colname='%{y}')='integer' or  (select distinct(typeof(tonumber(val))) as val from localinputtbl where colname = '%{y}')='real' or (select distinct(typeof(tonumber(val))) as val from localinputtbl where colname='%{y}')='float' then 1 else 0 end;
@@ -72,9 +85,9 @@ vartypey '%{final}';
 var 'minimumrecords' 10;
 create table emptytable(rid  text primary key, colname, val);
 var 'privacycheck' from select case when (select count(distinct(rid)) from localinputtbl) < %{minimumrecords} then 0 else 1 end;
-create table localinputtbl2 as setschema 'rid , colname, val' 
+create table localinputtbl2 as setschema 'rid , colname, val'
 select * from localinputtbl where %{privacycheck}=1
-union 
+union
 select * from emptytable where %{privacycheck}=0;
 drop table if exists localinputtbl;
 alter table localinputtbl2 rename to localinputtbl;
@@ -88,7 +101,7 @@ select * from localinputtbl
 where colname in (select xname from xvariables) or colname = "%{y}";
 
 -- A. Dummy code of categorical variables
-drop table if exists T; 
+drop table if exists T;
 create table T as
 select rid, colname||'('||val||')' as colname, 1 as val
 from input_local_tbl_LR
@@ -115,14 +128,14 @@ select modelFormulae(rid,colname,val, "%{x}") from input_local_tbl_LR group by r
 
 var 'colnames' from select jmergeregexp(jgroup(colname)) from (select colname from localinputtbl group by colname having count(distinct val)=1); --NEW
 drop table if exists defaultDB.deletedcolumns; --NEW
-create table defaultDB.deletedcolumns as setschema 'colname' 
+create table defaultDB.deletedcolumns as setschema 'colname'
 select distinct colname from defaultDB.input_local_tbl_LR_Final where regexprmatches('%{colnames}' ,colname); --NEW
 
 delete from  defaultDB.input_local_tbl_LR_Final --NEW
 where colname in (select * from defaultDB.deletedcolumns); --NEW
 
 insert into defaultDB.input_local_tbl_LR_Final
-select rid,colname,val from input_local_tbl_LR where colname = '%{y}';  
+select rid,colname,val from input_local_tbl_LR where colname = '%{y}';
 --
 insert into defaultDB.input_local_tbl_LR_Final
 select distinct rid as rid,'(Intercept)' as colname, 1.0 as val from input_local_tbl_LR;

diff --git a/WP_LINEAR_REGRESSION/2/global.template.sql → LINEAR_REGRESSION/2/global.template.sql b/WP_LINEAR_REGRESSION/2/global.template.sql → LINEAR_REGRESSION/2/global.template.sql
diff --git a/WP_LINEAR_REGRESSION/2/local.template.sql → LINEAR_REGRESSION/2/local.template.sql b/WP_LINEAR_REGRESSION/2/local.template.sql → LINEAR_REGRESSION/2/local.template.sql
diff --git a/WP_LINEAR_REGRESSION/3/global.template.sql → LINEAR_REGRESSION/3/global.template.sql b/WP_LINEAR_REGRESSION/3/global.template.sql → LINEAR_REGRESSION/3/global.template.sql
diff --git a/WP_LINEAR_REGRESSION/3/local.template.sql → LINEAR_REGRESSION/3/local.template.sql b/WP_LINEAR_REGRESSION/3/local.template.sql → LINEAR_REGRESSION/3/local.template.sql
diff --git a/WP_LINEAR_REGRESSION/4/global.template.sql → LINEAR_REGRESSION/4/global.template.sql b/WP_LINEAR_REGRESSION/4/global.template.sql → LINEAR_REGRESSION/4/global.template.sql
diff --git a/WP_LINEAR_REGRESSION/4/local.template.sql → LINEAR_REGRESSION/4/local.template.sql b/WP_LINEAR_REGRESSION/4/local.template.sql → LINEAR_REGRESSION/4/local.template.sql
diff --git a/LINEAR_REGRESSION/properties.json b/LINEAR_REGRESSION/properties.json
@@ -0,0 +1,40 @@
+{
+	"name": "LINEAR_REGRESSION",
+	"desc": "",
+	"type": "multiple_local_global",
+	"status": "enabled",
+	"parameters": [{
+			"name": "x",
+			"desc": "The right part of the linear predictor function, which contains the independent variables in an equation supporting the symbols +, :, * (R notation). The independent variables are variables of the input dataset and they should be Real, Float, Integer or Text. It cannot be empty.",
+			"type": "column",
+			"value": "adnicategory*apoe4+subjectage+minimentalstate+gender",
+			"valueNotBlank": true,
+			"valueMultiple": true,
+			"valueType": "string"
+		}, {
+			"name": "y",
+			"desc": "The left part of the linear predictor function, which contains the dependent variable. The dependent variable is a variable of the input dataset and it should be Real, Float or Integer. It cannot be empty.",
+			"type": "column",
+			"value": "av45",
+			"valueNotBlank": true,
+			"valueMultiple": false,
+			"valueType": "string"
+		}, {
+			"name": "dataset",
+			"desc": "It contains the names of one or more datasets, in which the algorithm will be executed. It cannot be empty.",
+			"type": "dataset",
+			"value": "adni,ppmi",
+			"valueNotBlank": true,
+			"valueMultiple": true,
+			"valueType": "string"
+		}, {
+			"name": "filter",
+			"desc": "",
+			"type": "filter",
+			"value": "",
+			"valueNotBlank": false,
+			"valueMultiple": true,
+			"valueType": "string"
+		}
+	]
+}
diff --git a/WP_LIST_DATASET/global.template.sql → LIST_DATASET/global.template.sql b/WP_LIST_DATASET/global.template.sql → LIST_DATASET/global.template.sql
diff --git a/LIST_DATASET/local.template.sql b/LIST_DATASET/local.template.sql
@@ -0,0 +1,8 @@
+requirevars 'input_local_tbl';
+
+select execprogram(null, "/root/exareme/set-local-datasets.sh");
+
+var 'a' from select count(distinct(rid)) as sum1 from (select distinct rid from (toeav select * from %{input_local_tbl}));
+
+var 'b' from select execprogram(null,'cat','/root/exareme/etc/exareme/name');
+select var('a') as sum1, val as val, var('b') as who from (select distinct val from (toeav select * from %{input_local_tbl}) where colname = 'dataset') group by val;
diff --git a/LIST_DATASET/properties.json b/LIST_DATASET/properties.json
@@ -0,0 +1,7 @@
+{
+	"name": "LIST_DATASET",
+	"desc": "",
+	"type": "local_global",
+	"status": "enabled",
+	"parameters": []
+}
diff --git a/LIST_VARIABLES/local.template.sql b/LIST_VARIABLES/local.template.sql
@@ -0,0 +1,12 @@
+requirevars 'input_local_tbl' ;
+
+select jdictgroup('variables', variable) as variables
+from (
+  select jgroup(variable, t) as variable
+  from (
+    select distinct colname as variable, typeof(tonumber(val)) as t
+    from (toeav select * from %{input_local_tbl})
+    where val is not null
+    group by colname
+    )
+);
diff --git a/LIST_VARIABLES/properties.json b/LIST_VARIABLES/properties.json
@@ -0,0 +1,8 @@
+{
+	"name": "LIST_VARIABLES",
+	"desc": "",
+	"type": "local",
+	"status": "enabled",
+	"parameters": [
+	]
+}
diff --git a/PIPELINE_ISOUP_MODEL_TREE_SERIALIZER/local.template.sql b/PIPELINE_ISOUP_MODEL_TREE_SERIALIZER/local.template.sql
@@ -1,6 +1,6 @@
 requirevars 'target_attributes' 'descriptive_attributes' 'input_local_tbl' ;
 
-------- Create the correct dataset 
+------- Create the correct dataset
 drop table if exists datasets;
 create table datasets as
 select strsplitv('%{dataset}','delimiter:,') as d;
@@ -14,8 +14,8 @@ create table columnstable as
 select strsplitv('%{target_attributes},%{descriptive_attributes}' ,'delimiter:,') as xname;
 
 create temp table localinputtbl_1 as
-select __rid as rid,__colname as colname, __val as val
-from %{input_local_tbl};
+select rid,colname,  val from (toeav select * from %{input_local_tbl});
+
 
 var 'target_vars' from
 ( select group_concat('"'||targetname||'"',', ') from targetstable);
@@ -34,12 +34,12 @@ emptyfield '%{empty}';
 var 'empty' from select case when (select '%{dataset}')='' then 0 else 1 end;
 emptyset '%{empty}';
 ------------------
-create table columnexist as setschema 'colname' select distinct(colname) from (postgresraw);
+create table columnexist as setschema 'colname' select distinct(colname) from localinputtbl_1;
 --Check if columns exist
 var 'counts' from select count(distinct(colname)) from columnexist where colname in (select xname from columnstable);
 var 'result' from select count(distinct(xname)) from columnstable;
-var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;			
-vars '%{valExists}'; 
+var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;
+vars '%{valExists}';
 ------
 
 var 'select_vars' from
@@ -53,7 +53,7 @@ create temp table data as select %{select_vars}  from (fromeav select * from loc
 var 'minimumrecords' 10;
 create temp table emptytable as select * from data limit 0;
 var 'privacycheck' from select case when (select count(*) from data) < %{minimumrecords} then 0 else 1 end;
-create temp table safeData as 
+create temp table safeData as
 select * from data where %{privacycheck}=1
 union all
 select * from emptytable where %{privacycheck}=0;

diff --git a/PIPELINE_ISOUP_MODEL_TREE_SERIALIZER/localupdate.template.sql b/PIPELINE_ISOUP_MODEL_TREE_SERIALIZER/localupdate.template.sql
@@ -1,6 +1,6 @@
 requirevars 'prv_output_local_tbl' 'target_attributes' 'descriptive_attributes' 'input_local_tbl';
 
-------- Create the correct dataset 
+------- Create the correct dataset
 drop table if exists datasets;
 create table datasets as
 select strsplitv('%{dataset}','delimiter:,') as d;
@@ -14,8 +14,7 @@ create table columnstable as
 select strsplitv('%{target_attributes},%{descriptive_attributes}' ,'delimiter:,') as xname;
 
 create temp table localinputtbl_1 as
-select __rid as rid,__colname as colname, __val as val
-from %{input_local_tbl};
+select rid,colname,  val from (toeav select * from %{input_local_tbl});
 
 var 'target_vars' from
 ( select group_concat('"'||targetname||'"',', ') from targetstable);
@@ -36,12 +35,12 @@ emptyfield '%{empty}';
 var 'empty' from select case when (select '%{dataset}')='' then 0 else 1 end;
 emptyset '%{empty}';
 ------------------
-create table columnexist as setschema 'colname' select distinct(colname) from (postgresraw);
+create table columnexist as setschema 'colname' select distinct(colname) from localinputtbl_1;
 --Check if columns exist
 var 'counts' from select count(distinct(colname)) from columnexist where colname in (select xname from columnstable);
 var 'result' from select count(distinct(xname)) from columnstable;
-var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;			
-vars '%{valExists}'; 
+var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;
+vars '%{valExists}';
 ------
 
 var 'select_vars' from
@@ -55,7 +54,7 @@ create temp table data as select %{select_vars}  from (fromeav select * from loc
 var 'minimumrecords' 10;
 create temp table emptytable as select * from data limit 0;
 var 'privacycheck' from select case when (select count(*) from data) < %{minimumrecords} then 0 else 1 end;
-create temp table safeData as 
+create temp table safeData as
 select * from data where %{privacycheck}=1
 union all
 select * from emptytable where %{privacycheck}=0;

diff --git a/PIPELINE_ISOUP_MODEL_TREE_SERIALIZER/properties.json b/PIPELINE_ISOUP_MODEL_TREE_SERIALIZER/properties.json
@@ -1,11 +1,42 @@
 {
-  "name" : "PIPELINE_ISOUP_MODEL_TREE_SERIALIZER",
-  "desc" : "PIPELINE_ISOUP_MODEL_TREE_SERIALIZER",
-  "type" : "pipeline",
-  "status" : "disabled",
-  "vtype" : "none",
-  "responseContentType" : "application/visjs+javascript",
-  "parameters" : [ {"name" : "target_attributes", "desc" : "Dependent parameters. One or more can be selected.", "value" : "subjectageyears"}, {"name" : "descriptive_attributes", "desc" : "Independent parameters. One or more can be selected.", "value" : "apoe4,av45"},
-    {"name" : "dataset", "desc" : "It contains the names of one or more datasets, in which the algorithm will be executed. It cannot be empty.", "value" : "adni,chuv_adni"},
-    {"name" : "filter", "desc" : "", "value" : ""}]
+	"name": "PIPELINE_ISOUP_MODEL_TREE_SERIALIZER",
+	"desc": "PIPELINE_ISOUP_MODEL_TREE_SERIALIZER",
+	"type": "pipeline",
+	"status": "disabled",
+	"vtype": "none",
+	"responseContentType": "application/visjs+javascript",
+	"parameters": [{
+			"name": "target_attributes",
+			"desc": "Dependent parameters. One or more can be selected.",
+			"type": "column",
+			"value": "subjectageyears",
+			"valueNotBlank": true,
+			"valueMultiple": true,
+			"valueType": "string"
+		}, {
+			"name": "descriptive_attributes",
+			"desc": "Independent parameters. One or more can be selected.",
+			"type": "column",
+			"value": "apoe4,av45",
+			"valueNotBlank": true,
+			"valueMultiple": true,
+			"valueType": "string"
+		}, {
+			"name": "dataset",
+			"desc": "It contains the names of one or more datasets, in which the algorithm will be executed. It cannot be empty.",
+			"type": "dataset",
+			"value": "adni,chuv_adni",
+			"valueNotBlank": true,
+			"valueMultiple": true,
+			"valueType": "string"
+		}, {
+			"name": "filter",
+			"desc": "",
+			"type": "filter",
+			"value": "",
+			"valueNotBlank": false,
+			"valueMultiple": true,
+			"valueType": "string"
+		}
+	]
 }
diff --git a/PIPELINE_ISOUP_REGRESSION_TREE_SERIALIZER/local.template.sql b/PIPELINE_ISOUP_REGRESSION_TREE_SERIALIZER/local.template.sql
@@ -1,6 +1,6 @@
 requirevars 'target_attributes' 'descriptive_attributes' 'input_local_tbl' ;
 
-------- Create the correct dataset 
+------- Create the correct dataset
 drop table if exists datasets;
 create table datasets as
 select strsplitv('%{dataset}','delimiter:,') as d;
@@ -14,8 +14,7 @@ create table columnstable as
 select strsplitv('%{target_attributes},%{descriptive_attributes}' ,'delimiter:,') as xname;
 
 create temp table localinputtbl_1 as
-select __rid as rid,__colname as colname, __val as val
-from %{input_local_tbl};
+select rid,colname, val from (toeav select * from %{input_local_tbl});
 
 var 'target_vars' from
 ( select group_concat('"'||targetname||'"',', ') from targetstable);
@@ -34,12 +33,12 @@ emptyfield '%{empty}';
 var 'empty' from select case when (select '%{dataset}')='' then 0 else 1 end;
 emptyset '%{empty}';
 ------------------
-create table columnexist as setschema 'colname' select distinct(colname) from (postgresraw);
+create table columnexist as setschema 'colname' select distinct(colname) from localinputtbl_1;
 --Check if columns exist
 var 'counts' from select count(distinct(colname)) from columnexist where colname in (select xname from columnstable);
 var 'result' from select count(distinct(xname)) from columnstable;
-var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;			
-vars '%{valExists}'; 
+var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;
+vars '%{valExists}';
 -------------
 
 var 'select_vars' from
@@ -53,7 +52,7 @@ create temp table data as select %{select_vars}  from (fromeav select * from loc
 var 'minimumrecords' 10;
 create temp table emptytable as select * from data limit 0;
 var 'privacycheck' from select case when (select count(*) from data) < %{minimumrecords} then 0 else 1 end;
-create temp table safeData as 
+create temp table safeData as
 select * from data where %{privacycheck}=1
 union all
 select * from emptytable where %{privacycheck}=0;