Skip to content

Commit

Permalink
Merge pull request #7 from madgik/dev_exareme_v19
Browse files Browse the repository at this point in the history
Changes to the algorithms for exareme v19
  • Loading branch information
sofiakarb authored Apr 1, 2019
2 parents c31f690 + 5b1a5a3 commit 8a7ac57
Show file tree
Hide file tree
Showing 48 changed files with 2,638 additions and 780 deletions.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
requirevars 'defaultDB' 'input_local_tbl' 'x' 'y' 'dataset';
attach database '%{defaultDB}' as defaultDB;

-- It is used for testing
--drop table if exists mydata;
--create table mydata as select * from (file header:t 'epfl_flattable.csv');
--var 'input_local_tbl' 'mydata';
--var 'y' 'av45';
--var 'x' 'adnicategory*apoe4+subjectage+minimentalstate+gender';
--var 'dataset' 'adni';
-------------------------------

drop table if exists datasets;
create table datasets as
select strsplitv('%{dataset}','delimiter:,') as d;

drop table if exists xvariables;
create table xvariables as
select strsplitv(regexpr("\+|\:|\*|\-",'%{x}',"+") ,'delimiter:+') as xname;
select strsplitv(regexpr("\+|\:|\*|\-",'%{x}',"+") ,'delimiter:+') as xname;

--1. Keep only the correct colnames
drop table if exists localinputtbl_1;
create table localinputtbl_1 as
select __rid as rid,__colname as colname, tonumber(__val) as val
from %{input_local_tbl};
--1. Keep only the correct colnames from a flat table
drop table if exists localinputtbl_1a;
create table localinputtbl_1a as
select rid,colname, val from (toeav select * from %{input_local_tbl});

--Check if x is empty
var 'empty' from select case when (select '%{x}')='' then 0 else 1 end;
Expand All @@ -27,39 +35,44 @@ emptyfield '%{empty}';
var 'empty' from select case when (select '%{dataset}')='' then 0 else 1 end;
emptyset '%{empty}';
------------------
create table columnexist as setschema 'colname' select distinct(colname) from (postgresraw);
create table columnexist as setschema 'colname' select distinct(colname) as colname2 from localinputtbl_1a;
--Check if x exists in dataset
var 'counts' from select count(distinct(colname)) from columnexist where colname in (select xname from xvariables);
var 'result' from select count(xname) from xvariables;
var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;
var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;
vars '%{valExists}';
--Check if y exists in dataset
var 'valExists' from select case when (select exists (select colname from columnexist where colname='%{y}'))=0 then 0 else 1 end;
vars '%{valExists}';
----------
--1. Keep only the correct colnames from a flat table
drop table if exists localinputtbl_1;
create table localinputtbl_1 as
select rid,colname, tonumber(val) as val from localinputtbl_1a
where colname in (select * from xvariables) or colname ='%{y}' or colname ='dataset';

--2. Keep only patients of the correct dataset
drop table if exists localinputtbl_2;
drop table if exists localinputtbl_2;
create table localinputtbl_2 as
select rid, colname, val
from localinputtbl_1
where rid in (select distinct rid
from localinputtbl_1
where rid in (select distinct rid
from localinputtbl_1
where colname ='dataset' and val in (select d from datasets));

delete from localinputtbl_2
where colname = 'dataset';

--3. Delete patients with null values
drop table if exists localinputtbl;
--3. Delete patients with null values
drop table if exists localinputtbl;
create table localinputtbl as
select rid, colname, val
from localinputtbl_2
where rid not in (select distinct rid from localinputtbl_2
where rid not in (select distinct rid from localinputtbl_2
where val is null or val = '' or val = 'NA')
order by rid, colname, val;

--y value:Real,Float or Integer.
--y value:Real,Float or Integer.
--Some values could be null (type:Text). We want to make sure that if "rid-colname('%{y}')-val" exists in a node, the colname type is not "Text". That is why
--we previously deleted patients with null values.
var 'type' from select case when (select distinct(typeof(tonumber(val))) as val from localinputtbl where colname='%{y}')='integer' or (select distinct(typeof(tonumber(val))) as val from localinputtbl where colname = '%{y}')='real' or (select distinct(typeof(tonumber(val))) as val from localinputtbl where colname='%{y}')='float' then 1 else 0 end;
Expand All @@ -72,9 +85,9 @@ vartypey '%{final}';
var 'minimumrecords' 10;
create table emptytable(rid text primary key, colname, val);
var 'privacycheck' from select case when (select count(distinct(rid)) from localinputtbl) < %{minimumrecords} then 0 else 1 end;
create table localinputtbl2 as setschema 'rid , colname, val'
create table localinputtbl2 as setschema 'rid , colname, val'
select * from localinputtbl where %{privacycheck}=1
union
union
select * from emptytable where %{privacycheck}=0;
drop table if exists localinputtbl;
alter table localinputtbl2 rename to localinputtbl;
Expand All @@ -88,7 +101,7 @@ select * from localinputtbl
where colname in (select xname from xvariables) or colname = "%{y}";

-- A. Dummy code of categorical variables
drop table if exists T;
drop table if exists T;
create table T as
select rid, colname||'('||val||')' as colname, 1 as val
from input_local_tbl_LR
Expand All @@ -115,14 +128,14 @@ select modelFormulae(rid,colname,val, "%{x}") from input_local_tbl_LR group by r

var 'colnames' from select jmergeregexp(jgroup(colname)) from (select colname from localinputtbl group by colname having count(distinct val)=1); --NEW
drop table if exists defaultDB.deletedcolumns; --NEW
create table defaultDB.deletedcolumns as setschema 'colname'
create table defaultDB.deletedcolumns as setschema 'colname'
select distinct colname from defaultDB.input_local_tbl_LR_Final where regexprmatches('%{colnames}' ,colname); --NEW

delete from defaultDB.input_local_tbl_LR_Final --NEW
where colname in (select * from defaultDB.deletedcolumns); --NEW

insert into defaultDB.input_local_tbl_LR_Final
select rid,colname,val from input_local_tbl_LR where colname = '%{y}';
select rid,colname,val from input_local_tbl_LR where colname = '%{y}';
--
insert into defaultDB.input_local_tbl_LR_Final
select distinct rid as rid,'(Intercept)' as colname, 1.0 as val from input_local_tbl_LR;
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
40 changes: 40 additions & 0 deletions LINEAR_REGRESSION/properties.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"name": "LINEAR_REGRESSION",
"desc": "",
"type": "multiple_local_global",
"status": "enabled",
"parameters": [{
"name": "x",
"desc": "The right part of the linear predictor function, that contains the independent variables in an equation supporting the symbols +, :, * (R notation). The independent variables are variables of the input dataset and they should be Real, Float, Integer or Text. It cannot be empty.",
"type": "column",
"value": "adnicategory*apoe4+subjectage+minimentalstate+gender",
"valueNotBlank": true,
"valueMultiple": true,
"valueType": "string"
}, {
"name": "y",
"desc": "The left part of the linear predictor function, that contains the dependent variable. The dependent variable is a variable of the input dataset and it should be Real, Float or Integer. It cannot be empty.",
"type": "column",
"value": "av45",
"valueNotBlank": true,
"valueMultiple": false,
"valueType": "string"
}, {
"name": "dataset",
"desc": "It contains the names of one or more datasets, in which the algorithm will be executed. It cannot be empty.",
"type": "dataset",
"value": "adni,ppmi",
"valueNotBlank": true,
"valueMultiple": true,
"valueType": "string"
}, {
"name": "filter",
"desc": "",
"type": "filter",
"value": "",
"valueNotBlank": false,
"valueMultiple": true,
"valueType": "string"
}
]
}
File renamed without changes.
8 changes: 8 additions & 0 deletions LIST_DATASET/local.template.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- LIST_DATASET local step: reports which dataset values are present in the input table of this node.
-- Requires the template variable input_local_tbl, which names the input table queried below.
requirevars 'input_local_tbl';

-- Invoke the external set-local-datasets.sh script before querying.
-- What the script does is not visible in this file.
select execprogram(null, "/root/exareme/set-local-datasets.sh");

-- Variable a: number of distinct rid values in the EAV (rid, colname, val) view of the whole input table.
-- NOTE(review): this is one total over all datasets, not a per-dataset count -- confirm that is intended.
var 'a' from select count(distinct(rid)) as sum1 from (select distinct rid from (toeav select * from %{input_local_tbl}));

-- Variable b: output of running cat on /root/exareme/etc/exareme/name (presumably the name of this node -- TODO confirm).
var 'b' from select execprogram(null,'cat','/root/exareme/etc/exareme/name');
-- Result: one row per distinct value of the dataset column, each row carrying the same sum1 (a) and who (b).
select var('a') as sum1, val as val, var('b') as who from (select distinct val from (toeav select * from %{input_local_tbl}) where colname = 'dataset') group by val;
7 changes: 7 additions & 0 deletions LIST_DATASET/properties.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"name": "LIST_DATASET",
"desc": "",
"type": "local_global",
"status": "enabled",
"parameters": []
}
12 changes: 12 additions & 0 deletions LIST_VARIABLES/local.template.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- LIST_VARIABLES local step: lists the column names found in the input table together with a value type.
-- Requires the template variable input_local_tbl, which names the input table queried below.
requirevars 'input_local_tbl' ;

-- Innermost query: one row per colname of the EAV view of the input table (rows whose val is null are ignored),
-- with t = typeof(tonumber(val)).
-- NOTE(review): t is not aggregated while the query groups by colname, so in SQLite-style engines its value
-- comes from an arbitrary row of each group -- confirm that a single sampled value is enough to decide the type.
-- Middle query: jgroup collects the (variable, t) rows into a single value.
-- Outer query: jdictgroup stores that value under the key variables
-- (presumably both functions return JSON text -- TODO confirm against the madIS function documentation).
select jdictgroup('variables', variable) as variables
from (
select jgroup(variable, t) as variable
from (
select distinct colname as variable, typeof(tonumber(val)) as t
from (toeav select * from %{input_local_tbl})
where val is not null
group by colname
)
);
8 changes: 8 additions & 0 deletions LIST_VARIABLES/properties.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"name": "LIST_VARIABLES",
"desc": "",
"type": "local",
"status": "enabled",
"parameters": [
]
}
14 changes: 7 additions & 7 deletions PIPELINE_ISOUP_MODEL_TREE_SERIALIZER/local.template.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
requirevars 'target_attributes' 'descriptive_attributes' 'input_local_tbl' ;

------- Create the correct dataset
------- Create the correct dataset
drop table if exists datasets;
create table datasets as
select strsplitv('%{dataset}','delimiter:,') as d;
Expand All @@ -14,8 +14,8 @@ create table columnstable as
select strsplitv('%{target_attributes},%{descriptive_attributes}' ,'delimiter:,') as xname;

create temp table localinputtbl_1 as
select __rid as rid,__colname as colname, __val as val
from %{input_local_tbl};
select rid,colname, val from (toeav select * from %{input_local_tbl});


var 'target_vars' from
( select group_concat('"'||targetname||'"',', ') from targetstable);
Expand All @@ -34,12 +34,12 @@ emptyfield '%{empty}';
var 'empty' from select case when (select '%{dataset}')='' then 0 else 1 end;
emptyset '%{empty}';
------------------
create table columnexist as setschema 'colname' select distinct(colname) from (postgresraw);
create table columnexist as setschema 'colname' select distinct(colname) from localinputtbl_1;
--Check if columns exist
var 'counts' from select count(distinct(colname)) from columnexist where colname in (select xname from columnstable);
var 'result' from select count(distinct(xname)) from columnstable;
var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;
vars '%{valExists}';
var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;
vars '%{valExists}';
------

var 'select_vars' from
Expand All @@ -53,7 +53,7 @@ create temp table data as select %{select_vars} from (fromeav select * from loc
var 'minimumrecords' 10;
create temp table emptytable as select * from data limit 0;
var 'privacycheck' from select case when (select count(*) from data) < %{minimumrecords} then 0 else 1 end;
create temp table safeData as
create temp table safeData as
select * from data where %{privacycheck}=1
union all
select * from emptytable where %{privacycheck}=0;
Expand Down
13 changes: 6 additions & 7 deletions PIPELINE_ISOUP_MODEL_TREE_SERIALIZER/localupdate.template.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
requirevars 'prv_output_local_tbl' 'target_attributes' 'descriptive_attributes' 'input_local_tbl';

------- Create the correct dataset
------- Create the correct dataset
drop table if exists datasets;
create table datasets as
select strsplitv('%{dataset}','delimiter:,') as d;
Expand All @@ -14,8 +14,7 @@ create table columnstable as
select strsplitv('%{target_attributes},%{descriptive_attributes}' ,'delimiter:,') as xname;

create temp table localinputtbl_1 as
select __rid as rid,__colname as colname, __val as val
from %{input_local_tbl};
select rid,colname, val from (toeav select * from %{input_local_tbl});

var 'target_vars' from
( select group_concat('"'||targetname||'"',', ') from targetstable);
Expand All @@ -36,12 +35,12 @@ emptyfield '%{empty}';
var 'empty' from select case when (select '%{dataset}')='' then 0 else 1 end;
emptyset '%{empty}';
------------------
create table columnexist as setschema 'colname' select distinct(colname) from (postgresraw);
create table columnexist as setschema 'colname' select distinct(colname) from localinputtbl_1;
--Check if columns exist
var 'counts' from select count(distinct(colname)) from columnexist where colname in (select xname from columnstable);
var 'result' from select count(distinct(xname)) from columnstable;
var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;
vars '%{valExists}';
var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;
vars '%{valExists}';
------

var 'select_vars' from
Expand All @@ -55,7 +54,7 @@ create temp table data as select %{select_vars} from (fromeav select * from loc
var 'minimumrecords' 10;
create temp table emptytable as select * from data limit 0;
var 'privacycheck' from select case when (select count(*) from data) < %{minimumrecords} then 0 else 1 end;
create temp table safeData as
create temp table safeData as
select * from data where %{privacycheck}=1
union all
select * from emptytable where %{privacycheck}=0;
Expand Down
49 changes: 40 additions & 9 deletions PIPELINE_ISOUP_MODEL_TREE_SERIALIZER/properties.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,42 @@
{
"name" : "PIPELINE_ISOUP_MODEL_TREE_SERIALIZER",
"desc" : "PIPELINE_ISOUP_MODEL_TREE_SERIALIZER",
"type" : "pipeline",
"status" : "disabled",
"vtype" : "none",
"responseContentType" : "application/visjs+javascript",
"parameters" : [ {"name" : "target_attributes", "desc" : "Dependent parameters. One or more can be selected.", "value" : "subjectageyears"}, {"name" : "descriptive_attributes", "desc" : "Independent parameters. One or more can be selected.", "value" : "apoe4,av45"},
{"name" : "dataset", "desc" : "It contains the names of one or more datasets, in which the algorithm will be executed. It cannot be empty.", "value" : "adni,chuv_adni"},
{"name" : "filter", "desc" : "", "value" : ""}]
"name": "PIPELINE_ISOUP_MODEL_TREE_SERIALIZER",
"desc": "PIPELINE_ISOUP_MODEL_TREE_SERIALIZER",
"type": "pipeline",
"status": "disabled",
"vtype": "none",
"responseContentType": "application/visjs+javascript",
"parameters": [{
"name": "target_attributes",
"desc": "Dependent parameters. One or more can be selected.",
"type": "column",
"value": "subjectageyears",
"valueNotBlank": true,
"valueMultiple": true,
"valueType": "string"
}, {
"name": "descriptive_attributes",
"desc": "Independent parameters. One or more can be selected.",
"type": "column",
"value": "apoe4,av45",
"valueNotBlank": true,
"valueMultiple": true,
"valueType": "string"
}, {
"name": "dataset",
"desc": "It contains the names of one or more datasets, in which the algorithm will be executed. It cannot be empty.",
"type": "dataset",
"value": "adni,chuv_adni",
"valueNotBlank": true,
"valueMultiple": true,
"valueType": "string"
}, {
"name": "filter",
"desc": "",
"type": "filter",
"value": "",
"valueNotBlank": false,
"valueMultiple": true,
"valueType": "string"
}
]
}
13 changes: 6 additions & 7 deletions PIPELINE_ISOUP_REGRESSION_TREE_SERIALIZER/local.template.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
requirevars 'target_attributes' 'descriptive_attributes' 'input_local_tbl' ;

------- Create the correct dataset
------- Create the correct dataset
drop table if exists datasets;
create table datasets as
select strsplitv('%{dataset}','delimiter:,') as d;
Expand All @@ -14,8 +14,7 @@ create table columnstable as
select strsplitv('%{target_attributes},%{descriptive_attributes}' ,'delimiter:,') as xname;

create temp table localinputtbl_1 as
select __rid as rid,__colname as colname, __val as val
from %{input_local_tbl};
select rid,colname, val from (toeav select * from %{input_local_tbl});

var 'target_vars' from
( select group_concat('"'||targetname||'"',', ') from targetstable);
Expand All @@ -34,12 +33,12 @@ emptyfield '%{empty}';
var 'empty' from select case when (select '%{dataset}')='' then 0 else 1 end;
emptyset '%{empty}';
------------------
create table columnexist as setschema 'colname' select distinct(colname) from (postgresraw);
create table columnexist as setschema 'colname' select distinct(colname) from localinputtbl_1;
--Check if columns exist
var 'counts' from select count(distinct(colname)) from columnexist where colname in (select xname from columnstable);
var 'result' from select count(distinct(xname)) from columnstable;
var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;
vars '%{valExists}';
var 'valExists' from select case when(select %{counts})=%{result} then 1 else 0 end;
vars '%{valExists}';
-------------

var 'select_vars' from
Expand All @@ -53,7 +52,7 @@ create temp table data as select %{select_vars} from (fromeav select * from loc
var 'minimumrecords' 10;
create temp table emptytable as select * from data limit 0;
var 'privacycheck' from select case when (select count(*) from data) < %{minimumrecords} then 0 else 1 end;
create temp table safeData as
create temp table safeData as
select * from data where %{privacycheck}=1
union all
select * from emptytable where %{privacycheck}=0;
Expand Down
Loading

0 comments on commit 8a7ac57

Please sign in to comment.