-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsortMlastData3.m
195 lines (173 loc) · 6.2 KB
/
sortMlastData3.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
% sortMlastData3
% Mary Kate Montgomery
% May 2019
%
% Function to sort MLAST results according to subject ID and date. Matches
% subject order from reference log.
function [dataSorted, metaDataSorted, datesSorted] = sortMlastData3(dataIn, tagNames, metadata)
% SORTMLASTDATA3 Arrange scan results into a (tag x date) cell grid.
%
% Inputs:
%   dataIn   - cell array of scan results; empty cells are dropped. Each
%              remaining element must expose a char field .scanDate.
%              NOTE(review): presumably a 1-by-N row cell, so that allData
%              below becomes a column (the remainder step at the end
%              deletes by row index) - confirm against the caller.
%   tagNames - cell array of subject ID tags; one output row per entry.
%   metadata - 2-D cell array searched column by column for each tag.
%              NOTE(review): its row indices are reused to index allData,
%              so rows are assumed to align one-to-one with the non-empty
%              entries of dataIn - confirm.
%
% Outputs:
%   dataSorted     - cell array; row = position of the tag in tagNames,
%                    column = position of the scan date in datesSorted.
%                    Scans whose metadata row matched no tag are appended
%                    below in column 1.
%   metaDataSorted - one metadata row per row of dataSorted (cells after
%                    the matched tag column blanked to ' '), followed by
%                    the metadata rows that matched no tag.
%   datesSorted    - unique cropped scan dates, ordered by datenum.
allData = dataIn(~cellfun('isempty',dataIn))';
% Identify scan dates to match to
datesAll = cell(numel(allData),1);
for i = 1:numel(allData)
date = allData{i}.scanDate;
% Crop time from date so all scans for given day read the same.
% Keeps the text up to and including the space; if scanDate contains no
% space the cropped date is empty.
dateCropped = date(1:strfind(date,' '));
datesAll{i} = dateCropped;
end
datesUnique = unique(datesAll);
% Order the unique dates chronologically rather than alphabetically
[~, sortOrder] = sort(datenum(datesUnique));
datesSorted = datesUnique(sortOrder);
numCols = numel(datesSorted);
% Are there repeating tag names (from scan log)?
% NOTE(review): tagNamesUnique and repeatTags are not read anywhere in
% this function; only repeatNames is used below.
[tagNamesUnique, repeatNames] = cellNanUnique(tagNames);
repeatTags = ~(numel(unique(tagNames))==numel(tagNames));
% Preallocate
% rowsFound accumulates every metadata/data row index claimed by a tag.
% relDatAll and metaDataAll are grown row by row inside the loop.
rowsFound = []; %rowPad = 0;
for tagInd = 1:numel(tagNames)
idTag = tagNames(tagInd);
% Tags without any numeric portion get an empty placeholder row
if isempty(getNumTag(idTag{1}))
relDatAll(tagInd,:) = cell(1,numel(datesSorted));
metaDataAll(tagInd,:) = cell(1,size(metadata,2));
continue;
end
% Find all rows of metadata w/ ID Tag
% idRowAll: row indices where the tag was found;
% idColAll: the metadata column in which each of those hits occurred.
idRowAll = []; idColAll = [];
for col = 1:size(metadata,2)
metaDataCol = metadata(:,col);
% Reassign empty cells
metaDataCol(find(cellfun('isempty',metaDataCol))) = {' '};
idRow = compareTags(metaDataCol,idTag);
idRowAll = [idRowAll; idRow];
idColAll = [idColAll; repmat(col,[length(idRow),1])];
end
% Put hits in row order, keeping the column list aligned with them
[idRowAll, sortOrder] = sort(idRowAll);
idColAll = idColAll(sortOrder);
thisIdData = allData(idRowAll);
thisIdMetaData = metadata(idRowAll,:);
% Keep log of rows used
% NOTE(review): a tag listed more than once in tagNames is processed on
% every occurrence, so its rows are appended here repeatedly.
rowsFound = [rowsFound; idRowAll];
% Tag not present in metadata: emit an empty placeholder row
if isempty(thisIdData)
relDatAll(tagInd,:) = cell(1,numel(datesSorted));
metaDataAll(tagInd,:) = cell(1,size(metadata,2));
continue;
end
if ~isempty(compareTags(idTag,repeatNames))
% Group according to pre-tag columns
% Blank every cell to the right of the column where the tag was found so
% only the tag column and the columns before it distinguish the groups.
preMetaData = thisIdMetaData;
for c = 1:numel(idColAll)
if idColAll(c) < size(preMetaData,2)
preMetaData(c,idColAll(c)+1:end) = {' '};
end
end
% Concatenate each row into one string to use as a group key.
% NOTE(review): cell2mat here presumably requires every cell in the row
% to be char - confirm metadata never holds numeric cells.
preMetaDataMat = cell(numel(idColAll),1);
for c = 1:numel(idColAll); preMetaDataMat{c} = cell2mat(preMetaData(c,:)); end
[preMetaDataMatUnq, uniqueInd] = unique(preMetaDataMat);
thisDataGrouped = {};
% Group. Assign correct rows
for g = 1:length(uniqueInd)
% NOTE(review): contains() is a substring test, so a key that is a
% substring of another group's key also pulls in that group's rows.
groupInd = find(contains(preMetaDataMat,preMetaDataMatUnq{g}));
thisDataGrouped(g,1:length(groupInd)) = thisIdData(groupInd);
% Output rows for the groups: every position of this tag in tagNames.
% (Does not depend on g; recomputed on each pass.)
rowAssigns = compareTags(tagNames, idTag);
end
else
% Non-repeated tag: a single group written to this tag's own row
thisDataGrouped = thisIdData';
rowAssigns = tagInd;
end
% NOTE(review): rowAssigns(g) below assumes there are at least as many
% entries in rowAssigns as there are groups - confirm for repeated tags.
for g = 1:size(thisDataGrouped,1)
% Match each scan w/ a date
% Get date
relDat = cell(1,numCols); relDat(1:size(thisDataGrouped,2)) = thisDataGrouped(g,:);
% NOTE(review): sortDateInd and datesRel are written but never read.
sortDateInd = [];
datesRel = cell(numel(relDat),1);
relDatSorted = cell(1,numCols);
for j = 1:numel(relDat)
if ~isempty(relDat{j})
date = relDat{j}.scanDate;
dateCropped = date(1:strfind(date,' '));
% Match date
% Place the scan in the column of its date; a later scan with the same
% cropped date overwrites an earlier one in that column.
dateInd = find(strcmp(dateCropped,datesSorted));
datesRel{dateInd} = dateCropped;
relDatSorted{dateInd} = relDat{j};
end
end
% Assign data
relDatAll(rowAssigns(g),1:numel(relDatSorted)) = relDatSorted;
% Assign metadata
if ~isempty(compareTags(idTag,repeatNames))
% Repeated tag: use the metadata row that represents this group
metaDataAll(rowAssigns(g),1:size(preMetaData,2)) = preMetaData(uniqueInd(g),:);
else
% Blank the cells to the right of the tag column, as in the grouped case
preMetaData = thisIdMetaData;
for c = 1:numel(idColAll)
if idColAll(c) < size(preMetaData,2)
preMetaData(c,idColAll(c)+1:end) = {' '};
end
end
% Find most common set of pre-Tag metadata
% NOTE(review): contains(text, patterns) yields one logical per text
% element, so numMatches looks like it can only be 0 or 1; max() then
% picks the first row that matches any earlier row (row 1 if none do)
% rather than a true mode - confirm intent.
preMetaDataMat = cell(numel(idColAll),1); matchCt = [];
for c = 1:numel(idColAll)
preMetaDataMat{c} = cell2mat(preMetaData(c,:));
if c > 1
numMatches = sum(contains(preMetaDataMat{c,1},preMetaDataMat(1:c-1)));
if numMatches > 0
matchCt(c) = numMatches;
else
matchCt(c) = 0;
end
else
matchCt(c) = 0;
end
end
[~, modeInd] = max(matchCt);
metaDataAll(rowAssigns(g),1:size(preMetaData,2)) = preMetaData(modeInd,:);
end
end
end
dataSorted = relDatAll;
metaDataSorted = metaDataAll;
% Add in unidentified data
% Rows never claimed by any tag are appended after the tag rows: scans go
% in column 1 of dataSorted, and their metadata rows are appended
% unmodified to metaDataSorted.
dataRemainder = allData;
dataRemainder(rowsFound,:) = [];
dataSorted(end+1:end+numel(dataRemainder),1) = dataRemainder;
metaDataRemainder = metadata;
metaDataRemainder(rowsFound,:) = [];
metaDataSorted = [metaDataSorted; metaDataRemainder];
end
function [outCell, repeatCells] = cellNanUnique(inCell)
% CELLNANUNIQUE Order-preserving unique of a cell array that keeps every 'NaN'.
%
% Input:
%   inCell      - cell array of tags (char vectors).
%
% Outputs:
%   outCell     - column cell holding the first occurrence of each distinct
%                 entry, in input order. Entries equal to the text 'NaN'
%                 are always kept, however many times they occur.
%   repeatCells - column cell holding every later occurrence of an entry
%                 that is already in outCell (a value seen three times is
%                 listed twice). 'NaN' entries are never listed here.
%
% Entries are compared with strcmp, i.e. whole-string equality. A substring
% test (contains) would wrongly flag 'A1' as a repeat of an earlier 'A12'
% and would error on cells that do not hold text.
outCell = {};
repeatCells = {};
for n = 1:numel(inCell)
    cellContents = inCell{n};
    if strcmp(cellContents,'NaN')
        % Placeholder entries are never collapsed
        outCell{end+1,1} = cellContents;
    elseif any(strcmp(outCell,cellContents))
        % Exact match with an entry already kept: record as a repeat
        repeatCells{end+1,1} = cellContents;
    else
        outCell{end+1,1} = cellContents;
    end
end
end
function sametag = compareTags(tag1,tag2)
% COMPARETAGS Indices at which two sets of tag IDs are identical.
%
% Inputs:
%   tag1, tag2 - tags to compare, passed straight to strcmp: cell arrays of
%                the same size, or one of them a scalar cell / char vector
%                that is compared against every element of the other.
%
% Output:
%   sametag    - linear indices (as returned by find) of the elements whose
%                full tag text matches exactly; empty when nothing matches.
%
% The comparison is on the complete tag text. Comparing only the numeric
% portion of each tag was tried and disabled; the per-element extraction
% that fed it was dead work and has been removed:
% sametag = find(strcmp(tag1Num,tag2Num));
sametag = find(strcmp(tag1,tag2));
end
function numTag = getNumTag(fullTag)
% GETNUMTAG Longest run of consecutive digits in a tag ID.
%
% Input:
%   fullTag - tag ID as a char vector (a string scalar is converted to
%             char first).
%
% Output:
%   numTag  - the characters of the longest contiguous run of '0'-'9' in
%             fullTag. When several runs share the maximum length the
%             leftmost one is returned. Empty when fullTag has no digits
%             or is not text.
%
% Only the characters '0'-'9' count as numeric, so letters such as 'i' and
% 'j' (which str2double would read as imaginary units) are excluded. The
% run search uses base MATLAB only; no Image Processing Toolbox is needed.
if isstring(fullTag) && isscalar(fullTag)
    fullTag = char(fullTag);
end
if ischar(fullTag)
    isDigit = (fullTag >= '0') & (fullTag <= '9');
else
    % Non-text input has no digit characters
    isDigit = false(size(fullTag));
end
% Locate the start and end of every run of digits
edges = diff([0, double(isDigit(:).'), 0]);
runStart = find(edges == 1);
runEnd = find(edges == -1) - 1;
% Find largest continuous numeric portion of tag ID
keep = false(size(fullTag));
if ~isempty(runStart)
    % max returns the first maximum, i.e. the leftmost of equally long runs
    [~, longest] = max(runEnd - runStart + 1);
    keep(runStart(longest):runEnd(longest)) = true;
end
numTag = fullTag(keep);
end