-
Notifications
You must be signed in to change notification settings - Fork 1
/
BatchHexBin.m
78 lines (64 loc) · 2.29 KB
/
BatchHexBin.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
% bin spatial data into hexagonal bins and output count files
% pool all data from files listed in the batch_input_file
% Xiaoyan, 2017
%% modify here
batch_input_file = 'files_to_pool.txt';
hexagon_radius = 500; % in pixel
output_fileprefix = 'pooled_bincounts';
%% do not modify
% which files to pool
fid = fopen(batch_input_file, 'r');
files = textscan(fid, '%s', 'delimiter', '\n');
files = files{:};
fclose(fid);
% start processing
allNames = {};
mapNames = cell(numel(files), 1);
binCounts = cell(numel(files), 1);
binPos = cell(numel(files), 1);
for i = 1:numel(files)
% load name and coordinates and do hexbin
[name, pos] = getinsitudata(files{i});
[name, pos] = removereads(name, 'NNNN', pos);
[uNames, ~, iName] = unique(name);
[inbin, bincenters] = hexbin(pos, hexagon_radius);
% count transcripts in each bin
counts = histcounts2(inbin, iName,...
[unique(inbin)', max(inbin)+1],...
1:numel(uNames)+1);
binCounts{i} = counts;
binPos{i} = [unique(inbin), bincenters];
% add genes that have not appeared yet
allNames = [allNames, setdiff(uNames, allNames)];
% name index in the pool
iuNames = cellfun(@(v) find(strcmp(v, allNames)), uNames);
mapNames{i} = iuNames;
end
% organize into one big matrix
rNames = {};
for i = 1:numel(files)
rNames = [rNames;...
catstrnum([strtok(files{i}, '.'), '_r', num2str(hexagon_radius), '_hexbin'], binPos{i}(:,1))];
end
matCounts = zeros(length(rNames), numel(allNames));
nrow = 0;
for i = 1:numel(files)
matCounts(nrow+1:nrow+size(binPos{i}),mapNames{i}) = binCounts{i};
nrow = nrow + size(binPos{i});
end
[sortedNames, orderNames] = sort(allNames);
matCounts = matCounts(:,orderNames);
% write count file
binCountsWrite = [rNames, num2cell(matCounts)]';
fid = fopen([output_fileprefix, '_count.csv'], 'w');
fprintf(fid, 'file_bin,');
fprintf(fid, lineformat('%s', numel(sortedNames)), sortedNames{:});
fprintf(fid, ['%s,' lineformat('%d', numel(sortedNames))], binCountsWrite{:});
fclose(fid);
% write hexbin position file
binPos = cat(1, binPos{:});
binPosWrite = [rNames, num2cell(binPos(:,2:3))]';
fid = fopen([output_fileprefix, '_binpos.csv'], 'w');
fprintf(fid, 'file_bin,bincenter_x,bincenter_y\n');
fprintf(fid, '%s,%d,%d\n', binPosWrite{:});
fclose(fid);