-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathio.c
408 lines (366 loc) · 11.8 KB
/
io.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
// collection of functions for accessing data in external memory
#include "util.h"
#include "io.h"
// open g->numBwt independent read streams on the file g->bwfname (one per
// input BWT); the array of FILE pointers is allocated here and stored in g->bwf.
// requires g->extMem; dies on allocation or open failure
void open_bw_files(g_data *g) {
  assert(g->extMem);
  g->bwf = malloc(g->numBwt*sizeof(FILE *));
  if(!g->bwf) die(__func__);
  int k = 0;
  while(k < g->numBwt) {
    FILE *f = fopen(g->bwfname,"r");
    g->bwf[k] = f;
    if(f==NULL) die(__func__);
    k++;
  }
}
// seek every stream g->bwf[k] to the start of the k-th BWT: the byte position
// is sizeof(symbol) times (distance of bws[k] from bws[0] plus g->symb_offset).
// requires g->extMem; dies if a seek fails
void rewind_bw_files(g_data *g) {
  assert(g->extMem);
  for(int k=0; k < g->numBwt; k++) {
    long pos = sizeof(symbol)*(g->bws[k]-g->bws[0]+g->symb_offset);
    if(fseek(g->bwf[k],pos,SEEK_SET)!=0) die(__func__);
  }
}
// close all the streams in g->bwf[] and release the array itself.
// requires g->extMem; dies if a close fails
void close_bw_files(g_data *g) {
  assert(g->extMem);
  for(int k=0; k < g->numBwt; k++) {
    if(fclose(g->bwf[k])!=0) die(__func__);
  }
  free(g->bwf);
}
/******************************************************************************/
// open g->numBwt independent read streams on the file g->safname (one per
// input BWT); the array of FILE pointers is allocated here and stored in g->saf.
// requires g->extMem; dies on allocation or open failure
void open_sa_files(g_data *g) {
  assert(g->extMem);
  g->saf = malloc(g->numBwt*sizeof(FILE *));
  if(!g->saf) die(__func__);
  int k = 0;
  while(k < g->numBwt) {
    FILE *f = fopen(g->safname,"r");
    g->saf[k] = f;
    if(f==NULL) die(__func__);
    k++;
  }
}
// seek every stream g->saf[k] to the data of the k-th BWT: the byte position
// is g->outputSA times (distance of bws[k] from bws[0] plus g->symb_offset).
// requires g->extMem; dies if a seek fails
void rewind_sa_files(g_data *g) {
  assert(g->extMem);
  for(int k=0; k < g->numBwt; k++) {
    long pos = (g->outputSA)*(g->bws[k]-g->bws[0]+g->symb_offset);
    if(fseek(g->saf[k],pos,SEEK_SET)!=0) die(__func__);
  }
}
// close all the streams in g->saf[] and release the array itself.
// requires g->extMem; dies if a close fails
void close_sa_files(g_data *g) {
  assert(g->extMem);
  for(int k=0; k < g->numBwt; k++) {
    if(fclose(g->saf[k])!=0) die(__func__);
  }
  free(g->saf);
}
/******************************************************************************/
// open g->numBwt independent read streams on the file g->slfname (one per
// input BWT); the array of FILE pointers is allocated here and stored in g->slf.
// requires g->extMem; dies on allocation or open failure
void open_sl_files(g_data *g) {
  assert(g->extMem);
  g->slf = malloc(g->numBwt*sizeof(FILE *));
  if(!g->slf) die(__func__);
  int k = 0;
  while(k < g->numBwt) {
    FILE *f = fopen(g->slfname,"r");
    g->slf[k] = f;
    if(f==NULL) die(__func__);
    k++;
  }
}
// seek every stream g->slf[k] to the data of the k-th BWT: the byte position
// is g->outputSL times (distance of bws[k] from bws[0] plus g->symb_offset).
// requires g->extMem; dies if a seek fails
void rewind_sl_files(g_data *g) {
  assert(g->extMem);
  for(int k=0; k < g->numBwt; k++) {
    long pos = (g->outputSL)*(g->bws[k]-g->bws[0]+g->symb_offset);
    if(fseek(g->slf[k],pos,SEEK_SET)!=0) die(__func__);
  }
}
// close all the streams in g->slf[] and release the array itself.
// requires g->extMem; dies if a close fails
void close_sl_files(g_data *g) {
  assert(g->extMem);
  for(int k=0; k < g->numBwt; k++) {
    if(fclose(g->slf[k])!=0) die(__func__);
  }
  free(g->slf);
}
/******************************************************************************/
// open g->numBwt independent read streams on the file g->qsfname (one per
// input BWT); the array of FILE pointers is allocated here and stored in g->qsf.
// requires g->extMem; dies on allocation or open failure
void open_qs_files(g_data *g) {
  assert(g->extMem);
  g->qsf = malloc(g->numBwt*sizeof(FILE *));
  if(!g->qsf) die(__func__);
  int k = 0;
  while(k < g->numBwt) {
    FILE *f = fopen(g->qsfname,"r");
    g->qsf[k] = f;
    if(f==NULL) die(__func__);
    k++;
  }
}
// seek every stream g->qsf[k] to the data of the k-th BWT: the byte position
// is sizeof(symbol) times (distance of bws[k] from bws[0] plus g->symb_offset).
// requires g->extMem; dies if a seek fails
void rewind_qs_files(g_data *g) {
  assert(g->extMem);
  for(int k=0; k < g->numBwt; k++) {
    long pos = sizeof(symbol)*(g->bws[k]-g->bws[0]+g->symb_offset);
    if(fseek(g->qsf[k],pos,SEEK_SET)!=0) die(__func__);
  }
}
// close all the streams in g->qsf[] and release the array itself.
// requires g->extMem; dies if a close fails
void close_qs_files(g_data *g) {
  assert(g->extMem);
  for(int k=0; k < g->numBwt; k++) {
    if(fclose(g->qsf[k])!=0) die(__func__);
  }
  free(g->qsf);
}
/******************************************************************************/
// open g->numBwt independent read streams on the file g->dafname (one per
// input BWT); the array of FILE pointers is allocated here and stored in g->daf.
// requires g->extMem; dies on allocation or open failure
void open_da_files(g_data *g) {
  assert(g->extMem);
  g->daf = malloc(g->numBwt*sizeof(FILE *));
  if(!g->daf) die(__func__);
  int k = 0;
  while(k < g->numBwt) {
    FILE *f = fopen(g->dafname,"r");
    g->daf[k] = f;
    if(f==NULL) die(__func__);
    k++;
  }
}
// seek every stream g->daf[k] to the data of the k-th BWT: the byte position
// is g->outputDA times (distance of bws[k] from bws[0] plus g->symb_offset).
// requires g->extMem; dies if a seek fails
void rewind_da_files(g_data *g) {
  assert(g->extMem);
  for(int k=0; k < g->numBwt; k++) {
    long pos = (g->outputDA)*(g->bws[k]-g->bws[0]+g->symb_offset);
    if(fseek(g->daf[k],pos,SEEK_SET)!=0) die(__func__);
  }
}
// close all the streams in g->daf[] and release the array itself.
// requires g->extMem; dies if a close fails
void close_da_files(g_data *g) {
  assert(g->extMem);
  for(int k=0; k < g->numBwt; k++) {
    if(fclose(g->daf[k])!=0) die(__func__);
  }
  free(g->daf);
}
/******************************************************************************/
// create a temporary file named <path>.tmpXXXXXX (XXXXXX chosen by mkstemp)
// and return it as a buffered stream opened in "w+" mode.
// the name is unlinked before returning, so the file has no directory entry
// and disappears as soon as the stream is closed. dies on any failure
FILE *gap_tmpfile(char* path)
{
  // room for path, the 10-char suffix ".tmpXXXXXX" and the terminator
  size_t plen = strlen(path);
  char s[plen+11];
  sprintf(s,"%s.tmpXXXXXX",path);
  assert(strlen(s)==plen + 10);
  // mkstemp replaces the X's in s and returns an open descriptor
  int fd = mkstemp(s);
  if(fd == -1) die("gap_tmpfile: Tempfile creation failed (1)");
  // wrap the descriptor in a FILE * (we need buffering)
  FILE *f = fdopen(fd,"w+");
  if(f==NULL) die("gap_tmpfile: Tempfile creation failed (2)");
  // remove the name: the data lives until the stream is closed
  if(unlink(s)!=0) die("gap_tmpfile: Tempfile creation failed (3)");
  return f;
}
// read/write huge blocks to file using multiple pread/pwrite calls

// write count bytes from vbuf to fd starting at file position offset,
// repeating pwrite until everything has been written (a single call may
// write fewer bytes than requested).
// dies if pwrite reports an error (-1) or makes no progress (0)
void huge_pwrite(int fd, const void *vbuf, size_t count, off_t offset)
{
  const char *buf = (const char *) vbuf;
  while(count>0) {
    ssize_t w = pwrite(fd,buf,count,offset);
    // a negative return must be fatal too: subtracting it would enlarge
    // count and move buf/offset backwards
    if(w<=0) die(__func__);
    count -= (size_t) w;
    buf += w;
    offset += w;
  }
}
// read count bytes from fd starting at file position offset into vbuf,
// repeating pread until everything has been read (a single call may
// return fewer bytes than requested).
// dies if pread reports an error (-1) or hits end of file (0) before
// count bytes have been obtained
void huge_pread(int fd, void *vbuf, size_t count, off_t offset)
{
  char *buf = (char *) vbuf;
  while(count>0) {
    ssize_t r = pread(fd,buf,count,offset);
    // a negative return must be fatal too: subtracting it would enlarge
    // count and move buf/offset backwards
    if(r<=0) die(__func__);
    count -= (size_t) r;
    buf += r;
    offset += r;
  }
}
// ----- color writer functions

// write the w->cur buffered palette entries to w->fd at byte position
// w->offset, then advance w->offset past them and empty the buffer.
// does nothing when the buffer is empty
static void cwriter_flush(cwriter *w)
{
  if(w->cur<=0) return;
  size_t nbytes = w->cur*sizeof(palette);
  huge_pwrite(w->fd,w->buffer,nbytes,w->offset);
  w->offset += nbytes;
  w->cur = 0;
}
// append the palette entry c to the writer, flushing the buffer to file
// first when it is already full
void cwriter_put(cwriter *w, palette c)
{
  if(w->cur==w->size) {
    cwriter_flush(w);
  }
  assert(w->cur < w->size);
  w->buffer[w->cur] = c;
  w->cur++;
}
// flush what is buffered, then move the write position forward by s
// palette entries without writing anything
void cwriter_skip(cwriter *w, uint64_t s) {
  cwriter_flush(w);
  w->offset += s*sizeof(palette);
}
// flush what is buffered and release the buffer.
// the file descriptor w->fd is not closed here
void cwriter_close(cwriter *w) {
  cwriter_flush(w);
  free(w->buffer);
}
// return the byte position in the file of the next entry to be written:
// the position of the buffer start plus the bytes currently buffered
off_t cwriter_tell(cwriter *w) {
  size_t pending = w->cur*sizeof(palette);
  return w->offset + pending;
}
// initialize w to write palette entries to descriptor fd starting at byte
// position o, using a buffer of size entries (size must be positive).
// dies if the buffer cannot be allocated
void cwriter_init(cwriter *w, int fd, size_t size, off_t o) {
  assert(size>0);
  w->fd = fd;
  w->offset = o;
  w->size = size;
  w->cur = 0;
  w->buffer = malloc(size*sizeof(palette));
  if(w->buffer==NULL) die(__func__);
}
// --- bit file structure and related functions ---

// write the first b->cur bits of the buffer (rounded up to whole bytes) to
// b->fd at byte position b->offset, advance b->offset by the bytes written
// and empty the buffer. does nothing when b->cur is 0.
// a number of bits that is not a multiple of 8 is accepted only when
// endfile is true, otherwise the function dies
static void bitfile_save(bitfile *b, bool endfile)
{
  if(b->cur<=0) return;
  bool partial_byte = (b->cur%8!=0);
  if(partial_byte && !endfile) die("Illegal bitfile save");
  size_t bytes = (b->cur+7)/8;
  assert(bytes*8 <= b->size);
  huge_pwrite(b->fd,b->buffer,bytes,b->offset);
  b->offset += bytes;
  b->size = 0;
  b->cur = 0;
}
// write to file the bits still in the buffer, allowing a final partial byte.
// meant to be called once the whole bitfile has been traversed: afterwards
// the byte offset must equal the file size in bytes
void bitfile_flush(bitfile *b)
{
  const bool at_end = true;
  bitfile_save(b, at_end);
  assert(b->offset==(b->filesize+7)/8);
}
// init a bitfile: create the backing file, extend it with ftruncate to
// (size+7)/8 bytes so that it holds size zero bits, and allocate the buffer.
// if order==0 the file is a mkstemp temporary named <path>.bitXXXXXX that is
// unlinked right after creation; otherwise the file is named
// <path>.<order>.lcpbit1 and is left on disk after the computation.
// dies on any failure, including a file name that does not fit in
// Filename_size bytes
void bitfile_create(bitfile *b, size_t size, char *path, int order) {
  char s[Filename_size];
  if(order==0) { // this is a temp file: create unique name
    // bounded formatting: a long path must not overflow s
    int len = snprintf(s,sizeof(s),"%s.bitXXXXXX",path);
    if(len<0 || (size_t)len>=sizeof(s)) die("bitfile_create: Tempfile name too long");
    // get file descriptor for tmp file, fill it with 0s and delete file
    b->fd = mkstemp(s);
    if(b->fd == -1) die("bitfile_create: Tempfile creation failed");
    int e = ftruncate(b->fd,(size+7)/8); // fill with size 0 bits
    if(e!=0) die("bitfile_create: Tempfile ftruncate failed");
    e = unlink(s);
    if(e!=0) die("bitfile_create: Tempfile unlink failed");
  }
  else { // we keep this file (to compute the DB-graph)
    int len = snprintf(s,sizeof(s),"%s.%d.lcpbit1",path,order);
    if(len<0 || (size_t)len>=sizeof(s)) die("bitfile_create: bitfile name too long");
    // get file descriptor for bitfile, fill it with 0s
    b->fd = open(s,O_RDWR|O_CREAT|O_TRUNC, 0666);
    if(b->fd == -1) die("bitfile_create: bitfile creation failed");
    int e = ftruncate(b->fd,(size+7)/8); // fill with size 0 bits
    if(e!=0) die("bitfile_create: bitfile ftruncate failed");
  }
  // initialization of other fields for b
  b->filesize = size;  // total size of the bitfile (in bits)
  b->buffer = malloc(Bitfile_bufsize_bytes);
  if(!b->buffer) die("bitfile_create: malloc error");
  b->size = 0;         // current size of the buffer in bits
  b->cur = 0;          // bit index inside buffer
  b->offset = 0;       // offset in bytes in the file
}
// release the buffer of a bitfile (the pointer is reset to NULL) and close
// its file descriptor; dies if the close fails
void bitfile_destroy(bitfile *b) {
  free(b->buffer);
  b->buffer = NULL;
  int e = close(b->fd);
  if(e!=0) die(__func__);
}
// move the virtual position back to the first bit of the file:
// the buffer is marked empty and the byte offset is reset.
// nothing is read from or written to the file
void bitfile_rewind(bitfile *b)
{
  b->offset = 0;  // byte position in the file
  b->cur = 0;     // index of the next bit inside the buffer
  b->size = 0;    // number of valid bits in the buffer
}
// read the next bit from the bitfile, replace it with (bit | new) in the
// buffer and return the value it had before.
// bits are stored least-significant first inside each byte
bool bitfile_read_or_write(bitfile *b, bool new)
{
  // buffer exhausted: save it back to file and load the next chunk
  if(b->cur >= b->size) {
    assert(b->cur==b->size);
    bitfile_save(b,false);
    assert(b->cur==0);
    assert(b->size==0);
    ssize_t got = pread(b->fd,b->buffer,Bitfile_bufsize_bytes,b->offset);
    if(got<=0) die("Unable to read bitfile data (bitfile_read_or_write)");
    // offset is deliberately left unchanged: the chunk just loaded will be
    // written back at this same position
    b->size = got*8; // number of bits available for reading
  }
  assert(b->cur<b->size);
  int byte_ix = b->cur/8;
  int bit_ix = b->cur%8;
  int mask = 1<<bit_ix;
  b->cur++;
  bool old = b->buffer[byte_ix] & mask; // previous value of the bit
  if(new) {
    b->buffer[byte_ix] |= mask;
  }
  return old;
}
// advance the virtual position by s bits without changing them.
// if the target is outside the current buffer, the consumed part of the
// buffer is saved and a new chunk containing the target is loaded
void bitfile_skip(bitfile *b, uint64_t s) {
  if(b->cur+s > b->size) {
    // hard case: we leave the current buffer.
    // first move to the next byte boundary
    int pad = (8 - (b->cur%8)) % 8;
    s -= pad;
    b->cur += pad;
    assert(b->cur%8==0 && b->cur <= b->size&& s>0);
    bitfile_save(b,false); // advance offset to the current b->cur
    // jump over whole bytes directly in the file
    b->offset += s/8;
    s = s%8; // fewer than 8 bits are left to skip
    ssize_t got = pread(b->fd,b->buffer,Bitfile_bufsize_bytes,b->offset); // read a full buffer
    if(got<0 ) die("Error reading bitfile data (bitfile_skip)");
    else if(got==0 && s>0) die("Unable to read bitfile data (bitfile_skip)");
    b->size = got*8; // number of bits available for reading
    b->cur = s;      // virtually skip the remaining bits
  }
  else {
    // easy case: the target is inside the current buffer
    b->cur += s;
  }
}
// return the index (in bits from the start of the file) of the next bit to
// be read: the bit index inside the buffer plus 8 times the byte offset
off_t bitfile_tell(bitfile *b) {
  return b->cur + 8*b->offset;
}
// read size palette entries from the file name and save the highest bit of
// each one to the file <outpath>.<order>.lcpbit0, packing 8 bits per byte
// with entry i stored in bit i%8 (least significant bit first).
// used for extracting info for dbGraph.
// dies on any I/O error or if the output name does not fit in
// Filename_size bytes
void extract_bitfile(char *name, size_t size, char *outpath, int order)
{
  FILE *f = fopen(name,"rb");
  if(f==NULL) die("extract_bitfile: unable to open input file");
  // bounded formatting: a long outpath must not overflow s
  char s[Filename_size];
  int len = snprintf(s,sizeof(s),"%s.%d.lcpbit0",outpath,order);
  if(len<0 || (size_t)len>=sizeof(s)) die("extract_bitfile: output file name too long");
  FILE *g = fopen(s,"wb");
  if(g==NULL) die("extract_bitfile: unable to open output file");
  uint8_t buf=0;
  palette p;
  // mask selecting the most significant bit of a palette entry
  palette mask = ((uint64_t) 1) << ((8*sizeof(palette)) -1);
  for(size_t i=0; i<size;i++) {
    if(fread(&p, sizeof(palette), 1, f)!=1) {
      die("extract_bitfile: error reading from input file");
    }
    if(p&mask) { // highest bit is 1
      buf |= (1u << (i%8));
    }
    if(i%8==7) { // byte complete: write it and start a new one
      if(fwrite(&buf,1,1,g)!=1) {
        die("extract_bitfile: error writing to output file");
      }
      buf = 0;
    }
  }
  // write the last, partially filled, byte
  if(size%8 != 0) {
    if(fwrite(&buf,1,1,g)!=1) {
      die("extract_bitfile: error writing to output file");
    }
  }
  assert(ftell(g)==(size+7)/8);
  // closing flushes the buffered output: a failure here means lost data
  if(fclose(g)!=0) die("extract_bitfile: error closing output file");
  fclose(f);
}