diff --git a/Makefile.am b/Makefile.am index 6e40be1..6b05503 100644 --- a/Makefile.am +++ b/Makefile.am @@ -12,4 +12,4 @@ EXTRA_DIST = reconf configure SUBDIRS = m4 src doc tests pkgconfigdir = $(libdir)/pkgconfig -pkgconfig_DATA = gtextutils-0.2.pc +pkgconfig_DATA = gtextutils-0.3.pc diff --git a/configure.ac b/configure.ac index d0ad004..67803bd 100644 --- a/configure.ac +++ b/configure.ac @@ -9,7 +9,7 @@ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. AC_INIT([Gordon-Text_utils-Library], - [0.2], + [0.3], [A. Gordon gordon@cshl.edu], [libgtextutils]) AC_CONFIG_AUX_DIR(config) @@ -60,6 +60,18 @@ else fi +dnl --enable-tuple-parser-check +AC_ARG_ENABLE(tuple-parser-check, +[ --enable-tuple-parser-check Enable Tuple Parser Check (default disabled, requires g++ > 4.3.2)], +[case "${enableval}" in + yes) tuple_parser_check=true ;; + no) tuple_parser_check=false ;; + *) AC_MSG_ERROR(bad value ${enableval} for --enable-tuple-parser-check) ;; +esac],[tuple_parser_check=false]) +AM_CONDITIONAL([TUPLE_PARSER_CHECK], [test x$tuple_parser_check = xtrue]) + + + AC_CONFIG_FILES([ Makefile README @@ -67,7 +79,7 @@ AC_CONFIG_FILES([ m4/Makefile src/Makefile src/gtextutils/Makefile - gtextutils-0.2.pc + gtextutils-0.3.pc tests/Makefile ]) diff --git a/gtextutils-0.2.pc.in b/gtextutils-0.3.pc.in similarity index 100% rename from gtextutils-0.2.pc.in rename to gtextutils-0.3.pc.in diff --git a/src/gtextutils/Makefile.am b/src/gtextutils/Makefile.am index 049d88d..f2e674d 100644 --- a/src/gtextutils/Makefile.am +++ b/src/gtextutils/Makefile.am @@ -9,9 +9,9 @@ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
-lib_LIBRARIES = libgtextutils-0.2.a +lib_LIBRARIES = libgtextutils-0.3.a -libgtextutils_0_2_a_SOURCES = stream_wrapper.cpp stream_wrapper.h \ +libgtextutils_0_3_a_SOURCES = stream_wrapper.cpp stream_wrapper.h \ text_line_reader.cpp text_line_reader.h \ container_join.h \ natsort.h \ @@ -21,9 +21,9 @@ libgtextutils_0_2_a_SOURCES = stream_wrapper.cpp stream_wrapper.h \ inbuf1.hpp \ pipe_fitter.c pipe_fitter.h -libgtextutils_0_2_a_includedir = $(includedir)/gtextutils-0.2/gtextutils +libgtextutils_0_3_a_includedir = $(includedir)/gtextutils-$(VERSION)/gtextutils -libgtextutils_0_2_a_include_HEADERS = container_join.h \ +libgtextutils_0_3_a_include_HEADERS = container_join.h \ text_line_reader.h \ stream_wrapper.h \ natsort.h \ diff --git a/src/gtextutils/inbuf1.hpp b/src/gtextutils/inbuf1.hpp new file mode 100644 index 0000000..5122b95 --- /dev/null +++ b/src/gtextutils/inbuf1.hpp @@ -0,0 +1,145 @@ +/* + Gordon's Text-Utilities Library + Copyright (C) 2009 Assaf Gordon (gordon@cshl.edu) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see +*/ + +#ifndef __JOSUTTIS_FD_INBUF_H__ +#define __JOSUTTIS_FD_INBUF_H__ + +/* The following code example is taken from the book + * "The C++ Standard Library - A Tutorial and Reference" + * by Nicolai M. Josuttis, Addison-Wesley, 1999 + * + * (C) Copyright Nicolai M. Josuttis 1999. 
+ * Permission to copy, use, modify, sell and distribute this software
+ * is granted provided this copyright notice appears in all copies.
+ * This software is provided "as is" without express or implied
+ * warranty, and with no claim as to its suitability for any purpose.
+ *
+ * Added by A. Gordon:
+ * The file is available as "io/inbuf1.hpp" in the examples tarball at
+ * http://www.josuttis.com/libbook/
+ *
+ * And in the book at Chapter 13, Page 678.
+ *
+ * Modifications:
+ * 1. Larger buffer, with vector
+ * 2. Accepts input file descriptor in c'tor
+ */
+
+#include
+#include
+#include
+#include
+
+// for read():
+#ifdef _MSC_VER
+# include
+#else
+# include
+#endif
+
+class josuttis_fd_inbuf : public std::streambuf {
+  protected:
+    /* input-only stream buffer on top of a file descriptor; data buffer:
+     * - at most putBackSize (four) characters in putback area plus
+     * - at most 32768 characters in ordinary read buffer
+     */
+    static const int putBackSize = 4 ;
+    static const int bufferSize = putBackSize + 32768; // size of the data buffer
+    std::vector buffer_vector ;
+    char* buffer;
+    int input_fd;
+
+  public:
+    /* constructor
+     * - remembers _input_fd for read(); nothing in this class closes it
+     * - initialize empty data buffer, no putback area
+     *   => force underflow()
+     */
+    josuttis_fd_inbuf( int _input_fd ) :
+        buffer_vector(bufferSize),
+        buffer(&buffer_vector[0]),
+        input_fd(_input_fd)
+    {
+        setg (buffer+putBackSize, // beginning of putback area
+              buffer+putBackSize, // read position
+              buffer+putBackSize); // end position
+    }
+  protected:
+    // insert new characters into the buffer
+    virtual int_type underflow () {
+
+        // is read position before end of buffer?
+        if (gptr() < egptr()) {
+            return traits_type::to_int_type(*gptr());
+        }
+
+        /* process size of putback area
+         * - use number of characters read
+         * - but at most putBackSize (four)
+         */
+        int numPutback;
+        numPutback = gptr() - eback();
+        if (numPutback > putBackSize) {
+            numPutback = putBackSize;
+        }
+
+        /* copy up to four characters previously read into
+         * the putback buffer (area of first four characters)
+         */
+        std::memmove (buffer+(putBackSize-numPutback), gptr()-numPutback,
+                      numPutback);
+
+        // read new characters (one read() call, placed right after the putback area)
+        int num;
+        num = read (input_fd, buffer+putBackSize, bufferSize-putBackSize);
+        if (num <= 0) {
+            // ERROR or EOF (read() returned -1 or 0); both are reported as EOF
+            return EOF;
+        }
+
+        // reset buffer pointers
+        setg (buffer+(putBackSize-numPutback), // beginning of putback area
+              buffer+putBackSize, // read position
+              buffer+putBackSize+num); // end of buffer
+
+        // return next character
+        return traits_type::to_int_type(*gptr());
+    }
+};
+
+/*
+ * An input stream that reads from a file descriptor through the above inbuf
+ *
+ * Based on code example from page 673 (class fdostream)
+ */
+
+class josuttis_fdistream : public std::istream
+{
+private:
+    josuttis_fd_inbuf buf ;
+public:
+    josuttis_fdistream ( int fd ) :
+        std::istream(0), // base classes are initialized before 'buf', so start without a buffer
+        buf(fd)
+    {
+        rdbuf(&buf) ; // attach the buffer now that it has been constructed
+    }
+};
+
+
+#endif
diff --git a/src/gtextutils/pipe_fitter.c b/src/gtextutils/pipe_fitter.c
new file mode 100644
index 0000000..2696e2e
--- /dev/null
+++ b/src/gtextutils/pipe_fitter.c
@@ -0,0 +1,122 @@
+/*
+    Gordon's Text-Utilities Library
+    Copyright (C) 2009 Assaf Gordon (gordon@cshl.edu)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program. If not, see
+*/
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "pipe_fitter.h"
+
+/*
+ * Closes the pipe descriptor 'fd', then waits for the child process 'pid'.
+ * Terminates the program (err/errx, exit status 1) if close() or waitpid()
+ * fails, or if the child did not exit normally with code 0. Returns 0.
+ */
+int pipe_close ( int fd, pid_t pid )
+{
+    int status, exit_code;
+
+    /* close() reports failure through its return value, not through 'fd' */
+    if ( close(fd) == -1 )
+        err(1,"close(in pipe_close) failed");
+    if ( waitpid(pid, &status, 0) == -1 )
+        err(1,"waitpid(in pipe_close) failed");
+
+    if (!WIFEXITED(status))
+        errx(1,"child process terminated abnormally (in pipe_close), status=%08x", status);
+
+    exit_code = WEXITSTATUS(status);
+    if (exit_code != 0)
+        errx(1,"child process terminated with error code %d (in pipe_close)", exit_code);
+    return 0;
+}
+
+/* Forks and runs 'executable' (found through PATH, no arguments), joined to
+ * the caller by a pipe; 'filename', if not NULL, is the child's other stream.
+ *   pipe_input != 0: file -> child -> caller, returns the pipe's read end;
+ *   pipe_input == 0: caller -> child -> file, returns the pipe's write end.
+ * Stores the child's pid in *child_pid. Any failure exits through err(). */
+static int pipe_open (
+    const char* executable,
+    const char* filename,
+    int pipe_input,
+    pid_t* /*OUTPUT*/ child_pid )
+{
+    int file_fd=-1, parent_pipe[2];
+    pid_t pid;
+
+    //Create the pipe descriptors
+    if (pipe(parent_pipe)!=0)
+        err(1,"pipe (for '%s') failed", executable);
+
+    //Open the file (input or output); O_TRUNC drops the old tail of a longer output file
+    if (filename != NULL) {
+        file_fd = open(filename,
+            pipe_input?(O_RDONLY):(O_CREAT|O_WRONLY|O_TRUNC),
+            0666 ) ;
+        if ( file_fd == -1 )
+            err(1,"failed to %s file '%s'",
+                pipe_input?"open":"create", filename ) ;
+    }
+
+    pid = fork();
+    if (pid == -1)
+        err(1,"Fork failed for '%s'", executable);
+
+    if (pid>0) { /* The parent process */
+        *child_pid = pid;
+        if (file_fd != -1)
+            close(file_fd); //the parent process doesn't need the file handle.
+        close(parent_pipe[ pipe_input?1:0 ]);
+        return (parent_pipe[ pipe_input?0:1 ]);
+    }
+
+    /* The child process: wire up stdin/stdout, then exec */
+    if (pipe_input) {
+        if (dup2(parent_pipe[1], STDOUT_FILENO) == -1)
+            err(1,"dup2 (stdout of '%s') failed", executable);
+        close(parent_pipe[0]);
+        if (file_fd != -1 && dup2(file_fd, STDIN_FILENO) == -1)
+            err(1,"dup2 (stdin of '%s') failed", executable);
+    } else {
+        if (dup2(parent_pipe[0], STDIN_FILENO) == -1)
+            err(1,"dup2 (stdin of '%s') failed", executable);
+        close(parent_pipe[1]);
+        if (file_fd != -1 && dup2(file_fd, STDOUT_FILENO) == -1)
+            err(1,"dup2 (stdout of '%s') failed", executable);
+    }
+
+    execlp(executable,executable,(char*)NULL);
+    //Should never get here...
+    err(1,"execlp(%s) failed",executable);
+}
+
+/* Runs 'command' with its stdin fed by the returned fd and its stdout sent to 'output_filename' */
+int pipe_output_command ( const char* command, const char* output_filename, pid_t* /*OUTPUT*/ child_pid )
+{
+    return pipe_open ( command, output_filename, 0, child_pid ) ;
+}
+
+/* Runs 'command' with its stdin read from 'input_filename'; the returned fd reads its stdout */
+int pipe_input_command ( const char* command, const char* input_filename, pid_t* /*OUTPUT*/ child_pid )
+{
+    return pipe_open ( command, input_filename, 1, child_pid ) ;
+}
diff --git a/src/gtextutils/pipe_fitter.h b/src/gtextutils/pipe_fitter.h
new file mode 100644
index 0000000..de6575e
--- /dev/null
+++ b/src/gtextutils/pipe_fitter.h
@@ -0,0 +1,37 @@
+/*
+    Gordon's Text-Utilities Library
+    Copyright (C) 2009 Assaf Gordon (gordon@cshl.edu)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.
If not, see
+*/
+
+#ifndef __PIPE_FITTER_H__
+#define __PIPE_FITTER_H__
+
+#include <sys/types.h> /* pid_t, used by the prototypes below */
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+/* Each starts 'command' as a child process and returns the caller's pipe fd to it (see pipe_fitter.c) */
+int pipe_output_command ( const char* command, const char* output_filename, pid_t* /*OUTPUT*/ child_pid );
+int pipe_input_command ( const char* command, const char* input_filename, pid_t* /*OUTPUT*/ child_pid );
+/* Closes 'fd' and waits for child 'pid'; exits the program unless the child ended with code 0 */
+int pipe_close ( int fd, pid_t pid ) ;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tests/Makefile.am b/tests/Makefile.am index efeb0e3..b2dddfd 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -9,8 +9,15 @@ # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +EXTRA_DIST = test.txt noinst_DATA = test.txt +if TUPLE_PARSER_CHECK +TUPLE_PROG = test_tuple_parser \ + test_tuple_parser_file +else +TUPLE_PROG = +endif check_PROGRAMS = test_container_join \ test_natural_sort \ @@ -20,21 +27,22 @@ check_PROGRAMS = test_container_join \ test_fd_outbuf \ test_fd_inbuf \ test_in_out_buf \ - test_tuple_parser \ - test_pipe_fitter + test_pipe_fitter \ + $(TUPLE_PROG) TESTS = $(check_PROGRAMS) LDADD = $(top_srcdir)/src/gtextutils/libgtextutils-$(VERSION).a INCLUDES = -I$(top_srcdir)/src -test_container_join_SOURCES = test_container_join.cpp -test_natural_sort_SOURCES = test_natural_sort.cpp -test_input_stream_wrapper_SOURCES = test_input_stream_wrapper.cpp -test_text_reader_SOURCES = test_text_reader.cpp -test_text_reader_unget_SOURCES = test_text_reader_unget.cpp -test_fd_outbuf_SOURCES = test_fd_outbuf.cpp -test_fd_inbuf_SOURCES = test_fd_inbuf.cpp -test_in_out_buf_SOURCES = test_in_out_buf.cpp -test_pipe_fitter_SOURCES = test_pipe_fitter.c -test_tuple_parser_SOURCES = test_tuple_parser.cpp +test_container_join_SOURCES = test_container_join.cpp tests_assertion.h +test_natural_sort_SOURCES = test_natural_sort.cpp tests_assertion.h +test_input_stream_wrapper_SOURCES = test_input_stream_wrapper.cpp tests_assertion.h +test_text_reader_SOURCES = test_text_reader.cpp tests_assertion.h
+test_text_reader_unget_SOURCES = test_text_reader_unget.cpp tests_assertion.h +test_fd_outbuf_SOURCES = test_fd_outbuf.cpp tests_assertion.h +test_fd_inbuf_SOURCES = test_fd_inbuf.cpp tests_assertion.h +test_in_out_buf_SOURCES = test_in_out_buf.cpp tests_assertion.h +test_pipe_fitter_SOURCES = test_pipe_fitter.c tests_assertion.h +test_tuple_parser_SOURCES = test_tuple_parser.cpp tests_assertion.h +test_tuple_parser_file_SOURCES = test_tuple_parser_file.cpp tests_assertion.h diff --git a/tests/test.txt b/tests/test.txt new file mode 100644 index 0000000..7a4a73a --- /dev/null +++ b/tests/test.txt @@ -0,0 +1,4 @@ +first line +second line +third line +fourth line diff --git a/tests/test_fd_inbuf.cpp b/tests/test_fd_inbuf.cpp new file mode 100644 index 0000000..e319acc --- /dev/null +++ b/tests/test_fd_inbuf.cpp @@ -0,0 +1,72 @@ +/* + Gordon's Text-Utilities Library + Copyright (C) 2009 Assaf Gordon (gordon@cshl.edu) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see +*/ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include "tests_assertion.h" + +/* + * Test for File-Descriptor-based input buffer. 
+ */
+
+using namespace std;
+
+const char* filename = "test.txt" ;
+
+int main()
+{
+    //Open the test file using the C API (the stream under test wraps a raw descriptor)
+    int fd = open(filename, O_RDONLY);
+    if (fd == -1)   //nothing below can work without the file: stop here (err() does not return)
+        err(1,"Failed to open test file 'test.txt'");
+
+    //Connect the file-descriptor to an input stream
+    josuttis_fdistream in1(fd);
+    string s;
+
+    //Read word by word (operator>> splits on whitespace) and verify against tests/test.txt
+    in1 >> s ;
+    ASSERT ( s == "first" ) ;
+    in1 >> s ;
+    ASSERT ( s == "line" ) ;
+    in1 >> s ;
+    ASSERT ( s == "second" ) ;
+    in1 >> s ;
+    ASSERT ( s == "line" ) ;
+    in1 >> s ;
+    ASSERT ( s == "third" ) ;
+    in1 >> s ;
+    ASSERT ( s == "line" ) ;
+    in1 >> s ;
+    ASSERT ( s == "fourth" ) ;
+    in1 >> s ;
+    ASSERT ( s == "line" ) ;
+
+    close(fd);
+}
diff --git a/tests/test_in_out_buf.cpp b/tests/test_in_out_buf.cpp
new file mode 100644
index 0000000..37efbb0
--- /dev/null
+++ b/tests/test_in_out_buf.cpp
@@ -0,0 +1,92 @@
+/*
+    Gordon's Text-Utilities Library
+    Copyright (C) 2009 Assaf Gordon (gordon@cshl.edu)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program. If not, see
+*/
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include "tests_assertion.h"
+
+/*
+ * Test for File-Descriptor-based input and output buffers, through a system pipe.
+ */
+
+using namespace std;
+
+// Forks 'program -f' with its stdin fed by the returned pipe descriptor and
+// its stdout redirected to 'output_filename'; any failure exits through err().
+int fork_pipe_command ( const char* program, const char* output_filename)
+{
+    int parent_pipe[2];
+    if (pipe(parent_pipe)!=0)
+        err(1,"pipe (for gzip) failed");
+
+    pid_t child_pid = fork();
+    if (child_pid == -1)    //without this check a failed fork would run the child code below
+        err(1,"fork (for '%s') failed", program);
+    if (child_pid>0) {
+        /* The parent process keeps only the write end */
+        close(parent_pipe[0]);
+        return parent_pipe[1];
+    }
+
+    /* The child process */
+    //the compressor's STDIN is the pipe from the parent
+    dup2(parent_pipe[0], STDIN_FILENO);
+    close(parent_pipe[1]);
+
+    //the compressor's STDOUT is the output file
+    //(O_TRUNC so that an older, longer file leaves no stale bytes behind)
+    int fd = open(output_filename, O_WRONLY | O_CREAT | O_TRUNC, 0666 );
+    if (fd==-1)
+        err(1,"Failed to open output '%s'", output_filename);
+    dup2(fd, STDOUT_FILENO);
+
+    //Run GZIP
+    execlp(program,program,"-f",NULL);
+
+    //Should never get here...
+    err(1,"execlp(%s) failed",program);
+}
+
+// Smoke test: streams three lines into "gzip -f", whose output file is fdout.txt.gz.
+void test_compress_output()
+{
+    int pipe_out_fd = fork_pipe_command ( "gzip", "fdout.txt.gz" ) ;
+    josuttis_fdostream out(pipe_out_fd);
+    out << "first line" << endl;
+    out << "second line" << endl;
+    out << "third line" << endl;
+
+    close(pipe_out_fd);    //close our write end so the child can see end-of-input
+}
+
+int main()
+{
+    test_compress_output();
+}
diff --git a/tests/test_pipe_fitter.c b/tests/test_pipe_fitter.c
new file mode 100644
index 0000000..8076543
--- /dev/null
+++ b/tests/test_pipe_fitter.c
@@ -0,0 +1,74 @@
+/*
+    Gordon's Text-Utilities Library
+    Copyright (C) 2009 Assaf Gordon (gordon@cshl.edu)
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program. If not, see
+*/
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include "tests_assertion.h"
+
+const char* text = "Hello World\n";
+
+int main()
+{
+    int fd;
+    pid_t pid;    //same type as the pid_t* parameter of the pipe_*_command functions
+    char temp[1000];
+    char str[1000];
+    int n ;
+
+    //Pipe output through a GZIP program
+    fd = pipe_output_command ( "gzip", "pipe_out.txt.gz", &pid ) ;
+    if ( write ( fd, text, strlen(text)) != (ssize_t)strlen(text) )
+        err(1,"Write to GZIP output pipe failed");
+    pipe_close(fd, pid);
+
+    //Read input through a GUNZIP program
+    fd = pipe_input_command ( "gunzip", "pipe_out.txt.gz", &pid );
+
+    //Read entire input from the pipe.
+    //Note:
+    // 'read' doesn't have to return the entire input in one read - so
+    // we loop until the end of the file and concatenate the input.
+    str[0] = 0 ;    //start from an empty string (the array itself is uninitialized)
+    while (1) {
+        n = read ( fd, temp, sizeof(temp)-1) ;
+        if (n==-1)
+            err(1,"Read from GZIP input pipe failed");
+
+        if (n==0)
+            break; //End-Of-File
+
+        //Ensure proper NULL termination
+        temp[n] = 0 ;
+
+        strncat ( str, temp, sizeof(str)-strlen(str)-1);    //append the chunk; bounded by the room left in 'str'
+    }
+    pipe_close(fd,pid);
+
+
+    //Validate the input
+    ASSERT ( strcmp(str, text)==0 ) ;
+
+    return 0 ;
+}
diff --git a/tests/test_tuple_parser_file.cpp b/tests/test_tuple_parser_file.cpp
new file mode 100644
index 0000000..9d928ac
--- /dev/null
+++ b/tests/test_tuple_parser_file.cpp
@@ -0,0 +1,112 @@
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include "tests_assertion.h"
+
+using namespace std;
+using namespace std::tr1;
+
+// Settings of the run; the defaults can be replaced on the command line.
+std::string input_filename = "test.bed";
+size_t chrom_column = 1 ;
+size_t start_column = 2 ;
+size_t end_column = 3 ;
+bool verbose = false ;    // set by -v; nothing in this file reads it
+
+// Reads 'input_filename' line by line and extracts every line into a
+// Tuple_Parser configured with the chromosome/start/end column numbers.
+void read_and_parse_file()
+{
+    InputStreamWrapper input(input_filename);
+
+    TextLineReader reader ( input.stream() );
+
+    Tuple_Parser parser( chrom_column,
+            start_column,
+            end_column);
+    while ( reader.next_line() ) {
+        reader.line_stream() >> parser ;
+    }
+}
+
+// Prints the command-line summary and exits successfully (never returns).
+void usage()
+{
+    printf("Usage: test_tuple_parser_file [-h] [-v] [-i FILE] [-c N] [-s N] [-e N]\n"
+           "  -i FILE  input file (default: test.bed)\n"
+           "  -c N     chromosome column (default: 1)\n"
+           "  -s N     start column (default: 2)\n"
+           "  -e N     end column (default: 3)\n");
+    exit(0);
+}
+
+// Parses 'text' as a column number (1 or greater) into *column;
+// exits through errx() with the original error message on bad input.
+static void parse_column ( const char* text, const char* what, size_t* column )
+{
+    if ( sscanf(text, "%zu", column ) != 1 || *column == 0 )
+        errx(1,"Error: invalid %s column (%s)", what, text ) ;
+}
+
+// Applies the command-line options to the global settings above.
+// Exits with status 1 on an unknown option or an invalid column number.
+void parse_command_line(int argc, char* argv[])
+{
+    int opt;
+
+    // "e:" (with the colon): -e takes an argument, like -c and -s; without
+    // the colon getopt never stores the value of -e in optarg.
+    while ((opt=getopt(argc, argv,"hi:c:s:e:v")) != -1) {
+        switch(opt)
+        {
+        case 'h':
+            usage();    // does not return
+
+        case 'i':
+            input_filename = optarg ;
+            break;
+
+        case 'c':
+            parse_column ( optarg, "chromosome", &chrom_column ) ;
+            break;
+
+        case 's':
+            parse_column ( optarg, "start", &start_column ) ;
+            break;
+
+        case 'e':
+            parse_column ( optarg, "end", &end_column ) ;
+            break;
+
+        case 'v':
+            verbose=true;
+            break ;
+
+        default:
+            exit(1);
+        }
+    }
+}
+
+int main(int argc, char* argv[])
+{
+    parse_command_line(argc, argv) ;
+
+    read_and_parse_file();
+
+    return 0;
+}
diff --git a/tests/tests_assertion.h b/tests/tests_assertion.h
new file mode 100644
index 0000000..03d4956
--- /dev/null
+++ b/tests/tests_assertion.h
@@ -0,0 +1,21 @@
+#ifndef __UNIT_TESTS_ASSERTION_H__
+#define __UNIT_TESTS_ASSERTION_H__
+
+#include <err.h>    /* errx() */
+
+/*
+ * ASSERT(x): if 'x' evaluates to false, print the file, line and the text of
+ * the failed expression, then exit with status 1 (through errx).
+ * The expression text is passed as a "%s" argument rather than pasted into
+ * the format string, so a '%' inside 'x' is printed literally.
+ */
+#define ASSERT(x) \
+    do { \
+        if (!(x)) { \
+            errx(1,"ASSERTION FAILED (%s:%d): %s", \
+                __FILE__, __LINE__, #x ) ; \
+        } \
+    } while (0)
+
+
+#endif