diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..15e74aa --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +.vscode +build +cmake-build-debug +tags + +bminor +*.o + +test/**/*.out \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9cb22c0 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +CC = gcc +.DEFAULT_GOAL = bminor + +encoder.o: encoder.c + $(CC) -c $^ -o $@ + +bminor: bminor.c encoder.o + $(CC) $^ -o $@ + +clean: + rm -f *.o + rm -f bminor \ No newline at end of file diff --git a/bminor.c b/bminor.c new file mode 100644 index 0000000..67147c8 --- /dev/null +++ b/bminor.c @@ -0,0 +1,55 @@ +#include +#include +#include + +#include "encoder.h" + +void usage(int exit_code) +{ + printf("Usage of bminor.\n"); + exit(exit_code); +} + +int main(int argc, char* argv[]) +{ + + if (argc == 1) + usage(1); + + for (int i = 1; i < argc; i++) + { + if (argv[i][0] == '-') + { + if (strcmp(argv[i], "--help") == 0) + usage(0); + else if (strcmp(argv[i], "--encode") == 0) + { + char* filename = argv[++i]; + if (filename) + { + if (decode(filename) == 0) + { +// printf("Successfully decoded file %s\n", filename); + return EXIT_SUCCESS; + } else + { + fprintf(stderr, "Failed to decode file %s\n", filename); + return EXIT_FAILURE; + } + } + else + { + fprintf(stderr, "Missing filename to be encoded\n"); + return EXIT_FAILURE; + } + } + else + { + fprintf(stderr, "Unknown option '%s'\n", argv[i]); + usage(1); + } + } + } + + return EXIT_SUCCESS; +} diff --git a/encoder.c b/encoder.c new file mode 100644 index 0000000..f480f51 --- /dev/null +++ b/encoder.c @@ -0,0 +1,256 @@ +#include +#include +#include + +#define MAX_STRING_LEN 255 + +int is_hex(char c) +{ + return c >= '0' && c <= '9' || + c >= 'A' && c <= 'F' || + c >= 'a' && c <= 'f'; +} + +int string_decode(const char* es, char* s) +{ +// printf("Decoding: %s\n", es); + + // Check for start and end quotes + size_t es_len = strlen(es); + if (es_len < 2 || *es != '"' || *(es + es_len - 1) != '"') + { + fprintf(stderr, "Invalid string: does not start or end with a quote\n"); + return 1; + } + + const char* es_ptr = es + 1; + char* s_ptr = s; + size_t len = 0; + + while (es_ptr < es + es_len - 1) + { + // Check current length of decoded string + if (len >= MAX_STRING_LEN) + { + fprintf(stderr, "Invalid string: too long\n"); + return 1; + } + + // Check for printable characters + if (*es_ptr < 32 || *es_ptr > 126) + { + fprintf(stderr, "Invalid string: invalid character\n"); + return 1; + } + + // Check for escape sequences + if (*es_ptr == '\\') + { + es_ptr++; + switch (*es_ptr) + { + case 'a': + *s_ptr = '\a'; + break; + case 'b': + *s_ptr = '\b'; + break; + case 'e': + *s_ptr = 27; + break; + case 'f': + *s_ptr = '\f'; + break; + case 'n': + *s_ptr = '\n'; + break; + case 'r': + *s_ptr = '\r'; + break; + case 't': + *s_ptr = '\t'; + break; + case 'v': + *s_ptr = '\v'; + break; + case '\\': + *s_ptr = '\\'; + break; + case '\'': + *s_ptr = '\''; + break; + case '"': + if (*(es_ptr + 1) == '\0') + { + fprintf(stderr, "Invalid string: escape sequence at end of string\n"); + return 1; + } + *s_ptr = '"'; + break; + case '0': + es_ptr++; + if (*es_ptr == 'x') + { + es_ptr++; + if (is_hex(*es_ptr) && is_hex(*(es_ptr + 1))) + { + char hex[2] = { *es_ptr, *(es_ptr + 1) }; + char val = (char)strtol(hex, NULL, 16); + if (val < 0 || val > 127) + { + fprintf(stderr, "Invalid string: hex character not in ASCII\n"); + return 1; + } + *s_ptr = val; + es_ptr += 1; + } + else + { + fprintf(stderr, "Invalid string: invalid hex escape sequence\n"); + return 1; + } + } + break; + default: + fprintf(stderr, "Invalid string: invalid escape sequence\n"); + return 1; + } + es_ptr++; + s_ptr++; + len++; + continue; + } + + *s_ptr = *es_ptr; + + es_ptr++; + s_ptr++; + len++; + } + +// printf("Decoded result: %s\n", s); + + return 0; +} + +int string_encode(const char* s, char* es) +{ + const char* s_ptr = s; + char* es_ptr = es; + + *es_ptr = '"'; + es_ptr++; + + while (*s_ptr != '\0') + { + // printf("Current: %s\n", s); + // Handle printable characters + if (*s_ptr >= 32 && *s_ptr <= 126) + { + if (*s_ptr == '"' || *s_ptr == '\\') + { + *es_ptr = '\\'; + es_ptr++; + } + *es_ptr = *s_ptr; + es_ptr++; + s_ptr++; + continue; + } + + // Handle escape sequences + *es_ptr = '\\'; + es_ptr++; + switch (*s_ptr) + { + case '\a': + *es_ptr = 'a'; + break; + case '\b': + *es_ptr = 'b'; + break; + case 27: + *es_ptr = 'e'; + break; + case '\f': + *es_ptr = 'f'; + break; + case '\n': + *es_ptr = 'n'; + break; + case '\r': + *es_ptr = 'r'; + break; + case '\t': + *es_ptr = 't'; + break; + case '\v': + *es_ptr = 'v'; + break; + + default: + sprintf(es_ptr, "0x%X", *s_ptr); + es_ptr += 3; + s_ptr++; + break; + } + es_ptr++; + s_ptr++; +// printf("Encoded result: %s\n", es); + } + + *es_ptr = '"'; + es_ptr++; + *es_ptr = '\0'; + return 0; +} + +int decode(char* filename) +{ + FILE* file = fopen(filename, "r"); + if (file == NULL) + { + printf("Could not open file %s\n", filename); + return 1; + } + + // Find the size of the file + fseek(file, 0, SEEK_END); + long file_size = ftell(file); + if (file_size > (MAX_STRING_LEN * 5 + 2) * sizeof(char)) + { + fprintf(stderr, "Invalid string: too long\n"); + fclose(file); + return 1; + } + rewind(file); + + // Allocate memory for the file content + // Reserve space for \0 + char* file_content = (char*)malloc(file_size + sizeof(char)); + if (file_content == NULL) + { + perror("Could not allocate memory"); + fclose(file); + return 1; + } + + // Read the file content into the allocated memory + size_t chars_read = fread(file_content, sizeof(char), file_size, file); + file_content[chars_read] = '\0'; + fclose(file); + + char s[MAX_STRING_LEN + 1] = { 0 }; + + if (string_decode(file_content, s) == 0) + { + char es[chars_read]; + memset(es, 0, chars_read); + string_encode(s, es); + printf("%s\n", es); + return 0; + } + else + return 1; + +} diff --git a/encoder.h b/encoder.h new file mode 100644 index 0000000..0d58c70 --- /dev/null +++ b/encoder.h @@ -0,0 +1,5 @@ +int string_decode(const char* es, char* s); + +int string_encode(const char* s, char* es); + +int decode(char* filename); \ No newline at end of file diff --git a/runtest.sh b/runtest.sh new file mode 100755 index 0000000..5ae48ca --- /dev/null +++ b/runtest.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +export PATH=$PATH:. + +for testfile in test/encode/good*.bminor; do + if bminor --encode "$testfile" >"$testfile.out"; then + echo "$testfile success (as expected)" + else + echo "$testfile failure (INCORRECT)" + fi +done + +for testfile in test/encode/bad*.bminor; do + if bminor --encode "$testfile" >"$testfile.out"; then + echo "$testfile success (INCORRECT)" + else + echo "$testfile failure (as expected)" + fi +done diff --git a/test/encode/bad0.bminor b/test/encode/bad0.bminor new file mode 100644 index 0000000..e69de29 diff --git a/test/encode/bad1.bminor b/test/encode/bad1.bminor new file mode 100644 index 0000000..9d68933 --- /dev/null +++ b/test/encode/bad1.bminor @@ -0,0 +1 @@ +" \ No newline at end of file diff --git a/test/encode/bad2.bminor b/test/encode/bad2.bminor new file mode 100644 index 0000000..dacd1ae --- /dev/null +++ b/test/encode/bad2.bminor @@ -0,0 +1 @@ +"a \" \ No newline at end of file diff --git a/test/encode/bad3.bminor b/test/encode/bad3.bminor new file mode 100644 index 0000000..29dcb53 --- /dev/null +++ b/test/encode/bad3.bminor @@ -0,0 +1 @@ +"\\\" \ No newline at end of file diff --git a/test/encode/bad4.bminor b/test/encode/bad4.bminor new file mode 100644 index 0000000..b49ad6e --- /dev/null +++ b/test/encode/bad4.bminor @@ -0,0 +1 @@ +"\ " \ No newline at end of file diff --git a/test/encode/bad5.bminor b/test/encode/bad5.bminor new file mode 100644 index 0000000..d6488fa --- /dev/null +++ b/test/encode/bad5.bminor @@ -0,0 +1 @@ +"\0x" \ No newline at end of file diff --git a/test/encode/bad6.bminor b/test/encode/bad6.bminor new file mode 100644 index 0000000..fda73f0 --- /dev/null +++ b/test/encode/bad6.bminor @@ -0,0 +1 @@ +" 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789..." \ No newline at end of file diff --git a/test/encode/bad7.bminor b/test/encode/bad7.bminor new file mode 100644 index 0000000..6dd894e --- /dev/null +++ b/test/encode/bad7.bminor @@ -0,0 +1 @@ +"\"\\\a\0xA " \ No newline at end of file diff --git a/test/encode/bad8.bminor b/test/encode/bad8.bminor new file mode 100644 index 0000000..a52e51e --- /dev/null +++ b/test/encode/bad8.bminor @@ -0,0 +1 @@ +" \ n \0xAB" \ No newline at end of file diff --git a/test/encode/bad9.bminor b/test/encode/bad9.bminor new file mode 100644 index 0000000..b8b2e2e --- /dev/null +++ b/test/encode/bad9.bminor @@ -0,0 +1 @@ +"\ n" \ No newline at end of file diff --git a/test/encode/good0.bminor b/test/encode/good0.bminor new file mode 100644 index 0000000..8d008a7 --- /dev/null +++ b/test/encode/good0.bminor @@ -0,0 +1 @@ +"Hello \n World!" \ No newline at end of file diff --git a/test/encode/good1.bminor b/test/encode/good1.bminor new file mode 100644 index 0000000..0be824e --- /dev/null +++ b/test/encode/good1.bminor @@ -0,0 +1 @@ +"\a\b\e\f\n\r\t\v\\\'\"and\0x7F s \0x0Aabs" \ No newline at end of file diff --git a/test/encode/good2.bminor b/test/encode/good2.bminor new file mode 100644 index 0000000..56fbf6d --- /dev/null +++ b/test/encode/good2.bminor @@ -0,0 +1 @@ +" \t Aa 1024 \n \"" \ No newline at end of file diff --git a/test/encode/good3.bminor b/test/encode/good3.bminor new file mode 100644 index 0000000..8e66bcf --- /dev/null +++ b/test/encode/good3.bminor @@ -0,0 +1 @@ +"\"\"\'8abc1\'\"\"" \ No newline at end of file diff --git a/test/encode/good4.bminor b/test/encode/good4.bminor new file mode 100644 index 0000000..3cc762b --- /dev/null +++ b/test/encode/good4.bminor @@ -0,0 +1 @@ +"" \ No newline at end of file diff --git a/test/encode/good5.bminor b/test/encode/good5.bminor new file mode 100644 index 0000000..3f03165 --- /dev/null +++ b/test/encode/good5.bminor @@ -0,0 +1 @@ +" 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789\0x1F " \ No newline at end of file diff --git a/test/encode/good6.bminor b/test/encode/good6.bminor new file mode 100644 index 0000000..721743a --- /dev/null +++ b/test/encode/good6.bminor @@ -0,0 +1 @@ +"^samuel(g&huang@outlook.com^)# " \ No newline at end of file diff --git a/test/encode/good7.bminor b/test/encode/good7.bminor new file mode 100644 index 0000000..9987773 --- /dev/null +++ b/test/encode/good7.bminor @@ -0,0 +1 @@ +"1234567890qwertyuiop[];',./" \ No newline at end of file diff --git a/test/encode/good8.bminor b/test/encode/good8.bminor new file mode 100644 index 0000000..502fb92 --- /dev/null +++ b/test/encode/good8.bminor @@ -0,0 +1 @@ +" a b c 3\t\n\n80\n\n\nn " \ No newline at end of file diff --git a/test/encode/good9.bminor b/test/encode/good9.bminor new file mode 100644 index 0000000..5b04dbc --- /dev/null +++ b/test/encode/good9.bminor @@ -0,0 +1 @@ +" " \ No newline at end of file