diff --git a/pdf2dataset/extract_task.py b/pdf2dataset/extract_task.py
index dd425e6..6f190d5 100644
--- a/pdf2dataset/extract_task.py
+++ b/pdf2dataset/extract_task.py
@@ -20,12 +20,13 @@ def decorator(feature_method):
         feature_method.is_feature = True
         feature_method.is_helper = is_helper
 
-        type_ = getattr(pa, pyarrow_type)(**type_args)
+        if pyarrow_type is not None:
+            type_ = getattr(pa, pyarrow_type)(**type_args)
 
-        if isinstance(type_, pa.DataType):
-            feature_method.pyarrow_type = type_
-        else:
-            raise ValueError(f'Invalid PyArrow type {pyarrow_type}!')
+            if isinstance(type_, pa.DataType):
+                feature_method.pyarrow_type = type_
+            else:
+                raise ValueError(f'Invalid PyArrow type {pyarrow_type}!')
 
         @wraps(feature_method)
         def inner(*args, **kwargs):
@@ -45,13 +46,9 @@ def inner(*args, **kwargs):
 # TODO: Eventually, I'll make this a new lib
 class ExtractTask(ABC):
 
-    fixed_featues = ('path')
+    fixed_featues = ('path',)
     _feature_prefix = 'get_'  # Optional
 
-    # Memoization
-    _helper_list = None
-    _features_list = {}
-
     def __init__(self, path, file_bin=None, sel_features='all'):
         self.path = path
         self.file_bin = file_bin
@@ -62,6 +59,11 @@ def __init__(self, path, file_bin=None, sel_features='all'):
 
         self._init_all_features()
 
+    def __init_subclass__(cls, **kwargs):
+        super().__init_subclass__(**kwargs)  # keep the subclass hook chain
+        cls._helper_list = None  # memoized helpers, private to each class
+        cls._features_list = {}  # memoized features, private to each class
+
     @classmethod
     def list_helper_features(cls):
         if cls._helper_list is not None:
diff --git a/pyproject.toml b/pyproject.toml
index 23f9492..e28b473 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pdf2dataset"
-version = "0.5.2"
+version = "0.5.3"
 readme = "README.md"
 description = "Easily convert a subdirectory with big volume of PDF documents into a dataset, supports extracting text and images"
 authors = ["Ícaro Pires <icaropsa@gmail.com>"]
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..0ec440e
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,47 @@
+import pytest
+from pathlib import Path
+import pandas as pd
+
+
+SAMPLES_DIR = Path('tests/samples')
+SAMPLE_IMAGE = SAMPLES_DIR / 'single_page1_1.jpeg'
+PARQUET_ENGINE = 'pyarrow'
+
+
+@pytest.fixture
+def complete_df():
+    """Reference DataFrame the extraction tests compare results against."""
+
+    def load_page_image(pdf_path, page_number):
+        # The invalid1.pdf row uses page -1 and gets no image
+        if page_number == -1:
+            return None
+
+        stem = Path(pdf_path).with_suffix('')
+        jpeg_path = Path(SAMPLES_DIR) / f'{stem}_{page_number}.jpeg'
+
+        return jpeg_path.read_bytes()
+
+    columns = ['path', 'page', 'text', 'error_bool']
+    records = [
+        ['multi_page1.pdf', 1, 'First page', False],
+        ['multi_page1.pdf', 2, 'Second page', False],
+        ['multi_page1.pdf', 3, 'Third page', False],
+        ['sub1/copy_multi_page1.pdf', 1, 'First page', False],
+        ['sub1/copy_multi_page1.pdf', 2, 'Second page', False],
+        ['sub1/copy_multi_page1.pdf', 3, 'Third page', False],
+        ['single_page1.pdf', 1, 'My beautiful sample!', False],
+        ['sub2/copy_single_page1.pdf', 1, 'My beautiful sample!', False],
+        ['invalid1.pdf', -1, None, True]
+    ]
+
+    # Transpose the row-wise records into one tuple per column
+    per_column = dict(zip(columns, zip(*records)))
+    df = pd.DataFrame(per_column)
+
+    # One JPEG per page is read from SAMPLES_DIR
+    df['image'] = df.apply(
+        lambda row: load_page_image(row.path, row.page), axis=1
+    )
+
+    return df
diff --git a/tests/test_extract_task.py b/tests/test_extract_task.py
new file mode 100644
index 0000000..5a6e16c
--- /dev/null
+++ b/tests/test_extract_task.py
@@ -0,0 +1,105 @@
+import pytest
+from pathlib import Path
+
+import pyarrow as pa
+import numpy as np
+from PIL import Image
+from pdf2dataset import (
+    PdfExtractTask,
+    extract,
+    feature,
+    image_to_bytes,
+    image_from_bytes,
+)
+
+from .conftest import SAMPLES_DIR, SAMPLE_IMAGE
+
+
+class MyCustomTask(PdfExtractTask):
+    """PdfExtractTask subclass declaring extra features for these tests."""
+    @feature('bool_')
+    def get_is_page_even(self):
+        return self.page % 2 == 0
+
+    @feature(is_helper=True)  # helper feature, declared without a type
+    def get_doc_first_bytes(self):
+        return self.file_bin[:10]
+
+    @feature('list_', value_type=pa.string())
+    def get_list(self):
+        return ['E0', 'E1', 'My super string!']
+
+    @feature('string', exceptions=[ValueError])
+    def get_wrong(self):
+        raise ValueError("There was a problem!")  # always fails on purpose
+
+
+@pytest.fixture
+def image():  # the sample JPEG (SAMPLE_IMAGE), opened with PIL
+    return Image.open(SAMPLE_IMAGE)
+
+
+@pytest.fixture
+def image_bytes():
+    """Raw (still encoded) bytes of the sample JPEG file."""
+    # SAMPLE_IMAGE is a pathlib.Path, so it can read itself
+    raw = SAMPLE_IMAGE.read_bytes()
+    return raw
+
+
+def test_imagefrombytes(image, image_bytes):
+    decoded = image_from_bytes(image_bytes)  # decode the raw sample bytes
+    assert decoded == image
+
+
+def test_imagetobytes(image, image_bytes):
+    # PNG is lossless; a JPEG round trip would change pixel values
+    round_tripped = image_from_bytes(image_to_bytes(image, 'png'))
+    same_pixels = np.array(round_tripped) == np.array(image)
+    assert same_pixels.all()
+
+
+def test_list_features():
+    """MyCustomTask lists the inherited features plus its own three."""
+    base_names = PdfExtractTask.list_features()
+    task_names = MyCustomTask.list_features()
+
+    # MyCustomTask declares three non-helper features of its own
+    assert len(base_names) + 3 == len(task_names)
+
+    base_set, task_set = set(base_names), set(task_names)
+    assert base_set < task_set
+    assert {'is_page_even', 'wrong', 'list'} < task_set
+
+
+def test_list_helper_features():
+    """MyCustomTask lists the inherited helpers plus its own one."""
+    base_names = PdfExtractTask.list_helper_features()
+    task_names = MyCustomTask.list_helper_features()
+
+    # MyCustomTask declares a single helper: doc_first_bytes
+    assert len(base_names) + 1 == len(task_names)
+
+    base_set, task_set = set(base_names), set(task_names)
+    assert base_set < task_set
+    assert {'doc_first_bytes'} < task_set
+
+
+def test_saving_to_disk(tmp_path):
+    """Extracting with a custom task class writes the output file."""
+    parquet_path = tmp_path / 'my_df.parquet.gzip'
+    extract(SAMPLES_DIR, parquet_path, task_class=MyCustomTask)
+    assert Path(parquet_path).exists()
+
+
+def test_columns_present():  # custom features must appear as columns
+    df = extract(SAMPLES_DIR, small=True, task_class=MyCustomTask)
+    assert set(MyCustomTask.list_features()) < set(df.columns)
+
+
+def test_error_recorded():
+    """A feature that raises has its name and message put in `error`."""
+    df = extract(SAMPLES_DIR, small=True, task_class=MyCustomTask)
+    first_error = df.iloc[0].error
+    assert 'There was a problem' in first_error
+    assert 'wrong:' in first_error
diff --git a/tests/test_extraction.py b/tests/test_extraction.py
index 72c37ca..7732f6e 100644
--- a/tests/test_extraction.py
+++ b/tests/test_extraction.py
@@ -4,76 +4,17 @@
 
 import pytest
 import pandas as pd
-import numpy as np
 from PIL import Image
 
 from pdf2dataset import (
     ExtractionFromMemory,
     PdfExtractTask,
     extract,
-    extract_text,
-    image_to_bytes,
-    image_from_bytes,
+    extract_text
 )
 
 from .testing_dataframe import check_and_compare
-
-
-SAMPLES_DIR = Path('tests/samples')
-TEST_IMAGE = SAMPLES_DIR / 'single_page1_1.jpeg'
-PARQUET_ENGINE = 'pyarrow'
-
-
-@pytest.fixture
-def expected_all():
-
-    def read_image(path, page):
-        if page == -1:
-            return None
-
-        path = Path(path).with_suffix('')
-        image_name = f'{path}_{page}.jpeg'
-        image_path = Path(SAMPLES_DIR) / image_name
-
-        with open(image_path, 'rb') as f:
-            image_bin = f.read()
-
-        return image_bin
-
-    rows = [
-        ['path', 'page', 'text', 'error_bool'],
-
-        ['multi_page1.pdf', 1, 'First page', False],
-        ['multi_page1.pdf', 2, 'Second page', False],
-        ['multi_page1.pdf', 3, 'Third page', False],
-        ['sub1/copy_multi_page1.pdf', 1, 'First page', False],
-        ['sub1/copy_multi_page1.pdf', 2, 'Second page', False],
-        ['sub1/copy_multi_page1.pdf', 3, 'Third page', False],
-        ['single_page1.pdf', 1, 'My beautiful sample!', False],
-        ['sub2/copy_single_page1.pdf', 1, 'My beautiful sample!', False],
-        ['invalid1.pdf', -1, None, True]
-    ]
-
-    names = rows.pop(0)
-    expected_dict = {n: r for n, r in zip(names, zip(*rows))}
-
-    df = pd.DataFrame(expected_dict)
-    df['image'] = df.apply(lambda row: read_image(row.path, row.page), axis=1)
-
-    return df
-
-
-@pytest.fixture
-def image():
-    return Image.open(TEST_IMAGE)
-
-
-@pytest.fixture
-def image_bytes():
-    with open(TEST_IMAGE, 'rb') as f:
-        bytes_ = f.read()
-
-    return bytes_
+from .conftest import SAMPLES_DIR, PARQUET_ENGINE
 
 
 class TestExtractionCore:
@@ -82,7 +23,7 @@ class TestExtractionCore:
         True,
         False,
     ))
-    def test_extraction_big(self, tmp_path, is_ocr, expected_all):
+    def test_extraction_big(self, tmp_path, is_ocr, complete_df):
         result_path = tmp_path / 'result.parquet.gzip'
 
         extract(SAMPLES_DIR, result_path,
@@ -93,9 +34,9 @@ def test_extraction_big(self, tmp_path, is_ocr, expected_all):
         if is_ocr:
             df['text'] = df['text'].str.strip()
 
-        check_and_compare(df, expected_all, is_ocr=is_ocr)
+        check_and_compare(df, complete_df, is_ocr=is_ocr)
 
-    def test_append_result(self, tmp_path, expected_all):
+    def test_append_result(self, tmp_path, complete_df):
         result_path = tmp_path / 'result.parquet.gzip'
 
         extract(SAMPLES_DIR, result_path, saving_interval=1, features='all')
@@ -103,9 +44,9 @@ def test_append_result(self, tmp_path, expected_all):
         # Small 'chunk_df_size' to append to result multiple times
         df = pd.read_parquet(result_path, engine=PARQUET_ENGINE)
 
-        check_and_compare(df, expected_all)
+        check_and_compare(df, complete_df)
 
-    def test_passing_paths_list(self, tmp_path, expected_all):
+    def test_passing_paths_list(self, tmp_path, complete_df):
         result_path = tmp_path / 'result.parquet.gzip'
         files_list = Path(SAMPLES_DIR).rglob('*.pdf')
 
@@ -114,11 +55,11 @@ def test_passing_paths_list(self, tmp_path, expected_all):
 
         df = extract(files_list, result_path, small=True)
 
-        # Paths will be relative to pwd, so adapting expected_all
-        expected_all['path'] = expected_all['path'].apply(
+        # Paths will be relative to pwd, so adapting complete_df
+        complete_df['path'] = complete_df['path'].apply(
             lambda p: str(SAMPLES_DIR / p)
         )
-        check_and_compare(df, expected_all)
+        check_and_compare(df, complete_df)
 
     def test_filter_processed(self, tmp_path):
         with open(SAMPLES_DIR / 'single_page1.pdf', 'rb') as f:
@@ -166,13 +107,13 @@ class TestExtractionSmall:
         True,
         False,
     ))
-    def test_extraction_small(self, is_ocr, expected_all):
+    def test_extraction_small(self, is_ocr, complete_df):
         df = extract(SAMPLES_DIR, small=True, ocr_lang='eng', ocr=is_ocr)
 
         if is_ocr:
             df['text'] = df['text'].str.strip()
 
-        check_and_compare(df, expected_all, is_ocr=is_ocr)
+        check_and_compare(df, complete_df, is_ocr=is_ocr)
 
     def test_return_list(self):
         def sort(doc):
@@ -226,30 +167,30 @@ def hash_images(doc):
 
 
 class TestParams:
-    def test_features_as_list(self, expected_all):
+    def test_features_as_list(self, complete_df):
         df = extract(SAMPLES_DIR, small=True, features=['text', 'image'])
-        check_and_compare(df, expected_all)
+        check_and_compare(df, complete_df)
 
     @pytest.mark.parametrize('excluded', [
         'text',
         'image',
     ])
-    def test_exclude_feature(self, excluded, expected_all):
+    def test_exclude_feature(self, excluded, complete_df):
         features = PdfExtractTask.list_features()
         features.remove(excluded)
 
         df = extract(SAMPLES_DIR, small=True, features=features)
 
-        columns = list(expected_all.columns)
+        columns = list(complete_df.columns)
         columns.remove(excluded)
 
-        check_and_compare(df, expected_all[columns])
+        check_and_compare(df, complete_df[columns])
 
-    def test_empty_feature(self, expected_all):
+    def test_empty_feature(self, complete_df):
         df = extract(SAMPLES_DIR, small=True, features='')
 
         columns = list(PdfExtractTask.fixed_featues) + ['error_bool']
-        check_and_compare(df, expected_all[columns])
+        check_and_compare(df, complete_df[columns])
 
     @pytest.mark.parametrize('size', (
         ('10x10'),
@@ -285,7 +226,7 @@ def test_image_format(self, format_):
         (200, True),
         (2000, False),
     ))
-    def test_low_ocr_image(self, expected_all, ocr_image_size, is_low):
+    def test_low_ocr_image(self, complete_df, ocr_image_size, is_low):
         df = extract_text(
             SAMPLES_DIR, small=True, ocr=True,
             ocr_image_size=ocr_image_size, ocr_lang='eng'
@@ -294,7 +235,7 @@ def test_low_ocr_image(self, expected_all, ocr_image_size, is_low):
         df = df.dropna(subset=['text'])
         serie = df.iloc[0]
 
-        expected = expected_all.dropna(subset=['text'])
+        expected = complete_df.dropna(subset=['text'])
         expected = expected[(expected.path == serie.path)
                             & (expected.page == serie.page)]
 
@@ -304,51 +245,3 @@ def test_low_ocr_image(self, expected_all, ocr_image_size, is_low):
             assert serie.text.strip() != expected_serie.text.strip()
         else:
             assert serie.text.strip() == expected_serie.text.strip()
-
-    def test_imagefrombytes(self, image, image_bytes):
-
-        assert image_from_bytes(image_bytes) == image
-
-    def test_imagetobytes(self, image, image_bytes):
-        # png because jpeg change pixel values
-        calculated = image_from_bytes(image_to_bytes(image, 'png'))
-
-        assert (np.array(calculated) == np.array(image)).all()
-
-
-class TestExtractionFromMemory:
-
-    @pytest.mark.parametrize('small', (
-        True,
-        False,
-    ))
-    def test_passing_tasks(self, tmp_path, small):
-        with open(SAMPLES_DIR / 'single_page1.pdf', 'rb') as f:
-            pdf1_bin = f.read()
-
-        with open(SAMPLES_DIR / 'multi_page1.pdf', 'rb') as f:
-            pdf2_bin = f.read()
-
-        tasks = [
-            ('doc1.pdf', pdf1_bin),  # All pages
-            ('2.pdf', pdf2_bin, 2),  # Just page 2
-            ('pdf2.pdf', pdf2_bin, 3),  # Just page 3
-        ]
-
-        expected_dict = {
-            'path': ['pdf2.pdf', '2.pdf', 'doc1.pdf'],
-            'page': [3, 2, 1],
-            'text': ['Third page', 'Second page', 'My beautiful sample!'],
-            'error': [None, None, None],
-        }
-        expected = pd.DataFrame(expected_dict)
-
-        if small:
-            df = extract_text(tasks=tasks, small=small)
-        else:
-            result_path = tmp_path / 'result.parquet.gzip'
-            extract_text(tasks, result_path)
-
-            df = pd.read_parquet(result_path, engine=PARQUET_ENGINE)
-
-        check_and_compare(df, expected, list(expected.columns))
diff --git a/tests/test_extraction_memory.py b/tests/test_extraction_memory.py
new file mode 100644
index 0000000..c18130c
--- /dev/null
+++ b/tests/test_extraction_memory.py
@@ -0,0 +1,42 @@
+import pytest
+import pandas as pd
+from pdf2dataset import extract_text
+
+from .testing_dataframe import check_and_compare
+from .conftest import SAMPLES_DIR, PARQUET_ENGINE
+
+
+@pytest.mark.parametrize('small', (True, False))
+def test_passing_tasks(tmp_path, small):
+    """Extract text from in-memory PDFs, with and without ``small``.
+
+    A task is ``(name, pdf_bytes)`` for all pages or
+    ``(name, pdf_bytes, page)`` for one page; both forms are used here.
+    """
+    single_page_pdf = (SAMPLES_DIR / 'single_page1.pdf').read_bytes()
+    multi_page_pdf = (SAMPLES_DIR / 'multi_page1.pdf').read_bytes()
+
+    tasks = [
+        ('doc1.pdf', single_page_pdf),  # All pages
+        ('2.pdf', multi_page_pdf, 2),  # Just page 2
+        ('pdf2.pdf', multi_page_pdf, 3),  # Just page 3
+    ]
+
+    # doc1.pdf comes from the single-page sample, hence only page 1
+    expected = pd.DataFrame({
+        'path': ['pdf2.pdf', '2.pdf', 'doc1.pdf'],
+        'page': [3, 2, 1],
+        'text': ['Third page', 'Second page', 'My beautiful sample!'],
+        'error': [None, None, None],
+    })
+
+    if not small:
+        # Without small=True the result is written to result_path
+        result_path = tmp_path / 'result.parquet.gzip'
+        extract_text(tasks, result_path)
+
+        df = pd.read_parquet(result_path, engine=PARQUET_ENGINE)
+    else:
+        df = extract_text(tasks=tasks, small=small)
+
+    check_and_compare(df, expected, list(expected.columns))