diff --git a/.gitignore b/.gitignore
index 1777e86..53d473c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,7 +14,7 @@ dist/
 downloads/
 eggs/
 .eggs/
-lib/
+# lib/
 lib64/
 parts/
 sdist/
diff --git a/README.md b/README.md
index 3bee2e0..f6e0292 100644
--- a/README.md
+++ b/README.md
@@ -60,9 +60,9 @@ Don't forget to update the path to the tokenizer in the config file and set `num
 The model consist of a ViT [[1](#References)] encoder with a ResNet backbone and a Transformer [[2](#References)] decoder.
 
 ### Performance
-| BLEU score | normed edit distance |
-| ---------- | -------------------- |
-| 0.88 | 0.10 |
+| BLEU score | normed edit distance | token accuracy |
+| ---------- | -------------------- | -------------- |
+| 0.88 | 0.10 | 0.60 |
 
 ## Data
 We need paired data for the network to learn. Luckily there is a lot of LaTeX code on the internet, e.g. [wikipedia](https://www.wikipedia.org), [arXiv](https://www.arxiv.org). We also use the formulae from the [im2latex-100k](https://zenodo.org/record/56198#.V2px0jXT6eA) [[3](#References)] dataset.
diff --git a/pix2tex/__init__.py b/pix2tex/__init__.py
index e69de29..7c990f2 100644
--- a/pix2tex/__init__.py
+++ b/pix2tex/__init__.py
@@ -0,0 +1,2 @@
+import os
+os.environ['FOR_DISABLE_CONSOLE_CTRL_HANDLER'] = '1'
diff --git a/pix2tex/dataset/__init__.py b/pix2tex/dataset/__init__.py
index a7ec24a..e69de29 100644
--- a/pix2tex/dataset/__init__.py
+++ b/pix2tex/dataset/__init__.py
@@ -1,6 +0,0 @@
-import pix2tex.dataset.arxiv
-import pix2tex.dataset.extract_latex
-import pix2tex.dataset.latex2png
-import pix2tex.dataset.render
-import pix2tex.dataset.scraping
-import pix2tex.dataset.dataset
diff --git a/pix2tex/dataset/arxiv.py b/pix2tex/dataset/arxiv.py
index 01c98af..5351042 100644
--- a/pix2tex/dataset/arxiv.py
+++ b/pix2tex/dataset/arxiv.py
@@ -1,7 +1,7 @@
 # modified from https://github.com/soskek/arxiv_leaks
 
 import argparse
-import json
+import subprocess
 import os
 import glob
 import re
@@ -10,7 +10,6 @@
 import logging
 import tarfile
 import tempfile
-import chardet
 import logging
 import requests
 import urllib.request
@@ -22,7 +21,7 @@
 # logging.getLogger().setLevel(logging.INFO)
 arxiv_id = re.compile(r'(?
diff --git a/pix2tex/dataset/demacro.py b/pix2tex/dataset/demacro.py
 def bracket_replace(string: str) -> str:
     '''
     replaces all layered brackets with special symbols
@@ -66,7 +62,9 @@ def sweep(t, cmds):
         nargs = int(c[1][1]) if c[1] != r'' else 0
         optional = c[2] != r''
         if nargs == 0:
-            t = re.sub(r'\\%s([\W_^\d])' % c[0], r'%s\1' % c[-1].replace('\\', r'\\'), t)
+            num_matches += len(re.findall(r'\\%s([\W_^\dĊ])' % c[0], t))
+            if num_matches > 0:
+                t = re.sub(r'\\%s([\W_^\dĊ])' % c[0], r'%s\1' % c[-1].replace('\\', r'\\'), t)
         else:
             matches = re.findall(r'(\\%s(?:\[(.+?)\])?' % c[0]+r'{(.+?)}'*(nargs-(1 if optional else 0))+r')', t)
             num_matches += len(matches)
@@ -81,18 +79,49 @@ def sweep(t, cmds):
 def unfold(t):
-    t = remove_labels(t).replace('\n', 'Ċ')
-
-    cmds = re.findall(r'\\(?:re)?newcommand\*?{\\(.+?)}\s*(\[\d\])?(\[.+?\])?{(.+?)}Ċ', t)
+    #t = queue.get()
+    t = t.replace('\n', 'Ċ')
+    t = bracket_replace(t)
+    commands_pattern = r'\\(?:re)?newcommand\*?{\\(.+?)}[\sĊ]*(\[\d\])?[\sĊ]*(\[.+?\])?[\sĊ]*{(.*?)}\s*(?:Ċ|\\)'
+    cmds = re.findall(commands_pattern, t)
+    t = re.sub(r'(?
+            # something went wrong here. No multiple definitions allowed
+            del cmds[i]
+        elif '\\newcommand' in cmds[i][-1]:
+            logging.debug("Command recognition pattern didn't work properly. %s" % (undo_bracket_replace(cmds[i][-1])))
%s" % (undo_bracket_replace(cmds[i][-1]))) + del cmds[i] + start = time.time() + try: + for i in range(10): + # check for up to 10 nested commands + if i > 0: + t = bracket_replace(t) + t, N = sweep(t, cmds) + if time.time()-start > 5: # not optimal. more sophisticated methods didnt work or are slow + raise TimeoutError + t = undo_bracket_replace(t) + if N == 0 or i == 9: + #print("Needed %i iterations to demacro" % (i+1)) + break + elif N > 4000: + raise ValueError("Too many matches. Processing would take too long.") + except ValueError: + pass + except TimeoutError: + pass + except re.error as e: + raise DemacroError(e) + t = remove_labels(t.replace('Ċ', '\n')) + # queue.put(t) + return t + + +def pydemacro(t): + return unfold(convert(re.sub('\n+', '\n', re.sub(r'(? 0: @@ -72,9 +72,12 @@ def recursive_wiki(seeds, depth=4, skip=[]): url = [sys.argv[1]] else: url = ['https://en.wikipedia.org/wiki/Mathematics', 'https://en.wikipedia.org/wiki/Physics'] - visited, math = recursive_wiki(url) + try: + visited, math = recursive_wiki(url) + except KeyboardInterrupt: + pass for l, name in zip([visited, math], ['visited_wiki.txt', 'math_wiki.txt']): - f = open(os.path.join(sys.path[0], 'dataset', 'data', name), 'a', encoding='utf-8') + f = open(os.path.join(sys.path[0], 'data', name), 'a', encoding='utf-8') for element in l: f.write(element) f.write('\n') diff --git a/pix2tex/eval.py b/pix2tex/eval.py index b0735ca..b81c0d6 100644 --- a/pix2tex/eval.py +++ b/pix2tex/eval.py @@ -44,13 +44,12 @@ def evaluate(model: Model, dataset: Im2LatexDataset, args: Munch, num_batches: i assert len(dataset) > 0 device = args.device log = {} - bleus, edit_dists = [], [] - bleu_score, edit_distance = 0, 1 + bleus, edit_dists, token_acc = [], [], [] + bleu_score, edit_distance, token_accuracy = 0, 1, 0 pbar = tqdm(enumerate(iter(dataset)), total=len(dataset)) for i, (seq, im) in pbar: if seq is None or im is None: continue - tgt_seq, tgt_mask = seq['input_ids'].to(device), seq['attention_mask'].bool().to(device) encoded = model.encoder(im.to(device)) #loss = decoder(tgt_seq, mask=tgt_mask, context=encoded) dec = model.decoder.generate(torch.LongTensor([args.bos_token]*len(encoded))[:, None].to(device), args.max_seq_len, @@ -62,7 +61,17 @@ def evaluate(model: Model, dataset: Im2LatexDataset, args: Munch, num_batches: i ts = post_process(truthi) if len(ts) > 0: edit_dists.append(distance(post_process(predi), ts)/len(ts)) - pbar.set_description('BLEU: %.3f, ED: %.2e' % (np.mean(bleus), np.mean(edit_dists))) + dec = dec.cpu() + tgt_seq = seq['input_ids'][:, 1:] + shape_diff = dec.shape[1]-tgt_seq.shape[1] + if shape_diff < 0: + dec = torch.nn.functional.pad(dec, (0, -shape_diff), "constant", args.pad_token) + elif shape_diff > 0: + tgt_seq = torch.nn.functional.pad(tgt_seq, (0, shape_diff), "constant", args.pad_token) + mask = torch.logical_or(tgt_seq != args.pad_token, dec != args.pad_token) + tok_acc = (dec == tgt_seq)[mask].float().mean().item() + token_acc.append(tok_acc) + pbar.set_description('BLEU: %.3f, ED: %.2e, ACC: %.3f' % (np.mean(bleus), np.mean(edit_dists), np.mean(token_acc))) if num_batches is not None and i >= num_batches: break if len(bleus) > 0: @@ -71,6 +80,9 @@ def evaluate(model: Model, dataset: Im2LatexDataset, args: Munch, num_batches: i if len(edit_dists) > 0: edit_distance = np.mean(edit_dists) log[name+'/edit_distance'] = edit_distance + if len(token_acc) > 0: + token_accuracy = np.mean(token_acc) + log[name+'/token_acc'] = token_accuracy if args.wandb: # samples pred = token2str(dec, 
@@ -83,7 +95,7 @@
     else:
         print('\n%s\n%s' % (truth, pred))
         print('BLEU: %.2f' % bleu_score)
-    return bleu_score, edit_distance
+    return bleu_score, edit_distance, token_accuracy
 
 
 if __name__ == '__main__':
diff --git a/setup.py b/setup.py
index 41a5d79..ee8a93e 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@
 
 setuptools.setup(
     name='pix2tex',
-    version='0.0.15',
+    version='0.0.20',
     description='pix2tex: Using a ViT to convert images of equations into LaTeX code.',
     long_description=long_description,
     long_description_content_type='text/markdown',
@@ -58,7 +58,6 @@
         'PyYAML>=5.4.1',
         'pandas>=1.0.0',
         'timm',
-        'chardet>=3.0.4',
         'python-Levenshtein>=0.12.2',
         'torchtext>=0.6.0',
         'albumentations>=0.5.2',
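
Note on the demacro.py hunks above: the rewritten unfold() expands user-defined macros by repeatedly substituting \newcommand bodies back into the source, bounded by an iteration cap (10 passes for nested definitions) and a wall-clock budget. The following is a minimal, self-contained sketch of that expansion loop (assumed semantics, simplified to zero-argument commands and non-nested braces; expand() and its regexes are illustrative, not the patch's actual code):

import re
import time

def expand(source: str, max_passes: int = 10, budget_s: float = 5.0) -> str:
    # collect zero-argument \newcommand / \renewcommand definitions
    definition = r'\\(?:re)?newcommand\*?{\\(.+?)}{(.+?)}\n?'
    cmds = re.findall(definition, source)
    source = re.sub(definition, '', source)  # drop the definitions themselves
    start = time.time()
    for _ in range(max_passes):  # repeat so macros defined via other macros resolve
        hits = 0
        for name, body in cmds:
            pattern = r'\\%s(?![a-zA-Z])' % re.escape(name)
            hits += len(re.findall(pattern, source))
            # escape backslashes so the body survives re.sub's template expansion
            source = re.sub(pattern, body.replace('\\', r'\\'), source)
        if hits == 0 or time.time() - start > budget_s:  # fixed point, or out of budget
            break
    return source

print(expand('\\newcommand{\\eps}{\\varepsilon}\n$\\eps > 0$'))  # -> $\varepsilon > 0$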
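Note on the eval.py hunks: the new token-accuracy metric right-pads the prediction and the ground truth to a common length, then compares them only at positions where at least one of the two holds a real (non-pad) token, so trailing padding cannot inflate the score. A minimal standalone sketch of that logic (assumed semantics; the function name and the toy tensors are illustrative, not part of the patch):

import torch
import torch.nn.functional as F

def token_accuracy(dec: torch.Tensor, tgt_seq: torch.Tensor, pad_token: int) -> float:
    # pad the shorter sequence so both have the same length
    shape_diff = dec.shape[1] - tgt_seq.shape[1]
    if shape_diff < 0:
        dec = F.pad(dec, (0, -shape_diff), 'constant', pad_token)
    elif shape_diff > 0:
        tgt_seq = F.pad(tgt_seq, (0, shape_diff), 'constant', pad_token)
    # keep a position if either tensor carries a non-pad token there
    mask = torch.logical_or(tgt_seq != pad_token, dec != pad_token)
    return (dec == tgt_seq)[mask].float().mean().item()

pred = torch.tensor([[1, 2, 3, 5, 0]])   # 0 is the pad token
truth = torch.tensor([[1, 2, 4, 5, 0]])
print(token_accuracy(pred, truth, pad_token=0))  # 3 of 4 compared positions match -> 0.75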