Skip to content

Commit

Permalink
adjust file paths and license info
Browse files: browse the repository at this point in the history
  • Loading branch information
rsennrich committed Apr 26, 2016
1 parent dd85e68 commit e4545d0
Show file tree
Hide file tree
Showing 10 changed files with 18 additions and 13 deletions.
5 changes: 3 additions & 2 deletions preprocess/normalise-romanian.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Barry Haddow
# Distributed under MIT license

#
# Normalise Romanian s-comma and t-comma
#
# author: Barry Haddow

import io
import sys
Expand Down
5 changes: 3 additions & 2 deletions preprocess/remove-diacritics.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Barry Haddow
# Distributed under MIT license

#
# Remove Romanian diacritics. Assumes s-comma and t-comma are normalised
#
# author: Barry Haddow

import io
import sys
Expand Down
4 changes: 2 additions & 2 deletions r2l/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ use the same vocabulary, and apply reverse.py *after* truecasing/BPE, to simplif

3. at test time, produce an n-best list with the l2r model(s):

time THEANO_FLAGS=mode=FAST_RUN,floatX=float32,device=gpu,on_unused_input=warn python /path/to/nematus/nmt/translate.py -m model.npz -i test.bpe.de -o test.output.50best -k 50 -n -p 1 --n-best
time THEANO_FLAGS=mode=FAST_RUN,floatX=float32,device=gpu,on_unused_input=warn python /path/to/nematus/nematus/translate.py -m model.npz -i test.bpe.de -o test.output.50best -k 50 -n -p 1 --n-best

4. reverse the outputs in the n-best list, and re-score with the r2l model(s).

python reverse_nbest.py < test.output.50best > test.output.50best.reversed

time THEANO_FLAGS=mode=FAST_RUN,floatX=float32,device=gpu,on_unused_input=warn python /path/to/nematus/nmt/rescore.py -m /path/to/r2l_model/model.npz -s test.bpe.de -i test.output.50best.reversed -o test.output.50best.rescored -b 80 -n
time THEANO_FLAGS=mode=FAST_RUN,floatX=float32,device=gpu,on_unused_input=warn python /path/to/nematus/nematus/rescore.py -m /path/to/r2l_model/model.npz -s test.bpe.de -i test.output.50best.reversed -o test.output.50best.rescored -b 80 -n
python rerank.py < test.output.50best.rescored | python reverse.py > test.output.reranked
1 change: 1 addition & 0 deletions r2l/rerank.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Author: Rico Sennrich
# Distributed under MIT license

import sys
from collections import defaultdict
Expand Down
2 changes: 2 additions & 0 deletions r2l/reverse.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Author: Rico Sennrich
# Distributed under MIT license

import sys

Expand Down
2 changes: 2 additions & 0 deletions r2l/reverse_nbest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Author: Rico Sennrich
# Distributed under MIT license

import sys

Expand Down
4 changes: 1 addition & 3 deletions sample/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
import os
import sys

sys.path.append('/path/to/nematus/nmt')

from nmt import train
sys.path.append('/path/to/nematus/nematus')

VOCAB_SIZE = 90000
SRC = "ro"
Expand Down
2 changes: 1 addition & 1 deletion sample/preprocess.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#/bin/sh
#!/bin/sh

# this sample script preprocesses a sample corpus, including tokenization,
# truecasing, and subword segmentation.
Expand Down
4 changes: 2 additions & 2 deletions sample/translate.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
#/bin/sh
#!/bin/sh

# theano device
device=gpu

# path to nematus ( https://www.github.com/rsennrich/nematus )
nematus=/path/to/nematus

THEANO_FLAGS=mode=FAST_RUN,floatX=float32,device=$device,on_unused_input=warn python $nematus/nmt/translate.py \
THEANO_FLAGS=mode=FAST_RUN,floatX=float32,device=$device,on_unused_input=warn python $nematus/nematus/translate.py \
-m model/model.npz \
-i data/newsdev2016.bpe.ro \
-o data/newsdev2016.output \
Expand Down
2 changes: 1 addition & 1 deletion sample/validate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ dev=data/newsdev2016.bpe.ro
ref=data/newsdev2016.tok.en

# decode
THEANO_FLAGS=mode=FAST_RUN,floatX=float32,device=$device,on_unused_input=warn python $nematus/nmt/translate.py \
THEANO_FLAGS=mode=FAST_RUN,floatX=float32,device=$device,on_unused_input=warn python $nematus/nematus/translate.py \
-m $prefix.dev.npz \
-i $dev \
-o $dev.output.dev \
Expand Down

0 comments on commit e4545d0

Please sign in to comment.