Skip to content

Commit

Permalink
Single warning for holes. (#1303)
Browse files Browse the repository at this point in the history
* Single warning for holes.

* Dummy.
  • Loading branch information
Narsil authored Jul 25, 2023
1 parent d6326b2 commit bb38f39
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions tokenizers/src/models/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,26 @@ impl<'a> Serialize for OrderedVocabIter<'a> {
  S: Serializer,
  {
  // There could be holes so max + 1 is more correct than vocab_r.len()
- if let Some(max) = self.vocab_r.iter().map(|(key, _)| key).max() {
+ let mut holes = vec![];
+ let result = if let Some(max) = self.vocab_r.iter().map(|(key, _)| key).max() {
  let iter = (0..*max + 1).filter_map(|i| {
  if let Some(token) = self.vocab_r.get(&i){
  Some((token, i))
  }else{
- warn!("The OrderedVocab you are attempting to save contains a hole for index {}, your vocabulary could be corrupted !", i);
- println!("The OrderedVocab you are attempting to save contains a hole for index {}, your vocabulary could be corrupted !", i);
+ holes.push(i);
  None
  }
- });
+ });
  serializer.collect_map(iter)
  } else {
  serializer.collect_map(std::iter::empty::<(&str, u32)>())
+ };
+
+ if !holes.is_empty(){
+ warn!("The OrderedVocab you are attempting to save contains holes for indices {:?}, your vocabulary could be corrupted !", holes);
+ println!("The OrderedVocab you are attempting to save contains holes for indices {:?}, your vocabulary could be corrupted !", holes);
  }
+ result
  }
  }

Expand Down

0 comments on commit bb38f39

Please sign in to comment.