Skip to content

Commit

Permalink
fix: Fix CAPS sentence splitting
Browse files Browse the repository at this point in the history
  • Loading branch information
3y3 committed Mar 11, 2024
1 parent c21f1ed commit 0346489
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 64 deletions.
50 changes: 23 additions & 27 deletions __tests__/rules/abbreviations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,32 +123,28 @@ describe('pairAbbreviation', () => {
});

describe('leftPairsTailAbbreviation', () => {
it('evaluates to true if left is tail of the pair abbreviation', () => {
const go = compose(reduce(and, true), map(leftPairsTailAbbreviation));
const generateInput = compose(
unnest,
juxt([keysUpperWithRightUpper, keysSpacesWithRightLower]),
keys,
);
const input = [
...generateInput(HEAD_PAIR),
...generateInput(TAIL_PAIR),
...generateInput(OTHER_PAIR),
];
const expected = true;
const actual = go(input);
expect(actual).toBe(expected);
});
const test = (expected: boolean) => (pair: string[]) => {
it('handles "' + pair.join('') + '"', () => {
expect(leftPairsTailAbbreviation(pair)).toBe(expected);
});
}

it('evaluates to false otherwise', () => {
const go = compose(reduce(or, false), map(leftPairsTailAbbreviation));
const input = [
['фоо.бар. ', 'амбар'],
['not.an.abbr.', ' not happening'],
['not. an. abbr.', ' not happening'],
];
const expected = false;
const actual = go(input);
expect(actual).toBe(expected);
});
const generateInput = compose(
unnest,
juxt([keysUpperWithRightUpper, keysSpacesWithRightLower]),
keys,
);

[
...generateInput(HEAD_PAIR),
...generateInput(TAIL_PAIR),
...generateInput(OTHER_PAIR),
].forEach(test(true));

[
['фоо.бар. ', 'амбар'],
['not.an.abbr.', ' not happening'],
['not. an. abbr.', ' not happening'],
['и т.п.', ' В очереди'],
].forEach(test(false));
});
6 changes: 5 additions & 1 deletion src/rules/abbreviations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import {
anyPass,
toLower,
Pred,
length,
lt,
} from 'ramda';

import {
Expand Down Expand Up @@ -98,8 +100,10 @@ export const leftAbbreviation = compose(
fst,
);

// a word counts as CAPS when it passes isUpper AND is longer than one character
// (lt(1) applied to the length reads as `1 < length`), so a lone capital letter does not qualify
const isCaps = allPass([isUpper, compose(lt(1), length)]);

// right join condition: the first word on the right either starts with a lowercase letter or is written in uppercase
const rightLowercaseOrCaps = compose(anyPass([startsWithLower, isUpper]), fstWord, snd);
const rightLowercaseOrCaps = compose(anyPass([startsWithLower, isCaps]), fstWord, snd);

// Curried helper: given source <s>, then target <t>, yields the part of <s>
// that precedes the first occurrence of <t>.
// The result is an empty string when <t> is absent from <s> or sits at its very start.
const before = (s: string) => (t: string) => {
    const cut = s.indexOf(t);

    return cut > 0 ? s.slice(0, cut) : '';
};
Expand Down
35 changes: 31 additions & 4 deletions src/rules/base.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,27 @@
import { call, zipWith, compose, map, all, not, always, Pred, identity, allPass } from 'ramda';
import {
call,
zipWith,
compose,
map,
all,
not,
always,
Pred,
identity,
allPass,
equals,
length,
juxt,
toUpper
} from 'ramda';

import {
startsWithLower,
startsWithUpper,
startsWithNewline,
startsWithHardbreak,
endsWithHardbreak,
lengthNonZero,
lengthNonZero, allEqual, hasAlpha,
} from '../utilities';
import {
fstToken,
Expand All @@ -17,9 +32,19 @@ import {
quotationClosePrefix,
bracketsClosePrefix,
delimiterPrefix,
spaces,
spaces, dotSuffix, lstWord,
} from '../parsers';

// determine if left is part of the initials
// conditions:
// * left delimiter is dot
// * left last word is single letter
// * left last word is in upper case
// * left has alpha characters
// NOTE(review): dotSuffix, lstWord, allEqual, hasAlpha, spaceSuffix and spacePrefix are defined
// outside this view; the notes below assume they behave as their names suggest — confirm
// true when dotSuffix yields a non-empty result for the input
const isLeftDotDelimiter = compose(lengthNonZero, dotSuffix);
// true when the value returned by lstWord has length exactly 1
const isLeftSingleLetter = compose(equals(1), length, lstWord);
// builds [upper-cased lstWord, original lstWord] and passes the pair to allEqual;
// caseless characters (digits, punctuation) are unchanged by toUpper, hence the separate alpha check below
const isLeftUpper = compose(allEqual, juxt([toUpper, identity]), lstWord);
const leftHasAlpha = compose(hasAlpha, lstWord);
// true when spaceSuffix / spacePrefix yield a non-empty result for the input
const isSpaceSuffix = compose(lengthNonZero, spaceSuffix);
const isSpacePrefix = compose(lengthNonZero, spacePrefix);

Expand Down Expand Up @@ -72,4 +97,6 @@ export const leftEndsWithHardbreak = rule('leftEndsWithHardbreak', [endsWithHard

export const rightStartsWithHardbreak = rule('rightStartsWithHardbreak', [_, startsWithHardbreak]);

export const rightStartsNewlineUppercased = rule('rightStartsNewlineUppercased', [_, allPass([startsWithNewline, startsWithUpper])]);
export const rightStartsNewlineUppercased = rule('rightStartsNewlineUppercased', [_, allPass([startsWithNewline, startsWithUpper])]);

// rule named 'leftInitials': the left-hand predicate requires all four initials checks to pass
// (dot delimiter, single-letter last word, upper case, contains alpha characters);
// `_` in the right slot presumably leaves the right side unconstrained — confirm against rule()
export const leftInitials = rule('leftInitials', [allPass([isLeftDotDelimiter, isLeftSingleLetter, isLeftUpper, leftHasAlpha]), _]);
1 change: 0 additions & 1 deletion src/rules/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
export * from './base';
export * from './initials';
export * from './abbreviations';
31 changes: 0 additions & 31 deletions src/rules/initials.ts

This file was deleted.

0 comments on commit 0346489

Please sign in to comment.