-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsinoname.go
177 lines (151 loc) · 3.72 KB
/
sinoname.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
package sinoname
import (
"context"
"errors"
"sync"
)
// Generator provides extra functionality on top of the layers.
//
// A Generator is created with New, extended with layers through the With*
// builder methods, and run with Generate.
type Generator struct {
// cfg is the configuration handed to New; it is passed to every transformer
// and layer factory and is read by Generate for its limits and filters.
cfg *Config
// layers is the ordered pipeline that Generate runs the input through.
layers Layers
}
// splitOnDefault lists the default separator strings, each followed by a
// single space.
//
// NOTE(review): the alternating layout looks like old/new replacement pairs
// (the argument shape taken by strings.NewReplacer) — confirm against the
// default tokenizer that consumes this slice.
var splitOnDefault = []string{
	".", " ",
	"-", " ",
	"_", " ",
	",", " ",
}
// New builds a Generator around conf and returns it. A nil conf yields a nil
// Generator.
//
// conf is modified in place: an unset Tokenize or StripNumbers hook is
// replaced with the package default, and when Adjectives is non-nil a
// shuffle-buffer pool is installed on the config.
func New(conf *Config) *Generator {
	if conf == nil {
		return nil
	}

	// Fall back to the package defaults for any hook the caller left unset.
	if conf.StripNumbers == nil {
		conf.StripNumbers = stripNumbersASCII
	}
	if conf.Tokenize == nil {
		conf.Tokenize = tokenizeDefault
	}

	// With adjectives available, set up a pool so the circumfix, suffix and
	// prefix transformer goroutines can share shuffle buffers instead of each
	// allocating their own.
	if conf.Adjectives != nil {
		newShuffleBuf := func() any {
			// Each pooled buffer is a permutation sized to one chunk of the
			// adjective list.
			return conf.RandSrc.Perm(len(conf.Adjectives) / chunks)
		}
		conf.shufflePool = sync.Pool{New: newShuffleBuf}
	}

	return &Generator{cfg: conf}
}
// WithUniformTransformers appends one uniform layer built from the given
// transformer factories and returns the generator so calls can be chained.
//
// Every factory is invoked once, in order, with the generator's config.
// Factories whose second return value is true are additionally retained on
// the layer (presumably so stateful transformers can be rebuilt — confirm
// against the layer implementation).
func (g *Generator) WithUniformTransformers(tFact ...TransformerFactory) *Generator {
	built := make([]Transformer, 0, len(tFact))
	retained := []TransformerFactory{}

	for _, factory := range tFact {
		transformer, keepFactory := factory(g.cfg)
		built = append(built, transformer)
		if keepFactory {
			retained = append(retained, factory)
		}
	}

	g.layers = append(g.layers, &UniformTransformerLayer{
		cfg:                  g.cfg,
		transformers:         built,
		transformerFactories: retained,
	})
	return g
}
// WithTransformers groups the given transformers into a single layer, appends
// that layer to the generator and returns the generator for chaining.
//
// This is the layer configuration which suits most use-cases; you should
// generally look no further.
//
// Every factory is invoked once, in order, with the generator's config.
// Factories whose second return value is true are additionally retained on
// the layer (presumably so stateful transformers can be rebuilt — confirm
// against the layer implementation).
func (g *Generator) WithTransformers(tFact ...TransformerFactory) *Generator {
	var (
		built    = make([]Transformer, 0, len(tFact))
		retained = []TransformerFactory{}
	)

	for idx := range tFact {
		transformer, keepFactory := tFact[idx](g.cfg)
		if keepFactory {
			retained = append(retained, tFact[idx])
		}
		built = append(built, transformer)
	}

	layer := &TransformerLayer{
		cfg:                  g.cfg,
		transformers:         built,
		transformerFactories: retained,
	}
	g.layers = append(g.layers, layer)
	return g
}
// WithLayers builds one layer per factory, handing each the generator's
// config, and appends the results to the pipeline in the order given. The
// generator is returned so calls can be chained.
func (g *Generator) WithLayers(lFact ...LayerFactory) *Generator {
	for i := range lFact {
		g.layers = append(g.layers, lFact[i](g.cfg))
	}
	return g
}
// Generate passes the in field through the pipeline of transformers and
// collects the produced messages. The process can be aborted by cancelling
// the context passed.
//
// Inputs longer than cfg.MaxBytes bytes are rejected with an error. Messages
// equal to in are dropped when cfg.PreventDefault is set, and repeated
// messages are dropped when cfg.PreventDuplicates is set. Collection stops
// once cfg.MaxVals values have been gathered or the pipeline closes its
// output channel, whichever comes first; a MaxVals of zero or less never
// matches the running count, so only the channel closing ends collection.
//
// It returns the collected values, or a nil slice together with the error
// when the input is too long, the pipeline fails to start, the context is
// cancelled, or clean-up reports anything other than a cancellation.
func (g *Generator) Generate(ctx context.Context, in string) ([]string, error) {
	if len(in) > g.cfg.MaxBytes {
		return nil, errors.New("sinoname: value is too long")
	}

	msgPacket := MessagePacket{
		Message: in,
		Changes: 0,
		Skip:    0,
	}

	inC, clnUp, err := g.layers.Run(ctx, msgPacket)
	if err != nil {
		// The start-up error is what the caller needs; the clean-up result
		// is intentionally not reported here.
		clnUp()
		return nil, err
	}

	var read int
	var vals []string

	// readVals marks the messages that must not be returned. Seeding it with
	// the input filters out the unchanged value when PreventDefault is set.
	readVals := make(map[string]bool)
	readVals[in] = g.cfg.PreventDefault

collect:
	for {
		select {
		// if ctx cancelled no need to call clean up.
		case <-ctx.Done():
			return nil, ctx.Err()
		case val, ok := <-inC:
			// Check for a closed channel before looking at the message: a
			// closed channel keeps yielding the zero packet, and if the empty
			// message were already marked in readVals the loop would spin on
			// it forever.
			if !ok {
				break collect
			}

			if readVals[val.Message] {
				continue
			}
			if g.cfg.PreventDuplicates {
				readVals[val.Message] = true
			}

			vals = append(vals, val.Message)

			// Count the value immediately so the limit is detected without
			// waiting for the next iteration, which can be slow.
			read++
			if read == g.cfg.MaxVals {
				break collect
			}
		}
	}

	if err := clnUp(); err != nil {
		// A cancellation is expected when the MaxVals limit is reached while
		// layers are still live; it is not a failure. errors.Is also matches
		// a wrapped context.Canceled.
		if errors.Is(err, context.Canceled) {
			return vals, nil
		}
		return nil, err
	}

	return vals, nil
}