Skip to content

Commit

Permalink
compatibility with gnames 0.8.0 (close #89)
Browse files Browse the repository at this point in the history
  • Loading branch information
dimus committed Feb 24, 2022
1 parent 621feea commit c468966
Show file tree
Hide file tree
Showing 21 changed files with 164 additions and 222 deletions.
5 changes: 5 additions & 0 deletions .markdownlint.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"default": true,
"MD003": { "style": "atx" },
"no-hard-tabs": false
}
27 changes: 16 additions & 11 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## [v0.8.0] - 2022-02-24 Thu

- Add[#89]: compatibility with gnames v0.8.0

## [v0.7.3] - 2022-02-14 Mon

- Add: make gnverifier compatible with gnames v0.7.1
Expand Down Expand Up @@ -139,17 +143,18 @@

This document follows [changelog guidelines]

[v0.7.3]: https://github.com/gnames/gnverifier/compare/v0.7.2...v1.7.3
[v0.7.2]: https://github.com/gnames/gnverifier/compare/v0.7.1...v1.7.2
[v0.7.1]: https://github.com/gnames/gnverifier/compare/v0.7.0...v1.7.1
[v0.7.0]: https://github.com/gnames/gnverifier/compare/v0.6.6...v1.7.0
[v0.6.6]: https://github.com/gnames/gnverifier/compare/v0.6.5...v1.6.6
[v0.6.5]: https://github.com/gnames/gnverifier/compare/v0.6.4...v1.6.5
[v0.6.4]: https://github.com/gnames/gnverifier/compare/v0.6.3...v1.6.4
[v0.6.3]: https://github.com/gnames/gnverifier/compare/v0.6.2...v1.6.3
[v0.6.2]: https://github.com/gnames/gnverifier/compare/v0.6.1...v1.6.2
[v0.6.1]: https://github.com/gnames/gnverifier/compare/v0.6.0...v1.6.1
[v0.6.0]: https://github.com/gnames/gnverifier/compare/v0.5.2...v1.6.0
[v0.8.0]: https://github.com/gnames/gnverifier/compare/v0.7.3...v0.8.0
[v0.7.3]: https://github.com/gnames/gnverifier/compare/v0.7.2...v0.7.3
[v0.7.2]: https://github.com/gnames/gnverifier/compare/v0.7.1...v0.7.2
[v0.7.1]: https://github.com/gnames/gnverifier/compare/v0.7.0...v0.7.1
[v0.7.0]: https://github.com/gnames/gnverifier/compare/v0.6.6...v0.7.0
[v0.6.6]: https://github.com/gnames/gnverifier/compare/v0.6.5...v0.6.6
[v0.6.5]: https://github.com/gnames/gnverifier/compare/v0.6.4...v0.6.5
[v0.6.4]: https://github.com/gnames/gnverifier/compare/v0.6.3...v0.6.4
[v0.6.3]: https://github.com/gnames/gnverifier/compare/v0.6.2...v0.6.3
[v0.6.2]: https://github.com/gnames/gnverifier/compare/v0.6.1...v0.6.2
[v0.6.1]: https://github.com/gnames/gnverifier/compare/v0.6.0...v0.6.1
[v0.6.0]: https://github.com/gnames/gnverifier/compare/v0.5.2...v0.6.0
[v0.5.2]: https://github.com/gnames/gnverifier/compare/v0.5.1...v0.5.2
[v0.5.1]: https://github.com/gnames/gnverifier/compare/v0.5.0...v0.5.1
[v0.5.0]: https://github.com/gnames/gnverifier/compare/v0.4.1...v0.5.0
Expand Down
16 changes: 0 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ search feature.
* [capitalize](#capitalize)
* [format](#format)
* [jobs](#jobs)
* [only_preferred](#only_preferred)
* [quiet](#quiet)
* [sources](#sources)
* [web-logs](#web-logs)
Expand Down Expand Up @@ -299,20 +298,6 @@ gnverifier -j 1 file.txt

This option is ignored by advanced search.

#### only_preferred

Sometimes a user wants to map a list of names to a DataSource and is not
interested in whether a name matched anywhere else. In such a case you can use
the ``only_preferred`` flag.

```bash
gnverifier -o -s '12' file.txt
# or
gnverifier --only_preferred --sources='1,12' file.tsv
```

In the case of advanced search, use `all:t` together with this flag.

#### quiet

Removes log messages from the output. Note that results of verification go
Expand Down Expand Up @@ -398,7 +383,6 @@ possible to use environment variables for configuration.
| Env. Var. | Configuration |
| :---------------------- | :----------------- |
| GNV_FORMAT | Format |
| GNV_PREFERRED_ONLY | PreferredOnly |
| GNV_DATA_SOURCES | DataSources |
| GNV_WITH_ALL_MATCHES | WithAllMatches |
| GNV_WITH_CAPITALIZATION | WithCapitalization |
Expand Down
12 changes: 0 additions & 12 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ type Config struct {
// Example: `api\/v(0|1)`
NsqdRegexFilter *regexp.Regexp

// PreferredOnly hides BestResult if the user wants to see only
// preferred results.
PreferredOnly bool

// VerifierURL is the URL of the gnames verification service. It only needs
// to be changed if the user sets up a local version of gnames.
VerifierURL string
Expand Down Expand Up @@ -126,14 +122,6 @@ func OptNsqdTCPAddress(s string) Option {
}
}

// OptPreferredOnly returns an Option that sets the PreferredOnly field of
// Config to b. If it is true, output only contains results from preferred
// data-sources.
func OptPreferredOnly(b bool) Option {
return func(cnf *Config) {
cnf.PreferredOnly = b
}
}

// OptVerifierURL sets URL of the verification resource.
func OptVerifierURL(s string) Option {
return func(cnf *Config) {
Expand Down
19 changes: 8 additions & 11 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,21 @@ func TestConfigDefault(t *testing.T) {
Format: gnfmt.CSV,
VerifierURL: "https://verifier.globalnames.org/api/v0/",
}
assert.Equal(t, cnf.Format, deflt.Format)
assert.Equal(t, cnf.VerifierURL, deflt.VerifierURL)
assert.Equal(t, deflt.Format, cnf.Format)
assert.Equal(t, deflt.VerifierURL, cnf.VerifierURL)
}

func TestConfigOpts(t *testing.T) {
opts := opts()
cnf := config.New(opts...)
updt := config.Config{
Format: gnfmt.PrettyJSON,
PreferredOnly: true,
DataSources: []int{1, 2, 3},
VerifierURL: url,
Format: gnfmt.PrettyJSON,
DataSources: []int{1, 2, 3},
VerifierURL: url,
}
assert.Equal(t, cnf.Format, updt.Format)
assert.Equal(t, cnf.PreferredOnly, updt.PreferredOnly)
assert.Equal(t, cnf.DataSources, updt.DataSources)
assert.Equal(t, cnf.VerifierURL, updt.VerifierURL)
assert.Equal(t, updt.Format, cnf.Format)
assert.Equal(t, updt.DataSources, cnf.DataSources)
assert.Equal(t, updt.VerifierURL, cnf.VerifierURL)
}

type formatTest struct {
Expand All @@ -43,7 +41,6 @@ type formatTest struct {
// opts returns the options applied in TestConfigOpts: PrettyJSON format,
// PreferredOnly enabled, data sources 1, 2 and 3, and the verifier URL taken
// from url (declared outside this view).
func opts() []config.Option {
return []config.Option{
config.OptFormat(gnfmt.PrettyJSON),
config.OptPreferredOnly(true),
config.OptDataSources([]int{1, 2, 3}),
config.OptVerifierURL(url),
}
}
Expand Down
42 changes: 22 additions & 20 deletions ent/output/output.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package output

import (
"fmt"
"strconv"
"strings"

Expand All @@ -12,6 +13,7 @@ type csvField int

const (
kind csvField = iota
sortScore
matchType
editDistance
input
Expand All @@ -26,27 +28,27 @@ const (
error
)

const prefMatch = "PreferredMatch"
const sortedMatch = "SortedMatch"

// NameOutput takes result of verification for one string and converts it into
// required format (CSV or JSON).
func NameOutput(ver vlib.Name, f gnfmt.Format, prefOnly bool) string {
func NameOutput(ver vlib.Name, f gnfmt.Format) string {
switch f {
case gnfmt.CSV:
return csvOutput(ver, prefOnly, ',')
return csvOutput(ver, ',')
case gnfmt.TSV:
return csvOutput(ver, prefOnly, '\t')
return csvOutput(ver, '\t')
case gnfmt.CompactJSON:
return jsonOutput(ver, prefOnly, false)
return jsonOutput(ver, false)
case gnfmt.PrettyJSON:
return jsonOutput(ver, prefOnly, true)
return jsonOutput(ver, true)
}
return "N/A"
}

// CSVHeader returns the header string for CSV output format.
func CSVHeader(f gnfmt.Format) string {
header := []string{"Kind", "MatchType", "EditDistance", "ScientificName",
header := []string{"Kind", "SortScore", "MatchType", "EditDistance", "ScientificName",
"MatchedName", "MatchedCanonical", "TaxonId", "CurrentName", "Synonym",
"DataSourceId", "DataSourceTitle", "ClassificationPath", "Error"}
switch f {
Expand All @@ -59,14 +61,13 @@ func CSVHeader(f gnfmt.Format) string {
}
}

func csvOutput(ver vlib.Name, prefOnly bool, sep rune) string {
func csvOutput(ver vlib.Name, sep rune) string {
var res []string
if !prefOnly {
if ver.BestResult != nil {
best := csvRow(ver, -1, sep)
res = append(res, best)
}
if prefOnly && len(ver.Results) == 0 {
res = append(res, csvNoPrefRow(ver, sep))
} else if len(ver.Results) == 0 {
res = append(res, csvEmptyRow(ver, sep))
}
for i := range ver.Results {
pref := csvRow(ver, i, sep)
Expand All @@ -76,9 +77,9 @@ func csvOutput(ver vlib.Name, prefOnly bool, sep rune) string {
return strings.Join(res, "\n")
}

func csvNoPrefRow(ver vlib.Name, sep rune) string {
func csvEmptyRow(ver vlib.Name, sep rune) string {
s := []string{
prefMatch, vlib.NoMatch.String(), "", ver.Name,
sortedMatch, "0.0", vlib.NoMatch.String(), "", ver.Name,
"", "", "", "", "", "", "", "", ver.Error,
}
return gnfmt.ToCSV(s, sep)
Expand All @@ -89,17 +90,21 @@ func csvRow(ver vlib.Name, prefIndex int, sep rune) string {
res := ver.BestResult

if prefIndex > -1 {
kind = prefMatch
if prefIndex > 0 {
kind = sortedMatch
}
res = ver.Results[prefIndex]
}

s := []string{
kind, ver.MatchType.String(), "", ver.Name,
kind, "0.0", vlib.NoMatch.String(), "", ver.Name,
"", "", "", "", "", "", "", "", ver.Error,
}

if res != nil {
s[editDistance] = strconv.Itoa(res.EditDistance)
s[sortScore] = fmt.Sprintf("%0.5f", res.SortScore)
s[matchType] = res.MatchType.String()
s[matchedName] = res.MatchedName
s[matchedCanonical] = res.MatchedCanonicalFull
s[taxonID] = res.RecordID
Expand All @@ -113,11 +118,8 @@ func csvRow(ver vlib.Name, prefIndex int, sep rune) string {
return gnfmt.ToCSV(s, sep)
}

func jsonOutput(ver vlib.Name, prefOnly bool, pretty bool) string {
func jsonOutput(ver vlib.Name, pretty bool) string {
enc := gnfmt.GNjson{Pretty: pretty}
if prefOnly {
ver.BestResult = nil
}
res, _ := enc.Encode(ver)
return string(res)
}
41 changes: 17 additions & 24 deletions ent/output/output_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,53 +19,46 @@ func TestOutput(t *testing.T) {
msg string
input vlib.Name
format gnfmt.Format
prefOnly bool
test func(*testing.T, string)
linesNum int
}{
{
msg: "csv_prefOnly_false",
input: verifs[0],
format: gnfmt.CSV,
prefOnly: false,
msg: "csv",
input: verifs[0],
format: gnfmt.CSV,
test: func(t *testing.T, res string) {
assert.NotContains(t, res, "inputID", "csv,false 1")
assert.Contains(t, res, "BestMatch", "csv, false 2")
assert.Contains(t, res, "PreferredMatch", "csv, false 3")
assert.Contains(t, res, "SortedMatch", "csv, false 3")
assert.True(t, strings.HasPrefix(res, "BestMatch"), "csv, false 4")
},
linesNum: 3,
},
{
msg: "not_name_csv_prefOnly_true",
input: verifs[2],
format: gnfmt.CSV,
prefOnly: true,
msg: "not_name_csv",
input: verifs[2],
format: gnfmt.CSV,
test: func(t *testing.T, res string) {
assert.NotContains(t, res, "inputID", "noname, csv, true 1")
assert.NotContains(t, res, "BestMatch", "noname, csv, true 2")
assert.Contains(t, res, "PreferredMatch", "noname, csv, true 3")
assert.Contains(t, res, "SortedMatch", "noname, csv, true 3")
assert.Contains(t, res, "NoMatch", "noname, csv, true 4")
},
linesNum: 1,
},
{
msg: "pretty",
input: verifs[0],
format: gnfmt.PrettyJSON,
prefOnly: false,
msg: "pretty",
input: verifs[0],
format: gnfmt.PrettyJSON,
test: func(t *testing.T, res string) {
assert.Contains(t, res, "id", "pretty 1")
assert.Contains(t, res, "bestResult", "pretty 2")
assert.Contains(t, res, "results", "pretty 3")
},
linesNum: 104,
linesNum: 107,
},
{
msg: "compact",
input: verifs[0],
format: gnfmt.CompactJSON,
prefOnly: false,
msg: "compact",
input: verifs[0],
format: gnfmt.CompactJSON,
test: func(t *testing.T, res string) {
assert.Contains(t, res, "id", "compact 1")
assert.Contains(t, res, "bestResult", "compact 2")
Expand All @@ -77,9 +70,9 @@ func TestOutput(t *testing.T) {

for i := range tests {
t.Run(tests[i].msg, func(t *testing.T) {
res := output.NameOutput(tests[i].input, tests[i].format, tests[i].prefOnly)
res := output.NameOutput(tests[i].input, tests[i].format)
lines := strings.Split(res, "\n")
assert.Equal(t, len(lines), tests[i].linesNum)
assert.Equal(t, tests[i].linesNum, len(lines))
tests[i].test(t, res)
})
}
Expand Down
5 changes: 0 additions & 5 deletions gnverifier/cmd/gnverifier.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,6 @@
#
# Format: csv

# PreferredOnly if true, do not show BestResult, only Preferred Results.
# Its valid values are 'true' and 'false'.
#
# PreferredOnly: false

# DataSources is a list of data-source IDs that should always return
# matched records if they are found.
# You can find list of all data-sources at
Expand Down
Loading

0 comments on commit c468966

Please sign in to comment.