Skip to content

Commit

Permalink
Fix Issue go-gota#169 - NA is still converted to NaN even in a string column
Browse files Browse the repository at this point in the history

Copied from this PR:
go-gota#175
  • Loading branch information
shivamthapar committed Nov 20, 2024
1 parent f705409 commit a7d1a6c
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 2 deletions.
9 changes: 7 additions & 2 deletions dataframe/dataframe.go
Original file line number Diff line number Diff line change
Expand Up @@ -1219,17 +1219,22 @@ func LoadRecords(records [][]string, options ...LoadOption) DataFrame {
types := make([]series.Type, len(headers))
rawcols := make([][]string, len(headers))
for i, colname := range headers {
t, useCustomType := cfg.types[colname]
rawcol := make([]string, len(records))
for j := 0; j < len(records); j++ {
rawcol[j] = records[j][i]
if useCustomType && t == series.String {
// skip the NaN conversion when the column has a custom string type
continue
}
if findInStringSlice(rawcol[j], cfg.nanValues) != -1 {
rawcol[j] = "NaN"
}
}
rawcols[i] = rawcol

t, ok := cfg.types[colname]
if !ok {
// try to auto detect the data type
if !useCustomType {
t = cfg.defaultType
if cfg.detectTypes {
if l, err := findType(rawcol); err == nil {
Expand Down
43 changes: 43 additions & 0 deletions dataframe/dataframe_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1420,6 +1420,49 @@ Spain,2012-02-01,66,555.42,00241
}
}

// test case for issue #169
func TestReadCSV_Issue169(t *testing.T) {
// Load the data from a CSV string and try to infer the type of the
// columns, but NA won't be converted to NaN when data type is specified
// as string.
const ExampleData = `
Country,Region,Date,Age,Amount,Id
"United States",NA,2012-02-01,50,112.1,01234
"United States",US,2012-02-01,32,321.31,54320
"United Kingdom",GB,2012-02-01,17,18.2,12345
"United States",NA,2012-02-01,32,321.31,54320
"United States","NA",2012-02-01,17,321.31,54320
"United Kingdom",GB,2012-02-01,NA,18.2,12345
"United States",NA,2012-02-01,32,321.31,54320
Spain,EU,2012-02-01,66,555.42,00241
`

df := ReadCSV(
strings.NewReader(ExampleData),
WithTypes(map[string]series.Type{
"Region": series.String,
"Age": series.String,
}),
)

if df.Err != nil {
t.Errorf("Expected success, got error: %v", df.Err)
}

for _, v := range df.Col("Region").Records() {
if v == "NaN" {
t.Errorf("Expected not to convert NA to NaN, but it does")
}
}

for _, v := range df.Col("Age").Records() {
if v == "NaN" {
t.Errorf("Expected not to convert NA to NaN, but it does")
}
}

}

func TestReadJSON(t *testing.T) {
table := []struct {
jsonStr string
Expand Down

0 comments on commit a7d1a6c

Please sign in to comment.