diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8aaea2b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+# IntelliJ
+.idea/
+*.iml
+
+# vscode
+.vscode/
+*.code-workspace
diff --git a/.travis.yml b/.travis.yml
index c1be208..06428d6 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,3 +1,13 @@
language: go
go: master
-script: go test -v ./dataframe ./series
+before_script:
+ - echo 'Checking code quality issues.'
+ - go vet ./...
+ - echo 'Checking that gofmt was used.'
+ - diff -u <(echo -n) <(gofmt -d .)
+ - echo 'Checking tidiness of go mod.'
+ - go mod tidy
+ - test -z "$(git status --porcelain)"
+script:
+ - echo 'Running tests.'
+ - go test -v ./...
diff --git a/CHANGELOG.md b/CHANGELOG.md
index db736fe..aad6416 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,26 @@
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).
+## [0.11.0] - 2021-04-25
+### Added
+- Rolling window Mean and StdDev
+- GroupBy and Aggregate
+- Numeric column index
+- Read HTML tables
+- extra checks for TravisCI
+- Combining filters with AND
+- User-defined filters
+- Concatenation of Dataframes
+
+### Changed
+- Make fixColnames faster
+- Use Go 1.16
+- Update dependencies
+
+### Fixed
+- Linter issues
+- Failing tests
+
## [0.10.1] - 2019-11-08
### Fixed
- LoadRecords printing type debug information
diff --git a/README.md b/README.md
index 5b8bf55..49e5742 100644
--- a/README.md
+++ b/README.md
@@ -182,14 +182,80 @@ column "B" is greater than 4:
fil := df.Filter(
dataframe.F{"A", series.Eq, "a"},
dataframe.F{"B", series.Greater, 4},
+)
+
+filAlt := df.FilterAggregation(
+ dataframe.Or,
+ dataframe.F{"A", series.Eq, "a"},
+ dataframe.F{"B", series.Greater, 4},
)
+```
+
+Filters inside Filter are combined as OR operations; alternatively, we can use `df.FilterAggregation` with `dataframe.Or`.
+
+If we want to combine filters with AND operations, we can use `df.FilterAggregation` with `dataframe.And`.
+
+```go
+fil := df.FilterAggregation(
+ dataframe.And,
+ dataframe.F{"A", series.Eq, "a"},
+ dataframe.F{"D", series.Eq, true},
+)
+```
+
+To combine AND and OR operations, we can use chaining of filters.
+
+```go
+// combine filters with OR
+fil := df.Filter(
+ dataframe.F{"A", series.Eq, "a"},
+ dataframe.F{"B", series.Greater, 4},
+)
+// apply AND for fil and fil2
fil2 := fil.Filter(
dataframe.F{"D", series.Eq, true},
)
```
-Filters inside Filter are combined as OR operations whereas if we chain
-Filter methods, they will behave as AND.
+Filtering is based on predefined comparison operators:
+* `series.Eq`
+* `series.Neq`
+* `series.Greater`
+* `series.GreaterEq`
+* `series.Less`
+* `series.LessEq`
+* `series.In`
+
+However, if these filter operations are not sufficient, we can use user-defined comparators.
+We use `series.CompFunc` and a user-defined function with the signature `func(series.Element) bool` to provide user-defined filters to `df.Filter` and `df.FilterAggregation`.
+
+```go
+hasPrefix := func(prefix string) func(el series.Element) bool {
+ return func (el series.Element) bool {
+ if el.Type() == series.String {
+ if val, ok := el.Val().(string); ok {
+ return strings.HasPrefix(val, prefix)
+ }
+ }
+ return false
+ }
+ }
+
+fil := df.Filter(
+ dataframe.F{"A", series.CompFunc, hasPrefix("aa")},
+)
+```
+
+This example filters rows based on whether they have a cell value starting with `"aa"` in column `"A"`.
+
+#### GroupBy && Aggregation
+
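+Rows can be grouped by one or more key columns with `df.GroupBy`; each group can then be summarised column by column with `Aggregation`.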
+
+```go
+groups := df.GroupBy("key1", "key2") // Group by column "key1", and column "key2"
+aggre := groups.Aggregation([]dataframe.AggregationType{dataframe.Aggregation_MAX, dataframe.Aggregation_MIN}, []string{"values", "values2"}) // Maximum value in column "values", Minimum value in column "values2"
+```
#### Arrange
diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go
index cf1ae41..df95d55 100644
--- a/dataframe/dataframe.go
+++ b/dataframe/dataframe.go
@@ -14,6 +14,8 @@ import (
"unicode/utf8"
"github.com/go-gota/gota/series"
+ "golang.org/x/net/html"
+ "golang.org/x/net/html/atom"
)
// DataFrame is a data structure designed for operating on table like data (Such
@@ -233,7 +235,7 @@ func (df DataFrame) print(
}
}
if i < len(notShowing) {
- notShownArr = append(notShownArr, notShowing[i:len(notShowing)])
+ notShownArr = append(notShownArr, notShowing[i:])
}
for k, ns := range notShownArr {
notShown += strings.Join(ns, ", ")
@@ -250,7 +252,7 @@ func (df DataFrame) print(
// Subsetting, mutating and transforming DataFrame methods
// =======================================================
-// Set will update the values of a DataFrame for all rows selected via indexes.
+// Set will update the values of a DataFrame for the rows selected via indexes.
func (df DataFrame) Set(indexes series.Indexes, newvalues DataFrame) DataFrame {
if df.Err != nil {
return df
@@ -371,6 +373,148 @@ func (df DataFrame) Drop(indexes SelectIndexes) DataFrame {
return df
}
+// KEY_ERROR is an exported string constant.
+// NOTE(review): nothing in the visible part of this change references it — confirm it is needed.
+const KEY_ERROR = "KEY_ERROR"
+
+// GroupBy partitions the rows of the DataFrame by the values found in the
+// given key columns and returns the resulting groups.
+//
+// Every distinct combination of key values becomes one group. Internally a
+// group is identified by the formatted key values joined with "_" in the
+// order of colnames.
+//
+// Errors are reported through the Err field of the returned Groups: when df
+// already carries an error, when no column name is given, when a column name
+// does not exist, or when a key value has an unsupported type (only string,
+// bool, int, int16, int32, int64, float32 and float64 are handled).
+func (df DataFrame) GroupBy(colnames ...string) *Groups {
+	if df.Err != nil {
+		return &Groups{Err: df.Err}
+	}
+	if len(colnames) == 0 {
+		// Return a usable value instead of nil so that callers can inspect
+		// Err or chain Aggregation without dereferencing a nil pointer.
+		return &Groups{Err: fmt.Errorf("GroupBy: no column names given")}
+	}
+	// Check that every requested column exists in the DataFrame.
+	names := df.Names()
+	for _, c := range colnames {
+		if findInStringSlice(c, names) == -1 {
+			return &Groups{Err: fmt.Errorf("GroupBy: can't find column name: %s", c)}
+		}
+	}
+
+	groupSeries := make(map[string][]map[string]interface{})
+	for _, s := range df.Maps() {
+		// Build the group key for this row, one part per key column.
+		parts := make([]string, len(colnames))
+		for i, c := range colnames {
+			switch v := s[c].(type) {
+			case string:
+				parts[i] = v
+			case bool:
+				// %t, not %s: %s applied to a bool renders "%!s(bool=true)".
+				parts[i] = fmt.Sprintf("%t", v)
+			case int, int16, int32, int64:
+				parts[i] = fmt.Sprintf("%d", v)
+			case float32, float64:
+				parts[i] = fmt.Sprintf("%f", v)
+			default:
+				return &Groups{Err: fmt.Errorf("GroupBy: type not found")}
+			}
+		}
+		key := strings.Join(parts, "_")
+		groupSeries[key] = append(groupSeries[key], s)
+	}
+
+	groupDataFrame := make(map[string]DataFrame, len(groupSeries))
+	for k, cMaps := range groupSeries {
+		groupDataFrame[k] = LoadMaps(cMaps)
+	}
+	return &Groups{groups: groupDataFrame, colnames: colnames}
+}
+
+// AggregationType identifies the aggregation method that Groups.Aggregation
+// applies to a column.
+type AggregationType int
+
+// Aggregation methods understood by Groups.Aggregation.
+const (
+ Aggregation_MAX AggregationType = 0 // maximum of the column (Series.Max)
+ Aggregation_MIN AggregationType = 1 // minimum of the column (Series.Min)
+ Aggregation_MEAN AggregationType = 2 // mean of the column (Series.Mean)
+ Aggregation_MEDIAN AggregationType = 3 // median of the column (Series.Median)
+ Aggregation_STD AggregationType = 4 // standard deviation of the column (Series.StdDev)
+ Aggregation_SUM AggregationType = 5 // sum of the column (Series.Sum)
+ Aggregation_COUNT AggregationType = 6 // number of elements in the column (Series.Len)
+)
+
+// String returns the upper-case name of the aggregation method ("MAX", "MIN",
+// ...), or "UNKNOWN" for a value that is not one of the declared constants.
+// Groups.Aggregation uses this name as the suffix of the result column names.
+func (aggregation AggregationType) String() string {
+ switch aggregation {
+ case Aggregation_MAX:
+ return "MAX"
+ case Aggregation_MIN:
+ return "MIN"
+ case Aggregation_MEAN:
+ return "MEAN"
+ case Aggregation_MEDIAN:
+ return "MEDIAN"
+ case Aggregation_STD:
+ return "STD"
+ case Aggregation_SUM:
+ return "SUM"
+ case Aggregation_COUNT:
+ return "COUNT"
+ default:
+ return "UNKNOWN"
+ }
+}
+
+// Groups is the result of DataFrame.GroupBy: the rows of a DataFrame
+// partitioned by the values of the group-by columns.
+type Groups struct {
+ groups map[string]DataFrame // one DataFrame per group, keyed by the formatted key values
+ colnames []string // the column names GroupBy was called with
+ aggregation DataFrame // assigned by Aggregation with its result
+ Err error // set by GroupBy when grouping failed
+}
+
+// Aggregation computes one aggregate per (typs[i], colnames[i]) pair for every
+// group and returns the results as a new DataFrame with one row per group.
+//
+// Each row holds the group-by key columns and one column per aggregate, named
+// "<column>_<TYPE>" (for example "values_MAX"). typs and colnames must have
+// the same length. Because the groups are stored in a map, the order of the
+// rows in the result is not defined.
+//
+// Errors are reported through the Err field of the returned DataFrame.
+func (gps Groups) Aggregation(typs []AggregationType, colnames []string) DataFrame {
+	// Surface a GroupBy failure instead of masking it as "input is nil".
+	if gps.Err != nil {
+		return DataFrame{Err: gps.Err}
+	}
+	if gps.groups == nil {
+		return DataFrame{Err: fmt.Errorf("Aggregation: input is nil")}
+	}
+	if len(typs) != len(colnames) {
+		return DataFrame{Err: fmt.Errorf("Aggregation: len(typs) != len(colnames)")}
+	}
+	dfMaps := make([]map[string]interface{}, 0, len(gps.groups))
+	for _, df := range gps.groups {
+		rows := df.Maps()
+		if len(rows) == 0 {
+			// GroupBy never produces an empty group; guard anyway so that a
+			// hand-built Groups value cannot cause an index-out-of-range panic.
+			continue
+		}
+		// All rows of a group share the same key values, so the first row
+		// supplies the group-by columns of the result row.
+		targetMap := rows[0]
+		curMap := make(map[string]interface{}, len(gps.colnames)+len(colnames))
+		for _, c := range gps.colnames {
+			value, ok := targetMap[c]
+			if !ok {
+				return DataFrame{Err: fmt.Errorf("Aggregation: can't find column name: %s", c)}
+			}
+			curMap[c] = value
+		}
+		// Compute the requested aggregates.
+		for i, c := range colnames {
+			curSeries := df.Col(c)
+			// Do not aggregate a Series that carries an error
+			// (presumably an unknown column name — verify against Col).
+			if curSeries.Err != nil {
+				return DataFrame{Err: fmt.Errorf("Aggregation: %v", curSeries.Err)}
+			}
+			var value float64
+			switch typs[i] {
+			case Aggregation_MAX:
+				value = curSeries.Max()
+			case Aggregation_MEAN:
+				value = curSeries.Mean()
+			case Aggregation_MEDIAN:
+				value = curSeries.Median()
+			case Aggregation_MIN:
+				value = curSeries.Min()
+			case Aggregation_STD:
+				value = curSeries.StdDev()
+			case Aggregation_SUM:
+				value = curSeries.Sum()
+			case Aggregation_COUNT:
+				value = float64(curSeries.Len())
+			default:
+				return DataFrame{Err: fmt.Errorf("Aggregation: this method %s not found", typs[i])}
+			}
+			curMap[fmt.Sprintf("%s_%s", c, typs[i])] = value
+		}
+		dfMaps = append(dfMaps, curMap)
+	}
+	// gps is a copy (value receiver), so this assignment is not visible to
+	// the caller; the result is only available through the return value.
+	gps.aggregation = LoadMaps(dfMaps)
+	return gps.aggregation
+}
+
// Rename changes the name of one of the columns of a DataFrame
func (df DataFrame) Rename(newname, oldname string) DataFrame {
if df.Err != nil {
@@ -427,6 +571,54 @@ func (df DataFrame) RBind(dfb DataFrame) DataFrame {
return New(expandedSeries...)
}
+// Concat concatenates rows of two DataFrames like RBind, but also including
+// unmatched columns.
+//
+// The result contains every column name that appears in df or dfb, in order
+// of first appearance (df's columns first). A column missing from one of the
+// two DataFrames is filled for that DataFrame's rows with a placeholder
+// series built from empty struct values (presumably loaded as missing values
+// — confirm against series.New). If df or dfb already carries an error it is
+// returned unchanged; a failing series concatenation is reported through Err.
+func (df DataFrame) Concat(dfb DataFrame) DataFrame {
+ if df.Err != nil {
+ return df
+ }
+ if dfb.Err != nil {
+ return dfb
+ }
+
+ // Collect the union of the column names, preserving first-seen order.
+ uniques := make(map[string]struct{})
+ cols := []string{}
+ for _, t := range []DataFrame{df, dfb} {
+ for _, u := range t.Names() {
+ if _, ok := uniques[u]; !ok {
+ uniques[u] = struct{}{}
+ cols = append(cols, u)
+ }
+ }
+ }
+
+ expandedSeries := make([]series.Series, len(cols))
+ for k, v := range cols {
+ aidx := findInStringSlice(v, df.Names())
+ bidx := findInStringSlice(v, dfb.Names())
+
+ // aidx and bidx must not be -1 at the same time.
+ var a, b series.Series
+ if aidx != -1 {
+ a = df.columns[aidx]
+ } else {
+ // Column only exists in dfb: build a placeholder of df.nrows
+ // elements with the type and name of dfb's column.
+ bb := dfb.columns[bidx]
+ a = series.New(make([]struct{}, df.nrows), bb.Type(), bb.Name)
+ }
+ if bidx != -1 {
+ b = dfb.columns[bidx]
+ } else {
+ // Column only exists in df: placeholder of dfb.nrows elements.
+ b = series.New(make([]struct{}, dfb.nrows), a.Type(), a.Name)
+ }
+ newSeries := a.Concat(b)
+ if err := newSeries.Err; err != nil {
+ return DataFrame{Err: fmt.Errorf("concat: %v", err)}
+ }
+ expandedSeries[k] = newSeries
+ }
+ return New(expandedSeries...)
+}
+
// Mutate changes a column of the DataFrame with the given Series or adds it as
// a new column if the column name does not exist.
func (df DataFrame) Mutate(s series.Series) DataFrame {
@@ -463,6 +655,7 @@ func (df DataFrame) Mutate(s series.Series) DataFrame {
// F is the filtering structure
type F struct {
+ Colidx int
Colname string
Comparator series.Comparator
Comparando interface{}
@@ -473,14 +666,47 @@ type F struct {
// whereas if we chain Filter calls, every filter will act as an AND operation
// with regards to the rest.
func (df DataFrame) Filter(filters ...F) DataFrame {
+ return df.FilterAggregation(Or, filters...)
+}
+
+// Aggregation defines how the results of several filters are combined by
+// FilterAggregation.
+type Aggregation int
+
+// String returns "or" or "and" for the declared values and a descriptive
+// "unknown aggregation N" text for any other value.
+func (a Aggregation) String() string {
+	switch a {
+	case Or:
+		return "or"
+	case And:
+		return "and"
+	}
+	return fmt.Sprintf("unknown aggregation %d", a)
+}
+
+const (
+	// Or aggregates filters with logical or
+	Or Aggregation = iota
+	// And aggregates filters with logical and
+	And
+)
+
+// FilterAggregation will filter the rows of a DataFrame based on the given filters. All
+// filters on the argument of a Filter call are aggregated depending on the supplied
+// aggregation.
+func (df DataFrame) FilterAggregation(agg Aggregation, filters ...F) DataFrame {
if df.Err != nil {
return df
}
+
compResults := make([]series.Series, len(filters))
for i, f := range filters {
- idx := findInStringSlice(f.Colname, df.Names())
- if idx < 0 {
- return DataFrame{Err: fmt.Errorf("filter: can't find column name")}
+ var idx int
+ if f.Colname == "" {
+ idx = f.Colidx
+ } else {
+ idx = findInStringSlice(f.Colname, df.Names())
+ if idx < 0 {
+ return DataFrame{Err: fmt.Errorf("filter: can't find column name")}
+ }
}
res := df.columns[idx].Compare(f.Comparator, f.Comparando)
if err := res.Err; err != nil {
@@ -488,10 +714,11 @@ func (df DataFrame) Filter(filters ...F) DataFrame {
}
compResults[i] = res
}
- // Join compResults via "OR"
+
if len(compResults) == 0 {
return df.Copy()
}
+
res, err := compResults[0].Bool()
if err != nil {
return DataFrame{Err: fmt.Errorf("filter: %v", err)}
@@ -502,7 +729,14 @@ func (df DataFrame) Filter(filters ...F) DataFrame {
return DataFrame{Err: fmt.Errorf("filter: %v", err)}
}
for j := 0; j < len(res); j++ {
- res[j] = res[j] || nextRes[j]
+ switch agg {
+ case Or:
+ res[j] = res[j] || nextRes[j]
+ case And:
+ res[j] = res[j] && nextRes[j]
+ default:
+ panic(agg)
+ }
}
}
return df.Subset(res)
@@ -1106,7 +1340,9 @@ func ReadCSV(r io.Reader, options ...LoadOption) DataFrame {
// resulting records.
func ReadJSON(r io.Reader, options ...LoadOption) DataFrame {
var m []map[string]interface{}
- err := json.NewDecoder(r).Decode(&m)
+ d := json.NewDecoder(r)
+ d.UseNumber()
+ err := d.Decode(&m)
if err != nil {
return DataFrame{Err: err}
}
@@ -1160,6 +1396,131 @@ func (df DataFrame) WriteJSON(w io.Writer) error {
return json.NewEncoder(w).Encode(df.Maps())
}
+// Internal state for implementing ReadHTML: a cell with a rowspan that still
+// has to be repeated in following rows.
+type remainder struct {
+ index int // column index at which the cell was placed
+ text string // text of the cell
+ nrows int // number of further rows the cell still has to be copied into
+}
+
+// readRows converts the given table row nodes into records of cell texts,
+// expanding colspan (a cell is repeated horizontally) and rowspan (a cell is
+// repeated in the following rows).
+//
+// Only td element children of each row node are read, and a cell's text is
+// taken from its direct text-node children only.
+func readRows(trs []*html.Node) [][]string {
+ // rems holds the cells carried over from earlier rows through rowspan.
+ rems := []remainder{}
+ rows := [][]string{}
+ for _, tr := range trs {
+ // xrems collects the cells that must be carried into the next row.
+ xrems := []remainder{}
+ row := []string{}
+ index := 0
+ // NOTE(review): text is declared once per row, so a td without any text
+ // node reuses the text of the previous cell in the row — confirm intended.
+ text := ""
+ // NOTE(review): j is incremented but never read.
+ for j, td := 0, tr.FirstChild; td != nil; j, td = j+1, td.NextSibling {
+ if td.Type == html.ElementNode && td.DataAtom == atom.Td {
+
+ // First emit the carried-over cells that belong at or before the
+ // current column.
+ for len(rems) > 0 {
+ v := rems[0]
+ if v.index > index {
+ break
+ }
+ v, rems = rems[0], rems[1:]
+ row = append(row, v.text)
+ if v.nrows > 1 {
+ xrems = append(xrems, remainder{v.index, v.text, v.nrows - 1})
+ }
+ index++
+ }
+
+ // Attribute values that do not parse as integers are ignored and
+ // leave the span at 1.
+ rowspan, colspan := 1, 1
+ for _, attr := range td.Attr {
+ switch attr.Key {
+ case "rowspan":
+ if k, err := strconv.Atoi(attr.Val); err == nil {
+ rowspan = k
+ }
+ case "colspan":
+ if k, err := strconv.Atoi(attr.Val); err == nil {
+ colspan = k
+ }
+ }
+ }
+ // The last direct text node wins; text inside nested elements is
+ // not visited.
+ for c := td.FirstChild; c != nil; c = c.NextSibling {
+ if c.Type == html.TextNode {
+ text = strings.TrimSpace(c.Data)
+ }
+ }
+
+ // Repeat the cell colspan times and remember it for the following
+ // rows when it spans more than one row.
+ for k := 0; k < colspan; k++ {
+ row = append(row, text)
+ if rowspan > 1 {
+ xrems = append(xrems, remainder{index, text, rowspan - 1})
+ }
+ index++
+ }
+ }
+ }
+ // Carried-over cells not consumed above are appended at the end of the row.
+ for j := 0; j < len(rems); j++ {
+ v := rems[j]
+ row = append(row, v.text)
+ if v.nrows > 1 {
+ xrems = append(xrems, remainder{v.index, v.text, v.nrows - 1})
+ }
+ }
+ rows = append(rows, row)
+ rems = xrems
+ }
+ // Cells whose rowspan reaches past the last row node produce extra rows
+ // made of the carried-over texts only.
+ for len(rems) > 0 {
+ xrems := []remainder{}
+ row := []string{}
+ for i := 0; i < len(rems); i++ {
+ v := rems[i]
+ row = append(row, v.text)
+ if v.nrows > 1 {
+ xrems = append(xrems, remainder{v.index, v.text, v.nrows - 1})
+ }
+ }
+ rows = append(rows, row)
+ rems = xrems
+ }
+ return rows
+}
+
+// ReadHTML parses the HTML document read from r and returns one DataFrame for
+// every table element whose rows LoadRecords can load; options are passed on
+// to LoadRecords.
+//
+// Only tr (and th) elements that are direct children of a tbody child of the
+// table are collected. Tables for which LoadRecords reports an error are
+// left out of the result, and the search does not descend into a table, so
+// nested tables are not returned separately. If the document cannot be
+// parsed, a single DataFrame carrying the parse error is returned.
+func ReadHTML(r io.Reader, options ...LoadOption) []DataFrame {
+ var err error
+ var dfs []DataFrame
+ var doc *html.Node
+ var f func(*html.Node)
+
+ doc, err = html.Parse(r)
+ if err != nil {
+ return []DataFrame{DataFrame{Err: err}}
+ }
+
+ // f walks the node tree depth-first and converts every table it meets.
+ f = func(n *html.Node) {
+ if n.Type == html.ElementNode && n.DataAtom == atom.Table {
+ trs := []*html.Node{}
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ if c.Type == html.ElementNode && c.DataAtom == atom.Tbody {
+ for cc := c.FirstChild; cc != nil; cc = cc.NextSibling {
+ if cc.Type == html.ElementNode && (cc.DataAtom == atom.Th || cc.DataAtom == atom.Tr) {
+ trs = append(trs, cc)
+ }
+ }
+ }
+ }
+
+ df := LoadRecords(readRows(trs), options...)
+ if df.Err == nil {
+ dfs = append(dfs, df)
+ }
+ // Do not descend into the table itself.
+ return
+ }
+
+ for c := n.FirstChild; c != nil; c = c.NextSibling {
+ f(c)
+ }
+ }
+
+ f(doc)
+ return dfs
+}
+
// Getters/Setters for DataFrame fields
// ====================================
@@ -1392,7 +1753,7 @@ func (df DataFrame) LeftJoin(b DataFrame, keys ...string) DataFrame {
newCols[ii].Append(elem)
ii++
}
- for _ = range iNotKeysB {
+ for range iNotKeysB {
newCols[ii].Append(nil)
ii++
}
@@ -1496,7 +1857,7 @@ func (df DataFrame) RightJoin(b DataFrame, keys ...string) DataFrame {
newCols[ii].Append(elem)
ii++
}
- for _ = range iNotKeysA {
+ for range iNotKeysA {
newCols[ii].Append(nil)
ii++
}
@@ -1598,7 +1959,7 @@ func (df DataFrame) OuterJoin(b DataFrame, keys ...string) DataFrame {
newCols[ii].Append(elem)
ii++
}
- for _ = range iNotKeysB {
+ for range iNotKeysB {
newCols[ii].Append(nil)
ii++
}
@@ -1624,7 +1985,7 @@ func (df DataFrame) OuterJoin(b DataFrame, keys ...string) DataFrame {
newCols[ii].Append(elem)
ii++
}
- for _ = range iNotKeysA {
+ for range iNotKeysA {
newCols[ii].Append(nil)
ii++
}
@@ -1717,7 +2078,7 @@ func (df DataFrame) Elem(r, c int) series.Element {
// fixColnames assigns a name to the missing column names and makes it so that the
// column names are unique.
func fixColnames(colnames []string) {
- // Find duplicated colnames
+ // Find duplicated and missing colnames
dupnamesidx := make(map[string][]int)
var missingnames []int
for i := 0; i < len(colnames); i++ {
@@ -1726,16 +2087,17 @@ func fixColnames(colnames []string) {
missingnames = append(missingnames, i)
continue
}
- for j := 0; j < len(colnames); j++ {
- b := colnames[j]
- if i != j && a == b {
- temp := dupnamesidx[a]
- if !inIntSlice(i, temp) {
- dupnamesidx[a] = append(temp, i)
- }
- }
+ // for now, dupnamesidx contains the indices of *all* the columns
+ // the columns with unique locations will be removed after this loop
+ dupnamesidx[a] = append(dupnamesidx[a], i)
+ }
+ // NOTE: deleting a map key in a range is legal and correct in Go.
+ for k, places := range dupnamesidx {
+ if len(places) < 2 {
+ delete(dupnamesidx, k)
}
}
+ // Now: dupnameidx contains only keys that appeared more than once
// Autofill missing column names
counter := 0
diff --git a/dataframe/dataframe_test.go b/dataframe/dataframe_test.go
index 114c0e4..564d193 100644
--- a/dataframe/dataframe_test.go
+++ b/dataframe/dataframe_test.go
@@ -2,6 +2,7 @@ package dataframe
import (
"bytes"
+ "fmt"
"reflect"
"strconv"
"strings"
@@ -553,6 +554,117 @@ func TestDataFrame_RBind(t *testing.T) {
}
}
+// TestDataFrame_Concat checks DataFrame.Concat against a table of cases:
+// identical columns, a column whose type differs between the two frames,
+// a column missing from the second frame, an extra column in the second
+// frame, and an empty first frame. Types, column names and records of the
+// result are compared with the expected DataFrame.
+func TestDataFrame_Concat(t *testing.T) {
+ // NA is an empty struct type used to build the expected missing values.
+ type NA struct{}
+
+ a := New(
+ series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1"),
+ series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ )
+ table := []struct {
+ dfa DataFrame
+ dfb DataFrame
+ expDf DataFrame
+ }{
+ {
+ a,
+ New(
+ series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1"),
+ series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ ),
+ New(
+ series.New([]string{"b", "a", "b", "c", "d", "b", "a", "b", "c", "d"}, series.String, "COL.1"),
+ series.New([]int{1, 2, 4, 5, 4, 1, 2, 4, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2, 3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ ),
+ },
+ {
+ a,
+ New(
+ series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.1"),
+ series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ ),
+ New(
+ series.New([]string{"b", "a", "b", "c", "d", "1", "2", "4", "5", "4"}, series.String, "COL.1"),
+ series.New([]int{1, 2, 4, 5, 4, 1, 2, 4, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2, 3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ ),
+ },
+
+ {
+ a,
+ New(
+ series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1"),
+ series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ ),
+ New(
+ series.New([]string{"b", "a", "b", "c", "d", "b", "a", "b", "c", "d"}, series.String, "COL.1"),
+ series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2").Concat(series.New([]NA{NA{}, NA{}, NA{}, NA{}, NA{}}, series.Int, "")),
+ series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2, 3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ ),
+ },
+ {
+ a,
+ New(
+ series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.1"),
+ series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ series.New([]string{"a", "b", "c", "d", "e"}, series.String, "COL.4"),
+ ),
+ New(
+ series.New([]string{"b", "a", "b", "c", "d", "1", "2", "4", "5", "4"}, series.String, "COL.1"),
+ series.New([]int{1, 2, 4, 5, 4, 1, 2, 4, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2, 3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ series.New([]NA{NA{}, NA{}, NA{}, NA{}, NA{}}, series.String, "COL.4").Concat(series.New([]string{"a", "b", "c", "d", "e"}, series.String, "COL.4")),
+ ),
+ },
+ {
+ a,
+ New(
+ series.New([]string{"a", "b", "c", "d", "e"}, series.String, "COL.0"),
+ series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.1"),
+ series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ ),
+ New(
+ series.New([]string{"b", "a", "b", "c", "d", "1", "2", "4", "5", "4"}, series.String, "COL.1"),
+ series.New([]int{1, 2, 4, 5, 4, 1, 2, 4, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2, 3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ series.New([]NA{NA{}, NA{}, NA{}, NA{}, NA{}}, series.String, "COL.0").Concat(series.New([]string{"a", "b", "c", "d", "e"}, series.String, "COL.0")),
+ ),
+ },
+ {
+ DataFrame{},
+ a,
+ a,
+ },
+ }
+ for i, tc := range table {
+ b := tc.dfa.Concat(tc.dfb)
+
+ if b.Err != nil {
+ t.Errorf("Test: %d\nError:%v", i, b.Err)
+ }
+ //if err := checkAddrDf(a, b); err != nil {
+ //t.Error(err)
+ //}
+ // Check that the types are the same between both DataFrames
+ if !reflect.DeepEqual(tc.expDf.Types(), b.Types()) {
+ t.Errorf("Test: %d\nDifferent types:\nA:%v\nB:%v", i, tc.expDf.Types(), b.Types())
+ }
+ // Check that the colnames are the same between both DataFrames
+ if !reflect.DeepEqual(tc.expDf.Names(), b.Names()) {
+ t.Errorf("Test: %d\nDifferent colnames:\nA:%v\nB:%v", i, tc.expDf.Names(), b.Names())
+ }
+ // Check that the values are the same between both DataFrames
+ if !reflect.DeepEqual(tc.expDf.Records(), b.Records()) {
+ t.Errorf("Test: %d\nDifferent values:\nA:%v\nB:%v", i, tc.expDf.Records(), b.Records())
+ }
+ }
+}
func TestDataFrame_Records(t *testing.T) {
a := New(
series.New([]string{"a", "b", "c"}, series.String, "COL.1"),
@@ -634,7 +746,7 @@ func TestDataFrame_Mutate(t *testing.T) {
}
}
-func TestDataFrame_Filter(t *testing.T) {
+func TestDataFrame_Filter_Or(t *testing.T) {
a := New(
series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1"),
series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"),
@@ -645,7 +757,7 @@ func TestDataFrame_Filter(t *testing.T) {
expDf DataFrame
}{
{
- []F{{"COL.2", series.GreaterEq, 4}},
+ []F{{0, "COL.2", series.GreaterEq, 4}},
New(
series.New([]string{"b", "c", "d"}, series.String, "COL.1"),
series.New([]int{4, 5, 4}, series.Int, "COL.2"),
@@ -654,8 +766,8 @@ func TestDataFrame_Filter(t *testing.T) {
},
{
[]F{
- {"COL.2", series.Greater, 4},
- {"COL.2", series.Eq, 1},
+ {0, "COL.2", series.Greater, 4},
+ {0, "COL.2", series.Eq, 1},
},
New(
series.New([]string{"b", "c"}, series.String, "COL.1"),
@@ -665,9 +777,21 @@ func TestDataFrame_Filter(t *testing.T) {
},
{
[]F{
- {"COL.2", series.Greater, 4},
- {"COL.2", series.Eq, 1},
- {"COL.1", series.Eq, "d"},
+ {0, "COL.2", series.Greater, 4},
+ {0, "COL.2", series.Eq, 1},
+ {0, "COL.1", series.Eq, "d"},
+ },
+ New(
+ series.New([]string{"b", "c", "d"}, series.String, "COL.1"),
+ series.New([]int{1, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{3.0, 3.2, 1.2}, series.Float, "COL.3"),
+ ),
+ },
+ {
+ []F{
+ {1, "", series.Greater, 4},
+ {1, "", series.Eq, 1},
+ {0, "", series.Eq, "d"},
},
New(
series.New([]string{"b", "c", "d"}, series.String, "COL.1"),
@@ -697,6 +821,117 @@ func TestDataFrame_Filter(t *testing.T) {
if !reflect.DeepEqual(tc.expDf.Records(), b.Records()) {
t.Errorf("Test: %d\nDifferent values:\nA:%v\nB:%v", i, tc.expDf.Records(), b.Records())
}
+
+ b2 := a.FilterAggregation(Or, tc.filters...)
+
+ // Check that the types are the same between both DataFrames
+ if !reflect.DeepEqual(b.Types(), b2.Types()) {
+ t.Errorf("Test: %d\nDifferent types:\nB:%v\nB2:%v", i, b.Types(), b2.Types())
+ }
+ // Check that the colnames are the same between both DataFrames
+ if !reflect.DeepEqual(b.Names(), b2.Names()) {
+ t.Errorf("Test: %d\nDifferent colnames:\nB:%v\nB2:%v", i, b.Names(), b2.Names())
+ }
+ // Check that the values are the same between both DataFrames
+ if !reflect.DeepEqual(b.Records(), b2.Records()) {
+ t.Errorf("Test: %d\nDifferent values:\nB:%v\nB2:%v", i, b.Records(), b2.Records())
+ }
+ }
+}
+
+// TestDataFrame_Filter_And checks FilterAggregation with the And aggregation.
+// Every scenario is exercised twice: once selecting the column by name
+// (Colname) and once by position (Colidx). Types, column names and records
+// of the result are compared with the expected DataFrame.
+func TestDataFrame_Filter_And(t *testing.T) {
+ a := New(
+ series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1"),
+ series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ )
+ table := []struct {
+ filters []F
+ expDf DataFrame
+ }{
+ {
+ []F{{Colname: "COL.2", Comparator: series.GreaterEq, Comparando: 4}},
+ New(
+ series.New([]string{"b", "c", "d"}, series.String, "COL.1"),
+ series.New([]int{4, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ ),
+ },
+ {
+ []F{{Colidx: 1, Comparator: series.GreaterEq, Comparando: 4}},
+ New(
+ series.New([]string{"b", "c", "d"}, series.String, "COL.1"),
+ series.New([]int{4, 5, 4}, series.Int, "COL.2"),
+ series.New([]float64{5.3, 3.2, 1.2}, series.Float, "COL.3"),
+ ),
+ },
+ // should not have any rows
+ {
+ []F{
+ {Colname: "COL.2", Comparator: series.Greater, Comparando: 4},
+ {Colname: "COL.2", Comparator: series.Eq, Comparando: 1},
+ },
+ New(
+ series.New([]string{}, series.String, "COL.1"),
+ series.New([]int{}, series.Int, "COL.2"),
+ series.New([]float64{}, series.Float, "COL.3"),
+ ),
+ },
+ {
+ []F{
+ {Colidx: 1, Comparator: series.Greater, Comparando: 4},
+ {Colidx: 1, Comparator: series.Eq, Comparando: 1},
+ },
+ New(
+ series.New([]string{}, series.String, "COL.1"),
+ series.New([]int{}, series.Int, "COL.2"),
+ series.New([]float64{}, series.Float, "COL.3"),
+ ),
+ },
+ {
+ []F{
+ {Colname: "COL.2", Comparator: series.Less, Comparando: 4},
+ {Colname: "COL.1", Comparator: series.Eq, Comparando: "b"},
+ },
+ New(
+ series.New([]string{"b"}, series.String, "COL.1"),
+ series.New([]int{1}, series.Int, "COL.2"),
+ series.New([]float64{3.0}, series.Float, "COL.3"),
+ ),
+ },
+ {
+ []F{
+ {Colidx: 1, Comparator: series.Less, Comparando: 4},
+ {Colidx: 0, Comparator: series.Eq, Comparando: "b"},
+ },
+ New(
+ series.New([]string{"b"}, series.String, "COL.1"),
+ series.New([]int{1}, series.Int, "COL.2"),
+ series.New([]float64{3.0}, series.Float, "COL.3"),
+ ),
+ },
+ }
+ for i, tc := range table {
+ b := a.FilterAggregation(And, tc.filters...)
+
+ if b.Err != nil {
+ t.Errorf("Test: %d\nError:%v", i, b.Err)
+ }
+ //if err := checkAddrDf(a, b); err != nil {
+ //t.Error(err)
+ //}
+ // Check that the types are the same between both DataFrames
+ if !reflect.DeepEqual(tc.expDf.Types(), b.Types()) {
+ t.Errorf("Test: %d\nDifferent types:\nA:%v\nB:%v", i, tc.expDf.Types(), b.Types())
+ }
+ // Check that the colnames are the same between both DataFrames
+ if !reflect.DeepEqual(tc.expDf.Names(), b.Names()) {
+ t.Errorf("Test: %d\nDifferent colnames:\nA:%v\nB:%v", i, tc.expDf.Names(), b.Names())
+ }
+ // Check that the values are the same between both DataFrames
+ if !reflect.DeepEqual(tc.expDf.Records(), b.Records()) {
+ t.Errorf("Test: %d\nDifferent values:\nA:%v\nB:%v", i, tc.expDf.Records(), b.Records())
+ }
}
}
@@ -1008,13 +1243,13 @@ func TestLoadMaps(t *testing.T) {
{ // Test: 0
LoadMaps(
[]map[string]interface{}{
- map[string]interface{}{
+ {
"A": "a",
"B": 1,
"C": true,
"D": 0,
},
- map[string]interface{}{
+ {
"A": "b",
"B": 2,
"C": true,
@@ -1032,13 +1267,13 @@ func TestLoadMaps(t *testing.T) {
{ // Test: 1
LoadMaps(
[]map[string]interface{}{
- map[string]interface{}{
+ {
"A": "a",
"B": 1,
"C": true,
"D": 0,
},
- map[string]interface{}{
+ {
"A": "b",
"B": 2,
"C": true,
@@ -1059,13 +1294,13 @@ func TestLoadMaps(t *testing.T) {
{ // Test: 2
LoadMaps(
[]map[string]interface{}{
- map[string]interface{}{
+ {
"A": "a",
"B": 1,
"C": true,
"D": 0,
},
- map[string]interface{}{
+ {
"A": "b",
"B": 2,
"C": true,
@@ -1086,13 +1321,13 @@ func TestLoadMaps(t *testing.T) {
{ // Test: 3
LoadMaps(
[]map[string]interface{}{
- map[string]interface{}{
+ {
"A": "a",
"B": 1,
"C": true,
"D": 0,
},
- map[string]interface{}{
+ {
"A": "b",
"B": 2,
"C": true,
@@ -1117,13 +1352,13 @@ func TestLoadMaps(t *testing.T) {
{ // Test: 4
LoadMaps(
[]map[string]interface{}{
- map[string]interface{}{
+ {
"A": "a",
"B": 1,
"C": true,
"D": 0,
},
- map[string]interface{}{
+ {
"A": "b",
"B": 2,
"C": true,
@@ -1191,13 +1426,13 @@ func TestReadJSON(t *testing.T) {
expDf DataFrame
}{
{
- `[{"COL.1":null,"COL.2":1,"COL.3":3},{"COL.1":5,"COL.2":2,"COL.3":2},{"COL.1":6,"COL.2":3,"COL.3":1}]`,
+ `[{"COL.1":null,"COL.2":1,"COL.3":3},{"COL.1":5,"COL.2":2,"COL.3":2},{"COL.1":6,"COL.2":3,"COL.3":20180428}]`,
LoadRecords(
[][]string{
{"COL.1", "COL.2", "COL.3"},
{"NaN", "1", "3"},
{"5", "2", "2"},
- {"6", "3", "1"},
+ {"6", "3", "20180428"},
},
DetectTypes(false),
DefaultType(series.Int),
@@ -1238,6 +1473,79 @@ func TestReadJSON(t *testing.T) {
}
}
+// TestReadHTML checks that ReadHTML returns the expected DataFrames for a
+// table of HTML inputs. The number of returned DataFrames is verified before
+// they are compared one by one, so a mismatch is always reported and can
+// never lead to an out-of-range access on the expected slice.
+func TestReadHTML(t *testing.T) {
+	table := []struct {
+		htmlStr string
+		expDf   []DataFrame
+	}{
+		{
+			"",
+			[]DataFrame{},
+		},
+		{
+			`
+
+
+
+ `,
+			[]DataFrame{
+				LoadRecords(
+					[][]string{
+						{"COL.1"},
+						{"100"},
+					}),
+			},
+		},
+		{
+			`
+
+
+ COL.1 | COL.2 | COL.3 |
+ 100 |
+
+
+ `,
+			[]DataFrame{
+				LoadRecords(
+					[][]string{
+						{"COL.1", "COL.2", "COL.3"},
+						{"COL.1", "COL.2", "100"},
+					}),
+			},
+		},
+	}
+
+	for i, tc := range table {
+		cs := ReadHTML(strings.NewReader(tc.htmlStr))
+		if tc.htmlStr != "" && len(cs) == 0 {
+			t.Errorf("Test: %d, got zero dataframes: %#v", i, cs)
+		}
+		// Compare the lengths up front: inside the loop below the check would
+		// be skipped for an empty result and could not prevent tc.expDf[j]
+		// from indexing out of range.
+		if len(cs) != len(tc.expDf) {
+			t.Errorf("Test: %d\n got len(%d), want len(%d)", i, len(cs), len(tc.expDf))
+			continue
+		}
+		for j, c := range cs {
+			if c.Err != nil {
+				t.Errorf("Test: %d\nError:%v", i, c.Err)
+			}
+			// Check that the types are the same between both DataFrames
+			if !reflect.DeepEqual(tc.expDf[j].Types(), c.Types()) {
+				t.Errorf("Test: %d\nDifferent types:\nA:%v\nB:%v", i, tc.expDf[j].Types(), c.Types())
+			}
+			// Check that the colnames are the same between both DataFrames
+			if !reflect.DeepEqual(tc.expDf[j].Names(), c.Names()) {
+				t.Errorf("Test: %d\nDifferent colnames:\nA:%v\nB:%v", i, tc.expDf[j].Names(), c.Names())
+			}
+			// Check that the values are the same between both DataFrames
+			if !reflect.DeepEqual(tc.expDf[j].Records(), c.Records()) {
+				t.Errorf("Test: %d\nDifferent values:\nA:%v\nB:%v", i, tc.expDf[j].Records(), c.Records())
+			}
+		}
+	}
+}
+
func TestDataFrame_SetNames(t *testing.T) {
a := New(
series.New([]string{"a", "b", "c"}, series.String, "COL.1"),
@@ -1641,17 +1949,17 @@ func TestDataFrame_Maps(t *testing.T) {
)
m := a.Maps()
expected := []map[string]interface{}{
- map[string]interface{}{
+ {
"COL.1": "a",
"COL.2": nil,
"COL.3": nil,
},
- map[string]interface{}{
+ {
"COL.1": "b",
"COL.2": 2,
"COL.3": nil,
},
- map[string]interface{}{
+ {
"COL.1": "c",
"COL.2": 3,
"COL.3": 3,
@@ -2491,11 +2799,11 @@ func TestDescribe(t *testing.T) {
{
LoadRecords(
[][]string{
- []string{"A", "B", "C", "D"},
- []string{"a", "4", "5.1", "true"},
- []string{"b", "4", "6.0", "true"},
- []string{"c", "3", "6.0", "false"},
- []string{"a", "2", "7.1", "false"},
+ {"A", "B", "C", "D"},
+ {"a", "4", "5.1", "true"},
+ {"b", "4", "6.0", "true"},
+ {"c", "3", "6.0", "false"},
+ {"a", "2", "7.1", "false"},
}),
New(
@@ -2558,3 +2866,69 @@ func TestDescribe(t *testing.T) {
}
}
}
+
+const MIN = 0.000001
+
+// IsEqual reports whether f1 and f2 differ by less than the MIN tolerance.
+// If the difference is NaN (a NaN operand, or two infinities of the same
+// sign) the values are reported as not equal.
+func IsEqual(f1, f2 float64) bool {
+	// The absolute difference replaces the two mirrored math.Dim branches.
+	return math.Abs(f1-f2) < MIN
+}
+// TestDataFrame_GroupBy checks that grouping by two key columns partitions the
+// rows as expected and that grouping by missing columns reports an error.
+func TestDataFrame_GroupBy(t *testing.T) {
+	a := New(
+		series.New([]string{"b", "a", "b", "a", "b"}, series.String, "key1"),
+		series.New([]int{1, 2, 1, 2, 2}, series.Int, "key2"),
+		series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "values"),
+	)
+	groups := a.GroupBy("key1", "key2")
+	if groups.Err != nil {
+		t.Fatalf("GroupBy: unexpected error: %v", groups.Err)
+	}
+	// Expected sum of "values" per group, keyed as <key1>_<key2>.
+	want := map[string]float64{"a_2": 4 + 3.2, "b_1": 3 + 5.3, "b_2": 1.2}
+	if len(groups.groups) != len(want) {
+		t.Errorf("GroupBy: expect %d groups, but got %d", len(want), len(groups.groups))
+	}
+	for k, values := range groups.groups {
+		got := 0.0
+		for _, vMap := range values.Maps() {
+			got += vMap["values"].(float64)
+		}
+		if targetV, ok := want[k]; !ok {
+			t.Errorf("GroupBy: %s not found", k)
+		} else if !IsEqual(targetV, got) {
+			t.Errorf("GroupBy: expect %f , but got %f", targetV, got)
+		}
+	}
+
+	// Grouping by columns that do not exist must surface an error.
+	b := New(series.New([]string{"b", "a", "b", "a", "b"}, series.String, "key3"))
+	if b.GroupBy("key1", "key2").Err == nil {
+		t.Errorf("GroupBy: expected an error for missing columns, got nil")
+	}
+}
+
+// TestDataFrame_Aggregation checks the per-group maximum of "values" produced
+// by aggregating a frame that was grouped on key1 and key2.
+func TestDataFrame_Aggregation(t *testing.T) {
+	a := New(
+		series.New([]string{"b", "a", "b", "a", "b"}, series.String, "key1"),
+		series.New([]int{1, 2, 1, 2, 2}, series.Int, "key2"),
+		series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "values"),
+		series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "values2"),
+	)
+	groups := a.GroupBy("key1", "key2")
+	df := groups.Aggregation([]AggregationType{Aggregation_MAX, Aggregation_MIN, Aggregation_COUNT, Aggregation_SUM}, []string{"values", "values2", "values2", "values2"})
+	want := map[string]float64{"a_2": 4, "b_1": 5.3, "b_2": 1.2}
+	for _, m := range df.Maps() {
+		key := fmt.Sprintf("%s_%d", m["key1"], m["key2"])
+		// Report the same column that is compared.
+		if got := m["values_MAX"].(float64); !IsEqual(got, want[key]) {
+			t.Errorf("Aggregation: expect %f , but got %f", want[key], got)
+		}
+	}
+}
diff --git a/dataframe/examples_test.go b/dataframe/examples_test.go
index 6687ea7..8cdb36c 100644
--- a/dataframe/examples_test.go
+++ b/dataframe/examples_test.go
@@ -24,9 +24,9 @@ func ExampleLoadStructs() {
Accuracy float64
}
users := []User{
- User{"Aram", 17, 0.2},
- User{"Juan", 18, 0.8},
- User{"Ana", 22, 0.5},
+ {"Aram", 17, 0.2},
+ {"Juan", 18, 0.8},
+ {"Ana", 22, 0.5},
}
df := dataframe.LoadStructs(users)
fmt.Println(df)
@@ -35,11 +35,11 @@ func ExampleLoadStructs() {
func ExampleLoadRecords() {
df := dataframe.LoadRecords(
[][]string{
- []string{"A", "B", "C", "D"},
- []string{"a", "4", "5.1", "true"},
- []string{"k", "5", "7.0", "true"},
- []string{"k", "4", "6.0", "true"},
- []string{"a", "2", "7.1", "false"},
+ {"A", "B", "C", "D"},
+ {"a", "4", "5.1", "true"},
+ {"k", "5", "7.0", "true"},
+ {"k", "4", "6.0", "true"},
+ {"a", "2", "7.1", "false"},
},
)
fmt.Println(df)
@@ -48,11 +48,11 @@ func ExampleLoadRecords() {
func ExampleLoadRecords_options() {
df := dataframe.LoadRecords(
[][]string{
- []string{"A", "B", "C", "D"},
- []string{"a", "4", "5.1", "true"},
- []string{"k", "5", "7.0", "true"},
- []string{"k", "4", "6.0", "true"},
- []string{"a", "2", "7.1", "false"},
+ {"A", "B", "C", "D"},
+ {"a", "4", "5.1", "true"},
+ {"k", "5", "7.0", "true"},
+ {"k", "4", "6.0", "true"},
+ {"a", "2", "7.1", "false"},
},
dataframe.DetectTypes(false),
dataframe.DefaultType(series.Float),
@@ -67,13 +67,13 @@ func ExampleLoadRecords_options() {
func ExampleLoadMaps() {
df := dataframe.LoadMaps(
[]map[string]interface{}{
- map[string]interface{}{
+ {
"A": "a",
"B": 1,
"C": true,
"D": 0,
},
- map[string]interface{}{
+ {
"A": "b",
"B": 2,
"C": true,
@@ -109,11 +109,11 @@ func ExampleReadJSON() {
func ExampleDataFrame_Subset() {
df := dataframe.LoadRecords(
[][]string{
- []string{"A", "B", "C", "D"},
- []string{"a", "4", "5.1", "true"},
- []string{"k", "5", "7.0", "true"},
- []string{"k", "4", "6.0", "true"},
- []string{"a", "2", "7.1", "false"},
+ {"A", "B", "C", "D"},
+ {"a", "4", "5.1", "true"},
+ {"k", "5", "7.0", "true"},
+ {"k", "4", "6.0", "true"},
+ {"a", "2", "7.1", "false"},
},
)
sub := df.Subset([]int{0, 2})
@@ -123,11 +123,11 @@ func ExampleDataFrame_Subset() {
func ExampleDataFrame_Select() {
df := dataframe.LoadRecords(
[][]string{
- []string{"A", "B", "C", "D"},
- []string{"a", "4", "5.1", "true"},
- []string{"k", "5", "7.0", "true"},
- []string{"k", "4", "6.0", "true"},
- []string{"a", "2", "7.1", "false"},
+ {"A", "B", "C", "D"},
+ {"a", "4", "5.1", "true"},
+ {"k", "5", "7.0", "true"},
+ {"k", "4", "6.0", "true"},
+ {"a", "2", "7.1", "false"},
},
)
sel1 := df.Select([]int{0, 2})
@@ -139,11 +139,11 @@ func ExampleDataFrame_Select() {
func ExampleDataFrame_Filter() {
df := dataframe.LoadRecords(
[][]string{
- []string{"A", "B", "C", "D"},
- []string{"a", "4", "5.1", "true"},
- []string{"k", "5", "7.0", "true"},
- []string{"k", "4", "6.0", "true"},
- []string{"a", "2", "7.1", "false"},
+ {"A", "B", "C", "D"},
+ {"a", "4", "5.1", "true"},
+ {"k", "5", "7.0", "true"},
+ {"k", "4", "6.0", "true"},
+ {"a", "2", "7.1", "false"},
},
)
fil := df.Filter(
@@ -172,11 +172,11 @@ func ExampleDataFrame_Filter() {
func ExampleDataFrame_Mutate() {
df := dataframe.LoadRecords(
[][]string{
- []string{"A", "B", "C", "D"},
- []string{"a", "4", "5.1", "true"},
- []string{"k", "5", "7.0", "true"},
- []string{"k", "4", "6.0", "true"},
- []string{"a", "2", "7.1", "false"},
+ {"A", "B", "C", "D"},
+ {"a", "4", "5.1", "true"},
+ {"k", "5", "7.0", "true"},
+ {"k", "4", "6.0", "true"},
+ {"a", "2", "7.1", "false"},
},
)
// Change column C with a new one
@@ -194,20 +194,20 @@ func ExampleDataFrame_Mutate() {
func ExampleDataFrame_InnerJoin() {
df := dataframe.LoadRecords(
[][]string{
- []string{"A", "B", "C", "D"},
- []string{"a", "4", "5.1", "true"},
- []string{"k", "5", "7.0", "true"},
- []string{"k", "4", "6.0", "true"},
- []string{"a", "2", "7.1", "false"},
+ {"A", "B", "C", "D"},
+ {"a", "4", "5.1", "true"},
+ {"k", "5", "7.0", "true"},
+ {"k", "4", "6.0", "true"},
+ {"a", "2", "7.1", "false"},
},
)
df2 := dataframe.LoadRecords(
[][]string{
- []string{"A", "F", "D"},
- []string{"1", "1", "true"},
- []string{"4", "2", "false"},
- []string{"2", "8", "false"},
- []string{"5", "9", "false"},
+ {"A", "F", "D"},
+ {"1", "1", "true"},
+ {"4", "2", "false"},
+ {"2", "8", "false"},
+ {"5", "9", "false"},
},
)
join := df.InnerJoin(df2, "D")
@@ -217,20 +217,20 @@ func ExampleDataFrame_InnerJoin() {
func ExampleDataFrame_Set() {
df := dataframe.LoadRecords(
[][]string{
- []string{"A", "B", "C", "D"},
- []string{"a", "4", "5.1", "true"},
- []string{"k", "5", "7.0", "true"},
- []string{"k", "4", "6.0", "true"},
- []string{"a", "2", "7.1", "false"},
+ {"A", "B", "C", "D"},
+ {"a", "4", "5.1", "true"},
+ {"k", "5", "7.0", "true"},
+ {"k", "4", "6.0", "true"},
+ {"a", "2", "7.1", "false"},
},
)
df2 := df.Set(
series.Ints([]int{0, 2}),
dataframe.LoadRecords(
[][]string{
- []string{"A", "B", "C", "D"},
- []string{"b", "4", "6.0", "true"},
- []string{"c", "3", "6.0", "false"},
+ {"A", "B", "C", "D"},
+ {"b", "4", "6.0", "true"},
+ {"c", "3", "6.0", "false"},
},
),
)
@@ -240,11 +240,11 @@ func ExampleDataFrame_Set() {
func ExampleDataFrame_Arrange() {
df := dataframe.LoadRecords(
[][]string{
- []string{"A", "B", "C", "D"},
- []string{"a", "4", "5.1", "true"},
- []string{"b", "4", "6.0", "true"},
- []string{"c", "3", "6.0", "false"},
- []string{"a", "2", "7.1", "false"},
+ {"A", "B", "C", "D"},
+ {"a", "4", "5.1", "true"},
+ {"b", "4", "6.0", "true"},
+ {"c", "3", "6.0", "false"},
+ {"a", "2", "7.1", "false"},
},
)
sorted := df.Arrange(
@@ -257,11 +257,11 @@ func ExampleDataFrame_Arrange() {
func ExampleDataFrame_Describe() {
df := dataframe.LoadRecords(
[][]string{
- []string{"A", "B", "C", "D"},
- []string{"a", "4", "5.1", "true"},
- []string{"b", "4", "6.0", "true"},
- []string{"c", "3", "6.0", "false"},
- []string{"a", "2", "7.1", "false"},
+ {"A", "B", "C", "D"},
+ {"a", "4", "5.1", "true"},
+ {"b", "4", "6.0", "true"},
+ {"c", "3", "6.0", "false"},
+ {"a", "2", "7.1", "false"},
},
)
fmt.Println(df.Describe())
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..6af1705
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,8 @@
+module github.com/go-gota/gota
+
+go 1.16
+
+require (
+ golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6
+ gonum.org/v1/gonum v0.9.1
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..20a4dc3
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,72 @@
+dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
+gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8=
+github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
+github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
+github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
+github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
+github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g=
+github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks=
+github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
+github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
+github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
+github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
+github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
+github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
+github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY=
+github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI=
+github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w=
+github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3 h1:n9HxLrNxWWtEb1cA950nuEEj3QnKbtsCJ6KjcgisNUs=
+golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE=
+golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
+golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
+golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
+golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6 h1:0PC75Fz/kyMGhL0e1QnypqK2kQMqKt9csD1GnMJR+Zk=
+golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
+gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
+gonum.org/v1/gonum v0.9.1 h1:HCWmqqNoELL0RAQeKBXWtkp04mGk8koafcB4He6+uhc=
+gonum.org/v1/gonum v0.9.1/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0=
+gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc=
+gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
+gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
+gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY=
+rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
diff --git a/series/rolling_window.go b/series/rolling_window.go
new file mode 100644
index 0000000..e2f1740
--- /dev/null
+++ b/series/rolling_window.go
@@ -0,0 +1,52 @@
+package series
+
+// RollingWindow is used for rolling window calculations.
+type RollingWindow struct {
+ window int // number of consecutive elements that make up each block
+ series Series // series the window is moved across
+}
+
+// Rolling returns a RollingWindow over s whose blocks span window elements.
+func (s Series) Rolling(window int) RollingWindow {
+	var r RollingWindow
+	r.series = s
+	r.window = window
+	return r
+}
+
+// Mean returns a Float series named "Mean" holding the mean of every block.
+func (r RollingWindow) Mean() (s Series) {
+	s = New([]float64{}, Float, "Mean")
+	blocks := r.getBlocks()
+	for i := range blocks {
+		s.Append(blocks[i].Mean())
+	}
+	return s
+}
+
+// StdDev returns a Float series named "StdDev" holding the standard deviation of every block.
+func (r RollingWindow) StdDev() (s Series) {
+	s = New([]float64{}, Float, "StdDev")
+	blocks := r.getBlocks()
+	for i := range blocks {
+		s.Append(blocks[i].StdDev())
+	}
+	return s
+}
+
+// getBlocks yields one block per element; blocks are empty until window elements exist.
+func (r RollingWindow) getBlocks() (blocks []Series) {
+	for end := 1; end <= r.series.Len(); end++ {
+		if end < r.window {
+			blocks = append(blocks, r.series.Empty())
+			continue
+		}
+		// Positions of the window elements that end at element end-1.
+		index := []int{}
+		for pos := end - r.window; pos < end; pos++ {
+			index = append(index, pos)
+		}
+		blocks = append(blocks, r.series.Subset(index))
+	}
+	return blocks
+}
diff --git a/series/rolling_window_test.go b/series/rolling_window_test.go
new file mode 100644
index 0000000..77ca331
--- /dev/null
+++ b/series/rolling_window_test.go
@@ -0,0 +1,85 @@
+package series
+
+import (
+ "math"
+ "strings"
+ "testing"
+)
+
+// TestSeries_RollingMean checks Rolling(window).Mean() against expected values.
+// Elements are compared by string form, presumably so NaN placeholders match.
+func TestSeries_RollingMean(t *testing.T) {
+	tests := []struct {
+		window   int
+		series   Series
+		expected Series
+	}{
+		{
+			3,
+			Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}),
+			Floats([]float64{math.NaN(), math.NaN(), 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}),
+		},
+		{
+			2,
+			Floats([]float64{1.0, 2.0, 3.0}),
+			Floats([]float64{math.NaN(), 1.5, 2.5}),
+		},
+		{
+			0,
+			Floats([]float64{}),
+			Floats([]float64{}),
+		},
+	}
+	for testnum, test := range tests {
+		expected := test.expected
+		received := test.series.Rolling(test.window).Mean()
+		if received.Len() != expected.Len() {
+			t.Errorf("Test:%v\nExpected length:%v\nReceived length:%v", testnum, expected.Len(), received.Len())
+			continue
+		}
+		for i := 0; i < expected.Len(); i++ {
+			if strings.Compare(expected.Elem(i).String(), received.Elem(i).String()) != 0 {
+				t.Errorf("Test:%v\nExpected:\n%v\nReceived:\n%v", testnum, expected, received)
+			}
+		}
+	}
+}
+
+// TestSeries_RollingStdDev checks Rolling(window).StdDev() against expected values.
+// Elements are compared by string form, presumably so NaN placeholders match.
+func TestSeries_RollingStdDev(t *testing.T) {
+	tests := []struct {
+		window   int
+		series   Series
+		expected Series
+	}{
+		{
+			3,
+			Ints([]int{5, 5, 6, 7, 5, 5, 5}),
+			Floats([]float64{math.NaN(), math.NaN(), 0.5773502691896257, 1.0, 1.0, 1.1547005383792515, 0.0}),
+		},
+		{
+			2,
+			Floats([]float64{1.0, 2.0, 3.0}),
+			Floats([]float64{math.NaN(), 0.7071067811865476, 0.7071067811865476}),
+		},
+		{
+			0,
+			Floats([]float64{}),
+			Floats([]float64{}),
+		},
+	}
+	for testnum, test := range tests {
+		expected := test.expected
+		received := test.series.Rolling(test.window).StdDev()
+		if received.Len() != expected.Len() {
+			t.Errorf("Test:%v\nExpected length:%v\nReceived length:%v", testnum, expected.Len(), received.Len())
+			continue
+		}
+		for i := 0; i < expected.Len(); i++ {
+			if strings.Compare(expected.Elem(i).String(), received.Elem(i).String()) != 0 {
+				t.Errorf("Test:%v\nExpected:\n%v\nReceived:\n%v", testnum, expected, received)
+			}
+		}
+	}
+}
diff --git a/series/series.go b/series/series.go
index 79fe0a0..408773d 100644
--- a/series/series.go
+++ b/series/series.go
@@ -93,15 +93,19 @@ type Comparator string
// Supported Comparators
const (
- Eq Comparator = "==" // Equal
- Neq Comparator = "!=" // Non equal
- Greater Comparator = ">" // Greater than
- GreaterEq Comparator = ">=" // Greater or equal than
- Less Comparator = "<" // Lesser than
- LessEq Comparator = "<=" // Lesser or equal than
- In Comparator = "in" // Inside
+ Eq Comparator = "==" // Equal
+ Neq Comparator = "!=" // Non equal
+ Greater Comparator = ">" // Greater than
+ GreaterEq Comparator = ">=" // Greater or equal than
+ Less Comparator = "<" // Lesser than
+ LessEq Comparator = "<=" // Lesser or equal than
+ In Comparator = "in" // Inside
+ CompFunc Comparator = "func" // user-defined comparison function
)
+// compFunc defines a user-defined comparator function. Used internally for type assertions
+type compFunc = func(el Element) bool
+
// Type is a convenience alias that can be used for a more type safe way of
// reason and use Series types.
type Type string
@@ -390,9 +394,25 @@ func (s Series) Compare(comparator Comparator, comparando interface{}) Series {
return ret, nil
}
- comp := New(comparando, s.t, "")
bools := make([]bool, s.Len())
- // In comparator comparation
+
+ // CompFunc comparator comparison
+ if comparator == CompFunc {
+ f, ok := comparando.(compFunc)
+ if !ok {
+ panic("comparando is not a comparison function of type func(el Element) bool")
+ }
+
+ for i := 0; i < s.Len(); i++ {
+ e := s.elements.Elem(i)
+ bools[i] = f(e)
+ }
+
+ return Bools(bools)
+ }
+
+ comp := New(comparando, s.t, "")
+ // In comparator comparison
if comparator == In {
for i := 0; i < s.Len(); i++ {
e := s.elements.Elem(i)
@@ -777,7 +797,6 @@ func (s Series) Quantile(p float64) float64 {
// the function passed in via argument `f` will not expect another type, but
// instead expects to handle Element(s) of type Float.
func (s Series) Map(f MapFunction) Series {
-
mappedValues := make([]Element, s.Len())
for i := 0; i < s.Len(); i++ {
value := f(s.elements.Elem(i))
@@ -785,3 +804,17 @@ func (s Series) Map(f MapFunction) Series {
}
return New(mappedValues, s.Type(), s.Name)
}
+
+// Sum returns the sum of the series elements as a float64. It returns NaN for
+// an empty series and for series of type String or Bool.
+func (s Series) Sum() float64 {
+	if t := s.Type(); s.elements.Len() == 0 || t == String || t == Bool {
+		return math.NaN()
+	}
+	values := s.Float()
+	total := values[0]
+	for _, v := range values[1:] {
+		total += v
+	}
+	return total
+}
diff --git a/series/series_test.go b/series/series_test.go
index c7d0516..d8e24fe 100644
--- a/series/series_test.go
+++ b/series/series_test.go
@@ -4,8 +4,8 @@ import (
"fmt"
"math"
"reflect"
- "testing"
"strings"
+ "testing"
)
// Check that there are no shared memory addreses between the elements of two Series
@@ -421,6 +421,76 @@ func TestSeries_Compare(t *testing.T) {
}
}
+// TestSeries_Compare_CompFunc checks Compare with the CompFunc comparator: a
+// valid predicate is applied to every element, an invalid comparando panics.
+func TestSeries_Compare_CompFunc(t *testing.T) {
+	table := []struct {
+		series     Series
+		comparator Comparator
+		comparando interface{}
+		expected   Series
+		panic      bool
+	}{
+		{
+			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
+			CompFunc,
+			// Matches string elements that start with "B".
+			func(el Element) bool {
+				val, ok := el.Val().(string)
+				return el.Type() == String && ok && strings.HasPrefix(val, "B")
+			},
+			Bools([]bool{false, true, false, true, false, true}),
+			false,
+		},
+		{
+			Strings([]string{"A", "B", "C", "B", "D", "BADA"}),
+			CompFunc,
+			// Wrong signature (no bool result): Compare must panic, so the
+			// expected series below is never compared.
+			func(el Element) {},
+			Bools([]bool{false, false, false, false, false, false}),
+			true,
+		},
+	}
+	for testnum, test := range table {
+		func() {
+			// Each case runs in its own closure so a panic can be recovered
+			// and checked against the case's expectation.
+			defer func() {
+				r := recover()
+				switch {
+				case r != nil && !test.panic:
+					t.Errorf("Test:%v\ndid not expect panic but got '%v'", testnum, r)
+				case r == nil && test.panic:
+					t.Errorf("Test:%v\nexpected panic but did not panic", testnum)
+				}
+			}()
+
+			a := test.series
+			b := a.Compare(test.comparator, test.comparando)
+			if err := b.Err; err != nil {
+				t.Errorf("Test:%v\nError:%v", testnum, err)
+			}
+			// The comparison result must match the expected records one by one.
+			expected := test.expected.Records()
+			received := b.Records()
+			if !reflect.DeepEqual(expected, received) {
+				t.Errorf(
+					"Test:%v\nExpected:\n%v\nReceived:\n%v",
+					testnum, expected, received,
+				)
+			}
+			// checkTypes is a helper defined elsewhere in this test file.
+			if err := checkTypes(b); err != nil {
+				t.Errorf(
+					"Test:%v\nError:%v",
+					testnum, err,
+				)
+			}
+		}()
+	}
+}
+
func TestSeries_Subset(t *testing.T) {
table := []struct {
series Series
@@ -1525,9 +1595,8 @@ func TestSeries_Quantile(t *testing.T) {
}
}
-
func TestSeries_Map(t *testing.T) {
- tests := []struct {
+ tests := []struct {
series Series
expected Series
}{
@@ -1564,11 +1633,11 @@ func TestSeries_Map(t *testing.T) {
doubleFloat64 := func(e Element) Element {
var result Element
result = e.Copy()
- result.Set(result.Float() * 2)
+ result.Set(result.Float() * 2)
return Element(result)
}
- // and two booleans
+ // and two booleans
and := func(e Element) Element {
var result Element
result = e.Copy()
@@ -1588,11 +1657,11 @@ func TestSeries_Map(t *testing.T) {
i, err := result.Int()
if err != nil {
return Element(&intElement{
- e: +5,
+ e: +5,
nan: false,
})
}
- result.Set(i + 5)
+ result.Set(i + 5)
return Element(result)
}
@@ -1604,12 +1673,12 @@ func TestSeries_Map(t *testing.T) {
return Element(result)
}
- for testnum, test := range tests {
+ for testnum, test := range tests {
switch test.series.Type() {
case Bool:
expected := test.expected
received := test.series.Map(and)
- for i := 0 ; i