-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
6 changed files
with
358 additions
and
231 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,6 @@ julia 0.6 | |
StringEncodings | ||
Missings | ||
Compat | ||
IteratorInterfaceExtensions | ||
TableTraits | ||
TableTraitsUtils |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
export metadata | ||
|
||
"""
    Metadata

Summary information about a SAS data file.

# Fields
- `filename`: name of the file
- `encoding`: character encoding, e.g. `"ISO8859-1"`
- `endianness`: byte order, `:LittleEndian` or `:BigEndian`
- `compression`: compression method, e.g. `:RDC` or `:RLE`
- `pagesize`: page size
- `npages`: number of pages
- `nrows`: number of rows
- `ncols`: number of columns
- `columnsinfo`: one `name => type` pair per column, where the type is
  `Float64` or `String`
"""
struct Metadata
    filename::AbstractString
    encoding::AbstractString
    endianness::Symbol
    compression::Symbol
    pagesize::Int
    npages::Int
    nrows::Int
    ncols::Int
    columnsinfo::Vector{Pair{Symbol, DataType}}
end
|
||
"""
    metadata(h::Handler)

Build a `Metadata` summary from the state held in handler `h`.

Columns whose type equals `column_type_decimal` are reported as `Float64`;
every other column is reported as `String`. The compression method is
reported as `:RLE`, `:RDC`, or `:none`.
"""
function metadata(h::Handler)
    # One `name => type` pair per column.
    columnsinfo = map(1:h.column_count) do i
        jltype = h.column_types[i] == column_type_decimal ? Float64 : String
        h.column_symbols[i] => jltype
    end

    # Translate the handler's compression constant into a symbol.
    compression = if h.compression == compression_method_rle
        :RLE
    elseif h.compression == compression_method_rdc
        :RDC
    else
        :none
    end

    return Metadata(
        h.config.filename,
        h.file_encoding,
        h.file_endianness,
        compression,
        h.page_length,
        h.page_count,
        h.row_count,
        h.column_count,
        columnsinfo,
    )
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
using IteratorInterfaceExtensions, TableTraits, TableTraitsUtils | ||
|
||
import Base.size | ||
|
||
""" | ||
ResultSet is the primary object that represents data returned from | ||
reading a SAS data file. ResultSet implements the Base.Iteration | ||
interface as well as the IterableTables.jl interface. | ||
*Fields* | ||
- `columns`: a vector of columns, each being a vector itself | ||
- `names`: a vector of column symbols | ||
- `size`: a tuple (nrows, ncols) | ||
*Accessors* | ||
- `columns(::ResultSet)` | ||
- `names(::ResultSet)` | ||
- `size(::ResultSet)` | ||
- `size(::ResultSet, dim::Integer)` | ||
*Single Row/Column Indexing* | ||
- `rs[i]` returns a tuple for row `i` | ||
- `rs[:c]` returns a vector for column with symbol `c` | ||
*Multi Row/Column Indexing* | ||
- `rs[i:j]` returns a view of ResultSet with rows between `i` and `j` | ||
- `rs[c...]` returns a view of ResultSet with columns specified | ||
*Cell Indexing* | ||
- `rs[i,j]` returns a single value for row `i` column `j` | ||
- `rs[i,c]` returns a single value for row `i` column symbol `c` | ||
- Specific cell can be assigned with the above indexing methods | ||
""" | ||
struct ResultSet
    # Column data: one vector per column.
    columns::AbstractVector{AbstractVector}

    # Column symbols, in the same order as `columns`.
    names::AbstractVector{Symbol}

    # (nrows, ncols)
    size::NTuple{2, Int}
end
|
||
# exports | ||
export columns | ||
|
||
# Field accessors for ResultSet

# Vector of column vectors.
function columns(rs::ResultSet)
    return rs.columns
end

# Vector of column symbols.
function Base.names(rs::ResultSet)
    return rs.names
end

# Full (nrows, ncols) tuple.
function Base.size(rs::ResultSet)
    return rs.size
end

# Extent along dimension `i` (1 = rows, 2 = columns).
function Base.size(rs::ResultSet, i::Integer)
    return rs.size[i]
end
|
||
# Human-readable size, e.g. "3 rows x 2 columns"
function sizestr(rs::ResultSet)
    nrows, ncols = size(rs, 1), size(rs, 2)
    return string(nrows, " rows x ", ncols, " columns")
end
|
||
# Find the positional index of the column named `s`.
#
# Returns the index of the first entry in `rs.names` equal to `s`.
# Raises an error ("column symbol not found: ...") when no column matches.
function symindex(rs::ResultSet, s::Symbol)
    n = findfirst(x -> x == s, rs.names)
    # "Not found" is signalled by 0 on Julia 0.6 and by `nothing` on
    # Julia 0.7 and later; check both so the error is raised on either
    # version instead of leaking `nothing` to the caller.
    (n === nothing || n == 0) && error("column symbol not found: $s")
    return n
end
|
||
# Single-cell read/write, addressing the column by position or by symbol

# Read the value at row `i`, column position `j`.
function Base.getindex(rs::ResultSet, i::Integer, j::Integer)
    column = rs.columns[j]
    return column[i]
end

# Read the value at row `i` of the column named `s`.
function Base.getindex(rs::ResultSet, i::Integer, s::Symbol)
    column = rs.columns[symindex(rs, s)]
    return column[i]
end

# Store `val` at row `i`, column position `j`; returns `val`.
function Base.setindex!(rs::ResultSet, val, i::Integer, j::Integer)
    column = rs.columns[j]
    column[i] = val
    return val
end

# Store `val` at row `i` of the column named `s`; returns `val`.
function Base.setindex!(rs::ResultSet, val, i::Integer, s::Symbol)
    column = rs.columns[symindex(rs, s)]
    column[i] = val
    return val
end
|
||
# Row access: rs[i] gathers the i-th element of every column into a tuple
function Base.getindex(rs::ResultSet, i::Integer)
    values = map(col -> col[i], rs.columns)
    return Tuple(values)
end
|
||
# Column access: rs[:c] returns the column vector named `c`
# (the stored vector itself, not a copy)
function Base.getindex(rs::ResultSet, c::Symbol)
    position = symindex(rs, c)
    return rs.columns[position]
end
|
||
# Row-range access: rs[i:j] returns a new ResultSet whose columns are
# views over rows `r` of the original columns
function Base.getindex(rs::ResultSet, r::UnitRange{Int})
    subcolumns = map(rs.columns) do col
        view(col, r)
    end
    newsize = (length(r), size(rs, 2))
    return ResultSet(subcolumns, rs.names, newsize)
end
|
||
# Column-subset access: rs[:a, :b, ...] returns a new ResultSet holding
# only the requested columns. Columns come back in the order they appear
# in `rs.names`; symbols that match no column are ignored.
function Base.getindex(rs::ResultSet, ss::Symbol...)
    keep = Int[pos for (pos, colname) in enumerate(rs.names) if colname in ss]
    return ResultSet(rs.columns[keep], rs.names[keep], (size(rs, 1), length(keep)))
end
|
||
# Row iteration via the start/done/next protocol.
# The iteration state is the 1-based index of the next row to yield.

function Base.start(rs::ResultSet)
    return 1
end

function Base.done(rs::ResultSet, i::Int)
    return i > size(rs, 1)
end

function Base.next(rs::ResultSet, i::Int)
    row = rs[i]
    return (row, i + 1)
end
|
||
# Display a ResultSet object.
#
# Writes a compact preview to `io`:
#   - a header line with the overall size,
#   - the first (up to 10) column names, followed by " …" when truncated,
#   - the first (up to 5) rows, followed by "⋮" when truncated.
# All output goes to `io`, so the result can be captured with `sprint`
# or an `IOBuffer`.
function Base.show(io::IO, rs::ResultSet)
    println(io, "ResultSet (", sizestr(rs), ")")
    max_rows = 5
    max_cols = 10
    n = min(size(rs, 1), max_rows)
    m = min(size(rs, 2), max_cols)

    # Column header line
    print(io, "Columns ")
    for i in 1:m
        i > 1 && print(io, ", ")
        print(io, i, ":", rs.names[i])
    end
    m < length(rs.names) && print(io, " …")
    println(io)

    # Data preview
    for i in 1:n
        print(io, i, ": ")
        for j in 1:m
            # The separator must be written to `io` like everything else;
            # printing it without `io` sends it to stdout.
            j > 1 && print(io, ", ")
            print(io, rs.columns[j][i])
        end
        println(io)
    end
    n < size(rs, 1) && println(io, "⋮")
    return nothing
end
|
||
# IterableTables integration

# Declare that a ResultSet is iterable.
function IteratorInterfaceExtensions.isiterable(rs::ResultSet)
    return true
end

# Declare that a ResultSet is an iterable table.
function TableTraits.isiterabletable(rs::ResultSet)
    return true
end

# Build the table iterator from the column vectors and their names;
# construction is delegated to TableTraitsUtils.create_tableiterator.
IteratorInterfaceExtensions.getiterator(rs::ResultSet) =
    TableTraitsUtils.create_tableiterator(rs.columns, rs.names)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.