From 2b9e47ce960c43701a250cf1acbfa79daad71faf Mon Sep 17 00:00:00 2001 From: Tom Kwong Date: Thu, 22 Feb 2018 22:30:31 -0800 Subject: [PATCH] ResultSet/Metadata changes initial commit for beta testing (re: #29, #30, #31) --- REQUIRE | 3 + src/Metadata.jl | 32 ++++++++++ src/ResultSet.jl | 123 ++++++++++++++++++++++++++++++++++++ src/SASLib.jl | 160 ++++------------------------------------------- src/Types.jl | 115 ++++++++++++++++++++++++++++++++++ test/runtests.jl | 156 ++++++++++++++++++++++----------------------- 6 files changed, 358 insertions(+), 231 deletions(-) create mode 100644 src/Metadata.jl create mode 100644 src/ResultSet.jl create mode 100644 src/Types.jl diff --git a/REQUIRE b/REQUIRE index acb36e4..ca30f03 100644 --- a/REQUIRE +++ b/REQUIRE @@ -2,3 +2,6 @@ julia 0.6 StringEncodings Missings Compat +IteratorInterfaceExtensions +TableTraits +TableTraitsUtils diff --git a/src/Metadata.jl b/src/Metadata.jl new file mode 100644 index 0000000..488a12d --- /dev/null +++ b/src/Metadata.jl @@ -0,0 +1,32 @@ +export metadata + +struct Metadata + filename::AbstractString + encoding::AbstractString # e.g. "ISO8859-1" + endianness::Symbol # :LittleEndian, :BigEndian + compression::Symbol # :RDC, :RLE, or :none + pagesize::Int + npages::Int + nrows::Int + ncols::Int + columnsinfo::Vector{Pair{Symbol, DataType}} # Float64 or String +end + +function metadata(h::Handler) + ci = [Pair(h.column_symbols[i], + h.column_types[i] == column_type_decimal ? 
Float64 : String) + for i in 1:h.column_count] + cmp = ifelse(h.compression == compression_method_rle, :RLE, + ifelse(h.compression == compression_method_rdc, :RDC, :none)) + Metadata( + h.config.filename, + h.file_encoding, + h.file_endianness, + cmp, + h.page_length, + h.page_count, + h.row_count, + h.column_count, + ci + ) +end \ No newline at end of file diff --git a/src/ResultSet.jl b/src/ResultSet.jl new file mode 100644 index 0000000..33e2e8e --- /dev/null +++ b/src/ResultSet.jl @@ -0,0 +1,123 @@ +using IteratorInterfaceExtensions, TableTraits, TableTraitsUtils + +import Base.size + +""" +ResultSet is the primary object that represents data returned from +reading a SAS data file. ResultSet implements the Base.Iteration +interface as well as the IterableTables.jl interface. + +*Fields* +- `columns`: a vector of columns, each being a vector itself +- `names`: a vector of column symbols +- `size`: a tuple (nrows, ncols) + +*Accessors* +- `columns(::ResultSet)` +- `names(::ResultSet)` +- `size(::ResultSet)` +- `size(::ResultSet, dim::Integer)` + +*Single Row/Column Indexing* +- `rs[i]` returns a tuple for row `i` +- `rs[:c]` returns a vector for column with symbol `c` + +*Multi Row/Column Indexing* +- `rs[i:j]` returns a view of ResultSet with rows between `i` and `j` +- `rs[c...]` returns a view of ResultSet with columns specified + +*Cell Indexing* +- `rs[i,j]` returns a single value for row `i` column `j` +- `rs[i,c]` returns a single value for row `i` column symbol `c` +- Specific cell can be assigned with the above indexing methods + +""" +struct ResultSet + columns::AbstractVector{AbstractVector} + names::AbstractVector{Symbol} + size::NTuple{2, Int} +end + +# exports +export columns + +# accessors +columns(rs::ResultSet) = rs.columns +Base.names(rs::ResultSet) = rs.names +Base.size(rs::ResultSet) = rs.size +Base.size(rs::ResultSet, i::Integer) = rs.size[i] + +# Size displayed as a string +sizestr(rs::ResultSet) = string(size(rs, 1)) * " rows x " * 
string(size(rs, 2)) * " columns" + +# find index for the column symbol +function symindex(rs::ResultSet, s::Symbol) + n = findfirst(x -> x == s, rs.names) + n == 0 && error("column symbol not found: $s") + n +end + +# Direct cell access +Base.getindex(rs::ResultSet, i::Integer, j::Integer) = rs.columns[j][i] +Base.getindex(rs::ResultSet, i::Integer, s::Symbol) = rs.columns[symindex(rs, s)][i] +Base.setindex!(rs::ResultSet, val, i::Integer, j::Integer) = rs.columns[j][i] = val +Base.setindex!(rs::ResultSet, val, i::Integer, s::Symbol) = rs.columns[symindex(rs, s)][i] = val + +# Return a single row as a tuple +Base.getindex(rs::ResultSet, i::Integer) = Tuple([c[i] for c in rs.columns]) + +# Return a single column as a vector +Base.getindex(rs::ResultSet, c::Symbol) = rs.columns[symindex(rs, c)] + +# index by row range => returns ResultSet object +function Base.getindex(rs::ResultSet, r::UnitRange{Int}) + ResultSet(map(x -> view(x, r), rs.columns), rs.names, (length(r), size(rs, 2))) +end + +# index by columns => returns ResultSet object +function Base.getindex(rs::ResultSet, ss::Symbol...) 
+ v = Int[] + for (idx, nam) in enumerate(rs.names) + nam in ss && push!(v, idx) + end + ResultSet(rs.columns[v], rs.names[v], (size(rs, 1), length(v))) +end + +# Iterators +Base.start(rs::ResultSet) = 1 +Base.done(rs::ResultSet, i::Int) = i > size(rs, 1) +Base.next(rs::ResultSet, i::Int) = (rs[i], i+1) + +# Display ResultSet object +function Base.show(io::IO, rs::ResultSet) + println(io, "ResultSet (", sizestr(rs), ")") + max_rows = 5 + max_cols = 10 + n = min(size(rs, 1), max_rows) + m = min(size(rs, 2), max_cols) + print(io, "Columns ") + for i in 1:m + i > 1 && print(io, ", ") + print(io, i, ":", rs.names[i]) + end + m < length(rs.names) && print(io, " …") + println(io) + for i in 1:n + print(io, i, ": ") + for j in 1:m + j > 1 && print(io, ", ") + print(io, rs.columns[j][i]) + end + println(io) + end + n < size(rs, 1) && println(io, "⋮") +end + +# IterableTables +IteratorInterfaceExtensions.isiterable(::ResultSet) = true + +TableTraits.isiterabletable(::ResultSet) = true + +function IteratorInterfaceExtensions.getiterator(rs::ResultSet) + TableTraitsUtils.create_tableiterator(rs.columns, rs.names) +end diff --git a/src/SASLib.jl b/src/SASLib.jl index bef2891..1159288 100644 --- a/src/SASLib.jl +++ b/src/SASLib.jl @@ -11,121 +11,9 @@ import Base.show include("constants.jl") include("utils.jl") include("ObjectPool.jl") - -struct FileFormatError <: Exception - message::AbstractString -end - -struct ConfigError <: Exception - message::AbstractString -end - -struct ReaderConfig - filename::AbstractString - encoding::AbstractString - chunk_size::Int64 - convert_dates::Bool - include_columns::Vector - exclude_columns::Vector - string_array_fn::Dict{Symbol, Function} - number_array_fn::Dict{Symbol, Function} - verbose_level::Int64 -end - -struct Column - id::Int64 - name::AbstractString - label::Vector{UInt8} # really? - format::AbstractString - coltype::UInt8 - length::Int64 -end - -# technically these fields may have lower precision (need casting?) 
-struct SubHeaderPointer - offset::Int64 - length::Int64 - compression::Int64 - shtype::Int64 -end - -mutable struct Handler - io::IOStream - config::ReaderConfig - - compression::UInt8 - column_names_strings::Vector{Vector{UInt8}} - column_names::Vector{AbstractString} - column_symbols::Vector{Symbol} - column_types::Vector{UInt8} - column_formats::Vector{AbstractString} - columns::Vector{Column} - - # column indices being read/returned - # tuple of column index, column symbol, column type - column_indices::Vector{Tuple{Int64, Symbol, UInt8}} - - current_page_data_subheader_pointers::Vector{SubHeaderPointer} - cached_page::Vector{UInt8} - column_data_lengths::Vector{Int64} - column_data_offsets::Vector{Int64} - current_row_in_file_index::Int64 - current_row_in_page_index::Int64 - - file_endianness::Symbol - sys_endianness::Symbol - byte_swap::Bool - - U64::Bool - int_length::Int8 - page_bit_offset::Int8 - subheader_pointer_length::UInt8 - - file_encoding::AbstractString - platform::AbstractString - name::Union{AbstractString,Vector{UInt8}} - file_type::Union{AbstractString,Vector{UInt8}} - - date_created::DateTime - date_modified::DateTime - - header_length::Int64 - page_length::Int64 - page_count::Int64 - sas_release::Union{AbstractString,Vector{UInt8}} - server_type::Union{AbstractString,Vector{UInt8}} - os_version::Union{AbstractString,Vector{UInt8}} - os_name::Union{AbstractString,Vector{UInt8}} - - row_length::Int64 - row_count::Int64 - col_count_p1::Int64 - col_count_p2::Int64 - mix_page_row_count::Int64 - lcs::Int64 - lcp::Int64 - - current_page_type::Int64 - current_page_block_count::Int64 # number of records in current page - current_page_subheaders_count::Int64 - column_count::Int64 - # creator_proc::Union{Void, Vector{UInt8}} - - byte_chunk::Dict{Symbol, Vector{UInt8}} - string_chunk::Dict{Symbol, AbstractArray{String,1}} - current_row_in_chunk_index::Int64 - - current_page::Int64 - vendor::UInt8 - use_base_transcoder::Bool - - 
string_decoder_buffer::IOBuffer - string_decoder::StringDecoder - - Handler(config::ReaderConfig) = new( - Base.open(config.filename), - config) -end +include("Types.jl") +include("ResultSet.jl") +include("Metadata.jl") function _open(config::ReaderConfig) # println("Opening $(config.filename)") @@ -181,7 +69,7 @@ function read(handler::Handler, nrows=0) # println("Reading $(handler.config.filename)") elapsed = @elapsed result = read_chunk(handler, nrows) elapsed = round(elapsed, 5) - println1(handler, "Read $(handler.config.filename) with size $(result[:nrows]) x $(result[:ncols]) in $elapsed seconds") + println1(handler, "Read $(handler.config.filename) with size $(size(result, 1)) x $(size(result, 2)) in $elapsed seconds") return result end @@ -986,42 +874,16 @@ function read_chunk(handler, nrows=0) handler.current_row_in_chunk_index = 0 perf_read_data = @elapsed read_data(handler, nrows) - perf_chunk_to_data_frame = @elapsed rslt = _chunk_to_dataframe(handler, nrows) - # here column symbols contains only ones for columns that are actually read + if handler.config.verbose_level > 1 + println("Read data in ", perf_read_data, " seconds") + println("Converted data in ", perf_chunk_to_data_frame, " seconds") + end + column_symbols = [sym for (k, sym, ty) in handler.column_indices] - column_names = String.(column_symbols) - column_types = [eltype(typeof(rslt[sym])) for (k, sym, ty) in handler.column_indices] - column_info = [( - k, - sym, - ty == column_type_string ? 
:String : :Number, - eltype(typeof(rslt[sym])), - typeof(rslt[sym]) - ) for (k, sym, ty) in handler.column_indices] - - return Dict( - :data => rslt, - :nrows => nrows, - :ncols => length(column_symbols), - :filename => handler.config.filename, - :page_count => handler.current_page, - :page_length => Int64(handler.page_length), - :file_encoding => handler.file_encoding, - :file_endianness => handler.file_endianness, - :system_endianness => handler.sys_endianness, - :column_offsets => handler.column_data_offsets, - :column_lengths => handler.column_data_lengths, - :column_types => column_types, - :column_symbols => column_symbols, - :column_names => column_names, - :column_info => column_info, - :compression => compressionstring(handler), - :perf_read_data => perf_read_data, - :perf_type_conversion => perf_chunk_to_data_frame, - :process_id => myid() - ) + return ResultSet([rslt[s] for s in column_symbols], column_symbols, + (nrows, length(column_symbols))) end # not extremely efficient but is a safe way to do it diff --git a/src/Types.jl b/src/Types.jl new file mode 100644 index 0000000..1c3452f --- /dev/null +++ b/src/Types.jl @@ -0,0 +1,115 @@ +struct FileFormatError <: Exception + message::AbstractString +end + +struct ConfigError <: Exception + message::AbstractString +end + +struct ReaderConfig + filename::AbstractString + encoding::AbstractString + chunk_size::Int64 + convert_dates::Bool + include_columns::Vector + exclude_columns::Vector + string_array_fn::Dict{Symbol, Function} + number_array_fn::Dict{Symbol, Function} + verbose_level::Int64 +end + +struct Column + id::Int64 + name::AbstractString + label::Vector{UInt8} # really? + format::AbstractString + coltype::UInt8 + length::Int64 +end + +# technically these fields may have lower precision (need casting?) 
+struct SubHeaderPointer + offset::Int64 + length::Int64 + compression::Int64 + shtype::Int64 +end + +mutable struct Handler + io::IOStream + config::ReaderConfig + + compression::UInt8 + column_names_strings::Vector{Vector{UInt8}} + column_names::Vector{AbstractString} + column_symbols::Vector{Symbol} + column_types::Vector{UInt8} + column_formats::Vector{AbstractString} + columns::Vector{Column} + + # column indices being read/returned + # tuple of column index, column symbol, column type + column_indices::Vector{Tuple{Int64, Symbol, UInt8}} + + current_page_data_subheader_pointers::Vector{SubHeaderPointer} + cached_page::Vector{UInt8} + column_data_lengths::Vector{Int64} + column_data_offsets::Vector{Int64} + current_row_in_file_index::Int64 + current_row_in_page_index::Int64 + + file_endianness::Symbol + sys_endianness::Symbol + byte_swap::Bool + + U64::Bool + int_length::Int8 + page_bit_offset::Int8 + subheader_pointer_length::UInt8 + + file_encoding::AbstractString + platform::AbstractString + name::Union{AbstractString,Vector{UInt8}} + file_type::Union{AbstractString,Vector{UInt8}} + + date_created::DateTime + date_modified::DateTime + + header_length::Int64 + page_length::Int64 + page_count::Int64 + sas_release::Union{AbstractString,Vector{UInt8}} + server_type::Union{AbstractString,Vector{UInt8}} + os_version::Union{AbstractString,Vector{UInt8}} + os_name::Union{AbstractString,Vector{UInt8}} + + row_length::Int64 + row_count::Int64 + col_count_p1::Int64 + col_count_p2::Int64 + mix_page_row_count::Int64 + lcs::Int64 + lcp::Int64 + + current_page_type::Int64 + current_page_block_count::Int64 # number of records in current page + current_page_subheaders_count::Int64 + column_count::Int64 + # creator_proc::Union{Void, Vector{UInt8}} + + byte_chunk::Dict{Symbol, Vector{UInt8}} + string_chunk::Dict{Symbol, AbstractVector{String}} + current_row_in_chunk_index::Int64 + + current_page::Int64 + vendor::UInt8 + use_base_transcoder::Bool + + 
string_decoder_buffer::IOBuffer + string_decoder::StringDecoder + + Handler(config::ReaderConfig) = new( + Base.open(config.filename), + config) +end + diff --git a/test/runtests.jl b/test/runtests.jl index 8aad1e9..35c4b29 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -60,7 +60,7 @@ openfile(dir, file; kwargs...) = SASLib.open(getpath(dir, file), kwargs...) Base.Filesystem.readdir("$dir")) for f in files result = readfile(dir, f) - @test (result[:nrows], result[:ncols]) == (10, 100) + @test size(result) == (10, 100) end end @@ -68,56 +68,54 @@ openfile(dir, file; kwargs...) = SASLib.open(getpath(dir, file), kwargs...) handler = openfile("data_pandas", "test1.sas7bdat") @test handler.config.filename == "data_pandas/test1.sas7bdat" result = SASLib.read(handler, 3) # read 3 rows - @test result[:nrows] == 3 + @test size(result, 1) == 3 result = SASLib.read(handler, 4) # read 4 rows - @test result[:nrows] == 4 + @test size(result, 1) == 4 result = SASLib.read(handler, 5) # should read only 3 rows even though we ask for 5 - @test result[:nrows] == 3 + @test size(result, 1) == 3 end @testset "various data types" begin - result = readfile("data_pandas", "test1.sas7bdat") - df = result[:data] - @test sum(df[:Column1][1:5]) == 2.066 - @test count(isnan, df[:Column1]) == 1 - @test df[:Column98][1:3] == [ "apple", "dog", "pear" ] - @test df[:Column4][1:3] == [Date("1965-12-10"), Date("1977-03-07"), Date("1983-08-15")] + rs = readfile("data_pandas", "test1.sas7bdat") + @test sum(rs[:Column1][1:5]) == 2.066 + @test count(isnan, rs[:Column1]) == 1 + @test rs[:Column98][1:3] == [ "apple", "dog", "pear" ] + @test rs[:Column4][1:3] == [Date("1965-12-10"), Date("1977-03-07"), Date("1983-08-15")] end @testset "datetime with missing values" begin - result = readfile("data_pandas", "datetime.sas7bdat") - df = result[:data] - @test (result[:nrows], result[:ncols]) == (5, 4) - @test result[:data][:mtg][1] == Date(2017, 11, 24) - @test result[:data][:dt][5] == DateTime(2018, 3, 
31, 14, 20, 33) - @test count(ismissing, result[:data][:mtg]) == 1 - @test count(ismissing, result[:data][:dt]) == 3 + rs = readfile("data_pandas", "datetime.sas7bdat") + @test size(rs) == (5, 4) + @test rs[:mtg][1] == Date(2017, 11, 24) + @test rs[:dt][5] == DateTime(2018, 3, 31, 14, 20, 33) + @test count(ismissing, rs[:mtg]) == 1 + @test count(ismissing, rs[:dt]) == 3 end @testset "include/exclude columns" begin fname = getpath("data_pandas", "productsales.sas7bdat") - result = readsas(fname, include_columns=[:MONTH, :YEAR]) - @test result[:ncols] == 2 - @test sort(result[:column_symbols]) == sort([:MONTH, :YEAR]) + rs = readsas(fname, include_columns=[:MONTH, :YEAR]) + @test size(rs, 2) == 2 + @test sort(names(rs)) == sort([:MONTH, :YEAR]) - result = readsas(fname, include_columns=[1, 2, 7]) - @test result[:ncols] == 3 - @test sort(result[:column_symbols]) == sort([:ACTUAL, :PREDICT, :PRODUCT]) + rs = readsas(fname, include_columns=[1, 2, 7]) + @test size(rs, 2) == 3 + @test sort(names(rs)) == sort([:ACTUAL, :PREDICT, :PRODUCT]) - result = readsas(fname, exclude_columns=[:DIVISION]) - @test result[:ncols] == 9 - @test !(:DIVISION in result[:column_symbols]) + rs = readsas(fname, exclude_columns=[:DIVISION]) + @test size(rs, 2) == 9 + @test !(:DIVISION in names(rs)) - result = readsas(fname, exclude_columns=collect(2:10)) - @test result[:ncols] == 1 - @test sort(result[:column_symbols]) == sort([:ACTUAL]) + rs = readsas(fname, exclude_columns=collect(2:10)) + @test size(rs, 2) == 1 + @test sort(names(rs)) == sort([:ACTUAL]) # case insensitive include/exclude - result = readsas(fname, include_columns=[:month, :Year]) - @test result[:ncols] == 2 - result = readsas(fname, exclude_columns=[:diVisiON]) - @test result[:ncols] == 9 + rs = readsas(fname, include_columns=[:month, :Year]) + @test size(rs, 2) == 2 + rs = readsas(fname, exclude_columns=[:diVisiON]) + @test size(rs, 2) == 9 # test bad include/exclude param # see 
https://discourse.julialang.org/t/test-warn-doesnt-work-with-warn-in-0-7/9001 @@ -136,50 +134,44 @@ openfile(dir, file; kwargs...) = SASLib.open(getpath(dir, file), kwargs...) end @testset "misc" begin - result = readfile("data_pandas", "productsales.sas7bdat") - df = result[:data] - @test result[:ncols] == 10 - @test result[:nrows] == 1440 - @test result[:page_length] == 8192 - @test sum(df[:ACTUAL]) ≈ 730337.0 + rs = readfile("data_pandas", "productsales.sas7bdat") + @test size(rs) == (1440, 10) +# @test result[:page_length] == 8192 + @test sum(rs[:ACTUAL]) ≈ 730337.0 handler = openfile("data_AHS2013", "topical.sas7bdat") @test show(handler) == nothing end @testset "stat_transfer" begin - result = readfile("data_misc", "types.sas7bdat") - df = result[:data] - @test sum(df[:vbyte][1:2]) == 9 - @test sum(df[:vint][1:2]) == 9 - @test sum(df[:vlong][1:2]) == 9 - @test sum(df[:vfloat][1:2]) ≈ 10.14000010 - @test sum(df[:vdouble][1:2]) ≈ 10.14000000 + rs = readfile("data_misc", "types.sas7bdat") + @test sum(rs[:vbyte][1:2]) == 9 + @test sum(rs[:vint][1:2]) == 9 + @test sum(rs[:vlong][1:2]) == 9 + @test sum(rs[:vfloat][1:2]) ≈ 10.14000010 + @test sum(rs[:vdouble][1:2]) ≈ 10.14000000 end # topical.sas7bdat contains columns labels which should be ignored anywas @testset "AHS2013" begin handler = openfile("data_AHS2013", "topical.sas7bdat") - result = SASLib.read(handler, 1000) + rs = SASLib.read(handler, 1000) SASLib.close(handler) - df = result[:data] - @test result[:ncols] == 114 - @test result[:nrows] == 1000 - @test result[:page_count] == 10 - @test result[:page_length] == 16384 - @test result[:system_endianness] == :LittleEndian - @test count(x -> x == "B", df[:DPEVVEHIC]) == 648 - @test mean(filter(!isnan, df[:PTCOSTGAS])) ≈ 255.51543209876544 + @test size(rs) == (1000, 114) + # @test result[:page_count] == 10 + # @test result[:page_length] == 16384 + # @test result[:system_endianness] == :LittleEndian + @test count(x -> x == "B", rs[:DPEVVEHIC]) == 648 + @test 
mean(filter(!isnan, rs[:PTCOSTGAS])) ≈ 255.51543209876544 end @testset "file encodings" begin - result = readfile("data_reikoch", "extr.sas7bdat") - df = result[:data] - @test result[:file_encoding] == "CP932" - @test df[:AETXT][1] == "眠気" - - result = readfile("data_pandas", "test1.sas7bdat", encoding = "US-ASCII") - @test result[:file_encoding] == "US-ASCII" - @test result[:data][:Column42][3] == "dog" + rs = readfile("data_reikoch", "extr.sas7bdat") + # @test result[:file_encoding] == "CP932" + @test rs[:AETXT][1] == "眠気" + + rs = readfile("data_pandas", "test1.sas7bdat", encoding = "US-ASCII") + # @test result[:file_encoding] == "US-ASCII" + @test rs[:Column42][3] == "dog" end @testset "handler object" begin @@ -205,36 +197,36 @@ openfile(dir, file; kwargs...) = SASLib.open(getpath(dir, file), kwargs...) @testset "array constructors" begin - result = readsas("data_AHS2013/homimp.sas7bdat") - @test typeof(result[:data][:RAS]) == SASLib.ObjectPool{String,UInt16} + rs = readsas("data_AHS2013/homimp.sas7bdat") + @test typeof(rs[:RAS]) == SASLib.ObjectPool{String,UInt16} # string_array_fn test for specific string columns - result = readsas("data_AHS2013/homimp.sas7bdat", + rs = readsas("data_AHS2013/homimp.sas7bdat", string_array_fn = Dict(:RAS => REGULAR_STR_ARRAY)) - @test typeof(result[:data][:RAS]) == Array{String,1} - @test typeof(result[:data][:RAH]) != Array{String,1} + @test typeof(rs[:RAS]) == Array{String,1} + @test typeof(rs[:RAH]) != Array{String,1} # string_array_fn test for all string columns - result = readsas("data_AHS2013/homimp.sas7bdat", + rs = readsas("data_AHS2013/homimp.sas7bdat", string_array_fn = Dict(:_all_ => REGULAR_STR_ARRAY)) - @test typeof(result[:data][:RAS]) == Array{String,1} - @test typeof(result[:data][:RAH]) == Array{String,1} - @test typeof(result[:data][:JRAS]) == Array{String,1} - @test typeof(result[:data][:JRAD]) == Array{String,1} - @test typeof(result[:data][:CONTROL]) == Array{String,1} + @test typeof(rs[:RAS]) == 
Array{String,1} + @test typeof(rs[:RAH]) == Array{String,1} + @test typeof(rs[:JRAS]) == Array{String,1} + @test typeof(rs[:JRAD]) == Array{String,1} + @test typeof(rs[:CONTROL]) == Array{String,1} # number_array_fn test by column name makesharedarray(n) = SharedArray{Float64}(n) - result = readsas("data_misc/numeric_1000000_2.sas7bdat", + rs = readsas("data_misc/numeric_1000000_2.sas7bdat", number_array_fn = Dict(:f => makesharedarray)) - @test typeof(result[:data][:f]) == SharedArray{Float64,1} - @test typeof(result[:data][:x]) == Array{Float64,1} + @test typeof(rs[:f]) == SharedArray{Float64,1} + @test typeof(rs[:x]) == Array{Float64,1} # number_array_fn test for all numeric columns - result = readsas("data_misc/numeric_1000000_2.sas7bdat", + rs = readsas("data_misc/numeric_1000000_2.sas7bdat", number_array_fn = Dict(:_all_ => makesharedarray)) - @test typeof(result[:data][:f]) == SharedArray{Float64,1} - @test typeof(result[:data][:x]) == SharedArray{Float64,1} + @test typeof(rs[:f]) == SharedArray{Float64,1} + @test typeof(rs[:x]) == SharedArray{Float64,1} end @@ -243,8 +235,8 @@ openfile(dir, file; kwargs...) = SASLib.open(getpath(dir, file), kwargs...) for f in readdir(dir) if endswith(f, ".sas7bdat") && !(f in ["zero_variables.sas7bdat"]) - result = readfile(dir, f) - @test result[:nrows] > 0 + rs = readfile(dir, f) + @test size(rs, 1) > 0 end end end