From 8213ce36801995ce2b840a1c1d44e0f578ee211f Mon Sep 17 00:00:00 2001 From: rofinn Date: Thu, 8 Oct 2020 19:54:07 -0500 Subject: [PATCH] Removed redundant/unreachable code and added more tests. --- src/data.jl | 2 +- src/deprecated.jl | 5 ----- src/imputors/interp.jl | 3 ++- src/imputors/locf.jl | 8 ++----- src/imputors/nocb.jl | 8 ++----- test/assertions.jl | 10 +++++++++ test/chain.jl | 15 +++++++++++++ test/data.jl | 12 +++++++++++ test/filter.jl | 20 +++++++++++++++++ test/imputors/interp.jl | 24 ++++++++++++++++++++- test/imputors/locf.jl | 24 ++++++++++++++++++++- test/imputors/nocb.jl | 24 ++++++++++++++++++++- test/imputors/svd.jl | 2 ++ test/runtests.jl | 2 ++ test/testutils.jl | 48 +++++++++++++++++++++++++++++++++++++++++ 15 files changed, 185 insertions(+), 22 deletions(-) create mode 100644 test/data.jl diff --git a/src/data.jl b/src/data.jl index 1168b5b..34a94a3 100644 --- a/src/data.jl +++ b/src/data.jl @@ -19,7 +19,7 @@ function datasets() end # Return just the root path with the data dep path part removed - return [first(t)[length(dep)+1:end] for t in selected] + return [first(t)[length(dep)+2:end] for t in selected] end function dataset(name) diff --git a/src/deprecated.jl b/src/deprecated.jl index 70cf946..343f231 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -228,11 +228,6 @@ function impute!(data::AbstractMatrix{Union{T, Missing}}, imp::Union{DropObs, Dr return data end -function impute!(data::AbstractVector{Union{T, Missing}}, imp::Union{DropObs, DropVars}) where T - data = impute(data, imp) - return data -end - impute!(data, imp::Union{DropObs, DropVars}) = impute(data, imp) @deprecate impute(data, C::Chain) run(data, C) false diff --git a/src/imputors/interp.jl b/src/imputors/interp.jl index 1d896aa..ae8854a 100644 --- a/src/imputors/interp.jl +++ b/src/imputors/interp.jl @@ -26,7 +26,8 @@ julia> impute(M, Interpolate(); dims=:rows) """ struct Interpolate <: Imputor end -function _impute!(data::AbstractArray{<:Union{T, Missing}}, imp::Interpolate) where T +function _impute!(data::AbstractVector{<:Union{T, Missing}}, imp::Interpolate) where T + @assert !all(ismissing, data) i = findfirst(!ismissing, data) + 1 while i < lastindex(data) diff --git a/src/imputors/locf.jl b/src/imputors/locf.jl index 33b9710..3ba07df 100644 --- a/src/imputors/locf.jl +++ b/src/imputors/locf.jl @@ -30,13 +30,9 @@ julia> impute(M, LOCF(); dims=:rows) struct LOCF <: Imputor end function _impute!(data::AbstractVector{Union{T, Missing}}, imp::LOCF) where T - start_idx = findfirst(!ismissing, data) - if start_idx === nothing - @debug "Cannot carry forward points when all values are missing" - return data - end + @assert !all(ismissing, data) + start_idx = findfirst(!ismissing, data) + 1 - start_idx += 1 for i in start_idx:lastindex(data) if ismissing(data[i]) data[i] = data[i-1] diff --git a/src/imputors/nocb.jl b/src/imputors/nocb.jl index ab9b3f9..5225cab 100644 --- a/src/imputors/nocb.jl +++ b/src/imputors/nocb.jl @@ -31,13 +31,9 @@ julia> impute(M, NOCB(); dims=:rows) struct NOCB <: Imputor end function _impute!(data::AbstractVector{Union{T, Missing}}, imp::NOCB) where T - end_idx = findlast(!ismissing, data) - if end_idx === nothing - @debug "Cannot carry backward points when all values are missing" - return data - end + @assert !all(ismissing, data) + end_idx = findlast(!ismissing, data) - 1 - end_idx -= 1 for i in end_idx:-1:firstindex(data) if ismissing(data[i]) data[i] = data[i+1] diff --git a/test/assertions.jl b/test/assertions.jl index 58c3ebb..6aa3d6f 100644 --- a/test/assertions.jl +++ b/test/assertions.jl @@ -58,4 +58,14 @@ @test_throws DimensionMismatch assert(a[1:10], t) @test_throws DimensionMismatch assert(m[1:3, :], t; dims=:cols) end + + @testset "functional" begin + @test_throws AssertionError Impute.threshold(a; ratio=0.1) + @test_throws AssertionError a |> Impute.threshold(; ratio=0.1) + + t = Threshold(; ratio=0.8) + # Use isequal because we expect the results to contain missings + @test isequal(Impute.threshold(a; ratio=0.8), a) + @test isequal(a |> Impute.threshold(; ratio=0.8), a) + end end diff --git a/test/chain.jl b/test/chain.jl index a7854e8..4cb5c98 100644 --- a/test/chain.jl +++ b/test/chain.jl @@ -1,4 +1,5 @@ @testset "Chaining and Piping" begin + # TODO: Add tests at each section to double check that orig hasn't been overwritten. orig = Impute.dataset("test/table/neuro") |> DataFrame @testset "DataFrame" begin @@ -122,4 +123,18 @@ # Confirm that we don't have any more missing values @test all(!ismissing, result) end + + @testset "Multi-type" begin + data = Impute.dataset("test/table/neuro") |> Tables.matrix + @test any(ismissing, data) + # Filter out colunns with more than 400 missing values, Fill with 0, and check that + # everything was replaced + C = Impute.Filter(c -> count(ismissing, c) < 400) ∘ Impute.Replace(; values=0.0) ∘ Impute.Threshold() + + result = Impute.run(data, C; dims=:cols) + @test size(result, 1) == size(data, 1) + # We should have filtered out 1 column + @test size(result, 2) < size(data, 2) + @test all(!ismissing, result) + end end diff --git a/test/data.jl b/test/data.jl new file mode 100644 index 0000000..c4fe220 --- /dev/null +++ b/test/data.jl @@ -0,0 +1,12 @@ +@testset "data" begin + datasets = Impute.datasets() + + @testset "Impute.dataset($name)" for name in datasets + result = Impute.dataset(name) + if occursin("matrix", name) + @test isa(result, AbstractDict) + elseif occursin("table", name) + @test isa(result, CSV.File) + end + end +end diff --git a/test/filter.jl b/test/filter.jl index 34b726b..ef93698 100644 --- a/test/filter.jl +++ b/test/filter.jl @@ -160,4 +160,24 @@ @test isequal(collect(result'), Impute.filter(collect(aa'); dims=:rows)) end end + + @testset "functional" begin + expected = deleteat!(deepcopy(a), [2, 3, 7]) + + @test a |> Impute.filter() == expected + @test a |> Impute.filter(!ismissing) == expected + @test Impute.filter(!ismissing, a) == expected + + b = deepcopy(a) + @test b |> Impute.filter!() == expected + @test b == expected + + b = deepcopy(a) + @test b |> Impute.filter!(!ismissing) == expected + @test b == expected + + b = deepcopy(a) + @test Impute.filter!(!ismissing, b) == expected + @test b == expected + end end diff --git a/test/imputors/interp.jl b/test/imputors/interp.jl index f2a1e9b..9feed28 100644 --- a/test/imputors/interp.jl +++ b/test/imputors/interp.jl @@ -1,6 +1,28 @@ @testset "Interpolate" begin @testset "Default" begin - test_all(ImputorTester(Interpolate)) + tester = ImputorTester(Interpolate) + + test_hashing(tester) + test_equality(tester) + test_vector(tester) + test_matrix(tester) + # test_cube(tester) + test_dataframe(tester) + test_groupby(tester) + test_axisarray(tester) + test_nameddimsarray(tester) + test_keyedarray(tester) + test_columntable(tester) + test_rowtable(tester) + + @testset "Cube" begin + a = allowmissing(1.0:1.0:60.0) + a[[2, 7, 18, 23, 34, 41, 55, 59, 60]] .= missing + C = collect(reshape(a, 5, 4, 3)) + + # Cube tests are expected to fail + @test_throws MethodError impute(C, tester.imp(; tester.kwargs...); dims=3) + end end @testset "Floats" begin diff --git a/test/imputors/locf.jl b/test/imputors/locf.jl index d9d61dc..a3a94dc 100644 --- a/test/imputors/locf.jl +++ b/test/imputors/locf.jl @@ -1,6 +1,28 @@ @testset "LOCF" begin @testset "Default" begin - test_all(ImputorTester(LOCF)) + tester = ImputorTester(LOCF) + + test_hashing(tester) + test_equality(tester) + test_vector(tester) + test_matrix(tester) + # test_cube(tester) + test_dataframe(tester) + test_groupby(tester) + test_axisarray(tester) + test_nameddimsarray(tester) + test_keyedarray(tester) + test_columntable(tester) + test_rowtable(tester) + + @testset "Cube" begin + a = allowmissing(1.0:1.0:60.0) + a[[2, 7, 18, 23, 34, 41, 55, 59, 60]] .= missing + C = collect(reshape(a, 5, 4, 3)) + + # Cube tests are expected to fail + @test_throws MethodError impute(C, tester.imp(; tester.kwargs...); dims=3) + end end @testset "Floats" begin diff --git a/test/imputors/nocb.jl b/test/imputors/nocb.jl index 6d6ac02..6e1ebe9 100644 --- a/test/imputors/nocb.jl +++ b/test/imputors/nocb.jl @@ -1,6 +1,28 @@ @testset "NOCB" begin @testset "Default" begin - test_all(ImputorTester(NOCB)) + tester = ImputorTester(NOCB) + + test_hashing(tester) + test_equality(tester) + test_vector(tester) + test_matrix(tester) + # test_cube(tester) + test_dataframe(tester) + test_groupby(tester) + test_axisarray(tester) + test_nameddimsarray(tester) + test_keyedarray(tester) + test_columntable(tester) + test_rowtable(tester) + + @testset "Cube" begin + a = allowmissing(1.0:1.0:60.0) + a[[2, 7, 18, 23, 34, 41, 55, 59, 60]] .= missing + C = collect(reshape(a, 5, 4, 3)) + + # Cube tests are expected to fail + @test_throws MethodError impute(C, tester.imp(; tester.kwargs...); dims=3) + end end @testset "Floats" begin diff --git a/test/imputors/svd.jl b/test/imputors/svd.jl index 1bf9aa3..0f29102 100644 --- a/test/imputors/svd.jl +++ b/test/imputors/svd.jl @@ -52,6 +52,8 @@ # Test having only missing data c = missings(5, 2) @test isequal(impute(c, tester.imp(; tester.kwargs...); dims=:cols), c) + c_ = tester.f!(deepcopy(c); dims=:cols) + @test isequal(c_, c) end end # Internal `svd` call isn't supported by these type, but maybe they should be? diff --git a/test/runtests.jl b/test/runtests.jl index 34f2ed8..d5652b7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,6 @@ using AxisArrays using AxisKeys +using CSV using Combinatorics using DataFrames using Dates @@ -46,6 +47,7 @@ using Impute: include("assertions.jl") include("chain.jl") + include("data.jl") include("deprecated.jl") include("filter.jl") include("imputors/interp.jl") diff --git a/test/testutils.jl b/test/testutils.jl index d382597..20e2ef6 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -54,6 +54,7 @@ function test_all(tester::ImputorTester) test_equality(tester) test_vector(tester) test_matrix(tester) + test_cube(tester) test_dataframe(tester) test_groupby(tester) test_axisarray(tester) @@ -168,6 +169,53 @@ function test_matrix(tester::ImputorTester) # Test having only missing data c = missings(5, 2) @test isequal(impute(c, tester.imp(; tester.kwargs...); dims=:cols), c) + c_ = impute!(deepcopy(c), tester.imp(; tester.kwargs...); dims=:cols) + @test isequal(c_, c) + end + end +end + +function test_cube(tester::ImputorTester) + @testset "Cube" begin + a = allowmissing(1.0:1.0:60.0) + a[[2, 7, 18, 23, 34, 41, 55, 59, 60]] .= missing + C = collect(reshape(a, 5, 4, 3)) + + result = impute(C, tester.imp(; tester.kwargs...); dims=3) + + @testset "Base" begin + # Test that we have fewer missing values + @test count(ismissing, result) < count(ismissing, C) + @test isa(result, Array{Union{Float64, Missing}, 3}) + @test eltype(result) <: eltype(C) + + # Test that functional form behaves the same way + @test result == tester.f(C; dims=3, tester.kwargs...) + end + + @testset "In-place" begin + # Test that the in-place function return the new results and logs whether it + # successfully did it in-place + C2 = deepcopy(C) + C2_ = tester.f!(C2; dims=3, tester.kwargs...) + @test C2_ == result + if C2 != result + @warn "$(tester.f!) did not mutate input data of type Matrix" + end + end + + @testset "No missing" begin + # Test having no missing data + B = collect(reshape(allowmissing(1.0:1.0:60.0), 5, 4, 3)) + @test impute(B, tester.imp(; tester.kwargs...); dims=3) == B + end + + @testset "All missing" begin + # Test having only missing data + M = missings(5, 4, 3) + @test isequal(impute(M, tester.imp(; tester.kwargs...); dims=3), M) + M_ = impute!(deepcopy(M), tester.imp(; tester.kwargs...); dims=3) + @test isequal(M_, M) end end end