Skip to content

Commit

Permalink
Removed redundant/unreachable code and added more tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
rofinn committed Oct 9, 2020
1 parent af45737 commit 8213ce3
Show file tree
Hide file tree
Showing 15 changed files with 185 additions and 22 deletions.
2 changes: 1 addition & 1 deletion src/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ function datasets()
end

# Return just the root path with the data dep path part removed
return [first(t)[length(dep)+1:end] for t in selected]
return [first(t)[length(dep)+2:end] for t in selected]
end

function dataset(name)
Expand Down
5 changes: 0 additions & 5 deletions src/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,6 @@ function impute!(data::AbstractMatrix{Union{T, Missing}}, imp::Union{DropObs, Dr
return data
end

# Vector method for `DropObs`/`DropVars`: forwards to the non-mutating `impute` and
# returns whatever it produces. The caller's `data` binding is not reassigned here.
function impute!(
    data::AbstractVector{Union{T, Missing}}, imp::Union{DropObs, DropVars}
) where T
    return impute(data, imp)
end

# Generic fallback for `DropObs`/`DropVars`: simply delegates to the non-mutating
# `impute` and hands back its result.
function impute!(data, imp::Union{DropObs, DropVars})
    return impute(data, imp)
end

@deprecate impute(data, C::Chain) run(data, C) false
Expand Down
3 changes: 2 additions & 1 deletion src/imputors/interp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ julia> impute(M, Interpolate(); dims=:rows)
"""
struct Interpolate <: Imputor end

function _impute!(data::AbstractArray{<:Union{T, Missing}}, imp::Interpolate) where T
function _impute!(data::AbstractVector{<:Union{T, Missing}}, imp::Interpolate) where T
@assert !all(ismissing, data)
i = findfirst(!ismissing, data) + 1

while i < lastindex(data)
Expand Down
8 changes: 2 additions & 6 deletions src/imputors/locf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,9 @@ julia> impute(M, LOCF(); dims=:rows)
struct LOCF <: Imputor end

function _impute!(data::AbstractVector{Union{T, Missing}}, imp::LOCF) where T
start_idx = findfirst(!ismissing, data)
if start_idx === nothing
@debug "Cannot carry forward points when all values are missing"
return data
end
@assert !all(ismissing, data)
start_idx = findfirst(!ismissing, data) + 1

start_idx += 1
for i in start_idx:lastindex(data)
if ismissing(data[i])
data[i] = data[i-1]
Expand Down
8 changes: 2 additions & 6 deletions src/imputors/nocb.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,9 @@ julia> impute(M, NOCB(); dims=:rows)
struct NOCB <: Imputor end

function _impute!(data::AbstractVector{Union{T, Missing}}, imp::NOCB) where T
end_idx = findlast(!ismissing, data)
if end_idx === nothing
@debug "Cannot carry backward points when all values are missing"
return data
end
@assert !all(ismissing, data)
end_idx = findlast(!ismissing, data) - 1

end_idx -= 1
for i in end_idx:-1:firstindex(data)
if ismissing(data[i])
data[i] = data[i+1]
Expand Down
10 changes: 10 additions & 0 deletions test/assertions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,14 @@
@test_throws DimensionMismatch assert(a[1:10], t)
@test_throws DimensionMismatch assert(m[1:3, :], t; dims=:cols)
end

@testset "functional" begin
    strict, lenient = 0.1, 0.8

    # Both the direct call and the curried/piped call are expected to raise at the
    # strict ratio
    @test_throws AssertionError Impute.threshold(a; ratio=strict)
    @test_throws AssertionError a |> Impute.threshold(; ratio=strict)

    # Constructed here but not referenced by the checks below
    t = Threshold(; ratio=lenient)

    # The returned data is expected to still contain missings, so compare with
    # isequal instead of ==
    @test isequal(Impute.threshold(a; ratio=lenient), a)
    @test isequal(a |> Impute.threshold(; ratio=lenient), a)
end
end
15 changes: 15 additions & 0 deletions test/chain.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
@testset "Chaining and Piping" begin
# TODO: Add tests at each section to double check that orig hasn't been overwritten.
orig = Impute.dataset("test/table/neuro") |> DataFrame

@testset "DataFrame" begin
Expand Down Expand Up @@ -122,4 +123,18 @@
# Confirm that we don't have any more missing values
@test all(!ismissing, result)
end

@testset "Multi-type" begin
    data = Impute.dataset("test/table/neuro") |> Tables.matrix
    @test any(ismissing, data)

    # Keep only the columns with fewer than 400 missing values, replace the remaining
    # missings with 0.0, and finish with the Threshold assertion using its defaults.
    # The stages are joined with the composition operator `∘`.
    C = Impute.Filter(c -> count(ismissing, c) < 400) ∘
        Impute.Replace(; values=0.0) ∘
        Impute.Threshold()

    result = Impute.run(data, C; dims=:cols)

    # Filtering acts on columns only, so the row count must be untouched
    @test size(result, 1) == size(data, 1)
    # We should have filtered out 1 column
    @test size(result, 2) < size(data, 2)
    # Everything that was left must have been replaced
    @test all(!ismissing, result)
end
end
12 changes: 12 additions & 0 deletions test/data.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@testset "data" begin
    # Load every dataset the package lists and check the container type implied by
    # its name: "matrix" entries load as dictionaries, "table" entries as CSV files.
    available = Impute.datasets()

    @testset "Impute.dataset($name)" for name in available
        loaded = Impute.dataset(name)
        if occursin("matrix", name)
            @test loaded isa AbstractDict
        elseif occursin("table", name)
            @test loaded isa CSV.File
        end
    end
end
20 changes: 20 additions & 0 deletions test/filter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -160,4 +160,24 @@
@test isequal(collect(result'), Impute.filter(collect(aa'); dims=:rows))
end
end

@testset "functional" begin
    # Reference result: `a` with the entries at positions 2, 3 and 7 removed
    expected = deleteat!(deepcopy(a), [2, 3, 7])

    # Non-mutating forms: curried with the default predicate, curried with an
    # explicit predicate, and the direct two-argument call
    @test (a |> Impute.filter()) == expected
    @test (a |> Impute.filter(!ismissing)) == expected
    @test Impute.filter(!ismissing, a) == expected

    # Mutating forms: each one starts from a fresh copy and must both return the
    # expected result and leave the copy equal to it
    scratch = deepcopy(a)
    @test (scratch |> Impute.filter!()) == expected
    @test scratch == expected

    scratch = deepcopy(a)
    @test (scratch |> Impute.filter!(!ismissing)) == expected
    @test scratch == expected

    scratch = deepcopy(a)
    @test Impute.filter!(!ismissing, scratch) == expected
    @test scratch == expected
end
end
24 changes: 23 additions & 1 deletion test/imputors/interp.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,28 @@
@testset "Interpolate" begin
@testset "Default" begin
test_all(ImputorTester(Interpolate))
tester = ImputorTester(Interpolate)

test_hashing(tester)
test_equality(tester)
test_vector(tester)
test_matrix(tester)
# test_cube(tester)
test_dataframe(tester)
test_groupby(tester)
test_axisarray(tester)
test_nameddimsarray(tester)
test_keyedarray(tester)
test_columntable(tester)
test_rowtable(tester)

@testset "Cube" begin
    # 60 consecutive floats with a few entries blanked out, arranged as 5×4×3
    vals = allowmissing(1.0:1.0:60.0)
    vals[[2, 7, 18, 23, 34, 41, 55, 59, 60]] .= missing
    cube = collect(reshape(vals, 5, 4, 3))

    # Cube tests are expected to fail
    @test_throws MethodError impute(cube, tester.imp(; tester.kwargs...); dims=3)
end
end

@testset "Floats" begin
Expand Down
24 changes: 23 additions & 1 deletion test/imputors/locf.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,28 @@
@testset "LOCF" begin
@testset "Default" begin
test_all(ImputorTester(LOCF))
tester = ImputorTester(LOCF)

test_hashing(tester)
test_equality(tester)
test_vector(tester)
test_matrix(tester)
# test_cube(tester)
test_dataframe(tester)
test_groupby(tester)
test_axisarray(tester)
test_nameddimsarray(tester)
test_keyedarray(tester)
test_columntable(tester)
test_rowtable(tester)

@testset "Cube" begin
    # 60 consecutive floats with a few entries blanked out, arranged as 5×4×3
    vals = allowmissing(1.0:1.0:60.0)
    vals[[2, 7, 18, 23, 34, 41, 55, 59, 60]] .= missing
    cube = collect(reshape(vals, 5, 4, 3))

    # Cube tests are expected to fail
    @test_throws MethodError impute(cube, tester.imp(; tester.kwargs...); dims=3)
end
end

@testset "Floats" begin
Expand Down
24 changes: 23 additions & 1 deletion test/imputors/nocb.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,28 @@
@testset "NOCB" begin
@testset "Default" begin
test_all(ImputorTester(NOCB))
tester = ImputorTester(NOCB)

test_hashing(tester)
test_equality(tester)
test_vector(tester)
test_matrix(tester)
# test_cube(tester)
test_dataframe(tester)
test_groupby(tester)
test_axisarray(tester)
test_nameddimsarray(tester)
test_keyedarray(tester)
test_columntable(tester)
test_rowtable(tester)

@testset "Cube" begin
    # 60 consecutive floats with a few entries blanked out, arranged as 5×4×3
    vals = allowmissing(1.0:1.0:60.0)
    vals[[2, 7, 18, 23, 34, 41, 55, 59, 60]] .= missing
    cube = collect(reshape(vals, 5, 4, 3))

    # Cube tests are expected to fail
    @test_throws MethodError impute(cube, tester.imp(; tester.kwargs...); dims=3)
end
end

@testset "Floats" begin
Expand Down
2 changes: 2 additions & 0 deletions test/imputors/svd.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
# Test having only missing data
c = missings(5, 2)
@test isequal(impute(c, tester.imp(; tester.kwargs...); dims=:cols), c)
c_ = tester.f!(deepcopy(c); dims=:cols)
@test isequal(c_, c)
end
end
# Internal `svd` call isn't supported by these types, but maybe they should be?
Expand Down
2 changes: 2 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using AxisArrays
using AxisKeys
using CSV
using Combinatorics
using DataFrames
using Dates
Expand Down Expand Up @@ -46,6 +47,7 @@ using Impute:

include("assertions.jl")
include("chain.jl")
include("data.jl")
include("deprecated.jl")
include("filter.jl")
include("imputors/interp.jl")
Expand Down
48 changes: 48 additions & 0 deletions test/testutils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ function test_all(tester::ImputorTester)
test_equality(tester)
test_vector(tester)
test_matrix(tester)
test_cube(tester)
test_dataframe(tester)
test_groupby(tester)
test_axisarray(tester)
Expand Down Expand Up @@ -168,6 +169,53 @@ function test_matrix(tester::ImputorTester)
# Test having only missing data
c = missings(5, 2)
@test isequal(impute(c, tester.imp(; tester.kwargs...); dims=:cols), c)
c_ = impute!(deepcopy(c), tester.imp(; tester.kwargs...); dims=:cols)
@test isequal(c_, c)
end
end
end

"""
    test_cube(tester::ImputorTester)

Run the shared checks for 3-dimensional array input against the imputor described by
`tester`, imputing along `dims=3`.

The checks cover the non-mutating `impute` call, the functional form `tester.f`, the
in-place form `tester.f!`, input with no missing values, and input consisting only of
missing values.
"""
function test_cube(tester::ImputorTester)
    @testset "Cube" begin
        # 60 consecutive floats with a few entries blanked out, arranged as 5×4×3
        a = allowmissing(1.0:1.0:60.0)
        a[[2, 7, 18, 23, 34, 41, 55, 59, 60]] .= missing
        C = collect(reshape(a, 5, 4, 3))

        result = impute(C, tester.imp(; tester.kwargs...); dims=3)

        @testset "Base" begin
            # Test that we have fewer missing values
            @test count(ismissing, result) < count(ismissing, C)
            @test isa(result, Array{Union{Float64, Missing}, 3})
            @test eltype(result) <: eltype(C)

            # Test that functional form behaves the same way.
            # Only "fewer" missings is required above, so some may remain; use isequal
            # because `==` would then evaluate to `missing` instead of a Bool.
            @test isequal(result, tester.f(C; dims=3, tester.kwargs...))
        end

        @testset "In-place" begin
            # Test that the in-place function returns the new results and log whether
            # it successfully did so in-place
            C2 = deepcopy(C)
            C2_ = tester.f!(C2; dims=3, tester.kwargs...)
            @test isequal(C2_, result)
            # `!=` on arrays with leftover missings is not a Bool and would throw here
            if !isequal(C2, result)
                @warn "$(tester.f!) did not mutate input data of type $(typeof(C2))"
            end
        end

        @testset "No missing" begin
            # Test having no missing data
            B = collect(reshape(allowmissing(1.0:1.0:60.0), 5, 4, 3))
            @test impute(B, tester.imp(; tester.kwargs...); dims=3) == B
        end

        @testset "All missing" begin
            # Test having only missing data
            M = missings(5, 4, 3)
            @test isequal(impute(M, tester.imp(; tester.kwargs...); dims=3), M)
            M_ = impute!(deepcopy(M), tester.imp(; tester.kwargs...); dims=3)
            @test isequal(M_, M)
        end
    end
end
Expand Down

0 comments on commit 8213ce3

Please sign in to comment.