Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#23] - Feature/methods describer #31

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/SyntheticDatasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ end

include("sklearn.jl")
include("matlab.jl")
include("descriptor.jl")

function convert(features::Array{T, 2}, labels::Array{D, 1})::DataFrame where {T <: Number, D <: Number}
df = DataFrame()
Expand Down
100 changes: 100 additions & 0 deletions src/descriptor.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""
    MethodDescriber

Mutable record describing one dataset generator.

# Fields
- `name::String`: name of the generator.
- `description::Union{Nothing, String}`: free-text description, or `nothing`.
- `problem_type::Union{Nothing, Symbol}`: problem category, or `nothing`.
- `f::Union{Nothing, Function}`: the generator function itself, or `nothing`.

The only inner constructor takes no arguments and leaves every field unassigned;
fields are expected to be filled in afterwards.
"""
mutable struct MethodDescriber
    name::String
    description::Union{Nothing, String}
    problem_type::Union{Nothing, Symbol}
    f::Union{Nothing, Function}

    # Zero-argument constructor: builds an instance with all fields left unassigned.
    function MethodDescriber()
        return new()
    end
end

"""
    MethodDescriber(name::String; description = nothing, problem_type = nothing, f = nothing)

Build a `MethodDescriber` called `name`.

# Keywords
- `description`: stored in the `description` field (defaults to `nothing`).
- `problem_type`: stored in the `problem_type` field (defaults to `nothing`).
- `f`: stored in the `f` field (defaults to `nothing`).
"""
function MethodDescriber(name::String;
                         description = nothing,
                         problem_type = nothing,
                         f = nothing)
    describer = MethodDescriber()

    # Assign the fields in declaration order on the freshly created instance.
    assignments = (:name => name,
                   :description => description,
                   :problem_type => problem_type,
                   :f => f)
    for (field, value) in assignments
        setproperty!(describer, field, value)
    end

    return describer
end

"""
    Base.show(io::IO, method::MethodDescriber)

Write a multi-line summary of `method` to `io`: the name on the first line, then the
problem type and the description, each only when it is not `nothing`. Every emitted
line is terminated with a newline.

Always returns `nothing`.
"""
function Base.show(io::IO, method::MethodDescriber)
    println(io, method.name)
    if method.problem_type !== nothing
        # Pieces are streamed to `io` instead of concatenating a temporary string.
        println(io, "problem type: ", method.problem_type)
    end
    if method.description !== nothing
        println(io, "Description:\n", method.description)
    end
    # Explicit return so the value of a short-circuit expression never leaks out.
    return nothing
end

"""
    MethodDescriberSet(describers::AbstractVector)
    MethodDescriberSet(args...)

Mutable container holding a vector of `MethodDescriber` entries in its `describers`
field.

The first form stores `describers`, converting it to `Vector{MethodDescriber}` when it
has another element type. The second form collects any number of describers passed as
separate arguments, including none or exactly one.
"""
mutable struct MethodDescriberSet
    describers::Vector{MethodDescriber}

    # Vector form: `new` converts the argument to the declared field type.
    MethodDescriberSet(describers::AbstractVector) = new(describers)

    # Varargs form. Declared as an inner constructor so that a call with a single
    # describer reaches it instead of a catch-all one-argument constructor that would
    # try to convert the lone describer into a vector.
    MethodDescriberSet(args...) = new(MethodDescriber[args...])
end

"""
    methodsFilter(methods::MethodDescriberSet, parameters)
    methodsFilter(parameters)

Return a `MethodDescriberSet` with every describer of `methods` that matches at least
one `field => value` pair in `parameters`. The one-argument form filters the `METHODS`
registry.

# Arguments
- `methods`: the set to search.
- `parameters`: a single `Pair` or an array of pairs with any key/value types, e.g.
  `:problem_type => :Regression` or `[:name => "generate_blobs", :f => nothing]`.

A pair whose key is not a field of `MethodDescriber` triggers a warning and is ignored.
Matching describers are returned in the order they appear in `methods.describers`, each
at most once.
"""
function methodsFilter(methods::MethodDescriberSet,
                       parameters::Union{Pair, AbstractArray{<:Pair}})
    criteria = parameters isa Pair ? (parameters,) : parameters

    # Keep only criteria that name a real field; warn once per unknown key.
    known_fields = fieldnames(MethodDescriber)
    usable = Pair[]
    for criterion in criteria
        if first(criterion) in known_fields
            push!(usable, criterion)
        else
            @warn "$(first(criterion)) isn't a property of MethodDescriber"
        end
    end

    selected = MethodDescriber[]
    for describer in methods.describers
        matched = any(usable) do criterion
            field = first(criterion)
            # An unassigned field can never match and must not raise.
            isdefined(describer, field) &&
                isequal(getfield(describer, field), last(criterion))
        end
        # Membership check keeps a describer listed twice from being reported twice.
        if matched && !(describer in selected)
            push!(selected, describer)
        end
    end

    return MethodDescriberSet(selected)
end

methodsFilter(parameters::Union{Pair, AbstractArray{<:Pair}}) =
    methodsFilter(METHODS, parameters)
Conradox marked this conversation as resolved.
Show resolved Hide resolved

"""
    Base.show(io::IO, methods::MethodDescriberSet)

Print every describer stored in `methods.describers` to `io`, one after another in
stored order, with no separator added between entries.
"""
function Base.show(io::IO, methods::MethodDescriberSet)
    # Review note: the output ends with several newlines because each describer is
    # printed with `println`; this was kept on purpose so that every attribute lands
    # on its own line.
    foreach(describer -> print(io, describer), methods.describers)
    return nothing
end

"""
    METHODS

Registry of described dataset generators. Each entry records the generator's name, its
problem type, a short description and the generator function itself.
"""
const METHODS = MethodDescriberSet(
    MethodDescriber(
        "generate_blobs";
        problem_type = :Classification,
        description = """
        Generate isotropic Gaussian blobs for clustering. Sklearn interface to make_blobs.
        """,
        f = generate_blobs,
    ),
    MethodDescriber(
        "generate_moons";
        problem_type = :Classification,
        # Previously carried the blobs description by copy-paste.
        description = """
        Make two interleaving half circles. Sklearn interface to make_moons.
        """,
        f = generate_moons,
    ),
    MethodDescriber(
        # Named after the Julia function stored in `f`, like the other entries.
        "generate_s_curve";
        problem_type = :Regression,
        description = """
        Generate an S curve dataset. Sklearn interface to make_s_curve.
        """,
        f = generate_s_curve,
    ),
    MethodDescriber(
        "generate_regression";
        problem_type = :Regression,
        description = """
        Generate a random regression problem. Sklearn interface to make_regression.
        """,
        f = generate_regression,
    ),
)

"""
    methods()

Print the `METHODS` registry to standard output, followed by a newline.
"""
function methods()
    # NOTE(review): shares its name with `Base.methods`; presumably meant as a
    # module-local function — confirm it is not exported in a way that clashes.
    println(METHODS)
    return nothing
end
Conradox marked this conversation as resolved.
Show resolved Hide resolved
81 changes: 81 additions & 0 deletions test/describer.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
using SyntheticDatasets
using DataFrames
using Test

# Shape checks for the generators: every call must yield `samples` rows and the
# expected number of columns.
@testset "SkLearn Generators" begin
    samples = 20000
    features = 20

    data = SyntheticDatasets.generate_blobs(centers = [-1 1; -0.5 0.75],
                                            cluster_std = 0.225,
                                            n_samples = samples,
                                            center_box = (-1.5, 1.5))
    nrows, ncols = size(data)
    @test nrows == samples
    @test ncols == 3

    data = SyntheticDatasets.generate_moons(n_samples = samples)
    nrows, ncols = size(data)
    @test nrows == samples
    @test ncols == 3

    data = SyntheticDatasets.generate_s_curve(n_samples = samples,
                                              noise = 2.2,
                                              random_state = 5)
    nrows, ncols = size(data)
    @test nrows == samples
    @test ncols == 4

    data = SyntheticDatasets.generate_circles(n_samples = samples)
    nrows, ncols = size(data)
    @test nrows == samples
    @test ncols == 3

    data = SyntheticDatasets.generate_regression(n_samples = samples,
                                                 n_features = features,
                                                 noise = 2.2,
                                                 random_state = 5)
    nrows, ncols = size(data)
    @test nrows == samples
    @test ncols == features + 1

    data = SyntheticDatasets.generate_classification(n_samples = samples,
                                                     n_features = features,
                                                     n_classes = 1)
    nrows, ncols = size(data)
    @test nrows == samples
    @test ncols == features + 1

    data = SyntheticDatasets.generate_friedman1(n_samples = samples,
                                                n_features = features)
    nrows, ncols = size(data)
    @test nrows == samples
    @test ncols == features + 1

    data = SyntheticDatasets.generate_friedman2(n_samples = samples)
    nrows, ncols = size(data)
    @test nrows == samples
    @test ncols == 5

    data = SyntheticDatasets.generate_friedman3(n_samples = samples)
    nrows, ncols = size(data)
    @test nrows == samples
    @test ncols == 5

    data = SyntheticDatasets.generate_low_rank_matrix(n_samples = samples,
                                                      n_features = features,
                                                      effective_rank = 10,
                                                      tail_strength = 0.5,
                                                      random_state = 5)
    nrows, ncols = size(data)
    @test nrows == samples
    @test ncols == features

    data = SyntheticDatasets.generate_swiss_roll(n_samples = samples,
                                                 noise = 2.2,
                                                 random_state = 5)
    nrows, ncols = size(data)
    @test nrows == samples
    @test ncols == 4
end