From ae68249450674af623d59246c55a47627c321aa7 Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Sat, 16 Dec 2023 21:46:46 -0700 Subject: [PATCH 01/10] removed duplicate ci --- .github/workflows/CI.yml | 16 --------- .github/workflows/ci.yml | 70 ---------------------------------------- 2 files changed, 86 deletions(-) delete mode 100644 .github/workflows/CI.yml delete mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml deleted file mode 100644 index af98974..0000000 --- a/.github/workflows/CI.yml +++ /dev/null @@ -1,16 +0,0 @@ -name: CI - -on: [push, pull_request] - -jobs: - test: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v1 - with: - version: '1' - arch: x64 - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 31aa262..0000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,70 +0,0 @@ -name: CI -on: - pull_request: - branches: - - master - push: - branches: - - master - tags: '*' -jobs: - test: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - version: - - '1.3' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'. - - '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia. - - 'nightly' - os: - - ubuntu-latest - arch: - - x64 - steps: - - uses: actions/checkout@v2 - - uses: julia-actions/setup-julia@v1 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - uses: actions/cache@v1 - env: - cache-name: cache-artifacts - with: - path: ~/.julia/artifacts - key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} - restore-keys: | - ${{ runner.os }}-test-${{ env.cache-name }}- - ${{ runner.os }}-test- - ${{ runner.os }}- - - uses: julia-actions/julia-buildpkg@v1 - continue-on-error: ${{ matrix.version == 'nightly' }} - - uses: julia-actions/julia-runtest@v1 - continue-on-error: ${{ matrix.version == 'nightly' }} - - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v1 - with: - file: lcov.info - # docs: - # name: Documentation - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v2 - # - uses: julia-actions/setup-julia@v1 - # with: - # version: '1' - # - run: | - # julia --project=docs -e ' - # using Pkg - # Pkg.develop(PackageSpec(path=pwd())) - # Pkg.instantiate()' - # - run: | - # julia --project=docs -e ' - # using Documenter: doctest - # using DeepQLearning - # doctest(DeepQLearning)' # change MYPACKAGE to the name of your package - # - run: julia --project=docs docs/make.jl - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} From cef586fb4cab935120da2033e9f40e95b191ace5 Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Sat, 16 Dec 2023 21:47:01 -0700 Subject: [PATCH 02/10] clean up files --- .codecov.yml | 1 - appveyor.yml | 47 ----------------------------------------------- 2 files changed, 48 deletions(-) delete mode 100755 .codecov.yml delete mode 100755 appveyor.yml diff --git a/.codecov.yml b/.codecov.yml deleted file mode 100755 index 69cb760..0000000 --- a/.codecov.yml +++ /dev/null @@ -1 +0,0 @@ -comment: false diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100755 index 8e5c7b9..0000000 --- a/appveyor.yml +++ /dev/null @@ -1,47 +0,0 @@ -environment: - matrix: - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe" - - JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe" - - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" - - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" - -## uncomment the following lines to allow failures on nightly julia -## (tests will run but not make your overall status red) -#matrix: -# allow_failures: -# - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe" -# - JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x64/julia-latest-win64.exe" - -branches: - only: - - master - - /release-.*/ - -notifications: - - provider: Email - on_build_success: false - on_build_failure: false - on_build_status_changed: false - -install: - - ps: "[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.SecurityProtocolType]::Tls12" -# If there's a newer build queued for the same PR, cancel this one - - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod ` - https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | ` - Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { ` - throw "There are newer queued builds for this pull request, failing early." } -# Download most recent Julia Windows binary - - ps: (new-object net.webclient).DownloadFile( - $env:JULIA_URL, - "C:\projects\julia-binary.exe") -# Run installer silently, output to C:\projects\julia - - C:\projects\julia-binary.exe /S /D=C:\projects\julia - -build_script: -# Need to convert from shallow to complete for Pkg.clone to work - - IF EXIST .git\shallow (git fetch --unshallow) - - C:\projects\julia\bin\julia -e "versioninfo(); - Pkg.clone(pwd(), \"DeepQLearning\"); Pkg.build(\"DeepQLearning\")" - -test_script: - - C:\projects\julia\bin\julia -e "Pkg.test(\"DeepQLearning\")" From e597e5ed2834b6cdc671719893d4502414cd0c20 Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Sat, 16 Dec 2023 21:47:16 -0700 Subject: [PATCH 03/10] added .vscode and Manifest.toml to .gitignore --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 15990a0..1496a5e 100755 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ *log**/ log* *.bson -events.out.tfevents* \ No newline at end of file +events.out.tfevents* +.vscode +Manifest.toml \ No newline at end of file From 94c0f2ed83d956c159cc69922966df3f2a018d58 Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Sat, 16 Dec 2023 21:50:35 -0700 Subject: [PATCH 04/10] removed POMDPSimulators and updated compats --- Project.toml | 9 ++++----- test/flux_test.jl | 3 +-- test/runtests.jl | 1 - 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/Project.toml b/Project.toml index 381d6b1..0ccb098 100644 --- a/Project.toml +++ b/Project.toml @@ -21,21 +21,20 @@ TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f" [compat] BSON = "0.2, 0.3" CommonRLInterface = "0.2, 0.3" -EllipsisNotation = "0.4, 1.0" -Flux = "0.10, 0.11, 0.12" +EllipsisNotation = "1" +Flux = "0.10, 0.11, 0.12, 0.13, 0.14" POMDPLinter = "0.1" POMDPTools = "0.1" POMDPs = "0.9" Parameters = "0.12" -StatsBase = "0.32, 0.33" +StatsBase = "0.32, 0.33, 0.34" TensorBoardLogger = "0.1" julia = "1" [extras] POMDPModels = "355abbd5-f08e-5560-ac9e-8b5f2592a0ca" -POMDPSimulators = "e0d0a172-29c6-5d4e-96d0-f262df5d01fd" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["POMDPModels", "POMDPSimulators", "StaticArrays", "Test"] +test = ["POMDPModels", "StaticArrays", "Test"] diff --git a/test/flux_test.jl b/test/flux_test.jl index 9af3d94..b6b73ea 100644 --- a/test/flux_test.jl +++ b/test/flux_test.jl @@ -3,7 +3,6 @@ using POMDPs using Random using DeepQLearning using POMDPModels -using POMDPSimulators using POMDPTools using RLInterface using Test @@ -118,4 +117,4 @@ optimizer = ADAM(Flux.params(active_q), 1e-3) # use deep copy to update the target network -# use Flux.reset to reset RNN if necessary \ No newline at end of file +# use Flux.reset to reset RNN if necessary diff --git a/test/runtests.jl b/test/runtests.jl index 293dd89..36a3c21 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,5 @@ using DeepQLearning using POMDPModels -using POMDPSimulators using POMDPTools using Flux using Random From 7bda5acd3f6178b1ee67dc5cf8d54d75b4a077e2 Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Sat, 16 Dec 2023 22:25:46 -0700 Subject: [PATCH 05/10] fix errors during tests --- test/runtests.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 36a3c21..262d626 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -202,6 +202,10 @@ end end RL.reset!(env::SimpleEnv) = env.s = 1 + RL.state(env::SimpleEnv) = env.s + RL.setstate!(env::SimpleEnv, s::Int) = env.s = s + RL.setstate!(env::SimpleEnv, s::Float32) = env.s = Int(s) + RL.setstate!(env::SimpleEnv, s::Vector{Float32}) = env.s = Int(s[1]) RL.actions(env::SimpleEnv) = [-1, 1] RL.observe(env::SimpleEnv) = Float32[env.s] RL.terminated(env::SimpleEnv) = env.s >= 3 From 0489481726ee381a28c8916043ac8a565f354f33 Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Sat, 16 Dec 2023 23:58:15 -0700 Subject: [PATCH 06/10] Updated to Flux v0.14 --- Project.toml | 4 ++-- src/dueling.jl | 4 ++-- src/solver.jl | 18 +++++++++--------- test/flux_test.jl | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/Project.toml b/Project.toml index 0ccb098..2b38b01 100644 --- a/Project.toml +++ b/Project.toml @@ -22,14 +22,14 @@ TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f" BSON = "0.2, 0.3" CommonRLInterface = "0.2, 0.3" EllipsisNotation = "1" -Flux = "0.10, 0.11, 0.12, 0.13, 0.14" +Flux = "0.14" POMDPLinter = "0.1" POMDPTools = "0.1" POMDPs = "0.9" Parameters = "0.12" StatsBase = "0.32, 0.33, 0.34" TensorBoardLogger = "0.1" -julia = "1" +julia = "1.9" [extras] POMDPModels = "355abbd5-f08e-5560-ac9e-8b5f2592a0ca" diff --git a/src/dueling.jl b/src/dueling.jl index 862a483..00e7580 100644 --- a/src/dueling.jl +++ b/src/dueling.jl @@ -50,9 +50,9 @@ function create_dueling_network(m::Chain) @assert isa(l, Dense) error_str end nlayers = length(m.layers) - _, last_layer_size = size(m[end].W) + _, last_layer_size = size(m[end].weight) val = Chain([deepcopy(m[i]) for i=duel_layer+1:nlayers-1]..., Dense(last_layer_size, 1)) adv = Chain([deepcopy(m[i]) for i=duel_layer+1:nlayers]...) - base = Chain([deepcopy(m[i]) for i=1:duel_layer+1-1]...) + base = Chain(identity, [deepcopy(m[i]) for i=1:duel_layer+1-1]...) return DuelingNetwork(base, val, adv) end diff --git a/src/solver.jl b/src/solver.jl index 7a85f91..5379d0d 100755 --- a/src/solver.jl +++ b/src/solver.jl @@ -37,7 +37,7 @@ function POMDPs.solve(solver::DeepQLearningSolver, problem::POMDP) return solve(solver, env) end -function POMDPs.solve(solver::DeepQLearningSolver, env::AbstractEnv) +function POMDPs.solve(solver::DeepQLearningSolver, env::AbstractEnv) action_map = collect(actions(env)) action_indices = Dict(a=>i for (i, a) in enumerate(action_map)) @@ -56,14 +56,14 @@ function POMDPs.solve(solver::DeepQLearningSolver, env::AbstractEnv) return dqn_train!(solver, env, policy, replay) end -function dqn_train!(solver::DeepQLearningSolver, env::AbstractEnv, policy::AbstractNNPolicy, replay) +function dqn_train!(solver::DeepQLearningSolver, env::AbstractEnv, policy::AbstractNNPolicy, replay) if solver.logdir !== nothing logger = TBLogger(solver.logdir) solver.logdir = logger.logdir end active_q = getnetwork(policy) # shallow copy target_q = deepcopy(active_q) - optimizer = ADAM(solver.learning_rate) + optimizer = Adam(solver.learning_rate) # start training resetstate!(policy) reset!(env) @@ -177,7 +177,7 @@ function dqn_train!(solver::DeepQLearningSolver, env::AbstractEnv, policy::Abstr return policy end -function initialize_replay_buffer(solver::DeepQLearningSolver, env::AbstractEnv, action_indices) +function initialize_replay_buffer(solver::DeepQLearningSolver, env::AbstractEnv, action_indices) # init and populate replay buffer if solver.recurrence replay = EpisodeReplayBuffer(env, solver.buffer_size, solver.batch_size, solver.trace_length) @@ -200,7 +200,7 @@ function batch_train!(solver::DeepQLearningSolver, s_batch, a_batch, r_batch, sp_batch, done_batch, indices, importance_weights = sample(replay) active_q = getnetwork(policy) - p = params(active_q) + p = Flux.params(active_q) loss_val = nothing td_vals = nothing @@ -237,7 +237,7 @@ end # for RNNs function batch_train!(solver::DeepQLearningSolver, - env::AbstractEnv, + env::AbstractEnv, policy::AbstractNNPolicy, optimizer, target_q, @@ -249,7 +249,7 @@ function batch_train!(solver::DeepQLearningSolver, Flux.reset!(active_q) Flux.reset!(target_q) - p = params(active_q) + p = Flux.params(active_q) loss_val = nothing td_vals = nothing @@ -289,7 +289,7 @@ end function save_model(solver::DeepQLearningSolver, active_q, scores_eval::Float64, saved_mean_reward::Float64, model_saved::Bool) if scores_eval >= saved_mean_reward - bson(joinpath(solver.logdir, "qnetwork.bson"), qnetwork=[w for w in params(active_q)]) + bson(joinpath(solver.logdir, "qnetwork.bson"), qnetwork=[w for w in Flux.params(active_q)]) if solver.verbose @printf("Saving new model with eval reward %1.3f \n", scores_eval) end @@ -304,7 +304,7 @@ function restore_best_model(solver::DeepQLearningSolver, problem::MDP) restore_best_model(solver, env) end -function restore_best_model(solver::DeepQLearningSolver, env::AbstractEnv) +function restore_best_model(solver::DeepQLearningSolver, env::AbstractEnv) if solver.dueling active_q = create_dueling_network(solver.qnetwork) else diff --git a/test/flux_test.jl b/test/flux_test.jl index b6b73ea..ad377b6 100644 --- a/test/flux_test.jl +++ b/test/flux_test.jl @@ -113,7 +113,7 @@ l, td = loss(q_sa, q_targets) Flux.data(l) -optimizer = ADAM(Flux.params(active_q), 1e-3) +optimizer = Adam(Flux.params(active_q), 1e-3) # use deep copy to update the target network From f10c33caa2552b1f00269b2b781720f4a8af173b Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Sat, 16 Dec 2023 23:59:30 -0700 Subject: [PATCH 07/10] updated CI --- .github/workflows/CI.yml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/CI.yml diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..1189c97 --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,35 @@ +name: CI +on: + push: + branches: + - master + tags: '*' + pull_request: +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - "1" + os: + - ubuntu-latest + - macOS-latest + - windows-latest + arch: + - x64 + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: julia-actions/cache@v1 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v3 + with: + files: lcov.info \ No newline at end of file From ed988bdb789f3301d8164bc55612747c812ad2e8 Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Sun, 17 Dec 2023 00:00:08 -0700 Subject: [PATCH 08/10] version bump --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 2b38b01..192eb0f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "DeepQLearning" uuid = "de0a67f4-c691-11e8-0034-5fc6e16e22d3" repo = "https://github.com/JuliaPOMDP/DeepQLearning.jl" -version = "0.6.5" +version = "0.7.0" [deps] BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" From 8cb4d34c5e684e7d002e6eea8b2fc1733856408d Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Sun, 17 Dec 2023 00:07:03 -0700 Subject: [PATCH 09/10] readme badge and installation updates --- README.md | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index a4df914..c86a089 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # DeepQLearning -[![Build status](https://github.com/JuliaPOMDP/DeepQLearning.jl/workflows/CI/badge.svg)](https://github.com/JuliaPOMDP/DeepQLearning.jl/actions) -[![CodeCov](https://codecov.io/gh/JuliaPOMDP/DeepQLearning.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaPOMDP/DeepQLearning.jl) +[![Build status](https://github.com/JuliaPOMDP/DeepQLearning.jl/workflows/CI/badge.svg)](https://github.com/JuliaPOMDP/DeepQLearning.jl/actions/workflows/CI.yml) +[![codecov](https://codecov.io/github/JuliaPOMDP/DeepQLearning.jl/branch/master/graph/badge.svg?token=EfDZPMisVB)](https://codecov.io/github/JuliaPOMDP/DeepQLearning.jl) This package provides an implementation of the Deep Q learning algorithm for solving MDPs. For more information see https://arxiv.org/pdf/1312.5602.pdf. It uses POMDPs.jl and Flux.jl @@ -17,12 +17,6 @@ It supports the following innovations: ```Julia using Pkg -# Pkg.Registry.add("https://github.com/JuliaPOMDP/Registry) # for julia 1.1+ - -# for julia 1.0 add the registry throught the POMDP package -# Pkg.add("POMDPs") -# using POMDPs -# POMDPs.add_registry() Pkg.add("DeepQLearning") ``` From fcb567a34e51f2b5e93a4a7443c474fca028d69a Mon Sep 17 00:00:00 2001 From: Dylan Asmar Date: Tue, 19 Dec 2023 14:36:04 -0700 Subject: [PATCH 10/10] added min support julia version to ci --- .github/workflows/CI.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 1189c97..e62ac4f 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -14,6 +14,7 @@ jobs: matrix: version: - "1" + - "1.9" # min supported version os: - ubuntu-latest - macOS-latest