diff --git a/src/nf/nf_dense_layer.f90 b/src/nf/nf_dense_layer.f90
index c5735799..1d4f6b9c 100644
--- a/src/nf/nf_dense_layer.f90
+++ b/src/nf/nf_dense_layer.f90
@@ -4,6 +4,7 @@ module nf_dense_layer
   !! It is used internally by the layer type.
   !! It is not intended to be used directly by the user.
 
+  use nf_optimizers, only: optimizer_base_type
   use nf_activation, only: activation_function
   use nf_base_layer, only: base_layer
 
@@ -28,6 +29,8 @@ module nf_dense_layer
     real, allocatable :: db(:) ! bias gradients
 
     class(activation_function), allocatable :: activation
+    class(optimizer_base_type), allocatable :: optimizer_1d
+    class(optimizer_base_type), allocatable :: optimizer_2d
 
   contains
 
@@ -38,6 +41,8 @@ module nf_dense_layer
     procedure :: get_params
     procedure :: init
     procedure :: set_params
+    procedure :: apply_optimizer
+    procedure :: set_optimizer
 
   end type dense_layer
 
@@ -124,6 +129,20 @@ module subroutine init(self, input_shape)
         !! Shape of the input layer
     end subroutine init
 
+    module subroutine apply_optimizer(self, batch_size)
+      !! Update this layer's weights and biases using its optimizers.
+      class(dense_layer), intent(in out), target :: self
+        !! Dense layer instance
+      integer, intent(in) :: batch_size
+        !! Batch size to normalize the gradients by
+    end subroutine apply_optimizer
+
+    module subroutine set_optimizer(self, optimizer)
+      !! Set the optimizer to use for this layer's weights and biases.
+      !! If no optimizer is passed, default to Adam.
+      class(dense_layer), intent(in out) :: self
+        !! Dense layer instance
+      class(optimizer_base_type), intent(in), optional :: optimizer
+        !! Optimizer instance to use
+    end subroutine set_optimizer
+
   end interface
 
 end module nf_dense_layer
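Each dense layer now carries two independent optimizer instances because stateful optimizers such as Adam keep per-parameter state (first- and second-moment vectors) sized to the array they update: optimizer_1d covers the biases and optimizer_2d covers the flattened weights. Below is a minimal, self-contained sketch of the update that minimize performs for the default Adam case; it follows the standard Adam algorithm, not necessarily nf_optimizers' exact internals, and all names in it are illustrative.

    module adam_step_sketch
      ! Sketch only: a standard Adam step on one parameter array. The
      ! moment vectors m and v are held by the caller here; in the library
      ! they live inside the per-layer optimizer instance, which is why
      ! each parameter array needs its own optimizer.
      implicit none
    contains
      subroutine adam_step(param, grad, m, v, t, lr, beta1, beta2, eps)
        real, intent(in out) :: param(:), m(:), v(:)
        real, intent(in) :: grad(:)
        real, intent(in) :: lr, beta1, beta2, eps
        integer, intent(in) :: t  ! 1-based time step, for bias correction
        m = beta1 * m + (1 - beta1) * grad
        v = beta2 * v + (1 - beta2) * grad**2
        param = param - lr * (m / (1 - beta1**t)) &
              / (sqrt(v / (1 - beta2**t)) + eps)
      end subroutine adam_step
    end module adam_step_sketch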
diff --git a/src/nf/nf_dense_layer_submodule.f90 b/src/nf/nf_dense_layer_submodule.f90
index 50d5b10d..26e9b3fe 100644
--- a/src/nf/nf_dense_layer_submodule.f90
+++ b/src/nf/nf_dense_layer_submodule.f90
@@ -1,5 +1,6 @@
 submodule(nf_dense_layer) nf_dense_layer_submodule
 
+  use nf_optimizers, only: adam
   use nf_activation, only: activation_function
   use nf_base_layer, only: base_layer
   use nf_random, only: random_normal
@@ -151,4 +152,44 @@ module subroutine init(self, input_shape)
 
   end subroutine init
 
+  module subroutine set_optimizer(self, optimizer)
+    class(dense_layer), intent(in out) :: self
+    class(optimizer_base_type), intent(in), optional :: optimizer
+
+    ! Bias optimizer: one state entry per output neuron.
+    if (.not. allocated(self % optimizer_1d)) then
+      if (present(optimizer)) then
+        self % optimizer_1d = optimizer
+      else
+        self % optimizer_1d = adam(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1.e-7)
+      end if
+      call self % optimizer_1d % init(self % output_size)
+    end if
+
+    ! Weight optimizer: one state entry per weight, flattened to 1-d.
+    if (.not. allocated(self % optimizer_2d)) then
+      if (present(optimizer)) then
+        self % optimizer_2d = optimizer
+      else
+        self % optimizer_2d = adam(learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1.e-7)
+      end if
+      call self % optimizer_2d % init(self % input_size * self % output_size)
+    end if
+
+  end subroutine set_optimizer
+
+  module subroutine apply_optimizer(self, batch_size)
+    class(dense_layer), intent(in out), target :: self
+    integer, intent(in) :: batch_size
+
+    real, pointer :: w_(:), dw_(:)
+
+    call self % optimizer_1d % minimize(self % biases, self % db / batch_size)
+
+    ! Remap the rank-2 weights and gradients to rank-1 pointers so the
+    ! optimizer, which operates on 1-d arrays, can update them in place.
+    associate(n => self % input_size * self % output_size)
+      w_(1:n) => self % weights
+      dw_(1:n) => self % dw
+      call self % optimizer_2d % minimize(w_, dw_ / batch_size)
+    end associate
+
+  end subroutine apply_optimizer
+
 end submodule nf_dense_layer_submodule
diff --git a/src/nf/nf_network.f90 b/src/nf/nf_network.f90
index bcf10ae8..2eaf01e3 100644
--- a/src/nf/nf_network.f90
+++ b/src/nf/nf_network.f90
@@ -26,6 +26,8 @@ module nf_network
     procedure :: get_params
     procedure :: print_info
     procedure :: set_params
+    procedure :: apply_optimizer
+    procedure :: set_optimizers
     procedure :: train
     procedure :: update
 
@@ -242,6 +244,16 @@ module subroutine update(self, optimizer, batch_size)
         !! Set to `size(input_data, dim=2)` for a batch gradient descent.
     end subroutine update
 
+    module subroutine set_optimizers(self, optimizer)
+      !! Set the optimizer on every layer that has parameters.
+      class(network), intent(in out) :: self
+      class(optimizer_base_type), intent(in) :: optimizer
+    end subroutine set_optimizers
+
+    module subroutine apply_optimizer(self, batch_size)
+      !! Apply each layer's optimizers to its weights and biases.
+      class(network), intent(in out) :: self
+      integer, intent(in) :: batch_size
+    end subroutine apply_optimizer
+
   end interface
 
 end module nf_network
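The associate block in apply_optimizer relies on Fortran 2008 bounds-remapping pointer assignment: a rank-1 pointer is associated with a contiguous rank-2 target, giving the vector-based minimize a flat view of the weight matrix without copying. A standalone sketch of the idiom, with illustrative names only:

    program remap_sketch
      ! Illustrative only: view a rank-2 array through a rank-1 pointer.
      ! Rank-changing bounds remapping (F2008) requires a simply
      ! contiguous target, which allocatable and explicit-shape arrays
      ! satisfy; this is why dense_layer's self is declared target.
      implicit none
      real, target :: weights(3, 2)
      real, pointer :: w_(:)

      weights = 1.0
      w_(1:size(weights)) => weights  ! flat, no-copy view of weights
      w_ = w_ - 0.1                   ! updates weights through the view
      print *, weights                ! all elements are now 0.9
    end program remap_sketch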
 do concurrent(n = 2:size(self % layers))
@@ -524,4 +547,20 @@ module subroutine update(self, optimizer, batch_size)
 
   end subroutine update
 
+  module subroutine apply_optimizer(self, batch_size)
+    class(network), intent(in out) :: self
+    integer, intent(in) :: batch_size
+
+    integer :: n
+
+    ! Start at layer 2; layer 1 is the input layer and has no parameters.
+    ! A plain do loop, because the layer's apply_optimizer is not pure
+    ! and so may not be referenced inside do concurrent.
+    do n = 2, size(self % layers)
+      select type (this_layer => self % layers(n) % p)
+        type is (dense_layer)
+          call this_layer % apply_optimizer(batch_size)
+      end select
+    end do
+
+  end subroutine apply_optimizer
+
 end submodule nf_network_submodule