Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Loss = -nan #64

Open
3ntr0phy opened this issue Apr 21, 2018 · 3 comments
Open

Loss = -nan #64

3ntr0phy opened this issue Apr 21, 2018 · 3 comments

Comments

@3ntr0phy
Copy link

Hi,
I'm trying to train the network on my own data.
This is the solver:
net: "train_val.prototxt"
#test_initialization: false
#test_iter: 100
#test_interval: 1000
display: 20
average_loss: 20
base_lr: 0.000001
lr_policy: "poly"
power: 1.0
max_iter: 500
momentum: 0.9
weight_decay: 0.0001
snapshot: 100
snapshot_prefix: "mobilenet"

This is the train_val:
name: "MOBILENET"

transform_param {

scale: 0.017

mirror: false

crop_size: 224

mean_value: [103.94,116.78,123.68]

}

layer {
name: "data"
type: "ImageData"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
mean_file: "imagenet_mean.binaryproto"
mirror:false
}
image_data_param {
source: "./train.txt"
batch_size: 16
new_height: 256
new_width: 256
root_folder: "/"
}
}

layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 32
bias_term: false
pad: 1
kernel_size: 3
stride: 2
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv1/bn"
type: "BatchNorm"
bottom: "conv1"
top: "conv1"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv1/scale"
type: "Scale"
bottom: "conv1"
top: "conv1"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "conv2_1/dw"
type: "Convolution"
bottom: "conv1"
top: "conv2_1/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 32
bias_term: false
pad: 1
kernel_size: 3
group: 32
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv2_1/dw/bn"
type: "BatchNorm"
bottom: "conv2_1/dw"
top: "conv2_1/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv2_1/dw/scale"
type: "Scale"
bottom: "conv2_1/dw"
top: "conv2_1/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu2_1/dw"
type: "ReLU"
bottom: "conv2_1/dw"
top: "conv2_1/dw"
}
layer {
name: "conv2_1/sep"
type: "Convolution"
bottom: "conv2_1/dw"
top: "conv2_1/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 64
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv2_1/sep/bn"
type: "BatchNorm"
bottom: "conv2_1/sep"
top: "conv2_1/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv2_1/sep/scale"
type: "Scale"
bottom: "conv2_1/sep"
top: "conv2_1/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu2_1/sep"
type: "ReLU"
bottom: "conv2_1/sep"
top: "conv2_1/sep"
}
layer {
name: "conv2_2/dw"
type: "Convolution"
bottom: "conv2_1/sep"
top: "conv2_2/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 64
bias_term: false
pad: 1
kernel_size: 3
group: 64
engine: CAFFE
stride: 2
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv2_2/dw/bn"
type: "BatchNorm"
bottom: "conv2_2/dw"
top: "conv2_2/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv2_2/dw/scale"
type: "Scale"
bottom: "conv2_2/dw"
top: "conv2_2/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu2_2/dw"
type: "ReLU"
bottom: "conv2_2/dw"
top: "conv2_2/dw"
}
layer {
name: "conv2_2/sep"
type: "Convolution"
bottom: "conv2_2/dw"
top: "conv2_2/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv2_2/sep/bn"
type: "BatchNorm"
bottom: "conv2_2/sep"
top: "conv2_2/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv2_2/sep/scale"
type: "Scale"
bottom: "conv2_2/sep"
top: "conv2_2/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu2_2/sep"
type: "ReLU"
bottom: "conv2_2/sep"
top: "conv2_2/sep"
}
layer {
name: "conv3_1/dw"
type: "Convolution"
bottom: "conv2_2/sep"
top: "conv3_1/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
group: 128
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv3_1/dw/bn"
type: "BatchNorm"
bottom: "conv3_1/dw"
top: "conv3_1/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv3_1/dw/scale"
type: "Scale"
bottom: "conv3_1/dw"
top: "conv3_1/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu3_1/dw"
type: "ReLU"
bottom: "conv3_1/dw"
top: "conv3_1/dw"
}
layer {
name: "conv3_1/sep"
type: "Convolution"
bottom: "conv3_1/dw"
top: "conv3_1/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv3_1/sep/bn"
type: "BatchNorm"
bottom: "conv3_1/sep"
top: "conv3_1/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv3_1/sep/scale"
type: "Scale"
bottom: "conv3_1/sep"
top: "conv3_1/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu3_1/sep"
type: "ReLU"
bottom: "conv3_1/sep"
top: "conv3_1/sep"
}
layer {
name: "conv3_2/dw"
type: "Convolution"
bottom: "conv3_1/sep"
top: "conv3_2/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 128
bias_term: false
pad: 1
kernel_size: 3
group: 128
engine: CAFFE
stride: 2
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv3_2/dw/bn"
type: "BatchNorm"
bottom: "conv3_2/dw"
top: "conv3_2/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv3_2/dw/scale"
type: "Scale"
bottom: "conv3_2/dw"
top: "conv3_2/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu3_2/dw"
type: "ReLU"
bottom: "conv3_2/dw"
top: "conv3_2/dw"
}
layer {
name: "conv3_2/sep"
type: "Convolution"
bottom: "conv3_2/dw"
top: "conv3_2/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv3_2/sep/bn"
type: "BatchNorm"
bottom: "conv3_2/sep"
top: "conv3_2/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv3_2/sep/scale"
type: "Scale"
bottom: "conv3_2/sep"
top: "conv3_2/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu3_2/sep"
type: "ReLU"
bottom: "conv3_2/sep"
top: "conv3_2/sep"
}
layer {
name: "conv4_1/dw"
type: "Convolution"
bottom: "conv3_2/sep"
top: "conv4_1/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
bias_term: false
pad: 1
kernel_size: 3
group: 256
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv4_1/dw/bn"
type: "BatchNorm"
bottom: "conv4_1/dw"
top: "conv4_1/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv4_1/dw/scale"
type: "Scale"
bottom: "conv4_1/dw"
top: "conv4_1/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu4_1/dw"
type: "ReLU"
bottom: "conv4_1/dw"
top: "conv4_1/dw"
}
layer {
name: "conv4_1/sep"
type: "Convolution"
bottom: "conv4_1/dw"
top: "conv4_1/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv4_1/sep/bn"
type: "BatchNorm"
bottom: "conv4_1/sep"
top: "conv4_1/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv4_1/sep/scale"
type: "Scale"
bottom: "conv4_1/sep"
top: "conv4_1/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu4_1/sep"
type: "ReLU"
bottom: "conv4_1/sep"
top: "conv4_1/sep"
}
layer {
name: "conv4_2/dw"
type: "Convolution"
bottom: "conv4_1/sep"
top: "conv4_2/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 256
bias_term: false
pad: 1
kernel_size: 3
group: 256
engine: CAFFE
stride: 2
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv4_2/dw/bn"
type: "BatchNorm"
bottom: "conv4_2/dw"
top: "conv4_2/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv4_2/dw/scale"
type: "Scale"
bottom: "conv4_2/dw"
top: "conv4_2/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu4_2/dw"
type: "ReLU"
bottom: "conv4_2/dw"
top: "conv4_2/dw"
}
layer {
name: "conv4_2/sep"
type: "Convolution"
bottom: "conv4_2/dw"
top: "conv4_2/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv4_2/sep/bn"
type: "BatchNorm"
bottom: "conv4_2/sep"
top: "conv4_2/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv4_2/sep/scale"
type: "Scale"
bottom: "conv4_2/sep"
top: "conv4_2/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu4_2/sep"
type: "ReLU"
bottom: "conv4_2/sep"
top: "conv4_2/sep"
}
layer {
name: "conv5_1/dw"
type: "Convolution"
bottom: "conv4_2/sep"
top: "conv5_1/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 1
kernel_size: 3
group: 512
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_1/dw/bn"
type: "BatchNorm"
bottom: "conv5_1/dw"
top: "conv5_1/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_1/dw/scale"
type: "Scale"
bottom: "conv5_1/dw"
top: "conv5_1/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_1/dw"
type: "ReLU"
bottom: "conv5_1/dw"
top: "conv5_1/dw"
}
layer {
name: "conv5_1/sep"
type: "Convolution"
bottom: "conv5_1/dw"
top: "conv5_1/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_1/sep/bn"
type: "BatchNorm"
bottom: "conv5_1/sep"
top: "conv5_1/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_1/sep/scale"
type: "Scale"
bottom: "conv5_1/sep"
top: "conv5_1/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_1/sep"
type: "ReLU"
bottom: "conv5_1/sep"
top: "conv5_1/sep"
}
layer {
name: "conv5_2/dw"
type: "Convolution"
bottom: "conv5_1/sep"
top: "conv5_2/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 1
kernel_size: 3
group: 512
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_2/dw/bn"
type: "BatchNorm"
bottom: "conv5_2/dw"
top: "conv5_2/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_2/dw/scale"
type: "Scale"
bottom: "conv5_2/dw"
top: "conv5_2/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_2/dw"
type: "ReLU"
bottom: "conv5_2/dw"
top: "conv5_2/dw"
}
layer {
name: "conv5_2/sep"
type: "Convolution"
bottom: "conv5_2/dw"
top: "conv5_2/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_2/sep/bn"
type: "BatchNorm"
bottom: "conv5_2/sep"
top: "conv5_2/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_2/sep/scale"
type: "Scale"
bottom: "conv5_2/sep"
top: "conv5_2/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_2/sep"
type: "ReLU"
bottom: "conv5_2/sep"
top: "conv5_2/sep"
}
layer {
name: "conv5_3/dw"
type: "Convolution"
bottom: "conv5_2/sep"
top: "conv5_3/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 1
kernel_size: 3
group: 512
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_3/dw/bn"
type: "BatchNorm"
bottom: "conv5_3/dw"
top: "conv5_3/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_3/dw/scale"
type: "Scale"
bottom: "conv5_3/dw"
top: "conv5_3/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_3/dw"
type: "ReLU"
bottom: "conv5_3/dw"
top: "conv5_3/dw"
}
layer {
name: "conv5_3/sep"
type: "Convolution"
bottom: "conv5_3/dw"
top: "conv5_3/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_3/sep/bn"
type: "BatchNorm"
bottom: "conv5_3/sep"
top: "conv5_3/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_3/sep/scale"
type: "Scale"
bottom: "conv5_3/sep"
top: "conv5_3/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_3/sep"
type: "ReLU"
bottom: "conv5_3/sep"
top: "conv5_3/sep"
}
layer {
name: "conv5_4/dw"
type: "Convolution"
bottom: "conv5_3/sep"
top: "conv5_4/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 1
kernel_size: 3
group: 512
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_4/dw/bn"
type: "BatchNorm"
bottom: "conv5_4/dw"
top: "conv5_4/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_4/dw/scale"
type: "Scale"
bottom: "conv5_4/dw"
top: "conv5_4/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_4/dw"
type: "ReLU"
bottom: "conv5_4/dw"
top: "conv5_4/dw"
}
layer {
name: "conv5_4/sep"
type: "Convolution"
bottom: "conv5_4/dw"
top: "conv5_4/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_4/sep/bn"
type: "BatchNorm"
bottom: "conv5_4/sep"
top: "conv5_4/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_4/sep/scale"
type: "Scale"
bottom: "conv5_4/sep"
top: "conv5_4/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_4/sep"
type: "ReLU"
bottom: "conv5_4/sep"
top: "conv5_4/sep"
}
layer {
name: "conv5_5/dw"
type: "Convolution"
bottom: "conv5_4/sep"
top: "conv5_5/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 1
kernel_size: 3
group: 512
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_5/dw/bn"
type: "BatchNorm"
bottom: "conv5_5/dw"
top: "conv5_5/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_5/dw/scale"
type: "Scale"
bottom: "conv5_5/dw"
top: "conv5_5/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_5/dw"
type: "ReLU"
bottom: "conv5_5/dw"
top: "conv5_5/dw"
}
layer {
name: "conv5_5/sep"
type: "Convolution"
bottom: "conv5_5/dw"
top: "conv5_5/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_5/sep/bn"
type: "BatchNorm"
bottom: "conv5_5/sep"
top: "conv5_5/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_5/sep/scale"
type: "Scale"
bottom: "conv5_5/sep"
top: "conv5_5/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_5/sep"
type: "ReLU"
bottom: "conv5_5/sep"
top: "conv5_5/sep"
}
layer {
name: "conv5_6/dw"
type: "Convolution"
bottom: "conv5_5/sep"
top: "conv5_6/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 512
bias_term: false
pad: 1
kernel_size: 3
group: 512
engine: CAFFE
stride: 2
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_6/dw/bn"
type: "BatchNorm"
bottom: "conv5_6/dw"
top: "conv5_6/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_6/dw/scale"
type: "Scale"
bottom: "conv5_6/dw"
top: "conv5_6/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_6/dw"
type: "ReLU"
bottom: "conv5_6/dw"
top: "conv5_6/dw"
}
layer {
name: "conv5_6/sep"
type: "Convolution"
bottom: "conv5_6/dw"
top: "conv5_6/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 1024
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv5_6/sep/bn"
type: "BatchNorm"
bottom: "conv5_6/sep"
top: "conv5_6/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv5_6/sep/scale"
type: "Scale"
bottom: "conv5_6/sep"
top: "conv5_6/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu5_6/sep"
type: "ReLU"
bottom: "conv5_6/sep"
top: "conv5_6/sep"
}
layer {
name: "conv6/dw"
type: "Convolution"
bottom: "conv5_6/sep"
top: "conv6/dw"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 1024
bias_term: false
pad: 1
kernel_size: 3
group: 1024
engine: CAFFE
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv6/dw/bn"
type: "BatchNorm"
bottom: "conv6/dw"
top: "conv6/dw"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv6/dw/scale"
type: "Scale"
bottom: "conv6/dw"
top: "conv6/dw"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu6/dw"
type: "ReLU"
bottom: "conv6/dw"
top: "conv6/dw"
}
layer {
name: "conv6/sep"
type: "Convolution"
bottom: "conv6/dw"
top: "conv6/sep"
param {
lr_mult: 1
decay_mult: 1
}
convolution_param {
num_output: 1024
bias_term: false
pad: 0
kernel_size: 1
stride: 1
weight_filler {
type: "msra"
}
}
}
layer {
name: "conv6/sep/bn"
type: "BatchNorm"
bottom: "conv6/sep"
top: "conv6/sep"
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
param {
lr_mult: 0
decay_mult: 0
}
batch_norm_param {
use_global_stats: true
eps: 1e-5
}
}
layer {
name: "conv6/sep/scale"
type: "Scale"
bottom: "conv6/sep"
top: "conv6/sep"
param {
lr_mult: 1
decay_mult: 0
}
param {
lr_mult: 1
decay_mult: 0
}
scale_param {
filler {
value: 1
}
bias_term: true
bias_filler {
value: 0
}
}
}
layer {
name: "relu6/sep"
type: "ReLU"
bottom: "conv6/sep"
top: "conv6/sep"
}
layer {
name: "pool6"
type: "Pooling"
bottom: "conv6/sep"
top: "pool6"
pooling_param {
pool: AVE
global_pooling: true
}
}
layer {
name: "fc7"
type: "Convolution"
bottom: "pool6"
top: "fc7"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
convolution_param {
num_output: 42
kernel_size: 1
weight_filler {
type: "msra"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc7"
bottom: "label"
top: "loss"
}
layer {
name: "top1/acc"
type: "Accuracy"
bottom: "fc7"
bottom: "label"
top: "top1/acc"
include {
phase: TEST
}
}
layer {
name: "top5/acc"
type: "Accuracy"
bottom: "fc7"
bottom: "label"
top: "top5/acc"
include {
phase: TEST
}
accuracy_param {
top_k: 5
}
}

I also tried replacing the mean as described in your tutorial, but I still get:

I0421 13:23:50.226541 2808 solver.cpp:218] Iteration 0 (-1.82169e-44 iter/s, 40.202s/20 iters), loss = -nan
I0421 13:23:50.226686 2808 solver.cpp:237] Train net output #0: loss = -nan (* 1 = -nan loss)
I0421 13:23:50.226697 2808 sgd_solver.cpp:105] Iteration 0, lr = 1e-06

I really don't understand why the loss goes to -nan.

@zhangnn016
Copy link

I have this problem too. Have you found the reason?

@TerryBryant
Copy link

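@Jacoppy @zhangnn016 Try removing every `use_global_stats: true` line from the batch_norm_param blocks, so that the BatchNorm layers fall back to their default behavior. (By default, Caffe's BatchNorm normalizes with mini-batch statistics in the TRAIN phase and with the stored global statistics in the TEST phase.)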

@NarcissusInMirror
Copy link

@Jacoppy @zhangnn016 Try removing every `use_global_stats: true` line from the batch_norm_param blocks, so that the BatchNorm layers fall back to their default behavior.

Thank you very much, it works for me!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

4 participants