# Parameters for DVC pipeline
# Parameters specific to the problem to solve
problem: pcvrp
problem_params:
  PRICE_MODE: 1 # for pcvrp medic (deprecated)
  DISCOUNT_FACTOR: 0.9 # for pcvrp medic (deprecated)
  PLAYER_ROLE: engineer # medic, engineer, maybe searcher?
  MEDIC_MODEL_FILE: pretrained_asist/medic/cvrp_baseline
  MEDIC_GRAPH_SIZE: 55 # number of victims (includes high-value victims)
  RUBBLE_GRAPH_SIZE: 18 # number of rubble piles
  HIGH_VALUE_VICTIM_SIZE: 7 # number of freeze plates + number of high-value victims
  MEDIC_SPEED: 0.0091 # medic speed after conversion
  ENGINEER_SPEED: 0.0065 # engineer speed after conversion
  MEDIC_TOOL_DURABILITY: 20
  ENGINEER_TOOL_DURABILITY: 131
  DISTANCE_RATIO: 0.001663 # Saturn map to 2D [0, 1] coordinate conversion factor
  HIGH_VALUE_MISMATCH_PENALTY_COEFF: 0.004 # coefficient 1
  LATE_RUBBLE_PENALTY_COEFF: 1.757 # coefficient 2
# Metric file
metric_file: scores/scores.json
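# A minimal sketch (assumption: the owning stage, here called `train`, lives in a
# dvc.yaml that is not part of this file) of declaring the metric file above so
# that `dvc metrics show` reports it:
#
#   # dvc.yaml (excerpt)
#   stages:
#     train:
#       ...
#       metrics:
#         - scores/scores.json:
#             cache: false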
# Training parameters
seed: 1234
eval_only: false
no_cuda: false
cuda: 0 # Specify a list of ints to distribute the model across GPUs, or a single int otherwise
run_name: engineer_medic
graph_size: 25
batch_size: 512
n_epochs: 50
epoch_size: 128000
val_size: 128000 # affects num_samples when collecting the problem dataset using the baseline
val_dataset:
eval_batch_size: 1024
checkpoint_encoder: false
data_distribution: saturn
epoch_start: 0
checkpoint_epochs: 1
log_step: 50
load_path:
resume:
no_tensorboard: false
no_progress_bar: false
no_wandb: false # Set to true to disable logging to WandB
high_value: 1
# Model parameters
model: attention
embedding_dim: 128
hidden_dim: 128
n_encode_layers: 3
tanh_clipping: 10
normalization: batch
lr_model: 1.0e-4
lr_critic: 1.0e-4
lr_decay: 1.0
max_grad_norm: 1.0
exp_beta: 0.8
baseline: rollout
bl_alpha: 0.05
bl_warmup_epochs:
shrink_size:
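# A minimal sketch (assumption: the stage name `train` and entry point `run.py` are
# hypothetical, not taken from this repo) of a dvc.yaml stage that consumes this
# file; DVC re-runs the stage when any of the listed keys change in params.yaml:
#
#   # dvc.yaml (excerpt)
#   stages:
#     train:
#       cmd: python run.py
#       params:
#         - problem
#         - problem_params
#         - graph_size
#         - n_epochs
#         - model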