configs/experiment/projects/bridging/dinosaur/voc2012_trainaug_feat_rec_dino_base16_auto.yaml
# @package _global_
# Hydra defaults list: the configs composed into this experiment, in order.
# `_self_` is listed last, so the values set in this file are merged after
# (and take precedence over) the entries above it.
defaults:
  - /experiment/projects/bridging/dinosaur/_base_feature_recon  # (1)!
  - /dataset: voc2012_trainaug  # (2)!
  - /experiment/projects/bridging/dinosaur/_preprocessing_voc2012_segm_dino_feature_recon  # (3)!
  - /experiment/projects/bridging/dinosaur/_metrics_coco  # (4)!
  - _self_
# The following parameters assume training on 8 GPUs, leading to an effective batch size of 64.
# Trainer overrides for this experiment.
trainer:
  devices: 8
  max_steps: 300000
  # Written as an explicit null instead of a bare empty value; both load as null.
  # NOTE(review): presumably this leaves `max_steps` as the only limit on
  # training length - confirm against the trainer class set in the base config.
  max_epochs: null
  check_val_every_n_epoch: 50
# Data loading overrides.
dataset:
  num_workers: 4
  # 8 here x 8 `trainer.devices` = the effective batch size of 64 stated in
  # the comment above `trainer`.
  # NOTE(review): presumably this is the per-device batch size - confirm.
  batch_size: 8
# Model components. Each child key configures one module; keys ending in
# `_path` hold dotted paths to other values (e.g. `input.batch_size`).
# NOTE(review): nesting below was reconstructed from key semantics - confirm
# against the base config this file overrides.
models:
  conditioning:
    _target_: routed.ocl.conditioning.RandomConditioning
    n_slots: 6
    object_dim: 256
    batch_size_path: input.batch_size

  feature_extractor:
    model_name: vit_base_patch16_224_dino
    # `when_testing` is a custom resolver defined outside this file.
    # NOTE(review): presumably yields `false` under test and `true` otherwise,
    # so tests avoid fetching pretrained weights - confirm.
    pretrained: ${when_testing:false,true}
    freeze: true

  # No overrides in this file; an explicit empty mapping rather than null.
  perceptual_grouping: {}

  object_decoder:
    _target_: routed.ocl.decoding.AutoregressivePatchDecoder
    # Relative interpolation: resolves to the sibling key `output_dim` of
    # `object_decoder`, which is not set in this file.
    decoder_cond_dim: ${.output_dim}
    use_input_transform: true
    use_decoder_masks: true
    decoder:
      _target_: ocl.neural_networks.build_transformer_decoder
      # `_partial_: true` makes instantiation return a partially-applied
      # callable instead of calling the target immediately.
      _partial_: true
      n_layers: 4
      n_heads: 4
      return_attention_weights: true
    masks_path: perceptual_grouping.feature_attributions
    object_features_path: perceptual_grouping.objects
# Experiment-level values.
experiment:
  # NOTE(review): presumably the feature dimension produced by
  # `models.feature_extractor` (vit_base_patch16_224_dino) - confirm.
  input_feature_dim: 768
# Configs referenced by the numbered annotations in `defaults`:
# (1) /experiment/projects/bridging/dinosaur/_base_feature_recon
# (2) /dataset/voc2012_trainaug
# (3) /experiment/projects/bridging/dinosaur/_preprocessing_voc2012_segm_dino_feature_recon
# (4) /experiment/projects/bridging/dinosaur/_metrics_coco