{ "activation_dropout": 0.0, "activation_function": "silu", "architectures": [ "LwDetrForObjectDetection" ], "attention_bias": true, "attention_dropout": 0.0, "auxiliary_loss": true, "backbone": null, "backbone_config": { "cae_init_values": 0.1, "dropout_prob": 0.0, "dtype": "float32", "hidden_act": "gelu", "hidden_size": 192, "image_size": 1024, "initializer_range": 0.02, "layer_norm_eps": 1e-06, "mlp_ratio": 4, "model_type": "lw_detr_vit", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 6, "num_windows": 16, "num_windows_side": 4, "out_features": [ "stage2", "stage4", "stage6" ], "out_indices": [ 2, 4, 6 ], "patch_size": 16, "pretrain_image_size": 224, "qkv_bias": true, "stage_names": [ "stem", "stage1", "stage2", "stage3", "stage4", "stage5", "stage6" ], "use_absolute_position_embeddings": true, "window_block_indices": [ 0, 2, 4 ] }, "batch_norm_eps": 1e-05, "bbox_cost": 5, "bbox_loss_coefficient": 5, "c2f_num_blocks": 3, "class_cost": 2, "d_model": 256, "decoder_activation_function": "relu", "decoder_cross_attention_heads": 16, "decoder_ffn_dim": 2048, "decoder_layers": 3, "decoder_n_points": 2, "decoder_self_attention_heads": 8, "dice_loss_coefficient": 1, "disable_custom_kernels": true, "dropout": 0.1, "dtype": "float32", "eos_coefficient": 0.1, "focal_alpha": 0.25, "giou_cost": 2, "giou_loss_coefficient": 2, "group_detr": 13, "hidden_expansion": 0.5, "id2label": { "0": "Tray", "1": "Cart" }, "init_std": 0.02, "keys_to_ignore_at_inference": [ "loss_dict", "auxiliary_outputs", "last_hidden_state", "intermediate_hidden_states", "intermediate_logits", "intermediate_reference_points", "encoder_last_hidden_state", "init_reference_points", "enc_outputs_class", "enc_outputs_coord_logits" ], "label2id": { "Cart": 1, "Tray": 0 }, "model_type": "lw_detr", "num_feature_levels": 1, "num_queries": 100, "projector_in_channels": [ 256 ], "projector_out_channels": 256, "projector_scale_factors": [ 1.0 ], "transformers_version": "5.2.0", "use_cache": false }