mohammed-aljafry committed
Commit d62455e · verified · 1 Parent(s): c89800e

Upload folder using huggingface_hub

Files changed (3)
  1. config.json +8 -6
  2. modeling_interfuser.py +368 -0
  3. pytorch_model.bin +2 -2
config.json CHANGED
@@ -1,16 +1,18 @@
 {
-  "model_type": "interfuser",
   "architectures": [
-    "InterfuserModel"
+    "InterfuserForHuggingFace"
   ],
+  "dec_depth": 6,
+  "dim_feedforward": 2048,
   "embed_dim": 256,
   "enc_depth": 6,
-  "dec_depth": 6,
+  "in_chans": 12,
+  "lidar_backbone_name": "r18",
+  "model_type": "interfuser",
   "num_heads": 8,
-  "dim_feedforward": 2048,
-  "dropout": 0.1,
   "rgb_backbone_name": "r50",
-  "lidar_backbone_name": "r18",
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.4",
   "use_different_backbone": true,
   "waypoints_pred_head": "gru"
 }
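The new config reorders the keys, points "architectures" at the wrapper class, adds in_chans, torch_dtype, and transformers_version, and no longer lists dropout. As a minimal sketch of reading it back through the InterfuserConfig class added below (./interfuser-hf is a placeholder for a local snapshot of this repo, and modeling_interfuser.py must be importable):

from modeling_interfuser import InterfuserConfig  # the file added in this commit

config = InterfuserConfig.from_pretrained("./interfuser-hf")
print(config.embed_dim, config.rgb_backbone_name)  # 256 and "r50", per the values above
print(config.architectures)                        # ["InterfuserForHuggingFace"]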
modeling_interfuser.py ADDED
@@ -0,0 +1,368 @@
+
+ # -*- coding: utf-8 -*-
+ # This file contains all custom class definitions required to run the Interfuser model.
+
+ import torch
+ from torch import nn
+ import torch.nn.functional as F
+ from transformers import PreTrainedModel, PretrainedConfig
+ from functools import partial
+ import math
+ from collections import OrderedDict
+ import copy
+ from typing import Optional, List
+ from torch import Tensor
+
+ # Helper layers and ResNet backbones are imported from the original InterFuser repository
+ # (its bundled timm fork); the remaining dependency classes are redefined below for portability.
+ from InterFuser.interfuser.timm.models.layers import to_2tuple
+ from InterFuser.interfuser.timm.models.resnet import resnet50d, resnet26d, resnet18d
+
+ # ==============================================================================
+ # SECTION 1: ALL DEPENDENCY CLASSES FROM THE ORIGINAL CODE
+ # ==============================================================================
+
+ class HybridEmbed(nn.Module):
+     def __init__(self, backbone, img_size=224, patch_size=1, feature_size=None, in_chans=3, embed_dim=768):
+         super().__init__()
+         assert isinstance(backbone, nn.Module)
+         img_size = to_2tuple(img_size)
+         patch_size = to_2tuple(patch_size)
+         self.img_size = img_size
+         self.patch_size = patch_size
+         self.backbone = backbone
+         if feature_size is None:
+             with torch.no_grad():
+                 training = backbone.training
+                 if training:
+                     backbone.eval()
+                 o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))
+                 if isinstance(o, (list, tuple)):
+                     o = o[-1]
+                 feature_size = o.shape[-2:]
+                 feature_dim = o.shape[1]
+                 backbone.train(training)
+         else:
+             feature_size = to_2tuple(feature_size)
+             if hasattr(self.backbone, "feature_info"):
+                 feature_dim = self.backbone.feature_info.channels()[-1]
+             else:
+                 feature_dim = self.backbone.num_features
+         self.proj = nn.Conv2d(feature_dim, embed_dim, kernel_size=1, stride=1)
+
+     def forward(self, x):
+         x = self.backbone(x)
+         if isinstance(x, (list, tuple)):
+             x = x[-1]
+         x = self.proj(x)
+         global_x = torch.mean(x, [2, 3], keepdim=False)[:, :, None]
+         return x, global_x
+
+ class PositionEmbeddingSine(nn.Module):
+     def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None):
+         super().__init__()
+         self.num_pos_feats = num_pos_feats
+         self.temperature = temperature
+         self.normalize = normalize
+         if scale is not None and normalize is False:
+             raise ValueError("normalize should be True if scale is passed")
+         if scale is None:
+             scale = 2 * math.pi
+         self.scale = scale
+
+     def forward(self, tensor):
+         x = tensor
+         bs, _, h, w = x.shape
+         not_mask = torch.ones((bs, h, w), device=x.device)
+         y_embed = not_mask.cumsum(1, dtype=torch.float32)
+         x_embed = not_mask.cumsum(2, dtype=torch.float32)
+         if self.normalize:
+             eps = 1e-6
+             y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
+             x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
+         dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
+         dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
+         pos_x = x_embed[:, :, :, None] / dim_t
+         pos_y = y_embed[:, :, :, None] / dim_t
+         pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3)
+         pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3)
+         pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
+         return pos
+
+ def _get_clones(module, N):
+     return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
+
+ class TransformerEncoderLayer(nn.Module):
+     def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=nn.ReLU(), normalize_before=False):
+         super().__init__()
+         self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
+         self.linear1 = nn.Linear(d_model, dim_feedforward)
+         self.dropout = nn.Dropout(dropout)
+         self.linear2 = nn.Linear(dim_feedforward, d_model)
+         self.norm1 = nn.LayerNorm(d_model)
+         self.norm2 = nn.LayerNorm(d_model)
+         self.dropout1 = nn.Dropout(dropout)
+         self.dropout2 = nn.Dropout(dropout)
+         self.activation = activation
+         self.normalize_before = normalize_before
+     def with_pos_embed(self, tensor, pos: Optional[Tensor]):
+         return tensor if pos is None else tensor + pos
+     def forward(self, src, src_mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None, pos: Optional[Tensor] = None):
+         q = k = self.with_pos_embed(src, pos)
+         src2 = self.self_attn(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
+         src = src + self.dropout1(src2)
+         src = self.norm1(src)
+         src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
+         src = src + self.dropout2(src2)
+         src = self.norm2(src)
+         return src
+
+ class TransformerEncoder(nn.Module):
+     def __init__(self, encoder_layer, num_layers, norm=None):
+         super().__init__()
+         self.layers = _get_clones(encoder_layer, num_layers)
+         self.num_layers = num_layers
+         self.norm = norm
+     def forward(self, src, mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None, pos: Optional[Tensor] = None):
+         output = src
+         for layer in self.layers:
+             output = layer(output, src_mask=mask, src_key_padding_mask=src_key_padding_mask, pos=pos)
+         if self.norm is not None:
+             output = self.norm(output)
+         return output
+
+ class TransformerDecoderLayer(nn.Module):
+     def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation=nn.ReLU(), normalize_before=False):
+         super().__init__()
+         self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
+         self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
+         self.linear1 = nn.Linear(d_model, dim_feedforward)
+         self.dropout = nn.Dropout(dropout)
+         self.linear2 = nn.Linear(dim_feedforward, d_model)
+         self.norm1 = nn.LayerNorm(d_model)
+         self.norm2 = nn.LayerNorm(d_model)
+         self.norm3 = nn.LayerNorm(d_model)
+         self.dropout1 = nn.Dropout(dropout)
+         self.dropout2 = nn.Dropout(dropout)
+         self.dropout3 = nn.Dropout(dropout)
+         self.activation = activation
+         self.normalize_before = normalize_before
+     def with_pos_embed(self, tensor, pos: Optional[Tensor]):
+         return tensor if pos is None else tensor + pos
+     def forward(self, tgt, memory, tgt_mask: Optional[Tensor] = None, memory_mask: Optional[Tensor] = None, tgt_key_padding_mask: Optional[Tensor] = None, memory_key_padding_mask: Optional[Tensor] = None, pos: Optional[Tensor] = None, query_pos: Optional[Tensor] = None):
+         q = k = self.with_pos_embed(tgt, query_pos)
+         tgt2 = self.self_attn(q, k, value=tgt, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask)[0]
+         tgt = tgt + self.dropout1(tgt2)
+         tgt = self.norm1(tgt)
+         tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt, query_pos), key=self.with_pos_embed(memory, pos), value=memory, attn_mask=memory_mask, key_padding_mask=memory_key_padding_mask)[0]
+         tgt = tgt + self.dropout2(tgt2)
+         tgt = self.norm2(tgt)
+         tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
+         tgt = tgt + self.dropout3(tgt2)
+         tgt = self.norm3(tgt)
+         return tgt
+
+ class TransformerDecoder(nn.Module):
+     def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False):
+         super().__init__()
+         self.layers = _get_clones(decoder_layer, num_layers)
+         self.num_layers = num_layers
+         self.norm = norm
+         self.return_intermediate = return_intermediate
+     def forward(self, tgt, memory, tgt_mask: Optional[Tensor] = None, memory_mask: Optional[Tensor] = None, tgt_key_padding_mask: Optional[Tensor] = None, memory_key_padding_mask: Optional[Tensor] = None, pos: Optional[Tensor] = None, query_pos: Optional[Tensor] = None):
+         output = tgt
+         for layer in self.layers:
+             output = layer(output, memory, tgt_mask=tgt_mask, memory_mask=memory_mask, tgt_key_padding_mask=tgt_key_padding_mask, memory_key_padding_mask=memory_key_padding_mask, pos=pos, query_pos=query_pos)
+         if self.norm is not None:
+             output = self.norm(output)
+         return output.unsqueeze(0)
+
+ class GRUWaypointsPredictor(nn.Module):
+     def __init__(self, input_dim, waypoints=10):
+         super().__init__()
+         self.gru = torch.nn.GRU(input_size=input_dim, hidden_size=64, batch_first=True)
+         self.encoder = nn.Linear(2, 64)
+         self.decoder = nn.Linear(64, 2)
+         self.waypoints = waypoints
+     def forward(self, x, target_point):
+         bs = x.shape[0]
+         z = self.encoder(target_point).unsqueeze(0)
+         output, _ = self.gru(x, z)
+         output = output.reshape(bs * self.waypoints, -1)
+         output = self.decoder(output).reshape(bs, self.waypoints, 2)
+         output = torch.cumsum(output, 1)
+         return output
+
+ # ... (Add other dependency classes like SpatialSoftmax, MultiPath_Generator, etc. if needed by other configs)
+
+ # --- The ORIGINAL Interfuser Model Class ---
+ class Interfuser(nn.Module):
+     def __init__(self, img_size=224, multi_view_img_size=112, patch_size=8, in_chans=3, embed_dim=768, enc_depth=6, dec_depth=6, dim_feedforward=2048, normalize_before=False, rgb_backbone_name="r26", lidar_backbone_name="r26", num_heads=8, norm_layer=None, dropout=0.1, end2end=False, direct_concat=True, separate_view_attention=False, separate_all_attention=False, act_layer=None, weight_init="", freeze_num=-1, with_lidar=False, with_right_left_sensors=True, with_center_sensor=False, traffic_pred_head_type="det", waypoints_pred_head="heatmap", reverse_pos=True, use_different_backbone=False, use_view_embed=True, use_mmad_pretrain=None):
+         super().__init__()
+         self.num_features = self.embed_dim = embed_dim
+         norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)
+         act_layer = act_layer or nn.GELU
+
+         self.waypoints_pred_head = waypoints_pred_head
+         self.with_lidar = with_lidar
+         self.with_right_left_sensors = with_right_left_sensors
+         self.attn_mask = None  # Simplified
+
+         if use_different_backbone:
+             if rgb_backbone_name == "r50": self.rgb_backbone = resnet50d(pretrained=False, in_chans=3, features_only=True, out_indices=[4])
+             if rgb_backbone_name == "r26": self.rgb_backbone = resnet26d(pretrained=False, in_chans=3, features_only=True, out_indices=[4])
+             if lidar_backbone_name == "r18": self.lidar_backbone = resnet18d(pretrained=False, in_chans=3, features_only=True, out_indices=[4])
+
+             rgb_embed_layer = partial(HybridEmbed, backbone=self.rgb_backbone)
+             lidar_embed_layer = partial(HybridEmbed, backbone=self.lidar_backbone)
+             self.rgb_patch_embed = rgb_embed_layer(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
+             self.lidar_patch_embed = lidar_embed_layer(img_size=img_size, patch_size=patch_size, in_chans=3, embed_dim=embed_dim)
+         else: raise NotImplementedError("Only use_different_backbone=True supported in this wrapper")
+
+         self.global_embed = nn.Parameter(torch.zeros(1, embed_dim, 5))
+         self.view_embed = nn.Parameter(torch.zeros(1, embed_dim, 5, 1))
+         self.query_pos_embed = nn.Parameter(torch.zeros(1, embed_dim, 11))
+         self.query_embed = nn.Parameter(torch.zeros(400 + 11, 1, embed_dim))
+
+         if self.waypoints_pred_head == "gru": self.waypoints_generator = GRUWaypointsPredictor(embed_dim)
+         else: raise NotImplementedError("Only GRU waypoints head supported in this wrapper")
+
+         self.junction_pred_head = nn.Linear(embed_dim, 2)
+         self.traffic_light_pred_head = nn.Linear(embed_dim, 2)
+         self.stop_sign_head = nn.Linear(embed_dim, 2)
+         self.traffic_pred_head = nn.Sequential(*[nn.Linear(embed_dim + 32, 64), nn.ReLU(), nn.Linear(64, 7), nn.Sigmoid()])
+         self.position_encoding = PositionEmbeddingSine(embed_dim // 2, normalize=True)
+
+         # Instantiate the activation (act_layer()) so the encoder/decoder layers can apply it to tensors.
+         encoder_layer = TransformerEncoderLayer(embed_dim, num_heads, dim_feedforward, dropout, act_layer(), normalize_before)
+         self.encoder = TransformerEncoder(encoder_layer, enc_depth, None)
+         decoder_layer = TransformerDecoderLayer(embed_dim, num_heads, dim_feedforward, dropout, act_layer(), normalize_before)
+         decoder_norm = nn.LayerNorm(embed_dim)
+         self.decoder = TransformerDecoder(decoder_layer, dec_depth, decoder_norm, return_intermediate=False)
+
+     def forward_features(self, front_image, left_image, right_image, front_center_image, lidar, measurements):
+         features = []
+         front_image_token, front_image_token_global = self.rgb_patch_embed(front_image)
+         front_image_token = (front_image_token + self.position_encoding(front_image_token))
+         front_image_token = front_image_token.flatten(2).permute(2, 0, 1)
+         front_image_token_global = (front_image_token_global + self.global_embed[:, :, 0:1])
+         front_image_token_global = front_image_token_global.permute(2, 0, 1)
+         features.extend([front_image_token, front_image_token_global])
+         left_image_token, left_image_token_global = self.rgb_patch_embed(left_image)
+         left_image_token = (left_image_token + self.position_encoding(left_image_token)).flatten(2).permute(2, 0, 1)
+         left_image_token_global = (left_image_token_global + self.global_embed[:, :, 1:2]).permute(2, 0, 1)
+         right_image_token, right_image_token_global = self.rgb_patch_embed(right_image)
+         right_image_token = (right_image_token + self.position_encoding(right_image_token)).flatten(2).permute(2, 0, 1)
+         right_image_token_global = (right_image_token_global + self.global_embed[:, :, 2:3]).permute(2, 0, 1)
+         features.extend([left_image_token, left_image_token_global, right_image_token, right_image_token_global])
+         return torch.cat(features, 0)
+
+     def forward(self, x):
+         front_image, left_image, right_image = x["rgb"], x["rgb_left"], x["rgb_right"]
+         measurements, target_point = x["measurements"], x["target_point"]
+         features = self.forward_features(front_image, left_image, right_image, x["rgb_center"], x["lidar"], measurements)
+         bs = front_image.shape[0]
+         tgt = self.position_encoding(torch.ones((bs, 1, 20, 20), device=x["rgb"].device)).flatten(2)
+         tgt = torch.cat([tgt, self.query_pos_embed.repeat(bs, 1, 1)], 2).permute(2, 0, 1)
+         memory = self.encoder(features, mask=self.attn_mask)
+         hs = self.decoder(self.query_embed.repeat(1, bs, 1), memory, query_pos=tgt)[0].permute(1, 0, 2)
+         traffic_feature = hs[:, :400]
+         waypoints_feature = hs[:, 401:411]
+         is_junction_feature = hs[:, 400]
+         traffic_light_state_feature, stop_sign_feature = hs[:, 400], hs[:, 400]
+         waypoints = self.waypoints_generator(waypoints_feature, target_point)
+         is_junction = self.junction_pred_head(is_junction_feature)
+         traffic_light_state = self.traffic_light_pred_head(traffic_light_state_feature)
+         stop_sign = self.stop_sign_head(stop_sign_feature)
+         velocity = measurements[:, 6:7].unsqueeze(-1).repeat(1, 400, 32)
+         traffic_feature_with_vel = torch.cat([traffic_feature, velocity], dim=2)
+         traffic = self.traffic_pred_head(traffic_feature_with_vel)
+         return traffic, waypoints, is_junction, traffic_light_state, stop_sign, traffic_feature
+
+ # ==============================================================================
+ # SECTION 2: HUGGING FACE WRAPPER CLASSES
+ # ==============================================================================
+ # ==============================================================================
+ # Add this code at the end of the cell that defines the original model.
+ # ==============================================================================
+
+ print("\n--- Defining Hugging Face compatible wrapper classes ---")
+
+
+ # --- 1. The HF-compatible config class (InterfuserConfig) ---
+ class InterfuserConfig(PretrainedConfig):
+
+     model_type = "interfuser"
+
+     def __init__(
+         self,
+         embed_dim=256,
+         enc_depth=6,
+         dec_depth=6,
+         num_heads=8,
+         dim_feedforward=2048,
+         rgb_backbone_name="r50",
+         lidar_backbone_name="r18",
+         waypoints_pred_head="gru",
+         use_different_backbone=True,
+         **kwargs
+     ):
+         super().__init__(**kwargs)
+         self.embed_dim = embed_dim
+         self.enc_depth = enc_depth
+         self.dec_depth = dec_depth
+         self.num_heads = num_heads
+         self.dim_feedforward = dim_feedforward
+         self.rgb_backbone_name = rgb_backbone_name
+         self.lidar_backbone_name = lidar_backbone_name
+         self.waypoints_pred_head = waypoints_pred_head
+         self.use_different_backbone = use_different_backbone
+         # Add the architectures key for auto-mapping
+         self.architectures = ["InterfuserForHuggingFace"]
+
+
+ # --- 2. The HF-compatible model class (InterfuserForHuggingFace) ---
+ # This is the new version of the model that inherits from PreTrainedModel.
+ class InterfuserForHuggingFace(PreTrainedModel):
+
+     config_class = InterfuserConfig  # Link to the config class
+
+     def __init__(self, config: InterfuserConfig):
+         super().__init__(config)
+         self.config = config
+
+         # We instantiate the original Interfuser model inside our wrapper.
+         # The parameters are taken from our config object.
+         # This requires the original 'Interfuser' class defined above.
+         self.interfuser_model = Interfuser(
+             embed_dim=self.config.embed_dim,
+             enc_depth=self.config.enc_depth,
+             dec_depth=self.config.dec_depth,
+             num_heads=self.config.num_heads,
+             dim_feedforward=self.config.dim_feedforward,
+             rgb_backbone_name=self.config.rgb_backbone_name,
+             lidar_backbone_name=self.config.lidar_backbone_name,
+             waypoints_pred_head=self.config.waypoints_pred_head,
+             use_different_backbone=self.config.use_different_backbone
+         )
+
+     def forward(self, rgb, rgb_left, rgb_right, rgb_center, lidar, measurements, target_point, **kwargs):
+
+         # The original model expects a dictionary, so we create one.
+         inputs_dict = {
+             'rgb': rgb,
+             'rgb_left': rgb_left,
+             'rgb_right': rgb_right,
+             'rgb_center': rgb_center,
+             'lidar': lidar,
+             'measurements': measurements,
+             'target_point': target_point
+         }
+
+         # Call the forward method of the original model.
+         # The output is already a tuple, which is what HF expects.
+         return self.interfuser_model.forward(inputs_dict)
+
+ # --- Confirmation message ---
+ print("✅ Hugging Face wrapper classes (InterfuserConfig, InterfuserForHuggingFace) are now defined.")
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5ef39ec4197f3e9ed8b659709b331a2cd63c30b6019c61eb155eb438135e3dd7
- size 212334062
+ oid sha256:7900840868237b4916efd78f996eb338480b79985dc09a29e574d1e9b130da29
+ size 212282626
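With the updated checkpoint in place, the weights can be restored through the standard from_pretrained path. A hedged sketch, again using ./interfuser-hf as a stand-in for a local snapshot containing config.json, modeling_interfuser.py, and pytorch_model.bin; since config.json declares no auto_map, the wrapper class is imported explicitly rather than resolved via AutoModel with trust_remote_code:

from modeling_interfuser import InterfuserForHuggingFace

model = InterfuserForHuggingFace.from_pretrained("./interfuser-hf")
model.eval()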