para-lost committed on
Commit
37b1444
·
1 Parent(s): b122ff8

change name

Browse files
Files changed (1) hide show
  1. pipeline.py +4 -14
pipeline.py CHANGED
@@ -21,7 +21,7 @@ import random
21
  import cv2
22
  import numpy as np
23
  from torchvision import transforms
24
- from torchvision.transforms import functional as F
25
  from torchvision.transforms import InterpolationMode
26
 
27
  from dataclasses import dataclass
@@ -6059,18 +6059,8 @@ class MaxLongEdgeMinShortEdgeResize(torch.nn.Module):
6059
  if max(new_width, new_height) > self.max_size:
6060
  scale = self.max_size / max(new_width, new_height)
6061
  new_width, new_height = self._apply_scale(new_width, new_height, scale)
6062
-
6063
- # if img.dim() == 3:
6064
- # img = img.unsqueeze(0) # → [1,C,H,W]
6065
- resized = F.interpolate(
6066
- img,
6067
- size=(new_height, new_width),
6068
- mode=self.interpolation, # e.g. "bilinear"
6069
- antialias=self.antialias, # if you need anti‑aliasing
6070
- )
6071
- # resized = resized.squeeze(0) # → [C,H,W]
6072
- return resized
6073
- # return F.resize(img, (new_height, new_width), self.interpolation, antialias=self.antialias)
6074
 
6075
 
6076
  class ImageTransform:
@@ -6829,7 +6819,7 @@ class InterleaveInferencer:
6829
  past_key_values = gen_context['past_key_values']
6830
  kv_lens = gen_context['kv_lens']
6831
  ropes = gen_context['ropes']
6832
- device = next(self.model.parameters()).device
6833
  generation_input = self.model.prepare_start_tokens(kv_lens, ropes, self.new_token_ids)
6834
  generation_input = self._to_device(generation_input, device)
6835
  unpacked_latent = self.model.generate_text(
 
21
  import cv2
22
  import numpy as np
23
  from torchvision import transforms
24
+ from torchvision.transforms import functional as TF
25
  from torchvision.transforms import InterpolationMode
26
 
27
  from dataclasses import dataclass
 
6059
  if max(new_width, new_height) > self.max_size:
6060
  scale = self.max_size / max(new_width, new_height)
6061
  new_width, new_height = self._apply_scale(new_width, new_height, scale)
6062
+
6063
+ return TF.resize(img, (new_height, new_width), self.interpolation, antialias=self.antialias)
 
 
 
 
 
 
 
 
 
 
6064
 
6065
 
6066
  class ImageTransform:
 
6819
  past_key_values = gen_context['past_key_values']
6820
  kv_lens = gen_context['kv_lens']
6821
  ropes = gen_context['ropes']
6822
+
6823
  generation_input = self.model.prepare_start_tokens(kv_lens, ropes, self.new_token_ids)
6824
  generation_input = self._to_device(generation_input, device)
6825
  unpacked_latent = self.model.generate_text(