From f2a7c1333fa4aba81e1ce1c8738ac1cb4bb67c53 Mon Sep 17 00:00:00 2001
From: qirui0667 <sunqr0667@126.com>
Date: Thu, 7 Mar 2024 16:16:04 +0800
Subject: [PATCH 1/6] add attn mask operation for ip-adapter

---
 scripts/controlmodel_ipadapter.py | 32 ++++++++++++++++++++-
 scripts/controlnet.py             | 46 ++++++++++++++++++++++---------
 scripts/hook.py                   |  4 +++
 3 files changed, 68 insertions(+), 14 deletions(-)

diff --git a/scripts/controlmodel_ipadapter.py b/scripts/controlmodel_ipadapter.py
index 3359e87..048f0b4 100644
--- a/scripts/controlmodel_ipadapter.py
+++ b/scripts/controlmodel_ipadapter.py
@@ -8,6 +8,8 @@ import numpy as np
 from transformers.models.clip.modeling_clip import CLIPVisionModelOutput
 
 from scripts.logging import logger
+# jiahui's modify
+import torch.nn.functional as F
 
 class ImageEmbed(NamedTuple):
     """Image embed for a single image."""
@@ -607,9 +609,14 @@ class PlugableIPAdapter(torch.nn.Module):
 
         # jiahui's modify
         # 1.init
+        # for iplora
         IP_mode = kwargs.pop('IP_mode')
         multi_references_dir = kwargs.pop('multi_references_dir')
         ip_layer_wise = kwargs.pop('ip_layer_wise')
+        # for instantid-multi_id
+        self.ip_mask = kwargs.pop('ip_mask')
+        self.h = kwargs.pop('height')
+        self.w = kwargs.pop('width')
         # 2.image proj process
         if preprocessor_outputs is not None:
             if isinstance(preprocessor_outputs, (list, tuple)):
@@ -724,5 +731,28 @@ class PlugableIPAdapter(torch.nn.Module):
                 ip_out = torch.nn.functional.scaled_dot_product_attention(q, ip_k, ip_v, attn_mask=None, dropout_p=0.0, is_causal=False)
                 ip_out = ip_out.transpose(1, 2).reshape(batch_size, -1, h * head_dim)
 
-            return ip_out * self.weight * layer_wise_weight  # jiahui's modify
+            # jiahui's modify for instatntid-multi id
+            qs = ip_out.shape[1]
+            if self.ip_mask is not None:
+                for rate in [1, 2, 4, 8, 16, 32]:
+                    mask_h = -(-self.h//rate)  # fancy ceil
+                    mask_w = -(-self.w//rate)
+                    if mask_h*mask_w == qs:
+                        break
+                assert mask_h*mask_w == qs, logger.error("mask and latent output are not equal in size")
+
+                mask = torch.from_numpy(np.array(self.ip_mask).astype(np.float32)/ 255.0).clone().unsqueeze(0).to(ip_out.device, dtype=ip_out.dtype)
+                # mask_downsample = F.interpolate(mask.unsqueeze(1), size=(mask_h, mask_w), mode="bicubic").squeeze(1)
+                mask = mask.permute(0, 3, 1, 2)[:, 0, :, :]  # B , H, W
+                mask_downsample = F.interpolate(mask.unsqueeze(1), size=(mask_h, mask_w), mode="bicubic").squeeze(1)
+                mask_downsample = 1.0 - mask_downsample
+
+                mask_downsample = mask_downsample.repeat(batch_size, 1, 1, 1)
+                mask_downsample = mask_downsample.view(mask_downsample.shape[0], -1, 1).repeat(1, 1, ip_out.shape[2])
+                return ip_out * mask_downsample * self.weight * layer_wise_weight
+            else:
+                return ip_out * self.weight * layer_wise_weight
+            # return ip_out * self.weight * layer_wise_weight  # jiahui's modify: for iplora
+            # +++++++++++++++++++++
+
         return forward
diff --git a/scripts/controlnet.py b/scripts/controlnet.py
index d38fdc6..90262a6 100644
--- a/scripts/controlnet.py
+++ b/scripts/controlnet.py
@@ -982,19 +982,32 @@ class Script(scripts.Script, metaclass=(
                 return control, hr_control
 
             # jiahui's modify
-            # if unit.image is not None:
-            #     controls, hr_controls = list(zip(*[preprocess_input_image(img) for img in input_images]))
-            #     if len(controls) == len(hr_controls) == 1:
-            #         control = controls[0]
-            #         hr_control = hr_controls[0]
-            #     else:
-            #         control = controls
-            #         hr_control = hr_controls
-            if unit.multi_references_dir != '':
+            if unit.multi_references_dir != '':  # for ip lora
                 assert os.path.exists(unit.multi_references_dir) or os.path.exists(os.path.join('models/iplora',unit.multi_references_dir)), ValueError(f"cache file is not exists ==> {unit.multi_references_dir}")
                 control = None
                 hr_control = None
             else:
+                # jiahui's modify —— instantid multi id
+                if len(input_images)==1 and not np.all(unit.image['mask']==0) and control_model_type in [ControlModelType.IPAdapter, ControlModelType.InstantID]:
+                    input_images[0] = cv2.bitwise_and(unit.image['image'], unit.image['mask'])
+                    unit.module = 'instant_id_face_embedding' if control_model_type==ControlModelType.IPAdapter else 'instant_id_face_keypoints'
+                    if resize_mode==external_code.ResizeMode.INNER_FIT:  # crop&resize
+                        mask_h, mask_w, _ = unit.image['mask'].shape
+                        if mask_h < mask_w:
+                            mask_new_h = h
+                            mask_new_w = h/mask_h * mask_w
+                        else:
+                            mask_new_w = w
+                            mask_new_h = w/mask_w * mask_h
+                        w_start, h_start = int(abs(mask_new_w-w)//2), int(abs(mask_new_h-h)//2)
+                        mask = cv2.resize(unit.image['mask'], (int(mask_new_w), int(mask_new_h)))[w_start:w_start+w, h_start:h_start+h]
+                    elif resize_mode==external_code.ResizeMode.RESIZE:  # just resize
+                        mask = cv2.resize(unit.image['mask'], (w, h))
+                    else:
+                        logger.error(f"This resize mode >>{resize_mode}<< is not support ")
+                        return
+                # ++++++++++++++++++++++++++++++++++++++++
+
                 controls, hr_controls = list(zip(*[preprocess_input_image(img) for img in input_images]))
                 if len(controls) == len(hr_controls) == 1:
                     control = controls[0]
@@ -1002,9 +1015,6 @@ class Script(scripts.Script, metaclass=(
                 else:
                     control = controls
                     hr_control = hr_controls
-                # else:
-                #     logger.error("Image and Cache fiel path can not be empty at the same time")
-                #     return
             # ++++++++++++++++++++
 
             preprocessor_dict = dict(
@@ -1036,9 +1046,14 @@ class Script(scripts.Script, metaclass=(
                 soft_injection=control_mode != external_code.ControlMode.BALANCED,
                 cfg_injection=control_mode == external_code.ControlMode.CONTROL,
                 # jiahui's modify
+                # for ip lora
                 IP_mode = unit.IP_mode,
                 multi_references_dir = unit.multi_references_dir,
-                ip_layer_wise = unit.ip_layer_wise
+                ip_layer_wise = unit.ip_layer_wise,
+                # for instantid
+                ip_mask = None if unit.image is None  # when use iplora, image will be uploaded
+                        or (np.all(mask==0) and unit.module=='instant_id_face_embedding')  # when use instantid uploaded image,but doesn't use mask
+                        else mask,
                 # ++++++++++++++
             )
             forward_params.append(forward_param)
@@ -1131,9 +1146,14 @@ class Script(scripts.Script, metaclass=(
                     start=param.start_guidance_percent,
                     end=param.stop_guidance_percent,
                     # jiahui's modify
+                    # for ip_lora
                     IP_mode=param.IP_mode,
                     multi_references_dir=param.multi_references_dir,
                     ip_layer_wise = param.ip_layer_wise,
+                    # for instantid multi id
+                    ip_mask = param.ip_mask,
+                    height = (p.height // 8) * 8,
+                    width = (p.width // 8) *8,
                     # +++++++++++++++
                 )
             if param.control_model_type == ControlModelType.Controlllite:
diff --git a/scripts/hook.py b/scripts/hook.py
index 20c58d6..2c48d57 100644
--- a/scripts/hook.py
+++ b/scripts/hook.py
@@ -173,9 +173,12 @@ class ControlParams:
             hr_option: HiResFixOption = HiResFixOption.BOTH,
             control_context_override: Optional[Any] = None,
             # jiahui's modify
+            # for ip lora
             multi_references_dir='',
             IP_mode='Full',
             ip_layer_wise='',
+            # for instantid-multi id
+            ip_mask=None,
             # ++++++++++++++++
             **kwargs  # To avoid errors
     ):
@@ -202,6 +205,7 @@ class ControlParams:
         self.multi_references_dir=multi_references_dir
         self.IP_mode=IP_mode
         self.ip_layer_wise=ip_layer_wise
+        self.ip_mask = ip_mask
         # ++++++++++++++++
 
     @property
-- 
GitLab
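The controlmodel_ipadapter.py hunk above scales the IP-Adapter attention output by a spatial mask flattened to the latent token grid. Below is a minimal standalone sketch of that step, assuming an H x W x 3 uint8 mask and a (batch, tokens, channels) attention output; apply_ip_attn_mask is an illustrative name, not a function in the extension, and it omits the weight and layer-wise factors applied in the patch.

```python
import numpy as np
import torch
import torch.nn.functional as F


def apply_ip_attn_mask(ip_out: torch.Tensor, mask_np: np.ndarray,
                       height: int, width: int) -> torch.Tensor:
    """Scale IP-Adapter attention output by a spatial mask.

    ip_out:  (batch, tokens, channels) attention output.
    mask_np: (H, W, 3) uint8 mask in [0, 255].
    height/width: generation size in pixels; tokens must equal
    ceil(h/rate) * ceil(w/rate) for one of the UNet downsample rates.
    """
    batch, tokens, channels = ip_out.shape

    # Find the downsample rate whose latent grid matches the token count.
    for rate in (8, 16, 32):
        mask_h = -(-height // rate)  # ceil division
        mask_w = -(-width // rate)
        if mask_h * mask_w == tokens:
            break
    else:
        raise ValueError("mask and attention output are not equal in size")

    # H x W x 3 uint8 -> 1 x 1 x H x W float in [0, 1] (first channel only).
    mask = torch.from_numpy(mask_np.astype(np.float32) / 255.0)
    mask = mask.permute(2, 0, 1)[:1].unsqueeze(0).to(ip_out.device, dtype=ip_out.dtype)

    # Resize to the latent grid, flatten to tokens, broadcast over channels.
    mask = F.interpolate(mask, size=(mask_h, mask_w), mode="bicubic")
    mask = mask.clamp(0, 1).reshape(1, -1, 1)
    return ip_out * mask.expand(batch, tokens, channels)
```

In the patch the multiplication happens inside the patched attention forward, so batch_size, height and width come from the surrounding closure; here they are explicit arguments so the sketch can be run on its own.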
From 3d7635ee044bc8df6f548fc66841a78c105d8215 Mon Sep 17 00:00:00 2001
From: qirui0667 <sunqr0667@126.com>
Date: Thu, 7 Mar 2024 23:52:44 +0800
Subject: [PATCH 2/6] add attn mask operation for controlnet

---
 scripts/controlmodel_ipadapter.py | 12 +++++++-----
 scripts/controlnet.py             |  2 +-
 scripts/hook.py                   | 23 +++++++++++++++++++++
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/scripts/controlmodel_ipadapter.py b/scripts/controlmodel_ipadapter.py
index 048f0b4..2155baa 100644
--- a/scripts/controlmodel_ipadapter.py
+++ b/scripts/controlmodel_ipadapter.py
@@ -732,11 +732,12 @@ class PlugableIPAdapter(torch.nn.Module):
                 ip_out = ip_out.transpose(1, 2).reshape(batch_size, -1, h * head_dim)
 
             # jiahui's modify for instatntid-multi id
+            import cv2
             qs = ip_out.shape[1]
             if self.ip_mask is not None:
-                for rate in [1, 2, 4, 8, 16, 32]:
-                    mask_h = -(-self.h//rate)  # fancy ceil
-                    mask_w = -(-self.w//rate)
+                for rate in [8, 16, 32]:
+                    mask_h = self.h//rate  # fancy ceil
+                    mask_w = self.w//rate
                     if mask_h*mask_w == qs:
                         break
                 assert mask_h*mask_w == qs, logger.error("mask and latent output are not equal in size")
@@ -745,14 +746,15 @@ class PlugableIPAdapter(torch.nn.Module):
                 # mask_downsample = F.interpolate(mask.unsqueeze(1), size=(mask_h, mask_w), mode="bicubic").squeeze(1)
                 mask = mask.permute(0, 3, 1, 2)[:, 0, :, :]  # B , H, W
                 mask_downsample = F.interpolate(mask.unsqueeze(1), size=(mask_h, mask_w), mode="bicubic").squeeze(1)
-                mask_downsample = 1.0 - mask_downsample
+                # mask_downsample = 1.0 - mask_downsample
 
                 mask_downsample = mask_downsample.repeat(batch_size, 1, 1, 1)
                 mask_downsample = mask_downsample.view(mask_downsample.shape[0], -1, 1).repeat(1, 1, ip_out.shape[2])
                 return ip_out * mask_downsample * self.weight * layer_wise_weight
             else:
                 return ip_out * self.weight * layer_wise_weight
-            # return ip_out * self.weight * layer_wise_weight  # jiahui's modify: for iplora
             # +++++++++++++++++++++
+            # return ip_out * self.weight * layer_wise_weight  # jiahui's modify: for iplora
+
 
         return forward
diff --git a/scripts/controlnet.py b/scripts/controlnet.py
index 90262a6..377271b 100644
--- a/scripts/controlnet.py
+++ b/scripts/controlnet.py
@@ -1051,7 +1051,7 @@ class Script(scripts.Script, metaclass=(
                 multi_references_dir = unit.multi_references_dir,
                 ip_layer_wise = unit.ip_layer_wise,
                 # for instantid
-                ip_mask = None if unit.image is None  # when use iplora, image will be uploaded
+                ip_mask = None if unit.image is None  # when use iplora, image will be not uploaded
                         or (np.all(mask==0) and unit.module=='instant_id_face_embedding')  # when use instantid uploaded image,but doesn't use mask
                         else mask,
                 # ++++++++++++++
diff --git a/scripts/hook.py b/scripts/hook.py
index 2c48d57..6641a2e 100644
--- a/scripts/hook.py
+++ b/scripts/hook.py
@@ -600,6 +600,29 @@ class UnetHook(nn.Module):
                     y=y
                 )
 
+                # jiahui's modify —— for Instantid multiid
+                import torch.nn.functional as F
+                if param.ip_mask is not None:
+                    control_update = []
+                    for control_ in control:
+                        b, c, w, h = control_.shape
+                        for rate in [8, 16, 32]:
+                            mask_h = process.height//rate
+                            mask_w = process.width//rate
+                            if mask_h == h and mask_w==w:
+                                break
+                        assert mask_h==h and mask_w==w, logger.error("mask and control output are not equal in size")
+                        # shape: h, w, 3
+                        mask = torch.from_numpy(param.ip_mask.astype(np.float32)/ 255.0).clone().unsqueeze(0).to(control_.device, dtype=control_.dtype)
+                        # mask_downsample = F.interpolate(mask.unsqueeze(1), size=(mask_h, mask_w), mode="bicubic").squeeze(1)
+                        mask = mask.permute(0, 3, 1, 2)[:, 0, :, :]  # B , H, W
+                        mask_downsample = F.interpolate(mask.unsqueeze(1), size=(mask_h, mask_w), mode="bicubic").squeeze(1)
+
+                        mask_downsample = mask_downsample.repeat(b, 1, 1, 1)
+                        control_update.append(control_ * mask_downsample)
+                    control = control_update
+                # ++++++++++++++++++++++++++++
+
                 if is_sdxl:
                     control_scales = [param.weight] * 10
                 else:
-- 
GitLab
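The hook.py hunk above applies the same kind of mask to the ControlNet residuals before they are added into the UNet, resizing it to each feature map's own grid. A standalone sketch of that per-feature-map masking follows, under the same assumptions as before (feature maps are the latent grid at rates 8/16/32, mask is H x W x 3 uint8); mask_control_residuals is a made-up helper name, not the extension's API.

```python
import numpy as np
import torch
import torch.nn.functional as F


def mask_control_residuals(control: list, mask_np: np.ndarray,
                           height: int, width: int) -> list:
    """Multiply each ControlNet residual by the mask resized to its grid.

    control: list of tensors shaped (batch, channels, h, w).
    mask_np: (H, W, 3) uint8 mask at the generation resolution.
    """
    mask = torch.from_numpy(mask_np.astype(np.float32) / 255.0)
    mask = mask.permute(2, 0, 1)[:1].unsqueeze(0)  # 1 x 1 x H x W

    masked = []
    for feat in control:
        b, c, h, w = feat.shape
        # Sanity check: the feature grid should match the latent grid at some rate.
        if not any(-(-height // r) == h and -(-width // r) == w for r in (8, 16, 32)):
            raise ValueError("mask and control output are not equal in size")
        m = F.interpolate(mask.to(feat.device, feat.dtype), size=(h, w), mode="bicubic")
        masked.append(feat * m.clamp(0, 1))  # broadcasts over batch and channels
    return masked
```

Resizing the single-channel mask per feature map and relying on broadcasting keeps the residual tensors untouched outside the masked region while leaving their shapes, and therefore the rest of the hook, unchanged.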
From eabaae0fabed9202e26dab0601693262bffc2555 Mon Sep 17 00:00:00 2001
From: qirui0667 <sunqr0667@126.com>
Date: Fri, 8 Mar 2024 14:57:09 +0800
Subject: [PATCH 3/6] support mask control for api

---
 scripts/controlnet.py | 58 +++++++++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 21 deletions(-)

diff --git a/scripts/controlnet.py b/scripts/controlnet.py
index 377271b..aa4dbb8 100644
--- a/scripts/controlnet.py
+++ b/scripts/controlnet.py
@@ -651,19 +651,30 @@ class Script(scripts.Script, metaclass=(
                 color = HWC3(image['image'])
                 alpha = image['mask'][:, :, 0:1]
                 input_image = np.concatenate([color, alpha], axis=2)
-            elif (
-                not shared.opts.data.get("controlnet_ignore_noninpaint_mask", False) and
-                # There is wield gradio issue that would produce mask that is
-                # not pure color when no scribble is made on canvas.
-                # See https://github.com/Mikubill/sd-webui-controlnet/issues/1638.
-                not (
-                    (image['mask'][:, :, 0] <= 5).all() or
-                    (image['mask'][:, :, 0] >= 250).all()
-                )
-            ):
-                logger.info("using mask as input")
-                input_image = HWC3(image['mask'][:, :, 0])
-                unit.module = 'none'  # Always use black bg and white line
+            else:  # jiahui's modify for feature map mask openration
+                image['image'] = HWC3(decode_image(image['image']))
+                image['mask'] = HWC3(decode_image(image['mask']))
+                input_image = image
+
+
+            # jiahui's modify
+            # elif (
+            #     not shared.opts.data.get("controlnet_ignore_noninpaint_mask", False) and
+            #     # There is wield gradio issue that would produce mask that is
+            #     # not pure color when no scribble is made on canvas.
+            #     # See https://github.com/Mikubill/sd-webui-controlnet/issues/1638.
+            #     not (
+            #         (image['mask'][:, :, 0] <= 5).all() or
+            #         (image['mask'][:, :, 0] >= 250).all()
+            #     )
+            # ):
+            #     logger.info("using mask as input")
+            #     input_image = HWC3(image['mask'][:, :, 0])
+            #     unit.module = 'none'  # Always use black bg and white line
+
+            # ++++++++
+
+
         elif a1111_image is not None:
             input_image = HWC3(np.asarray(a1111_image))
             a1111_i2i_resize_mode = getattr(p, "resize_mode", None)
@@ -689,7 +700,7 @@ class Script(scripts.Script, metaclass=(
                 shared.state.interrupted = True
                 raise ValueError("controlnet is enabled but no input image is given")
 
-        assert isinstance(input_image, (np.ndarray, list))
+        assert isinstance(input_image, (np.ndarray, list, dict))
         return input_image, resize_mode
 
     @staticmethod
@@ -917,6 +928,8 @@ class Script(scripts.Script, metaclass=(
             if isinstance(input_image, list):
                 assert unit.accepts_multiple_inputs()
                 input_images = input_image
+            elif isinstance(input_image, dict):  # jiahui's modify ——instantid for control mask
+                input_images = [input_image]
             else:  # Following operations are only for single input image.
                 input_image = Script.try_crop_image_with_a1111_mask(p, unit, input_image, resize_mode)
                 input_image = np.ascontiguousarray(input_image.copy()).copy()  # safe numpy
@@ -982,17 +995,19 @@ class Script(scripts.Script, metaclass=(
                 return control, hr_control
 
             # jiahui's modify
+            mask = None
             if unit.multi_references_dir != '':  # for ip lora
                 assert os.path.exists(unit.multi_references_dir) or os.path.exists(os.path.join('models/iplora',unit.multi_references_dir)), ValueError(f"cache file is not exists ==> {unit.multi_references_dir}")
                 control = None
                 hr_control = None
             else:
                 # jiahui's modify —— instantid multi id
-                if len(input_images)==1 and not np.all(unit.image['mask']==0) and control_model_type in [ControlModelType.IPAdapter, ControlModelType.InstantID]:
-                    input_images[0] = cv2.bitwise_and(unit.image['image'], unit.image['mask'])
-                    unit.module = 'instant_id_face_embedding' if control_model_type==ControlModelType.IPAdapter else 'instant_id_face_keypoints'
+                if len(input_images)==1 and not np.all(input_images[0]['mask']==0) and control_model_type in [ControlModelType.IPAdapter, ControlModelType.InstantID]:
+                    mask = input_images[0]['mask']
+                    input_images[0] = cv2.bitwise_and(input_images[0]['image'], input_images[0]['mask'])
+                    # unit.module = 'instant_id_face_embedding' if control_model_type==ControlModelType.IPAdapter else 'instant_id_face_keypoints'
                     if resize_mode==external_code.ResizeMode.INNER_FIT:  # crop&resize
-                        mask_h, mask_w, _ = unit.image['mask'].shape
+                        mask_h, mask_w, _ = mask.shape
                         if mask_h < mask_w:
                             mask_new_h = h
                             mask_new_w = h/mask_h * mask_w
@@ -1000,9 +1015,9 @@ class Script(scripts.Script, metaclass=(
                             mask_new_w = w
                             mask_new_h = w/mask_w * mask_h
                         w_start, h_start = int(abs(mask_new_w-w)//2), int(abs(mask_new_h-h)//2)
-                        mask = cv2.resize(unit.image['mask'], (int(mask_new_w), int(mask_new_h)))[w_start:w_start+w, h_start:h_start+h]
+                        mask = cv2.resize(mask, (int(mask_new_w), int(mask_new_h)))[w_start:w_start+w, h_start:h_start+h]
                     elif resize_mode==external_code.ResizeMode.RESIZE:  # just resize
-                        mask = cv2.resize(unit.image['mask'], (w, h))
+                        mask = cv2.resize(mask, (w, h))
                     else:
                         logger.error(f"This resize mode >>{resize_mode}<< is not support ")
                         return
@@ -1051,7 +1066,8 @@ class Script(scripts.Script, metaclass=(
                 multi_references_dir = unit.multi_references_dir,
                 ip_layer_wise = unit.ip_layer_wise,
                 # for instantid
-                ip_mask = None if unit.image is None  # when use iplora, image will be not uploaded
+                ip_mask = None if unit.image is None  # (iplora) use embeds cache
+                        or mask is None  # (iplora) use embeds cache
                        or (np.all(mask==0) and unit.module=='instant_id_face_embedding')  # when use instantid uploaded image,but doesn't use mask
                         else mask,
                 # ++++++++++++++
-- 
GitLab
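The API path added above receives the reference image and its mask together as one dict, decodes both, and blacks out the reference outside the masked region before the preprocessor runs. A small sketch of that handling, assuming image and mask are already decoded to uint8 arrays of the same shape (the repo's decode_image step is omitted) and treating an all-zero mask as "no mask supplied"; split_image_and_mask is an illustrative name only.

```python
import cv2
import numpy as np


def split_image_and_mask(payload: dict):
    """Mirror the dict-input path: keep the mask for feature-map masking
    and zero out the reference image outside the masked region.

    payload: {'image': HxWx3 uint8, 'mask': HxWx3 uint8}.
    """
    image, mask = payload['image'], payload['mask']
    if np.all(mask == 0):
        return image, None                       # no mask supplied, nothing to do
    masked_image = cv2.bitwise_and(image, mask)  # keep only the masked pixels
    return masked_image, mask
```

The bitwise_and matches what the hunks do with the uploaded image, so the face preprocessor only ever sees the region the mask selects, while the raw mask is kept separately for the attention and ControlNet masking steps.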
From 2f320612e067791b7b1897ae5050d06216d1b65b Mon Sep 17 00:00:00 2001
From: qirui0667 <sunqr0667@126.com>
Date: Fri, 8 Mar 2024 18:02:09 +0800
Subject: [PATCH 4/6] fix bug

---
 scripts/controlmodel_ipadapter.py |  7 +++--
 scripts/controlnet.py             | 51 ++++++++++++++++---------------
 scripts/hook.py                   | 11 +++++--
 3 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/scripts/controlmodel_ipadapter.py b/scripts/controlmodel_ipadapter.py
index 2155baa..f0d20aa 100644
--- a/scripts/controlmodel_ipadapter.py
+++ b/scripts/controlmodel_ipadapter.py
@@ -732,12 +732,13 @@ class PlugableIPAdapter(torch.nn.Module):
                 ip_out = ip_out.transpose(1, 2).reshape(batch_size, -1, h * head_dim)
 
             # jiahui's modify for instatntid-multi id
-            import cv2
+            def round_half_up(number):
+                return math.ceil(number) if number - math.floor(number) >= 0.5 else math.floor(number)
             qs = ip_out.shape[1]
             if self.ip_mask is not None:
                 for rate in [8, 16, 32]:
-                    mask_h = self.h//rate  # fancy ceil
-                    mask_w = self.w//rate
+                    mask_h = round_half_up(self.h/rate)  # fancy ceil
+                    mask_w = round_half_up(self.w/rate)
                     if mask_h*mask_w == qs:
                         break
                 assert mask_h*mask_w == qs, logger.error("mask and latent output are not equal in size")
diff --git a/scripts/controlnet.py b/scripts/controlnet.py
index aa4dbb8..0f124f0 100644
--- a/scripts/controlnet.py
+++ b/scripts/controlnet.py
@@ -917,6 +917,7 @@ class Script(scripts.Script, metaclass=(
                 p.controlnet_control_loras.append(control_lora)
 
             # if unit.image is not None:  # jiahui's modify
+            mask = None
             if control_model_type == ControlModelType.IPAdapter \
                 and issubclass(type(p), StableDiffusionProcessingTxt2Img) \
                 and unit.multi_references_dir != "" :
@@ -928,9 +929,34 @@ class Script(scripts.Script, metaclass=(
             if isinstance(input_image, list):
                 assert unit.accepts_multiple_inputs()
                 input_images = input_image
-            elif isinstance(input_image, dict):  # jiahui's modify ——instantid for control mask
+            # jiahui's modify
+            elif isinstance(input_image, dict)and control_model_type in [ControlModelType.IPAdapter, ControlModelType.InstantID]:  # jiahui's modify ——instantid for control mask
+                mask = input_image['mask']
+                input_image = cv2.bitwise_and(input_image['image'], input_image['mask'])
+                # unit.module = 'instant_id_face_embedding' if control_model_type==ControlModelType.IPAdapter else 'instant_id_face_keypoints'
+                if resize_mode==external_code.ResizeMode.INNER_FIT:  # crop&resize
+                    mask_h, mask_w, _ = mask.shape
+                    if mask_h < mask_w:
+                        mask_new_h = h
+                        mask_new_w = h/mask_h * mask_w
+                    else:
+                        mask_new_w = w
+                        mask_new_h = w/mask_w * mask_h
+                    w_start, h_start = int(abs(mask_new_w-w)//2), int(abs(mask_new_h-h)//2)
+                    mask = cv2.resize(mask, (int(mask_new_w), int(mask_new_h)))[w_start:w_start+w, h_start:h_start+h]
+                elif resize_mode==external_code.ResizeMode.RESIZE:  # just resize
+                    mask = cv2.resize(mask, (w, h))
+                else:
+                    logger.error(f"This resize mode >>{resize_mode}<< is not support ")
+                    return
                 input_images = [input_image]
+            # ++++++++++++++++++++++++++++++
             else:  # Following operations are only for single input image.
+                # jiahui's modify:
+                if isinstance(input_image, dict):
+                    mask = input_image['mask'] if not np.all(input_image['mask']==0) else None
+                    input_image = input_image['image']
+                # +++++++++++++++++++++++++
                 input_image = Script.try_crop_image_with_a1111_mask(p, unit, input_image, resize_mode)
                 input_image = np.ascontiguousarray(input_image.copy()).copy()  # safe numpy
                 if unit.module == 'inpaint_only+lama' and resize_mode == external_code.ResizeMode.OUTER_FIT:
@@ -995,34 +1021,11 @@ class Script(scripts.Script, metaclass=(
                 return control, hr_control
 
             # jiahui's modify
-            mask = None
             if unit.multi_references_dir != '':  # for ip lora
                 assert os.path.exists(unit.multi_references_dir) or os.path.exists(os.path.join('models/iplora',unit.multi_references_dir)), ValueError(f"cache file is not exists ==> {unit.multi_references_dir}")
                 control = None
                 hr_control = None
             else:
-                # jiahui's modify —— instantid multi id
-                if len(input_images)==1 and not np.all(input_images[0]['mask']==0) and control_model_type in [ControlModelType.IPAdapter, ControlModelType.InstantID]:
-                    mask = input_images[0]['mask']
-                    input_images[0] = cv2.bitwise_and(input_images[0]['image'], input_images[0]['mask'])
-                    # unit.module = 'instant_id_face_embedding' if control_model_type==ControlModelType.IPAdapter else 'instant_id_face_keypoints'
-                    if resize_mode==external_code.ResizeMode.INNER_FIT:  # crop&resize
-                        mask_h, mask_w, _ = mask.shape
-                        if mask_h < mask_w:
-                            mask_new_h = h
-                            mask_new_w = h/mask_h * mask_w
-                        else:
-                            mask_new_w = w
-                            mask_new_h = w/mask_w * mask_h
-                        w_start, h_start = int(abs(mask_new_w-w)//2), int(abs(mask_new_h-h)//2)
-                        mask = cv2.resize(mask, (int(mask_new_w), int(mask_new_h)))[w_start:w_start+w, h_start:h_start+h]
-                    elif resize_mode==external_code.ResizeMode.RESIZE:  # just resize
-                        mask = cv2.resize(mask, (w, h))
-                    else:
-                        logger.error(f"This resize mode >>{resize_mode}<< is not support ")
-                        return
-                # ++++++++++++++++++++++++++++++++++++++++
-
                 controls, hr_controls = list(zip(*[preprocess_input_image(img) for img in input_images]))
                 if len(controls) == len(hr_controls) == 1:
                     control = controls[0]
diff --git a/scripts/hook.py b/scripts/hook.py
index 6641a2e..5c790e5 100644
--- a/scripts/hook.py
+++ b/scripts/hook.py
@@ -602,13 +602,18 @@ class UnetHook(nn.Module):
 
                 # jiahui's modify —— for Instantid multiid
                 import torch.nn.functional as F
+                import math
                 if param.ip_mask is not None:
+                    def round_half_up(number):
+                        return math.ceil(number) if number - math.floor(number) >= 0.5 else math.floor(number)
                     control_update = []
                     for control_ in control:
-                        b, c, w, h = control_.shape
+                        b, c, h, w = control_.shape
                         for rate in [8, 16, 32]:
-                            mask_h = process.height//rate
-                            mask_w = process.width//rate
+                            # mask_h = round((process.height//8*8)/rate)
+                            # mask_w = round((process.width//8*8)//rate)
+                            mask_h = round_half_up((process.height//8*8)/rate)
+                            mask_w = round_half_up((process.width//8*8)//rate)
                            if mask_h == h and mask_w==w:
                                 break
                         assert mask_h==h and mask_w==w, logger.error("mask and control output are not equal in size")
-- 
GitLab

From eeef366efef64009c41aa373ad8c640f6a65a71b Mon Sep 17 00:00:00 2001
From: qirui0667 <sunqr0667@126.com>
Date: Sun, 10 Mar 2024 01:06:27 +0800
Subject: [PATCH 5/6] update

---
 scripts/controlmodel_ipadapter.py | 6 ++++--
 scripts/controlnet.py             | 9 +++++----
 scripts/hook.py                   | 8 ++++----
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/scripts/controlmodel_ipadapter.py b/scripts/controlmodel_ipadapter.py
index f0d20aa..d2a3e84 100644
--- a/scripts/controlmodel_ipadapter.py
+++ b/scripts/controlmodel_ipadapter.py
@@ -737,8 +737,10 @@ class PlugableIPAdapter(torch.nn.Module):
             qs = ip_out.shape[1]
             if self.ip_mask is not None:
                 for rate in [8, 16, 32]:
-                    mask_h = round_half_up(self.h/rate)  # fancy ceil
-                    mask_w = round_half_up(self.w/rate)
+                    # mask_h = round_half_up(self.h/rate)  # fancy ceil
+                    # mask_w = round_half_up(self.w/rate)
+                    mask_h = math.ceil(self.h/rate)
+                    mask_w = math.ceil(self.w/rate)
                     if mask_h*mask_w == qs:
                         break
                 assert mask_h*mask_w == qs, logger.error("mask and latent output are not equal in size")
diff --git a/scripts/controlnet.py b/scripts/controlnet.py
index 0f124f0..7af6e06 100644
--- a/scripts/controlnet.py
+++ b/scripts/controlnet.py
@@ -929,8 +929,10 @@ class Script(scripts.Script, metaclass=(
             if isinstance(input_image, list):
                 assert unit.accepts_multiple_inputs()
                 input_images = input_image
-            # jiahui's modify
-            elif isinstance(input_image, dict)and control_model_type in [ControlModelType.IPAdapter, ControlModelType.InstantID]:  # jiahui's modify ——instantid for control mask
+            # jiahui's modify—— control mask preprocess for instantid
+            elif isinstance(input_image, dict) \
+                and not np.all(input_image['mask']==0) \
+                and control_model_type in [ControlModelType.IPAdapter, ControlModelType.InstantID]:
                 mask = input_image['mask']
                 input_image = cv2.bitwise_and(input_image['image'], input_image['mask'])
                 # unit.module = 'instant_id_face_embedding' if control_model_type==ControlModelType.IPAdapter else 'instant_id_face_keypoints'
@@ -953,8 +955,7 @@ class Script(scripts.Script, metaclass=(
             # ++++++++++++++++++++++++++++++
             else:  # Following operations are only for single input image.
                 # jiahui's modify:
-                if isinstance(input_image, dict):
-                    mask = input_image['mask'] if not np.all(input_image['mask']==0) else None
+                if isinstance(input_image, dict) and np.all(input_image['mask']==0):
                     input_image = input_image['image']
                 # +++++++++++++++++++++++++
                 input_image = Script.try_crop_image_with_a1111_mask(p, unit, input_image, resize_mode)
diff --git a/scripts/hook.py b/scripts/hook.py
index 5c790e5..92f70d6 100644
--- a/scripts/hook.py
+++ b/scripts/hook.py
@@ -610,10 +610,10 @@ class UnetHook(nn.Module):
                     for control_ in control:
                         b, c, h, w = control_.shape
                         for rate in [8, 16, 32]:
-                            # mask_h = round((process.height//8*8)/rate)
-                            # mask_w = round((process.width//8*8)//rate)
-                            mask_h = round_half_up((process.height//8*8)/rate)
-                            mask_w = round_half_up((process.width//8*8)//rate)
+                            mask_h = math.ceil((process.height//8*8)/rate)
+                            mask_w = math.ceil((process.width//8*8)//rate)
+                            # mask_h = round_half_up((process.height//8*8)/rate)
+                            # mask_w = round_half_up((process.width//8*8)/rate)
                             if mask_h == h and mask_w==w:
                                 break
                         assert mask_h==h and mask_w==w, logger.error("mask and control output are not equal in size")
-- 
GitLab
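Patches 4 and 5 go back and forth between a round-half-up helper and math.ceil when deriving the mask grid from the generation size. The sketch below shows why ceiling is the choice that survives: dimensions are first snapped to multiples of 8 (matching the (p.height // 8) * 8 passed in patch 1), and the deeper levels of the SD UNet round up when halving, so only ceil reproduces the actual feature-map sizes. The example values are assumptions for illustration only.

```python
import math


def latent_grid(height: int, width: int, rate: int):
    """Spatial size of a UNet feature map at a given downsample rate.

    height/width are snapped to multiples of 8 before dividing; the UNet's
    stride-2 downsampling rounds up at each level, hence math.ceil.
    """
    h8, w8 = (height // 8) * 8, (width // 8) * 8
    return math.ceil(h8 / rate), math.ceil(w8 / rate)


# Example: height=520, width=512, rate=32.
# The UNet sees 520/8 = 65 rows, then ceil(65/2) = 33, then ceil(33/2) = 17,
# and math.ceil(520/32) = 17 matches that, while round-half-up(16.25) = 16
# would trip the "mask and latent output are not equal in size" assert.
print(latent_grid(520, 512, 32))  # -> (17, 16)
```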
From 3be60fa8b1bdbd96155a512ee1b28bec419cb597 Mon Sep 17 00:00:00 2001
From: qirui0667 <sunqr0667@126.com>
Date: Mon, 11 Mar 2024 11:35:20 +0800
Subject: [PATCH 6/6] update

---
 scripts/controlnet.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scripts/controlnet.py b/scripts/controlnet.py
index 7af6e06..322bf2b 100644
--- a/scripts/controlnet.py
+++ b/scripts/controlnet.py
@@ -934,6 +934,7 @@ class Script(scripts.Script, metaclass=(
                 and not np.all(input_image['mask']==0) \
                 and control_model_type in [ControlModelType.IPAdapter, ControlModelType.InstantID]:
                 mask = input_image['mask']
+                logger.info(f"mask will be used for feature map")
                 input_image = cv2.bitwise_and(input_image['image'], input_image['mask'])
                 # unit.module = 'instant_id_face_embedding' if control_model_type==ControlModelType.IPAdapter else 'instant_id_face_keypoints'
                 if resize_mode==external_code.ResizeMode.INNER_FIT:  # crop&resize
@@ -955,8 +956,12 @@ class Script(scripts.Script, metaclass=(
             # ++++++++++++++++++++++++++++++
             else:  # Following operations are only for single input image.
                 # jiahui's modify:
-                if isinstance(input_image, dict) and np.all(input_image['mask']==0):
+                if isinstance(input_image, dict):
+                    if not np.all(input_image['mask']==0):
+                        mask = input_image['mask']
+                        logger.info(f"mask will be used for feature map")
                     input_image = input_image['image']
+
                 # +++++++++++++++++++++++++
                 input_image = Script.try_crop_image_with_a1111_mask(p, unit, input_image, resize_mode)
                 input_image = np.ascontiguousarray(input_image.copy()).copy()  # safe numpy
-- 
GitLab