Commit 9a931762 authored by zxl

feat: new controlnet

parent 492dcd72
Showing with 803 additions and 11 deletions
LICENSE 0 → 100644
README.md 100755 → 100644
@@ -2,7 +2,7 @@
The WebUI extension for ControlNet and other injection-based SD controls.
![image](https://user-images.githubusercontent.com/20929282/246632890-400b2e0d-b064-4505-b31d-49375216ca98.png)
![image](https://github.com/Mikubill/sd-webui-controlnet/assets/19834515/00787fd1-1bc5-4b90-9a23-9683f8458b85)
This extension is for AUTOMATIC1111's [Stable Diffusion web UI](https://github.com/AUTOMATIC1111/stable-diffusion-webui); it allows the Web UI to add [ControlNet](https://github.com/lllyasviel/ControlNet) to the original Stable Diffusion model to generate images. The addition happens on the fly; no model merging is required.
@@ -18,13 +18,6 @@ This extension is for AUTOMATIC1111's [Stable Diffusion web UI](https://github.c
8. Download models (see below).
9. After you put models in the correct folder, you may need to refresh to see them. The refresh button is to the right of your "Model" dropdown.
**Update from ControlNet 1.0 to 1.1:**
* If you are not sure, you can back up and remove the folder "stable-diffusion-webui\extensions\sd-webui-controlnet", and then start from step 1 in the Installation section above.
* Or you can start from step 6 in the Installation section above.
# Download Models
Right now, all 14 models of ControlNet 1.1 are in beta testing.
@@ -33,13 +26,15 @@ Download the models from ControlNet 1.1: https://huggingface.co/lllyasviel/Contr
You need to download model files ending with ".pth".
Put models in your "stable-diffusion-webui\extensions\sd-webui-controlnet\models". Now we have already included all "yaml" files. You only need to download "pth" files.
Put models in your "stable-diffusion-webui\extensions\sd-webui-controlnet\models". You only need to download "pth" files.
Do not right-click the filenames on the HuggingFace website to download them. Some users right-clicked those HuggingFace HTML pages and saved the HTML as PTH/YAML files; those are not the correct files. Instead, please click the small download arrow "↓" icon on HuggingFace to download.
Note: If you download models elsewhere, please make sure that the yaml file names and the model file names are the same. Please manually rename the yaml files if you download from other sources. (Some models, like "shuffle", need the yaml file so that we know the outputs of ControlNet should pass through a global average pooling before being injected into the SD U-Net.)
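If you prefer to script the download instead of clicking in the browser, here is a minimal sketch using huggingface_hub. This is an assumed workflow, not an official instruction: the repository id `lllyasviel/ControlNet-v1-1`, the example filename, and the target path are assumptions and may differ from your setup.

```python
# Minimal sketch (assumed workflow): fetch one ControlNet 1.1 ".pth" file
# into the extension's models folder with huggingface_hub.
from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="lllyasviel/ControlNet-v1-1",                                     # assumed repo id
    filename="control_v11p_sd15_canny.pth",                                   # example model file
    local_dir="stable-diffusion-webui/extensions/sd-webui-controlnet/models", # extension models folder
)
```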
# Download Models for SDXL
See instructions [here](https://github.com/Mikubill/sd-webui-controlnet/discussions/2039).
# New Features in ControlNet 1.1
# Features in ControlNet 1.1
### Perfect Support for All ControlNet 1.0/1.1 and T2I Adapter Models.
File mode changed from 100755 to 100644
import os
import torch

from modules import devices
from modules.modelloader import load_file_from_url
from annotator.annotator_path import models_path
from transformers import CLIPVisionModelWithProjection, CLIPVisionConfig, CLIPImageProcessor, modeling_utils


# Vision-encoder config matching OpenCLIP ViT-bigG/14 ("clip_g": 48 layers, hidden size 1664).
config_clip_g = {
    "attention_dropout": 0.0,
    "dropout": 0.0,
    "hidden_act": "gelu",
    "hidden_size": 1664,
    "image_size": 224,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 8192,
    "layer_norm_eps": 1e-05,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_channels": 3,
    "num_hidden_layers": 48,
    "patch_size": 14,
    "projection_dim": 1280,
    "torch_dtype": "float32"
}

# Vision-encoder config matching OpenCLIP ViT-H/14 ("clip_h": 32 layers, hidden size 1280).
config_clip_h = {
    "attention_dropout": 0.0,
    "dropout": 0.0,
    "hidden_act": "gelu",
    "hidden_size": 1280,
    "image_size": 224,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 5120,
    "layer_norm_eps": 1e-05,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_channels": 3,
    "num_hidden_layers": 32,
    "patch_size": 14,
    "projection_dim": 1024,
    "torch_dtype": "float32"
}

# Vision-encoder config matching OpenAI CLIP ViT-L/14 ("clip_vitl": 24 layers, hidden size 1024).
config_clip_vitl = {
    "attention_dropout": 0.0,
    "dropout": 0.0,
    "hidden_act": "quick_gelu",
    "hidden_size": 1024,
    "image_size": 224,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "layer_norm_eps": 1e-05,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_channels": 3,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768,
    "torch_dtype": "float32"
}

configs = {
    'clip_g': config_clip_g,
    'clip_h': config_clip_h,
    'clip_vitl': config_clip_vitl,
}

# Download sources for the corresponding pretrained weights.
downloads = {
    'clip_vitl': 'https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/pytorch_model.bin',
    'clip_g': 'https://huggingface.co/lllyasviel/Annotators/resolve/main/clip_g.pth',
    'clip_h': 'https://huggingface.co/h94/IP-Adapter/resolve/main/models/image_encoder/pytorch_model.bin'
}
class ClipVisionDetector:
    def __init__(self, config):
        assert config in downloads
        self.download_link = downloads[config]
        self.model_path = os.path.join(models_path, 'clip_vision')
        self.file_name = config + '.pth'
        self.config = configs[config]
        self.device = devices.get_device_for("controlnet")
        os.makedirs(self.model_path, exist_ok=True)
        file_path = os.path.join(self.model_path, self.file_name)
        # Download the encoder weights on first use.
        if not os.path.exists(file_path):
            load_file_from_url(url=self.download_link, model_dir=self.model_path, file_name=self.file_name)
        config = CLIPVisionConfig(**self.config)
        self.model = CLIPVisionModelWithProjection(config)
        # Preprocessing follows the standard CLIP 224x224 pipeline (bicubic resample, CLIP mean/std).
        self.processor = CLIPImageProcessor(crop_size=224,
                                            do_center_crop=True,
                                            do_convert_rgb=True,
                                            do_normalize=True,
                                            do_resize=True,
                                            image_mean=[0.48145466, 0.4578275, 0.40821073],
                                            image_std=[0.26862954, 0.26130258, 0.27577711],
                                            resample=3,
                                            size=224)
        sd = torch.load(file_path, map_location=torch.device('cpu'))
        self.model.load_state_dict(sd, strict=False)
        del sd
        self.model.eval()
        self.model.cpu()

    def unload_model(self):
        # Move the weights to the 'meta' device to release memory.
        if self.model is not None:
            self.model.to('meta')

    def __call__(self, input_image):
        with torch.no_grad():
            clip_vision_model = self.model.cpu()
            feat = self.processor(images=input_image, return_tensors="pt")
            feat['pixel_values'] = feat['pixel_values'].cpu()
            # Run the encoder on CPU, then move all tensor outputs to the ControlNet device.
            result = clip_vision_model(**feat, output_hidden_states=True)
            result['hidden_states'] = [v.to(devices.get_device_for("controlnet")) for v in result['hidden_states']]
            result = {k: v.to(devices.get_device_for("controlnet")) if isinstance(v, torch.Tensor) else v for k, v in result.items()}
        return result
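For context, a minimal usage sketch (not part of this commit) of the detector defined above. It assumes the class is importable and that an RGB image is passed as a numpy array; the output fields follow HuggingFace's CLIPVisionModelWithProjection output (image_embeds, last_hidden_state, hidden_states).

```python
import numpy as np

detector = ClipVisionDetector('clip_h')            # downloads clip_h.pth on first use
image = np.zeros((224, 224, 3), dtype=np.uint8)    # any RGB image as a numpy array
out = detector(image)
print(out['image_embeds'].shape)                   # torch.Size([1, 1024]) for the clip_h config
print(len(out['hidden_states']))                   # 33 = embedding output + 32 transformer layers
detector.unload_model()                            # moves the weights to the 'meta' device
```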
File mode changed from 100755 to 100644