Commit 9a931762 authored by zxl

feat: new controlnet

parent 492dcd72
Showing with 803 additions and 11 deletions
LICENSE 0 → 100644
README.md 100755 → 100644
@@ -2,7 +2,7 @@
The WebUI extension for ControlNet and other injection-based SD controls.
![image](https://user-images.githubusercontent.com/20929282/246632890-400b2e0d-b064-4505-b31d-49375216ca98.png)
![image](https://github.com/Mikubill/sd-webui-controlnet/assets/19834515/00787fd1-1bc5-4b90-9a23-9683f8458b85)
This extension is for AUTOMATIC1111's [Stable Diffusion web UI](https://github.com/AUTOMATIC1111/stable-diffusion-webui); it allows the Web UI to add [ControlNet](https://github.com/lllyasviel/ControlNet) to the original Stable Diffusion model to generate images. The addition happens on the fly; no model merging is required.
@@ -18,13 +18,6 @@ This extension is for AUTOMATIC1111's [Stable Diffusion web UI](https://github.c
8. Download models (see below).
9. After you put models in the correct folder, you may need to refresh to see them. The refresh button is to the right of your "Model" dropdown.
**Update from ControlNet 1.0 to 1.1:**
* If you are not sure, you can back up and remove the folder "stable-diffusion-webui\extensions\sd-webui-controlnet", and then start from step 1 in the Installation section above.
* Or you can start from step 6 in the Installation section above.
# Download Models
Right now, all 14 models of ControlNet 1.1 are in beta testing.
@@ -33,13 +26,15 @@ Download the models from ControlNet 1.1: https://huggingface.co/lllyasviel/Contr
You need to download model files ending with ".pth".
Put models in your "stable-diffusion-webui\extensions\sd-webui-controlnet\models". Now we have already included all "yaml" files. You only need to download "pth" files.
Put models in your "stable-diffusion-webui\extensions\sd-webui-controlnet\models". You only need to download "pth" files.
Do not right-click the filenames on the HuggingFace website to download them. Some users right-clicked those HuggingFace HTML pages and saved the HTML as PTH/YAML files; those are not the correct files. Instead, please click the small download arrow "↓" icon on HuggingFace to download.
Note: If you download models elsewhere, please make sure that the yaml file names and the model file names are the same. Please manually rename the yaml files if you download from other sources. (Some models, like "shuffle", need the yaml file so that we know the outputs of ControlNet should pass through a global average pooling before being injected into the SD U-Net.)
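If you prefer to script the download instead of clicking in the browser, here is a minimal sketch using huggingface_hub. This is an assumed workflow, not an official instruction: the repository id `lllyasviel/ControlNet-v1-1`, the example filename, and the target path are assumptions and may differ from your setup.

```python
# Minimal sketch (assumed workflow): fetch one ControlNet 1.1 ".pth" file
# into the extension's models folder with huggingface_hub.
from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="lllyasviel/ControlNet-v1-1",                                     # assumed repo id
    filename="control_v11p_sd15_canny.pth",                                   # example model file
    local_dir="stable-diffusion-webui/extensions/sd-webui-controlnet/models", # extension models folder
)
```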
# Download Models for SDXL
See instructions [here](https://github.com/Mikubill/sd-webui-controlnet/discussions/2039).
# New Features in ControlNet 1.1
# Features in ControlNet 1.1
### Perfect Support for All ControlNet 1.0/1.1 and T2I Adapter Models.
File mode changed from 100755 to 100644
import os
import torch

from modules import devices
from modules.modelloader import load_file_from_url
from annotator.annotator_path import models_path
from transformers import CLIPVisionModelWithProjection, CLIPVisionConfig, CLIPImageProcessor, modeling_utils


# Vision-encoder config matching OpenCLIP ViT-bigG/14 ("clip_g": 48 layers, hidden size 1664).
config_clip_g = {
    "attention_dropout": 0.0,
    "dropout": 0.0,
    "hidden_act": "gelu",
    "hidden_size": 1664,
    "image_size": 224,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 8192,
    "layer_norm_eps": 1e-05,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_channels": 3,
    "num_hidden_layers": 48,
    "patch_size": 14,
    "projection_dim": 1280,
    "torch_dtype": "float32"
}

# Vision-encoder config matching OpenCLIP ViT-H/14 ("clip_h": 32 layers, hidden size 1280).
config_clip_h = {
    "attention_dropout": 0.0,
    "dropout": 0.0,
    "hidden_act": "gelu",
    "hidden_size": 1280,
    "image_size": 224,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 5120,
    "layer_norm_eps": 1e-05,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_channels": 3,
    "num_hidden_layers": 32,
    "patch_size": 14,
    "projection_dim": 1024,
    "torch_dtype": "float32"
}

# Vision-encoder config matching OpenAI CLIP ViT-L/14 ("clip_vitl": 24 layers, hidden size 1024).
config_clip_vitl = {
    "attention_dropout": 0.0,
    "dropout": 0.0,
    "hidden_act": "quick_gelu",
    "hidden_size": 1024,
    "image_size": 224,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "layer_norm_eps": 1e-05,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_channels": 3,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768,
    "torch_dtype": "float32"
}

configs = {
    'clip_g': config_clip_g,
    'clip_h': config_clip_h,
    'clip_vitl': config_clip_vitl,
}

# Download sources for the corresponding pretrained weights.
downloads = {
    'clip_vitl': 'https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/pytorch_model.bin',
    'clip_g': 'https://huggingface.co/lllyasviel/Annotators/resolve/main/clip_g.pth',
    'clip_h': 'https://huggingface.co/h94/IP-Adapter/resolve/main/models/image_encoder/pytorch_model.bin'
}
class ClipVisionDetector:
    def __init__(self, config):
        assert config in downloads
        self.download_link = downloads[config]
        self.model_path = os.path.join(models_path, 'clip_vision')
        self.file_name = config + '.pth'
        self.config = configs[config]
        self.device = devices.get_device_for("controlnet")
        os.makedirs(self.model_path, exist_ok=True)
        file_path = os.path.join(self.model_path, self.file_name)
        # Download the encoder weights on first use.
        if not os.path.exists(file_path):
            load_file_from_url(url=self.download_link, model_dir=self.model_path, file_name=self.file_name)
        config = CLIPVisionConfig(**self.config)
        self.model = CLIPVisionModelWithProjection(config)
        # Preprocessing follows the standard CLIP 224x224 pipeline (bicubic resample, CLIP mean/std).
        self.processor = CLIPImageProcessor(crop_size=224,
                                            do_center_crop=True,
                                            do_convert_rgb=True,
                                            do_normalize=True,
                                            do_resize=True,
                                            image_mean=[0.48145466, 0.4578275, 0.40821073],
                                            image_std=[0.26862954, 0.26130258, 0.27577711],
                                            resample=3,
                                            size=224)
        sd = torch.load(file_path, map_location=torch.device('cpu'))
        self.model.load_state_dict(sd, strict=False)
        del sd
        self.model.eval()
        self.model.cpu()

    def unload_model(self):
        # Move the weights to the 'meta' device to release memory.
        if self.model is not None:
            self.model.to('meta')

    def __call__(self, input_image):
        with torch.no_grad():
            clip_vision_model = self.model.cpu()
            feat = self.processor(images=input_image, return_tensors="pt")
            feat['pixel_values'] = feat['pixel_values'].cpu()
            # Run the encoder on CPU, then move all tensor outputs to the ControlNet device.
            result = clip_vision_model(**feat, output_hidden_states=True)
            result['hidden_states'] = [v.to(devices.get_device_for("controlnet")) for v in result['hidden_states']]
            result = {k: v.to(devices.get_device_for("controlnet")) if isinstance(v, torch.Tensor) else v for k, v in result.items()}
        return result
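For context, a minimal usage sketch (not part of this commit) of the detector defined above. It assumes the class is importable and that an RGB image is passed as a numpy array; the output fields follow HuggingFace's CLIPVisionModelWithProjection output (image_embeds, last_hidden_state, hidden_states).

```python
import numpy as np

detector = ClipVisionDetector('clip_h')            # downloads clip_h.pth on first use
image = np.zeros((224, 224, 3), dtype=np.uint8)    # any RGB image as a numpy array
out = detector(image)
print(out['image_embeds'].shape)                   # torch.Size([1, 1024]) for the clip_h config
print(len(out['hidden_states']))                   # 33 = embedding output + 32 transformer layers
detector.unload_model()                            # moves the weights to the 'meta' device
```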
File mode changed from 100755 to 100644