Merge pull request #20 from shirleyzhu233/xzhu/sdxl

Update SDXL implementation

fmu2 authored Oct 21, 2024
2 parents a6406e9 + fa5ce80 commit da3f3d2
Showing 13 changed files with 396 additions and 1,752 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -25,6 +25,8 @@
```bash
conda env create -f environment.yml
conda activate freecontrol
pip install -U diffusers
pip install -U gradio
```
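These two `pip install -U` lines upgrade `diffusers` and `gradio` beyond what `environment.yml` pins, since SDXL support lives in newer `diffusers` releases. A quick post-install sanity check (a minimal sketch; the PR itself does not state minimum versions):

```python
# Verify the upgraded packages import and expose SDXL support.
import diffusers
import gradio

print("diffusers:", diffusers.__version__)
print("gradio:", gradio.__version__)

# StableDiffusionXLPipeline only exists in SDXL-capable diffusers releases.
from diffusers import StableDiffusionXLPipeline  # ImportError on old versions
```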

**Sample Semantic Bases**
85 changes: 42 additions & 43 deletions config/base.yaml
@@ -5,70 +5,69 @@ sd_config:
  steps: 200                    # Number of diffusion steps
  guidance_scale: 7.5           # Classifier-free guidance scale
  grad_guidance_scale: 1        # Gradient guidance scale; multiplied by the weight of each guidance type
- sd_version: '2.1'             # choice from ['2.1'|'2.0'|'1.4']
+ sd_version: "1.5"             # choice from ["1.5", "2.1_base"]
  dreambooth: null              # Path to dreambooth. Set to null to disable dreambooth
  safetensors: True             # whether to use safetensors; most checkpoints from civitai.com use the safetensors format
  same_latent: False            # whether to use the same latent from inversion
  appearnace_same_latent: False
  pca_paths:
    []
  seed: 922                     # Seed for the random number generator
  generated_sample: False       # Whether to use a generated sample as the reference pose
- prompt : ""                   # Prompt for generated sample
+ prompt: ""                    # Prompt for generated sample
  negative_prompt: "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality" # Prompt for negative sample
- target_obj: ''                # Target object for inversion
+ target_obj: ""                # Target object for inversion
  obj_pairs: ""

data:
  inversion:
-   target_folder: 'dataset/latent'
+   target_folder: "dataset/latent"
    num_inference_steps: 999
-   method: 'DDIM'              # choice from ['DDIM'|'NTI'|'NPT']
-   fixed_size: [512,512]       # Set to null to disable fixed size, otherwise set to the fixed size (h, w) of the target image
+   method: "DDIM"              # choice from ['DDIM'|'NTI'|'NPT']
+   fixed_size: [512, 512]      # Set to null to disable fixed size, otherwise set to the fixed size (h, w) of the target image
    prompt: "A photo of a person, holding his hands, sketch"
    select_objects: "person"
    policy: "share"             # choice from ['share'|'separate']
-   sd_model: ''
+   sd_model: ""

# Guidance config
guidance:
  pca_guidance:
    start_step: 0               # Start step for PCA self-attention injection
    end_step: 80                # End step for PCA injection
    weight: 800                 # Weight for PCA self-attention injection
    select_feature: "key"
    # weight1: 0
    structure_guidance:         # Parameters for PCA injection
      apply: True               # Whether to apply PCA injection
      n_components: 64          # Number of leading components for PCA injection
      normalized: True          # Whether to normalize the PCA score
-     mask_type: 'cross_attn'   # Mask type for PCA injection, choice from ['cross_attn'|'tr']
-     penalty_type: 'max'       # Penalty type for PCA injection, choice from ['max'|'mean']
+     mask_type: "cross_attn"   # Mask type for PCA injection, choice from ["cross_attn"|"tr"]
+     penalty_type: "max"       # Penalty type for PCA injection, choice from ["max"|"mean"]
      mask_tr: 0.3              # Threshold for the PCA score, only applied when normalized is true
      penalty_factor: 10        # Penalty factor for the masked region, only applied when normalized is true
      warm_up:                  # Parameters for guidance weight warm-up
        apply: True             # Whether to apply warm-up
        end_step: 10            # End step for warm-up
      adaptive:                 # Parameters for adaptive self-attention injection
        apply: False            # Whether to apply adaptive self-attention injection
        adaptive_p: 1           # Power of the adaptive threshold
      blocks:                   # Blocks for self-attention injection
-       ['up_blocks.1']
+       ["up_blocks.1"]
    appearance_guidance:        # Parameters for texture regulation
      apply: True               # Whether to apply texture regulation
      reg_factor: 0.05          # Regularization factor
      tr: 0.5
      cross_attn_mask_tr: 0.3
      app_n_components: 2       # Number of leading components used to extract the mean appearance feature

  cross_attn:
    start_step: 0               # Start step for cross-attention guidance
    end_step: 80                # End step for cross-attention guidance
    weight: 0                   # Weight for cross-attention guidance
    obj_only: True              # Whether to apply object-only cross-attention guidance
    soft_guidance:              # Parameters for soft guidance
      apply: True               # Whether to apply soft guidance
      kernel_size: 5            # Kernel size for Gaussian blur
      sigma: 2                  # Sigma for Gaussian blur
    blocks:
-     ['up_blocks.1']
+     ["up_blocks.1"]
73 changes: 73 additions & 0 deletions config/sdxl_base.yaml
@@ -0,0 +1,73 @@
# SD config
sd_config:
  H: 1024                     # Generated image height
  W: 1024                     # Generated image width
  steps: 200                  # Number of diffusion steps
  guidance_scale: 7.5         # Classifier-free guidance scale
  grad_guidance_scale: 1      # Gradient guidance scale; multiplied by the weight of each guidance type
  sd_version: "XL-1.0"        # choice from ["XL-1.0"|"2.1"|"2.0"|"1.4"]
  dreambooth: null            # Path to dreambooth. Set to null to disable dreambooth
  safetensors: True           # whether to use safetensors; most checkpoints from civitai.com use the safetensors format
  same_latent: False          # whether to use the same latent from inversion
  appearnace_same_latent: False
  pca_paths:
    []
  seed: 922                   # Seed for the random number generator
  generated_sample: False     # Whether to use a generated sample as the reference pose
  prompt: ""                  # Prompt for generated sample
  negative_prompt: "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality" # Prompt for negative sample
  target_obj: ""              # Target object for inversion
  obj_pairs: ""

data:
  inversion:
    target_folder: "dataset/latent"
    num_inference_steps: 999
    method: "DDIM"            # choice from ['DDIM'|'NTI'|'NPT']
    fixed_size: [1024, 1024]  # Set to null to disable fixed size, otherwise set to the fixed size (h, w) of the target image
    prompt: "A photo of a person, holding his hands, sketch"
    select_objects: "person"
    policy: "share"           # choice from ['share'|'separate']
    sd_model: ""

# Guidance config
guidance:
  pca_guidance:
    start_step: 0             # Start step for PCA self-attention injection
    end_step: 80              # End step for PCA injection
    weight: 800               # Weight for PCA self-attention injection
    select_feature: "key"
    structure_guidance:       # Parameters for PCA injection
      apply: True             # Whether to apply PCA injection
      n_components: 64        # Number of leading components for PCA injection
      normalized: True        # Whether to normalize the PCA score
      mask_type: "cross_attn" # Mask type for PCA injection, choice from ['cross_attn'|'tr']
      penalty_type: "max"     # Penalty type for PCA injection, choice from ['max'|'mean']
      mask_tr: 0.3            # Threshold for the PCA score, only applied when normalized is true
      penalty_factor: 10      # Penalty factor for the masked region, only applied when normalized is true
      warm_up:                # Parameters for guidance weight warm-up
        apply: True           # Whether to apply warm-up
        end_step: 10          # End step for warm-up
      adaptive:               # Parameters for adaptive self-attention injection
        apply: False          # Whether to apply adaptive self-attention injection
        adaptive_p: 1         # Power of the adaptive threshold
      blocks:                 # Blocks for self-attention injection
        ["up_blocks.0"]
    appearance_guidance:      # Parameters for texture regulation
      apply: True             # Whether to apply texture regulation
      reg_factor: 0.05        # Regularization factor
      tr: 0.5
      cross_attn_mask_tr: 0.3
      app_n_components: 2     # Number of leading components used to extract the mean appearance feature

  cross_attn:
    start_step: 0             # Start step for cross-attention guidance
    end_step: 80              # End step for cross-attention guidance
    weight: 0                 # Weight for cross-attention guidance
    obj_only: True            # Whether to apply object-only cross-attention guidance
    soft_guidance:            # Parameters for soft guidance
      apply: True             # Whether to apply soft guidance
      kernel_size: 5          # Kernel size for Gaussian blur
      sigma: 2                # Sigma for Gaussian blur
    blocks:
      ["up_blocks.0"]
13 changes: 10 additions & 3 deletions gradio_app.py
@@ -69,13 +69,20 @@ def freecontrol_generate(condition_image, prompt, scale, ddim_steps, sd_version,
      input_config = gradio_update_parameter

      # Load base config
-     base_config = yaml.load(open("config/base.yaml", "r"), Loader=yaml.FullLoader)
+     if 'XL' in sd_version:
+         config_path = 'config/sdxl_base.yaml'
+     else:
+         config_path = 'config/base.yaml'
+     base_config = yaml.load(open(config_path, "r"), Loader=yaml.FullLoader)
      # Update the default config with the gradio config
      config = merge_sweep_config(base_config=base_config, update=input_config)
      config = OmegaConf.create(config)

      # set the correct pipeline
-     pipeline_name = "SDPipeline"
+     if 'XL' in sd_version:
+         pipeline_name = "SDXLPipeline"
+     else:
+         pipeline_name = "SDPipeline"

      pipeline = make_pipeline(pipeline_name,
                               model_path,
@@ -277,7 +284,7 @@ def main():

run_button.click(fn=freecontrol_generate, inputs=ips, outputs=[result_gallery])

block.launch(server_name='0.0.0.0', share=False, server_port=9989)
block.launch(server_name='0.0.0.0', share=True, server_port=9989)


if __name__ == '__main__':
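The `'XL' in sd_version` test now gates both the config path and the pipeline name, so the two branches must stay in sync; a small helper could keep them together. A minimal sketch (the helper name `resolve_pipeline` is illustrative, not part of this PR). Note also that the `share=True` launch change additionally exposes the app through a public Gradio tunnel:

```python
def resolve_pipeline(sd_version: str) -> tuple[str, str]:
    """Pair the base config file with the matching pipeline class name."""
    if 'XL' in sd_version:
        return 'config/sdxl_base.yaml', 'SDXLPipeline'
    return 'config/base.yaml', 'SDPipeline'

config_path, pipeline_name = resolve_pipeline('XL-1.0')
assert (config_path, pipeline_name) == ('config/sdxl_base.yaml', 'SDXLPipeline')
```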
3 changes: 2 additions & 1 deletion libs/model/__init__.py
@@ -1,2 +1,3 @@
  from .pipelines import make_pipeline
  from . import sd_pipeline
+ from . import sdxl_pipeline
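Importing `sd_pipeline` and `sdxl_pipeline` for their side effects suggests each module registers its pipeline class with the factory on import. A minimal sketch of that registry pattern (the decorator and dictionary names here are assumptions, not the repository's actual code):

```python
# Hypothetical registry: importing a pipeline module runs its
# @register_pipeline decorator, making the class visible to make_pipeline.
_PIPELINES: dict[str, type] = {}

def register_pipeline(name: str):
    def wrap(cls: type) -> type:
        _PIPELINES[name] = cls
        return cls
    return wrap

def make_pipeline(name: str, model_path: str, **kwargs):
    # Look up "SDPipeline" or "SDXLPipeline" and build it from a model path.
    return _PIPELINES[name].from_pretrained(model_path, **kwargs)
```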
6 changes: 3 additions & 3 deletions libs/model/module/xformer_attention.py
@@ -54,15 +54,15 @@ def __call__(self, attn: Attention, hidden_states, encoder_hidden_states=None, a
          if attn.group_norm is not None:
              hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)

-         query = attn.to_q(hidden_states, scale=scale)
+         query = attn.to_q(hidden_states)

          if encoder_hidden_states is None:
              encoder_hidden_states = hidden_states
          elif attn.norm_cross:
              encoder_hidden_states = attn.norm_encoder_hidden_states(encoder_hidden_states)

-         key = attn.to_k(encoder_hidden_states, scale=scale)
-         value = attn.to_v(encoder_hidden_states, scale=scale)
+         key = attn.to_k(encoder_hidden_states)
+         value = attn.to_v(encoder_hidden_states)

          # Record the Q, K, V for PCA guidance
          self.key = key
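Dropping the `scale=` keyword tracks newer `diffusers` releases, where the LoRA-era `scale` argument was removed from the `to_q`/`to_k`/`to_v` projection calls (they are plain linear layers again). If one needed to support both old and new `diffusers`, a version-tolerant call could look like this (purely illustrative; the PR simply drops the argument):

```python
import inspect

def project(layer, hidden_states, scale=1.0):
    # Older diffusers LoRA-compatible layers accepted a `scale` kwarg;
    # plain nn.Linear does not. Pass it only when the forward supports it.
    if "scale" in inspect.signature(layer.forward).parameters:
        return layer(hidden_states, scale=scale)
    return layer(hidden_states)
```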
4 changes: 2 additions & 2 deletions libs/model/sd_pipeline.py
@@ -304,8 +304,8 @@ def __call__(
              self.compute_cross_attn_mask(cond_control_ids, cond_example_ids, cond_appearance_ids)

          if _in_step(self.guidance_config.pca_guidance, i):
              # Compute the PCA structure and appearance guidance
              # Set the select feature to key by default
              try:
                  select_feature = self.guidance_config.pca_guidance.select_feature
              except:
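The bare `try/except` around `select_feature` is effectively a default lookup; with OmegaConf configs the same fallback can be written without swallowing unrelated errors. A minimal sketch (assuming `guidance_config` is an OmegaConf node, as the `OmegaConf.create` call in `gradio_app.py` suggests):

```python
from omegaconf import OmegaConf

# Fall back to "key" when select_feature is absent, without a bare
# except that would also hide real attribute errors and config typos.
select_feature = OmegaConf.select(
    guidance_config, "pca_guidance.select_feature", default="key"
)
```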
(Diffs for the remaining 6 changed files are not shown.)

0 comments on commit da3f3d2
