diff --git a/.gitignore b/.gitignore
index 26fac01..e783667 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ venv.zip
 .idea/modules.xml
 .idea/jarRepositories.xml
 .idea/compiler.xml
+.idea/discord.xml
 .idea/libraries/
 *.iws
 *.iml
@@ -40,4 +41,8 @@ build/
 .vscode/
 
 ### Mac OS ###
-.DS_Store
\ No newline at end of file
+.DS_Store
+
+### Image Outputs ###
+/result/generated/**
+/result/upscaled/**
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 54b799f..db3b44c 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,11 +1,8 @@
diff --git a/README.md b/README.md
index 0c964c1..361af44 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,10 @@
-# diffusion-tool
-Diffusion Tool is an AI image generator and upscaler created for my third-year Artificial Intelligence university exam, using Java and Python.
+<div align="center">
+  <h1>diffusion-tool</h1>
+  <p>Image generator and upscaler created for my AI university exam</p>
+</div>
-# Project Description
+## Description
 At its core, it's a JavaFX application that integrates the Python interpreter and uses it to implement Stable Diffusion
 pipelines for generative AI plus upscaling, and BSRGAN's degradation model for the upscaling of any image.
 I initially thought about using the Spring framework to manage user registration, but I wanted everyone to be able to
 use the program offline, so I opted
@@ -10,7 +13,7 @@ It is structured as follows: from the user side, we have the Login and Sign Up pa
 and Upscale pages. The last two are the essential part of the project, and they act as GUIs for the Python scripts.
 
-# Prerequisites
+## Prerequisites
 To compile and run the software, you need the following prerequisites:
 - Open Java Development Kit (OpenJDK) 17 or above
 - Apache Maven (at least version 3.6.3 is recommended)
@@ -23,7 +26,7 @@ with the packages listed in *requirements*.
 pip install -r requirements.txt
 ```
 
-# System requirements
+## System requirements
 I will only include consumer-level hardware.
 AI-computing capable hardware that has a GPU with enough VRAM should be capable of running this software.
 **ATTENTION**: currently, AMD GPUs are not supported, as the application relies on CUDA, a technology exclusive to NVIDIA.
@@ -34,7 +37,7 @@ AI-computing capable hardware that has a GPU with enough VRAM should be capable
 | `RAM` 16 GB | 16 GB |
 | `GPU` NVIDIA GeForce GTX 1660 SUPER | NVIDIA GeForce RTX 3060 |
 
-# Building
+## Building
 Executable packages can be downloaded from [Releases](https://github.com/ShyVortex/diffusion-tool/releases) or built manually instead.
 You can do that assuming the above prerequisites have already been installed.
 Once you're in the project directory, type the following in a terminal to download the dependencies and compile all the classes:
@@ -47,20 +50,28 @@ Then, if you also want a runnable .jar archive, type:
 ```
 With these commands, a new folder named 'target' is created, containing the compiled project as well as the executable file.
 
-# Screenshots
+## Unlock Stable Diffusion 3
+The newest generative model is currently gated, so first you need to sign up [here](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers).
+Proceed to generate a [token](https://huggingface.co/settings/tokens) under your account settings, which you will use to log in with:
+ ```shell
+ huggingface-cli login
+ ```
+Enter your credentials first, then the token when prompted.
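The same login can also be done non-interactively through the `huggingface_hub` Python API pinned in *requirements*; a minimal sketch, where the token value is a placeholder you must replace with your own:

```python
from huggingface_hub import login

# Placeholder token: paste the value generated under your account settings
login(token="hf_your_token_here")
```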
+
+## Screenshots
 ### Home
-![immagine](https://github.com/ShyVortex/diffusion-tool/assets/111277410/53a8ba6f-a189-4376-a8af-0c9996a26d62)
+![home-view](https://github.com/user-attachments/assets/50052e5a-c8a4-4eaa-b39f-ae537c81fb9f)
 ### Image Generation
-![immagine](https://github.com/ShyVortex/diffusion-tool/assets/111277410/4a83e1f2-3613-4ae2-a498-cb3f2a8b1479)
+![generate-view](https://github.com/user-attachments/assets/dc8239d9-faa7-4a88-bb09-7d808763220c)
 ### Image Upscaling
-![immagine](https://github.com/ShyVortex/diffusion-tool/assets/111277410/e6f6aea6-e9a2-46f4-8b7b-066ae73aa8f4)
+![upscale-view](https://github.com/user-attachments/assets/db703513-dc09-4344-96c8-1a6c0ce5d246)
 
-# Upscaling Comparison
+## Upscaling Comparison
 ### Low-res vs. Upscaled
 ![UpscalingComparison](https://github.com/ShyVortex/diffusion-tool/assets/111277410/0e380dda-36f4-4187-8ff2-9cf287dca06d)
 ![UpscalingComparison2](https://github.com/ShyVortex/diffusion-tool/assets/111277410/05f0d876-1b9b-4b50-8dba-c558abf815fe)
 
-# Credits
+## Credits
 As stated before, this project uses BSRGAN's degradation model for upscaling purposes.
 BSRGAN is a practical degradation model for Deep Blind Image Super-Resolution, developed by
 [Kai Zhang](https://cszn.github.io/), Jingyun Liang, [Luc Van Gool](https://vision.ee.ethz.ch/people-details.OTAyMzM=.TGlzdC8zMjQ4LC0xOTcxNDY1MTc4.html), [Radu Timofte](http://people.ee.ethz.ch/~timofter/),
@@ -71,10 +82,11 @@ I've edited said script to adapt it and make it work on my project, keeping ackn
 The project utilizes Stable Diffusion's generative AI pipelines for image generation and upscaling, in particular:
 + [stable-diffusion-2-1](https://huggingface.co/stabilityai/stable-diffusion-2-1)
 + [stable-diffusion-2-1-base](https://huggingface.co/stabilityai/stable-diffusion-2-1-base)
++ [stable-diffusion-3-medium](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers)
 + [sd-x2-latent-upscaler](https://huggingface.co/stabilityai/sd-x2-latent-upscaler)
 + [pixel-art-style](https://huggingface.co/kohbanye/pixel-art-style)
 + [pixel-art-xl](https://huggingface.co/nerijs/pixel-art-xl)
 
-# License
+## License
 - This project is distributed under the [GNU General Public License v3.0](https://github.com/ShyVortex/diffusion-tool/blob/master/LICENSE.md).
 - Copyright of [@ShyVortex](https://github.com/ShyVortex), 2024.
diff --git a/pom.xml b/pom.xml
index 681e021..82499fe 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
     <groupId>it.unimol</groupId>
     <artifactId>diffusion-tool</artifactId>
-    <version>1.0.1</version>
+    <version>1.1.0</version>
 
     <name>diffusion-tool</name>
@@ -20,17 +20,17 @@
         <dependency>
             <groupId>org.openjfx</groupId>
             <artifactId>javafx-controls</artifactId>
-            <version>17.0.6</version>
+            <version>17.0.12</version>
         </dependency>
         <dependency>
             <groupId>org.openjfx</groupId>
             <artifactId>javafx-fxml</artifactId>
-            <version>17.0.6</version>
+            <version>17.0.12</version>
         </dependency>
         <dependency>
             <groupId>org.openjfx</groupId>
             <artifactId>javafx-web</artifactId>
-            <version>17.0.6</version>
+            <version>17.0.12</version>
         </dependency>
         <dependency>
             <groupId>org.controlsfx</groupId>
diff --git a/requirements.txt b/requirements.txt
index a2fa6c9..3c09372 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,10 @@
 accelerate==0.26.1
 certifi==2024.7.4
 charset-normalizer==3.3.2
-diffusers==0.25.0
+diffusers==0.29.2
 filelock==3.13.1
 fsspec==2023.12.2
-huggingface-hub==0.20.1
+huggingface-hub==0.24.3
 idna==3.7
 importlib-metadata==7.0.1
 Jinja2==3.1.4
@@ -21,10 +21,10 @@ nvidia-cufft-cu12==11.0.2.54
 nvidia-curand-cu12==10.3.2.106
 nvidia-cusolver-cu12==11.4.5.107
 nvidia-cusparse-cu12==12.1.0.106
-nvidia-nccl-cu12==2.18.1
+nvidia-nccl-cu12==2.20.5
 nvidia-nvjitlink-cu12==12.3.101
 nvidia-nvtx-cu12==12.1.105
-opencv-python==4.9.0.80
+opencv-python==4.10.0.82
 packaging==23.2
 peft==0.9.0
 pillow==10.3.0
@@ -43,3 +43,4 @@ triton==2.1.0
 typing_extensions==4.9.0
 urllib3==2.2.2
 zipp==3.19.1
+sentencepiece==0.2.0
diff --git a/src/main/java/it/unimol/diffusiontool/application/DiffusionApplication.java b/src/main/java/it/unimol/diffusiontool/application/DiffusionApplication.java
index 09f9dee..b78074d 100644
--- a/src/main/java/it/unimol/diffusiontool/application/DiffusionApplication.java
+++ b/src/main/java/it/unimol/diffusiontool/application/DiffusionApplication.java
@@ -55,7 +55,7 @@ public User getUser() {
     }
 
     private void setVersion() {
-        this.version = "1.0.1";
+        this.version = "1.1.0";
     }
 
     public void setRootNode(Parent rootNode) {
diff --git a/src/main/java/it/unimol/diffusiontool/controller/DiffusionController.java b/src/main/java/it/unimol/diffusiontool/controller/DiffusionController.java
index 48b7818..634ee32 100644
--- a/src/main/java/it/unimol/diffusiontool/controller/DiffusionController.java
+++ b/src/main/java/it/unimol/diffusiontool/controller/DiffusionController.java
@@ -252,11 +252,12 @@ private void initGenerateView() {
         profilePicProperty.set(diffApp.getUser().getProfilePic());
         homeUserImage.imageProperty().bind(profilePicProperty);
         styleComboBox.getItems().addAll(
-                "General",
+                "Stable Diffusion 2.1",
+                "Stable Diffusion 3",
                 "Pixel Art"
         );
         styleComboBox.setPromptText(styleComboBox.getItems().get(0));
-        styleComboBox.setValue("General");
+        styleComboBox.setValue("Stable Diffusion 2.1");
     }
 
     @FXML
@@ -588,10 +589,10 @@ private void OnProfileDeleteClick() throws Exception {
 
     @FXML
     private void OnStyleSelect() {
-        if (styleComboBox.getValue().equals("General"))
-            upscaleCheckBox.setVisible(true);
-        else
-            upscaleCheckBox.setVisible(false);
+        // Upscaling checkbox only visible if the selected model is SD 2.1
+        upscaleCheckBox.setVisible(
+                styleComboBox.getValue().equals("Stable Diffusion 2.1")
+        );
     }
 
     @FXML
@@ -1219,9 +1220,11 @@ public File findPyScript() {
         String fileName;
         switch (pythonCalledBy) {
             case 1:
-                if (styleComboBox.getValue().equals("General"))
-                    // if (includeUpscaling) -> generate_upscale.py, else -> generate.py
-                    fileName = includeUpscaling ? "generate_upscale.py" : "generate.py";
+                if (styleComboBox.getValue().equals("Stable Diffusion 2.1"))
+                    // if (includeUpscaling) -> generate_upscale.py, else -> generate_sd2-1.py
+                    fileName = includeUpscaling ? "generate_upscale.py" : "generate_sd2-1.py";
+                else if (styleComboBox.getValue().equals("Stable Diffusion 3"))
+                    fileName = "generate_sd3.py";
                 else
                     fileName = "generate_pixart.py";
                 break;
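For context, every generator script that `findPyScript()` can select shares the same three-argument command-line contract, as the usage strings in the scripts below show. A minimal sketch of invoking one by hand, mirroring what the Java side does (the prompt, tags, and date values are arbitrary examples):

```python
import subprocess

# Each generator script expects exactly: <prompt> <tags> <date>
result = subprocess.run(
    ["python", "generate_sd3.py", "a castle at sunset", "blurry, low quality", "01-08-2024"],
    capture_output=True,
    text=True,
    check=True,
)
encoded_png = result.stdout.strip()  # the script prints the image as a base64 string
```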
variant="fp16") pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) - pipe = pipe.to("cuda") + + # offload components to CPU during inference to save memory + pipe.enable_model_cpu_offload() # Process the prompt and set the output path with torch.cuda.amp.autocast(): image = pipe(prompt=prompt, negative_prompt=tags, num_inference_steps=25).images[0] - output_folder = os.path.abspath("result/generated/general") + output_folder = os.path.abspath("result/generated/sd2-1") output_filename = f"generated_image_{date}.png" output_filepath = os.path.join(output_folder, output_filename) diff --git a/src/main/python/it/unimol/diffusiontool/generate_sd3.py b/src/main/python/it/unimol/diffusiontool/generate_sd3.py new file mode 100644 index 0000000..c676edf --- /dev/null +++ b/src/main/python/it/unimol/diffusiontool/generate_sd3.py @@ -0,0 +1,62 @@ +import sys +from diffusers import StableDiffusion3Pipeline +from PIL import Image +from io import BytesIO +import torch +import os +import base64 + + +def main(): + # Check if the correct number of command-line arguments is provided + if len(sys.argv) != 4: + print("Usage: python generate_sd3.py ") + sys.exit(1) + + # Get the prompt and date from the command-line arguments passed from Java + prompt = sys.argv[1] + tags = sys.argv[2] + date = sys.argv[3] + + # Model initialization and processing + repo_id = "stabilityai/stable-diffusion-3-medium-diffusers" + pipe = StableDiffusion3Pipeline.from_pretrained( + repo_id, + + # removes memory-intensive text encoder to decrease memory requirements + text_encoder_3=None, + tokenizer_3=None, + + torch_dtype=torch.float16, + ) + + # offload components to CPU during inference to save memory + pipe.enable_model_cpu_offload() + + # Process the prompt and set the output path + with torch.cuda.amp.autocast(): + image = pipe( + prompt=prompt, + negative_prompt=tags, + num_inference_steps=25, + guidance_scale=6.5 + ).images[0] + output_folder = os.path.abspath("result/generated/sd3") + output_filename = f"generated_image_{date}.png" + output_filepath = os.path.join(output_folder, output_filename) + + # Check if the output folder exists, and create it if not, then save the image + if not os.path.exists(output_folder): + os.makedirs(output_folder) + image.save(output_filepath) + + # Encode the image as a base64 string + with open(output_filepath, "rb") as image_file: + encoded_image = base64.b64encode(image_file.read()).decode('utf-8') + + # Print image as string + print(encoded_image) + + +if __name__ == "__main__": + main() diff --git a/src/main/python/it/unimol/diffusiontool/generate_upscale.py b/src/main/python/it/unimol/diffusiontool/generate_upscale.py index a386307..5c514ee 100644 --- a/src/main/python/it/unimol/diffusiontool/generate_upscale.py +++ b/src/main/python/it/unimol/diffusiontool/generate_upscale.py @@ -21,7 +21,9 @@ def main(): repo_id = "stabilityai/stable-diffusion-2-1-base" # you can use 2-1 if you have more VRAM pipe = DiffusionPipeline.from_pretrained(repo_id, torch_dtype=torch.float16, variant="fp16") pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config) - pipe = pipe.to("cuda") + + # offload components to CPU during inference to save memory + pipe.enable_model_cpu_offload() # Load upscaling model model_id = "stabilityai/sd-x2-latent-upscaler" @@ -47,7 +49,7 @@ def main(): guidance_scale=0, generator=torch.manual_seed(33), ).images[0] - output_folder = os.path.abspath("result/generated/general") + output_folder = 
diff --git a/src/main/python/it/unimol/diffusiontool/generate_upscale.py b/src/main/python/it/unimol/diffusiontool/generate_upscale.py
index a386307..5c514ee 100644
--- a/src/main/python/it/unimol/diffusiontool/generate_upscale.py
+++ b/src/main/python/it/unimol/diffusiontool/generate_upscale.py
@@ -21,7 +21,9 @@ def main():
     repo_id = "stabilityai/stable-diffusion-2-1-base"  # you can use 2-1 if you have more VRAM
     pipe = DiffusionPipeline.from_pretrained(repo_id, torch_dtype=torch.float16, variant="fp16")
     pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-    pipe = pipe.to("cuda")
+
+    # offload components to CPU during inference to save memory
+    pipe.enable_model_cpu_offload()
 
     # Load upscaling model
     model_id = "stabilityai/sd-x2-latent-upscaler"
@@ -47,7 +49,7 @@
         guidance_scale=0,
         generator=torch.manual_seed(33),
     ).images[0]
-    output_folder = os.path.abspath("result/generated/general")
+    output_folder = os.path.abspath("result/generated/sd2-1")
     output_filename = f"generated_image_{date}.png"
     output_filepath = os.path.join(output_folder, output_filename)
diff --git a/src/main/resources/app-generate-view.fxml b/src/main/resources/app-generate-view.fxml
index 29e9a94..73718b8 100644
--- a/src/main/resources/app-generate-view.fxml
+++ b/src/main/resources/app-generate-view.fxml
@@ -14,8 +14,8 @@
@@ -75,13 +75,13 @@