Skip to content

Commit

Permalink
Speed up bulk BufferBarrier and LayoutTransition
Browse files Browse the repository at this point in the history
  • Loading branch information
knokko committed Jan 20, 2025
1 parent 5e0cf1d commit a3d99f4
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 69 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import static com.github.knokko.boiler.exceptions.VulkanFailureException.assertVkSuccess;
import static com.github.knokko.boiler.utilities.ColorPacker.*;
import static org.lwjgl.system.MemoryUtil.NULL;
import static org.lwjgl.system.MemoryUtil.memFree;
import static org.lwjgl.vulkan.KHRDynamicRendering.vkCmdBeginRenderingKHR;
import static org.lwjgl.vulkan.KHRDynamicRendering.vkCmdEndRenderingKHR;
import static org.lwjgl.vulkan.VK10.*;
Expand Down Expand Up @@ -357,23 +358,40 @@ public void bufferBarrier(VkbBufferRange bufferRange, ResourceUsage srcUsage, Re
* @param buffers The buffer ranges for which a pipeline barrier should be recorded
*/
public void bulkBufferBarrier(ResourceUsage srcUsage, ResourceUsage dstUsage, VkbBufferRange... buffers) {
    // Nothing to record for an empty array; this also avoids a zero-capacity allocation.
    if (buffers.length == 0) {
        return;
    }

    // Up to 10 barriers are allocated through `stack`; larger requests use a heap
    // allocation that must be released with memFree. At most 100 barriers are
    // passed to a single vkCmdPipelineBarrier call, so bigger inputs are chunked.
    boolean useHeap = buffers.length > 10;
    int capacity = Math.min(buffers.length, 100);
    var pBufferBarriers = useHeap
            ? VkBufferMemoryBarrier.calloc(capacity)
            : VkBufferMemoryBarrier.calloc(capacity, stack);

    try {
        // These fields are identical for every barrier, so they are written once per slot
        // and reused by every chunk.
        for (int slot = 0; slot < capacity; slot++) {
            var bufferBarrier = pBufferBarriers.get(slot);
            bufferBarrier.sType$Default();
            bufferBarrier.srcAccessMask(srcUsage.accessMask());
            bufferBarrier.dstAccessMask(dstUsage.accessMask());
            bufferBarrier.srcQueueFamilyIndex(VK_QUEUE_FAMILY_IGNORED);
            bufferBarrier.dstQueueFamilyIndex(VK_QUEUE_FAMILY_IGNORED);
        }

        for (int start = 0; start < buffers.length; start += capacity) {
            int count = Math.min(capacity, buffers.length - start);

            // Only the per-buffer fields change between chunks.
            for (int slot = 0; slot < count; slot++) {
                VkbBufferRange bufferRange = buffers[start + slot];
                var bufferBarrier = pBufferBarriers.get(slot);
                bufferBarrier.buffer(bufferRange.buffer().vkBuffer());
                bufferBarrier.offset(bufferRange.offset());
                bufferBarrier.size(bufferRange.size());
            }

            // count is smaller than capacity only for the final chunk, so shrinking
            // the limit never hides slots that a later chunk still needs.
            pBufferBarriers.limit(count);
            vkCmdPipelineBarrier(
                    commandBuffer, srcUsage.stageMask(), dstUsage.stageMask(),
                    0, null, pBufferBarriers, null
            );
        }
    } finally {
        // Release the heap allocation even if recording fails part-way (e.g. a null element).
        if (useHeap) {
            memFree(pBufferBarriers);
        }
    }
}

/**
Expand Down Expand Up @@ -427,24 +445,41 @@ public void transitionLayout(
* @param images The images whose layout should be transitioned
*/
public void bulkTransitionLayout(ResourceUsage oldUsage, ResourceUsage newUsage, VkbImage... images) {
    // Nothing to record for an empty array; this also avoids a zero-capacity allocation.
    if (images.length == 0) {
        return;
    }

    // A null oldUsage means there is no previous usage to wait on: the barrier then uses
    // an empty source access mask, VK_IMAGE_LAYOUT_UNDEFINED and the top-of-pipe stage.
    int srcAccessMask = oldUsage != null ? oldUsage.accessMask() : 0;
    int oldLayout = oldUsage != null ? oldUsage.imageLayout() : VK_IMAGE_LAYOUT_UNDEFINED;
    int srcStageMask = oldUsage != null ? oldUsage.stageMask() : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;

    // Up to 10 barriers are allocated through `stack`; larger requests use a heap
    // allocation that must be released with memFree. At most 100 barriers are
    // passed to a single vkCmdPipelineBarrier call, so bigger inputs are chunked.
    boolean useHeap = images.length > 10;
    int capacity = Math.min(images.length, 100);
    var pImageBarriers = useHeap
            ? VkImageMemoryBarrier.calloc(capacity)
            : VkImageMemoryBarrier.calloc(capacity, stack);

    try {
        // These fields are identical for every barrier, so they are written once per slot
        // and reused by every chunk.
        for (int slot = 0; slot < capacity; slot++) {
            var pImageBarrier = pImageBarriers.get(slot);
            pImageBarrier.sType$Default();
            pImageBarrier.srcAccessMask(srcAccessMask);
            pImageBarrier.dstAccessMask(newUsage.accessMask());
            pImageBarrier.oldLayout(oldLayout);
            pImageBarrier.newLayout(newUsage.imageLayout());
            pImageBarrier.srcQueueFamilyIndex(VK_QUEUE_FAMILY_IGNORED);
            pImageBarrier.dstQueueFamilyIndex(VK_QUEUE_FAMILY_IGNORED);
        }

        for (int start = 0; start < images.length; start += capacity) {
            int count = Math.min(capacity, images.length - start);

            // Only the image handle and its subresource range change between chunks.
            for (int slot = 0; slot < count; slot++) {
                VkbImage image = images[start + slot];
                var pImageBarrier = pImageBarriers.get(slot);
                pImageBarrier.image(image.vkImage());
                instance.images.subresourceRange(stack, pImageBarrier.subresourceRange(), image.aspectMask());
            }

            // count is smaller than capacity only for the final chunk, so shrinking
            // the limit never hides slots that a later chunk still needs.
            pImageBarriers.limit(count);
            vkCmdPipelineBarrier(
                    commandBuffer, srcStageMask, newUsage.stageMask(),
                    0, null, null, pImageBarriers
            );
        }
    } finally {
        // Release the heap allocation even if recording fails part-way (e.g. a null element).
        if (useHeap) {
            memFree(pImageBarriers);
        }
    }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,55 +211,57 @@ public void testBulkOperations() {
VK_API_VERSION_1_2, "TestBulkOperations", 1
).validation().forbidValidationErrors().build();

int amount = 500;
var sourceBuffers = new MappedVkbBuffer[amount];
var middleBuffers = new VkbBuffer[amount];
var images1 = new VkbImage[amount];
var images2 = new VkbImage[amount];
var destinationBuffers = new MappedVkbBuffer[amount];
var destinationRanges = new MappedVkbBufferRange[amount];

for (int index = 0; index < amount; index++) {
sourceBuffers[index] = instance.buffers.createMapped(4L, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, "Source" + index);
sourceBuffers[index].fullMappedRange().intBuffer().put(index);
middleBuffers[index] = instance.buffers.create(
4L, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, "Middle" + index
);
images1[index] = instance.images.createSimple(
1, 1, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT, "Test1Image" + index
);
images2[index] = instance.images.createSimple(
1, 1, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT, "Test2Image" + index
);
destinationBuffers[index] = instance.buffers.createMapped(4L, VK_BUFFER_USAGE_TRANSFER_DST_BIT, "Destination" + index);
destinationRanges[index] = destinationBuffers[index].fullMappedRange();
}

var commands = new SingleTimeCommands(instance);
commands.submit("Bulk", recorder -> {
recorder.bulkCopyBuffer(recorder.convert(sourceBuffers), recorder.convert(middleBuffers));
recorder.bulkBufferBarrier(ResourceUsage.TRANSFER_DEST, ResourceUsage.TRANSFER_SOURCE, recorder.convert(middleBuffers));
recorder.bulkTransitionLayout(null, ResourceUsage.TRANSFER_DEST, images1);
recorder.bulkCopyBufferToImage(images1, recorder.convert(middleBuffers));
recorder.bulkTransitionLayout(ResourceUsage.TRANSFER_DEST, ResourceUsage.TRANSFER_SOURCE, images1);
recorder.bulkTransitionLayout(null, ResourceUsage.TRANSFER_DEST, images2);
recorder.bulkCopyImage(images1, images2);
recorder.bulkTransitionLayout(ResourceUsage.TRANSFER_DEST, ResourceUsage.TRANSFER_SOURCE, images2);
recorder.bulkCopyImageToBuffer(images2, recorder.convert(destinationRanges));
});
commands.destroy();
for (int amount : new int[] { 0, 1, 200, 2002 }) {
var sourceBuffers = new MappedVkbBuffer[amount];
var middleBuffers = new VkbBuffer[amount];
var images1 = new VkbImage[amount];
var images2 = new VkbImage[amount];
var destinationBuffers = new MappedVkbBuffer[amount];
var destinationRanges = new MappedVkbBufferRange[amount];

for (int index = 0; index < amount; index++) {
sourceBuffers[index] = instance.buffers.createMapped(4L, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, "Source" + index);
sourceBuffers[index].fullMappedRange().intBuffer().put(index);
middleBuffers[index] = instance.buffers.create(
4L, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, "Middle" + index
);
images1[index] = instance.images.createSimple(
1, 1, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT, "Test1Image" + index
);
images2[index] = instance.images.createSimple(
1, 1, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT, "Test2Image" + index
);
destinationBuffers[index] = instance.buffers.createMapped(4L, VK_BUFFER_USAGE_TRANSFER_DST_BIT, "Destination" + index);
destinationRanges[index] = destinationBuffers[index].fullMappedRange();
}

for (int index = 0; index < amount; index++) assertEquals(index, destinationRanges[index].intBuffer().get());
var commands = new SingleTimeCommands(instance);
commands.submit("Bulk", recorder -> {
recorder.bulkCopyBuffer(recorder.convert(sourceBuffers), recorder.convert(middleBuffers));
recorder.bulkBufferBarrier(ResourceUsage.TRANSFER_DEST, ResourceUsage.TRANSFER_SOURCE, recorder.convert(middleBuffers));
recorder.bulkTransitionLayout(null, ResourceUsage.TRANSFER_DEST, images1);
recorder.bulkCopyBufferToImage(images1, recorder.convert(middleBuffers));
recorder.bulkTransitionLayout(ResourceUsage.TRANSFER_DEST, ResourceUsage.TRANSFER_SOURCE, images1);
recorder.bulkTransitionLayout(null, ResourceUsage.TRANSFER_DEST, images2);
recorder.bulkCopyImage(images1, images2);
recorder.bulkTransitionLayout(ResourceUsage.TRANSFER_DEST, ResourceUsage.TRANSFER_SOURCE, images2);
recorder.bulkCopyImageToBuffer(images2, recorder.convert(destinationRanges));
});
commands.destroy();

for (int index = 0; index < amount; index++) assertEquals(index, destinationRanges[index].intBuffer().get());

for (var buffer : destinationBuffers) buffer.destroy(instance);
for (var image : images2) image.destroy(instance);
for (var image : images1) image.destroy(instance);
for (var buffer : middleBuffers) buffer.destroy(instance);
for (var buffer : sourceBuffers) buffer.destroy(instance);
}

for (var buffer : destinationBuffers) buffer.destroy(instance);
for (var image : images2) image.destroy(instance);
for (var image : images1) image.destroy(instance);
for (var buffer : middleBuffers) buffer.destroy(instance);
for (var buffer : sourceBuffers) buffer.destroy(instance);
instance.destroyInitialObjects();
}
}

0 comments on commit a3d99f4

Please sign in to comment.