diff --git a/GPCS4/Graphics/Gnm/GnmCommandBufferDraw.cpp b/GPCS4/Graphics/Gnm/GnmCommandBufferDraw.cpp index e4fa95f70..c06c0f852 100644 --- a/GPCS4/Graphics/Gnm/GnmCommandBufferDraw.cpp +++ b/GPCS4/Graphics/Gnm/GnmCommandBufferDraw.cpp @@ -35,6 +35,11 @@ namespace sce::Gnm __debugbreak(); \ } +// Dump the recompiled shader to file +// so that we can analyze it using spirv toolset. +// #define SHADER_DUMP_FILE + + GnmCommandBufferDraw::GnmCommandBufferDraw(vlt::VltDevice* device) : GnmCommandBuffer(device) { @@ -397,6 +402,17 @@ namespace sce::Gnm }; m_context->setBlendMode(rtSlot, blend); + + VltLogicOpState loState; + loState.enableLogicOp = VK_FALSE; + loState.logicOp = VK_LOGIC_OP_NO_OP; + + VltMultisampleState msState; + msState.enableAlphaToCoverage = VK_FALSE; + msState.sampleMask = 0xFFFFFFFF; + + m_context->setLogicOpState(loState); + m_context->setMultisampleState(msState); } void GnmCommandBufferDraw::setDepthStencilControl(DepthStencilControl depthControl) @@ -489,9 +505,9 @@ namespace sce::Gnm void GnmCommandBufferDraw::drawIndexAuto(uint32_t indexCount, DrawModifier modifier) { - m_state.ia.indexBuffer = generateIndexBuffer(indexCount); // If the index size is currently 32 bits, this command will partially set it to 16 bits - m_state.ia.indexType = VK_INDEX_TYPE_UINT16; + m_state.ia.indexType = VK_INDEX_TYPE_UINT16; + m_state.ia.indexBuffer = generateIndexBufferAuto(indexCount); commitGraphicsState(); @@ -530,7 +546,7 @@ namespace sce::Gnm { //commitComputeState(); - m_context->dispatch(threadGroupX, threadGroupY, threadGroupZ); + //m_context->dispatch(threadGroupX, threadGroupY, threadGroupZ); } void GnmCommandBufferDraw::dispatchWithOrderedAppend(uint32_t threadGroupX, uint32_t threadGroupY, uint32_t threadGroupZ, DispatchOrderedAppendMode orderedAppendMode) @@ -658,16 +674,19 @@ namespace sce::Gnm VltBufferCreateInfo info = {}; info.size = size; info.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - info.stages = 
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + info.stages = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT; info.access = VK_ACCESS_INDEX_READ_BIT; Rc buffer = m_device->createBuffer(info, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - m_context->uploadBuffer(buffer, data); + m_context->updateBuffer(buffer, + 0, + size, + data); return buffer; } - Rc GnmCommandBufferDraw::generateIndexBuffer(uint32_t indexCount) + Rc GnmCommandBufferDraw::generateIndexBufferAuto(uint32_t indexCount) { // Auto-generated indexes are forced in 16 bits width. std::vector indexes; @@ -743,11 +762,14 @@ namespace sce::Gnm m_factory.createBuffer(info, buffer); m_tracker->track(buffer); - m_context->uploadBuffer(buffer.buffer, vsharp->getBaseAddress()); - m_context->bindVertexBuffer( - binding, - VltBufferSlice(buffer.buffer, 0, buffer.buffer->info().size), - vsharp->getStride()); + m_context->updateBuffer(buffer.buffer, + 0, + vsharp->getSize(), + vsharp->getBaseAddress()); + + m_context->bindVertexBuffer(binding, + VltBufferSlice(buffer.buffer, 0, buffer.buffer->info().size), + vsharp->getStride()); } void GnmCommandBufferDraw::updateVertexBinding(GcnModule& vsModule) @@ -880,8 +902,11 @@ namespace sce::Gnm VK_SHADER_STAGE_VERTEX_BIT, shader); +#ifdef SHADER_DUMP_FILE std::ofstream fout(shader->key().toString(), std::ios::binary); shader->dump(fout); +#endif + } void GnmCommandBufferDraw::updatePixelShaderStage() @@ -902,11 +927,13 @@ namespace sce::Gnm // bind the shader auto shader = psModule.compile(ctx.meta); m_context->bindShader( - VK_SHADER_STAGE_COMPUTE_BIT, + VK_SHADER_STAGE_FRAGMENT_BIT, shader); +#ifdef SHADER_DUMP_FILE std::ofstream fout(shader->key().toString(), std::ios::binary); shader->dump(fout); +#endif } void GnmCommandBufferDraw::commitGraphicsState() @@ -951,7 +978,7 @@ namespace sce::Gnm info.access = access; uint32_t slot = 0; - if (usage == VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) + if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) { info.memoryType = 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; @@ -973,7 +1000,9 @@ namespace sce::Gnm m_factory.createBuffer(info, buffer); - m_context->uploadBuffer(buffer.buffer, + m_context->updateBuffer(buffer.buffer, + 0, + buffer.gnmBuffer.getSize(), buffer.gnmBuffer.getBaseAddress()); slot = computeResourceBinding( @@ -1026,11 +1055,15 @@ namespace sce::Gnm subresourceLayers.baseArrayLayer = 0; subresourceLayers.layerCount = 1; - m_context->uploadImage( + uint32_t bytesPerElement = tsharp->getDataFormat().getBytesPerElement(); + uint32_t pitchInBytes = surfaceInfo.m_pitch * bytesPerElement; + m_context->updateImage( image, subresourceLayers, + VkOffset3D{}, + image->info().extent, tsharp->getBaseAddress(), - tsharp->getPitch(), + pitchInBytes, surfaceInfo.m_surfaceSize); uint32_t slot = computeResourceBinding( @@ -1071,9 +1104,9 @@ namespace sce::Gnm bindResourceBuffer( vsharp, res.startRegister, - VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, stage, - VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT); + VK_ACCESS_UNIFORM_READ_BIT); } break; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: diff --git a/GPCS4/Graphics/Gnm/GnmCommandBufferDraw.h b/GPCS4/Graphics/Gnm/GnmCommandBufferDraw.h index b6bde8d34..9e082156f 100644 --- a/GPCS4/Graphics/Gnm/GnmCommandBufferDraw.h +++ b/GPCS4/Graphics/Gnm/GnmCommandBufferDraw.h @@ -160,7 +160,7 @@ namespace sce::Gnm const void* data, uint32_t size); - vlt::Rc generateIndexBuffer( + vlt::Rc generateIndexBufferAuto( uint32_t indexCount); inline void bindVertexBuffer( diff --git a/GPCS4/Graphics/Gnm/GnmDataFormat.cpp b/GPCS4/Graphics/Gnm/GnmDataFormat.cpp index ef851de46..fe1bfa30d 100644 --- a/GPCS4/Graphics/Gnm/GnmDataFormat.cpp +++ b/GPCS4/Graphics/Gnm/GnmDataFormat.cpp @@ -400,7 +400,7 @@ DataFormat DataFormat::build(StencilFormat sFmt, TextureChannelType channelType) uint32_t DataFormat::getBytesPerElement(void) const { - return 
getBitsPerElement() * 8; + return getBitsPerElement() / 8; } uint32_t DataFormat::getBitsPerElement(void) const diff --git a/GPCS4/Graphics/Violet/VltContext.cpp b/GPCS4/Graphics/Violet/VltContext.cpp index 6b356c997..bd9628f81 100644 --- a/GPCS4/Graphics/Violet/VltContext.cpp +++ b/GPCS4/Graphics/Violet/VltContext.cpp @@ -793,6 +793,147 @@ namespace sce::vlt m_state.cb.framebuffer->setStencilClearValue(clearValue); } + void VltContext::updateBuffer( + const Rc& buffer, + VkDeviceSize offset, + VkDeviceSize size, + const void* data) + { + + this->endRendering(); + + VltBufferSliceHandle bufferSlice = buffer->getSliceHandle(offset, size); + VltCmdType cmdBuffer = VltCmdType::ExecBuffer; + + if (m_execBarriers.isBufferDirty(bufferSlice, VltAccess::Write)) + m_execBarriers.recordCommands(m_cmd); + + // Vulkan specifies that small amounts of data (up to 64kB) can + // be copied to a buffer directly if the size is a multiple of + // four. Anything else must be copied through a staging buffer. + // We'll limit the size to 4kB in order to keep command buffers + // reasonably small, we do not know how much data apps may upload. 
+ if ((size <= 4096) && ((size & 0x3) == 0) && ((offset & 0x3) == 0)) + { + m_cmd->cmdUpdateBuffer( + cmdBuffer, + bufferSlice.handle, + bufferSlice.offset, + bufferSlice.length, + data); + } + else + { + auto stagingSlice = m_staging.alloc(CACHE_LINE_SIZE, size); + auto stagingHandle = stagingSlice.getSliceHandle(); + + std::memcpy(stagingHandle.mapPtr, data, size); + + VkBufferCopy region; + region.srcOffset = stagingHandle.offset; + region.dstOffset = bufferSlice.offset; + region.size = size; + + m_cmd->cmdCopyBuffer(cmdBuffer, + stagingHandle.handle, bufferSlice.handle, 1, &region); + + m_cmd->trackResource(stagingSlice.buffer()); + } + + m_execBarriers.accessBuffer( + bufferSlice, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + buffer->info().stages, + buffer->info().access); + + m_cmd->trackResource(buffer); + } + + void VltContext::updateImage( + const Rc& image, + const VkImageSubresourceLayers& subresources, + VkOffset3D imageOffset, + VkExtent3D imageExtent, + const void* data, + VkDeviceSize pitchPerRow, + VkDeviceSize pitchPerLayer) + { + this->endRendering(); + + // Upload data through a staging buffer. Special care needs to + // be taken when dealing with compressed image formats: Rather + // than copying pixels, we'll be copying blocks of pixels. + const VltFormatInfo* formatInfo = image->formatInfo(); + + // Align image extent to a full block. This is necessary in + // case the image size is not a multiple of the block size. + VkExtent3D elementCount = vutil::computeBlockCount( + imageExtent, formatInfo->blockSize); + elementCount.depth *= subresources.layerCount; + + // Allocate staging buffer memory for the image data. The + // pixels or blocks will be tightly packed within the buffer. 
+ auto stagingSlice = m_staging.alloc(CACHE_LINE_SIZE, + formatInfo->elementSize * vutil::flattenImageExtent(elementCount)); + auto stagingHandle = stagingSlice.getSliceHandle(); + vutil::packImageData(stagingHandle.mapPtr, data, + elementCount, formatInfo->elementSize, + pitchPerRow, pitchPerLayer); + + // Prepare the image layout. If the given extent covers + // the entire image, we may discard its previous contents. + auto subresourceRange = vutil::makeSubresourceRange(subresources); + subresourceRange.aspectMask = formatInfo->aspectMask; + + if (m_execBarriers.isImageDirty(image, subresourceRange, VltAccess::Write)) + m_execBarriers.recordCommands(m_cmd); + + // Initialize the image if the entire subresource is covered + VkImageLayout imageLayoutInitial = image->info().layout; + VkImageLayout imageLayoutTransfer = image->pickLayout(VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + if (image->isFullSubresource(subresources, imageExtent)) + imageLayoutInitial = VK_IMAGE_LAYOUT_UNDEFINED; + + m_execAcquires.accessImage( + image, subresourceRange, + imageLayoutInitial, 0, 0, + imageLayoutTransfer, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT); + + m_execAcquires.recordCommands(m_cmd); + + // Copy contents of the staging buffer into the image. + // Since our source data is tightly packed, we do not + // need to specify any strides. 
+ VkBufferImageCopy region; + region.bufferOffset = stagingHandle.offset; + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource = subresources; + region.imageOffset = imageOffset; + region.imageExtent = imageExtent; + + m_cmd->cmdCopyBufferToImage(VltCmdType::ExecBuffer, + stagingHandle.handle, image->handle(), + imageLayoutTransfer, 1, &region); + + // Transition image back into its optimal layout + m_execBarriers.accessImage( + image, subresourceRange, + imageLayoutTransfer, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + image->info().layout, + image->info().stages, + image->info().access); + + m_cmd->trackResource(image); + m_cmd->trackResource(stagingSlice.buffer()); + } + void VltContext::uploadBuffer( const Rc& buffer, const void* data) @@ -1007,6 +1148,8 @@ namespace sce::vlt if (!m_flags.test(VltContextFlag::GpRenderingActive) && framebuffer != nullptr) { + m_execBarriers.recordCommands(m_cmd); + const VltFramebufferSize fbSize = framebuffer->size(); VkRect2D renderArea; @@ -1544,4 +1687,5 @@ namespace sce::vlt } + } // namespace sce::vlt \ No newline at end of file diff --git a/GPCS4/Graphics/Violet/VltContext.h b/GPCS4/Graphics/Violet/VltContext.h index f04960e90..b40ce3c93 100644 --- a/GPCS4/Graphics/Violet/VltContext.h +++ b/GPCS4/Graphics/Violet/VltContext.h @@ -384,6 +384,41 @@ namespace sce::vlt void setStencilClearValue( VkClearValue clearValue); + /** + * \brief Updates a buffer + * + * Copies data from the host into a buffer. + * \param [in] buffer Destination buffer + * \param [in] offset Offset of sub range to update + * \param [in] size Length of sub range to update + * \param [in] data Data to upload + */ + void updateBuffer( + const Rc& buffer, + VkDeviceSize offset, + VkDeviceSize size, + const void* data); + + /** + * \brief Updates an image + * + * Copies data from the host into an image. 
+ * \param [in] image Destination image + * \param [in] subresources Image subresources to update + * \param [in] imageOffset Offset of the image area to update + * \param [in] imageExtent Size of the image area to update + * \param [in] data Source data + * \param [in] pitchPerRow Row pitch of the source data + * \param [in] pitchPerLayer Layer pitch of the source data + */ + void updateImage( + const Rc& image, + const VkImageSubresourceLayers& subresources, + VkOffset3D imageOffset, + VkExtent3D imageExtent, + const void* data, + VkDeviceSize pitchPerRow, + VkDeviceSize pitchPerLayer); /** * \brief Uses transfer queue to initialize buffer diff --git a/GPCS4/Graphics/Violet/VltUtil.cpp b/GPCS4/Graphics/Violet/VltUtil.cpp index 70706c190..5dccbe5c3 100644 --- a/GPCS4/Graphics/Violet/VltUtil.cpp +++ b/GPCS4/Graphics/Violet/VltUtil.cpp @@ -52,7 +52,8 @@ namespace sce::vlt::vutil const VkDeviceSize bytesPerLayer = blockCount.height * bytesPerRow; const VkDeviceSize bytesTotal = blockCount.depth * bytesPerLayer; - const bool directCopy = ((bytesPerRow == pitchPerRow) || (blockCount.height == 1)) && ((bytesPerLayer == pitchPerLayer) || (blockCount.depth == 1)); + const bool directCopy = ((bytesPerRow == pitchPerRow) || (blockCount.height == 1)) && + ((bytesPerLayer == pitchPerLayer) || (blockCount.depth == 1)); if (directCopy) {