Skip to content

Commit

Permalink
Merge pull request #187 from billhollings/master
Browse files Browse the repository at this point in the history
 Support larger VkBufferViews by using 2D Metal textures.
  • Loading branch information
billhollings authored Jun 28, 2018
2 parents 699839b + 42d4272 commit 5943aae
Show file tree
Hide file tree
Showing 11 changed files with 70 additions and 79 deletions.
2 changes: 1 addition & 1 deletion ExternalRevisions/SPIRV-Cross_repo_revision
Original file line number Diff line number Diff line change
@@ -1 +1 @@
e59cc244958af6059f7bd1d16d833e17409dec37
d67e586b2e16a46a5cc1515093e8a04bff31c594
6 changes: 4 additions & 2 deletions MoltenVK/MoltenVK/API/vk_mvk_moltenvk.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ extern "C" {
*/
#define MVK_VERSION_MAJOR 1
#define MVK_VERSION_MINOR 0
#define MVK_VERSION_PATCH 12
#define MVK_VERSION_PATCH 13

#define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch))
#define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH)
Expand All @@ -69,7 +69,8 @@ typedef struct {
VkBool32 shaderConversionFlipVertexY; /**< If enabled, MSL vertex shader code created during Runtime Shader Conversion will flip the Y-axis of each vertex, as Vulkan coordinate system is inverse of OpenGL. Default is true. */
VkBool32 synchronousQueueSubmits; /**< If enabled, queue command submissions (vkQueueSubmit() & vkQueuePresentKHR()) will be processed on the thread that called the submission function. If disabled, processing will be dispatched to a GCD dispatch_queue whose priority is determined by VkDeviceQueueCreateInfo::pQueuePriorities during vkCreateDevice(). This setting affects how much command processing should be performed on the rendering thread, or offloaded to a secondary thread. Default value is false, and command processing will be handled on a prioritizable queue thread. */
VkBool32 supportLargeQueryPools; /**< Metal allows only 8192 occlusion queries per MTLBuffer. If enabled, MoltenVK allocates a MTLBuffer for each query pool, allowing each query pool to support 8192 queries, which may slow performance or cause unexpected behaviour if the query pool is not established prior to a Metal renderpass, or if the query pool is changed within a Metal renderpass. If disabled, one MTLBuffer will be shared by all query pools, which improves performance, but limits the total device queries to 8192. Default value is true. */
VkBool32 presentWithCommandBuffer; /**< If enabled, each surface presentation is scheduled using a command buffer. Enabling this may improve rendering frame synchronization, but may result in reduced frame rates. Default value is false if the MVK_PRESENT_WITHOUT_COMMAND_BUFFER build setting is defined when MoltenVK is compiled, and true otherwise. By default the MVK_PRESENT_WITHOUT_COMMAND_BUFFER build setting is not defined and the value of this setting is true. */
VkBool32 presentWithCommandBuffer; /**< If enabled, each surface presentation is scheduled using a command buffer. Enabling this setting may improve rendering frame synchronization, but may result in reduced frame rates. Default value is false if the MVK_PRESENT_WITHOUT_COMMAND_BUFFER build setting is defined when MoltenVK is compiled, and true otherwise. By default the MVK_PRESENT_WITHOUT_COMMAND_BUFFER build setting is not defined and the value of this setting is true. */
VkBool32 swapchainMagFilterUseNearest; /**< If enabled, swapchain images will use simple Nearest sampling when magnifying the swapchain image to fit a physical display surface. If disabled, swapchain images will use Linear sampling when magnifying the swapchain image to fit a physical display surface. Enabling this setting avoids smearing effects when swapchain images are simple integer multiples of display pixels (e.g. macOS Retina, and typical of graphics apps and games), but may cause aliasing effects when using non-integer display scaling. Default value is true. */
VkBool32 displayWatermark; /**< If enabled, a MoltenVK logo watermark will be rendered on top of the scene. This can be enabled for publicity during demos. Default value is true if the MVK_DISPLAY_WATERMARK build setting is defined when MoltenVK is compiled, and false otherwise. By default the MVK_DISPLAY_WATERMARK build setting is not defined. */
VkBool32 performanceTracking; /**< If enabled, per-frame performance statistics are tracked, optionally logged, and can be retrieved via the vkGetSwapchainPerformanceMVK() function, and various performance statistics are tracked, logged, and can be retrieved via the vkGetPerformanceStatisticsMVK() function. Default value is true in the presence of the DEBUG build setting, and false otherwise. */
uint32_t performanceLoggingFrameCount; /**< If non-zero, performance statistics will be periodically logged to the console, on a repeating cycle of this many frames per swapchain. The performanceTracking capability must also be enabled. Default value is 300 in the presence of the DEBUG build setting, and zero otherwise. */
Expand All @@ -87,6 +88,7 @@ typedef struct {
VkBool32 texelBuffers; /**< If true, texel buffers are supported, allowing the contents of a buffer to be interpreted as an image via a VkBufferView. */
VkBool32 depthClipMode; /**< If true, the device supports both depth clipping and depth clamping per the depthClampEnable flag of VkPipelineRasterizationStateCreateInfo in VkGraphicsPipelineCreateInfo. */
VkBool32 presentModeImmediate; /**< If true, the device supports immediate surface present mode (VK_PRESENT_MODE_IMMEDIATE_KHR), allowing a swapchain image to be presented immediately, without waiting for the vertical sync period of the display. */
uint32_t maxTextureDimension; /**< The maximum size of each texture dimension (width, height, or depth). */
uint32_t maxPerStageBufferCount; /**< The total number of per-stage Metal buffers available for shader uniform content and attributes. */
uint32_t maxPerStageTextureCount; /**< The total number of per-stage Metal textures available for shader uniform content. */
uint32_t maxPerStageSamplerCount; /**< The total number of per-stage Metal samplers available for shader uniform content. */
Expand Down
19 changes: 3 additions & 16 deletions MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,21 +85,8 @@ class MVKBufferView : public MVKBaseDeviceObject {

public:


#pragma mark Resource memory

/** Returns the number of bytes used by this buffer view. */
inline VkDeviceSize getByteCount() { return _byteCount; };


#pragma mark Metal

/** Returns the Metal buffer underlying this memory allocation. */
inline id<MTLBuffer> getMTLBuffer() { return _buffer->getMTLBuffer(); }

/** Returns the offset at which the contents of this instance starts within the underlying Metal buffer. */
inline NSUInteger getMTLBufferOffset() { return _mtlBufferOffset; }

/** Returns a Metal texture that overlays this buffer view. */
id<MTLTexture> getMTLTexture();

Expand All @@ -112,10 +99,10 @@ class MVKBufferView : public MVKBaseDeviceObject {

protected:
MVKBuffer* _buffer;
id<MTLTexture> _mtlTexture;
MTLPixelFormat _mtlPixelFormat;
NSUInteger _mtlBufferOffset;
MTLPixelFormat _mtlPixelFormat;
id<MTLTexture> _mtlTexture;
VkDeviceSize _byteCount;
NSUInteger _mtlBytesPerRow;
VkExtent2D _textureSize;
std::mutex _lock;
};
Expand Down
29 changes: 17 additions & 12 deletions MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,13 @@
lock_guard<mutex> lock(_lock);
if (_mtlTexture) { return _mtlTexture; }

VkDeviceSize byteAlign = _device->_pProperties->limits.minTexelBufferOffsetAlignment;
NSUInteger mtlByteCnt = mvkAlignByteOffset(_byteCount, byteAlign);
MTLTextureDescriptor* mtlTexDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat: _mtlPixelFormat
width: _textureSize.width
height: _textureSize.height
mipmapped: NO];
_mtlTexture = [getMTLBuffer() newTextureWithDescriptor: mtlTexDesc
offset: _mtlBufferOffset
bytesPerRow: mtlByteCnt];
_mtlTexture = [_buffer->getMTLBuffer() newTextureWithDescriptor: mtlTexDesc
offset: _mtlBufferOffset
bytesPerRow: _mtlBytesPerRow];
}
return _mtlTexture;
}
Expand All @@ -131,18 +129,25 @@
_buffer = (MVKBuffer*)pCreateInfo->buffer;
_mtlBufferOffset = _buffer->getMTLBufferOffset() + pCreateInfo->offset;
_mtlPixelFormat = mtlPixelFormatFromVkFormat(pCreateInfo->format);
_mtlTexture = nil;
VkExtent2D fmtBlockSize = mvkVkFormatBlockTexelSize(pCreateInfo->format); // Pixel size of format
size_t bytesPerBlock = mvkVkFormatBytesPerBlock(pCreateInfo->format);
_mtlTexture = nil;

// Lay out the texture as a 1D array of texel blocks (which are texels for non-compressed textures) that covers the bytes
_byteCount = pCreateInfo->range;
if (_byteCount == VK_WHOLE_SIZE) { _byteCount = _buffer->getByteCount() - _mtlBufferOffset; } // Remaining bytes in buffer
size_t blockCount = _byteCount / bytesPerBlock;
_byteCount = blockCount * bytesPerBlock; // Round down
VkDeviceSize byteCount = pCreateInfo->range;
if (byteCount == VK_WHOLE_SIZE) { byteCount = _buffer->getByteCount() - _mtlBufferOffset; } // Remaining bytes in buffer
size_t blockCount = byteCount / bytesPerBlock;

// But Metal requires the texture to be a 2D texture. Determine the number of 2D rows we need and their width.
size_t maxBlocksPerRow = _device->_pMetalFeatures->maxTextureDimension / fmtBlockSize.width;
size_t blocksPerRow = min(blockCount, maxBlocksPerRow);
_mtlBytesPerRow = mvkAlignByteOffset(blocksPerRow * bytesPerBlock, _device->_pProperties->limits.minTexelBufferOffsetAlignment);

size_t rowCount = blockCount / blocksPerRow;
if (blockCount % blocksPerRow) { rowCount++; }

_textureSize.width = (uint32_t)blockCount * fmtBlockSize.width;
_textureSize.height = fmtBlockSize.height;
_textureSize.width = uint32_t(blocksPerRow * fmtBlockSize.width);
_textureSize.height = uint32_t(rowCount * fmtBlockSize.height);

if ( !_device->_pMetalFeatures->texelBuffers ) {
setConfigurationResult(mvkNotifyErrorWithText(VK_ERROR_FEATURE_NOT_PRESENT, "Texel buffers are not supported on this device."));
Expand Down
47 changes: 19 additions & 28 deletions MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm
Original file line number Diff line number Diff line change
Expand Up @@ -319,16 +319,12 @@
_metalFeatures.mtlBufferAlignment = 64;
_metalFeatures.mtlCopyBufferAlignment = 1;
_metalFeatures.texelBuffers = true;

if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily3_v1] ) {
_metalFeatures.indirectDrawing = true;
_metalFeatures.baseVertexInstanceDrawing = true;
_metalFeatures.mtlBufferAlignment = 16; // Min float4 alignment for typical vertex buffers. MTLBuffer may go down to 4 bytes for other data.
}
_metalFeatures.maxTextureDimension = (4 * KIBI);

if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily1_v2] ) {
_metalFeatures.mslVersion = SPIRVToMSLConverterOptions::makeMSLVersion(1, 1);
_metalFeatures.dynamicMTLBuffers = true;
_metalFeatures.maxTextureDimension = (8 * KIBI);
}
if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily1_v3] ) {
_metalFeatures.mslVersion = SPIRVToMSLConverterOptions::makeMSLVersion(1, 2);
Expand All @@ -341,6 +337,13 @@
if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily2_v4] ) {
_metalFeatures.depthClipMode = true;
}

if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily3_v1] ) {
_metalFeatures.indirectDrawing = true;
_metalFeatures.baseVertexInstanceDrawing = true;
_metalFeatures.mtlBufferAlignment = 16; // Min float4 alignment for typical vertex buffers. MTLBuffer may go down to 4 bytes for other data.
_metalFeatures.maxTextureDimension = (16 * KIBI);
}
#endif

#if MVK_MACOS
Expand All @@ -352,6 +355,7 @@
_metalFeatures.baseVertexInstanceDrawing = true;
_metalFeatures.ioSurfaces = true;
_metalFeatures.depthClipMode = true;
_metalFeatures.maxTextureDimension = (16 * KIBI);

if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_macOS_GPUFamily1_v2] ) {
_metalFeatures.mslVersion = SPIRVToMSLConverterOptions::makeMSLVersion(1, 2);
Expand Down Expand Up @@ -513,29 +517,15 @@
_properties.limits.sampledImageStencilSampleCounts = _metalFeatures.supportedSampleCounts;
_properties.limits.storageImageSampleCounts = _metalFeatures.supportedSampleCounts;

uint32_t maxTextureDimension;
#if MVK_IOS
if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily3_v1] ) {
maxTextureDimension = (16 * KIBI);
} else if ( [_mtlDevice supportsFeatureSet: MTLFeatureSet_iOS_GPUFamily1_v2] ) {
maxTextureDimension = (8 * KIBI);
} else {
maxTextureDimension = (4 * KIBI);
}
#endif
#if MVK_MACOS
maxTextureDimension = (16 * KIBI);
#endif

_properties.limits.maxImageDimension1D = maxTextureDimension;
_properties.limits.maxImageDimension2D = maxTextureDimension;
_properties.limits.maxImageDimensionCube = maxTextureDimension;
_properties.limits.maxFramebufferWidth = maxTextureDimension;
_properties.limits.maxFramebufferHeight = maxTextureDimension;
_properties.limits.maxImageDimension1D = _metalFeatures.maxTextureDimension;
_properties.limits.maxImageDimension2D = _metalFeatures.maxTextureDimension;
_properties.limits.maxImageDimensionCube = _metalFeatures.maxTextureDimension;
_properties.limits.maxFramebufferWidth = _metalFeatures.maxTextureDimension;
_properties.limits.maxFramebufferHeight = _metalFeatures.maxTextureDimension;
_properties.limits.maxFramebufferLayers = 256;

_properties.limits.maxViewportDimensions[0] = maxTextureDimension;
_properties.limits.maxViewportDimensions[1] = maxTextureDimension;
_properties.limits.maxViewportDimensions[0] = _metalFeatures.maxTextureDimension;
_properties.limits.maxViewportDimensions[1] = _metalFeatures.maxTextureDimension;
float maxVPDim = max(_properties.limits.maxViewportDimensions[0], _properties.limits.maxViewportDimensions[1]);
_properties.limits.viewportBoundsRange[0] = (-2.0 * maxVPDim);
_properties.limits.viewportBoundsRange[1] = (2.0 * maxVPDim) - 1;
Expand Down Expand Up @@ -569,7 +559,7 @@
_properties.limits.maxDescriptorSetSampledImages = (_properties.limits.maxPerStageDescriptorSampledImages * 2);
_properties.limits.maxDescriptorSetStorageImages = (_properties.limits.maxPerStageDescriptorStorageImages * 2);

_properties.limits.maxTexelBufferElements = (uint32_t)_metalFeatures.maxMTLBufferSize;
_properties.limits.maxTexelBufferElements = _properties.limits.maxImageDimension2D * _properties.limits.maxImageDimension2D;
_properties.limits.maxUniformBufferRange = (uint32_t)_metalFeatures.maxMTLBufferSize;
_properties.limits.maxStorageBufferRange = (uint32_t)_metalFeatures.maxMTLBufferSize;
_properties.limits.maxPushConstantsSize = (4 * KIBI);
Expand Down Expand Up @@ -1451,6 +1441,7 @@
pCfg->supportLargeQueryPools = true;
pCfg->shaderConversionFlipVertexY = true;
pCfg->presentWithCommandBuffer = MVK_PRESENT_WITH_COMMAND_BUFFER_BOOL;
pCfg->swapchainMagFilterUseNearest = true;
pCfg->displayWatermark = MVK_DISPLAY_WATERMARK_BOOL;
pCfg->performanceTracking = MVK_DEBUG;
pCfg->performanceLoggingFrameCount = MVK_DEBUG ? 300 : 0;
Expand Down
2 changes: 2 additions & 0 deletions MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@
const VkGraphicsPipelineCreateInfo* pCreateInfo) {

shaderContext.options.mslVersion = _device->_pMetalFeatures->mslVersion;
shaderContext.options.texelBufferTextureWidth = _device->_pMetalFeatures->maxTextureDimension;

MVKPipelineLayout* layout = (MVKPipelineLayout*)pCreateInfo->layout;
layout->populateShaderConverterContext(shaderContext);
Expand Down Expand Up @@ -501,6 +502,7 @@ void serialize(Archive & archive, SPIRVToMSLConverterOptions& opt) {
archive(opt.entryPointName,
opt.entryPointStage,
opt.mslVersion,
opt.texelBufferTextureWidth,
opt.shouldFlipVertexY,
opt.isRenderingPoints);
}
Expand Down
1 change: 1 addition & 0 deletions MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class MVKSwapchain : public MVKBaseDeviceObject {
protected:
friend class MVKSwapchainImage;

void initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo);
void initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo);
void initFrameIntervalTracking();
void releaseUndisplayedSurfaces();
Expand Down
38 changes: 19 additions & 19 deletions MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm
Original file line number Diff line number Diff line change
Expand Up @@ -183,36 +183,36 @@
MVKSwapchain* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain;
if (oldSwapchain) { oldSwapchain->releaseUndisplayedSurfaces(); }

// Get the layer underlying the surface view, which must be a CAMetalLayer.
initCAMetalLayer(pCreateInfo);
initSurfaceImages(pCreateInfo);
initFrameIntervalTracking();

_licenseWatermark = NULL;
}

// Initializes the CAMetalLayer underlying the surface of this swapchain.
void MVKSwapchain::initCAMetalLayer(const VkSwapchainCreateInfoKHR* pCreateInfo) {

MVKSurface* mvkSrfc = (MVKSurface*)pCreateInfo->surface;
_mtlLayer = mvkSrfc->getCAMetalLayer();
_mtlLayer.device = getMTLDevice();
_mtlLayer.pixelFormat = mtlPixelFormatFromVkFormat(pCreateInfo->imageFormat);
_mtlLayer.framebufferOnly = !mvkIsAnyFlagEnabled(pCreateInfo->imageUsage, (VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_STORAGE_BIT));

if (pCreateInfo->presentMode == VK_PRESENT_MODE_IMMEDIATE_KHR) {
_mtlLayer.displaySyncEnabledMVK = NO;
} else {
_mtlLayer.displaySyncEnabledMVK = YES;
}
_mtlLayer.displaySyncEnabledMVK = (pCreateInfo->presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR);
_mtlLayer.magnificationFilter = _device->_mvkConfig.swapchainMagFilterUseNearest ? kCAFilterNearest : kCAFilterLinear;
_mtlLayer.framebufferOnly = !mvkIsAnyFlagEnabled(pCreateInfo->imageUsage, (VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_STORAGE_BIT));

// TODO: set additional CAMetalLayer properties before extracting drawables:
// - presentsWithTransaction
// - maximumDrawableCount (maybe for MAILBOX?)
// - drawsAsynchronously
// - colorspace (macOS only) Vulkan only supports sRGB colorspace for now.
// - wantsExtendedDynamicRangeContent (macOS only)

initSurfaceImages(pCreateInfo);
initFrameIntervalTracking();

_licenseWatermark = NULL;
// - colorspace (macOS only) Vulkan only supports sRGB colorspace for now.
// - wantsExtendedDynamicRangeContent (macOS only)
}

/** Initializes the array of images used for the surfaces of this swapchain. */
// Initializes the array of images used for the surface of this swapchain.
void MVKSwapchain::initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo) {

_mtlLayerOrigDrawSize = _mtlLayer.updatedDrawableSizeMVK;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ bool contains(const vector<T>& vec, const T& val) {
MVK_PUBLIC_SYMBOL bool SPIRVToMSLConverterOptions::matches(const SPIRVToMSLConverterOptions& other) const {
if (entryPointStage != other.entryPointStage) { return false; }
if (mslVersion != other.mslVersion) { return false; }
if (texelBufferTextureWidth != other.texelBufferTextureWidth) { return false; }
if (!!shouldFlipVertexY != !!other.shouldFlipVertexY) { return false; }
if (!!isRenderingPoints != !!other.isRenderingPoints) { return false; }
if (entryPointName != other.entryPointName) { return false; }
Expand Down Expand Up @@ -199,6 +200,7 @@ MVK_PUBLIC_SYMBOL bool SPIRVToMSLConverter::convert(SPIRVToMSLConverterContext&
#endif

mslOpts.msl_version = context.options.mslVersion;
mslOpts.texel_buffer_texture_width = context.options.texelBufferTextureWidth;
mslOpts.enable_point_size_builtin = context.options.isRenderingPoints;
mslOpts.resolve_specialized_array_lengths = true;
pMSLCompiler->set_msl_options(mslOpts);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ namespace mvk {
spv::ExecutionModel entryPointStage = spv::ExecutionModelMax;

uint32_t mslVersion = makeMSLVersion(2);
uint32_t texelBufferTextureWidth = 4096;
bool shouldFlipVertexY = true;
bool isRenderingPoints = false;

Expand Down
Loading

0 comments on commit 5943aae

Please sign in to comment.