Skip to content

Commit

Permalink
[VP] add missing preferred SLM setting
Browse files Browse the repository at this point in the history
add missing preferred SLM setting on MTL
  • Loading branch information
Alex1Zhang authored and intel-mediadev committed Aug 2, 2023
1 parent be4e290 commit 9e0c256
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 2 deletions.
1 change: 1 addition & 0 deletions media_common/agnostic/common/hw/mhw_render.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ typedef struct _MHW_GPGPU_WALKER_PARAMS
uint32_t IndirectDataLength;
uint32_t IndirectDataStartAddress;
uint32_t BindingTableID;
uint32_t ForcePreferredSLMZero;
} MHW_GPGPU_WALKER_PARAMS, *PMHW_GPGPU_WALKER_PARAMS;

typedef struct _MHW_MEDIA_OBJECT_PARAMS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1408,13 +1408,49 @@ namespace xe_hpg
//!< DWORD 6_7
struct
{
    // Both bit-fields share the same 64-bit declared type on purpose: some ABIs (notably MSVC)
    // start a new storage unit whenever the declared type width changes between adjacent
    // bit-fields, which would make this struct larger than the 64-bit Value it is overlaid with.
    // Only bits [3:0] carry data, so widening the declared type does not change what the field can hold.
    uint64_t PreferredSlmAllocationSizePerSubslice : __CODEGEN_BITFIELD(0, 3);   //!< PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE
    uint64_t Reserved192                           : __CODEGEN_BITFIELD(4, 63);  //!< Reserved
};
uint64_t Value = 0;
} DW6_7;

//! \name Initializations

//! \brief SHARED_LOCAL_MEMORY_SIZE
//! \details
//!     Encodings for the amount of Shared Local Memory (SLM) that the
//!     thread group requires. The size noted next to each encoding is the
//!     one spelled out in the enumerator name.
//!     If barriers are not enabled, HW will enable at least 1 barrier so
//!     that mid-thread preemption works.
enum SHARED_LOCAL_MEMORY_SIZE
{
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES0K  = 0x0,  //!< 0K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES1K  = 0x1,  //!< 1K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES2K  = 0x2,  //!< 2K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES4K  = 0x3,  //!< 4K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES8K  = 0x4,  //!< 8K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES16K = 0x5,  //!< 16K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES32K = 0x6,  //!< 32K
    SHARED_LOCAL_MEMORY_SIZE_SLMENCODES64K = 0x7,  //!< 64K
};

//! \brief PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE
//! \details
//!     For products where SLM and the Subslice L1 cache share a common,
//!     re-partitionable RAM, this field indicates the preferred SLM size
//!     per Subslice for this dispatch. The SLM size programmed here should
//!     be >= the per thread-group SLM size programmed in DW[5][20:16].
//!     Note that the raw value 0x0 is the MAX encoding; the 0K encoding
//!     is 0x8. The size noted next to each encoding is the one spelled
//!     out in the enumerator name.
enum PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE
{
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODESMAX  = 0x0,  //!< MAX
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES0K   = 0x8,  //!< 0K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES16K  = 0x9,  //!< 16K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES32K  = 0xA,  //!< 32K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES64K  = 0xB,  //!< 64K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES96K  = 0xC,  //!< 96K
    PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES128K = 0xD,  //!< 128K
};

//! \brief Explicit member initialization function
INTERFACE_DESCRIPTOR_DATA_G12HP_CMD()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,16 @@ class Impl : public render::Impl<mhw::render::xe_hpg::Cmd>
cmd.interface_descriptor_data.DW4.BindingTablePointer = MOS_ROUNDUP_SHIFT(params.dwBindingTableOffset, MHW_BINDING_TABLE_ID_SHIFT);
cmd.interface_descriptor_data.DW5.NumberOfThreadsInGpgpuThreadGroup = params.dwNumberofThreadsInGPGPUGroup;
cmd.interface_descriptor_data.DW5.SharedLocalMemorySize = params.dwSharedLocalMemorySize;

if (params.dwSharedLocalMemorySize > 0)
{
cmd.interface_descriptor_data.DW6_7.PreferredSlmAllocationSizePerSubslice = mhw::render::xe_hpg::Cmd::COMPUTE_WALKER_CMD::INTERFACE_DESCRIPTOR_DATA_G12HP_CMD::PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES96K;
}
else // if (params.dwSharedLocalMemorySize == 0)
{
cmd.interface_descriptor_data.DW6_7.PreferredSlmAllocationSizePerSubslice = params.forcePreferredSLMZero ?
mhw::render::xe_hpg::Cmd::COMPUTE_WALKER_CMD::INTERFACE_DESCRIPTOR_DATA_G12HP_CMD::PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODESMAX :
mhw::render::xe_hpg::Cmd::COMPUTE_WALKER_CMD::INTERFACE_DESCRIPTOR_DATA_G12HP_CMD::PREFERRED_SLM_ALLOCATION_SIZE_PER_SUBSLICE_SLMENCODES0K;
}
// when Barriers is not 0, the EU fusion will close.
// Assigns barrier count.
if (params.bBarrierEnable)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1138,6 +1138,7 @@ MHW_SETPAR_DECL_SRC(COMPUTE_WALKER, XRenderHal_Interface_Xe_Hpg_Base)
params.dwNumberofThreadsInGPGPUGroup = m_interfaceDescriptorParams->dwNumberofThreadsInGPGPUGroup;
params.dwSharedLocalMemorySize = m_interfaceDescriptorParams->dwSharedLocalMemorySize;
params.IndirectDataStartAddress = m_gpgpuWalkerParams->IndirectDataStartAddress;
params.forcePreferredSLMZero = m_gpgpuWalkerParams->ForcePreferredSLMZero;

if (m_gpgpuWalkerParams->ThreadDepth == 0)
{
Expand Down
1 change: 1 addition & 0 deletions media_softlet/agnostic/common/hw/mhw_render_cmdpar.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ struct _MHW_PAR_T(COMPUTE_WALKER)
bool bGlobalBarrierEnable = false; //! Enable Global Barrier (SKL+)
uint32_t dwNumberofThreadsInGPGPUGroup = 0; //! Number of threads per group
uint32_t dwSharedLocalMemorySize = 0; //! Size of SharedLocalMemory (SLM)
int32_t forcePreferredSLMZero = 0; //! force preferredSLM value as 0
int32_t iCrsThdConDataRdLn = 0;
PMHW_STATE_HEAP pGeneralStateHeap = 0; //! General state heap in use
MemoryBlock *memoryBlock = nullptr; //! Memory block associated with the state heap
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,7 @@ MOS_STATUS RenderCmdPacket::PrepareComputeWalkerParams(KERNEL_WALKER_PARAMS para
// Indirect Data Length is a multiple of 64 bytes (size of L3 cacheline). Bits [5:0] are zero.
gpgpuWalker.IndirectDataLength = MOS_ALIGN_CEIL(params.iCurbeLength, 1 << MHW_COMPUTE_INDIRECT_SHIFT);
gpgpuWalker.BindingTableID = params.iBindingTable;
gpgpuWalker.ForcePreferredSLMZero = params.forcePreferredSLMZero;

return MOS_STATUS_SUCCESS;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ typedef struct _KERNEL_WALKER_PARAMS
bool bSyncFlag;
bool isGroupStartInvolvedInGroupSize; // true if group start need be involved in the group size.
bool calculateBlockXYByAlignedRect; // true if iBlocksX/iBlocksY is calculated by alignedRect in RenderCmdPacket instead of kernel object.
bool forcePreferredSLMZero; // true if preferredSLM need force to 0.
}KERNEL_WALKER_PARAMS, * PKERNEL_WALKER_PARAMS;

typedef struct _KERNEL_PACKET_RENDER_DATA
Expand Down

0 comments on commit 9e0c256

Please sign in to comment.