Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nvptx: add implementations for GPU builtins #21030

Merged
merged 1 commit
Aug 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Sema.zig
Original file line number Diff line number Diff line change
Expand Up @@ -26703,7 +26703,7 @@ fn zirWorkItem(

switch (target.cpu.arch) {
// TODO: Allow for other GPU targets.
.amdgcn, .spirv, .spirv64, .spirv32 => {},
.amdgcn, .spirv, .spirv64, .spirv32, .nvptx, .nvptx64 => {},
else => {
return sema.fail(block, builtin_src, "builtin only available on GPU targets; targeted architecture is {s}", .{@tagName(target.cpu.arch)});
},
Expand Down
54 changes: 35 additions & 19 deletions src/codegen/llvm.zig
Original file line number Diff line number Diff line change
Expand Up @@ -10286,7 +10286,7 @@ pub const FuncGen = struct {
return self.wip.cast(.addrspacecast, operand, try o.lowerType(inst_ty), "");
}

fn amdgcnWorkIntrinsic(
fn workIntrinsic(
self: *FuncGen,
dimension: u32,
default: u32,
Expand All @@ -10303,44 +10303,60 @@ pub const FuncGen = struct {
/// Lowers the work-item-id AIR instruction to the target GPU's intrinsic.
/// The requested dimension (x/y/z) is carried in the instruction's payload;
/// a default of 0 is returned for out-of-range dimensions by `workIntrinsic`.
/// Only reachable for GPU architectures accepted by Sema's `zirWorkItem`.
fn airWorkItemId(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
    const o = self.ng.object;
    const target = o.pt.zcu.getTarget();

    const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
    const dimension = pl_op.payload;

    return switch (target.cpu.arch) {
        .amdgcn => self.workIntrinsic(dimension, 0, "amdgcn.workitem.id"),
        // NOTE(review): `workIntrinsic` presumably appends ".x"/".y"/".z" for
        // the dimension; its body is not visible here — confirm.
        .nvptx, .nvptx64 => self.workIntrinsic(dimension, 0, "nvvm.read.ptx.sreg.tid"),
        // Sema rejects non-GPU targets before codegen, so this is unreachable.
        else => unreachable,
    };
}

/// Lowers the work-group-size AIR instruction for the target GPU.
/// On AMDGCN the size is read from the HSA dispatch packet; on NVPTX it is
/// read from the `ntid` special register via an intrinsic call.
fn airWorkGroupSize(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
    const o = self.ng.object;
    const target = o.pt.zcu.getTarget();

    const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
    const dimension = pl_op.payload;

    switch (target.cpu.arch) {
        .amdgcn => {
            // Dimensions beyond z have size 1 by definition.
            if (dimension >= 3) return .@"1";

            // Fetch the dispatch pointer, which points to this structure:
            // https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/adae6c61e10d371f7cbc3d0e94ae2c070cab18a4/src/inc/hsa.h#L2913
            const dispatch_ptr =
                try self.wip.callIntrinsic(.normal, .none, .@"amdgcn.dispatch.ptr", &.{}, &.{}, "");

            // Load the work_group_* member from the struct as u16.
            // Just treat the dispatch pointer as an array of u16 to keep things
            // simple; the workgroup_size_{x,y,z} fields start at u16 index 2.
            const workgroup_size_ptr = try self.wip.gep(.inbounds, .i16, dispatch_ptr, &.{
                try o.builder.intValue(try o.lowerType(Type.usize), 2 + dimension),
            }, "");
            const workgroup_size_alignment = comptime Builder.Alignment.fromByteUnits(2);
            return self.wip.load(.normal, .i16, workgroup_size_ptr, workgroup_size_alignment, "");
        },
        .nvptx, .nvptx64 => {
            // Default of 1: a dimension past z always has work-group size 1.
            return self.workIntrinsic(dimension, 1, "nvvm.read.ptx.sreg.ntid");
        },
        // Sema rejects non-GPU targets before codegen, so this is unreachable.
        else => unreachable,
    }
}

/// Lowers the work-group-id AIR instruction to the target GPU's intrinsic.
/// The requested dimension (x/y/z) is carried in the instruction's payload;
/// out-of-range dimensions yield the default 0.
fn airWorkGroupId(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
    const o = self.ng.object;
    const target = o.pt.zcu.getTarget();

    const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
    const dimension = pl_op.payload;

    return switch (target.cpu.arch) {
        .amdgcn => self.workIntrinsic(dimension, 0, "amdgcn.workgroup.id"),
        // On NVPTX the work-group (CUDA "block") id lives in the ctaid register.
        .nvptx, .nvptx64 => self.workIntrinsic(dimension, 0, "nvvm.read.ptx.sreg.ctaid"),
        // Sema rejects non-GPU targets before codegen, so this is unreachable.
        else => unreachable,
    };
}

fn getErrorNameTable(self: *FuncGen) Allocator.Error!Builder.Variable.Index {
Expand Down
77 changes: 77 additions & 0 deletions src/codegen/llvm/Builder.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2729,6 +2729,17 @@ pub const Intrinsic = enum {
@"amdgcn.workgroup.id.z",
@"amdgcn.dispatch.ptr",

// NVPTX
@"nvvm.read.ptx.sreg.tid.x",
@"nvvm.read.ptx.sreg.tid.y",
@"nvvm.read.ptx.sreg.tid.z",
@"nvvm.read.ptx.sreg.ntid.x",
@"nvvm.read.ptx.sreg.ntid.y",
@"nvvm.read.ptx.sreg.ntid.z",
@"nvvm.read.ptx.sreg.ctaid.x",
@"nvvm.read.ptx.sreg.ctaid.y",
@"nvvm.read.ptx.sreg.ctaid.z",

// WebAssembly
@"wasm.memory.size",
@"wasm.memory.grow",
Expand Down Expand Up @@ -3886,6 +3897,72 @@ pub const Intrinsic = enum {
.attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .speculatable, .willreturn, .{ .memory = Attribute.Memory.all(.none) } },
},

.@"nvvm.read.ptx.sreg.tid.x" = .{
.ret_len = 1,
.params = &.{
.{ .kind = .{ .type = .i32 } },
},
.attrs = &.{ .nounwind, .readnone },
},
.@"nvvm.read.ptx.sreg.tid.y" = .{
.ret_len = 1,
.params = &.{
.{ .kind = .{ .type = .i32 } },
},
.attrs = &.{ .nounwind, .readnone },
},
.@"nvvm.read.ptx.sreg.tid.z" = .{
.ret_len = 1,
.params = &.{
.{ .kind = .{ .type = .i32 } },
},
.attrs = &.{ .nounwind, .readnone },
},

.@"nvvm.read.ptx.sreg.ntid.x" = .{
.ret_len = 1,
.params = &.{
.{ .kind = .{ .type = .i32 } },
},
.attrs = &.{ .nounwind, .readnone },
},
.@"nvvm.read.ptx.sreg.ntid.y" = .{
.ret_len = 1,
.params = &.{
.{ .kind = .{ .type = .i32 } },
},
.attrs = &.{ .nounwind, .readnone },
},
.@"nvvm.read.ptx.sreg.ntid.z" = .{
.ret_len = 1,
.params = &.{
.{ .kind = .{ .type = .i32 } },
},
.attrs = &.{ .nounwind, .readnone },
},

.@"nvvm.read.ptx.sreg.ctaid.x" = .{
.ret_len = 1,
.params = &.{
.{ .kind = .{ .type = .i32 } },
},
.attrs = &.{ .nounwind, .readnone },
},
.@"nvvm.read.ptx.sreg.ctaid.y" = .{
.ret_len = 1,
.params = &.{
.{ .kind = .{ .type = .i32 } },
},
.attrs = &.{ .nounwind, .readnone },
},
.@"nvvm.read.ptx.sreg.ctaid.z" = .{
.ret_len = 1,
.params = &.{
.{ .kind = .{ .type = .i32 } },
},
.attrs = &.{ .nounwind, .readnone },
},

.@"wasm.memory.size" = .{
.ret_len = 1,
.params = &.{
Expand Down