diff --git a/src/Sema.zig b/src/Sema.zig index 4fcb9c98c860..2d8858d7cd63 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -26703,7 +26703,7 @@ fn zirWorkItem( switch (target.cpu.arch) { // TODO: Allow for other GPU targets. - .amdgcn, .spirv, .spirv64, .spirv32 => {}, + .amdgcn, .spirv, .spirv64, .spirv32, .nvptx, .nvptx64 => {}, else => { return sema.fail(block, builtin_src, "builtin only available on GPU targets; targeted architecture is {s}", .{@tagName(target.cpu.arch)}); }, diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 6453ea04a323..e65615058440 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -10286,7 +10286,7 @@ pub const FuncGen = struct { return self.wip.cast(.addrspacecast, operand, try o.lowerType(inst_ty), ""); } - fn amdgcnWorkIntrinsic( + fn workIntrinsic( self: *FuncGen, dimension: u32, default: u32, @@ -10303,44 +10303,60 @@ pub const FuncGen = struct { fn airWorkItemId(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { const o = self.ng.object; const target = o.pt.zcu.getTarget(); - assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; const dimension = pl_op.payload; - return self.amdgcnWorkIntrinsic(dimension, 0, "amdgcn.workitem.id"); + + return switch (target.cpu.arch) { + .amdgcn => self.workIntrinsic(dimension, 0, "amdgcn.workitem.id"), + .nvptx, .nvptx64 => self.workIntrinsic(dimension, 0, "nvvm.read.ptx.sreg.tid"), + else => unreachable, + }; } fn airWorkGroupSize(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { const o = self.ng.object; const target = o.pt.zcu.getTarget(); - assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; const dimension = pl_op.payload; - if (dimension >= 3) return .@"1"; - - // Fetch the dispatch pointer, which points to this structure: - // https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/adae6c61e10d371f7cbc3d0e94ae2c070cab18a4/src/inc/hsa.h#L2913 - const dispatch_ptr = - try self.wip.callIntrinsic(.normal, .none, .@"amdgcn.dispatch.ptr", &.{}, &.{}, ""); - // Load the work_group_* member from the struct as u16. - // Just treat the dispatch pointer as an array of u16 to keep things simple. - const workgroup_size_ptr = try self.wip.gep(.inbounds, .i16, dispatch_ptr, &.{ - try o.builder.intValue(try o.lowerType(Type.usize), 2 + dimension), - }, ""); - const workgroup_size_alignment = comptime Builder.Alignment.fromByteUnits(2); - return self.wip.load(.normal, .i16, workgroup_size_ptr, workgroup_size_alignment, ""); + switch (target.cpu.arch) { + .amdgcn => { + if (dimension >= 3) return .@"1"; + + // Fetch the dispatch pointer, which points to this structure: + // https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/adae6c61e10d371f7cbc3d0e94ae2c070cab18a4/src/inc/hsa.h#L2913 + const dispatch_ptr = + try self.wip.callIntrinsic(.normal, .none, .@"amdgcn.dispatch.ptr", &.{}, &.{}, ""); + + // Load the work_group_* member from the struct as u16. + // Just treat the dispatch pointer as an array of u16 to keep things simple. + const workgroup_size_ptr = try self.wip.gep(.inbounds, .i16, dispatch_ptr, &.{ + try o.builder.intValue(try o.lowerType(Type.usize), 2 + dimension), + }, ""); + const workgroup_size_alignment = comptime Builder.Alignment.fromByteUnits(2); + return self.wip.load(.normal, .i16, workgroup_size_ptr, workgroup_size_alignment, ""); + }, + .nvptx, .nvptx64 => { + return self.workIntrinsic(dimension, 1, "nvvm.read.ptx.sreg.ntid"); + }, + else => unreachable, + } } fn airWorkGroupId(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value { const o = self.ng.object; const target = o.pt.zcu.getTarget(); - assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; const dimension = pl_op.payload; - return self.amdgcnWorkIntrinsic(dimension, 0, "amdgcn.workgroup.id"); + + return switch (target.cpu.arch) { + .amdgcn => self.workIntrinsic(dimension, 0, "amdgcn.workgroup.id"), + .nvptx, .nvptx64 => self.workIntrinsic(dimension, 0, "nvvm.read.ptx.sreg.ctaid"), + else => unreachable, + }; } fn getErrorNameTable(self: *FuncGen) Allocator.Error!Builder.Variable.Index { diff --git a/src/codegen/llvm/Builder.zig b/src/codegen/llvm/Builder.zig index a23bc86c57a9..90da3bdd7a92 100644 --- a/src/codegen/llvm/Builder.zig +++ b/src/codegen/llvm/Builder.zig @@ -2729,6 +2729,17 @@ pub const Intrinsic = enum { @"amdgcn.workgroup.id.z", @"amdgcn.dispatch.ptr", + // NVPTX + @"nvvm.read.ptx.sreg.tid.x", + @"nvvm.read.ptx.sreg.tid.y", + @"nvvm.read.ptx.sreg.tid.z", + @"nvvm.read.ptx.sreg.ntid.x", + @"nvvm.read.ptx.sreg.ntid.y", + @"nvvm.read.ptx.sreg.ntid.z", + @"nvvm.read.ptx.sreg.ctaid.x", + @"nvvm.read.ptx.sreg.ctaid.y", + @"nvvm.read.ptx.sreg.ctaid.z", + // WebAssembly @"wasm.memory.size", @"wasm.memory.grow", @@ -3886,6 +3897,72 @@ pub const Intrinsic = enum { .attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .speculatable, .willreturn, .{ .memory = Attribute.Memory.all(.none) } }, }, + .@"nvvm.read.ptx.sreg.tid.x" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nounwind, .readnone }, + }, + .@"nvvm.read.ptx.sreg.tid.y" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nounwind, .readnone }, + }, + .@"nvvm.read.ptx.sreg.tid.z" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nounwind, .readnone }, + }, + + .@"nvvm.read.ptx.sreg.ntid.x" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nounwind, .readnone }, + }, + .@"nvvm.read.ptx.sreg.ntid.y" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nounwind, .readnone }, + }, + .@"nvvm.read.ptx.sreg.ntid.z" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nounwind, .readnone }, + }, + + .@"nvvm.read.ptx.sreg.ctaid.x" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nounwind, .readnone }, + }, + .@"nvvm.read.ptx.sreg.ctaid.y" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nounwind, .readnone }, + }, + .@"nvvm.read.ptx.sreg.ctaid.z" = .{ + .ret_len = 1, + .params = &.{ + .{ .kind = .{ .type = .i32 } }, + }, + .attrs = &.{ .nounwind, .readnone }, + }, + .@"wasm.memory.size" = .{ .ret_len = 1, .params = &.{