Skip to content

Commit

Permalink
Description formatting
Browse files Browse the repository at this point in the history
Co-authored-by: mofeing <[email protected]>
  • Loading branch information
jumerckx and mofeing committed Dec 31, 2023
1 parent 2b93af3 commit 7b2cfa7
Show file tree
Hide file tree
Showing 15 changed files with 869 additions and 2,202 deletions.
13 changes: 8 additions & 5 deletions deps/tblgen/jl-generators.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ namespace
description = std::regex_replace(description, std::regex("\n" + leading_spaces_str), "\n");
}
description = std::regex_replace(description, std::regex("(['\"$])"), "\\$1");
description = std::regex_replace(description, std::regex("(^|\n)(Example|Syntax):"), "$1# $2");

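// remove trailing whitespace and newlines
// NOTE(review): description.back() is undefined on an empty string; an empty or all-whitespace description reaches that state here — consider guarding the loop with !description.empty()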
while (std::isspace(description.back())) {
description.pop_back();
}
return description;
}

Expand Down Expand Up @@ -146,12 +152,9 @@ end
attributes = NamedAttribute[{4}]
{5}
create_operation(
"{6}", location,
"{6}", location;
operands, owned_regions, successors, attributes,
results={7},
operands=operands,
owned_regions=owned_regions,
successors=successors,
attributes=attributes,
result_inference={8}
))"; // 0: results, 1: operands, 2: owned_regions, 3: successors, 4: attributes, 5: optionals, 6: opname, 7: results expression, 8: result_inference

Expand Down
80 changes: 20 additions & 60 deletions src/dialects/AMDGPU.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ Note that `lds_barrier` does **not** force reads to or from global memory
to complete before execution continues. Therefore, it should be used when
operations on global memory can be issued far in advance of when their results
are used (for example, by writing them to LDS).
"""
function lds_barrier(; location=Location())
results = MLIRType[]
Expand All @@ -29,12 +28,9 @@ function lds_barrier(; location=Location())
attributes = NamedAttribute[]

create_operation(
"amdgpu.lds_barrier", location,
"amdgpu.lds_barrier", location;
operands, owned_regions, successors, attributes,
results=results,
operands=operands,
owned_regions=owned_regions,
successors=successors,
attributes=attributes,
result_inference=false
)
end
Expand Down Expand Up @@ -67,7 +63,6 @@ order (that is, v[0] will go to arg[7:0], v[1] to arg[15:8] and so on).
The negateA, negateB, and negateC flags are only supported for double-precision
operations on gfx940+.
"""
function mfma(sourceA::Value, sourceB::Value, destC::Value; destD::MLIRType, m::Union{Attribute, NamedAttribute}, n::Union{Attribute, NamedAttribute}, k::Union{Attribute, NamedAttribute}, blocks::Union{Attribute, NamedAttribute}, cbsz=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, abid=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, blgp=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, reducePrecision=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, negateA=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, negateB=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, negateC=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, location=Location())
results = MLIRType[destD, ]
Expand All @@ -84,12 +79,9 @@ function mfma(sourceA::Value, sourceB::Value, destC::Value; destD::MLIRType, m::
(negateC != nothing) && push!(attributes, namedattribute("negateC", negateC))

create_operation(
"amdgpu.mfma", location,
"amdgpu.mfma", location;
operands, owned_regions, successors, attributes,
results=results,
operands=operands,
owned_regions=owned_regions,
successors=successors,
attributes=attributes,
result_inference=false
)
end
Expand All @@ -112,7 +104,6 @@ Out of bounds atomic operations are ignored in hardware.
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
"""
function raw_buffer_atomic_cmpswap(src::Value, cmp::Value, memref::Value, indices::Vector{Value}, sgprOffset=nothing::Union{Nothing, Value}; value::MLIRType, boundsCheck=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, indexOffset=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, location=Location())
results = MLIRType[value, ]
Expand All @@ -126,12 +117,9 @@ function raw_buffer_atomic_cmpswap(src::Value, cmp::Value, memref::Value, indice
(indexOffset != nothing) && push!(attributes, namedattribute("indexOffset", indexOffset))

create_operation(
"amdgpu.raw_buffer_atomic_cmpswap", location,
"amdgpu.raw_buffer_atomic_cmpswap", location;
operands, owned_regions, successors, attributes,
results=results,
operands=operands,
owned_regions=owned_regions,
successors=successors,
attributes=attributes,
result_inference=false
)
end
Expand All @@ -155,7 +143,6 @@ Out of bounds atomic operations are ignored in hardware.
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
"""
function raw_buffer_atomic_fadd(value::Value, memref::Value, indices::Vector{Value}, sgprOffset=nothing::Union{Nothing, Value}; boundsCheck=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, indexOffset=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, location=Location())
results = MLIRType[]
Expand All @@ -169,12 +156,9 @@ function raw_buffer_atomic_fadd(value::Value, memref::Value, indices::Vector{Val
(indexOffset != nothing) && push!(attributes, namedattribute("indexOffset", indexOffset))

create_operation(
"amdgpu.raw_buffer_atomic_fadd", location,
"amdgpu.raw_buffer_atomic_fadd", location;
operands, owned_regions, successors, attributes,
results=results,
operands=operands,
owned_regions=owned_regions,
successors=successors,
attributes=attributes,
result_inference=false
)
end
Expand All @@ -197,7 +181,6 @@ Out of bounds atomic operations are ignored in hardware.
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
"""
function raw_buffer_atomic_fmax(value::Value, memref::Value, indices::Vector{Value}, sgprOffset=nothing::Union{Nothing, Value}; boundsCheck=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, indexOffset=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, location=Location())
results = MLIRType[]
Expand All @@ -211,12 +194,9 @@ function raw_buffer_atomic_fmax(value::Value, memref::Value, indices::Vector{Val
(indexOffset != nothing) && push!(attributes, namedattribute("indexOffset", indexOffset))

create_operation(
"amdgpu.raw_buffer_atomic_fmax", location,
"amdgpu.raw_buffer_atomic_fmax", location;
operands, owned_regions, successors, attributes,
results=results,
operands=operands,
owned_regions=owned_regions,
successors=successors,
attributes=attributes,
result_inference=false
)
end
Expand All @@ -239,7 +219,6 @@ Out of bounds atomic operations are ignored in hardware.
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
"""
function raw_buffer_atomic_smax(value::Value, memref::Value, indices::Vector{Value}, sgprOffset=nothing::Union{Nothing, Value}; boundsCheck=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, indexOffset=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, location=Location())
results = MLIRType[]
Expand All @@ -253,12 +232,9 @@ function raw_buffer_atomic_smax(value::Value, memref::Value, indices::Vector{Val
(indexOffset != nothing) && push!(attributes, namedattribute("indexOffset", indexOffset))

create_operation(
"amdgpu.raw_buffer_atomic_smax", location,
"amdgpu.raw_buffer_atomic_smax", location;
operands, owned_regions, successors, attributes,
results=results,
operands=operands,
owned_regions=owned_regions,
successors=successors,
attributes=attributes,
result_inference=false
)
end
Expand All @@ -281,7 +257,6 @@ Out of bounds atomic operations are ignored in hardware.
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
"""
function raw_buffer_atomic_umin(value::Value, memref::Value, indices::Vector{Value}, sgprOffset=nothing::Union{Nothing, Value}; boundsCheck=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, indexOffset=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, location=Location())
results = MLIRType[]
Expand All @@ -295,12 +270,9 @@ function raw_buffer_atomic_umin(value::Value, memref::Value, indices::Vector{Val
(indexOffset != nothing) && push!(attributes, namedattribute("indexOffset", indexOffset))

create_operation(
"amdgpu.raw_buffer_atomic_umin", location,
"amdgpu.raw_buffer_atomic_umin", location;
operands, owned_regions, successors, attributes,
results=results,
operands=operands,
owned_regions=owned_regions,
successors=successors,
attributes=attributes,
result_inference=false
)
end
Expand Down Expand Up @@ -336,7 +308,6 @@ are translated to intrinsic arguments as follows:
- If `boundsCheck` is false and the target chipset is RDNA, OOB_SELECT is set
to 2 to disable bounds checks, otherwise it is 3
- The cache coherency bits are off
"""
function raw_buffer_load(memref::Value, indices::Vector{Value}, sgprOffset=nothing::Union{Nothing, Value}; value::MLIRType, boundsCheck=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, indexOffset=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, location=Location())
results = MLIRType[value, ]
Expand All @@ -350,12 +321,9 @@ function raw_buffer_load(memref::Value, indices::Vector{Value}, sgprOffset=nothi
(indexOffset != nothing) && push!(attributes, namedattribute("indexOffset", indexOffset))

create_operation(
"amdgpu.raw_buffer_load", location,
"amdgpu.raw_buffer_load", location;
operands, owned_regions, successors, attributes,
results=results,
operands=operands,
owned_regions=owned_regions,
successors=successors,
attributes=attributes,
result_inference=false
)
end
Expand All @@ -379,7 +347,6 @@ components is partially completed is chipset-dependent.
See `amdgpu.raw_buffer_load` for a description of how the underlying
instruction is constructed.
"""
function raw_buffer_store(value::Value, memref::Value, indices::Vector{Value}, sgprOffset=nothing::Union{Nothing, Value}; boundsCheck=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, indexOffset=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, location=Location())
results = MLIRType[]
Expand All @@ -393,12 +360,9 @@ function raw_buffer_store(value::Value, memref::Value, indices::Vector{Value}, s
(indexOffset != nothing) && push!(attributes, namedattribute("indexOffset", indexOffset))

create_operation(
"amdgpu.raw_buffer_store", location,
"amdgpu.raw_buffer_store", location;
operands, owned_regions, successors, attributes,
results=results,
operands=operands,
owned_regions=owned_regions,
successors=successors,
attributes=attributes,
result_inference=false
)
end
Expand All @@ -419,7 +383,6 @@ containing only 8 valid values:
The `clamp` flag is used to saturate the output of type T to numeric_limits<T>::max()
in case of overflow.
"""
function wmma(sourceA::Value, sourceB::Value, destC::Value; destD::MLIRType, subwordOffset=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, unsignedA=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, unsignedB=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, clamp=nothing::Union{Nothing, Union{Attribute, NamedAttribute}}, location=Location())
results = MLIRType[destD, ]
Expand All @@ -433,12 +396,9 @@ function wmma(sourceA::Value, sourceB::Value, destC::Value; destD::MLIRType, sub
(clamp != nothing) && push!(attributes, namedattribute("clamp", clamp))

create_operation(
"amdgpu.wmma", location,
"amdgpu.wmma", location;
operands, owned_regions, successors, attributes,
results=results,
operands=operands,
owned_regions=owned_regions,
successors=successors,
attributes=attributes,
result_inference=false
)
end
Expand Down
Loading

0 comments on commit 7b2cfa7

Please sign in to comment.