Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Accelerate Unsafe CAS Intrinsics on Z #20308

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion runtime/compiler/env/j9method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5560,7 +5560,7 @@ TR_J9MethodBase::isUnsafeCAS(TR::Compilation * c)
case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference:
{
TR_ASSERT_FATAL(c, "comp should not be NULL");
return (c->target().cpu.isPower() || c->target().cpu.isX86());
return (c->target().cpu.isPower() || c->target().cpu.isX86() || c->target().cpu.isZ());
}
case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z:
Expand Down
6 changes: 3 additions & 3 deletions runtime/compiler/optimizer/InlinerTempForJ9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ TR_J9InlinerPolicy::alwaysWorthInlining(TR_ResolvedMethod * calleeMethod, TR::No
case TR::jdk_internal_misc_Unsafe_compareAndExchangeInt:
case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong:
case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference:
if (comp()->target().cpu.isPower() || comp()->target().cpu.isX86())
if (comp()->target().cpu.isPower() || comp()->target().cpu.isX86() || comp()->target().cpu.isZ())
{
return false;
}
Expand All @@ -427,7 +427,7 @@ TR_J9InlinerPolicy::alwaysWorthInlining(TR_ResolvedMethod * calleeMethod, TR::No
* failed the isInlineableJNI check and should not be force inlined.
*/
case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject:
if (comp()->target().cpu.isPower() || comp()->target().cpu.isX86())
if (comp()->target().cpu.isPower() || comp()->target().cpu.isX86() || comp()->target().cpu.isZ())
{
return !calleeMethod->isNative();
}
Expand Down Expand Up @@ -2669,7 +2669,7 @@ TR_J9InlinerPolicy::inlineUnsafeCall(TR::ResolvedMethodSymbol *calleeSymbol, TR:
case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong:
case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject:
case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference:
if (disableCAEIntrinsic || !(comp()->target().cpu.isPower() || comp()->target().cpu.isX86()))
if (disableCAEIntrinsic || !(comp()->target().cpu.isPower() || comp()->target().cpu.isX86() || comp()->target().cpu.isZ()))
{
break;
}
Expand Down
45 changes: 45 additions & 0 deletions runtime/compiler/z/codegen/J9CodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3802,6 +3802,7 @@ J9::Z::CodeGenerator::inlineDirectCall(
}

static const char * enableTRTRE = feGetEnv("TR_enableTRTRE");
static bool disableCAEIntrinsic = feGetEnv("TR_DisableCAEIntrinsic") != NULL;
switch (methodSymbol->getRecognizedMethod())
{
case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
Expand All @@ -3816,6 +3817,7 @@ J9::Z::CodeGenerator::inlineDirectCall(
resultReg = TR::TreeEvaluator::VMinlineCompareAndSwap(node, cg, TR::InstOpCode::CS, IS_NOT_OBJ);
return true;
}
break;

case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z:
// As above, we only want to inline the JNI methods, so add an explicit test for isNative()
Expand All @@ -3842,6 +3844,49 @@ J9::Z::CodeGenerator::inlineDirectCall(
}
break;

case TR::jdk_internal_misc_Unsafe_compareAndExchangeInt:
if ((!TR::Compiler->om.canGenerateArraylets() || node->isUnsafeGetPutCASCallOnNonArray()) && node->isSafeForCGToFastPathUnsafeCall())
{
if (!disableCAEIntrinsic)
{
resultReg = TR::TreeEvaluator::VMinlineCompareAndSwap(node, cg, TR::InstOpCode::CS, IS_NOT_OBJ, true);
return true;
}
}
break;

case TR::jdk_internal_misc_Unsafe_compareAndExchangeLong:
if (comp->target().is64Bit() && (!TR::Compiler->om.canGenerateArraylets() || node->isUnsafeGetPutCASCallOnNonArray()) && node->isSafeForCGToFastPathUnsafeCall())
{
if (!disableCAEIntrinsic)
{
resultReg = TR::TreeEvaluator::VMinlineCompareAndSwap(node, cg, TR::InstOpCode::CSG, IS_NOT_OBJ, true);
return true;
}
}
// Too risky to do Long-31bit version now.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should move this comment close to where we only do the transformation for 64-bit.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not exactly sure where you want me to move this. The comment is right after the block with the comp->target().is64Bit() check.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think right before the condition for 64-bit would be the appropriate place. I would not have thought of it as the else part for 31-bit.

break;

case TR::jdk_internal_misc_Unsafe_compareAndExchangeObject:
/*
* Starting from Java 12, compareAndExchangeObject was changed from a native call to a
* Java wrapper calling compareAndExchangeReference.
* We only want to inline the JNI native method, so add an explicit test for isNative().
*/
if (!methodSymbol->isNative())
break;
// If native, fall through.
case TR::jdk_internal_misc_Unsafe_compareAndExchangeReference:
if ((!TR::Compiler->om.canGenerateArraylets() || node->isUnsafeGetPutCASCallOnNonArray()) && node->isSafeForCGToFastPathUnsafeCall())
{
if (!disableCAEIntrinsic)
{
resultReg = TR::TreeEvaluator::VMinlineCompareAndSwap(node, cg, (comp->useCompressedPointers() ? TR::InstOpCode::CS : TR::InstOpCode::getCmpAndSwapOpCode()), IS_OBJ, true);
return true;
}
}
break;

case TR::java_util_concurrent_atomic_AtomicBoolean_getAndSet:
case TR::java_util_concurrent_atomic_AtomicInteger_getAndAdd:
case TR::java_util_concurrent_atomic_AtomicInteger_getAndIncrement:
Expand Down
66 changes: 43 additions & 23 deletions runtime/compiler/z/codegen/J9TreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11949,12 +11949,11 @@ void J9::Z::TreeEvaluator::genWrtbarForArrayCopy(TR::Node *node, TR::Register *s
}

TR::Register*
J9::Z::TreeEvaluator::VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic casOp, bool isObj)
J9::Z::TreeEvaluator::VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic casOp, bool isObj, bool isExchange)
{
TR::Register *scratchReg = NULL;
TR::Register *objReg, *oldVReg, *newVReg;
TR::Register *resultReg = cg->allocateRegister();
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
TR::Register *resultReg = isExchange ? NULL : cg->allocateRegister();
r30shah marked this conversation as resolved.
Show resolved Hide resolved
TR::MemoryReference* casMemRef = NULL;

TR::Compilation * comp = cg->comp();
Expand Down Expand Up @@ -12021,11 +12020,11 @@ J9::Z::TreeEvaluator::VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *
oldVReg = cg->gprClobberEvaluate(oldVNode); // CS oldReg, newReg, OFF(objReg)
newVReg = cg->evaluate(newVNode); // oldReg is clobbered

TR::Register* compressedValueRegister = newVReg;
TR::Register* decompressedValueRegister = newVReg;

if (isValueCompressedReference)
{
compressedValueRegister = cg->evaluate(decompressedValueNode);
decompressedValueRegister = cg->evaluate(decompressedValueNode);
}

bool needsDup = false;
Expand All @@ -12036,17 +12035,20 @@ J9::Z::TreeEvaluator::VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *
newVReg = cg->allocateCollectedReferenceRegister();
generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, newVReg, objReg);
if (!isValueCompressedReference)
compressedValueRegister = newVReg;
decompressedValueRegister = newVReg;

needsDup = true;
}

generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, resultReg, 0x0);
if (!isExchange)
{
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, resultReg, 0x0);
}

// We can run into trouble when the offset value gets too big, or it may
// simply not nbe known at compile time.
// simply not be known at compile time.
//
if (offsetNode->getOpCode().isLoadConst() && offsetNode->getRegister()==NULL)
if (offsetNode->getOpCode().isLoadConst() && offsetNode->getRegister() == NULL)
{
// We know at compile time
intptr_t offsetValue = offsetNode->getLongInt();
Expand All @@ -12064,7 +12066,7 @@ J9::Z::TreeEvaluator::VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *
{
scratchReg = cg->gprClobberEvaluate(offsetNode);

generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, scratchReg,objReg);
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, scratchReg, objReg);
casMemRef = generateS390MemoryReference(scratchReg, 0, cg);
}

Expand Down Expand Up @@ -12094,14 +12096,29 @@ J9::Z::TreeEvaluator::VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *

// Setup return
//
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, doneLabel);

generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, resultReg, 0x1);
if (isExchange)
{
resultReg = oldVReg;
if (isObj)
{
resultReg->setContainsCollectedReference();
if (TR::Compiler->om.compressedReferenceShiftOffset() != 0)
{
generateRSInstruction(cg, TR::InstOpCode::SLLG, node, resultReg, resultReg, TR::Compiler->om.compressedReferenceShiftOffset());
}
}
}
else
{
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, doneLabel);

TR::RegisterDependencyConditions* cond = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 1, cg);
cond->addPostCondition(resultReg, TR::RealRegister::AssignAny);
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, resultReg, 0x1);
TR::RegisterDependencyConditions* cond = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 1, cg);
cond->addPostCondition(resultReg, TR::RealRegister::AssignAny);

generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, cond);
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, cond);
hzongaro marked this conversation as resolved.
Show resolved Hide resolved
}

// Do wrtbar for Objects
//
Expand All @@ -12118,10 +12135,10 @@ J9::Z::TreeEvaluator::VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *
TR::Register *raReg = cg->allocateRegister();
TR::RegisterDependencyConditions* condWrtBar = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 5, cg);
condWrtBar->addPostCondition(objReg, TR::RealRegister::GPR1);
if (compressedValueRegister != newVReg)
if (decompressedValueRegister != newVReg)
condWrtBar->addPostCondition(newVReg, TR::RealRegister::AssignAny); //defect 92001
if (compressedValueRegister != objReg) // add this because I got conflicting dependencies on GPR1 and GPR2!
condWrtBar->addPostCondition(compressedValueRegister, TR::RealRegister::GPR2); //defect 92001
if (decompressedValueRegister != objReg) // add this because I got conflicting dependencies on GPR1 and GPR2!
condWrtBar->addPostCondition(decompressedValueRegister, TR::RealRegister::GPR2); //defect 92001
condWrtBar->addPostCondition(epReg, cg->getEntryPointRegister());
condWrtBar->addPostCondition(raReg, cg->getReturnAddressRegister());
// Cardmarking is not inlined for gencon. Consider doing so when perf issue arises.
Expand All @@ -12134,9 +12151,8 @@ J9::Z::TreeEvaluator::VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *
wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalSymbolRef(comp->getMethodSymbol());
else
wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef(comp->getMethodSymbol());
VMnonNullSrcWrtBarCardCheckEvaluator(node, objReg, compressedValueRegister, epReg, raReg, doneLabelWrtBar, wbRef, condWrtBar, cg, false);
VMnonNullSrcWrtBarCardCheckEvaluator(node, objReg, decompressedValueRegister, epReg, raReg, doneLabelWrtBar, wbRef, condWrtBar, cg, false);
}

else if (doCrdMrk)
{
VMCardCheckEvaluator(node, objReg, epReg, condWrtBar, cg, false, doneLabelWrtBar, false);
Expand All @@ -12150,6 +12166,8 @@ J9::Z::TreeEvaluator::VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *
cg->stopUsingRegister(raReg);
}

node->setRegister(resultReg);

// Value is not used, and not eval'd to avoid the extra reg
// So recursively decrement to compensate
//
Expand All @@ -12160,7 +12178,10 @@ J9::Z::TreeEvaluator::VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *
cg->decReferenceCount(oldVNode);
cg->decReferenceCount(newVNode);

cg->stopUsingRegister(oldVReg);
if (!isExchange)
{
cg->stopUsingRegister(oldVReg);
}

if (needsDup)
{
Expand All @@ -12174,7 +12195,6 @@ J9::Z::TreeEvaluator::VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *
if (isValueCompressedReference)
cg->decReferenceCount(decompressedValueNode);

node->setRegister(resultReg);
return resultReg;
}

Expand Down
2 changes: 1 addition & 1 deletion runtime/compiler/z/codegen/J9TreeEvaluator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public J9::TreeEvaluator
* Inline Java's (Java 11 onwards) StringLatin1.inflate([BI[CII)V
*/
static TR::Register *inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *VMinlineCompareAndSwap( TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic casOp, bool isObj);
static TR::Register *VMinlineCompareAndSwap( TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic casOp, bool isObj, bool isExchange = false);
static TR::Register *inlineAtomicOps(TR::Node *node, TR::CodeGenerator *cg, int8_t size, TR::MethodSymbol *method, bool isArray = false);
static TR::Register *inlineAtomicFieldUpdater(TR::Node *node, TR::CodeGenerator *cg, TR::MethodSymbol *method);
static TR::Register *inlineKeepAlive(TR::Node *node, TR::CodeGenerator *cg);
Expand Down