Skip to content

Commit 90a9fbb

Browse files
committed
[JSC] Add "AddZeroExtend64" Air opcode
https://bugs.webkit.org/show_bug.cgi?id=249765 rdar://103631099 Reviewed by Mark Lam. On ARM64, we are leveraging LDR-style addressing, which can take a 32-bit index and zero-extend / sign-extend it as part of the load/store. This is useful since WasmAddress' index is 32-bit and we need to zero-extend it. However, we cannot use this addressing when there is an offset, since this addressing mode cannot encode an offset. As a result, we are emitting Move32 and Add64 when there is an offset. However, ARM64 can do even better for that case, since ARM64 add / sub instructions also support LDR-style extension. This patch adds AddZeroExtend64 and AddSignExtend64. They take a 32-bit second operand and extend it before adding. This is particularly useful when computing WasmAddress. We also leverage this in AirIRGenerator. In the added testb3 test, the generated code changes as follows. Before: O2: testWasmAddressWithOffset()... Generated JIT code for Compilation: Code at [0x115f74980, 0x115f749a0): <0> 0x115f74980: pacibsp <4> 0x115f74984: stp fp, lr, [sp, #-16]! <8> 0x115f74988: mov fp, sp <12> 0x115f7498c: ubfx x0, x0, #0, #32; emitSave <16> 0x115f74990: add x0, x2, x0 <20> 0x115f74994: sturb w1, [x0, #1] <24> 0x115f74998: ldp fp, lr, [sp], #16 <28> 0x115f7499c: retab After: O2: testWasmAddressWithOffset()... Generated JIT code for Compilation: Code at [0x121108980, 0x1211089a0): <0> 0x121108980: pacibsp <4> 0x121108984: stp fp, lr, [sp, #-16]! 
<8> 0x121108988: mov fp, sp <12> 0x12110898c: add x0, x2, w0, uxtw; emitSave <16> 0x121108990: sturb w1, [x0, #1] <20> 0x121108994: ldp fp, lr, [sp], #16 <24> 0x121108998: retab * Source/JavaScriptCore/assembler/MacroAssemblerARM64.h: (JSC::MacroAssemblerARM64::addZeroExtend64): (JSC::MacroAssemblerARM64::addSignExtend64): * Source/JavaScriptCore/b3/B3LowerToAir.cpp: * Source/JavaScriptCore/b3/air/AirInstInlines.h: (JSC::B3::Air::isAddZeroExtend64Valid): (JSC::B3::Air::isAddSignExtend64Valid): * Source/JavaScriptCore/b3/air/AirOpcode.opcodes: Canonical link: https://commits.webkit.org/258259@main
1 parent b395978 commit 90a9fbb

File tree

8 files changed

+104
-4
lines changed

8 files changed

+104
-4
lines changed

Source/JavaScriptCore/assembler/MacroAssemblerARM64.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,18 @@ class MacroAssemblerARM64 : public AbstractMacroAssembler<Assembler> {
336336
store64(dataTempRegister, address.m_ptr);
337337
}
338338

339+
// Emits a 64-bit add via m_assembler with the UXTW extend option applied to srcExtend,
// i.e. (per the commit description) the 32-bit second operand is zero-extended before
// being added to src. The result is written to dest.
// srcExtend must not be the stack pointer; this is asserted below.
void addZeroExtend64(RegisterID src, RegisterID srcExtend, RegisterID dest)
340+
{
341+
ASSERT(srcExtend != ARM64Registers::sp);
342+
// NOTE(review): the trailing 0 is presumably the extend shift amount — confirm against the assembler's add<>() overload.
m_assembler.add<64>(dest, src, srcExtend, Assembler::UXTW, 0);
343+
}
344+
345+
// Emits a 64-bit add via m_assembler with the SXTW extend option applied to srcExtend,
// i.e. (per the commit description) the 32-bit second operand is sign-extended before
// being added to src. The result is written to dest.
// srcExtend must not be the stack pointer; this is asserted below.
void addSignExtend64(RegisterID src, RegisterID srcExtend, RegisterID dest)
346+
{
347+
ASSERT(srcExtend != ARM64Registers::sp);
348+
// NOTE(review): the trailing 0 is presumably the extend shift amount — confirm against the assembler's add<>() overload.
m_assembler.add<64>(dest, src, srcExtend, Assembler::SXTW, 0);
349+
}
350+
339351
void addPtrNoFlags(TrustedImm32 imm, RegisterID srcDest)
340352
{
341353
add64(imm, srcDest);

Source/JavaScriptCore/b3/B3LowerToAir.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3728,6 +3728,25 @@ class LowerToAir {
37283728
case WasmAddress: {
37293729
WasmAddressValue* address = m_value->as<WasmAddressValue>();
37303730

3731+
if constexpr (isARM64()) {
3732+
Value* index = m_value->child(0);
3733+
if (canBeInternal(index)) {
3734+
// Maybe, the ideal approach is to introduce a decorator (Index@EXT) to the Air operand
3735+
// to provide an extension opportunity for the specific form under the Air opcode.
3736+
if (isMergeableValue(index, ZExt32)) {
3737+
append(AddZeroExtend64, Arg(address->pinnedGPR()), tmp(index->child(0)), tmp(address));
3738+
commitInternal(index);
3739+
return;
3740+
}
3741+
3742+
if (isMergeableValue(index, SExt32)) {
3743+
append(AddSignExtend64, Arg(address->pinnedGPR()), tmp(index->child(0)), tmp(address));
3744+
commitInternal(index);
3745+
return;
3746+
}
3747+
}
3748+
}
3749+
37313750
append(Add64, Arg(address->pinnedGPR()), tmp(m_value->child(0)), tmp(address));
37323751
return;
37333752
}

Source/JavaScriptCore/b3/air/AirInstInlines.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,26 @@ inline std::optional<unsigned> Inst::shouldTryAliasingDef()
181181
return std::nullopt;
182182
}
183183

184+
// Validity predicate for an AddZeroExtend64 instruction.
// On ARM64 builds it rejects an instruction whose second argument (args[1], the operand
// that gets extended) is the stack pointer, mirroring the ASSERT in
// MacroAssemblerARM64::addZeroExtend64. On every other CPU it unconditionally returns true.
inline bool isAddZeroExtend64Valid(const Inst& inst)
185+
{
186+
#if CPU(ARM64)
187+
return inst.args[1] != Tmp(ARM64Registers::sp);
188+
#else
189+
UNUSED_PARAM(inst);
190+
return true;
191+
#endif
192+
}
193+
194+
// Validity predicate for an AddSignExtend64 instruction.
// On ARM64 builds it rejects an instruction whose second argument (args[1], the operand
// that gets extended) is the stack pointer, mirroring the ASSERT in
// MacroAssemblerARM64::addSignExtend64. On every other CPU it unconditionally returns true.
inline bool isAddSignExtend64Valid(const Inst& inst)
195+
{
196+
#if CPU(ARM64)
197+
return inst.args[1] != Tmp(ARM64Registers::sp);
198+
#else
199+
UNUSED_PARAM(inst);
200+
return true;
201+
#endif
202+
}
203+
184204
inline bool isShiftValid(const Inst& inst)
185205
{
186206
#if CPU(X86) || CPU(X86_64)

Source/JavaScriptCore/b3/air/AirOpcode.opcodes

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,12 @@ Add32 U:G:32, UZD:G:32
123123
x86: Tmp, Addr
124124
x86: Tmp, Index
125125

126+
arm64: AddZeroExtend64 U:G:64, U:G:32, D:G:64
127+
Tmp, Tmp*, Tmp
128+
129+
arm64: AddSignExtend64 U:G:64, U:G:32, D:G:64
130+
Tmp, Tmp*, Tmp
131+
126132
x86: Add8 U:G:8, UD:G:8
127133
Imm, Addr
128134
Imm, Index

Source/JavaScriptCore/b3/testb3.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1171,6 +1171,7 @@ void testFloatMaxMin();
11711171
void testDoubleMaxMin();
11721172

11731173
void testWasmAddressDoesNotCSE();
1174+
void testWasmAddressWithOffset();
11741175
void testStoreAfterClobberExitsSideways();
11751176
void testStoreAfterClobberDifferentWidth();
11761177
void testStoreAfterClobberDifferentWidthSuccessor();

Source/JavaScriptCore/b3/testb3_1.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,7 @@ void run(const char* filter)
815815
RUN(testWasmBoundsCheck(std::numeric_limits<unsigned>::max() - 5));
816816

817817
RUN(testWasmAddress());
818+
RUN(testWasmAddressWithOffset());
818819

819820
RUN(testFastTLSLoad());
820821
RUN(testFastTLSStore());

Source/JavaScriptCore/b3/testb3_7.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,6 +1191,34 @@ void testWasmAddress()
11911191
CHECK_EQ(numToStore, value);
11921192
}
11931193

1194+
// Checks a Store8 through a WasmAddress whose index is a ZExt32 value and whose store
// carries a constant offset of 1. The compiled procedure is invoked with index 1, so the
// byte is expected to land at values[1 + 1] while values[0] and values[1] stay untouched.
void testWasmAddressWithOffset()
1195+
{
1196+
Procedure proc;
1197+
// The third argument register is pinned and used as the WasmAddress base.
GPRReg pinnedGPR = GPRInfo::argumentGPR2;
1198+
proc.pinRegister(pinnedGPR);
1199+
1200+
// Three-byte buffer with distinct sentinel values so a misplaced store is detectable.
Vector<uint8_t> values(3);
1201+
values[0] = 20;
1202+
values[1] = 21;
1203+
values[2] = 22;
1204+
uint8_t numToStore = 42;
1205+
1206+
BasicBlock* root = proc.addBlock();
1207+
1208+
// Root block: truncate the first two arguments to 32 bits, zero-extend the first one
// back to 64 bits to form the index, then store the second one as a byte.
1209+
Value* offset = root->appendNew<Value>(proc, Trunc, Origin(), root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR0));
1210+
Value* valueToStore = root->appendNew<Value>(proc, Trunc, Origin(), root->appendNew<ArgumentRegValue>(proc, Origin(), GPRInfo::argumentGPR1));
1211+
Value* pointer = root->appendNew<Value>(proc, ZExt32, Origin(), offset);
1212+
// The trailing 1 is the store's constant offset (cf. "sturb w1, [x0, #1]" in the commit description).
root->appendNew<MemoryValue>(proc, Store8, Origin(), valueToStore, root->appendNew<WasmAddressValue>(proc, Origin(), pointer, pinnedGPR), 1);
1213+
root->appendNewControlValue(proc, Return, Origin());
1214+
1215+
auto code = compileProc(proc);
1216+
// Arguments: index = 1, byte to store = numToStore, base buffer for the pinned register.
invoke<void>(*code, 1, numToStore, values.data());
1217+
CHECK_EQ(20U, values[0]);
1218+
CHECK_EQ(21U, values[1]);
1219+
CHECK_EQ(42U, values[2]);
1220+
}
1221+
11941222
void testFastTLSLoad()
11951223
{
11961224
#if ENABLE(FAST_TLS_JIT)

Source/JavaScriptCore/wasm/WasmAirIRGenerator64.cpp

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -962,7 +962,6 @@ inline AirIRGenerator64::ExpressionType AirIRGenerator64::emitCheckAndPreparePoi
962962
ASSERT(m_memoryBaseGPR);
963963

964964
auto result = g64();
965-
append(Move32, pointer, result);
966965

967966
switch (m_mode) {
968967
case MemoryMode::BoundsChecking: {
@@ -972,7 +971,12 @@ inline AirIRGenerator64::ExpressionType AirIRGenerator64::emitCheckAndPreparePoi
972971
ASSERT(sizeOfOperation + offset > offset);
973972
auto temp = g64();
974973
append(Move, Arg::bigImm(static_cast<uint64_t>(sizeOfOperation) + offset - 1), temp);
975-
append(Add64, result, temp);
974+
if constexpr (isARM64())
975+
append(AddZeroExtend64, temp, pointer, temp);
976+
else {
977+
append(Move32, pointer, result);
978+
append(Add64, result, temp);
979+
}
976980

977981
emitCheck([&] {
978982
return Inst(Branch64, nullptr, Arg::relCond(MacroAssembler::AboveOrEqual), temp, Tmp(m_boundsCheckingSizeGPR));
@@ -994,11 +998,17 @@ inline AirIRGenerator64::ExpressionType AirIRGenerator64::emitCheckAndPreparePoi
994998
// PROT_NONE region, but it's better if we use a smaller immediate because it can codegens better. We know that anything equal to or greater
995999
// than the declared 'maximum' will trap, so we can compare against that number. If there was no declared 'maximum' then we still know that
9961000
// any access equal to or greater than 4GiB will trap, no need to add the redzone.
1001+
if constexpr (!isARM64())
1002+
append(Move32, pointer, result);
9971003
if (offset >= Memory::fastMappedRedzoneBytes()) {
9981004
uint64_t maximum = m_info.memory.maximum() ? m_info.memory.maximum().bytes() : std::numeric_limits<uint32_t>::max();
9991005
auto temp = g64();
10001006
append(Move, Arg::bigImm(static_cast<uint64_t>(sizeOfOperation) + offset - 1), temp);
1001-
append(Add64, result, temp);
1007+
if constexpr (isARM64())
1008+
append(AddZeroExtend64, temp, pointer, temp);
1009+
else
1010+
append(Add64, result, temp);
1011+
10021012
auto sizeMax = addConstant(Types::I64, maximum);
10031013

10041014
emitCheck([&] {
@@ -1012,7 +1022,10 @@ inline AirIRGenerator64::ExpressionType AirIRGenerator64::emitCheckAndPreparePoi
10121022
#endif
10131023
}
10141024

1015-
append(Add64, Tmp(m_memoryBaseGPR), result);
1025+
if constexpr (isARM64())
1026+
append(AddZeroExtend64, Tmp(m_memoryBaseGPR), pointer, result);
1027+
else
1028+
append(Add64, Tmp(m_memoryBaseGPR), result);
10161029
return result;
10171030
}
10181031

0 commit comments

Comments
 (0)