| // |
| // Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. |
| // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| // |
| // This code is free software; you can redistribute it and/or modify it |
| // under the terms of the GNU General Public License version 2 only, as |
| // published by the Free Software Foundation. |
| // |
| // This code is distributed in the hope that it will be useful, but WITHOUT |
| // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| // version 2 for more details (a copy is included in the LICENSE file that |
| // accompanied this code). |
| // |
| // You should have received a copy of the GNU General Public License version |
| // 2 along with this work; if not, write to the Free Software Foundation, |
| // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| // |
| // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| // or visit www.oracle.com if you need additional information or have any |
| // questions. |
| // |
| // |
| |
| // X86 Architecture Description File |
| |
| //----------REGISTER DEFINITION BLOCK------------------------------------------ |
| // This information is used by the matcher and the register allocator to |
| // describe individual registers and classes of registers within the target |
| // architecture. |
| |
| register %{ |
| //----------Architecture Description Register Definitions---------------------- |
| // General Registers |
| // "reg_def" name ( register save type, C convention save type, |
| // ideal register type, encoding ); |
| // Register Save Types: |
| // |
| // NS = No-Save: The register allocator assumes that these registers |
| // can be used without saving upon entry to the method, & |
| // that they do not need to be saved at call sites. |
| // |
| // SOC = Save-On-Call: The register allocator assumes that these registers |
| // can be used without saving upon entry to the method, |
| // but that they must be saved at call sites. |
| // |
| // SOE = Save-On-Entry: The register allocator assumes that these registers |
| // must be saved before using them upon entry to the |
| // method, but they do not need to be saved at call |
| // sites. |
| // |
| // AS = Always-Save: The register allocator assumes that these registers |
| // must be saved before using them upon entry to the |
| // method, & that they must be saved at call sites. |
| // |
| // Ideal Register Type is used to determine how to save & restore a |
| // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get |
| // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. |
| // |
| // The encoding number is the actual bit-pattern placed into the opcodes. |
| |
| // General Registers |
| // Previously set EBX, ESI, and EDI as save-on-entry for java code |
| // Turn off SOE in java-code due to frequent use of uncommon-traps. |
| // Now that allocator is better, turn on ESI and EDI as SOE registers. |
| |
| reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()); |
| reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()); |
| reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()); |
| reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()); |
| // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code. |
| reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg()); |
| reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()); |
| reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg()); |
| reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg()); |
| |
| // Float registers. We treat TOS/FPR0 special. It is invisible to the |
| // allocator, and only shows up in the encodings. |
| reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); |
| reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); |
| // Ok so here's the trick: FPR1 is really st(0), except in the midst |
| // of emission of assembly for a machnode. During the emission the fpu stack |
| // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint |
| // the stack will not have this element, so FPR1 == st(0) from the |
| // oopMap viewpoint. This same weirdness with numbering causes |
| // instruction encoding to have to play games with the register |
| // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation, |
| // where it does flt->flt moves, for an example. |
| // |
| reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()); |
| reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next()); |
| reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()); |
| reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next()); |
| reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()); |
| reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next()); |
| reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()); |
| reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next()); |
| reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()); |
| reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next()); |
| reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()); |
| reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next()); |
| reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()); |
| reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next()); |
| |
| // Specify priority of register selection within phases of register |
| // allocation. Highest priority is first. A useful heuristic is to |
| // give registers a low priority when they are required by machine |
| // instructions, like EAX and EDX. Registers which are used as |
| // pairs must fall on an even boundary (witness the FPR#L's in this list). |
| // For the Intel integer registers, the equivalent Long pairs are |
| // EDX:EAX, EBX:ECX, and EDI:EBP. |
| alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP, |
| FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H, |
| FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H, |
| FPR6L, FPR6H, FPR7L, FPR7H ); |
| |
| |
| //----------Architecture Description Register Classes-------------------------- |
| // Several register classes are automatically defined based upon information in |
| // this architecture description. |
| // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) |
| // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) |
| // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) |
| // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) |
| // |
| // Class for no registers (empty set). |
| reg_class no_reg(); |
| |
| // Class for all registers |
| reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); |
| // Class for all registers (excluding EBP) |
| reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP); |
| // Dynamic register class that selects at runtime between register classes |
| // any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer). |
| // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp; |
| reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %}); |
| |
| // Class for general registers |
| reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX); |
| // Class for general registers (excluding EBP). |
| // This register class can be used for implicit null checks on win95. |
| // It is also safe for use by tailjumps (we don't want to allocate in ebp). |
| // Used also if the PreserveFramePointer flag is true. |
| reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX); |
| // Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp. |
| reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %}); |
| |
| // Class of "X" registers |
| reg_class int_x_reg(EBX, ECX, EDX, EAX); |
| |
| // Class of registers that can appear in an address with no offset. |
| // EBP and ESP require an extra instruction byte for zero offset. |
| // Used in fast-unlock |
| reg_class p_reg(EDX, EDI, ESI, EBX); |
| |
| // Class for general registers excluding ECX |
| reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX); |
| // Class for general registers excluding ECX (and EBP) |
| reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX); |
| // Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp. |
| reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %}); |
| |
| // Class for general registers excluding EAX |
| reg_class nax_reg(EDX, EDI, ESI, ECX, EBX); |
| |
| // Class for general registers excluding EAX and EBX. |
| reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP); |
| // Class for general registers excluding EAX and EBX (and EBP) |
| reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX); |
| // Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp. |
| reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %}); |
| |
| // Class of EAX (for multiply and divide operations) |
| reg_class eax_reg(EAX); |
| |
| // Class of EBX (for atomic add) |
| reg_class ebx_reg(EBX); |
| |
| // Class of ECX (for shift and JCXZ operations and cmpLTMask) |
| reg_class ecx_reg(ECX); |
| |
| // Class of EDX (for multiply and divide operations) |
| reg_class edx_reg(EDX); |
| |
| // Class of EDI (for synchronization) |
| reg_class edi_reg(EDI); |
| |
| // Class of ESI (for synchronization) |
| reg_class esi_reg(ESI); |
| |
| // Singleton class for stack pointer |
| reg_class sp_reg(ESP); |
| |
| // Singleton class for instruction pointer |
| // reg_class ip_reg(EIP); |
| |
| // Class of integer register pairs |
| reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI ); |
| // Class of integer register pairs (excluding the EBP,EDI pair) |
| reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX ); |
| // Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp. |
| reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %}); |
| |
| // Class of integer register pairs that aligns with calling convention |
| reg_class eadx_reg( EAX,EDX ); |
| reg_class ebcx_reg( ECX,EBX ); |
| |
| // Not AX or DX, used in divides |
| reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP); |
| // Not AX or DX (nor EBP), used in divides |
| reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI); |
| // Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp. |
| reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %}); |
| |
| // Floating point registers. Notice FPR0 is not a choice. |
| // FPR0 is never allocated; we use clever encodings to fake |
| // 2-address instructions out of Intel's FP stack. |
| reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); |
| |
| reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, |
| FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, |
| FPR7L,FPR7H ); |
| |
| reg_class fp_flt_reg0( FPR1L ); |
| reg_class fp_dbl_reg0( FPR1L,FPR1H ); |
| reg_class fp_dbl_reg1( FPR2L,FPR2H ); |
| reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, |
| FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); |
| |
| %} |
| |
| |
| //----------SOURCE BLOCK------------------------------------------------------- |
| // This is a block of C++ code which provides values, functions, and |
| // definitions necessary in the rest of the architecture description |
| source_hpp %{ |
| // Declared in the header block because it must be visible to the DFA in dfa_x86_32.cpp; the definition is not part of this block. |
| extern bool is_operand_hi32_zero(Node* n); |
| %} |
| |
| source %{ |
| #define RELOC_IMM32 Assembler::imm_operand |
| #define RELOC_DISP32 Assembler::disp32_operand |
| |
| #define __ _masm. |
| |
| // High register of a Long pair, given the low register: low + 2 (by encoding: EAX(0)->EDX(2), ECX(1)->EBX(3), EBP(5)->EDI(7)). |
| #define HIGH_FROM_LOW(x) ((x)+2) |
| |
| // These masks are used to provide 128-bit aligned bitmasks to the XMM |
| // instructions, to allow sign-masking or sign-bit flipping. They allow |
| // fast versions of NegF/NegD and AbsF/AbsD. |
| |
| // Note: 'double' and 'long long' have 32-bits alignment on x86. |
| // Rounds 'adr' down to a 16-byte (128-bit) boundary, stores 'lo' in the first |
| // 64-bit word and 'hi' in the second word at that address, and returns the |
| // aligned address. The caller must own the 16 bytes starting there. |
| static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { |
|   // Clearing the low four address bits yields the 128-bit aligned slot |
|   // that SSE instructions require for their memory operands. |
|   const uintptr_t keep_high_bits = (uintptr_t)(~0xF); |
|   const uintptr_t aligned_bits = ((uintptr_t)adr) & keep_high_bits; |
|   jlong* const slot = (jlong*)aligned_bits; |
|   slot[0] = lo; |
|   slot[1] = hi; |
|   return slot; |
| } |
| |
| // Backing storage for the four 128-bit masks used by SSE instructions; one extra 128-bit slot leaves room for rounding addresses down to a 16-byte boundary. |
| static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment) |
| |
| // Static initialization during VM startup: each pointer is the 16-byte aligned slot at or below the given pool entry, filled with the same 64-bit mask in both halves. |
| static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF)); |
| static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF)); |
| static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); |
| static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); |
| |
| // Offset hacking within calls. |
| // Returns the number of bytes accounted for state-reset instructions ahead of |
| // a call in the current compilation: 6 for an fldcw when the compile is in |
| // 24-bit FP mode, plus 3 for a vzeroupper when its max vector size exceeds 16. |
| static int pre_call_resets_size() { |
|   Compile* const compile = Compile::current(); |
|   const int fldcw_bytes = compile->in_24_bit_fp_mode() ? 6 : 0; |
|   const int vzeroupper_bytes = (compile->max_vector_size() > 16) ? 3 : 0; |
|   return fldcw_bytes + vzeroupper_bytes; |
| } |
| |
| // !!!!! Special hack to get all type of calls to specify the byte offset |
| // from the start of the call to the point where the return address |
| // will point. |
| int MachCallStaticJavaNode::ret_addr_offset() { |
|   // 5 bytes lie between the start of the call and the point the return |
|   // address refers to; any pre-call reset instructions come on top of that. |
|   const int call_bytes = 5; |
|   return call_bytes + pre_call_resets_size(); |
| } |
| |
| int MachCallDynamicJavaNode::ret_addr_offset() { |
|   // 10 bytes lie between the start of the call sequence and the point the |
|   // return address refers to; pre-call reset instructions are added to that. |
|   const int call_sequence_bytes = 10; |
|   return call_sequence_bytes + pre_call_resets_size(); |
| } |
| |
| // Byte size of the FFree_Float_Stack_All sequence; -1 until it has been set. |
| static int sizeof_FFree_Float_Stack_All = -1; |
| |
| // Return-address offset for runtime calls: the recorded FFree_Float_Stack_All |
| // size, plus the 5 call bytes, plus any pre-call reset instructions. |
| int MachCallRuntimeNode::ret_addr_offset() { |
|   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); |
|   const int call_bytes = 5; |
|   return sizeof_FFree_Float_Stack_All + call_bytes + pre_call_resets_size(); |
| } |
| |
| // Tells whether a safepoint node takes the polling page address as an input. |
| // x86 has absolute addressing, so no such input is needed here. |
| bool SafePointNode::needs_polling_address_input() { |
|   return false; |
| } |
| |
| // |
| // Compute padding required for nodes which need alignment |
| // |
| |
| // Returns the padding needed so that the offset just past the call opcode |
| // byte is a multiple of alignment_required(). The alignment keeps the |
| // patchable part of the call from spanning a cache line. |
| int CallStaticJavaDirectNode::compute_padding(int current_offset) const { |
|   // Bytes ahead of the aligned position: the pre-call resets (fldcw and/or |
|   // vzeroupper, if any) followed by the one-byte call opcode. |
|   const int aligned_position = current_offset + pre_call_resets_size() + 1; |
|   return round_to(aligned_position, alignment_required()) - aligned_position; |
| } |
| |
| // Returns the padding needed so that the offset just past the call opcode |
| // byte is a multiple of alignment_required(). The alignment keeps the |
| // patchable part of the call from spanning a cache line. |
| int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { |
|   // Bytes ahead of the aligned position: the pre-call resets (fldcw and/or |
|   // vzeroupper, if any), the 5-byte MOV instruction and the call opcode byte. |
|   const int mov_bytes = 5; |
|   const int aligned_position = current_offset + pre_call_resets_size() + mov_bytes + 1; |
|   return round_to(aligned_position, alignment_required()) - aligned_position; |
| } |
| |
| // EMIT_RM() |
| // Packs three fields into one byte as (f1 << 6) | (f2 << 3) | f3 and appends |
| // it to the instruction section. Used for both ModRM and SIB bytes. |
| void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) { |
|   const int packed = (f1 << 6) | (f2 << 3) | f3; |
|   cbuf.insts()->emit_int8((unsigned char)packed); |
| } |
| |
| // EMIT_CC() |
| // Appends the single byte (f1 | f2) to the instruction section. |
| void emit_cc(CodeBuffer &cbuf, int f1, int f2) { |
|   const int combined = f1 | f2; |
|   cbuf.insts()->emit_int8((unsigned char)combined); |
| } |
| |
| // EMIT_OPCODE() |
| // Appends 'code', truncated to one byte, to the instruction section. |
| void emit_opcode(CodeBuffer &cbuf, int code) { |
|   const unsigned char opcode_byte = (unsigned char) code; |
|   cbuf.insts()->emit_int8(opcode_byte); |
| } |
| |
| // EMIT_OPCODE() w/ relocation information |
| // Records a relocation of type 'reloc' at (instruction mark + offset), then |
| // emits the opcode byte. |
| void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) { |
|   // The relocation must be registered before the byte is appended. |
|   cbuf.relocate(cbuf.insts_mark() + offset, reloc); |
|   emit_opcode(cbuf, code); |
| } |
| |
| // EMIT_D8() |
| // Appends 'd8', truncated to one byte, to the instruction section. |
| void emit_d8(CodeBuffer &cbuf, int d8) { |
|   const unsigned char data_byte = (unsigned char) d8; |
|   cbuf.insts()->emit_int8(data_byte); |
| } |
| |
| // EMIT_D16() |
| // Appends 'd16' to the instruction section as a 16-bit value. |
| void emit_d16(CodeBuffer &cbuf, int d16) { |
|   cbuf.insts()->emit_int16(d16); |
| } |
| |
| // EMIT_D32() |
| // Appends 'd32' to the instruction section as a 32-bit value. |
| void emit_d32(CodeBuffer &cbuf, int d32) { |
|   cbuf.insts()->emit_int32(d32); |
| } |
| |
| // Emits a 32-bit value, first recording a relocation of the given |
| // relocInfo::relocType (with 'format') at the current instruction mark. |
| void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc, |
|                     int format) { |
|   cbuf.relocate(cbuf.insts_mark(), reloc, format); |
|   cbuf.insts()->emit_int32(d32); |
| } |
| |
| // Emits a 32-bit value, first recording the relocation described by the |
| // RelocationHolder (with 'format') at the current instruction mark. |
| void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec, |
|                     int format) { |
| #ifdef ASSERT |
|   // Debug-only sanity check: an oop relocation carrying a real value |
|   // (neither 0 nor the non-oop word) must refer to a valid oop that may be |
|   // embedded in code. |
|   const bool is_oop_reloc = (rspec.reloc()->type() == relocInfo::oop_type); |
|   if (is_oop_reloc && d32 != 0 && d32 != (int)Universe::non_oop_word()) { |
|     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code"); |
|   } |
| #endif |
|   cbuf.relocate(cbuf.insts_mark(), rspec, format); |
|   cbuf.insts()->emit_int32(d32); |
| } |
| |
| // Access stack slot for load or store: emits 'opcode' followed by an |
| // [ESP + disp] memory operand, with 'rm_field' in the ModRM reg field. |
| // An 8-bit displacement is used when 'disp' fits in a signed byte, otherwise |
| // a 32-bit one. |
| void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) { |
|   const bool fits_in_byte = (-128 <= disp) && (disp <= 127); |
|   const int mode = fits_in_byte ? 0x01 : 0x02; |
| |
|   emit_opcode( cbuf, opcode );                 // (e.g., FILD [ESP+src]) |
|   emit_rm( cbuf, mode, rm_field, ESP_enc );    // R/M byte |
|   emit_rm( cbuf, 0x00, ESP_enc, ESP_enc );     // SIB byte |
|   if( fits_in_byte ) { |
|     emit_d8 ( cbuf, disp );                    // 8-bit displacement |
|   } else { |
|     emit_d32( cbuf, disp );                    // 32-bit displacement |
|   } |
| } |
| |
| // rRegI ereg, memory mem) %{ // emit_reg_mem |
| // Emits the ModRM byte, the SIB byte when one is required, and the |
| // displacement for a register/memory operand. |
| // |
| //   reg_encoding - value placed in the ModRM reg field |
| //   base         - base register encoding, or -1 as the special flag for an |
| //                  absolute address (no base register) |
| //   index, scale - index register encoding and scale; index == 0x4 with |
| //                  scale == 0 means "no index" |
| //   displace     - displacement value |
| //   disp_reloc   - relocation type for the displacement; anything other than |
| //                  relocInfo::none forces a relocated 32-bit displacement |
| // |
| // The absolute-address flag is tested before any displacement-size decision, |
| // so base == -1 always produces mod=00, r/m=101 plus a 32-bit displacement |
| // and the flag value itself is never packed into a ModRM byte. |
| void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) { |
|   const bool has_reloc  = (disp_reloc != relocInfo::none); |
|   const bool fits_disp8 = (displace >= -128) && (displace <= 127) && !has_reloc; |
|   // There is no index & no scale: the form without SIB byte can be used, |
|   // unless the base is ESP (r/m == ESP_enc selects the SIB form). |
|   const bool no_sib = (index == 0x4) && (scale == 0) && (base != ESP_enc); |
| |
|   if (no_sib && base == -1) {            // Special flag for absolute address |
|     emit_rm(cbuf, 0x0, reg_encoding, 0x5); |
|     // (manual lies; no SIB needed here) |
|     if (has_reloc) { |
|       emit_d32_reloc(cbuf, displace, disp_reloc, 1); |
|     } else { |
|       emit_d32      (cbuf, displace); |
|     } |
|     return; |
|   } |
| |
|   // If no displacement, mode is 0x0; unless base is [EBP], which has no |
|   // displacement-free encoding. Otherwise 0x1 selects an 8-bit and 0x2 a |
|   // 32-bit displacement. |
|   const bool no_disp = (displace == 0) && (base != EBP_enc); |
|   const int  mode    = no_disp ? 0x0 : (fits_disp8 ? 0x1 : 0x2); |
| |
|   if (no_sib) { |
|     emit_rm(cbuf, mode, reg_encoding, base); |
|   } else {                               // Else, encode with the SIB byte |
|     emit_rm(cbuf, mode, reg_encoding, 0x4); |
|     emit_rm(cbuf, scale, index, base); |
|   } |
| |
|   if (no_disp) { |
|     return; |
|   } |
|   if (fits_disp8) { |
|     emit_d8(cbuf, displace); |
|   } else if (has_reloc) { |
|     emit_d32_reloc(cbuf, displace, disp_reloc, 1); |
|   } else { |
|     emit_d32      (cbuf, displace); |
|   } |
| } |
| |
| |
| // Emits a register-to-register MOV (opcode 0x8B) from src_encoding to |
| // dst_encoding. Emits nothing when source and destination are the same. |
| void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { |
|   if( dst_encoding == src_encoding ) { |
|     return;   // copy onto itself: empty encoding |
|   } |
|   emit_opcode( cbuf, 0x8B ); |
|   emit_rm( cbuf, 0x3, dst_encoding, src_encoding ); |
| } |
| |
| // Flag fixup after a comiss/ucomiss compare: when the parity flag is set |
| // (the NaN case), the saved flags are rewritten so that the compare reads as |
| // 'less than'. With parity clear, the flags are left as they are. |
| void emit_cmpfp_fixup(MacroAssembler& _masm) { |
|   Label no_fixup; |
|   __ jccb(Assembler::noParity, no_fixup); |
|   __ pushf(); |
|   // |
|   // comiss/ucomiss set ZF, PF and CF and zero OF, AF and SF for NaN values. |
|   // The mask keeps CF and the reserved bits of the low flags byte and clears |
|   // ZF and PF (the SF and AF positions are cleared as well). CF stays set, |
|   // which gives the 'less than' result. All higher bits are kept. |
|   // |
|   //  bit:   7 6 5 4 3 2 1 0 |
|   //  flag: |S|Z|r|A|r|P|r|C|   (r - reserved bit) |
|   //  mask:  0 0 1 0 1 0 1 1    (0x2B) |
|   // |
|   __ andl(Address(rsp, 0), 0xffffff2b); |
|   __ popf(); |
|   __ bind(no_fixup); |
| } |
| |
| // Turns the flags of a preceding floating-point compare into an integer in |
| // 'dst': -1 when parity or below is set, otherwise 1 for 'not equal' and 0 |
| // for 'equal'. |
| void emit_cmpfp3(MacroAssembler& _masm, Register dst) { |
|   Label result_ready; |
|   // Start from -1; both early exits keep that value. |
|   __ movl(dst, -1); |
|   __ jcc(Assembler::parity, result_ready); |
|   __ jcc(Assembler::below, result_ready); |
|   // Neither parity nor below: the low byte becomes 1 for notEqual, else 0, |
|   // and is then zero-extended to the full register. |
|   __ setb(Assembler::notEqual, dst); |
|   __ movzbl(dst, dst); |
|   __ bind(result_ready); |
| } |
| |
| |
| //============================================================================= |
| // The constant base node has an empty output register mask on this platform. |
| const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; |
| |
| // Constants are addressed absolutely, so the table base offset is always 0. |
| int Compile::ConstantTable::calculate_table_base_offset() const { |
|   return 0; // absolute addressing, no offset |
| } |
| |
| // No post-allocation expansion is requested for this node. |
| bool MachConstantBaseNode::requires_postalloc_expand() const { |
|   return false; |
| } |
| |
| // Must never be called, since requires_postalloc_expand() returns false. |
| void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { |
|   ShouldNotReachHere(); |
| } |
| |
| // Emits nothing: the node has an empty encoding. |
| void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { |
|   // Empty encoding |
| } |
| |
| // Size in bytes of the (empty) encoding. |
| uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { |
|   return 0; |
| } |
| |
| #ifndef PRODUCT |
| // Debug listing for the node. |
| void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { |
|   st->print("# MachConstantBaseNode (empty encoding)"); |
| } |
| #endif |
| |
| |
| //============================================================================= |
| #ifndef PRODUCT |
| // Prints a textual listing of the method prolog to 'st' (non-product builds |
| // only): frame creation, the EBP save, and the optional stack-depth cookie, |
| // FPU control word load and verification notes. |
| void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { |
|   Compile* C = ra_->C; |
| |
|   int frame_bytes = C->frame_size_in_bytes(); |
|   const int bang_bytes = C->bang_size_in_bytes(); |
|   assert((frame_bytes & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); |
|   // The return address is already on the stack; do not count its word. |
|   frame_bytes -= wordSize; |
| |
|   if (!C->need_stack_bang(bang_bytes)) { |
|     // No stack bang: the frame is created first and EBP is stored into it. |
|     st->print("SUB ESP, #%d\t# Create frame",frame_bytes); |
|     st->print("\n\t"); |
|     frame_bytes -= wordSize; |
|     st->print("MOV [ESP + #%d], EBP\t# Save EBP",frame_bytes); |
|     if (PreserveFramePointer) { |
|       st->print("\n\t"); |
|       st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); |
|       if (frame_bytes > 0) { |
|         st->print("\n\t"); |
|         st->print("ADD EBP, #%d", frame_bytes); |
|       } |
|     } |
|   } else { |
|     // Stack bang: EBP is pushed, then the rest of the frame is allocated. |
|     frame_bytes -= wordSize; |
|     st->print("# stack bang (%d bytes)", bang_bytes); |
|     st->print("\n\t"); |
|     st->print("PUSH EBP\t# Save EBP"); |
|     if (PreserveFramePointer) { |
|       st->print("\n\t"); |
|       st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); |
|     } |
|     if (frame_bytes != 0) { |
|       st->print("\n\t"); |
|       st->print("SUB ESP, #%d\t# Create frame",frame_bytes); |
|     } |
|   } |
| |
|   if (VerifyStackAtCalls) { |
|     st->print("\n\t"); |
|     frame_bytes -= wordSize; |
|     st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",frame_bytes); |
|   } |
| |
|   if (C->in_24_bit_fp_mode()) { |
|     st->print("\n\t"); |
|     st->print("FLDCW \t# load 24 bit fpu control word"); |
|   } |
|   if (UseSSE >= 2 && VerifyFPU) { |
|     st->print("\n\t"); |
|     st->print("# verify FPU stack (must be clean on entry)"); |
|   } |
| |
| #ifdef ASSERT |
|   if (VerifyStackAtCalls) { |
|     st->print("\n\t"); |
|     st->print("# stack alignment check"); |
|   } |
| #endif |
|   st->cr(); |
| } |
| #endif |
| |
| |
| // Emits the method prolog through MacroAssembler::verified_entry, marks the |
| // frame as complete at the current instruction size, and sets the constant |
| // table base offset when the compile has a MachConstantBaseNode. |
| void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { |
|   Compile* C = ra_->C; |
|   MacroAssembler _masm(&cbuf); |
| |
|   const int frame_bytes = C->frame_size_in_bytes(); |
|   const int bang_bytes  = C->bang_size_in_bytes(); |
|   // A bang size of 0 is passed when no stack bang is needed. |
|   const int bang_arg    = C->need_stack_bang(bang_bytes) ? bang_bytes : 0; |
| |
|   __ verified_entry(frame_bytes, bang_arg, C->in_24_bit_fp_mode()); |
| |
|   C->set_frame_complete(cbuf.insts_size()); |
| |
|   if (!C->has_mach_constant_base_node()) { |
|     return; |
|   } |
|   // NOTE: We set the table base offset here because users might be |
|   // emitted before MachConstantBaseNode. |
|   Compile::ConstantTable& table = C->constant_table(); |
|   table.set_table_base_offset(table.calculate_table_base_offset()); |
| } |
| |
| // The prolog depends on too many variables to size by hand, so the generic |
| // MachNode::size computation is used. |
| uint MachPrologNode::size(PhaseRegAlloc *ra_) const { |
|   return MachNode::size(ra_); |
| } |
| |
| // Always returns 0. |
| // NOTE(review): presumably a count of relocation entries for this node -- confirm. |
| int MachPrologNode::reloc() const { |
|   return 0; |
| } |
| |
| //============================================================================= |
| #ifndef PRODUCT |
| // Prints a textual listing of the method epilog to 'st' (non-product builds |
| // only), in the order the epilog is emitted: optional VZEROUPPER, optional |
| // FLDCW, frame removal, POPL EBP and the optional safepoint poll. |
| void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { |
|   Compile *C = ra_->C; |
|   int frame_bytes = C->frame_size_in_bytes(); |
|   assert((frame_bytes & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); |
|   // The return address and the saved rbp are not part of the ADD below. |
|   frame_bytes -= 2*wordSize; |
| |
|   if (C->max_vector_size() > 16) { |
|     st->print("VZEROUPPER"); |
|     st->cr(); |
|     st->print("\t"); |
|   } |
|   if (C->in_24_bit_fp_mode()) { |
|     st->print("FLDCW standard control word"); |
|     st->cr(); |
|     st->print("\t"); |
|   } |
|   if (frame_bytes != 0) { |
|     st->print("ADD ESP,%d\t# Destroy frame",frame_bytes); |
|     st->cr(); |
|     st->print("\t"); |
|   } |
|   st->print_cr("POPL EBP"); |
|   st->print("\t"); |
|   if (do_polling() && C->is_method_compilation()) { |
|     st->print("TEST PollPage,EAX\t! Poll Safepoint"); |
|     st->cr(); |
|     st->print("\t"); |
|   } |
| } |
| #endif |
| |
| // Emits the method epilog: optional vzeroupper, optional restore of the |
| // standard FPU control word, removal of the frame, POP EBP, and -- for method |
| // compilations that poll -- a safepoint poll with a poll_return relocation. |
| void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { |
|   Compile *C = ra_->C; |
| |
|   if (C->max_vector_size() > 16) { |
|     // Clear upper bits of YMM registers when current compiled code uses |
|     // wide vectors to avoid AVX <-> SSE transition penalty during call. |
|     MacroAssembler masm(&cbuf); |
|     masm.vzeroupper(); |
|   } |
|   if (C->in_24_bit_fp_mode()) { |
|     // The method set the FPU control word; restore the standard one. |
|     MacroAssembler masm(&cbuf); |
|     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); |
|   } |
| |
|   int frame_bytes = C->frame_size_in_bytes(); |
|   assert((frame_bytes & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); |
|   // The return address and the saved rbp are not popped by the ADD. |
|   frame_bytes -= 2*wordSize; |
| |
|   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here |
| |
|   if (frame_bytes != 0) { |
|     // add SP, #framesize -- 32-bit immediate form (0x81) from 128 bytes up, |
|     // 8-bit immediate form (0x83) otherwise. |
|     const bool wide_immediate = (frame_bytes >= 128); |
|     emit_opcode(cbuf, wide_immediate ? 0x81 : 0x83); |
|     emit_rm(cbuf, 0x3, 0x00, ESP_enc); |
|     if (wide_immediate) { |
|       emit_d32(cbuf, frame_bytes); |
|     } else { |
|       emit_d8(cbuf, frame_bytes); |
|     } |
|   } |
| |
|   emit_opcode(cbuf, 0x58 | EBP_enc);   // pop EBP |
| |
|   if (do_polling() && C->is_method_compilation()) { |
|     // TEST against the polling page, tagged as a poll-return site. |
|     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0); |
|     emit_opcode(cbuf,0x85); |
|     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX |
|     emit_d32(cbuf, (intptr_t)os::get_polling_page()); |
|   } |
| } |
| |
| // Byte size of the epilog, added up from the same pieces that emit() produces. |
| uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { |
|   Compile *C = ra_->C; |
|   int total = 0; |
| |
|   if (C->in_24_bit_fp_mode()) { |
|     total += 6;   // fldcw restoring the standard control word |
|   } |
|   if (C->max_vector_size() > 16) { |
|     total += 3;   // vzeroupper |
|   } |
|   if (do_polling() && C->is_method_compilation()) { |
|     total += 6;   // safepoint poll |
|   } |
| |
|   int frame_bytes = C->frame_size_in_bytes(); |
|   assert((frame_bytes & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); |
|   // The return address and the saved rbp are not popped by the ADD. |
|   frame_bytes -= 2*wordSize; |
| |
|   total += 1;     // popl rbp |
| |
|   if (frame_bytes >= 128) { |
|     total += 6;   // add with 32-bit immediate |
|   } else if (frame_bytes != 0) { |
|     total += 3;   // add with 8-bit immediate |
|   } |
|   return total; |
| } |
| |
| // Always returns 0. |
| // NOTE(review): presumably a count of relocation entries for this node -- confirm. |
| int MachEpilogNode::reloc() const { |
|   return 0; |
| } |
| |
| // The epilog uses the generic MachNode pipeline class. |
| const Pipeline * MachEpilogNode::pipeline() const { |
|   return MachNode::pipeline_class(); |
| } |
| |
| // Always returns 0. |
| int MachEpilogNode::safepoint_offset() const { |
|   return 0; |
| } |
| |
| //============================================================================= |
| |
| // Coarse register kinds: invalid, general purpose, x87 float, XMM, stack slot. |
| enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack }; |
| |
| // Classifies an OptoReg name into one of the RC kinds. |
| static enum RC rc_class( OptoReg::Name reg ) { |
|   if (!OptoReg::is_valid(reg)) { |
|     return rc_bad; |
|   } |
|   if (OptoReg::is_stack(reg)) { |
|     return rc_stack; |
|   } |
| |
|   VMReg vm_reg = OptoReg::as_VMReg(reg); |
|   if (vm_reg->is_Register()) { |
|     return rc_int; |
|   } |
|   if (!vm_reg->is_FloatRegister()) { |
|     // Neither general purpose nor x87: only XMM is left. |
|     assert(vm_reg->is_XMMRegister(), "must be"); |
|     return rc_xmm; |
|   } |
|   assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); |
|   return rc_float; |
| } |
| |
| // Emits (when 'cbuf' is given) or, in non-product builds, prints (when 'cbuf' |
| // is NULL and 'do_size' is false) one instruction with an [ESP + offset] |
| // operand. Returns 'size' plus this instruction's size: 3 bytes plus 0, 1 or |
| // 4 displacement bytes for an offset of 0, up to 127, or above. |
| static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, |
|                         int opcode, const char *op_str, int size, outputStream* st ) { |
|   if( cbuf ) { |
|     emit_opcode  (*cbuf, opcode ); |
|     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); |
|   } |
| #ifndef PRODUCT |
|   else if( !do_size ) { |
|     if( size != 0 ) st->print("\n\t"); |
|     const bool is_mov = (opcode == 0x8B || opcode == 0x89); |
|     if( !is_mov ) { // FLD, FST, PUSH, POP |
|       st->print("%s [ESP + #%d]",op_str,offset); |
|     } else if( is_load ) { |
|       st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); |
|     } else { |
|       st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); |
|     } |
|   } |
| #endif |
|   int disp_bytes = 4; |
|   if( offset == 0 ) { |
|     disp_bytes = 0; |
|   } else if( offset <= 127 ) { |
|     disp_bytes = 1; |
|   } |
|   return size+3+disp_bytes; |
| } |
| |
| // Helper for XMM registers. Extra opcode bits, limited syntax. |
| // Moves an XMM register to or from [ESP + offset]. The move is a double move |
| // when reg_hi directly follows reg_lo, otherwise a float move. It is emitted |
| // when 'cbuf' is given; in non-product builds it is printed when 'cbuf' is |
| // NULL and 'do_size' is false. Returns 'size' plus 5 bytes plus 0, 1 or 4 |
| // displacement bytes. |
| static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load, |
|                           int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { |
|   const bool is_double = (reg_lo+1 == reg_hi); |
|   if (cbuf) { |
|     MacroAssembler _masm(cbuf); |
|     if (is_load) { |
|       if (is_double) { |
|         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); |
|       } else { |
|         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); |
|       } |
|     } else { |
|       if (is_double) { |
|         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); |
|       } else { |
|         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); |
|       } |
|     } |
|   } |
| #ifndef PRODUCT |
|   else if (!do_size) { |
|     if (size != 0) st->print("\n\t"); |
|     if (is_load) { |
|       if (is_double) { |
|         st->print("%s %s,[ESP + #%d]", |
|                   UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", |
|                   Matcher::regName[reg_lo], offset); |
|       } else { |
|         st->print("MOVSS %s,[ESP + #%d]", |
|                   Matcher::regName[reg_lo], offset); |
|       } |
|     } else { |
|       if (is_double) { |
|         st->print("MOVSD [ESP + #%d],%s", |
|                   offset, Matcher::regName[reg_lo]); |
|       } else { |
|         st->print("MOVSS [ESP + #%d],%s", |
|                   offset, Matcher::regName[reg_lo]); |
|       } |
|     } |
|   } |
| #endif |
|   int disp_bytes = 4; |
|   if (offset == 0) { |
|     disp_bytes = 0; |
|   } else if (offset <= 127) { |
|     disp_bytes = 1; |
|   } |
|   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. |
|   return size+5+disp_bytes; |
| } |
| |
| |
| // XMM-to-XMM register move. It is a double move when both the source and the |
| // destination halves are adjacent, otherwise a float move. It is emitted when |
| // 'cbuf' is given; in non-product builds it is printed when 'cbuf' is NULL |
| // and 'do_size' is false. Returns 'size' plus the move's byte size. |
| static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, |
|                              int src_hi, int dst_hi, int size, outputStream* st ) { |
|   const bool is_double = (src_lo+1 == src_hi && dst_lo+1 == dst_hi); |
|   if (cbuf) { |
|     MacroAssembler _masm(cbuf); |
|     if (is_double) { |
|       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]), |
|                 as_XMMRegister(Matcher::_regEncode[src_lo])); |
|     } else { |
|       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]), |
|                 as_XMMRegister(Matcher::_regEncode[src_lo])); |
|     } |
|   } |
| #ifndef PRODUCT |
|   else if (!do_size) { |
|     if (size != 0) st->print("\n\t"); |
|     // With UseXmmRegToRegMoveAll, movaps/movapd are used between xmm registers. |
|     if (is_double) { |
|       if (UseXmmRegToRegMoveAll) { |
|         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
|       } else { |
|         st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
|       } |
|     } else { |
|       if (UseXmmRegToRegMoveAll) { |
|         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
|       } else { |
|         st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); |
|       } |
|     } |
|   } |
| #endif |
|   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. |
|   // Only MOVAPS SSE prefix uses 1 byte. |
|   const bool is_sse_movaps = !is_double && UseXmmRegToRegMoveAll && (UseAVX == 0); |
|   const int move_bytes = is_sse_movaps ? 3 : 4; |
|   return size + move_bytes; |
| } |
| |
| // Copies a 32-bit value from a general-purpose register into an XMM register |
| // with movdl. src_hi, dst_hi and size are accepted but not used here; the |
| // function always reports 4 bytes. The listing goes to st (non-product only). |
| static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, |
|                                  int src_hi, int dst_hi, int size, outputStream* st ) { |
|   // 32-bit |
|   if (cbuf != NULL) { |
|     MacroAssembler _masm(cbuf); |
|     const int xmm_enc = Matcher::_regEncode[dst_lo]; |
|     const int gpr_enc = Matcher::_regEncode[src_lo]; |
|     __ movdl(as_XMMRegister(xmm_enc), as_Register(gpr_enc)); |
| #ifndef PRODUCT |
|   } else if (!do_size) { |
|     const char* dst_name = Matcher::regName[dst_lo]; |
|     const char* src_name = Matcher::regName[src_lo]; |
|     st->print("movdl %s, %s\t# spill", dst_name, src_name); |
| #endif |
|   } |
|   return 4; |
| } |
| |
| |
| // Copies a 32-bit value from an XMM register into a general-purpose register |
| // with movdl. src_hi, dst_hi and size are accepted but not used here; the |
| // function always reports 4 bytes. The listing goes to st (non-product only). |
| static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, |
|                                  int src_hi, int dst_hi, int size, outputStream* st ) { |
|   // 32-bit |
|   if (cbuf != NULL) { |
|     MacroAssembler _masm(cbuf); |
|     const int gpr_enc = Matcher::_regEncode[dst_lo]; |
|     const int xmm_enc = Matcher::_regEncode[src_lo]; |
|     __ movdl(as_Register(gpr_enc), as_XMMRegister(xmm_enc)); |
| #ifndef PRODUCT |
|   } else if (!do_size) { |
|     const char* dst_name = Matcher::regName[dst_lo]; |
|     const char* src_name = Matcher::regName[src_lo]; |
|     st->print("movdl %s, %s\t# spill", dst_name, src_name); |
| #endif |
|   } |
|   return 4; |
| } |
| |
| // Integer register-to-register move: opcode 0x8B followed by a register-direct |
| // ModRM byte built from the dst and src encodings. Emits into cbuf when given; |
| // otherwise, unless do_size is set, prints the MOV to st (non-product only). |
| // Returns size plus the 2 bytes of the instruction. |
| static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) { |
|   if (cbuf != NULL) { |
|     const int dst_enc = Matcher::_regEncode[dst]; |
|     const int src_enc = Matcher::_regEncode[src]; |
|     emit_opcode(*cbuf, 0x8B ); |
|     emit_rm (*cbuf, 0x3, dst_enc, src_enc ); |
| #ifndef PRODUCT |
|   } else if (!do_size) { |
|     if (size != 0) { |
|       st->print("\n\t"); |
|     } |
|     st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]); |
| #endif |
|   } |
|   const int mov_bytes = 2;   // opcode + ModRM |
|   return size + mov_bytes; |
| } |
| |
| // Stores an x87 value to the stack slot at offset. |
| // If the source is not FPR1L_num, it is first pushed with FLD and the store |
| // then pops it again (FSTP); a source that already is FPR1L_num is stored |
| // without popping (FST). Adjacent lo/hi pairs on both sides select a 64-bit |
| // store, anything else a 32-bit store. The store itself is delegated to |
| // impl_helper, whose result (the accumulated size) is returned. |
| static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, |
|                                  int offset, int size, outputStream* st ) { |
|   const bool on_top = (src_lo == FPR1L_num); |
|   if (!on_top) { // Move value to top of FP stack, if not already there |
|     if (cbuf) { |
|       emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it) |
|       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] ); |
| #ifndef PRODUCT |
|     } else if (!do_size) { |
|       if (size != 0) st->print("\n\t"); |
|       st->print("FLD %s",Matcher::regName[src_lo]); |
| #endif |
|     } |
|     size += 2; |
|   } |
| |
|   // Register number handed to impl_helper: EBX_num for store & pop, |
|   // EDX_num for store without pop. |
|   const int st_op = on_top ? EDX_num : EBX_num; |
|   const bool is_double = (src_lo+1 == src_hi) && (dst_lo+1 == dst_hi); |
|   const char *op_str; |
|   int op; |
|   if (is_double) { // double store |
|     op = 0xDD; |
|     op_str = on_top ? "FST_D " : "FSTP_D"; |
|   } else { // 32-bit store |
|     op = 0xD9; |
|     op_str = on_top ? "FST_S " : "FSTP_S"; |
|     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); |
|   } |
| |
|   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st); |
| } |
| |
| // Forward declarations: the next two vector helpers are shared by the 32- and 64-bit VM and, per this note, are defined in x86.ad. |
| static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, |
| int src_hi, int dst_hi, uint ireg, outputStream* st); |
| |
| static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, |
| int stack_offset, int reg, uint ireg, outputStream* st); |
| |
| // Copies a vector value from one stack slot to another. |
| // |
| //   cbuf       - when non-NULL, the copy is assembled into this buffer |
| //   do_size    - with a NULL cbuf: true suppresses the listing output |
| //   src_offset - ESP-relative offset of the source slot |
| //   dst_offset - ESP-relative offset of the destination slot |
| //   ireg       - Op_VecS, Op_VecD, Op_VecX or Op_VecY |
| //   st         - stream that receives the non-product listing |
| // |
| // VecS and VecD are moved with push/pop pairs (one pair per 32-bit word); |
| // VecX and VecY go through xmm0, which is saved below ESP and restored. |
| // Returns the size in bytes; when code is emitted the measured size is |
| // asserted to equal the precomputed one. |
| static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, |
|                                      int dst_offset, uint ireg, outputStream* st) { |
|   int calc_size = 0; |
|   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); |
|   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); |
|   switch (ireg) { |
|   case Op_VecS: |
|     calc_size = 3+src_offset_size + 3+dst_offset_size; |
|     break; |
|   case Op_VecD: { |
|     calc_size = 3+src_offset_size + 3+dst_offset_size; |
|     // Size the second push/pop pair, which addresses the upper word. |
|     // Use temporaries: src_offset and dst_offset must keep their values |
|     // because the emit and listing code below still needs the low-word offsets. |
|     const int hi_src_offset = src_offset + 4; |
|     const int hi_dst_offset = dst_offset + 4; |
|     src_offset_size = (hi_src_offset == 0) ? 0 : ((hi_src_offset < 0x80) ? 1 : 4); |
|     dst_offset_size = (hi_dst_offset == 0) ? 0 : ((hi_dst_offset < 0x80) ? 1 : 4); |
|     calc_size += 3+src_offset_size + 3+dst_offset_size; |
|     break; |
|   } |
|   case Op_VecX: |
|     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; |
|     break; |
|   case Op_VecY: |
|     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; |
|     break; |
|   default: |
|     ShouldNotReachHere(); |
|   } |
|   if (cbuf) { |
|     MacroAssembler _masm(cbuf); |
|     int offset = __ offset(); |
|     switch (ireg) { |
|     case Op_VecS: |
|       __ pushl(Address(rsp, src_offset)); |
|       __ popl (Address(rsp, dst_offset)); |
|       break; |
|     case Op_VecD: |
|       __ pushl(Address(rsp, src_offset)); |
|       __ popl (Address(rsp, dst_offset)); |
|       __ pushl(Address(rsp, src_offset+4)); |
|       __ popl (Address(rsp, dst_offset+4)); |
|       break; |
|     case Op_VecX: |
|       __ movdqu(Address(rsp, -16), xmm0);   // save xmm0 below ESP |
|       __ movdqu(xmm0, Address(rsp, src_offset)); |
|       __ movdqu(Address(rsp, dst_offset), xmm0); |
|       __ movdqu(xmm0, Address(rsp, -16));   // restore xmm0 |
|       break; |
|     case Op_VecY: |
|       __ vmovdqu(Address(rsp, -32), xmm0);  // save xmm0 below ESP |
|       __ vmovdqu(xmm0, Address(rsp, src_offset)); |
|       __ vmovdqu(Address(rsp, dst_offset), xmm0); |
|       __ vmovdqu(xmm0, Address(rsp, -32));  // restore xmm0 |
|       break; |
|     default: |
|       ShouldNotReachHere(); |
|     } |
|     int size = __ offset() - offset; |
|     assert(size == calc_size, "incorrect size calculattion"); |
|     return size; |
| #ifndef PRODUCT |
|   } else if (!do_size) { |
|     switch (ireg) { |
|     case Op_VecS: |
|       st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" |
|                 "popl [rsp + #%d]", |
|                 src_offset, dst_offset); |
|       break; |
|     case Op_VecD: |
|       // The emitted instructions are popl, so the listing says popl too. |
|       st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" |
|                 "popl [rsp + #%d]\n\t" |
|                 "pushl [rsp + #%d]\n\t" |
|                 "popl [rsp + #%d]", |
|                 src_offset, dst_offset, src_offset+4, dst_offset+4); |
|       break; |
|     case Op_VecX: |
|       st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" |
|                 "movdqu xmm0, [rsp + #%d]\n\t" |
|                 "movdqu [rsp + #%d], xmm0\n\t" |
|                 "movdqu xmm0, [rsp - #16]", |
|                 src_offset, dst_offset); |
|       break; |
|     case Op_VecY: |
|       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" |
|                 "vmovdqu xmm0, [rsp + #%d]\n\t" |
|                 "vmovdqu [rsp + #%d], xmm0\n\t" |
|                 "vmovdqu xmm0, [rsp - #32]", |
|                 src_offset, dst_offset); |
|       break; |
|     default: |
|       ShouldNotReachHere(); |
|     } |
| #endif |
|   } |
|   return calc_size; |
| } |
| |
| // Emits, sizes or prints the copy from the location allocated to in(1) to the |
| // location allocated to this node. |
| //   cbuf != NULL              : machine code is emitted into cbuf |
| //   cbuf == NULL, !do_size    : a listing is printed to st (non-product builds) |
| //   cbuf == NULL, do_size     : nothing is produced, only the size is computed |
| // Each location is a first/second register pair classified by rc_class(); the |
| // cases below dispatch on the source and destination classes. |
| // Returns the size in bytes of the copy. |
| uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { |
|   // Get registers to move |
|   OptoReg::Name src_second = ra_->get_reg_second(in(1)); |
|   OptoReg::Name src_first = ra_->get_reg_first(in(1)); |
|   OptoReg::Name dst_second = ra_->get_reg_second(this ); |
|   OptoReg::Name dst_first = ra_->get_reg_first(this ); |
| |
|   enum RC src_second_rc = rc_class(src_second); |
|   enum RC src_first_rc = rc_class(src_first); |
|   enum RC dst_second_rc = rc_class(dst_second); |
|   enum RC dst_first_rc = rc_class(dst_first); |
| |
|   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); |
| |
|   // Generate spill code! |
|   int size = 0; |
| |
|   if( src_first == dst_first && src_second == dst_second ) |
|     return size; // Self copy, no move |
| |
|   // Vector values are handled entirely by the vec_* helpers. |
|   if (bottom_type()->isa_vect() != NULL) { |
|     uint ireg = ideal_reg(); |
|     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); |
|     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); |
|     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity"); |
|     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { |
|       // mem -> mem |
|       int src_offset = ra_->reg2offset(src_first); |
|       int dst_offset = ra_->reg2offset(dst_first); |
|       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); |
|     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { |
|       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); |
|     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { |
|       int stack_offset = ra_->reg2offset(dst_first); |
|       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); |
|     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { |
|       int stack_offset = ra_->reg2offset(src_first); |
|       return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); |
|     } else { |
|       ShouldNotReachHere(); |
|     } |
|   } |
| |
|   // -------------------------------------- |
|   // Check for mem-mem move. push/pop to move. |
|   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { |
|     if( src_second == dst_first ) { // overlapping stack copy ranges |
|       // Move the second word first so it is read before the low-word copy overwrites it. |
|       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); |
|       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); |
|       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); |
|       src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits |
|     } |
|     // move low bits |
|     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); |
|     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); |
|     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits |
|       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); |
|       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); |
|     } |
|     return size; |
|   } |
| |
|   // -------------------------------------- |
|   // Check for integer reg-reg copy |
|   if( src_first_rc == rc_int && dst_first_rc == rc_int ) |
|     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); |
| |
|   // Check for integer store |
|   if( src_first_rc == rc_int && dst_first_rc == rc_stack ) |
|     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); |
| |
|   // Check for integer load |
|   if( dst_first_rc == rc_int && src_first_rc == rc_stack ) |
|     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); |
| |
|   // Check for integer reg-xmm reg copy |
|   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { |
|     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), |
|             "no 64 bit integer-float reg moves" ); |
|     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); |
|   } |
|   // -------------------------------------- |
|   // Check for float reg-reg copy |
|   if( src_first_rc == rc_float && dst_first_rc == rc_float ) { |
|     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || |
|             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); |
|     if( cbuf ) { |
| |
|       // Note the mucking with the register encode to compensate for the 0/1 |
|       // indexing issue mentioned in a comment in the reg_def sections |
|       // for FPR registers many lines above here. |
| |
|       if( src_first != FPR1L_num ) { |
|         emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) |
|         emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); |
|         emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) |
|         emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); |
|       } else { |
|         emit_opcode (*cbuf, 0xDD ); // FST ST(i) |
|         emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); |
|       } |
| #ifndef PRODUCT |
|     } else if( !do_size ) { |
|       if( size != 0 ) st->print("\n\t"); |
|       if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); |
|       else st->print( "FST %s", Matcher::regName[dst_first]); |
| #endif |
|     } |
|     return size + ((src_first != FPR1L_num) ? 2+2 : 2); |
|   } |
| |
|   // Check for float store |
|   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { |
|     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); |
|   } |
| |
|   // Check for float load |
|   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { |
|     int offset = ra_->reg2offset(src_first); |
|     const char *op_str; |
|     int op; |
|     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? |
|       op_str = "FLD_D"; |
|       op = 0xDD; |
|     } else { // 32-bit load |
|       op_str = "FLD_S"; |
|       op = 0xD9; |
|       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); |
|     } |
|     if( cbuf ) { |
|       emit_opcode (*cbuf, op ); |
|       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); |
|       emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) |
|       emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); |
| #ifndef PRODUCT |
|     } else if( !do_size ) { |
|       if( size != 0 ) st->print("\n\t"); |
|       st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); |
| #endif |
|     } |
|     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); |
|     return size + 3+offset_size+2; |
|   } |
| |
|   // Check for xmm reg-reg copy |
|   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { |
|     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || |
|             (src_first+1 == src_second && dst_first+1 == dst_second), |
|             "no non-adjacent float-moves" ); |
|     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); |
|   } |
| |
|   // Check for xmm reg-integer reg copy |
|   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { |
|     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), |
|             "no 64 bit float-integer reg moves" ); |
|     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); |
|   } |
| |
|   // Check for xmm store |
|   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { |
|     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); |
|   } |
| |
|   // Check for float xmm load |
|   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { |
|     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); |
|   } |
| |
|   // Copy from float reg to xmm reg |
|   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { |
|     // copy to the top of stack from floating point reg |
|     // and use LEA to preserve flags |
|     if( cbuf ) { |
|       emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] |
|       emit_rm(*cbuf, 0x1, ESP_enc, 0x04); |
|       emit_rm(*cbuf, 0x0, 0x04, ESP_enc); |
|       emit_d8(*cbuf,0xF8); |
| #ifndef PRODUCT |
|     } else if( !do_size ) { |
|       if( size != 0 ) st->print("\n\t"); |
|       st->print("LEA ESP,[ESP-8]"); |
| #endif |
|     } |
|     size += 4; |
| |
|     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); |
| |
|     // Copy from the temp memory to the xmm reg. |
|     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); |
| |
|     if( cbuf ) { |
|       emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] |
|       emit_rm(*cbuf, 0x1, ESP_enc, 0x04); |
|       emit_rm(*cbuf, 0x0, 0x04, ESP_enc); |
|       emit_d8(*cbuf,0x08); |
| #ifndef PRODUCT |
|     } else if( !do_size ) { |
|       if( size != 0 ) st->print("\n\t"); |
|       st->print("LEA ESP,[ESP+8]"); |
| #endif |
|     } |
|     size += 4; |
|     return size; |
|   } |
| |
|   assert( size > 0, "missed a case" ); |
| |
|   // -------------------------------------------------------------------- |
|   // Check for second bits still needing moving. |
|   if( src_second == dst_second ) |
|     return size; // Self copy; no move |
|   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); |
| |
|   // Check for second word int-int move |
|   if( src_second_rc == rc_int && dst_second_rc == rc_int ) |
|     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); |
| |
|   // Check for second word integer store |
|   if( src_second_rc == rc_int && dst_second_rc == rc_stack ) |
|     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); |
| |
|   // Check for second word integer load |
|   if( dst_second_rc == rc_int && src_second_rc == rc_stack ) |
|     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); |
| |
| |
|   Unimplemented(); |
|   return 0; // Not expected to be reached; keeps the non-void function from falling off its end. |
| } |
| |
| #ifndef PRODUCT |
| // Prints the listing for this spill copy to st by running implementation() |
| // without a code buffer and with size-only mode switched off. |
| void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { |
|   CodeBuffer* const no_code = NULL; |
|   const bool size_only = false; |
|   implementation( no_code, ra_, size_only, st ); |
| } |
| #endif |
| |
| // Emits the spill copy into cbuf; no listing stream is supplied. |
| void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { |
|   outputStream* const no_listing = NULL; |
|   const bool size_only = false; |
|   implementation( &cbuf, ra_, size_only, no_listing ); |
| } |
| |
| // Returns the size of the spill copy as computed by implementation() in |
| // size-only mode (no code buffer, no listing stream). |
| uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { |
|   CodeBuffer* const no_code = NULL; |
|   outputStream* const no_listing = NULL; |
|   const bool size_only = true; |
|   return implementation( no_code, ra_, size_only, no_listing ); |
| } |
| |
| |
| //============================================================================= |
| #ifndef PRODUCT |
| // Prints the LEA that loads the address of the box's stack slot into the |
| // register allocated to this node. |
| void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { |
|   const int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); |
|   const int dst_reg = ra_->get_reg_first(this); |
|   const char* dst_name = Matcher::regName[dst_reg]; |
|   st->print("LEA %s,[ESP + #%d]",dst_name,box_offset); |
| } |
| #endif |
| |
| // Emits LEA reg,[SP+offset] for the box's stack slot. Offsets of 128 and |
| // above use the ModRM mode 0x2 with a 32-bit displacement; smaller offsets |
| // use mode 0x1 with an 8-bit displacement. |
| void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { |
|   const int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); |
|   const int dst_enc = ra_->get_encode(this); |
|   const bool wide_disp = (box_offset >= 128); |
| |
|   emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] |
|   emit_rm(cbuf, wide_disp ? 0x2 : 0x1, dst_enc, 0x04); |
|   emit_rm(cbuf, 0x0, 0x04, ESP_enc); |
|   if (wide_disp) { |
|     emit_d32(cbuf, box_offset); |
|   } else { |
|     emit_d8(cbuf, box_offset); |
|   } |
| } |
| |
| // Size in bytes of the LEA produced by emit(): 7 when the slot offset is 128 |
| // or more, 4 otherwise. |
| uint BoxLockNode::size(PhaseRegAlloc *ra_) const { |
|   const int box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); |
|   return (box_offset >= 128) ? 7 : 4; |
| } |
| |
| //============================================================================= |
| #ifndef PRODUCT |
| // Prints the inline cache check: the compare, the jump to the ic-miss stub |
| // and two NOPs, plus a third NOP unless OptoBreakpoint is set. |
| void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { |
|   st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); |
|   st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); |
|   const int nop_lines = OptoBreakpoint ? 2 : 3; |
|   for (int i = 0; i < nop_lines; i++) { |
|     st->print_cr("\tNOP"); |
|   } |
| } |
| #endif |
| |
| // Emits the inline cache check: compare rax with the klass word loaded from |
| // the object in rcx, jump to the ic-miss stub when they differ, then pad with |
| // NOPs. In ASSERT builds the emitted length is checked against size(). |
| void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { |
|   MacroAssembler masm(&cbuf); |
| #ifdef ASSERT |
|   uint insts_size = cbuf.insts_size(); |
| #endif |
|   const Address klass_slot(rcx, oopDesc::klass_offset_in_bytes()); |
|   masm.cmpptr(rax, klass_slot); |
|   masm.jump_cc(Assembler::notEqual, |
|                RuntimeAddress(SharedRuntime::get_ic_miss_stub())); |
|   /* WARNING these NOPs are critical so that verified entry point is properly |
|      aligned for patching by NativeJump::patch_verified_entry() */ |
|   // Two NOPs when OptoBreakpoint leaves space for int3, three otherwise. |
|   const int nops_cnt = OptoBreakpoint ? 2 : 3; |
|   masm.nop(nops_cnt); |
| |
|   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); |
| } |
| |
| // Size in bytes of the inline cache check: 11 with OptoBreakpoint, else 12. |
| uint MachUEPNode::size(PhaseRegAlloc *ra_) const { |
|   if (OptoBreakpoint) { |
|     return 11; |
|   } |
|   return 12; |
| } |
| |
| |
| //============================================================================= |
| |
| // Converts a register number into an FPU offset by subtracting 32. |
| int Matcher::regnum_to_fpu_offset(int regnum) { |
|   const int fp_chunk_base = 32; // The FP registers are in the second chunk |
|   return regnum - fp_chunk_base; |
| } |
| |
| // This query is UltraSparc specific; returning true here just means we have a fast l2f conversion. |
| const bool Matcher::convL2FSupported(void) { |
| return true; |
| } |
| |
| // Is this branch offset short enough that a short branch can be used? |
| // |
| // NOTE: If the platform does not provide any short branch variants, then |
| // this method should return false for offset 0. |
| // |
| //   rule    - rule of the branch; jmpConUCF2_rule gets a narrower range |
| //   br_size - size of the branch instruction |
| //   offset  - offset relative to the address of the branch |
| bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { |
|   // The passed offset is relative to address of the branch. |
|   // On 86 a branch displacement is calculated relative to address |
|   // of a next instruction. |
|   const int disp = offset - br_size; |
| |
|   // the short version of jmpConUCF2 contains multiple branches, |
|   // making the reach slightly less |
|   const bool multi_branch = (rule == jmpConUCF2_rule); |
|   const int lowest = multi_branch ? -126 : -128; |
|   const int highest = multi_branch ? 125 : 127; |
|   return (lowest <= disp) && (disp <= highest); |
| } |
| |
| const bool Matcher::isSimpleConstant64(jlong value) { |
| // Will one (StoreL ConL) be cheaper than two (StoreI ConI)? The answer here is always false; value is not inspected. |
| return false; |
| } |
| |
| // The ecx parameter to rep stos for the ClearArray node is in dwords, not bytes, hence false. |
| const bool Matcher::init_array_count_is_in_bytes = false; |
| |
| // Threshold size for ClearArray, defined as 8 * BytesPerLong. |
| const int Matcher::init_array_short_size = 8 * BytesPerLong; |
| |
| // Cost reported for a conditional move of a long; needs 2 CMOV's for longs. |
| const int Matcher::long_cmove_cost() { return 1; } |
| |
| // No CMOVF/CMOVD with SSE/SSE2: with UseSSE >= 1 the reported cost is |
| // ConditionalMoveLimit, otherwise it is 0. |
| const int Matcher::float_cmove_cost() { |
|   if (UseSSE >= 1) { |
|     return ConditionalMoveLimit; |
|   } |
|   return 0; |
| } |
| |
| // Does the CPU require late expand (see block.cpp for description of late expand)? Not here. |
| const bool Matcher::require_postalloc_expand = false; |
| |
| // Should the Matcher clone shifts on addressing modes, expecting them to |
| // be subsumed into complex addressing expressions, or compute them into |
| // registers? True for Intel but false for most RISCs. |
| const bool Matcher::clone_shift_expressions = true; |
| |
| // Do we need to mask the count passed to shift instructions, or does |
| // the cpu only look at the lower 5/6 bits anyway? No masking is requested here. |
| const bool Matcher::need_masked_shift_count = false; |
| |
| bool Matcher::narrow_oop_use_complex_address() { |
| ShouldNotCallThis(); /* this query is not expected to be called in this file's configuration */ |
| return true; |
| } |
| |
| bool Matcher::narrow_klass_use_complex_address() { |
| ShouldNotCallThis(); /* this query is not expected to be called in this file's configuration */ |
| return true; |
| } |
| |
| |
| // Is it better to copy float constants, or load them directly from memory? |
| // Intel can load a float constant from a direct address, requiring no |
| // extra registers. Most RISCs will have to materialize an address into a |
| // register first, so they would do better to copy the constant from stack. Hence true here. |
| const bool Matcher::rematerialize_float_constants = true; |
| |
| // If the CPU can load and store mis-aligned doubles directly then no fixup is |
| // needed. Else we split the double into 2 integer pieces and move it |
| // piece-by-piece. Only happens when passing doubles into C code, as the |
| // Java calling convention forces doubles to be aligned. |
| const bool Matcher::misaligned_doubles_ok = true; |
| |
| |
| // Replaces the memory operand of node that owns input edge idx with its |
| // *_win95_safe counterpart. |
| // The operand is located by walking the operands from index 1 and summing |
| // their edge counts, starting at oper_input_base(), until the range that |
| // contains idx is reached. Operand kinds that need no change are left alone; |
| // an unexpected kind trips an assert and leaves the node untouched. |
| void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { |
|   // Get the memory operand from the node |
|   const uint operand_count = node->num_opnds(); // Virtual call for number of operands |
|   uint edges_before = node->oper_input_base(); // Sum of leaves skipped so far |
|   assert( idx >= edges_before, "idx too low in pd_implicit_null_fixup" ); |
|   uint opnd_idx = 1; // First operand |
|   for (uint edges = node->_opnds[1]->num_edges(); // leaves for first operand |
|        idx >= edges_before + edges; |
|        edges = node->_opnds[opnd_idx]->num_edges()) { // leaves for next operand |
|     edges_before += edges; |
|     opnd_idx++; // Bump operand count |
|     assert( opnd_idx < operand_count, "Accessing non-existent operand" ); |
|   } |
| |
|   MachOper *mem_opnd = node->_opnds[opnd_idx]; |
|   MachOper *safe_opnd = NULL; |
|   switch (mem_opnd->opcode()) { |
|   case DIRECT: |
|   case INDOFFSET32X: |
|     // No transformation necessary. |
|     return; |
|   case LOAD_LONG_INDIRECT: |
|   case LOAD_LONG_INDOFFSET32: |
|     // Does not use EBP as address register, use { EDX, EBX, EDI, ESI} |
|     return; |
|   case INDIRECT: |
|     safe_opnd = new (C) indirect_win95_safeOper( ); |
|     break; |
|   case INDOFFSET8: |
|     safe_opnd = new (C) indOffset8_win95_safeOper(mem_opnd->disp(NULL, NULL, 0)); |
|     break; |
|   case INDOFFSET32: |
|     safe_opnd = new (C) indOffset32_win95_safeOper(mem_opnd->disp(NULL, NULL, 0)); |
|     break; |
|   case INDINDEXOFFSET: |
|     safe_opnd = new (C) indIndexOffset_win95_safeOper(mem_opnd->disp(NULL, NULL, 0)); |
|     break; |
|   case INDINDEXSCALE: |
|     safe_opnd = new (C) indIndexScale_win95_safeOper(mem_opnd->scale()); |
|     break; |
|   case INDINDEXSCALEOFFSET: |
|     safe_opnd = new (C) indIndexScaleOffset_win95_safeOper(mem_opnd->scale(), mem_opnd->disp(NULL, NULL, 0)); |
|     break; |
|   default: |
|     assert(false, "unexpected memory operand in pd_implicit_null_fixup()"); |
|     return; |
|   } |
|   node->_opnds[opnd_idx] = safe_opnd; |
| } |
| |
| // Advertise here if the CPU requires explicit rounding operations |
| // to implement the UseStrictFP mode. This description advertises that it does. |
| const bool Matcher::strict_fp_requires_explicit_rounding = true; |
| |
| // Are floats converted to double when stored to stack during deoptimization? |
| // On x32 they are stored with conversion only when the FPU is used for floats, i.e. when UseSSE == 0. |
| bool Matcher::float_in_double() { return (UseSSE == 0); } |
| |
| // Do ints take an entire long register or just half? Here the answer is false, i.e. not an entire long register. |
| const bool Matcher::int_in_long = false; |
| |
| // Return whether or not this register is ever used as an argument. This |
| // function is used on startup to build the trampoline stubs in generateOptoStub. |
| // Registers not mentioned will be killed by the VM call in the trampoline, and |
| // arguments in those registers not be available to the callee. |
| // ECX and EDX always qualify; XMM0/XMM1 qualify with UseSSE >= 1 and their |
| // XMM0b/XMM1b halves with UseSSE >= 2. |
| bool Matcher::can_be_java_arg( int reg ) { |
|   const bool int_arg = (reg == ECX_num) || (reg == EDX_num); |
|   const bool xmm_arg = (UseSSE >= 1) && (reg == XMM0_num || reg == XMM1_num); |
|   const bool xmm_b_arg = (UseSSE >= 2) && (reg == XMM0b_num || reg == XMM1b_num); |
|   return int_arg || xmm_arg || xmm_b_arg; |
| } |
| |
| // A register is a spillable argument exactly when can_be_java_arg() accepts it. |
| bool Matcher::is_spillable_arg( int reg ) { |
|   const bool java_arg = can_be_java_arg(reg); |
|   return java_arg; |
| } |
| |
| // Decides whether a long division by the constant divisor should use the |
| // hardware integer DIV instruction. |
| bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { |
|   // Use hardware integer DIV instruction when |
|   // it is faster than a code which use multiply. |
|   if (!VM_Version::has_fast_idiv()) { |
|     return false; |
|   } |
|   // Only when constant divisor fits into 32 bit |
|   // (min_jint is excluded to get only correct |
|   // positive 32 bit values from negative). |
|   const bool fits_in_int = (divisor == (int)divisor); |
|   return fits_in_int && divisor != min_jint; |
| } |
| |
| // Register mask for the DIVI projection of divmodI: the EAX register class. |
| RegMask Matcher::divI_proj_mask() { |
| return EAX_REG_mask(); |
| } |
| |
| // Register mask for the MODI projection of divmodI: the EDX register class. |
| RegMask Matcher::modI_proj_mask() { |
| return EDX_REG_mask(); |
| } |
| |
| // Register for DIVL projection of divmodL; not expected to be reached here, an empty mask follows the ShouldNotReachHere(). |
| RegMask Matcher::divL_proj_mask() { |
| ShouldNotReachHere(); |
| return RegMask(); |
| } |
| |
| // Register for MODL projection of divmodL; not expected to be reached here, an empty mask follows the ShouldNotReachHere(). |
| RegMask Matcher::modL_proj_mask() { |
| ShouldNotReachHere(); |
| return RegMask(); |
| } |
| |
| const RegMask Matcher::method_handle_invoke_SP_save_mask() { |
| return NO_REG_mask(); /* the NO_REG class mask is reported as the SP save mask for method handle invokes */ |
| } |
| |
| // Returns true if the high 32 bits of the value is known to be zero. |
| // Two shapes are recognized: an AndL whose second input is a constant with |
| // no bits set in the upper half, and a ConL whose own upper half is clear. |
| bool is_operand_hi32_zero(Node* n) { |
|   switch (n->Opcode()) { |
|   case Op_AndL: { |
|     Node* mask = n->in(2); |
|     return mask->is_Con() && (mask->get_long() & 0xFFFFFFFF00000000LL) == 0LL; |
|   } |
|   case Op_ConL: |
|     return (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL; |
|   default: |
|     return false; |
|   } |
| } |
| |
| %} |
| |
| //----------ENCODING BLOCK----------------------------------------------------- |
| // This block specifies the encoding classes used by the compiler to output |
| // byte streams. Encoding classes generate functions which are called by |
| // Machine Instruction Nodes in order to generate the bit encoding of the |
| // instruction. Operands specify their base encoding interface with the |
| // interface keyword. Four interfaces are currently supported: |
| // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an |
| // operand to generate a function which returns its register number when |
| // queried. CONST_INTER causes an operand to generate a function which |
| // returns the value of the constant when queried. MEMORY_INTER causes an |
| // operand to generate four functions which return the Base Register, the |
| // Index Register, the Scale Value, and the Offset Value of the operand when |
| // queried. COND_INTER causes an operand to generate six functions which |
| // return the encoding code (ie - encoding bits for the instruction) |
| // associated with each basic boolean condition for a conditional instruction. |
| // Instructions specify two basic values for encoding. They use the |
| // ins_encode keyword to specify their encoding class (which must be one of |
| // the class names specified in the encoding block), and they use the |
| // opcode keyword to specify, in order, their primary, secondary, and |
| // tertiary opcode. Only the opcode sections which a particular instruction |
| // needs for encoding need to be specified. |
| encode %{ |
| // Build emit functions for each basic byte or larger field in the intel |
| // encoding scheme (opcode, rm, sib, immediate), and call them from C++ |
| // code in the enc_class source block. Emit functions will live in the |
| // main source block for now. In future, we can generalize this by |
| // adding a syntax that specifies the sizes of fields in an order, |
| // so that the adlc can build the emit functions automagically |
| |
| // Emit the primary opcode of the instruction that uses this encoding class |
| enc_class OpcP %{ |
| emit_opcode(cbuf, $primary); |
| %} |
| |
| // Emit the secondary opcode of the instruction that uses this encoding class |
| enc_class OpcS %{ |
| emit_opcode(cbuf, $secondary); |
| %} |
| |
| // Emit an opcode directly: the value comes from the constant operand d8 |
| enc_class Opcode(immI d8) %{ |
| emit_opcode(cbuf, $d8$$constant); |
| %} |
| |
| enc_class SizePrefix %{ |
| emit_opcode(cbuf,0x66); /* emits the single prefix byte 0x66 ahead of the instruction */ |
| %} |
| |
| enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many): one r/m byte with mode 0x3 built from dst and src |
| emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); |
| %} |
| |
| enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many): the given opcode, then an r/m byte with mode 0x3 |
| emit_opcode(cbuf,$opcode$$constant); |
| emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); |
| %} |
| |
| enc_class mov_r32_imm0( rRegI dst ) %{ |
| emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 (dst register number is added to the opcode) |
| emit_d32 ( cbuf, 0x0 ); // imm32==0x0, so dst is loaded with zero |
| %} |
| |
| enc_class cdq_enc %{ |
| // Prologue of the full implementation of Java idiv and irem; checks for the |
| // special case (min_int / -1) as described in JVM spec., p.243 & p.271. |
| // |
| // normal case special case |
| // |
| // input : eax: dividend min_int |
| // reg: divisor -1 (the bytes below compare ECX) |
| // |
| // output: eax: quotient (= eax idiv reg) min_int |
| // edx: remainder (= eax irem reg) 0 |
| // |
| // Code sequence: |
| // |
| // 81 F8 00 00 00 80 cmp eax,80000000h |
| // 0F 85 0B 00 00 00 jne normal_case |
| // 33 D2 xor edx,edx |
| // 83 F9 FF cmp ecx,0FFh (imm8 0FFh, i.e. -1) |
| // 0F 84 03 00 00 00 je done |
| // normal_case: |
| // 99 cdq |
| // F7 F9 idiv ecx |
| // done: |
| // |
| emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8); |
| emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); |
| emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp eax,80000000h |
| emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85); |
| emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00); |
| emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case |
| emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor edx,edx |
| emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,0FFh |
| emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84); |
| emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00); |
| emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done |
| // normal_case: |
| emit_opcode(cbuf,0x99); // cdq |
| // idiv (note: must be emitted by the user of this rule) |
| // done: (the je above skips 3 bytes: the cdq plus the 2-byte idiv that follows this rule) |
| %} |
| |
| // Dense encoding for older common ops: the register number is added to the opcode constant |
| enc_class Opc_plus(immI opcode, rRegI reg) %{ |
| emit_opcode(cbuf, $opcode$$constant + $reg$$reg); |
| %} |
| |
| |
| // Opcode enc_class for 8/32 bit immediate instructions with sign-extension |
| enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit |
| // When the immediate fits in a signed byte, OR 0x02 (the sign-extend bit) |
| // into the primary opcode; otherwise emit the primary opcode unchanged |
| // for the 32-bit immediate form. |
| const bool fits_in_byte = ($imm$$constant >= -128) && ($imm$$constant <= 127); |
| emit_opcode(cbuf, fits_in_byte ? ($primary | 0x02) : $primary); |
| %} |
| |
| enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m |
| // Primary opcode, with the sign-extend bit (0x02) set when the immediate |
| // fits in a signed byte and left clear for a 32-bit immediate. |
| const bool fits_in_byte = ($imm$$constant >= -128) && ($imm$$constant <= 127); |
| emit_opcode(cbuf, fits_in_byte ? ($primary | 0x02) : $primary); |
| // Register-direct ModR/M byte: the secondary opcode occupies the reg |
| // field and dst is the r/m operand. |
| const int rm_operand = $dst$$reg; |
| emit_rm(cbuf, 0x3, $secondary, rm_operand); |
| %} |
| |
| enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits |
| // Emit the immediate operand itself: one byte when the value fits in a |
| // signed byte, otherwise the full 32 bits. |
| const bool needs_imm32 = ($imm$$constant < -128) || ($imm$$constant > 127); |
| if (needs_imm32) { |
| $$$emit32$imm$$constant; |
| } else { |
| $$$emit8$imm$$constant; |
| } |
| %} |
| |
| enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ |
| // Operate on the low word of a long register pair with the low 32 bits |
| // of the long immediate (the upper bits are discarded by the cast). |
| const int lo_word = (int)$imm$$constant; |
| const bool short_form = (lo_word >= -128) && (lo_word <= 127); |
| // Primary opcode; the sign-extend bit (0x02) is set for the imm8 form. |
| if (short_form) { |
| emit_opcode(cbuf, $primary | 0x02); |
| } else { |
| emit_opcode(cbuf, $primary); |
| } |
| // Register-direct ModR/M byte carrying the secondary opcode. |
| emit_rm(cbuf, 0x3, $secondary, $dst$$reg); |
| // The immediate, sized to match the opcode form chosen above. |
| if (short_form) { |
| emit_d8(cbuf, lo_word); |
| } else { |
| emit_d32(cbuf, lo_word); |
| } |
| %} |
| |
| enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ |
| // Operate on the high word of a long register pair with the upper 32 bits |
| // of the long immediate (the lower bits are shifted out). |
| const int hi_word = (int)($imm$$constant >> 32); |
| const bool short_form = (hi_word >= -128) && (hi_word <= 127); |
| // Primary opcode; the sign-extend bit (0x02) is set for the imm8 form. |
| if (short_form) { |
| emit_opcode(cbuf, $primary | 0x02); |
| } else { |
| emit_opcode(cbuf, $primary); |
| } |
| // Register-direct ModR/M byte: tertiary opcode in the reg field, the high |
| // register of the pair as the r/m operand. |
| emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); |
| // The immediate, sized to match the opcode form chosen above. |
| if (short_form) { |
| emit_d8(cbuf, hi_word); |
| } else { |
| emit_d32(cbuf, hi_word); |
| } |
| %} |
| |
| enc_class OpcSReg (rRegI dst) %{ // BSWAP |
| // Emit the secondary opcode combined with the dst register number via |
| // emit_cc (presumably merged into a single byte, as for BSWAP's C8+rd form). |
| const int reg_number = $dst$$reg; |
| emit_cc(cbuf, $secondary, reg_number); |
| %} |
| |
| enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP |
| // Byte-reverse a 64-bit value held in a register pair: byte-swap each |
| // 32-bit half, then exchange the two halves. |
| const int lo_enc = $dst$$reg; |
| const int hi_enc = HIGH_FROM_LOW(lo_enc); |
| // BSWAP lo (0F C8+rd) |
| emit_opcode(cbuf, 0x0F); |
| emit_cc(cbuf, 0xC8, lo_enc); |
| // BSWAP hi (0F C8+rd) |
| emit_opcode(cbuf, 0x0F); |
| emit_cc(cbuf, 0xC8, hi_enc); |
| // XCHG between lo and hi (87 /r, register-direct) |
| emit_opcode(cbuf, 0x87); |
| emit_rm(cbuf, 0x3, lo_enc, hi_enc); |
| %} |
| |
| enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... |
| // Register-direct ModR/M byte whose reg field carries the secondary |
| // opcode extension and whose r/m field names the operand register. |
| const int operand_reg = $div$$reg; |
| emit_rm(cbuf, 0x3, $secondary, operand_reg); |
| %} |
| |
| enc_class enc_cmov(cmpOp cop ) %{ // CMOV |
| // Two opcode bytes: the primary opcode, then the secondary opcode |
| // combined with the condition code of the compare operand. |
| const int condition = $cop$$cmpcode; |
| $$$emit8$primary; |
| emit_cc(cbuf, $secondary, condition); |
| %} |
| |
| enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV |
| // Build a 16-bit x87 conditional-move opcode from base 0xDA00, the |
| // condition code and the source FPU register (encoding minus one), |
| // then emit it high byte first. |
| const int fcmov_word = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); |
| const int first_byte = fcmov_word >> 8; |
| const int second_byte = fcmov_word & 255; |
| emit_d8(cbuf, first_byte); |
| emit_d8(cbuf, second_byte); |
| %} |
| |
| // emulate a CMOV with a conditional branch around a MOV |
| enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV |
| // Short conditional jump (0x70 + cc, rel8) that skips the following MOV. |
| // The branch must be taken when the CMOV would NOT move, so the lowest |
| // bit of the condition code is flipped to invert its sense. |
| const int inverted_cc = $cop$$cmpcode ^ 1; |
| emit_cc( cbuf, 0x70, inverted_cc ); |
| // 8-bit displacement supplied by the instruction (size of the skipped MOV). |
| emit_d8( cbuf, $brOffs$$constant ); |
| %} |
| |
| enc_class enc_PartialSubtypeCheck( ) %{ |
| // Slow-path subtype check on fixed registers: |
| // ESI = sub class, EAX = super class, ECX = scratch (killed), EDI = result. |
| MacroAssembler _masm(&cbuf); |
| Label miss; |
| const Register sub_klass = as_Register(ESI_enc); |
| const Register super_klass = as_Register(EAX_enc); |
| const Register scratch = as_Register(ECX_enc); |
| const Register result = as_Register(EDI_enc); |
| |
| // No success label is passed (NULL); the other label is 'miss', so the |
| // failing case presumably branches there and success falls through. |
| __ check_klass_subtype_slow_path(sub_klass, super_klass, scratch, result, |
| NULL, &miss, |
| /*set_cond_codes:*/ true); |
| // On the fall-through path, zero the result register when the |
| // instruction's primary opcode is non-zero. |
| if ($primary) { |
| __ xorptr(result, result); |
| } |
| __ bind(miss); |
| %} |
| |
| enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All |
| // Make sure the x87 stack is empty, and check that this encoding always |
| // emits the same number of bytes. |
| MacroAssembler masm(&cbuf); |
| int start = masm.offset(); |
| if (UseSSE < 2) { |
| // External c_calling_convention expects the FPU stack to be 'clean'. |
| // Compiled code leaves it dirty. Do cleanup now. |
| masm.empty_FPU_stack(); |
| } else if (VerifyFPU) { |
| // In SSE2+ mode nothing is cleaned up; optionally verify it is empty. |
| masm.verify_FPU(0, "must be empty in SSE2+ mode"); |
| } |
| // The first emission records the size (-1 means "not yet known"); |
| // every later emission must reproduce it exactly. |
| if (sizeof_FFree_Float_Stack_All != -1) { |
| assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); |
| } else { |
| sizeof_FFree_Float_Stack_All = masm.offset() - start; |
| } |
| %} |
| |
| enc_class Verify_FPU_For_Leaf %{ |
| // Emits an FPU-stack verification after a runtime leaf call, but only |
| // when the VerifyFPU flag is on; otherwise nothing is emitted. |
| if (VerifyFPU) { |
| MacroAssembler fpu_check(&cbuf); |
| fpu_check.verify_FPU(-3, "Returning from Runtime Leaf call"); |
| } |
| %} |
| |
| enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf |
| // Mark the instruction start; relocation info is recorded against it. |
| cbuf.set_insts_mark(); |
| $$$emit8$primary; |
| // Direct CALL into the runtime. The 32-bit displacement is relative to the |
| // end of the 4-byte displacement field that is about to be emitted. |
| emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), |
| runtime_call_Relocation::spec(), RELOC_IMM32 ); |
| |
| // With SSE2+ a float/double result comes back on the x87 stack and has to |
| // be discarded or transferred to xmm0. |
| if (UseSSE >= 2) { |
| MacroAssembler _masm(&cbuf); |
| const BasicType ret_type = tf()->return_type(); |
| const bool fp_result = (ret_type == T_FLOAT) || (ret_type == T_DOUBLE); |
| |
| if (fp_result && !return_value_is_used()) { |
| // A C runtime call where the return value is unused. In SSE2+ |
| // mode the result needs to be removed from the FPU stack. It's |
| // likely that this function call could be removed by the |
| // optimizer if the C function is a pure function. |
| __ ffree(0); |
| } else if (ret_type == T_FLOAT) { |
| // Spill the x87 result through a 4-byte stack temp into xmm0. |
| __ lea(rsp, Address(rsp, -4)); |
| __ fstp_s(Address(rsp, 0)); |
| __ movflt(xmm0, Address(rsp, 0)); |
| __ lea(rsp, Address(rsp, 4)); |
| } else if (ret_type == T_DOUBLE) { |
| // Same transfer with an 8-byte stack temp for a double. |
| __ lea(rsp, Address(rsp, -8)); |
| __ fstp_d(Address(rsp, 0)); |
| __ movdbl(xmm0, Address(rsp, 0)); |
| __ lea(rsp, Address(rsp, 8)); |
| } |
| } |
| %} |
| |
| |
| enc_class pre_call_resets %{ |
| // State resets emitted ahead of a call. In debug builds the emitted size |
| // is checked against pre_call_resets_size(). |
| debug_only(int off0 = cbuf.insts_size()); |
| Compile* const compile = ra_->C; |
| if (compile->in_24_bit_fp_mode()) { |
| // The method runs with a 24-bit FPU control word: reload the standard one. |
| MacroAssembler _masm(&cbuf); |
| __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); |
| } |
| if (compile->max_vector_size() > 16) { |
| // Clear upper bits of YMM registers when current compiled code uses |
| // wide vectors to avoid AVX <-> SSE transition penalty during call. |
| MacroAssembler _masm(&cbuf); |
| __ vzeroupper(); |
| } |
| debug_only(int off1 = cbuf.insts_size()); |
| assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); |
| %} |
| |
| enc_class post_call_FPU %{ |
| // After a call: if the current compilation runs in 24-bit FP mode, load |
| // the 24-bit FPU control word again. |
| const bool in_24_bit_mode = Compile::current()->in_24_bit_fp_mode(); |
| if (in_24_bit_mode) { |
| MacroAssembler masm(&cbuf); |
| masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); |
| } |
| %} |
| |
| enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL |
| // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine |
| // who we intended to call. |
| cbuf.set_insts_mark(); |
| $$$emit8$primary; |
| if (_method) { |
| // A real Java target: pick the relocation kind by call flavour. |
| if (_optimized_virtual) { |
| emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), |
| opt_virtual_call_Relocation::spec(), RELOC_IMM32 ); |
| } else { |
| emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), |
| static_call_Relocation::spec(), RELOC_IMM32 ); |
| } |
| // Emit stub for static call. A NULL result is recorded as a |
| // compilation failure and the encoding is abandoned. |
| address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); |
| if (stub == NULL) { |
| ciEnv::current()->record_failure("CodeCache is full"); |
| return; |
| } |
| } else { |
| // No method attached: plain runtime call, no stub. |
| emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), |
| runtime_call_Relocation::spec(), RELOC_IMM32 ); |
| } |
| %} |
| |
| enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL |
| // Delegate to the macro assembler's inline-cache call sequence. |
| MacroAssembler _masm(&cbuf); |
| address call_target = (address)$meth$$method; |
| __ ic_call(call_target); |
| %} |
| |
| enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL |
| // Indirect call through the from-compiled entry stored in the Method |
| // addressed by EAX: CALL *[EAX + disp8]. |
| int disp = in_bytes(Method::from_compiled_offset()); |
| // Only the 8-bit displacement form is emitted below. |
| assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); |
| |
| cbuf.set_insts_mark(); |
| $$$emit8$primary; |
| // ModR/M with mod = 01 (disp8 follows), secondary opcode in the reg field, |
| // EAX as base register. |
| emit_rm(cbuf, 0x01, $secondary, EAX_enc ); |
| emit_d8(cbuf, disp); |
| %} |
| |
| // Following encoding is no longer used, but may be restored if calling |
| // convention changes significantly. |
| // Became: Xor_Reg(EBP), Java_To_Runtime( labl ) |
| // |
| // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL |
| // // int ic_reg = Matcher::inline_cache_reg(); |
| // // int ic_encode = Matcher::_regEncode[ic_reg]; |
| // // int imo_reg = Matcher::interpreter_method_oop_reg(); |
| // // int imo_encode = Matcher::_regEncode[imo_reg]; |
| // |
| // // // Interpreter expects method_oop in EBX, currently a callee-saved register, |
| // // // so we load it immediately before the call |
| // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop |
| // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte |
| // |
| // // xor rbp,ebp |
| // emit_opcode(cbuf, 0x33); |
| // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc); |
| // |
| // // CALL to interpreter. |
| // cbuf.set_insts_mark(); |
| // $$$emit8$primary; |
| // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4), |
| // runtime_call_Relocation::spec(), RELOC_IMM32 ); |
| // %} |
| |
| enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR |
| // Shift-by-immediate: primary opcode, register-direct ModR/M with the |
| // secondary opcode in the reg field, then the 8-bit shift count. |
| const int shifted_reg = $dst$$reg; |
| $$$emit8$primary; |
| emit_rm(cbuf, 0x3, $secondary, shifted_reg); |
| $$$emit8$shift$$constant; |
| %} |
| |
| enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate |
| // MOV r32,imm32 (0xB8 + register number). The immediate is always emitted |
| // as 32 bits because this instruction has no zero- or sign-extended |
| // 8-bit immediate form. |
| const int mov_opcode = 0xB8 + $dst$$reg; |
| emit_opcode(cbuf, mov_opcode); |
| $$$emit32$src$$constant; |
| %} |
| |
| enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate |
| // Same shape as LdImmI, except the base opcode comes from the |
| // instruction's primary opcode instead of a literal 0xB8. The immediate |
| // is always 32 bits wide (no 8-bit form exists for load-immediate). |
| const int load_opcode = $primary + $dst$$reg; |
| emit_opcode(cbuf, load_opcode); |
| $$$emit32$src$$constant; |
| %} |
| |
| enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate |
| // Load the low 32 bits of a long constant into the low register of the pair. |
| const int lo_reg = $dst$$reg; |
| const int lo_bits = $src$$constant & 0x0FFFFFFFFL; |
| if (lo_bits != 0) { |
| // Load-immediate (primary opcode + register) with a full 32-bit immediate. |
| emit_opcode(cbuf, $primary + lo_reg); |
| emit_d32(cbuf, lo_bits); |
| } else { |
| // Zero is cheaper as XOR reg,reg (0x33 /r). |
| emit_opcode(cbuf, 0x33); |
| emit_rm(cbuf, 0x3, lo_reg, lo_reg); |
| } |
| %} |
| |
| enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate |
| // Load the upper 32 bits of a long constant into the high register of |
| // the pair; the high register's encoding is the low one plus 2. |
| const int hi_reg = $dst$$reg + 2; |
| const int hi_bits = ((julong)($src$$constant)) >> 32; |
| if (hi_bits != 0) { |
| // Load-immediate (primary opcode + register) with a full 32-bit immediate. |
| emit_opcode(cbuf, $primary + hi_reg); |
| emit_d32(cbuf, hi_bits); |
| } else { |
| // Zero is cheaper as XOR reg,reg (0x33 /r). |
| emit_opcode(cbuf, 0x33); |
| emit_rm(cbuf, 0x3, hi_reg, hi_reg); |
| } |
| %} |
| |
| |
| // Encode a reg-reg copy. If it is useless, then empty encoding. |
| enc_class enc_Copy( rRegI dst, rRegI src ) %{ |
| // Hand both register encodings to encode_Copy, which (per the note on |
| // this enc_class) produces an empty encoding for a useless copy. |
| const int to_reg = $dst$$reg; |
| const int from_reg = $src$$reg; |
| encode_Copy( cbuf, to_reg, from_reg ); |
| %} |
| |
| enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ |
| // Copy the low word of a long register pair into an int register: the |
| // long operand's own encoding is passed as the copy source. |
| const int to_reg = $dst$$reg; |
| const int from_lo_reg = $src$$reg; |
| encode_Copy( cbuf, to_reg, from_lo_reg ); |
| %} |
| |
| enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) |
| // ModR/M byte only, register-direct (mod field 0x3): |
| // reg field <- dst, r/m field <- src. |
| const int modrm_reg = $dst$$reg; |
| const int modrm_rm = $src$$reg; |
| emit_rm(cbuf, 0x3, modrm_reg, modrm_rm); |
| %} |
| |
| enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) |
| // Low-word half of a long reg-reg operation: primary opcode followed by |
| // a register-direct ModR/M byte naming the two low registers. |
| const int dst_lo = $dst$$reg; |
| const int src_lo = $src$$reg; |
| $$$emit8$primary; |
| emit_rm(cbuf, 0x3, dst_lo, src_lo); |
| %} |
| |
| enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) |
| // High-word half of a long reg-reg operation: secondary opcode followed |
| // by a register-direct ModR/M byte naming the two high registers. |
| const int dst_hi = HIGH_FROM_LOW($dst$$reg); |
| const int src_hi = HIGH_FROM_LOW($src$$reg); |
| $$$emit8$secondary; |
| emit_rm(cbuf, 0x3, dst_hi, src_hi); |
| %} |
| |
| enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) |
| // Like RegReg_Lo but without an opcode byte: only the register-direct |
| // ModR/M byte for the two low registers. |
| const int dst_lo = $dst$$reg; |
| const int src_lo = $src$$reg; |
| emit_rm(cbuf, 0x3, dst_lo, src_lo); |
| %} |
| |
| enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) |
| // Like RegReg_Hi but without an opcode byte: only the register-direct |
| // ModR/M byte for the two high registers. |
| const int dst_hi = HIGH_FROM_LOW($dst$$reg); |
| const int src_hi = HIGH_FROM_LOW($src$$reg); |
| emit_rm(cbuf, 0x3, dst_hi, src_hi); |
| %} |
| |
| enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ |
| // Register-direct ModR/M byte pairing an int register (reg field) with |
| // the HIGH word of a long register pair (r/m field). |
| const int int_reg = $dst$$reg; |
| const int long_hi_reg = HIGH_FROM_LOW($src$$reg); |
| emit_rm(cbuf, 0x3, int_reg, long_hi_reg); |
| %} |
| |
| enc_class Con32 (immI src) %{ // Con32(storeImmI) |
| // Append the integer constant 'src' to the instruction as a 32-bit immediate, using the ADLC emit32 macro. |
| $$$emit32$src$$constant; |
| %} |
| |
| enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm |
| // Emit a float constant as a 32-bit immediate holding its raw bit |
| // pattern (obtained with jint_cast, not a numeric conversion). |
| const jfloat float_value = $src$$constant; |
| emit_d32(cbuf, jint_cast( float_value )); |
| %} |
| |
| enc_class Con32F_as_bits(immF src) %{ // storeX_imm |
| // Same as Con32FPR_as_bits but for an immF operand: the float's raw |
| // bit pattern is emitted as a 32-bit immediate. |
| const jfloat float_value = $src$$constant; |
| emit_d32(cbuf, jint_cast( float_value )); |
| %} |
| |
| enc_class Con16 (immI src) %{ // Con16(storeImmI) |
| // Append the integer constant 'src' to the instruction as a 16-bit immediate, using the ADLC emit16 macro. |
| $$$emit16$src$$constant; |
| %} |
| |
| enc_class Con_d32(immI src) %{ |
| // Emit the constant as four raw bytes through emit_d32 (rather than |
| // through the ADLC emit32 macro used by Con32). |
| const int imm32 = $src$$constant; |
| emit_d32(cbuf, imm32); |
| %} |
| |
| enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) |
| // Memory operand with an absolute 32-bit address: ModR/M has mod = 00 and |
| // r/m = 0x05 (disp32 only), with t1 in the reg field. The displacement |
| // emitted here is a literal 0. |
| const int reg_field = $t1$$reg; |
| emit_rm(cbuf, 0x00, reg_field, 0x05 ); |
| emit_d32(cbuf, 0x00); |
| %} |
| |
| enc_class lock_prefix( ) %{ |
| // The LOCK prefix byte (0xF0) is emitted only when os::is_MP() is true. |
| if (os::is_MP()) { |
| emit_opcode(cbuf, 0xF0); |
| } |
| %} |
| |
| // Cmp-xchg long value. |
| // Note: EBX and ECX must be swapped before and after the CMPXCHG8B |
| // instruction because the instruction takes the high-order word of the |
| // new value to store from ECX, whereas our register encoding keeps it |
| // in EBX. |
| enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ |
| // 64-bit compare-and-exchange on the memory addressed by mem_ptr, |
| // bracketed by two EBX<->ECX swaps. |
| const int ptr_reg = $mem_ptr$$reg; |
| |
| // XCHG EBX,ECX (87 D9) |
| emit_opcode(cbuf, 0x87); |
| emit_opcode(cbuf, 0xD9); |
| // LOCK prefix, only when os::is_MP() is true |
| if (os::is_MP()) { |
| emit_opcode(cbuf, 0xF0); |
| } |
| // CMPXCHG8B [ptr_reg] (0F C7 /1, mod = 00) |
| emit_opcode(cbuf, 0x0F); |
| emit_opcode(cbuf, 0xC7); |
| emit_rm( cbuf, 0x0, 1, ptr_reg ); |
| // XCHG EBX,ECX again, restoring the original register assignment |
| emit_opcode(cbuf, 0x87); |
| emit_opcode(cbuf, 0xD9); |
| %} |
| |
| enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ |
| // 32-bit compare-and-exchange on the memory addressed by mem_ptr. |
| const int ptr_reg = $mem_ptr$$reg; |
| |
| // LOCK prefix, only when os::is_MP() is true |
| if (os::is_MP()) { |
| emit_opcode(cbuf, 0xF0); |
| } |
| // CMPXCHG [ptr_reg],r32 (0F B1 /r, mod = 00); the reg field is fixed at 1. |
| emit_opcode(cbuf, 0x0F); |
| emit_opcode(cbuf, 0xB1); |
| emit_rm( cbuf, 0x0, 1, ptr_reg ); |
| %} |
| |
| enc_class enc_flags_ne_to_boolean( iRegI res ) %{ |
| // Turn the current flags into 0/1 in 'res': 0 when "not equal" is set, |
| // 1 otherwise. MOV does not modify flags, so the JNE still tests the |
| // flags that were live on entry. |
| const int mov_imm_opcode = 0xB8 + $res$$reg; |
| |
| // MOV res,0 |
| emit_opcode( cbuf, mov_imm_opcode ); |
| emit_d32( cbuf, 0 ); |
| // JNE,s fail -- rel8 = 5 skips the 5-byte MOV below |
| emit_opcode( cbuf, 0x75 ); |
| emit_d8( cbuf, 5 ); |
| // MOV res,1 |
| emit_opcode( cbuf, mov_imm_opcode ); |
| emit_d32( cbuf, 1 ); |
| // fail: |
| %} |
| |
| enc_class set_instruction_start( ) %{ |
| // Record the current end of the code buffer as the instruction mark, so |
| // that relocation info for a following memory operand refers to the |
| // start of this opcode. |
| cbuf.set_insts_mark(); |
| %} |
| |
| enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem |
| // Encode a register + memory-operand pair (ModR/M, optional SIB and |
| // displacement) via encode_RegMem, forwarding the operand's displacement |
| // relocation type. |
| const int reg_field = $ereg$$reg; |
| const int mem_base = $mem$$base; |
| const int mem_index = $mem$$index; |
| const int mem_scale = $mem$$scale; |
| const int mem_disp = $mem$$disp; |
| const relocInfo::relocType reloc = $mem->disp_reloc(); |
| encode_RegMem(cbuf, reg_field, mem_base, mem_index, mem_scale, mem_disp, reloc); |
| %} |
| |
| enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem |
| // High-word variant of RegMem: uses the high register of the pair and |
| // addresses the memory word 4 bytes above the operand's displacement. |
| const int hi_reg = HIGH_FROM_LOW($ereg$$reg); |
| const int mem_base = $mem$$base; |
| const int mem_index = $mem$$index; |
| const int mem_scale = $mem$$scale; |
| const int hi_disp = $mem$$disp + 4; |
| // A relocatable displacement cannot simply be bumped by 4. |
| assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); |
| encode_RegMem(cbuf, hi_reg, mem_base, mem_index, mem_scale, hi_disp, relocInfo::none); |
| %} |
| |
| enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ |
| // Shift a long register pair by a constant 1..31: a double-precision |
| // shift (0F + tertiary opcode) carries bits across the halves, then a |
| // plain shift (primary opcode, /secondary) finishes the other half. |
| // With tertiary == 0xA4 the feeding half is the low word; otherwise it |
| // is the high word. |
| const int lo_reg = $dst$$reg; |
| const int hi_reg = HIGH_FROM_LOW($dst$$reg); |
| const bool feed_from_low = ($tertiary == 0xA4); |
| const int feeder = feed_from_low ? lo_reg : hi_reg; |
| const int receiver = feed_from_low ? hi_reg : lo_reg; |
| // double shift: receiver <- bits shifted in from feeder |
| emit_opcode(cbuf, 0x0F); |
| emit_opcode(cbuf, $tertiary); |
| emit_rm(cbuf, 0x3, feeder, receiver); |
| emit_d8(cbuf, $cnt$$constant); |
| // plain shift of the feeder half by the same count |
| emit_d8(cbuf, $primary); |
| emit_rm(cbuf, 0x3, $secondary, feeder); |
| emit_d8(cbuf, $cnt$$constant); |
| %} |
| |
| enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ |
| // Shift of a long pair by a constant 32..63 where the high word ends up |
| // as a sign fill: copy hi into lo, shift lo by (cnt - 32) when that is |
| // non-zero, then shift hi by 31. |
| const int lo_reg = $dst$$reg; |
| const int hi_reg = HIGH_FROM_LOW($dst$$reg); |
| const int residual = $cnt$$constant - 32; |
| // MOV lo,hi |
| emit_opcode( cbuf, 0x8B ); |
| emit_rm(cbuf, 0x3, lo_reg, hi_reg); |
| // shift lo by the remaining count; skipped for a shift of exactly 32 |
| if (residual > 0) { |
| emit_d8(cbuf, $primary); |
| emit_rm(cbuf, 0x3, $secondary, lo_reg); |
| emit_d8(cbuf, residual); |
| } |
| // shift hi by 31 |
| emit_d8(cbuf, $primary); |
| emit_rm(cbuf, 0x3, $secondary, hi_reg); |
| emit_d8(cbuf, 31); |
| %} |
| |
| enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ |
| // Shift of a long pair by a constant 32..63 where the vacated word is |
| // zeroed: move one half into the other, shift it by (cnt - 32) when that |
| // is non-zero, then clear the source half. With secondary == 0x5 the |
| // surviving half is the low word; otherwise it is the high word. |
| const int lo_reg = $dst$$reg; |
| const int hi_reg = HIGH_FROM_LOW($dst$$reg); |
| const bool keep_low = ($secondary == 0x5); |
| const int kept = keep_low ? lo_reg : hi_reg; |
| const int cleared = keep_low ? hi_reg : lo_reg; |
| const int residual = $cnt$$constant - 32; |
| |
| // MOV kept,cleared |
| emit_opcode( cbuf, 0x8B ); |
| emit_rm(cbuf, 0x3, kept, cleared); |
| // shift by the remaining count; skipped for a shift of exactly 32 |
| if (residual > 0) { |
| emit_opcode(cbuf, $primary); |
| emit_rm(cbuf, 0x3, $secondary, kept); |
| emit_d8(cbuf, residual); |
| } |
| // XOR cleared,cleared |
| emit_opcode(cbuf, 0x33); |
| emit_rm(cbuf, 0x3, cleared, cleared); |
| %} |
| |
| // Clone of RegMem but accepts an extra parameter to access each |
| // half of a double in memory; it never needs relocation info. |
| enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ |
| // Emit 'opcode', then a reg/mem encoding that addresses one half of a |
| // double in memory: the operand's displacement plus 'disp_for_half'. |
| // No relocation is ever attached. |
| emit_opcode(cbuf, $opcode$$constant); |
| const int target_reg = $rm_reg$$reg; |
| const int mem_base = $mem$$base; |
| const int mem_index = $mem$$index; |
| const int mem_scale = $mem$$scale; |
| const int half_disp = $mem$$disp + $disp_for_half$$constant; |
| encode_RegMem(cbuf, target_reg, mem_base, mem_index, mem_scale, half_disp, relocInfo::none); |
| %} |
| |
| // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! |
| // |
| // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant |
| // and it never needs relocation information. |
| // Frequently used to move data between FPU's Stack Top and memory. |
| enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ |
| // Memory-operand encoding whose ModR/M reg field is the constant |
| // 'rm_opcode' (an opcode extension) rather than a register. The operand |
| // must carry no relocation. |
| const int opcode_ext = $rm_opcode$$constant; |
| const int mem_base = $mem$$base; |
| const int mem_index = $mem$$index; |
| const int mem_scale = $mem$$scale; |
| const int mem_disp = $mem$$disp; |
| assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); |
| encode_RegMem(cbuf, opcode_ext, mem_base, mem_index, mem_scale, mem_disp, relocInfo::none); |
| %} |
| |
| enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ |
| // Memory-operand encoding whose ModR/M reg field is the constant |
| // 'rm_opcode'. Unlike RMopc_Mem_no_oop, the operand's displacement |
| // relocation is forwarded (disp-as-oop when working with static globals). |
| const int opcode_ext = $rm_opcode$$constant; |
| const int mem_base = $mem$$base; |
| const int mem_index = $mem$$index; |
| const int mem_scale = $mem$$scale; |
| const int mem_disp = $mem$$disp; |
| const relocInfo::relocType reloc = $mem->disp_reloc(); |
| encode_RegMem(cbuf, opcode_ext, mem_base, mem_index, mem_scale, mem_disp, reloc); |
| %} |
| |
| enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea |
| // Address form [src0 + src1] for an LEA-style instruction: base register |
| // plus constant displacement, with no index and no scale. |
| const int no_index = 0x04; // 0x04 indicates no index |
| const int no_scale = 0x00; // 0x00 indicates no scale |
| const int target_reg = $dst$$reg; |
| const int base_reg = $src0$$reg; // 0xFFFFFFFF would indicate no base |
| const int offset = $src1$$constant; // 0x00 indicates no displacement |
| encode_RegMem(cbuf, target_reg, base_reg, no_index, no_scale, offset, relocInfo::none); |
| %} |
| |
| enc_class min_enc (rRegI dst, rRegI src) %{ // MIN |
| // dst = min(dst, src), signed, via compare + short branch + move. |
| const int dst_reg = $dst$$reg; |
| const int src_reg = $src$$reg; |
| // CMP dst,src |
| emit_opcode(cbuf, 0x3B); |
| emit_rm(cbuf, 0x3, dst_reg, src_reg); |
| // JL +2: dst is already the smaller value, skip the 2-byte MOV |
| emit_opcode(cbuf, 0x7C); |
| emit_d8(cbuf, 2); |
| // MOV dst,src |
| emit_opcode(cbuf, 0x8B); |
| emit_rm(cbuf, 0x3, dst_reg, src_reg); |
| %} |
| |
| enc_class max_enc (rRegI dst, rRegI src) %{ // MAX |
| // dst = max(dst, src), signed, via compare + short branch + move. |
| const int dst_reg = $dst$$reg; |
| const int src_reg = $src$$reg; |
| // CMP dst,src |
| emit_opcode(cbuf, 0x3B); |
| emit_rm(cbuf, 0x3, dst_reg, src_reg); |
| // JG +2: dst is already the larger value, skip the 2-byte MOV |
| emit_opcode(cbuf, 0x7F); |
| emit_d8(cbuf, 2); |
| // MOV dst,src |
| emit_opcode(cbuf, 0x8B); |
| emit_rm(cbuf, 0x3, dst_reg, src_reg); |
| %} |
| |
| enc_class enc_FPR_store(memory mem, regDPR src) %{ |
| // Store an x87 register to memory. When src is FPR1 a plain FST (/2) |
| // suffices; any other register is first pushed with FLD and then stored |
| // with the popping form FSTP (/3). |
| const int mem_base = $mem$$base; |
| const int mem_index = $mem$$index; |
| const int mem_scale = $mem$$scale; |
| const int mem_disp = $mem$$disp; |
| const relocInfo::relocType reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals |
| const bool src_is_fpr1 = ($src$$reg == FPR1L_enc); |
| if (!src_is_fpr1) { |
| // FLD ST(src-1), i.e. push it |
| emit_opcode( cbuf, 0xD9 ); |
| emit_d8( cbuf, 0xC0-1+$src$$reg ); |
| } |
| const int store_ext = src_is_fpr1 ? 0x2 : 0x3; // 0x2 = just store, 0x3 = store & pop |
| // The mark goes after any FLD so relocation info refers to the store opcode. |
| cbuf.set_insts_mark(); |
| emit_opcode(cbuf, $primary); |
| encode_RegMem(cbuf, store_ext, mem_base, mem_index, mem_scale, mem_disp, reloc); |
| %} |
| |
| enc_class neg_reg(rRegI dst) %{ |
| // NEG dst: opcode F7 with opcode extension /3, register-direct. |
| const int target_reg = $dst$$reg; |
| emit_opcode(cbuf, 0xF7); |
| emit_rm(cbuf, 0x3, 0x03, target_reg); |
| %} |
| |
| enc_class setLT_reg(eCXRegI dst) %{ |
| // SETL dst: two-byte opcode 0F 9C followed by a register-direct ModR/M |
| // byte whose reg field is 0x4. |
| const int target_reg = $dst$$reg; |
| emit_opcode(cbuf, 0x0F); |
| emit_opcode(cbuf, 0x9C); |
| emit_rm( cbuf, 0x3, 0x4, target_reg ); |
| %} |
| |
| enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT |
| // Branch-free conditional add: p = (p - q), plus y when the subtraction |
| // borrowed. SBB tmp,tmp turns the borrow into an all-ones/zero mask. |
| const int p_reg = $p$$reg; |
| const int q_reg = $q$$reg; |
| const int y_reg = $y$$reg; |
| const int mask_reg = $tmp$$reg; |
| |
| // SUB p,q |
| emit_opcode(cbuf, 0x2B); |
| emit_rm(cbuf, 0x3, p_reg, q_reg); |
| // SBB tmp,tmp |
| emit_opcode(cbuf, 0x1B); |
| emit_rm(cbuf, 0x3, mask_reg, mask_reg); |
| // AND tmp,y |
| emit_opcode(cbuf, 0x23); |
| emit_rm(cbuf, 0x3, mask_reg, y_reg); |
| // ADD p,tmp |
| emit_opcode(cbuf, 0x03); |
| emit_rm(cbuf, 0x3, p_reg, mask_reg); |
| %} |
| |
| enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ |
| // 64-bit left shift of a register pair by a variable count in ECX. |
| // If bit 5 of the count is set, first move lo into hi and clear lo. |
| // The SHLD/SHL pair then shifts by the count in CL. |
| const int lo_reg = $dst$$reg; |
| const int hi_reg = HIGH_FROM_LOW($dst$$reg); |
| // TEST ECX,32 |
| emit_opcode(cbuf, 0xF7); |
| emit_rm(cbuf, 0x3, 0, ECX_enc); |
| emit_d32(cbuf, 0x20); |
| // JEQ,s small -- rel8 = 4 skips the MOV (2 bytes) and XOR (2 bytes) |
| emit_opcode(cbuf, 0x74); |
| emit_d8(cbuf, 0x04); |
| // MOV hi,lo |
| emit_opcode(cbuf, 0x8B); |
| emit_rm(cbuf, 0x3, hi_reg, lo_reg); |
| // CLR lo (XOR lo,lo) |
| emit_opcode(cbuf, 0x33); |
| emit_rm(cbuf, 0x3, lo_reg, lo_reg); |
| // small: |
| // SHLD hi,lo,CL |
| emit_opcode(cbuf, 0x0F); |
| emit_opcode(cbuf, 0xA5); |
| emit_rm(cbuf, 0x3, lo_reg, hi_reg); |
| // SHL lo,CL |
| emit_opcode(cbuf, 0xD3); |
| emit_rm(cbuf, 0x3, 0x4, lo_reg); |
| %} |
| |
| enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ |
| // 64-bit logical right shift of a register pair by a variable count in |
| // ECX. If bit 5 of the count is set, first move hi into lo and clear hi. |
| // The SHRD/SHR pair then shifts by the count in CL. |
| const int lo_reg = $dst$$reg; |
| const int hi_reg = HIGH_FROM_LOW($dst$$reg); |
| // TEST ECX,32 |
| emit_opcode(cbuf, 0xF7); |
| emit_rm(cbuf, 0x3, 0, ECX_enc); |
| emit_d32(cbuf, 0x20); |
| // JEQ,s small -- rel8 = 4 skips the MOV (2 bytes) and XOR (2 bytes) |
| emit_opcode(cbuf, 0x74); |
| emit_d8(cbuf, 0x04); |
| // MOV lo,hi |
| emit_opcode(cbuf, 0x8B); |
| emit_rm(cbuf, 0x3, lo_reg, hi_reg); |
| // CLR hi (XOR hi,hi) |
| emit_opcode(cbuf, 0x33); |
| emit_rm(cbuf, 0x3, hi_reg, hi_reg); |
| // small: |
| // SHRD lo,hi,CL |
| emit_opcode(cbuf, 0x0F); |
| emit_opcode(cbuf, 0xAD); |
| emit_rm(cbuf, 0x3, hi_reg, lo_reg); |
| // SHR hi,CL |
| emit_opcode(cbuf, 0xD3); |
| emit_rm(cbuf, 0x3, 0x5, hi_reg); |
| %} |
| |
| enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ |
| // 64-bit arithmetic right shift of a register pair by a variable count |
| // in ECX. If bit 5 of the count is set, first move hi into lo and fill |
| // hi with its sign (SAR hi,31). The SHRD/SAR pair then shifts by CL. |
| const int lo_reg = $dst$$reg; |
| const int hi_reg = HIGH_FROM_LOW($dst$$reg); |
| // TEST ECX,32 |
| emit_opcode(cbuf, 0xF7); |
| emit_rm(cbuf, 0x3, 0, ECX_enc); |
| emit_d32(cbuf, 0x20); |
| // JEQ,s small -- rel8 = 5 skips the MOV (2 bytes) and SAR imm8 (3 bytes) |
| emit_opcode(cbuf, 0x74); |
| emit_d8(cbuf, 0x05); |
| // MOV lo,hi |
| emit_opcode(cbuf, 0x8B); |
| emit_rm(cbuf, 0x3, lo_reg, hi_reg); |
| // SAR hi,31 |
| emit_opcode(cbuf, 0xC1); |
| emit_rm(cbuf, 0x3, 7, hi_reg); |
| emit_d8(cbuf, 0x1F); |
| // small: |
| // SHRD lo,hi,CL |
| emit_opcode(cbuf, 0x0F); |
| emit_opcode(cbuf, 0xAD); |
| emit_rm(cbuf, 0x3, hi_reg, lo_reg); |
| // SAR hi,CL |
| emit_opcode(cbuf, 0xD3); |
| emit_rm(cbuf, 0x3, 0x7, hi_reg); |
| %} |
| |
| |
| // ----------------- Encodings for floating point unit ----------------- |
| // May leave result in FPU-TOS or FPU reg depending on opcodes |
| enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV |
| // x87 register form: primary opcode byte, then a register-direct ModR/M |
| // byte with the secondary opcode in the reg field and src in r/m. |
| const int fpu_reg = $src$$reg; |
| $$$emit8$primary; |
| emit_rm(cbuf, 0x3, $secondary, fpu_reg); |
| %} |
| |
| // Pop argument in FPR0 with FSTP ST(0) |
| enc_class PopFPU() %{ |
| // FSTP ST(0), encoded as DD D8: stores the top of the x87 stack onto |
| // itself and pops it. |
| const int fstp_opcode = 0xDD; |
| const int fstp_st0 = 0xD8; |
| emit_opcode( cbuf, fstp_opcode ); |
| emit_d8( cbuf, fstp_st0 ); |
| %} |
| |
| // !!!!! equivalent to Pop_Reg_F |
| enc_class Pop_Reg_DPR( regDPR dst ) %{ |
| // FSTP ST(i), encoded as DD D8+i with i taken from the dst encoding: |
| // store the x87 top-of-stack into that register and pop. |
| const int fstp_modrm = 0xD8 + $dst$$reg; |
| emit_opcode( cbuf, 0xDD ); |
| emit_d8( cbuf, fstp_modrm ); |
| %} |
| |
| enc_class Push_Reg_DPR( regDPR dst ) %{ |
| // FLD ST(i-1), encoded as D9 C0+(i-1) with i taken from the register |
| // encoding: push a copy of that register onto the x87 stack. |
| const int fld_modrm = 0xC0 - 1 + $dst$$reg; |
| emit_opcode( cbuf, 0xD9 ); |
| emit_d8( cbuf, fld_modrm ); |
| %} |
| |
| enc_class strictfp_bias1( regDPR dst ) %{ |
| // Multiply dst by the 80-bit constant at |
| // StubRoutines::addr_fpu_subnormal_bias1(): load it, then multiply-and-pop. |
| const int bias_addr = (int)StubRoutines::addr_fpu_subnormal_bias1(); |
| // FLD m80real [bias_addr] (DB /5 with an absolute disp32) |
| emit_opcode( cbuf, 0xDB ); |
| emit_opcode( cbuf, 0x2D ); |
| emit_d32( cbuf, bias_addr ); |
| // FMULP ST(dst),ST0 |
| emit_opcode( cbuf, 0xDE ); |
| emit_opcode( cbuf, 0xC8+$dst$$reg ); |
| %} |
| |
| enc_class strictfp_bias2( regDPR dst ) %{ |
| // Multiply dst by the 80-bit constant at |
| // StubRoutines::addr_fpu_subnormal_bias2(): load it, then multiply-and-pop. |
| const int bias_addr = (int)StubRoutines::addr_fpu_subnormal_bias2(); |
| // FLD m80real [bias_addr] (DB /5 with an absolute disp32) |
| emit_opcode( cbuf, 0xDB ); |
| emit_opcode( cbuf, 0x2D ); |
| emit_d32( cbuf, bias_addr ); |
| // FMULP ST(dst),ST0 |
| emit_opcode( cbuf, 0xDE ); |
| emit_opcode( cbuf, 0xC8+$dst$$reg ); |
| %} |
| |
| // Special case for moving an integer register to a stack slot. |
| enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS |
| // Primary opcode plus a stack-slot memory operand, produced by |
| // store_to_stackslot with src as the register field and the slot's |
| // displacement as the offset. |
| const int src_reg = $src$$reg; |
| const int slot_disp = $dst$$disp; |
| store_to_stackslot( cbuf, $primary, src_reg, slot_disp ); |
| %} |
| |
| // Special case for moving a register to a stack slot. |
| enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS |
| // Operand bytes only (the opcode has already been emitted) for |
| // [ESP + disp32] with src in the ModR/M reg field. |
| const int src_reg = $src$$reg; |
| const int slot_disp = $dst$$disp; |
| // ModR/M: mod = 10 (disp32 follows), r/m = ESP encoding, so a SIB byte follows |
| emit_rm( cbuf, 0x02, src_reg, ESP_enc ); |
| // SIB: scale 0, index field = ESP encoding, base = ESP |
| emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); |
| // 32-bit displacement of the stack slot |
| emit_d32(cbuf, slot_disp); |
| %} |
| |
| // Push the integer in stackSlot 'src' onto FP-stack |
| enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] |
| // Load the integer in stack slot 'src' onto the x87 stack: primary opcode |
| // with the secondary opcode as the ModR/M reg field, addressed by the |
| // operand's displacement. |
| const int slot_disp = $src$$disp; |
| store_to_stackslot( cbuf, $primary, $secondary, slot_disp ); |
| %} |
| |
| // Push FPU's TOS float to a stack-slot, and pop FPU-stack |
| enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] |
| // Store the x87 top-of-stack as a single-precision float into the stack |
| // slot and pop: opcode D9 with opcode extension /3. |
| const int slot_disp = $dst$$disp; |
| store_to_stackslot( cbuf, 0xD9, 0x03, slot_disp ); |
| %} |
| |
| // Same as Pop_Mem_F except for opcode |
| // Push FPU's TOS double to a stack-slot, and pop FPU-stack |
| enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] |
| // Store the x87 top-of-stack as a double into the stack slot and pop: |
| // opcode DD with opcode extension /3. |
| const int slot_disp = $dst$$disp; |
| store_to_stackslot( cbuf, 0xDD, 0x03, slot_disp ); |
| %} |
| |
| enc_class Pop_Reg_FPR( regFPR dst ) %{ |
| // FSTP ST(i), encoded as DD D8+i with i taken from the dst encoding: |
| // store the x87 top-of-stack into that register and pop. |
| const int fstp_modrm = 0xD8 + $dst$$reg; |
| emit_opcode( cbuf, 0xDD ); |
| emit_d8( cbuf, fstp_modrm ); |
| %} |
| |
| enc_class Push_Reg_FPR( regFPR dst ) %{ |
| // FLD ST(i-1), encoded as D9 C0+(i-1) with i taken from the register |
| // encoding: push a copy of that register onto the x87 stack. |
| const int fld_modrm = 0xC0 - 1 + $dst$$reg; |
| emit_opcode( cbuf, 0xD9 ); |
| emit_d8( cbuf, fld_modrm ); |
| %} |
| |
| // Push FPU's float to a stack-slot, and pop FPU-stack |
| enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ |
| // Store an x87 float register into a stack slot. FPR1 is stored in place |
| // with FST (/2); any other register is pushed with FLD first and then |
| // stored with the popping FSTP (/3). |
| int store_ext; |
| if ($src$$reg == FPR1L_enc) { |
| store_ext = 0x02; |
| } else { |
| // FLD ST(i-1) |
| emit_opcode( cbuf, 0xD9 ); |
| emit_d8( cbuf, 0xC0-1+$src$$reg ); |
| store_ext = 0x03; |
| } |
| // FST<P>_S [ESP+dst] |
| store_to_stackslot( cbuf, 0xD9, store_ext, $dst$$disp ); |
| %} |
| |
| // Push FPU's double to a stack-slot, and pop FPU-stack |
| enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ |
| // Store an x87 double register into a stack slot. FPR1 is stored in |
| // place with FST (/2); any other register is pushed with FLD first and |
| // then stored with the popping FSTP (/3). |
| int store_ext; |
| if ($src$$reg == FPR1L_enc) { |
| store_ext = 0x02; |
| } else { |
| // FLD ST(i-1) |
| emit_opcode( cbuf, 0xD9 ); |
| emit_d8( cbuf, 0xC0-1+$src$$reg ); |
| store_ext = 0x03; |
| } |
| // FST<P>_D [ESP+dst] |
| store_to_stackslot( cbuf, 0xDD, store_ext, $dst$$disp ); |
| %} |
| |
| // Push FPU's double to a FPU-stack-slot, and pop FPU-stack |
| enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ |
| // Copy one x87 register to another. When src is FPR1 no FLD is needed |
| // and a non-popping FST is used (base 0xD0, minus one because nothing |
| // was pushed). Otherwise src is pushed with FLD and stored with the |
| // popping FSTP (base 0xD8). |
| int store_base; |
| if ($src$$reg == FPR1L_enc) { |
| store_base = 0xD0 - 1; |
| } else { |
| // FLD ST(src-1) |
| emit_opcode( cbuf, 0xD9 ); |
| emit_d8( cbuf, 0xC0-1+$src$$reg ); |
| store_base = 0xD8; |
| } |
| // FST<P> ST(i) |
| emit_opcode( cbuf, 0xDD ); |
| emit_d8( cbuf, store_base+$dst$$reg ); |
| %} |
| |
| |
| enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ |
| // Push dst onto the x87 stack, then arrange for src to sit directly |
| // below it (in FPR1). |
| const int src_enc = $src$$reg; |
| // FLD dst: load dst into FPR0 |
| emit_opcode( cbuf, 0xD9 ); |
| emit_d8( cbuf, 0xC0-1+$dst$$reg ); |
| if (src_enc != FPR1L_enc) { |
| // FINCSTP: step past the value just pushed |
| emit_opcode (cbuf, 0xD9); |
| emit_opcode (cbuf, 0xF7); |
| // FXCH: swap FPR1 with src |
| emit_opcode(cbuf, 0xD9); |
| emit_d8(cbuf, 0xC8-1+src_enc ); |
| // FDECSTP: step back so the pushed value is on top again |
| emit_opcode (cbuf, 0xD9); |
| emit_opcode (cbuf, 0xF6); |
| } |
| %} |
| |
| enc_class Push_ModD_encoding(regD src0, regD src1) %{ |
| // Move two XMM doubles onto the x87 stack through an 8-byte stack temp: |
| // src1 is pushed first, src0 second, so src0 ends up on top. The temp is |
| // left allocated; this encoding does not restore rsp. |
| MacroAssembler _masm(&cbuf); |
| const Address stack_temp(rsp, 0); |
| __ subptr(rsp, 8); |
| __ movdbl(stack_temp, $src1$$XMMRegister); |
| __ fld_d(stack_temp); |
| __ movdbl(stack_temp, $src0$$XMMRegister); |
| __ fld_d(stack_temp); |
| %} |
| |
| enc_class Push_ModF_encoding(regF src0, regF src1) %{ |
| // Move two XMM floats onto the x87 stack through a 4-byte stack temp: |
| // src1 is pushed first, src0 second, so src0 ends up on top. The temp is |
| // left allocated; this encoding does not restore rsp. |
| MacroAssembler _masm(&cbuf); |
| const Address stack_temp(rsp, 0); |
| __ subptr(rsp, 4); |
| __ movflt(stack_temp, $src1$$XMMRegister); |
| __ fld_s(stack_temp); |
| __ movflt(stack_temp, $src0$$XMMRegister); |
| __ fld_s(stack_temp); |
| %} |
| |
| enc_class Push_ResultD(regD dst) %{ |
| // Pop the x87 top-of-stack as a double into the stack temp at [rsp], |
| // load it into the XMM destination, then release the 8-byte temp. |
| MacroAssembler _masm(&cbuf); |
| const Address stack_temp(rsp, 0); |
| __ fstp_d(stack_temp); |
| __ movdbl($dst$$XMMRegister, stack_temp); |
| __ addptr(rsp, 8); |
| %} |
| |
| enc_class Push_ResultF(regF dst, immI d8) %{ |
| // Pop the x87 top-of-stack as a float into the stack temp at [rsp], load |
| // it into the XMM destination, then release 'd8' bytes of stack (the |
| // amount is chosen by the instruction using this encoding). |
| MacroAssembler _masm(&cbuf); |
| const Address stack_temp(rsp, 0); |
| __ fstp_s(stack_temp); |
| __ movflt($dst$$XMMRegister, stack_temp); |
| __ addptr(rsp, $d8$$constant); |
| %} |
| |
| enc_class Push_SrcD(regD src) %{ |
| // Allocate an 8-byte stack temp, spill the XMM double into it and load |
| // it onto the x87 stack. The temp is left allocated. |
| MacroAssembler _masm(&cbuf); |
| const Address stack_temp(rsp, 0); |
| __ subptr(rsp, 8); |
| __ movdbl(stack_temp, $src$$XMMRegister); |
| __ fld_d(stack_temp); |
| %} |
| |
| enc_class push_stack_temp_qword() %{ |
| // Reserve an 8-byte temporary by lowering rsp. |
| const int qword_bytes = 8; |
| MacroAssembler _masm(&cbuf); |
| __ subptr(rsp, qword_bytes); |
| %} |
| |
| enc_class pop_stack_temp_qword() %{ |
| // Release an 8-byte temporary by raising rsp. |
| const int qword_bytes = 8; |
| MacroAssembler _masm(&cbuf); |
| __ addptr(rsp, qword_bytes); |
| %} |
| |
| enc_class push_xmm_to_fpr1(regD src) %{ |
| // Spill the XMM double to [rsp] and load it onto the x87 stack. rsp is |
| // not adjusted here, so the 8 bytes at [rsp] must already be reserved. |
| MacroAssembler _masm(&cbuf); |
| const Address stack_temp(rsp, 0); |
| __ movdbl(stack_temp, $src$$XMMRegister); |
| __ fld_d(stack_temp); |
| %} |
| |
| enc_class Push_Result_Mod_DPR( regDPR src) %{ |
| // Reverse the FPR1<->src exchange made by Push_Reg_Mod_DPR. Nothing is |
| // emitted when src already is FPR1. |
| const int src_enc = $src$$reg; |
| if (src_enc != FPR1L_enc) { |
| // FINCSTP |
| emit_opcode (cbuf, 0xD9); |
| emit_opcode (cbuf, 0xF7); |
| // FXCH: swap FPR1 with src |
| emit_opcode(cbuf, 0xD9); |
| emit_d8(cbuf, 0xC8-1+src_enc ); |
| // FDECSTP |
| emit_opcode (cbuf, 0xD9); |
| emit_opcode (cbuf, 0xF6); |
| } |
| // The closing "FSTP dst" (DD D8+dst) is deliberately not emitted here; |
| // it was replaced by a separate Pop_Reg_F or Pop_Mem_F encoding. |
| %} |
| |
| enc_class fnstsw_sahf_skip_parity() %{ |
| // Copy the x87 status word into the CPU flags, then jump over the next |
| // 5 bytes of code when the parity flag is clear. |
| // FNSTSW AX (DF E0) |
| emit_opcode( cbuf, 0xDF ); |
| emit_opcode( cbuf, 0xE0 ); |
| // SAHF |
| emit_opcode( cbuf, 0x9E ); |
| // JNP +5 (7B 05): the skipped 5 bytes are emitted by whatever follows |
| emit_opcode( cbuf, 0x7B ); |
| emit_opcode( cbuf, 0x05 ); |
| %} |
| |
| enc_class emitModDPR() %{ |
| // x87 remainder loop. FPREM must be iterated: after each step the status |
| // word is copied into the flags and the loop repeats while the parity |
| // flag is set. |
| // loop: |
| // FPREM (D9 F8) |
| emit_opcode( cbuf, 0xD9 ); |
| emit_opcode( cbuf, 0xF8 ); |
| // WAIT |
| emit_opcode( cbuf, 0x9b ); |
| // FNSTSW AX (DF E0) |
| emit_opcode( cbuf, 0xDF ); |
| emit_opcode( cbuf, 0xE0 ); |
| // SAHF |
| emit_opcode( cbuf, 0x9E ); |
| // JP loop (0F 8A rel32). rel32 = 0xFFFFFFF4 = -12, the total size of |
| // this sequence: 2 + 1 + 2 + 1 + 6 bytes. |
| emit_opcode( cbuf, 0x0F ); |
| emit_opcode( cbuf, 0x8A ); |
| emit_opcode( cbuf, 0xF4 ); |
| emit_opcode( cbuf, 0xFF ); |
| emit_opcode( cbuf, 0xFF ); |
| emit_opcode( cbuf, 0xFF ); |
| %} |
| |
| enc_class fpu_flags() %{ |
| // Move the x87 compare result into the CPU flags, mapping an unordered |
| // (NaN) compare onto the "less than" outcome. |
| // FNSTSW AX (DF E0) |
| emit_opcode( cbuf, 0xDF ); |
| emit_opcode( cbuf, 0xE0 ); |
| // TEST AX,0x0400 -- 16-bit form: operand-size prefix 66, opcode A9, imm16. |
| // The 32-bit form (A9 + imm32 0x00000400) also works but stalls for |
| // 12-16 cycles on PPro, so it is not used. |
| emit_opcode( cbuf, 0x66 ); |
| emit_opcode( cbuf, 0xA9 ); |
| emit_d16 ( cbuf, 0x0400 ); |
| // JZ +2: no unordered comparison, skip the 2-byte MOV AH below |
| emit_opcode( cbuf, 0x74 ); |
| emit_d8 ( cbuf, 0x02 ); |
| // MOV AH,1 -- treat as LT case (set carry flag) |
| emit_opcode( cbuf, 0xB4 ); |
| emit_d8 ( cbuf, 0x01 ); |
| // SAHF (also the landing point of the JZ) |
| emit_opcode( cbuf, 0x9E ); |
| %} |
| |
| enc_class cmpF_P6_fixup() %{ |
| // Patch up the integer flags after a floating-point compare that may |
| // have involved a NaN (which sets the parity flag). |
| // |
| // JNP +3: ordered result, skip MOV AH (2 bytes) and SAHF (1 byte) |
| emit_opcode( cbuf, 0x7B ); |
| emit_d8 ( cbuf, 0x03 ); |
| // MOV AH,1 -- treat as LT case (set carry flag) |
| emit_opcode( cbuf, 0xB4 ); |
| emit_d8 ( cbuf, 0x01 ); |
| // SAHF |
| emit_opcode( cbuf, 0x9E ); |
| // NOP: landing pad for the JNP, to avoid a branch to a branch |
| emit_opcode( cbuf, 0x90 ); |
| %} |
| |
| // fnstsw_ax(); |
| // sahf(); |
| // movl(dst, nan_result); |
| // jcc(Assembler::parity, exit); |
| // movl(dst, less_result); |
| // jcc(Assembler::below, exit); |
| // movl(dst, equal_result); |
| // jcc(Assembler::equal, exit); |
| // movl(dst, greater_result); |
| |
| // less_result = -1; |
| // greater_result = 1; |
| // equal_result = 0; |
| // nan_result = -1; |
| |
| enc_class CmpF_Result(rRegI dst) %{ |
| // Materialize an FPU compare as an int in $dst: -1 for unordered or less, 0 for equal, 1 for greater. First: fnstsw_ax(); (DF E0) |
| emit_opcode( cbuf, 0xDF); |
| emit_opcode( cbuf, 0xE0); |
| // sahf (9E): AH -> EFLAGS; the MOVs below do not touch the flags, so every jcc tests this result |
| emit_opcode( cbuf, 0x9E); |
| // movl(dst, nan_result); (B8+r imm32, 5 bytes) nan_result = -1 |
| emit_opcode( cbuf, 0xB8 + $dst$$reg); |
| emit_d32( cbuf, -1 ); |
| // jcc(Assembler::parity, exit); (7A 13): 0x13 = 19 = 5+2+5+2+5 bytes remaining in this sequence |
| emit_opcode( cbuf, 0x7A ); |
| emit_d8 ( cbuf, 0x13 ); |
| // movl(dst, less_result); less_result = -1 |
| emit_opcode( cbuf, 0xB8 + $dst$$reg); |
| emit_d32( cbuf, -1 ); |
| // jcc(Assembler::below, exit); (72 0C): 0x0C = 12 = 5+2+5 bytes remaining |
| emit_opcode( cbuf, 0x72 ); |
| emit_d8 ( cbuf, 0x0C ); |
| // movl(dst, equal_result); equal_result = 0 |
| emit_opcode( cbuf, 0xB8 + $dst$$reg); |
| emit_d32( cbuf, 0 ); |
| // jcc(Assembler::equal, exit); (74 05): skips the final 5-byte MOV |
| emit_opcode( cbuf, 0x74 ); |
| emit_d8 ( cbuf, 0x05 ); |
| // movl(dst, greater_result); greater_result = 1 |
| emit_opcode( cbuf, 0xB8 + $dst$$reg); |
| emit_d32( cbuf, 1 ); |
| %} |
| |
| |
| // Compare the longs and set flags: high words first, low words only when the high words are equal |
| // BROKEN! Do Not use as-is (NOTE(review): the reason is not stated here; presumably the low-word CMP leaves signed flags where an unsigned compare is needed - confirm) |
| enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ |
| // CMP $src1.hi,$src2.hi (3B /r) |
| emit_opcode( cbuf, 0x3B ); |
| emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); |
| // JNE,s done (75 02): skip the 2-byte low-word compare when the high words differ |
| emit_opcode(cbuf,0x75); |
| emit_d8(cbuf, 2 ); |
| // CMP $src1.lo,$src2.lo (3B /r) |
| emit_opcode( cbuf, 0x3B ); |
| emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); |
| // done: |
| %} |
| |
| enc_class convert_int_long( regL dst, rRegI src ) %{ |
| // Sign-extend $src into the register pair $dst. First: mov $dst.lo,$src |
| int dst_encoding = $dst$$reg; |
| int src_encoding = $src$$reg; |
| encode_Copy( cbuf, dst_encoding , src_encoding ); |
| // mov $dst.hi,$src |
| encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); |
| // sar $dst.hi,31 (C1 /7 ib): replicate the sign bit through the whole high word |
| emit_opcode( cbuf, 0xC1 ); |
| emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); |
| emit_d8(cbuf, 0x1F ); |
| %} |
| |
| enc_class convert_long_double( eRegL src ) %{ |
| // Load the long $src onto the FPU stack by bouncing it through the CPU stack. First: push $src.hi (50+r) |
| emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); |
| // push $src.lo (50+r): low word ends up at [ESP], high word at [ESP+4] |
| emit_opcode(cbuf, 0x50+$src$$reg ); |
| // fild 64-bits at [SP] (DF /5 with ModRM 6C, SIB 24, disp8 00) |
| emit_opcode(cbuf,0xdf); |
| emit_d8(cbuf, 0x6C); |
| emit_d8(cbuf, 0x24); |
| emit_d8(cbuf, 0x00); |
| // pop stack: release the two words pushed above |
| emit_opcode(cbuf, 0x83); // add SP, #8 (83 /0 ib) |
| emit_rm(cbuf, 0x3, 0x00, ESP_enc); |
| emit_d8(cbuf, 0x8); |
| %} |
| |
| enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ |
| // IMUL EDX:EAX,$src1 (F7 /5): one-operand signed multiply; the high 32 bits of the product land in EDX |
| emit_opcode( cbuf, 0xF7 ); |
| emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); |
| // SAR EDX,$cnt-32 (C1 /7 ib): taking the high word already accounts for 32 of the shift; nothing is emitted when $cnt == 32 |
| int shift_count = ((int)$cnt$$constant) - 32; |
| if (shift_count > 0) { |
| emit_opcode(cbuf, 0xC1); |
| emit_rm(cbuf, 0x3, 7, $dst$$reg ); |
| emit_d8(cbuf, shift_count); |
| } |
| %} |
| |
| // Same push/push/fild sequence as convert_long_double, but this version doesn't have add sp, 8: the two pushed words stay on the CPU stack |
| enc_class convert_long_double2( eRegL src ) %{ |
| // push $src.hi (50+r) |
| emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); |
| // push $src.lo (50+r) |
| emit_opcode(cbuf, 0x50+$src$$reg ); |
| // fild 64-bits at [SP] (DF /5 with ModRM 6C, SIB 24, disp8 00) |
| emit_opcode(cbuf,0xdf); |
| emit_d8(cbuf, 0x6C); |
| emit_d8(cbuf, 0x24); |
| emit_d8(cbuf, 0x00); |
| %} |
| |
| enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ |
| // Basic idea: long = (long)int * (long)int, i.e. a signed 32x32 -> 64 bit multiply |
| // IMUL EDX:EAX, src (F7 /5, register-direct ModRM) |
| emit_opcode( cbuf, 0xF7 ); |
| emit_rm( cbuf, 0x3, 0x5, $src$$reg); |
| %} |
| |
| enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ |
| // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL), i.e. an unsigned 32x32 -> 64 bit multiply |
| // MUL EDX:EAX, src (F7 /4, register-direct ModRM) |
| emit_opcode( cbuf, 0xF7 ); |
| emit_rm( cbuf, 0x3, 0x4, $src$$reg); |
| %} |
| |
| enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ |
| // Basic idea: lo(result) = lo(x_lo * y_lo) |
| // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) |
| // MOV $tmp,$src.lo |
| encode_Copy( cbuf, $tmp$$reg, $src$$reg ); |
| // IMUL $tmp,EDX (0F AF /r): $tmp = lo($src.lo * $dst.hi) |
| emit_opcode( cbuf, 0x0F ); |
| emit_opcode( cbuf, 0xAF ); |
| emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); |
| // MOV EDX,$src.hi |
| encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); |
| // IMUL EDX,EAX (0F AF /r): EDX = lo($src.hi * $dst.lo) |
| emit_opcode( cbuf, 0x0F ); |
| emit_opcode( cbuf, 0xAF ); |
| emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); |
| // ADD $tmp,EDX (03 /r): $tmp now holds the sum of both cross products |
| emit_opcode( cbuf, 0x03 ); |
| emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); |
| // MUL EDX:EAX,$src.lo (F7 /4): full 64-bit product of the two low words |
| emit_opcode( cbuf, 0xF7 ); |
| emit_rm( cbuf, 0x3, 0x4, $src$$reg ); |
| // ADD EDX,$tmp (03 /r): fold the cross products into the high word |
| emit_opcode( cbuf, 0x03 ); |
| emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); |
| %} |
| |
| enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ |
| // Basic idea: lo(result) = lo(src * y_lo) |
| // hi(result) = hi(src * y_lo) + lo(src * y_hi) |
| // IMUL $tmp,EDX,$src (6B /r ib): the constant is emitted as a single byte, which immL_127 (0..127) guarantees is enough |
| emit_opcode( cbuf, 0x6B ); |
| emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); |
| emit_d8( cbuf, (int)$src$$constant ); |
| // MOV EDX,$src (B8+r imm32): the old high word is no longer needed once $tmp holds its product |
| emit_opcode(cbuf, 0xB8 + EDX_enc); |
| emit_d32( cbuf, (int)$src$$constant ); |
| // MUL EDX:EAX,EDX (F7 /4): full 64-bit product of the low word and the constant |
| emit_opcode( cbuf, 0xF7 ); |
| emit_rm( cbuf, 0x3, 0x4, EDX_enc ); |
| // ADD EDX,$tmp (03 /r): fold the high-word product into the result's high word |
| emit_opcode( cbuf, 0x03 ); |
| emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); |
| %} |
| |
| enc_class long_div( eRegL src1, eRegL src2 ) %{ |
| // Push both longs and call SharedRuntime::ldiv. First: PUSH src1.hi (NOTE(review): HIGH_FROM_LOW wraps the whole opcode byte here, not just the register; equivalent only if the macro is a plain additive offset - confirm) |
| emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); |
| // PUSH src1.lo |
| emit_opcode(cbuf, 0x50+$src1$$reg ); |
| // PUSH src2.hi (same HIGH_FROM_LOW-on-opcode form as above) |
| emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); |
| // PUSH src2.lo |
| emit_opcode(cbuf, 0x50+$src2$$reg ); |
| // CALL directly to the runtime (E8 rel32): rel32 is measured from the end of the 4-byte displacement and carries a runtime-call relocation |
| cbuf.set_insts_mark(); |
| emit_opcode(cbuf,0xE8); // Call into runtime |
| emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); |
| // Restore stack: drop the four words pushed above |
| emit_opcode(cbuf, 0x83); // add SP, #framesize (83 /0 ib, framesize = 4*4 = 16) |
| emit_rm(cbuf, 0x3, 0x00, ESP_enc); |
| emit_d8(cbuf, 4*4); |
| %} |
| |
| enc_class long_mod( eRegL src1, eRegL src2 ) %{ |
| // Push both longs and call SharedRuntime::lrem. First: PUSH src1.hi (NOTE(review): HIGH_FROM_LOW wraps the whole opcode byte here, not just the register; equivalent only if the macro is a plain additive offset - confirm) |
| emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); |
| // PUSH src1.lo |
| emit_opcode(cbuf, 0x50+$src1$$reg ); |
| // PUSH src2.hi (same HIGH_FROM_LOW-on-opcode form as above) |
| emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); |
| // PUSH src2.lo |
| emit_opcode(cbuf, 0x50+$src2$$reg ); |
| // CALL directly to the runtime (E8 rel32): rel32 is measured from the end of the 4-byte displacement and carries a runtime-call relocation |
| cbuf.set_insts_mark(); |
| emit_opcode(cbuf,0xE8); // Call into runtime |
| emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); |
| // Restore stack: drop the four words pushed above |
| emit_opcode(cbuf, 0x83); // add SP, #framesize (83 /0 ib, framesize = 4*4 = 16) |
| emit_rm(cbuf, 0x3, 0x00, ESP_enc); |
| emit_d8(cbuf, 4*4); |
| %} |
| |
| enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ |
| // OR both halves of $src together in $tmp, leaving ZF set exactly when the long is zero. First: MOV $tmp,$src.lo (8B /r) |
| emit_opcode(cbuf, 0x8B); |
| emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); |
| // OR $tmp,$src.hi (0B /r) |
| emit_opcode(cbuf, 0x0B); |
| emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); |
| %} |
| |
| enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ |
| // Compare the low words, and the high words only if the low words match. First: CMP $src1.lo,$src2.lo (3B /r) |
| emit_opcode( cbuf, 0x3B ); |
| emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); |
| // JNE,s skip: disp8 = 2 hops over the 2-byte CMP below (presumably emit_cc merges base 0x70 with condition 0x5 into opcode 0x75 - confirm against emit_cc) |
| emit_cc(cbuf, 0x70, 0x5); |
| emit_d8(cbuf,2); |
| // CMP $src1.hi,$src2.hi (3B /r) |
| emit_opcode( cbuf, 0x3B ); |
| emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); |
| %} |
| |
| enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ |
| // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits (3B /r); the borrow feeds the SBB below |
| emit_opcode( cbuf, 0x3B ); |
| emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); |
| // MOV $tmp,$src1.hi (8B /r): work on a copy so $src1 is left intact |
| emit_opcode( cbuf, 0x8B ); |
| emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); |
| // SBB $tmp,$src2.hi\t! Compute flags for long compare (1B /r) |
| emit_opcode( cbuf, 0x1B ); |
| emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); |
| %} |
| |
| enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ |
| // Set flags as for the 64-bit subtraction 0 - $src. First: XOR $tmp,$tmp |
| emit_opcode(cbuf,0x33); // XOR (33 /r): $tmp = 0 |
| emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); |
| // CMP $tmp,$src.lo (3B /r): low half of 0 - $src; the borrow feeds the SBB below |
| emit_opcode( cbuf, 0x3B ); |
| emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); |
| // SBB $tmp,$src.hi (1B /r): high half of 0 - $src |
| emit_opcode( cbuf, 0x1B ); |
| emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); |
| %} |
| |
| // Negate the 64-bit value in $dst in place with NEG hi; NEG lo; SBB hi,0. Sniff, sniff... smells like Gnu Superoptimizer |
| enc_class neg_long( eRegL dst ) %{ |
| emit_opcode(cbuf,0xF7); // NEG hi (F7 /3) |
| emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); |
| emit_opcode(cbuf,0xF7); // NEG lo (F7 /3): sets carry when lo was non-zero |
| emit_rm (cbuf,0x3, 0x3, $dst$$reg ); |
| emit_opcode(cbuf,0x83); // SBB hi,0 (83 /3 ib): subtract that carry from the negated high word |
| emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); |
| emit_d8 (cbuf,0 ); |
| %} |
| |
| enc_class enc_pop_rdx() %{ |
| // POP EDX: single-byte opcode 58+r with r = 2, i.e. 0x5A |
| emit_opcode(cbuf,0x5A); |
| %} |
| |
| enc_class enc_rethrow() %{ |
| cbuf.set_insts_mark(); |
| emit_opcode(cbuf, 0xE9); // jmp entry (E9 rel32): jump, not call, to OptoRuntime::rethrow_stub(); rel32 is measured from the end of the displacement |
| emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, |
| runtime_call_Relocation::spec(), RELOC_IMM32 ); |
| %} |
| |
| |
| // Convert a double to an int. Java semantics require we do complex |
| // manglelations in the corner cases. So we set the rounding mode to |
| // 'zero', store the darned double down as an int, and restore the |
| // standard (or 24-bit) control word. If the stored int is 0x80000000, |
| // the slow path reloads the source and calls StubRoutines::d2i_wrapper(). |
| enc_class DPR2I_encoding( regDPR src ) %{ |
| // Flip to round-to-zero mode. We attempted to allow invalid-op |
| // exceptions here, so that a NAN or other corner-case value will |
| // thrown an exception (but normal values get converted at full speed). |
| // However, I2C adapters and other float-stack manglers leave pending |
| // invalid-op exceptions hanging. We would have to clear them before |
| // enabling them and that is more expensive than just testing for the |
| // invalid value Intel stores down in the corner cases. |
| emit_opcode(cbuf,0xD9); // FLDCW trunc (D9 /5 with an absolute disp32 operand) |
| emit_opcode(cbuf,0x2D); |
| emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); |
| // Allocate a word on the CPU stack to receive the int |
| emit_opcode(cbuf,0x83); // SUB ESP,4 |
| emit_opcode(cbuf,0xEC); |
| emit_d8(cbuf,0x04); |
| // Encoding assumes a double has been pushed into FPR0. |
| // Store down the double as an int, popping the FPU stack |
| emit_opcode(cbuf,0xDB); // FISTP [ESP] (DB /3, 32-bit integer store) |
| emit_opcode(cbuf,0x1C); |
| emit_d8(cbuf,0x24); |
| // Restore the rounding mode; mask the exception |
| emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode, chosen by in_24_bit_fp_mode() at emit time |
| emit_opcode(cbuf,0x2D); |
| emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() |
| ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() |
| : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); |
| |
| // Load the converted int; adjust CPU stack |
| emit_opcode(cbuf,0x58); // POP EAX |
| emit_opcode(cbuf,0x3D); // CMP EAX,imm |
| emit_d32 (cbuf,0x80000000); // 0x80000000: the value stored down in the corner cases |
| emit_opcode(cbuf,0x75); // JNE around_slow_call |
| emit_d8 (cbuf,0x07); // Size of slow_call: FLD (2 bytes) + CALL (5 bytes) |
| // Push src onto stack slow-path |
| emit_opcode(cbuf,0xD9 ); // FLD ST(i) |
| emit_d8 (cbuf,0xC0-1+$src$$reg ); |
| // CALL directly to the runtime; rel32 is measured from the end of the 4-byte displacement |
| cbuf.set_insts_mark(); |
| emit_opcode(cbuf,0xE8); // Call into runtime |
| emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); |
| // Carry on here... |
| %} |
| |
| enc_class DPR2L_encoding( regDPR src ) %{ |
| emit_opcode(cbuf,0xD9); // FLDCW trunc: double-to-long counterpart of DPR2I_encoding, leaving the result in EDX:EAX |
| emit_opcode(cbuf,0x2D); |
| emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); |
| // Allocate two words (8 bytes) on the CPU stack to receive the long |
| emit_opcode(cbuf,0x83); // SUB ESP,8 |
| emit_opcode(cbuf,0xEC); |
| emit_d8(cbuf,0x08); |
| // Encoding assumes a double has been pushed into FPR0. |
| // Store down the double as a long, popping the FPU stack |
| emit_opcode(cbuf,0xDF); // FISTP [ESP] (DF /7, 64-bit integer store) |
| emit_opcode(cbuf,0x3C); |
| emit_d8(cbuf,0x24); |
| // Restore the rounding mode; mask the exception |
| emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode, chosen by in_24_bit_fp_mode() at emit time |
| emit_opcode(cbuf,0x2D); |
| emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() |
| ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() |
| : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); |
| |
| // Load the converted long (low word first); adjust CPU stack |
| emit_opcode(cbuf,0x58); // POP EAX |
| emit_opcode(cbuf,0x5A); // POP EDX |
| emit_opcode(cbuf,0x81); // CMP EDX,imm |
| emit_d8 (cbuf,0xFA); // ModRM: /7 (CMP), register-direct EDX |
| emit_d32 (cbuf,0x80000000); // 0x80000000 |
| emit_opcode(cbuf,0x75); // JNE around_slow_call |
| emit_d8 (cbuf,0x07+4); // Size of slow_call plus the 4 bytes of TEST + JNE below |
| emit_opcode(cbuf,0x85); // TEST EAX,EAX |
| emit_opcode(cbuf,0xC0); // ModRM: register-direct EAX,EAX |
| emit_opcode(cbuf,0x75); // JNE around_slow_call |
| emit_d8 (cbuf,0x07); // Size of slow_call: FLD (2 bytes) + CALL (5 bytes) |
| // Push src onto stack slow-path: reached only when EDX:EAX == 0x80000000:00000000 |
| emit_opcode(cbuf,0xD9 ); // FLD ST(i) |
| emit_d8 (cbuf,0xC0-1+$src$$reg ); |
| // CALL directly to the runtime; rel32 is measured from the end of the 4-byte displacement |
| cbuf.set_insts_mark(); |
| emit_opcode(cbuf,0xE8); // Call into runtime |
| emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); |
| // Carry on here... |
| %} |
| |
| enc_class FMul_ST_reg( eRegFPR src1 ) %{ |
| // Operand was loaded from memory into fp ST (stack top) |
| // FMUL ST,$src1 /* D8 C8+i */ - the product stays in ST and nothing is popped |
| emit_opcode(cbuf, 0xD8); |
| emit_opcode(cbuf, 0xC8 + $src1$$reg); |
| %} |
| |
| enc_class FAdd_ST_reg( eRegFPR src2 ) %{ |
| // FADD ST,src2 /* D8 C0+i */ - non-popping add; the sum stays in ST |
| emit_opcode(cbuf, 0xD8); |
| emit_opcode(cbuf, 0xC0 + $src2$$reg); |
| //could use FADDP src2,fpST /* DE C0+i */ |
| %} |
| |
| enc_class FAddP_reg_ST( eRegFPR src2 ) %{ |
| // FADDP src2,ST /* DE C0+i */ - the sum goes to src2 and the FPU stack is popped |
| emit_opcode(cbuf, 0xDE); |
| emit_opcode(cbuf, 0xC0 + $src2$$reg); |
| %} |
| |
| enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ |
| // Operand has been loaded into fp ST (stack top); computes (ST - $src1) / $src2 in ST |
| // FSUB ST,$src1 /* D8 E0+i */ |
| emit_opcode(cbuf, 0xD8); |
| emit_opcode(cbuf, 0xE0 + $src1$$reg); |
| |
| // FDIV ST,$src2 /* D8 F0+i */ |
| emit_opcode(cbuf, 0xD8); |
| emit_opcode(cbuf, 0xF0 + $src2$$reg); |
| %} |
| |
| enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ |
| // Operand was loaded from memory into fp ST (stack top); computes (ST + $src1) * $src2 in ST |
| // FADD ST,$src1 /* D8 C0+i */ |
| emit_opcode(cbuf, 0xD8); |
| emit_opcode(cbuf, 0xC0 + $src1$$reg); |
| |
| // FMUL ST,src2 /* D8 C8+i */ |
| emit_opcode(cbuf, 0xD8); |
| emit_opcode(cbuf, 0xC8 + $src2$$reg); |
| %} |
| |
| |
| enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ |
| // Operand was loaded from memory into fp ST (stack top); like MulFAddF, but the product is left in $src2 and ST is popped |
| // FADD ST,$src1 /* D8 C0+i */ |
| emit_opcode(cbuf, 0xD8); |
| emit_opcode(cbuf, 0xC0 + $src1$$reg); |
| |
| // FMULP src2,ST /* DE C8+i */ |
| emit_opcode(cbuf, 0xDE); |
| emit_opcode(cbuf, 0xC8 + $src2$$reg); |
| %} |
| |
| // Atomically load the volatile long: opcode DF with /5 reads $mem in one access, then the DF /7 arguments passed to store_to_stackslot write it to $dst (presumably FILD m64 then FISTP m64 - confirm against store_to_stackslot) |
| enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ |
| emit_opcode(cbuf,0xDF); |
| int rm_byte_opcode = 0x05; |
| int base = $mem$$base; |
| int index = $mem$$index; |
| int scale = $mem$$scale; |
| int displace = $mem$$disp; |
| relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals; handed to encode_RegMem with the address parts |
| encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); |
| store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); |
| %} |
| |
| // Volatile Store Long. Must be atomic, so move it into |
| // the FP TOS and then do a 64-bit FIST. Has to probe the |
| // target address before the store (for null-ptr checks) |
| // so the memory operand is used twice in the encoding. NOTE(review): this enc_class itself emits $mem once; the probe is presumably emitted elsewhere - confirm. |
| enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ |
| store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); |
| cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop |
| emit_opcode(cbuf,0xDF); |
| int rm_byte_opcode = 0x07; |
| int base = $mem$$base; |
| int index = $mem$$index; |
| int scale = $mem$$scale; |
| int displace = $mem$$disp; |
| relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals; handed to encode_RegMem with the address parts |
| encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); |
| %} |
| |
| // Safepoint Poll. This polls the safepoint page, and causes an |
| // exception if it is not readable. Unfortunately, it kills the condition code |
| // in the process. |
| // We currently use TESTL [spp],EDI: opcode 85 with ModRM mod=0, reg=7 (EDI), rm=5 (absolute disp32), tagged with a poll_type relocation |
| // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 |
| |
| enc_class Safepoint_Poll() %{ |
| cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); |
| emit_opcode(cbuf,0x85); |
| emit_rm (cbuf, 0x0, 0x7, 0x5); |
| emit_d32(cbuf, (intptr_t)os::get_polling_page()); |
| %} |
| %} |
| |
| |
| //----------FRAME-------------------------------------------------------------- |
| // Definition of frame structure and management information. |
| // |
| // S T A C K L A Y O U T Allocators stack-slot number |
| // | (to get allocators register number |
| // G Owned by | | v add OptoReg::stack0()) |
| // r CALLER | | |
| // o | +--------+ pad to even-align allocators stack-slot |
| // w V | pad0 | numbers; owned by CALLER |
| // t -----------+--------+----> Matcher::_in_arg_limit, unaligned |
| // h ^ | in | 5 |
| // | | args | 4 Holes in incoming args owned by SELF |
| // | | | | 3 |
| // | | +--------+ |
| // V | | old out| Empty on Intel, window on Sparc |
| // | old |preserve| Must be even aligned. |
| // | SP-+--------+----> Matcher::_old_SP, even aligned |
| // | | in | 3 area for Intel ret address |
| // Owned by |preserve| Empty on Sparc. |
| // SELF +--------+ |
| // | | pad2 | 2 pad to align old SP |
| // | +--------+ 1 |
| // | | locks | 0 |
| // | +--------+----> OptoReg::stack0(), even aligned |
| // | | pad1 | 11 pad to align new SP |
| // | +--------+ |
| // | | | 10 |
| // | | spills | 9 spills |
| // V | | 8 (pad0 slot for callee) |
| // -----------+--------+----> Matcher::_out_arg_limit, unaligned |
| // ^ | out | 7 |
| // | | args | 6 Holes in outgoing args owned by CALLEE |
| // Owned by +--------+ |
| // CALLEE | new out| 6 Empty on Intel, window on Sparc |
| // | new |preserve| Must be even-aligned. |
| // | SP-+--------+----> Matcher::_new_SP, even aligned |
| // | | | |
| // |
| // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is |
| // known from SELF's arguments and the Java calling convention. |
| // Region 6-7 is determined per call site. |
| // Note 2: If the calling convention leaves holes in the incoming argument |
| // area, those holes are owned by SELF. Holes in the outgoing area |
| // are owned by the CALLEE. Holes should not be necessary in the |
| // incoming area, as the Java calling convention is completely under |
| // the control of the AD file. Doubles can be sorted and packed to |
| // avoid holes. Holes in the outgoing arguments may be necessary for |
| // varargs C calling conventions. |
| // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is |
| // even aligned with pad0 as needed. |
| // Region 6 is even aligned. Region 6-7 is NOT even aligned; |
| // region 6-11 is even aligned; it may be padded out more so that |
| // the region from SP to FP meets the minimum stack alignment. |
| |
| frame %{ |
| // What direction does stack grow in (assumed to be same for C & Java) |
| stack_direction(TOWARDS_LOW); |
| |
| // These two registers define part of the calling convention |
| // between compiled code and the interpreter. |
| inline_cache_reg(EAX); // Inline Cache Register |
| interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter |
| |
| // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] |
| cisc_spilling_operand_name(indOffset32); |
| |
| // Number of stack slots consumed by locking an object |
| sync_stack_slots(1); |
| |
| // Compiled code's Frame Pointer |
| frame_pointer(ESP); |
| // Interpreter stores its frame pointer in a register which is |
| // stored to the stack by I2CAdaptors. |
| // I2CAdaptors convert from interpreted java to compiled java. |
| interpreter_frame_pointer(EBP); |
| |
| // Stack alignment requirement |
| // Alignment size in bytes (128-bit -> 16 bytes), taken from StackAlignmentInBytes |
| stack_alignment(StackAlignmentInBytes); |
| |
| // Number of stack slots between incoming argument block and the start of |
| // a new frame. The PROLOG must add this many slots to the stack. The |
| // EPILOG must remove this many slots. Intel needs one slot for |
| // return address and one for rbp, (must save rbp); VerifyStackAtCalls is added on top |
| in_preserve_stack_slots(2+VerifyStackAtCalls); |
| |
| // Number of outgoing stack slots killed above the out_preserve_stack_slots |
| // for calls to C. Supports the var-args backing area for register parms. |
| varargs_C_out_slots_killed(0); |
| |
| // The after-PROLOG location of the return address. Location of |
| // return address specifies a type (REG or STACK) and a number |
| // representing the register number (i.e. - use a register name) or |
| // stack slot. |
| // Ret Addr is on stack in slot 0 if no locks or verification or alignment. |
| // Otherwise, it is above the locks and verification slot and alignment word: |
| // the preserved plus fixed slots are rounded up to the stack alignment, and the return address is the last slot of that area |
| return_addr(STACK - 1 + |
| round_to((Compile::current()->in_preserve_stack_slots() + |
| Compile::current()->fixed_slots()), |
| stack_alignment_in_slots())); |
| |
| // Body of function which returns an integer array locating |
| // arguments either in registers or in stack slots. Passed an array |
| // of ideal registers called "sig" and a "length" count. Stack-slot |
| // offsets are based on outgoing arguments, i.e. a CALLER setting up |
| // arguments for a CALLEE. Incoming stack arguments are |
| // automatically biased by the preserve_stack_slots field above. |
| calling_convention %{ |
| // No difference between ingoing/outgoing just pass false |
| SharedRuntime::java_calling_convention(sig_bt, regs, length, false); |
| %} |
| |
| |
| // Body of function which returns an integer array locating |
| // arguments either in registers or in stack slots. Passed an array |
| // of ideal registers called "sig" and a "length" count. Stack-slot |
| // offsets are based on outgoing arguments, i.e. a CALLER setting up |
| // arguments for a CALLEE. Incoming stack arguments are |
| // automatically biased by the preserve_stack_slots field above. |
| c_calling_convention %{ |
| // This is obviously always outgoing; the value returned by c_calling_convention is deliberately discarded |
| (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); |
| %} |
| |
| // Location of C & interpreter return values; lo[]/hi[] are indexed by ideal_reg and give the two halves of the returned register pair |
| c_return_value %{ |
| assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); |
| static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; |
| static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; |
| |
| // in SSE2+ mode we want to keep the FPU stack clean so pretend |
| // that C functions return float and double results in XMM0. |
| if( ideal_reg == Op_RegD && UseSSE>=2 ) |
| return OptoRegPair(XMM0b_num,XMM0_num); |
| if( ideal_reg == Op_RegF && UseSSE>=2 ) |
| return OptoRegPair(OptoReg::Bad,XMM0_num); |
| |
| return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); |
| %} |
| |
| // Location of return values; same tables as c_return_value, except that floats go to XMM0 already at UseSSE>=1 (c_return_value waits for UseSSE>=2) |
| return_value %{ |
| assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); |
| static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; |
| static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; |
| if( ideal_reg == Op_RegD && UseSSE>=2 ) |
| return OptoRegPair(XMM0b_num,XMM0_num); |
| if( ideal_reg == Op_RegF && UseSSE>=1 ) |
| return OptoRegPair(OptoReg::Bad,XMM0_num); |
| return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); |
| %} |
| |
| %} |
| |
| //----------ATTRIBUTES--------------------------------------------------------- |
| //----------Operand Attributes------------------------------------------------- |
| op_attrib op_cost(0); // Required cost attribute; operands that set no op_cost get 0 |
| |
| //----------Instruction Attributes--------------------------------------------- |
| ins_attrib ins_cost(100); // Required cost attribute; default value 100 |
| ins_attrib ins_size(8); // Required size attribute (in bits); default value 8 |
| ins_attrib ins_short_branch(0); // Required flag: is this instruction a |
| // non-matching short branch variant of some |
| // long branch? Defaults to 0 (no). |
| ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) |
| // specifies the alignment that some part of the instruction (not |
| // necessarily the start) requires. If > 1, a compute_padding() |
| // function must be provided for the instruction |
| |
| //----------OPERANDS----------------------------------------------------------- |
| // Operand definitions must precede instruction definitions for correct parsing |
| // in the ADLC because operands constitute user defined types which are used in |
| // instruction definitions. |
| |
| //----------Simple Operands---------------------------------------------------- |
| // Immediate Operands |
| // Integer Immediate: matches any ConI (no predicate), at cost 10 |
| operand immI() %{ |
| match(ConI); |
| |
| op_cost(10); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Constant for test vs zero: a ConI whose value is exactly 0 |
| operand immI0() %{ |
| predicate(n->get_int() == 0); |
| match(ConI); |
| |
| op_cost(0); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Constant for increment: a ConI whose value is exactly 1 |
| operand immI1() %{ |
| predicate(n->get_int() == 1); |
| match(ConI); |
| |
| op_cost(0); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Constant for decrement: a ConI whose value is exactly -1 |
| operand immI_M1() %{ |
| predicate(n->get_int() == -1); |
| match(ConI); |
| |
| op_cost(0); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Valid scale values for addressing modes: a ConI in the range 0..3 (no explicit op_cost) |
| operand immI2() %{ |
| predicate(0 <= n->get_int() && (n->get_int() <= 3)); |
| match(ConI); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Integer immediate that fits in a signed byte: a ConI in the range -128..127 |
| operand immI8() %{ |
| predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); |
| match(ConI); |
| |
| op_cost(5); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Integer immediate that fits in a signed 16-bit value: a ConI in the range -32768..32767 |
| operand immI16() %{ |
| predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); |
| match(ConI); |
| |
| op_cost(10); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Int Immediate non-negative: a ConI whose value is >= 0, i.e. one that fits in 31 unsigned bits |
| operand immU31() |
| %{ |
| predicate(n->get_int() >= 0); |
| match(ConI); |
| |
| op_cost(0); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Constant for long shifts: a ConI whose value is exactly 32 |
| operand immI_32() %{ |
| predicate( n->get_int() == 32 ); |
| match(ConI); |
| |
| op_cost(0); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Shift-count constant: a ConI in the range 1..31 |
| operand immI_1_31() %{ |
| predicate( n->get_int() >= 1 && n->get_int() <= 31 ); |
| match(ConI); |
| |
| op_cost(0); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Shift-count constant: a ConI in the range 32..63 |
| operand immI_32_63() %{ |
| predicate( n->get_int() >= 32 && n->get_int() <= 63 ); |
| match(ConI); |
| op_cost(0); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // The constant 1 (same predicate as immI1, under a second operand name) |
| operand immI_1() %{ |
| predicate( n->get_int() == 1 ); |
| match(ConI); |
| |
| op_cost(0); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // The constant 2 |
| operand immI_2() %{ |
| predicate( n->get_int() == 2 ); |
| match(ConI); |
| |
| op_cost(0); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // The constant 3 |
| operand immI_3() %{ |
| predicate( n->get_int() == 3 ); |
| match(ConI); |
| |
| op_cost(0); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Pointer Immediate: matches any ConP (no predicate), at cost 10 |
| operand immP() %{ |
| match(ConP); |
| |
| op_cost(10); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // NULL Pointer Immediate: a ConP whose pointer value is 0 |
| operand immP0() %{ |
| predicate( n->get_ptr() == 0 ); |
| match(ConP); |
| op_cost(0); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Long Immediate: matches any ConL (no predicate), at cost 20 |
| operand immL() %{ |
| match(ConL); |
| |
| op_cost(20); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Long Immediate zero: a ConL whose value is exactly 0 |
| operand immL0() %{ |
| predicate( n->get_long() == 0L ); |
| match(ConL); |
| op_cost(0); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Long Immediate minus one: a ConL whose value is exactly -1 |
| operand immL_M1() %{ |
| predicate( n->get_long() == -1L ); |
| match(ConL); |
| op_cost(0); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Long immediate from 0 to 127 (inclusive on both ends). |
| // Used for a shorter form of long mul by 10: the long_multiply_con encoding emits this constant as a single byte. |
| operand immL_127() %{ |
| predicate((0 <= n->get_long()) && (n->get_long() <= 127)); |
| match(ConL); |
| op_cost(0); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Long Immediate: low 32-bit mask, i.e. a ConL equal to 0xFFFFFFFF |
| operand immL_32bits() %{ |
| predicate(n->get_long() == 0xFFFFFFFFL); |
| match(ConL); |
| op_cost(0); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Long Immediate that survives a round trip through int, i.e. a ConL representable as a sign-extended 32-bit value |
| operand immL32() %{ |
| predicate(n->get_long() == (int)(n->get_long())); |
| match(ConL); |
| op_cost(20); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Double Immediate zero, available only when UseSSE<=1 |
| operand immDPR0() %{ |
| // Do additional (and counter-intuitive) test against NaN to work around VC++ |
| // bug that generates code such that NaNs compare equal to 0.0 |
| predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); |
| match(ConD); |
| |
| op_cost(5); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Double Immediate one, available only when UseSSE<=1 |
| operand immDPR1() %{ |
| predicate( UseSSE<=1 && n->getd() == 1.0 ); |
| match(ConD); |
| |
| op_cost(5); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Double Immediate: any ConD, available only when UseSSE<=1 |
| operand immDPR() %{ |
| predicate(UseSSE<=1); |
| match(ConD); |
| |
| op_cost(5); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Double Immediate: any ConD, available only when UseSSE>=2 (complements immDPR, which requires UseSSE<=1) |
| operand immD() %{ |
| predicate(UseSSE>=2); |
| match(ConD); |
| |
| op_cost(5); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Double Immediate zero, available only when UseSSE>=2 |
| operand immD0() %{ |
| // Tests jlong_cast(value) == 0 instead of value == 0.0. This avoids the VC++ |
| // bug that generates code such that NaNs compare equal to 0.0, and it does not |
| // match -0.0 (presumably jlong_cast yields the raw bit pattern - confirm). |
| predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); |
| match(ConD); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Float Immediate zero, available only when UseSSE == 0; uses a floating-point == 0.0F comparison |
| operand immFPR0() %{ |
| predicate(UseSSE == 0 && n->getf() == 0.0F); |
| match(ConF); |
| |
| op_cost(5); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Float Immediate one, available only when UseSSE == 0 |
| operand immFPR1() %{ |
| predicate(UseSSE == 0 && n->getf() == 1.0F); |
| match(ConF); |
| |
| op_cost(5); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Float Immediate: any ConF, available only when UseSSE == 0 |
| operand immFPR() %{ |
| predicate( UseSSE == 0 ); |
| match(ConF); |
| |
| op_cost(5); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Float Immediate: any ConF, available only when UseSSE >= 1 (complements immFPR, which requires UseSSE == 0) |
| operand immF() %{ |
| predicate(UseSSE >= 1); |
| match(ConF); |
| |
| op_cost(5); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Float Immediate zero. Zero and not -0.0: the test is jint_cast(value) == 0, available only when UseSSE >= 1 |
| operand immF0() %{ |
| predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); |
| match(ConF); |
| |
| op_cost(5); |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Immediates for special shifts (sign extend) |
| |
| // The constant 16, one of the immediates for special shifts (sign extend); no explicit op_cost |
| operand immI_16() %{ |
| predicate( n->get_int() == 16 ); |
| match(ConI); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // The constant 24; no explicit op_cost |
| operand immI_24() %{ |
| predicate( n->get_int() == 24 ); |
| match(ConI); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Constant for byte-wide masking: a ConI whose value is exactly 255 (0xFF) |
| operand immI_255() %{ |
| predicate( n->get_int() == 255 ); |
| match(ConI); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Constant for short-wide masking: ConI whose value is exactly 65535. |
| operand immI_65535() %{ |
| predicate(n->get_int() == 65535); |
| match(ConI); |
| |
| format %{ %} |
| interface(CONST_INTER); |
| %} |
| |
| // Register Operands |
| // Integer Register: RegI operand allocated in register class int_reg. |
| // The extra match rules list the narrower integer register operands |
| // (xRegI and the single-register eAX/eBX/eCX/eDX/eDI/eSI operands). |
| operand rRegI() %{ |
| constraint(ALLOC_IN_RC(int_reg)); |
| match(RegI); |
| match(xRegI); |
| match(eAXRegI); |
| match(eBXRegI); |
| match(eCXRegI); |
| match(eDXRegI); |
| match(eDIRegI); |
| match(eSIRegI); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Subset of Integer Register: allocated in register class int_x_reg. |
| // Match rules list the rRegI argument plus the eAX/eBX/eCX/eDX operands |
| // (eSI and eDI operands are not listed). |
| operand xRegI(rRegI reg) %{ |
| constraint(ALLOC_IN_RC(int_x_reg)); |
| match(reg); |
| match(eAXRegI); |
| match(eBXRegI); |
| match(eCXRegI); |
| match(eDXRegI); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Special Registers |
| // Integer operand allocated in register class eax_reg; printed as "EAX". |
| operand eAXRegI(xRegI reg) %{ |
| constraint(ALLOC_IN_RC(eax_reg)); |
| match(reg); |
| match(rRegI); |
| |
| format %{ "EAX" %} |
| interface(REG_INTER); |
| %} |
| |
| // Special Registers |
| // Integer operand allocated in register class ebx_reg; printed as "EBX". |
| operand eBXRegI(xRegI reg) %{ |
| constraint(ALLOC_IN_RC(ebx_reg)); |
| match(reg); |
| match(rRegI); |
| |
| format %{ "EBX" %} |
| interface(REG_INTER); |
| %} |
| |
| // Integer operand allocated in register class ecx_reg; printed as "ECX". |
| operand eCXRegI(xRegI reg) %{ |
| constraint(ALLOC_IN_RC(ecx_reg)); |
| match(reg); |
| match(rRegI); |
| |
| format %{ "ECX" %} |
| interface(REG_INTER); |
| %} |
| |
| // Integer operand allocated in register class edx_reg; printed as "EDX". |
| operand eDXRegI(xRegI reg) %{ |
| constraint(ALLOC_IN_RC(edx_reg)); |
| match(reg); |
| match(rRegI); |
| |
| format %{ "EDX" %} |
| interface(REG_INTER); |
| %} |
| |
| // Integer operand allocated in register class edi_reg; printed as "EDI". |
| // NOTE(review): the parameter is typed xRegI although xRegI's own match list |
| // does not include eDIRegI -- confirm this is intended. |
| operand eDIRegI(xRegI reg) %{ |
| constraint(ALLOC_IN_RC(edi_reg)); |
| match(reg); |
| match(rRegI); |
| |
| format %{ "EDI" %} |
| interface(REG_INTER); |
| %} |
| |
| // Integer operand allocated in register class nax_reg (presumably "not EAX" |
| // -- confirm against the register class definition). Match rules list RegI |
| // and the eCX/eDX/eSI/eDI operands. |
| operand naxRegI() %{ |
| constraint(ALLOC_IN_RC(nax_reg)); |
| match(RegI); |
| match(eCXRegI); |
| match(eDXRegI); |
| match(eSIRegI); |
| match(eDIRegI); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Integer operand allocated in register class nadx_reg (presumably "not EAX, |
| // not EDX" -- confirm against the register class definition). Match rules |
| // list RegI and the eBX/eCX/eSI/eDI operands. |
| operand nadxRegI() %{ |
| constraint(ALLOC_IN_RC(nadx_reg)); |
| match(RegI); |
| match(eBXRegI); |
| match(eCXRegI); |
| match(eSIRegI); |
| match(eDIRegI); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Integer operand allocated in register class ncx_reg (presumably "not ECX" |
| // -- confirm against the register class definition). Match rules list RegI |
| // and the eAX/eDX/eSI/eDI operands. |
| operand ncxRegI() %{ |
| constraint(ALLOC_IN_RC(ncx_reg)); |
| match(RegI); |
| match(eAXRegI); |
| match(eDXRegI); |
| match(eSIRegI); |
| match(eDIRegI); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Integer operand allocated in register class esi_reg; printed as "ESI". |
| // Historical note: this operand was used by cmpFastUnlock, but conflicted |
| // with the 'object' reg. |
| operand eSIRegI(xRegI reg) %{ |
| constraint(ALLOC_IN_RC(esi_reg)); |
| match(reg); |
| match(rRegI); |
| |
| format %{ "ESI" %} |
| interface(REG_INTER); |
| %} |
| |
| // Pointer Register: RegP operand allocated in register class any_reg. |
| // Match rules also list eRegP and the eAX/eBX/eCX/eDI pointer operands. |
| operand anyRegP() %{ |
| constraint(ALLOC_IN_RC(any_reg)); |
| match(RegP); |
| match(eAXRegP); |
| match(eBXRegP); |
| match(eCXRegP); |
| match(eDIRegP); |
| match(eRegP); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // General pointer operand: RegP allocated in register class int_reg. |
| // Match rules list the eAX/eBX/eCX/eDI pointer operands; eSIRegP is not |
| // listed here even though it is declared with an eRegP parameter. |
| operand eRegP() %{ |
| constraint(ALLOC_IN_RC(int_reg)); |
| match(RegP); |
| match(eAXRegP); |
| match(eBXRegP); |
| match(eCXRegP); |
| match(eDIRegP); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // On windows95, EBP is not safe to use for implicit null tests. |
| // This pointer operand is allocated in register class int_reg_no_ebp and |
| // carries op_cost 100; it is the base operand of the *_win95_safe memory |
| // operands defined later in this file. |
| operand eRegP_no_EBP() %{ |
| constraint(ALLOC_IN_RC(int_reg_no_ebp)); |
| match(RegP); |
| match(eAXRegP); |
| match(eBXRegP); |
| match(eCXRegP); |
| match(eDIRegP); |
| |
| op_cost(100); |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Pointer operand allocated in register class nax_reg (presumably "not EAX" |
| // -- confirm). Match rules list RegP and the eBX/eDX/eCX/eSI/eDI pointer |
| // operands. |
| operand naxRegP() %{ |
| constraint(ALLOC_IN_RC(nax_reg)); |
| match(RegP); |
| match(eBXRegP); |
| match(eDXRegP); |
| match(eCXRegP); |
| match(eSIRegP); |
| match(eDIRegP); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Pointer operand allocated in register class nabx_reg (presumably "not EAX, |
| // not EBX" -- confirm). Match rules list RegP and the eCX/eDX/eSI/eDI |
| // pointer operands. |
| operand nabxRegP() %{ |
| constraint(ALLOC_IN_RC(nabx_reg)); |
| match(RegP); |
| match(eCXRegP); |
| match(eDXRegP); |
| match(eSIRegP); |
| match(eDIRegP); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Pointer operand allocated in register class p_reg. Match rules list RegP |
| // and the eBX/eDX/eSI/eDI pointer operands. |
| operand pRegP() %{ |
| constraint(ALLOC_IN_RC(p_reg)); |
| match(RegP); |
| match(eBXRegP); |
| match(eDXRegP); |
| match(eSIRegP); |
| match(eDIRegP); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Special Registers |
| // Return a pointer value |
| // Pointer operand allocated in register class eax_reg; printed as "EAX". |
| operand eAXRegP(eRegP reg) %{ |
| constraint(ALLOC_IN_RC(eax_reg)); |
| match(reg); |
| format %{ "EAX" %} |
| interface(REG_INTER); |
| %} |
| |
| // Used in AtomicAdd |
| // Pointer operand allocated in register class ebx_reg; printed as "EBX". |
| operand eBXRegP(eRegP reg) %{ |
| constraint(ALLOC_IN_RC(ebx_reg)); |
| match(reg); |
| format %{ "EBX" %} |
| interface(REG_INTER); |
| %} |
| |
| // Tail-call (interprocedural jump) to interpreter |
| // Pointer operand allocated in register class ecx_reg; printed as "ECX". |
| operand eCXRegP(eRegP reg) %{ |
| constraint(ALLOC_IN_RC(ecx_reg)); |
| match(reg); |
| format %{ "ECX" %} |
| interface(REG_INTER); |
| %} |
| |
| // Pointer operand allocated in register class esi_reg; printed as "ESI". |
| operand eSIRegP(eRegP reg) %{ |
| constraint(ALLOC_IN_RC(esi_reg)); |
| match(reg); |
| format %{ "ESI" %} |
| interface(REG_INTER); |
| %} |
| |
| // Used in rep stosw |
| // Pointer operand allocated in register class edi_reg; printed as "EDI". |
| operand eDIRegP(eRegP reg) %{ |
| constraint(ALLOC_IN_RC(edi_reg)); |
| match(reg); |
| format %{ "EDI" %} |
| interface(REG_INTER); |
| %} |
| |
| // Long operand: RegL allocated in register class long_reg; also lists |
| // eADXRegL as a match (eBCXRegL is not listed). |
| operand eRegL() %{ |
| constraint(ALLOC_IN_RC(long_reg)); |
| match(RegL); |
| match(eADXRegL); |
| |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Long operand allocated in register class eadx_reg; printed as "EDX:EAX". |
| operand eADXRegL( eRegL reg ) %{ |
| constraint(ALLOC_IN_RC(eadx_reg)); |
| match(reg); |
| |
| format %{ "EDX:EAX" %} |
| interface(REG_INTER); |
| %} |
| |
| // Long operand allocated in register class ebcx_reg; printed as "EBX:ECX". |
| operand eBCXRegL( eRegL reg ) %{ |
| constraint(ALLOC_IN_RC(ebcx_reg)); |
| match(reg); |
| |
| format %{ "EBX:ECX" %} |
| interface(REG_INTER); |
| %} |
| |
| // Special case for integer high multiply. |
| // Uses the same register class as eADXRegL (eadx_reg) and matches RegL, but |
| // is printed as "EAX" only. |
| operand eADXRegL_low_only() %{ |
| constraint(ALLOC_IN_RC(eadx_reg)); |
| match(RegL); |
| |
| format %{ "EAX" %} |
| interface(REG_INTER); |
| %} |
| |
| // Flags register, used as output of compare instructions. |
| // Matches RegFlags in register class int_flags; printed as "EFLAGS". |
| operand eFlagsReg() %{ |
| constraint(ALLOC_IN_RC(int_flags)); |
| match(RegFlags); |
| |
| format %{ "EFLAGS" %} |
| interface(REG_INTER); |
| %} |
| |
| // Flags register, used as output of FLOATING POINT compare instructions. |
| // Same register class and match rule as eFlagsReg; only the operand name and |
| // the printed form ("EFLAGS_U") differ. |
| operand eFlagsRegU() %{ |
| constraint(ALLOC_IN_RC(int_flags)); |
| match(RegFlags); |
| |
| format %{ "EFLAGS_U" %} |
| interface(REG_INTER); |
| %} |
| |
| // Flags operand printed as "EFLAGS_U_CF". Its predicate is the constant |
| // false, so the match(RegFlags) rule is never satisfied directly; presumably |
| // the operand is only named explicitly by instruction rules -- confirm. |
| operand eFlagsRegUCF() %{ |
| constraint(ALLOC_IN_RC(int_flags)); |
| match(RegFlags); |
| predicate(false); |
| |
| format %{ "EFLAGS_U_CF" %} |
| interface(REG_INTER); |
| %} |
| |
| // Condition Code Register used by long compare; printed as "FLAGS_LTGE". |
| // Same register class (int_flags) and match rule as the EQNE/LEGT variants. |
| operand flagsReg_long_LTGE() %{ |
| constraint(ALLOC_IN_RC(int_flags)); |
| match(RegFlags); |
| format %{ "FLAGS_LTGE" %} |
| interface(REG_INTER); |
| %} |
| // Flags operand in register class int_flags; printed as "FLAGS_EQNE". |
| operand flagsReg_long_EQNE() %{ |
| constraint(ALLOC_IN_RC(int_flags)); |
| match(RegFlags); |
| format %{ "FLAGS_EQNE" %} |
| interface(REG_INTER); |
| %} |
| // Flags operand in register class int_flags; printed as "FLAGS_LEGT". |
| operand flagsReg_long_LEGT() %{ |
| constraint(ALLOC_IN_RC(int_flags)); |
| match(RegFlags); |
| format %{ "FLAGS_LEGT" %} |
| interface(REG_INTER); |
| %} |
| |
| // Double register operand (matches RegD) in register class fp_dbl_reg; |
| // enabled only when UseSSE < 2. Also lists regDPR1 and regDPR2 as matches. |
| operand regDPR() %{ |
| predicate( UseSSE < 2 ); |
| constraint(ALLOC_IN_RC(fp_dbl_reg)); |
| match(RegD); |
| match(regDPR1); |
| match(regDPR2); |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Double register operand in register class fp_dbl_reg0; printed as "FPR1". |
| // Enabled only when UseSSE < 2. |
| operand regDPR1(regDPR reg) %{ |
| predicate( UseSSE < 2 ); |
| constraint(ALLOC_IN_RC(fp_dbl_reg0)); |
| match(reg); |
| format %{ "FPR1" %} |
| interface(REG_INTER); |
| %} |
| |
| // Double register operand in register class fp_dbl_reg1; printed as "FPR2". |
| // Enabled only when UseSSE < 2. |
| operand regDPR2(regDPR reg) %{ |
| predicate( UseSSE < 2 ); |
| constraint(ALLOC_IN_RC(fp_dbl_reg1)); |
| match(reg); |
| format %{ "FPR2" %} |
| interface(REG_INTER); |
| %} |
| |
| // Double register operand in register class fp_dbl_notreg0 (presumably every |
| // fp_dbl_reg register except the one in fp_dbl_reg0 -- confirm against the |
| // register class definitions). Enabled only when UseSSE < 2. |
| operand regnotDPR1(regDPR reg) %{ |
| predicate( UseSSE < 2 ); |
| constraint(ALLOC_IN_RC(fp_dbl_notreg0)); |
| match(reg); |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Float register operand (matches RegF) in register class fp_flt_reg; also |
| // lists regFPR1 as a match. |
| // NOTE(review): the predicate is UseSSE < 2 while regF uses UseSSE>=1, so |
| // both predicates hold when UseSSE == 1 -- confirm this overlap is intended. |
| operand regFPR() %{ |
| predicate( UseSSE < 2 ); |
| constraint(ALLOC_IN_RC(fp_flt_reg)); |
| match(RegF); |
| match(regFPR1); |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Float register operand in register class fp_flt_reg0; printed as "FPR1". |
| // Enabled only when UseSSE < 2. |
| operand regFPR1(regFPR reg) %{ |
| predicate( UseSSE < 2 ); |
| constraint(ALLOC_IN_RC(fp_flt_reg0)); |
| match(reg); |
| format %{ "FPR1" %} |
| interface(REG_INTER); |
| %} |
| |
| // XMM Float register operand: matches RegF in register class float_reg; |
| // enabled only when UseSSE>=1. |
| operand regF() %{ |
| predicate( UseSSE>=1 ); |
| constraint(ALLOC_IN_RC(float_reg)); |
| match(RegF); |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // XMM Double register operand: matches RegD in register class double_reg; |
| // enabled only when UseSSE>=2. |
| operand regD() %{ |
| predicate( UseSSE>=2 ); |
| constraint(ALLOC_IN_RC(double_reg)); |
| match(RegD); |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| |
| //----------Memory Operands---------------------------------------------------- |
| // Direct Memory Operand: the address is a pointer constant (immP) used as |
| // the displacement. index(0x4) is the "No Index" value annotated on the |
| // stackSlot operands in this file; base(0xFFFFFFFF) presumably encodes |
| // "no base register" -- confirm against the encoding routines. |
| operand direct(immP addr) %{ |
| match(addr); |
| |
| format %{ "[$addr]" %} |
| interface(MEMORY_INTER) %{ |
| base(0xFFFFFFFF); |
| index(0x4); |
| scale(0x0); |
| disp($addr); |
| %} |
| %} |
| |
| // Indirect Memory Operand: address is [reg] -- pointer register as base, |
| // no index (0x4), scale 0, displacement 0. |
| operand indirect(eRegP reg) %{ |
| constraint(ALLOC_IN_RC(int_reg)); |
| match(reg); |
| |
| format %{ "[$reg]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index(0x4); |
| scale(0x0); |
| disp(0x0); |
| %} |
| %} |
| |
| // Indirect Memory Plus Short Offset Operand: matches AddP of a pointer |
| // register and an immI8 constant; address is [reg + off] with no index. |
| operand indOffset8(eRegP reg, immI8 off) %{ |
| match(AddP reg off); |
| |
| format %{ "[$reg + $off]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index(0x4); |
| scale(0x0); |
| disp($off); |
| %} |
| %} |
| |
| // Indirect Memory Plus Long Offset Operand: matches AddP of a pointer |
| // register and an immI constant; address is [reg + off] with no index. |
| operand indOffset32(eRegP reg, immI off) %{ |
| match(AddP reg off); |
| |
| format %{ "[$reg + $off]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index(0x4); |
| scale(0x0); |
| disp($off); |
| %} |
| %} |
| |
| // Indirect Memory Plus Long Offset Operand, pointer-constant form: matches |
| // AddP of a pointer constant (immP) and an integer register. The integer |
| // register becomes the base and the pointer constant the displacement. |
| operand indOffset32X(rRegI reg, immP off) %{ |
| match(AddP off reg); |
| |
| format %{ "[$reg + $off]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index(0x4); |
| scale(0x0); |
| disp($off); |
| %} |
| %} |
| |
| // Indirect Memory Plus Index Register Plus Offset Operand: matches |
| // AddP (AddP reg ireg) off; address is [reg + off + ireg] with scale 0. |
| // op_cost 10. |
| operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ |
| match(AddP (AddP reg ireg) off); |
| |
| op_cost(10); |
| format %{"[$reg + $off + $ireg]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index($ireg); |
| scale(0x0); |
| disp($off); |
| %} |
| %} |
| |
| // Indirect Memory Plus Index Register Operand (no offset): matches |
| // AddP reg ireg; address is [reg + ireg] with scale 0 and displacement 0. |
| // op_cost 10. |
| operand indIndex(eRegP reg, rRegI ireg) %{ |
| match(AddP reg ireg); |
| |
| op_cost(10); |
| format %{"[$reg + $ireg]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index($ireg); |
| scale(0x0); |
| disp(0x0); |
| %} |
| %} |
| |
| // // ------------------------------------------------------------------------- |
| // // 486 architecture doesn't support "scale * index + offset" without a base |
| // // ------------------------------------------------------------------------- |
| // // Scaled Memory Operands |
| // // Indirect Memory Times Scale Plus Offset Operand |
| // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ |
| // match(AddP off (LShiftI ireg scale)); |
| // |
| // op_cost(10); |
| // format %{"[$off + $ireg << $scale]" %} |
| // interface(MEMORY_INTER) %{ |
| // base(0x4); |
| // index($ireg); |
| // scale($scale); |
| // disp($off); |
| // %} |
| // %} |
| |
| // Indirect Memory Times Scale Plus Index Register: matches |
| // AddP reg (LShiftI ireg scale); the shift count (immI2) is passed through |
| // as the scale field and the displacement is 0. op_cost 10. |
| operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ |
| match(AddP reg (LShiftI ireg scale)); |
| |
| op_cost(10); |
| format %{"[$reg + $ireg << $scale]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index($ireg); |
| scale($scale); |
| disp(0x0); |
| %} |
| %} |
| |
| // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: |
| // matches AddP (AddP reg (LShiftI ireg scale)) off; the shift count is the |
| // scale field and the immI constant is the displacement. op_cost 10. |
| operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ |
| match(AddP (AddP reg (LShiftI ireg scale)) off); |
| |
| op_cost(10); |
| format %{"[$reg + $off + $ireg << $scale]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index($ireg); |
| scale($scale); |
| disp($off); |
| %} |
| %} |
| |
| //----------Load Long Memory Operands------------------------------------------ |
| // The load-long idiom will use its address expression again after loading |
| // the first word of the long. If the load-long destination overlaps with |
| // registers used in the addressing expression, the 2nd half will be loaded |
| // from a clobbered address. Fix this by requiring that load-long use |
| // address registers that do not overlap with the load-long target. |
| |
| // load-long support: pointer operand restricted to register class esi_reg, |
| // used as the base register of the load_long_* memory operands below. |
| // Matches RegP and eSIRegP; op_cost 100. |
| operand load_long_RegP() %{ |
| constraint(ALLOC_IN_RC(esi_reg)); |
| match(RegP); |
| match(eSIRegP); |
| op_cost(100); |
| format %{ %} |
| interface(REG_INTER); |
| %} |
| |
| // Indirect Memory Operand Long: address is [reg] where reg is a |
| // load_long_RegP (register class esi_reg); no index, scale 0, disp 0. |
| operand load_long_indirect(load_long_RegP reg) %{ |
| constraint(ALLOC_IN_RC(esi_reg)); |
| match(reg); |
| |
| format %{ "[$reg]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index(0x4); |
| scale(0x0); |
| disp(0x0); |
| %} |
| %} |
| |
| // Indirect Memory Plus Long Offset Operand for load-long: matches AddP of a |
| // load_long_RegP base and an immI constant; address is [reg + off]. |
| operand load_long_indOffset32(load_long_RegP reg, immI off) %{ |
| match(AddP reg off); |
| |
| format %{ "[$reg + $off]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index(0x4); |
| scale(0x0); |
| disp($off); |
| %} |
| %} |
| |
| // Operand class grouping the two load-long addressing forms defined above. |
| opclass load_long_memory(load_long_indirect, load_long_indOffset32); |
| |
| |
| //----------Special Memory Operands-------------------------------------------- |
| // Stack Slot Operand - This operand is used for loading and storing temporary |
| // values on the stack where a match requires a value to |
| // flow through memory. |
| // Pointer stack slot (sRegP): addressed as ESP-based memory with the slot |
| // as displacement. |
| operand stackSlotP(sRegP reg) %{ |
| constraint(ALLOC_IN_RC(stack_slots)); |
| // No match rule because this operand is only generated in matching |
| format %{ "[$reg]" %} |
| interface(MEMORY_INTER) %{ |
| base(0x4); // ESP |
| index(0x4); // No Index |
| scale(0x0); // No Scale |
| disp($reg); // Stack Offset |
| %} |
| %} |
| |
| // Integer stack slot (sRegI): addressed as ESP-based memory with the slot |
| // as displacement. |
| operand stackSlotI(sRegI reg) %{ |
| constraint(ALLOC_IN_RC(stack_slots)); |
| // No match rule because this operand is only generated in matching |
| format %{ "[$reg]" %} |
| interface(MEMORY_INTER) %{ |
| base(0x4); // ESP |
| index(0x4); // No Index |
| scale(0x0); // No Scale |
| disp($reg); // Stack Offset |
| %} |
| %} |
| |
| // Float stack slot (sRegF): addressed as ESP-based memory with the slot |
| // as displacement. |
| operand stackSlotF(sRegF reg) %{ |
| constraint(ALLOC_IN_RC(stack_slots)); |
| // No match rule because this operand is only generated in matching |
| format %{ "[$reg]" %} |
| interface(MEMORY_INTER) %{ |
| base(0x4); // ESP |
| index(0x4); // No Index |
| scale(0x0); // No Scale |
| disp($reg); // Stack Offset |
| %} |
| %} |
| |
| // Double stack slot (sRegD): addressed as ESP-based memory with the slot |
| // as displacement. |
| operand stackSlotD(sRegD reg) %{ |
| constraint(ALLOC_IN_RC(stack_slots)); |
| // No match rule because this operand is only generated in matching |
| format %{ "[$reg]" %} |
| interface(MEMORY_INTER) %{ |
| base(0x4); // ESP |
| index(0x4); // No Index |
| scale(0x0); // No Scale |
| disp($reg); // Stack Offset |
| %} |
| %} |
| |
| // Long stack slot (sRegL): addressed as ESP-based memory with the slot |
| // as displacement. |
| operand stackSlotL(sRegL reg) %{ |
| constraint(ALLOC_IN_RC(stack_slots)); |
| // No match rule because this operand is only generated in matching |
| format %{ "[$reg]" %} |
| interface(MEMORY_INTER) %{ |
| base(0x4); // ESP |
| index(0x4); // No Index |
| scale(0x0); // No Scale |
| disp($reg); // Stack Offset |
| %} |
| %} |
| |
| //----------Memory Operands - Win95 Implicit Null Variants---------------- |
| // Indirect Memory Operand, Win95-safe variant: same address form as |
| // `indirect`, but the base is an eRegP_no_EBP and op_cost is 100. |
| operand indirect_win95_safe(eRegP_no_EBP reg) |
| %{ |
| constraint(ALLOC_IN_RC(int_reg)); |
| match(reg); |
| |
| op_cost(100); |
| format %{ "[$reg]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index(0x4); |
| scale(0x0); |
| disp(0x0); |
| %} |
| %} |
| |
| // Indirect Memory Plus Short Offset Operand, Win95-safe variant: same |
| // address form as `indOffset8`, with an eRegP_no_EBP base and op_cost 100. |
| operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) |
| %{ |
| match(AddP reg off); |
| |
| op_cost(100); |
| format %{ "[$reg + $off]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index(0x4); |
| scale(0x0); |
| disp($off); |
| %} |
| %} |
| |
| // Indirect Memory Plus Long Offset Operand, Win95-safe variant: same |
| // address form as `indOffset32`, with an eRegP_no_EBP base and op_cost 100. |
| operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) |
| %{ |
| match(AddP reg off); |
| |
| op_cost(100); |
| format %{ "[$reg + $off]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index(0x4); |
| scale(0x0); |
| disp($off); |
| %} |
| %} |
| |
| // Indirect Memory Plus Index Register Plus Offset Operand, Win95-safe |
| // variant: same address form as `indIndexOffset`, with an eRegP_no_EBP base |
| // and op_cost 100 (instead of 10). |
| operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) |
| %{ |
| match(AddP (AddP reg ireg) off); |
| |
| op_cost(100); |
| format %{"[$reg + $off + $ireg]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index($ireg); |
| scale(0x0); |
| disp($off); |
| %} |
| %} |
| |
| // Indirect Memory Times Scale Plus Index Register, Win95-safe variant: same |
| // address form as `indIndexScale`, with an eRegP_no_EBP base and op_cost |
| // 100 (instead of 10). |
| operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) |
| %{ |
| match(AddP reg (LShiftI ireg scale)); |
| |
| op_cost(100); |
| format %{"[$reg + $ireg << $scale]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index($ireg); |
| scale($scale); |
| disp(0x0); |
| %} |
| %} |
| |
| // Indirect Memory Times Scale Plus Index Register Plus Offset Operand, |
| // Win95-safe variant: same address form as `indIndexScaleOffset`, with an |
| // eRegP_no_EBP base and op_cost 100 (instead of 10). |
| operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) |
| %{ |
| match(AddP (AddP reg (LShiftI ireg scale)) off); |
| |
| op_cost(100); |
| format %{"[$reg + $off + $ireg << $scale]" %} |
| interface(MEMORY_INTER) %{ |
| base($reg); |
| index($ireg); |
| scale($scale); |
| disp($off); |
| %} |
| %} |
| |
| //----------Conditional Branch Operands---------------------------------------- |
| // Comparison Op - This is the operation of the comparison, and is limited to |
| // the following set of codes: |
| // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) |
| // |
| // Other attributes of the comparison, such as unsignedness, are specified |
| // by the comparison instruction that sets a condition code flags register. |
| // That result is represented by a flags operand whose subtype is appropriate |
| // to the unsignedness (etc.) of the comparison. |
| // |
| // Later, the instruction which matches both the Comparison Op (a Bool) and |
| // the flags (produced by the Cmp) specifies the coding of the comparison op |
| // by matching a specific subtype of Bool operand below, such as cmpOpU. |
| |
| // Comparison Code (signed). Each COND_INTER entry pairs an encoding value |
| // with the mnemonic suffix used when printing (presumably the x86 |
| // condition-code nibble -- confirm against the Jcc/SETcc encoders). |
| operand cmpOp() %{ |
| match(Bool); |
| |
| format %{ "" %} |
| interface(COND_INTER) %{ |
| equal(0x4, "e"); |
| not_equal(0x5, "ne"); |
| less(0xC, "l"); |
| greater_equal(0xD, "ge"); |
| less_equal(0xE, "le"); |
| greater(0xF, "g"); |
| overflow(0x0, "o"); |
| no_overflow(0x1, "no"); |
| %} |
| %} |
| |
| // Comparison Code, unsigned compare. Used by FP also, with |
| // C2 (unordered) turned into GT or LT already. The other bits |
| // C0 and C3 are turned into Carry & Zero flags. |
| // Differs from cmpOp in the four ordering entries, which use the |
| // below/above suffixes ("b", "nb", "be", "nbe") and their encodings. |
| operand cmpOpU() %{ |
| match(Bool); |
| |
| format %{ "" %} |
| interface(COND_INTER) %{ |
| equal(0x4, "e"); |
| not_equal(0x5, "ne"); |
| less(0x2, "b"); |
| greater_equal(0x3, "nb"); |
| less_equal(0x6, "be"); |
| greater(0x7, "nbe"); |
| overflow(0x0, "o"); |
| no_overflow(0x1, "no"); |
| %} |
| %} |
| |
| // Floating comparisons that don't require any fixup for the unordered case. |
| // Same condition table as cmpOpU; the predicate restricts this operand to |
| // Bool nodes whose test is lt, ge, le or gt. |
| operand cmpOpUCF() %{ |
| match(Bool); |
| predicate(n->as_Bool()->_test._test == BoolTest::lt || |
| n->as_Bool()->_test._test == BoolTest::ge || |
| n->as_Bool()->_test._test == BoolTest::le || |
| n->as_Bool()->_test._test == BoolTest::gt); |
| format %{ "" %} |
| interface(COND_INTER) %{ |
| equal(0x4, "e"); |
| not_equal(0x5, "ne"); |
| less(0x2, "b"); |
| greater_equal(0x3, "nb"); |
| less_equal(0x6, "be"); |
| greater(0x7, "nbe"); |
| overflow(0x0, "o"); |
| no_overflow(0x1, "no"); |
| %} |
| %} |
| |
| |
| // Floating comparisons that can be fixed up with extra conditional jumps. |
| // Same condition table as cmpOpU; the predicate restricts this operand to |
| // Bool nodes whose test is ne or eq. |
| operand cmpOpUCF2() %{ |
| match(Bool); |
| predicate(n->as_Bool()->_test._test == BoolTest::ne || |
| n->as_Bool()->_test._test == BoolTest::eq); |
| format %{ "" %} |
| interface(COND_INTER) %{ |
| equal(0x4, "e"); |
| not_equal(0x5, "ne"); |
| less(0x2, "b"); |
| greater_equal(0x3, "nb"); |
| less_equal(0x6, "be"); |
| greater(0x7, "nbe"); |
| overflow(0x0, "o"); |
| no_overflow(0x1, "no"); |
| %} |
| %} |
| |
| // Comparison Code for FP conditional move. |
| // The predicate excludes overflow / no_overflow tests; those two entries |
| // are still listed in the table but are marked as not really supported. |
| // The six supported entries carry only an encoding value, no mnemonic. |
| operand cmpOp_fcmov() %{ |
| match(Bool); |
| |
| predicate(n->as_Bool()->_test._test != BoolTest::overflow && |
| n->as_Bool()->_test._test != BoolTest::no_overflow); |
| format %{ "" %} |
| interface(COND_INTER) %{ |
| equal (0x0C8); |
| not_equal (0x1C8); |
| less (0x0C0); |
| greater_equal(0x1C0); |
| less_equal (0x0D0); |
| greater (0x1D0); |
| overflow(0x0, "o"); // not really supported by the instruction |
| no_overflow(0x1, "no"); // not really supported by the instruction |
| %} |
| %} |
| |
| // Comparison Code used in long compares, with the operands commuted: |
| // relative to cmpOp, less <-> greater and less_equal <-> greater_equal have |
| // their encodings and suffixes swapped; equal / not_equal and the overflow |
| // entries are unchanged. |
| operand cmpOp_commute() %{ |
| match(Bool); |
| |
| format %{ "" %} |
| interface(COND_INTER) %{ |
| equal(0x4, "e"); |
| not_equal(0x5, "ne"); |
| less(0xF, "g"); |
| greater_equal(0xE, "le"); |
| less_equal(0xD, "ge"); |
| greater(0xC, "l"); |
| overflow(0x0, "o"); |
| no_overflow(0x1, "no"); |
| %} |
| %} |
| |
| //----------OPERAND CLASSES---------------------------------------------------- |
| // Operand Classes are groups of operands that are used to simplify |
| // instruction definitions by not requiring the AD writer to specify separate |
| // instructions for every form of operand when the instruction accepts |
| // multiple operand types with the same basic encoding and format. The classic |
| // case of this is memory operands. |
| |
| // General memory operand class: every non-load-long, non-win95 addressing |
| // form defined above, including the pointer-constant form indOffset32X. |
| opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, |
| indIndex, indIndexScale, indIndexScaleOffset); |
| |
| // Long memory operations are encoded in 2 instructions and a +4 offset. |
| // This means some kind of offset is always required and you cannot use |
| // an oop as the offset (done when working on static globals). |
| // Accordingly this class is the `memory` list without indOffset32X. |
| opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, |
| indIndex, indIndexScale, indIndexScaleOffset); |
| |
| |
| //----------PIPELINE----------------------------------------------------------- |
| // Rules which define the behavior of the target architecture's pipeline. |
| pipeline %{ |
| |
| //----------ATTRIBUTES--------------------------------------------------------- |
| // Global pipeline attributes for this architecture description. |
| attributes %{ |
| variable_size_instructions; // Variable size instructions |
| max_instructions_per_bundle = 3; // Up to 3 instructions per bundle |
| instruction_unit_size = 1; // Instruction size unit is 1 byte |
| instruction_fetch_unit_size = 16; // The processor fetches one line |
| instruction_fetch_units = 1; // of 16 bytes |
| |
| // List of nop instructions |
| nops( MachNop ); |
| %} |
| |
| //----------RESOURCES---------------------------------------------------------- |
| // Resources are the functional units available to the machine |
| |
| // Generic P2/P3 pipeline |
| // 3 decoders, only D0 handles big operands; a "bundle" is the limit of |
| // 3 instructions decoded per cycle. |
| // 2 load/store ops per cycle, 1 branch, 1 FPU, |
| // 2 ALU op, only ALU0 handles mul/div instructions. |
| // DECODE, MEM and ALU are masks meaning "any one of" their member units. |
| resources( D0, D1, D2, DECODE = D0 | D1 | D2, |
| MS0, MS1, MEM = MS0 | MS1, |
| BR, FPU, |
| ALU0, ALU1, ALU = ALU0 | ALU1 ); |
| |
| //----------PIPELINE DESCRIPTION----------------------------------------------- |
| // Pipeline Description specifies the stages in the machine's pipeline |
| |
| // Generic P2/P3 pipeline: six stages, S0 through S5, referenced by the |
| // pipe_class entries below. |
| pipe_desc(S0, S1, S2, S3, S4, S5); |
| |
| //----------PIPELINE CLASSES--------------------------------------------------- |
| // Pipeline Classes describe the stages in which input and output are |
| // referenced by the hardware pipeline. |
| |
| // Naming convention: ialu or fpu |
| // Then: _reg |
| // Then: _reg if there is a 2nd register |
| // Then: _long if it's a pair of instructions implementing a long |
| // Then: _fat if it requires the big decoder |
| // Or: _mem if it requires the big decoder and a memory unit. |
| |
| // Integer ALU reg operation: one instruction that reads dst in S3 and |
| // writes it in S4, using any decoder in S0 and any ALU in S3. |
| pipe_class ialu_reg(rRegI dst) %{ |
| single_instruction; |
| dst : S4(write); |
| dst : S3(read); |
| DECODE : S0; // any decoder |
| ALU : S3; // any alu |
| %} |
| |
| // Long ALU reg operation: two instructions; dst is read in S3 and written |
| // in S4, with decoder and ALU counts of 2. |
| pipe_class ialu_reg_long(eRegL dst) %{ |
| instruction_count(2); |
| dst : S4(write); |
| dst : S3(read); |
| DECODE : S0(2); // any 2 decoders |
| ALU : S3(2); // both alus |
| %} |
| |
| // Integer ALU reg operation using big decoder: like ialu_reg but decoding |
| // is restricted to D0. |
| pipe_class ialu_reg_fat(rRegI dst) %{ |
| single_instruction; |
| dst : S4(write); |
| dst : S3(read); |
| D0 : S0; // big decoder only |
| ALU : S3; // any alu |
| %} |
| |
| // Long ALU reg operation using big decoder: like ialu_reg_long but decoding |
| // is restricted to D0 (count 2). |
| pipe_class ialu_reg_long_fat(eRegL dst) %{ |
| instruction_count(2); |
| dst : S4(write); |
| dst : S3(read); |
| D0 : S0(2); // big decoder only; twice |
| ALU : S3(2); // any 2 alus |
| %} |
| |
| // Integer ALU reg-reg operation: one instruction reading src in S3 and |
| // writing dst in S4, using any decoder and any ALU. |
| pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ |
| single_instruction; |
| dst : S4(write); |
| src : S3(read); |
| DECODE : S0; // any decoder |
| ALU : S3; // any alu |
| %} |
| |
| // Long ALU reg-reg operation: two instructions reading src in S3 and |
| // writing dst in S4, with decoder and ALU counts of 2. |
| pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ |
| instruction_count(2); |
| dst : S4(write); |
| src : S3(read); |
| DECODE : S0(2); // any 2 decoders |
| ALU : S3(2); // both alus |
| %} |
| |
| // Integer ALU operation using the big decoder. Despite the "reg_reg" name, |
| // the src parameter is declared with the `memory` operand class. |
| pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ |
| single_instruction; |
| dst : S4(write); |
| src : S3(read); |
| D0 : S0; // big decoder only |
| ALU : S3; // any alu |
| %} |
| |
| // Long ALU reg-reg operation using big decoder: like ialu_reg_reg_long but |
| // decoding is restricted to D0 (count 2). |
| pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ |
| instruction_count(2); |
| dst : S4(write); |
| src : S3(read); |
| D0 : S0(2); // big decoder only; twice |
| ALU : S3(2); // both alus |
| %} |
| |
| // Integer ALU reg-mem operation: mem is read in S3 (memory unit in S3), the |
| // ALU is used in S4 and dst is written in S5; big decoder only. |
| pipe_class ialu_reg_mem(rRegI dst, memory mem) %{ |
| single_instruction; |
| dst : S5(write); |
| mem : S3(read); |
| D0 : S0; // big decoder only |
| ALU : S4; // any alu |
| MEM : S3; // any mem |
| %} |
| |
| // Long ALU reg-mem operation: two-instruction form of ialu_reg_mem; the |
| // memory parameter uses the load_long_memory operand class. |
| pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ |
| instruction_count(2); |
| dst : S5(write); |
| mem : S3(read); |
| D0 : S0(2); // big decoder only; twice |
| ALU : S4(2); // any 2 alus |
| MEM : S3(2); // both mems |
| %} |
| |
| // Integer mem operation (prefetch): reads mem in S3 using a memory unit; |
| // no ALU resource and no register operand. |
| pipe_class ialu_mem(memory mem) |
| %{ |
| single_instruction; |
| mem : S3(read); |
| D0 : S0; // big decoder only |
| MEM : S3; // any mem |
| %} |
| |
| // Integer Store to Memory: the mem operand (address) is read in S3 and the |
| // src register in S5; uses the big decoder, any ALU in S4 and a memory unit |
| // in S3. |
| pipe_class ialu_mem_reg(memory mem, rRegI src) %{ |
| single_instruction; |
| mem : S3(read); |
| src : S5(read); |
| D0 : S0; // big decoder only |
| ALU : S4; // any alu |
| MEM : S3; |
| %} |
| |
| // Long Store to Memory: two-instruction form of ialu_mem_reg with all |
| // resource counts doubled. |
| pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ |
| instruction_count(2); |
| mem : S3(read); |
| src : S5(read); |
| D0 : S0(2); // big decoder only; twice |
| ALU : S4(2); // any 2 alus |
| MEM : S3(2); // Both mems |
| %} |
| |
| // Integer Store to Memory of an immediate: same resources as ialu_mem_reg |
| // but with no register source operand. |
| pipe_class ialu_mem_imm(memory mem) %{ |
| single_instruction; |
| mem : S3(read); |
| D0 : S0; // big decoder only |
| ALU : S4; // any alu |
| MEM : S3; |
| %} |
| |
| // Integer ALU0 reg-reg operation: like ialu_reg_reg but restricted to the |
| // big decoder and to ALU0. |
| pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ |
| single_instruction; |
| dst : S4(write); |
| src : S3(read); |
| D0 : S0; // Big decoder only |
| ALU0 : S3; // only alu0 |
| %} |
| |
| // Integer ALU0 reg-mem operation: like ialu_reg_mem but restricted to ALU0. |
| pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ |
| single_instruction; |
| dst : S5(write); |
| mem : S3(read); |
| D0 : S0; // big decoder only |
| ALU0 : S4; // ALU0 only |
| MEM : S3; // any mem |
| %} |
| |
| // Integer ALU reg-reg operation that produces flags: reads src1 and src2 in |
| // S3 and writes the flags operand cr in S4. |
| pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{ |
| single_instruction; |
| cr : S4(write); |
| src1 : S3(read); |
| src2 : S3(read); |
| DECODE : S0; // any decoder |
| ALU : S3; // any alu |
| %} |
| |
| // Integer ALU reg-imm operation that produces flags: reads src1 in S3 and |
| // writes the flags operand cr in S4. |
| pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ |
| single_instruction; |
| cr : S4(write); |
| src1 : S3(read); |
| DECODE : S0; // any decoder |
| ALU : S3; // any alu |
| %} |
| |
| // Integer ALU reg-mem operation that produces flags: reads src1 and the |
| // memory operand src2 in S3, uses the ALU in S4 and writes cr in S4. |
| pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ |
| single_instruction; |
| cr : S4(write); |
| src1 : S3(read); |
| src2 : S3(read); |
| D0 : S0; // big decoder only |
| ALU : S4; // any alu |
| MEM : S3; |
| %} |
| |
| // Four-instruction sequence (pipe_cmplt): reads p and q in S3 and y in S4. |
| // No operand is marked as written and only decoder resources are listed. |
| pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ |
| instruction_count(4); |
| y : S4(read); |
| q : S3(read); |
| p : S3(read); |
| DECODE : S0(4); // any decoder |
| %} |
| |
| // Conditional move reg-reg: reads src and the flags cr in S3, writes dst |
| // in S4; only a decoder resource is listed. |
| pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ |
| single_instruction; |
| dst : S4(write); |
| src : S3(read); |
| cr : S3(read); |
| DECODE : S0; // any decoder |
| %} |
| |
| // Conditional move reg-mem: reads the memory operand src and the flags cr |
| // in S3, writes dst in S4; uses any decoder and a memory unit in S3. |
| pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ |
| single_instruction; |
| dst : S4(write); |
| src : S3(read); |
| cr : S3(read); |
| DECODE : S0; // any decoder |
| MEM : S3; |
| %} |
| |
| // Conditional move reg-reg long: reads src and cr in S3, writes dst in S4. |
| // NOTE(review): declared single_instruction although the decoder count is |
| // 2; the other long classes use instruction_count(2) -- confirm intent. |
| pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ |
| single_instruction; |
| dst : S4(write); |
| src : S3(read); |
| cr : S3(read); |
| DECODE : S0(2); // any 2 decoders |
| %} |
| |
| // Conditional move double reg-reg: dst is a regDPR1 and src a regDPR; reads |
| // src and cr in S3, writes dst in S4. |
| pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ |
| single_instruction; |
| dst : S4(write); |
| src : S3(read); |
| cr : S3(read); |
| DECODE : S0; // any decoder |
| %} |
| |
| // Float single-register operation: two instructions; dst is only marked as |
| // read (S3), and the FPU is used in S3. |
| pipe_class fpu_reg(regDPR dst) %{ |
| instruction_count(2); |
| dst : S3(read); |
| DECODE : S0(2); // any 2 decoders |
| FPU : S3; |
| %} |
| |
| // Float reg-reg operation: two instructions reading src in S3 and writing |
| // dst in S4; FPU used in S3. |
| pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ |
| instruction_count(2); |
| dst : S4(write); |
| src : S3(read); |
| DECODE : S0(2); // any 2 decoders |
| FPU : S3; |
| %} |
| |
| // Float reg-reg-reg operation: three instructions reading src1 and src2 in |
| // S3 and writing dst in S4; FPU count 2 in S3. |
| pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ |
| instruction_count(3); |
| dst : S4(write); |
| src1 : S3(read); |
| src2 : S3(read); |
| DECODE : S0(3); // any 3 decoders |
| FPU : S3(2); |
| %} |
| |
| // Float operation with three register sources: four instructions reading |
| // src1, src2 and src3 in S3 and writing dst in S4; FPU count 2 in S3. |
| pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ |
| instruction_count(4); |
| dst : S4(write); |
| src1 : S3(read); |
| src2 : S3(read); |
| src3 : S3(read); |
| DECODE : S0(4); // any decoder; count 4 matches instruction_count(4) |
| FPU : S3(2); |
| %} |
| |
| // Float operation with one memory source and two register sources: four |
| // instructions; one uses the big decoder in S0 and the remaining three use |
| // any decoder in S1. All sources are read in S3; dst is written in S4. |
| pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ |
| instruction_count(4); |
| dst : S4(write); |
| src1 : S3(read); |
| src2 : S3(read); |
| src3 : S3(read); |
| DECODE : S1(3); // any 3 decoders |
| D0 : S0; // Big decoder only |
| FPU : S3(2); |
| MEM : S3; |
| %} |
| |
| // Float reg-mem operation: two instructions; mem is read in S3, the FPU is |
| // used in S4 and dst is written in S5. |
| pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ |
| instruction_count(2); |
| dst : S5(write); |
| mem : S3(read); |
| D0 : S0; // big decoder only |
| DECODE : S1; // any decoder for FPU POP |
| FPU : S4; |
| MEM : S3; // any mem |
| %} |
| |
| // Float reg-reg-mem operation: three instructions; src1 and mem are read |
| // in S3, the FPU is used in S4 and dst is written in S5. |
| pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ |
| instruction_count(3); |
| dst : S5(write); |
| src1 : S3(read); |
| mem : S3(read); |
| D0 : S0; // big decoder only |
| DECODE : S1(2); // any decoder for FPU POP |
| FPU : S4; |
| MEM : S3; // any mem |
| %} |
| |
| // Float mem-reg operation: two instructions; mem is read in S3 and src in |
| // S5. Here the any-decoder slot is in S0 and the big decoder in S1, the |
| // reverse of fpu_reg_mem. |
| pipe_class fpu_mem_reg(memory mem, regDPR src) %{ |
| instruction_count(2); |
| src : S5(read); |
| mem : S3(read); |
| DECODE : S0; // any decoder for FPU PUSH |
| D0 : S1; // big decoder only |
| FPU : S4; |
| MEM : S3; // any mem |
| %} |
| |
| // Float mem-reg-reg operation: three instructions; src1, src2 and mem are |
| // all read in S3; FPU used in S4 and a memory unit in S3. |
| pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ |
| instruction_count(3); |
| src1 : S3(read); |
| src2 : S3(read); |
| mem : S3(read); |
| DECODE : S0(2); // any decoder for FPU PUSH |
| D0 : S1; // big decoder only |
| FPU : S4; |
| MEM : S3; // any mem |
| %} |
| |
| // Float mem-reg-mem operation: three-instruction sequence with two memory |
| // operands; D0 and MEM are each reserved with a count of 2. |
| pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ |
| instruction_count(3); |
| src1 : S3(read); |
| src2 : S3(read); |
| mem : S4(read); |
| DECODE : S0; // any decoder for FPU PUSH |
| D0 : S0(2); // big decoder only |
| FPU : S4; |
| MEM : S3(2); // any mem |
| %} |
| |
| // Float mem-mem operation: two-instruction sequence. |
| // Only D0 and MEM are reserved; no FPU resource is declared for this class. |
| pipe_class fpu_mem_mem(memory dst, memory src1) %{ |
| instruction_count(2); |
| src1 : S3(read); |
| dst : S4(read); |
| D0 : S0(2); // big decoder only |
| MEM : S3(2); // any mem |
| %} |
| |
| // Float mem-mem-mem operation: three-instruction sequence. |
| // D0 and MEM are each reserved with a count of 3, plus one FPU use at S4. |
| pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ |
| instruction_count(3); |
| src1 : S3(read); |
| src2 : S3(read); |
| dst : S4(read); |
| D0 : S0(3); // big decoder only |
| FPU : S4; |
| MEM : S3(3); // any mem |
| %} |
| |
| // Float mem-reg operation, three-instruction sequence; src1 and mem are both |
| // read at S4. The _con suffix presumably marks a constant operand, as in |
| // fpu_reg_con - TODO confirm against the users of this class. |
| pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ |
| instruction_count(3); |
| src1 : S4(read); |
| mem : S4(read); |
| DECODE : S0; // any decoder for FPU PUSH |
| D0 : S0(2); // big decoder only |
| FPU : S4; |
| MEM : S3(2); // any mem |
| %} |
| |
| // Float load constant: two-instruction sequence (a load followed by an FPU |
| // pop, per the resource notes below); dst is written at S5. |
| pipe_class fpu_reg_con(regDPR dst) %{ |
| instruction_count(2); |
| dst : S5(write); |
| D0 : S0; // big decoder only for the load |
| DECODE : S1; // any decoder for FPU POP |
| FPU : S4; |
| MEM : S3; // any mem |
| %} |
| |
| // Float load constant combined with a register source: three-instruction |
| // sequence; src is read at S3 and dst is written at S5. |
| pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ |
| instruction_count(3); |
| dst : S5(write); |
| src : S3(read); |
| D0 : S0; // big decoder only for the load |
| DECODE : S1(2); // any decoder for FPU POP |
| FPU : S4; |
| MEM : S3; // any mem |
| %} |
| |
| // UnConditional branch: a single instruction that uses the BR resource at S3. |
| pipe_class pipe_jmp( label labl ) %{ |
| single_instruction; |
| BR : S3; |
| %} |
| |
| // Conditional branch: a single instruction that reads the flags operand cr |
| // at S1 and uses the BR resource at S3. |
| pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ |
| single_instruction; |
| cr : S1(read); |
| BR : S3; |
| %} |
| |
| // Allocation idiom: one serializing instruction with a fixed latency of 6. |
| // heap_ptr is read at S3 and dst is written at S5. |
| pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ |
| instruction_count(1); force_serialization; |
| fixed_latency(6); |
| heap_ptr : S3(read); |
| DECODE : S0(3); |
| D0 : S2; |
| MEM : S3; |
| ALU : S3(2); |
| dst : S5(write); |
| BR : S5; |
| %} |
| |
| // Generic big/slow expanded idiom: modelled as 10 instructions spanning |
| // multiple bundles, serializing, with a fixed latency of 100. |
| pipe_class pipe_slow( ) %{ |
| instruction_count(10); multiple_bundles; force_serialization; |
| fixed_latency(100); |
| D0 : S0(2); |
| MEM : S3(2); |
| %} |
| |
| // The real do-nothing guy: zero instructions and no operands or resources. |
| pipe_class empty( ) %{ |
| instruction_count(0); |
| %} |
| |
| // Define the class for the Nop node: MachNop is bound to the 'empty' pipe class. |
| define %{ |
| MachNop = empty; |
| %} |
| |
| %} |
| |
| //----------INSTRUCTIONS------------------------------------------------------- |
| // |
| // match -- States which machine-independent subtree may be replaced |
| // by this instruction. |
| // ins_cost -- The estimated cost of this instruction is used by instruction |
| // selection to identify a minimum cost tree of machine |
| // instructions that matches a tree of machine-independent |
| // instructions. |
| // format -- A string providing the disassembly for this instruction. |
| // The value of an instruction's operand may be inserted |
| // by referring to it with a '$' prefix. |
| // opcode -- Three instruction opcodes may be provided. These are referred |
| // to within an encode class as $primary, $secondary, and $tertiary |
| // respectively. The primary opcode is commonly used to |
| // indicate the type of machine instruction, while secondary |
| // and tertiary are often used for prefix options or addressing |
| // modes. |
| // ins_encode -- A list of encode classes with parameters. The encode class |
| // name must have been defined in an 'enc_class' specification |
| // in the encode section of the architecture description. |
| |
| //----------BSWAP-Instruction-------------------------------------------------- |
| // Reverse the bytes of a 32-bit int in place (ReverseBytesI) with one BSWAP. |
| instruct bytes_reverse_int(rRegI dst) %{ |
| match(Set dst (ReverseBytesI dst)); |
| |
| format %{ "BSWAP $dst" %} |
| opcode(0x0F, 0xC8); |
| ins_encode( OpcP, OpcSReg(dst) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Reverse the bytes of a 64-bit long held in a register pair (ReverseBytesL). |
| // Per the format, each half is byte-swapped and the halves are then exchanged; |
| // the actual bytes come from the bswap_long_bytes encode class. |
| instruct bytes_reverse_long(eRegL dst) %{ |
| match(Set dst (ReverseBytesL dst)); |
| |
| format %{ "BSWAP $dst.lo\n\t" |
| "BSWAP $dst.hi\n\t" |
| "XCHG $dst.lo $dst.hi" %} |
| |
| ins_cost(125); |
| ins_encode( bswap_long_bytes(dst) ); |
| ins_pipe( ialu_reg_reg); |
| %} |
| |
| // Reverse the bytes of an unsigned 16-bit value (ReverseBytesUS) in place: |
| // the full 32-bit register is byte-swapped, then shifted right logically by |
| // 16 so the result is zero-extended. The flags register is declared killed. |
| // The format no longer ends with a dangling "\n\t" after its last line. |
| instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ |
| match(Set dst (ReverseBytesUS dst)); |
| effect(KILL cr); |
| |
| format %{ "BSWAP $dst\n\t" |
| "SHR $dst,16" %} |
| ins_encode %{ |
| __ bswapl($dst$$Register); |
| __ shrl($dst$$Register, 16); |
| %} |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Reverse the bytes of a signed 16-bit value (ReverseBytesS) in place: |
| // the full 32-bit register is byte-swapped, then shifted right arithmetically |
| // by 16 so the result is sign-extended. The flags register is declared killed. |
| // The format no longer ends with a dangling "\n\t" after its last line. |
| instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ |
| match(Set dst (ReverseBytesS dst)); |
| effect(KILL cr); |
| |
| format %{ "BSWAP $dst\n\t" |
| "SAR $dst,16" %} |
| ins_encode %{ |
| __ bswapl($dst$$Register); |
| __ sarl($dst$$Register, 16); |
| %} |
| ins_pipe( ialu_reg ); |
| %} |
| |
| |
| //---------- Zeros Count Instructions ------------------------------------------ |
| |
| // Count leading zeros of an int with a single LZCNT. |
| // Selected only when UseCountLeadingZerosInstruction is set. |
| instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ |
| predicate(UseCountLeadingZerosInstruction); |
| match(Set dst (CountLeadingZerosI src)); |
| effect(KILL cr); |
| |
| format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} |
| ins_encode %{ |
| __ lzcntl($dst$$Register, $src$$Register); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // Count leading zeros of an int without LZCNT (used when |
| // UseCountLeadingZerosInstruction is off). The result is 31 - BSR(src); when |
| // the notZero branch is not taken, dst is forced to -1 so the same NEG/ADD |
| // tail produces 32. |
| instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ |
| predicate(!UseCountLeadingZerosInstruction); |
| match(Set dst (CountLeadingZerosI src)); |
| effect(KILL cr); |
| |
| format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" |
| "JNZ skip\n\t" |
| "MOV $dst, -1\n" |
| "skip:\n\t" |
| "NEG $dst\n\t" |
| "ADD $dst, 31" %} |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| Register Rsrc = $src$$Register; |
| Label skip; |
| __ bsrl(Rdst, Rsrc); |
| __ jccb(Assembler::notZero, skip); |
| __ movl(Rdst, -1); |
| __ bind(skip); |
| __ negl(Rdst); |
| __ addl(Rdst, BitsPerInt - 1); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // Count leading zeros of a long (register pair) using LZCNT. |
| // The high word is counted first; if the carry flag is clear that count is |
| // the answer, otherwise the low word is counted and 32 is added. |
| // dst is a TEMP, so it is written before src has been fully consumed. |
| instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ |
| predicate(UseCountLeadingZerosInstruction); |
| match(Set dst (CountLeadingZerosL src)); |
| effect(TEMP dst, KILL cr); |
| |
| format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" |
| "JNC done\n\t" |
| "LZCNT $dst, $src.lo\n\t" |
| "ADD $dst, 32\n" |
| "done:" %} |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| Register Rsrc = $src$$Register; |
| Label done; |
| __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); |
| __ jccb(Assembler::carryClear, done); |
| __ lzcntl(Rdst, Rsrc); |
| __ addl(Rdst, BitsPerInt); |
| __ bind(done); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // Count leading zeros of a long (register pair) without LZCNT. |
| // BSR runs on the high word first; if it reports a set bit, 32 is added to |
| // the bit index. Otherwise BSR runs on the low word, and dst is forced to -1 |
| // when that is zero too. The shared tail computes 63 - dst, i.e. 64 for a |
| // zero input. |
| // The format no longer ends with a dangling "\n" after its last line. |
| instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ |
| predicate(!UseCountLeadingZerosInstruction); |
| match(Set dst (CountLeadingZerosL src)); |
| effect(TEMP dst, KILL cr); |
| |
| format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" |
| "JZ msw_is_zero\n\t" |
| "ADD $dst, 32\n\t" |
| "JMP not_zero\n" |
| "msw_is_zero:\n\t" |
| "BSR $dst, $src.lo\n\t" |
| "JNZ not_zero\n\t" |
| "MOV $dst, -1\n" |
| "not_zero:\n\t" |
| "NEG $dst\n\t" |
| "ADD $dst, 63" %} |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| Register Rsrc = $src$$Register; |
| Label msw_is_zero; |
| Label not_zero; |
| __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); |
| __ jccb(Assembler::zero, msw_is_zero); |
| __ addl(Rdst, BitsPerInt); |
| __ jmpb(not_zero); |
| __ bind(msw_is_zero); |
| __ bsrl(Rdst, Rsrc); |
| __ jccb(Assembler::notZero, not_zero); |
| __ movl(Rdst, -1); |
| __ bind(not_zero); |
| __ negl(Rdst); |
| __ addl(Rdst, BitsPerLong - 1); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // Count trailing zeros of an int with a single TZCNT. |
| // Selected only when UseCountTrailingZerosInstruction is set. |
| instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ |
| predicate(UseCountTrailingZerosInstruction); |
| match(Set dst (CountTrailingZerosI src)); |
| effect(KILL cr); |
| |
| format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} |
| ins_encode %{ |
| __ tzcntl($dst$$Register, $src$$Register); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // Count trailing zeros of an int without TZCNT (used when |
| // UseCountTrailingZerosInstruction is off). BSF supplies the bit index; when |
| // the notZero branch is not taken, dst is set to 32 instead. |
| instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ |
| predicate(!UseCountTrailingZerosInstruction); |
| match(Set dst (CountTrailingZerosI src)); |
| effect(KILL cr); |
| |
| format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" |
| "JNZ done\n\t" |
| "MOV $dst, 32\n" |
| "done:" %} |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| Label done; |
| __ bsfl(Rdst, $src$$Register); |
| __ jccb(Assembler::notZero, done); |
| __ movl(Rdst, BitsPerInt); |
| __ bind(done); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // Count trailing zeros of a long (register pair) using TZCNT. |
| // The low word is counted first; if the carry flag is clear that count is |
| // the answer, otherwise the high word is counted and 32 is added. |
| instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ |
| predicate(UseCountTrailingZerosInstruction); |
| match(Set dst (CountTrailingZerosL src)); |
| effect(TEMP dst, KILL cr); |
| |
| format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" |
| "JNC done\n\t" |
| "TZCNT $dst, $src.hi\n\t" |
| "ADD $dst, 32\n" |
| "done:" %} |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| Register Rsrc = $src$$Register; |
| Label done; |
| __ tzcntl(Rdst, Rsrc); |
| __ jccb(Assembler::carryClear, done); |
| __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); |
| __ addl(Rdst, BitsPerInt); |
| __ bind(done); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // Count trailing zeros of a long (register pair) without TZCNT. |
| // BSF on the low word gives the answer directly when it finds a set bit. |
| // Otherwise BSF runs on the high word (dst is set to 32 if that is zero too) |
| // and 32 is added, so a zero input yields 64. |
| instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ |
| predicate(!UseCountTrailingZerosInstruction); |
| match(Set dst (CountTrailingZerosL src)); |
| effect(TEMP dst, KILL cr); |
| |
| format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" |
| "JNZ done\n\t" |
| "BSF $dst, $src.hi\n\t" |
| "JNZ msw_not_zero\n\t" |
| "MOV $dst, 32\n" |
| "msw_not_zero:\n\t" |
| "ADD $dst, 32\n" |
| "done:" %} |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| Register Rsrc = $src$$Register; |
| Label msw_not_zero; |
| Label done; |
| __ bsfl(Rdst, Rsrc); |
| __ jccb(Assembler::notZero, done); |
| __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); |
| __ jccb(Assembler::notZero, msw_not_zero); |
| __ movl(Rdst, BitsPerInt); |
| __ bind(msw_not_zero); |
| __ addl(Rdst, BitsPerInt); |
| __ bind(done); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| |
| //---------- Population Count Instructions ------------------------------------- |
| |
| // Population count of an int register with POPCNT. |
| // Selected only when UsePopCountInstruction is set. |
| instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ |
| predicate(UsePopCountInstruction); |
| match(Set dst (PopCountI src)); |
| effect(KILL cr); |
| |
| format %{ "POPCNT $dst, $src" %} |
| ins_encode %{ |
| __ popcntl($dst$$Register, $src$$Register); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // Population count of an int loaded from memory: the LoadI is folded into |
| // the POPCNT memory operand. |
| instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ |
| predicate(UsePopCountInstruction); |
| match(Set dst (PopCountI (LoadI mem))); |
| effect(KILL cr); |
| |
| format %{ "POPCNT $dst, $mem" %} |
| ins_encode %{ |
| __ popcntl($dst$$Register, $mem$$Address); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // Population count of a long held in a register pair. |
| // Note: Long.bitCount(long) returns an int. |
| // The low word is counted into dst, the high word into tmp, and the two |
| // counts are added. |
| instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ |
| predicate(UsePopCountInstruction); |
| match(Set dst (PopCountL src)); |
| effect(KILL cr, TEMP tmp, TEMP dst); |
| |
| format %{ "POPCNT $dst, $src.lo\n\t" |
| "POPCNT $tmp, $src.hi\n\t" |
| "ADD $dst, $tmp" %} |
| ins_encode %{ |
| __ popcntl($dst$$Register, $src$$Register); |
| __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); |
| __ addl($dst$$Register, $tmp$$Register); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // Population count of a long loaded from memory. |
| // Note: Long.bitCount(long) returns an int. |
| // The two 32-bit halves are counted separately, at $mem and $mem+4, and added. |
| instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ |
| predicate(UsePopCountInstruction); |
| match(Set dst (PopCountL (LoadL mem))); |
| effect(KILL cr, TEMP tmp, TEMP dst); |
| |
| format %{ "POPCNT $dst, $mem\n\t" |
| "POPCNT $tmp, $mem+4\n\t" |
| "ADD $dst, $tmp" %} |
| ins_encode %{ |
| // NOTE(review): the two disabled lines below look like an earlier form of the two Address::make_raw lines - confirm and delete. |
| //__ popcntl($dst$$Register, $mem$$Address$$first); |
| //__ popcntl($tmp$$Register, $mem$$Address$$second); |
| __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); |
| __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); |
| __ addl($dst$$Register, $tmp$$Register); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| |
| //----------Load/Store/Move Instructions--------------------------------------- |
| //----------Load Instructions-------------------------------------------------- |
| // Load Byte (8bit signed): sign-extending byte load into an int register. |
| instruct loadB(xRegI dst, memory mem) %{ |
| match(Set dst (LoadB mem)); |
| |
| ins_cost(125); |
| format %{ "MOVSX8 $dst,$mem\t# byte" %} |
| |
| ins_encode %{ |
| __ movsbl($dst$$Register, $mem$$Address); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Byte (8bit signed) into Long Register |
| // The low word receives the sign-extended byte; the high word is a copy of |
| // it shifted arithmetically right by 7, which leaves only copies of the sign bit. |
| instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ |
| match(Set dst (ConvI2L (LoadB mem))); |
| effect(KILL cr); |
| |
| ins_cost(375); |
| format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" |
| "MOV $dst.hi,$dst.lo\n\t" |
| "SAR $dst.hi,7" %} |
| |
| ins_encode %{ |
| __ movsbl($dst$$Register, $mem$$Address); |
| __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. |
| __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Unsigned Byte (8bit UNsigned): zero-extending byte load into an int register. |
| instruct loadUB(xRegI dst, memory mem) %{ |
| match(Set dst (LoadUB mem)); |
| |
| ins_cost(125); |
| format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} |
| |
| ins_encode %{ |
| __ movzbl($dst$$Register, $mem$$Address); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Unsigned Byte (8 bit UNsigned) into Long Register |
| // The low word is the zero-extended byte; the high word is cleared with XOR, |
| // which is why the flags register is declared killed. |
| instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ |
| match(Set dst (ConvI2L (LoadUB mem))); |
| effect(KILL cr); |
| |
| ins_cost(250); |
| format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" |
| "XOR $dst.hi,$dst.hi" %} |
| |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| __ movzbl(Rdst, $mem$$Address); |
| __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register |
| // Zero-extending byte load into the low word, high word cleared, then the |
| // low word is ANDed with the 8-bit immediate mask. |
| instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{ |
| match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); |
| effect(KILL cr); |
| |
| format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t" |
| "XOR $dst.hi,$dst.hi\n\t" |
| "AND $dst.lo,$mask" %} |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| __ movzbl(Rdst, $mem$$Address); |
| __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); |
| __ andl(Rdst, $mask$$constant); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Short (16bit signed): sign-extending 16-bit load into an int register. |
| instruct loadS(rRegI dst, memory mem) %{ |
| match(Set dst (LoadS mem)); |
| |
| ins_cost(125); |
| format %{ "MOVSX $dst,$mem\t# short" %} |
| |
| ins_encode %{ |
| __ movswl($dst$$Register, $mem$$Address); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Short (16 bit signed) to Byte (8 bit signed) |
| // Matches (LoadS mem << 24) >> 24 and replaces it with one sign-extending |
| // byte load. The format uses MOVSX8, the mnemonic this file prints for |
| // movsbl (see loadB); plain MOVSX is what it prints for the 16-bit movswl. |
| instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ |
| match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); |
| |
| ins_cost(125); |
| format %{ "MOVSX8 $dst, $mem\t# short -> byte" %} |
| ins_encode %{ |
| __ movsbl($dst$$Register, $mem$$Address); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Short (16bit signed) into Long Register |
| // The low word receives the sign-extended short; the high word is a copy of |
| // it shifted arithmetically right by 15, which leaves only copies of the sign bit. |
| instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ |
| match(Set dst (ConvI2L (LoadS mem))); |
| effect(KILL cr); |
| |
| ins_cost(375); |
| format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" |
| "MOV $dst.hi,$dst.lo\n\t" |
| "SAR $dst.hi,15" %} |
| |
| ins_encode %{ |
| __ movswl($dst$$Register, $mem$$Address); |
| __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. |
| __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Unsigned Short/Char (16bit unsigned): zero-extending 16-bit load into an int register. |
| instruct loadUS(rRegI dst, memory mem) %{ |
| match(Set dst (LoadUS mem)); |
| |
| ins_cost(125); |
| format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} |
| |
| ins_encode %{ |
| __ movzwl($dst$$Register, $mem$$Address); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) |
| // Matches (LoadUS mem << 24) >> 24 and replaces it with one sign-extending |
| // byte load. The format uses MOVSX8, the mnemonic this file prints for |
| // movsbl (see loadB); plain MOVSX is what it prints for the 16-bit movswl. |
| instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ |
| match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); |
| |
| ins_cost(125); |
| format %{ "MOVSX8 $dst, $mem\t# ushort -> byte" %} |
| ins_encode %{ |
| __ movsbl($dst$$Register, $mem$$Address); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Unsigned Short/Char (16 bit UNsigned) into Long Register |
| // The low word is the zero-extended 16-bit value; the high word is cleared with XOR. |
| instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ |
| match(Set dst (ConvI2L (LoadUS mem))); |
| effect(KILL cr); |
| |
| ins_cost(250); |
| format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" |
| "XOR $dst.hi,$dst.hi" %} |
| |
| ins_encode %{ |
| __ movzwl($dst$$Register, $mem$$Address); |
| __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register |
| // The load and the 0xFF mask collapse into one zero-extending byte load; |
| // the high word is cleared with XOR. |
| instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ |
| match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); |
| effect(KILL cr); |
| |
| format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" |
| "XOR $dst.hi,$dst.hi" %} |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| __ movzbl(Rdst, $mem$$Address); |
| __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register |
| // Zero-extending 16-bit load into the low word, high word cleared, then the |
| // low word is ANDed with the immediate mask. |
| instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{ |
| match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); |
| effect(KILL cr); |
| |
| format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t" |
| "XOR $dst.hi,$dst.hi\n\t" |
| "AND $dst.lo,$mask" %} |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| __ movzwl(Rdst, $mem$$Address); |
| __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); |
| __ andl(Rdst, $mask$$constant); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Integer: plain 32-bit load into an int register. |
| instruct loadI(rRegI dst, memory mem) %{ |
| match(Set dst (LoadI mem)); |
| |
| ins_cost(125); |
| format %{ "MOV $dst,$mem\t# int" %} |
| |
| ins_encode %{ |
| __ movl($dst$$Register, $mem$$Address); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Integer (32 bit signed) to Byte (8 bit signed) |
| // Matches (LoadI mem << 24) >> 24 and replaces it with one sign-extending |
| // byte load. The format uses MOVSX8, the mnemonic this file prints for |
| // movsbl (see loadB); plain MOVSX is what it prints for the 16-bit movswl. |
| instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ |
| match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); |
| |
| ins_cost(125); |
| format %{ "MOVSX8 $dst, $mem\t# int -> byte" %} |
| ins_encode %{ |
| __ movsbl($dst$$Register, $mem$$Address); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) |
| // Matches (LoadI mem) & 0xFF and replaces it with one zero-extending byte |
| // load. The format uses MOVZX8, the mnemonic this file prints for movzbl |
| // (see loadUB); plain MOVZX is what it prints for the 16-bit movzwl. |
| instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ |
| match(Set dst (AndI (LoadI mem) mask)); |
| |
| ins_cost(125); |
| format %{ "MOVZX8 $dst, $mem\t# int -> ubyte" %} |
| ins_encode %{ |
| __ movzbl($dst$$Register, $mem$$Address); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Integer (32 bit signed) to Short (16 bit signed) |
| // Matches (LoadI mem << 16) >> 16 and replaces it with one sign-extending 16-bit load. |
| instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ |
| match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); |
| |
| ins_cost(125); |
| format %{ "MOVSX $dst, $mem\t# int -> short" %} |
| ins_encode %{ |
| __ movswl($dst$$Register, $mem$$Address); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) |
| // Matches (LoadI mem) & 0xFFFF and replaces it with one zero-extending 16-bit load. |
| instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ |
| match(Set dst (AndI (LoadI mem) mask)); |
| |
| ins_cost(125); |
| format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} |
| ins_encode %{ |
| __ movzwl($dst$$Register, $mem$$Address); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Integer into Long Register |
| // The low word receives the int; the high word is a copy of it shifted |
| // arithmetically right by 31, i.e. filled with the sign bit. |
| instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ |
| match(Set dst (ConvI2L (LoadI mem))); |
| effect(KILL cr); |
| |
| ins_cost(375); |
| format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" |
| "MOV $dst.hi,$dst.lo\n\t" |
| "SAR $dst.hi,31" %} |
| |
| ins_encode %{ |
| __ movl($dst$$Register, $mem$$Address); |
| __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. |
| __ sarl(HIGH_FROM_LOW($dst$$Register), 31); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Integer with mask 0xFF into Long Register |
| // The load and the 0xFF mask collapse into one zero-extending byte load; |
| // the high word is cleared with XOR. |
| instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ |
| match(Set dst (ConvI2L (AndI (LoadI mem) mask))); |
| effect(KILL cr); |
| |
| format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" |
| "XOR $dst.hi,$dst.hi" %} |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| __ movzbl(Rdst, $mem$$Address); |
| __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Integer with mask 0xFFFF into Long Register |
| // The load and the 0xFFFF mask collapse into one zero-extending 16-bit load; |
| // the high word is cleared with XOR. |
| instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ |
| match(Set dst (ConvI2L (AndI (LoadI mem) mask))); |
| effect(KILL cr); |
| |
| format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" |
| "XOR $dst.hi,$dst.hi" %} |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| __ movzwl(Rdst, $mem$$Address); |
| __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Integer with 31-bit mask into Long Register |
| // 32-bit load into the low word, high word cleared, then the low word is |
| // ANDed with the immediate mask. |
| instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ |
| match(Set dst (ConvI2L (AndI (LoadI mem) mask))); |
| effect(KILL cr); |
| |
| format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" |
| "XOR $dst.hi,$dst.hi\n\t" |
| "AND $dst.lo,$mask" %} |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| __ movl(Rdst, $mem$$Address); |
| __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); |
| __ andl(Rdst, $mask$$constant); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Unsigned Integer into Long Register |
| // Matches ConvI2L(LoadI mem) masked with the immL_32bits operand: the low |
| // word is loaded as-is and the high word is cleared with XOR. |
| instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ |
| match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); |
| effect(KILL cr); |
| |
| ins_cost(250); |
| format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" |
| "XOR $dst.hi,$dst.hi" %} |
| |
| ins_encode %{ |
| __ movl($dst$$Register, $mem$$Address); |
| __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Load Long. Cannot clobber address while loading, so restrict address |
| // register to ESI |
| // Applies only when the load does not require atomic access. It is done as |
| // two 32-bit moves: low word from $mem, high word from $mem+4. |
| instruct loadL(eRegL dst, load_long_memory mem) %{ |
| predicate(!((LoadLNode*)n)->require_atomic_access()); |
| match(Set dst (LoadL mem)); |
| |
| ins_cost(250); |
| format %{ "MOV $dst.lo,$mem\t# long\n\t" |
| "MOV $dst.hi,$mem+4" %} |
| |
| ins_encode %{ |
| Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); |
| Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); |
| __ movl($dst$$Register, Amemlo); |
| __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); |
| %} |
| |
| ins_pipe(ialu_reg_long_mem); |
| %} |
| |
| // Volatile Load Long. Must be atomic, so do 64-bit FILD |
| // then store it down to the stack and reload on the int |
| // side. |
| // Used when UseSSE<=1 and the load requires atomic access; the destination |
| // is a long stack slot. |
| instruct loadL_volatile(stackSlotL dst, memory mem) %{ |
| predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); |
| match(Set dst (LoadL mem)); |
| |
| ins_cost(200); |
| format %{ "FILD $mem\t# Atomic volatile long load\n\t" |
| "FISTp $dst" %} |
| ins_encode(enc_loadL_volatile(mem,dst)); |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // Atomic volatile long load into a stack slot when UseSSE>=2: the 64 bits |
| // are moved through the XMM temporary with one load and one store to rsp+disp. |
| instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ |
| predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); |
| match(Set dst (LoadL mem)); |
| effect(TEMP tmp); |
| ins_cost(180); |
| format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" |
| "MOVSD $dst,$tmp" %} |
| ins_encode %{ |
| __ movdbl($tmp$$XMMRegister, $mem$$Address); |
| __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Atomic volatile long load into a register pair when UseSSE>=2: one 64-bit |
| // load into the XMM temporary, then the low 32 bits are moved to dst.lo, |
| // the temporary is shifted right by 32, and its low 32 bits go to dst.hi. |
| // Same predicate and match as loadLX_volatile, but with a lower ins_cost (160 vs 180). |
| instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ |
| predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); |
| match(Set dst (LoadL mem)); |
| effect(TEMP tmp); |
| ins_cost(160); |
| format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" |
| "MOVD $dst.lo,$tmp\n\t" |
| "PSRLQ $tmp,32\n\t" |
| "MOVD $dst.hi,$tmp" %} |
| ins_encode %{ |
| __ movdbl($tmp$$XMMRegister, $mem$$Address); |
| __ movdl($dst$$Register, $tmp$$XMMRegister); |
| __ psrlq($tmp$$XMMRegister, 32); |
| __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Load Range: 32-bit MOV (opcode 0x8B) from memory into an int register. |
| instruct loadRange(rRegI dst, memory mem) %{ |
| match(Set dst (LoadRange mem)); |
| |
| ins_cost(125); |
| format %{ "MOV $dst,$mem" %} |
| opcode(0x8B); |
| ins_encode( OpcP, RegMem(dst,mem)); |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| |
| // Load Pointer: 32-bit MOV (opcode 0x8B) from memory into a pointer register. |
| instruct loadP(eRegP dst, memory mem) %{ |
| match(Set dst (LoadP mem)); |
| |
| ins_cost(125); |
| format %{ "MOV $dst,$mem" %} |
| opcode(0x8B); |
| ins_encode( OpcP, RegMem(dst,mem)); |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| // Load Klass Pointer: encoded exactly like loadP (MOV, opcode 0x8B) but matches LoadKlass. |
| instruct loadKlass(eRegP dst, memory mem) %{ |
| match(Set dst (LoadKlass mem)); |
| |
| ins_cost(125); |
| format %{ "MOV $dst,$mem" %} |
| opcode(0x8B); |
| ins_encode( OpcP, RegMem(dst,mem)); |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| // Load Double into an x87 register (used when UseSSE<=1): the value is |
| // pushed with FLD (DD /0) and then popped into dst. |
| instruct loadDPR(regDPR dst, memory mem) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (LoadD mem)); |
| |
| ins_cost(150); |
| format %{ "FLD_D ST,$mem\n\t" |
| "FSTP $dst" %} |
| opcode(0xDD); /* DD /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,mem), |
| Pop_Reg_DPR(dst) ); |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // Load Double to XMM |
| // Used when UseSSE>=2 and UseXmmLoadAndClearUpper is set. |
| instruct loadD(regD dst, memory mem) %{ |
| predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); |
| match(Set dst (LoadD mem)); |
| ins_cost(145); |
| format %{ "MOVSD $dst,$mem" %} |
| ins_encode %{ |
| __ movdbl ($dst$$XMMRegister, $mem$$Address); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Load Double to XMM when UseSSE>=2 and UseXmmLoadAndClearUpper is off. |
| // NOTE(review): the encoder calls movdbl exactly as loadD does, while the format |
| // prints MOVLPD; presumably movdbl picks the instruction from the same flag - confirm. |
| instruct loadD_partial(regD dst, memory mem) %{ |
| predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); |
| match(Set dst (LoadD mem)); |
| ins_cost(145); |
| format %{ "MOVLPD $dst,$mem" %} |
| ins_encode %{ |
| __ movdbl ($dst$$XMMRegister, $mem$$Address); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Load to XMM register (single-precision floating point) |
| // MOVSS instruction |
| // Used when UseSSE>=1. |
| instruct loadF(regF dst, memory mem) %{ |
| predicate(UseSSE>=1); |
| match(Set dst (LoadF mem)); |
| ins_cost(145); |
| format %{ "MOVSS $dst,$mem" %} |
| ins_encode %{ |
| __ movflt ($dst$$XMMRegister, $mem$$Address); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Load Float into an x87 register (used when UseSSE==0): the value is |
| // pushed with FLD (D9 /0) and then popped into dst. |
| instruct loadFPR(regFPR dst, memory mem) %{ |
| predicate(UseSSE==0); |
| match(Set dst (LoadF mem)); |
| |
| ins_cost(150); |
| format %{ "FLD_S ST,$mem\n\t" |
| "FSTP $dst" %} |
| opcode(0xD9); /* D9 /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,mem), |
| Pop_Reg_FPR(dst) ); |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // Load Effective Address: LEA (opcode 0x8D) of an indOffset8 address operand |
| // into a pointer register. |
| instruct leaP8(eRegP dst, indOffset8 mem) %{ |
| match(Set dst mem); |
| |
| ins_cost(110); |
| format %{ "LEA $dst,$mem" %} |
| opcode(0x8D); |
| ins_encode( OpcP, RegMem(dst,mem)); |
| ins_pipe( ialu_reg_reg_fat ); |
| %} |
| |
| // Load Effective Address: LEA (opcode 0x8D) of an indOffset32 address operand. |
| instruct leaP32(eRegP dst, indOffset32 mem) %{ |
| match(Set dst mem); |
| |
| ins_cost(110); |
| format %{ "LEA $dst,$mem" %} |
| opcode(0x8D); |
| ins_encode( OpcP, RegMem(dst,mem)); |
| ins_pipe( ialu_reg_reg_fat ); |
| %} |
| |
| // Load Effective Address: LEA (opcode 0x8D) of an indIndexOffset address operand. |
| instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ |
| match(Set dst mem); |
| |
| ins_cost(110); |
| format %{ "LEA $dst,$mem" %} |
| opcode(0x8D); |
| ins_encode( OpcP, RegMem(dst,mem)); |
| ins_pipe( ialu_reg_reg_fat ); |
| %} |
| |
| // Load Effective Address: LEA (opcode 0x8D) of an indIndexScale address operand. |
| instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ |
| match(Set dst mem); |
| |
| ins_cost(110); |
| format %{ "LEA $dst,$mem" %} |
| opcode(0x8D); |
| ins_encode( OpcP, RegMem(dst,mem)); |
| ins_pipe( ialu_reg_reg_fat ); |
| %} |
| |
| // Load Effective Address: LEA (opcode 0x8D) of an indIndexScaleOffset address operand. |
| instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ |
| match(Set dst mem); |
| |
| ins_cost(110); |
| format %{ "LEA $dst,$mem" %} |
| opcode(0x8D); |
| ins_encode( OpcP, RegMem(dst,mem)); |
| ins_pipe( ialu_reg_reg_fat ); |
| %} |
| |
| // Load Constant: move an int immediate into a register (encoded by LdImmI). |
| instruct loadConI(rRegI dst, immI src) %{ |
| match(Set dst src); |
| |
| format %{ "MOV $dst,$src" %} |
| ins_encode( LdImmI(dst, src) ); |
| ins_pipe( ialu_reg_fat ); |
| %} |
| |
| // Load Constant zero: clears the register with XOR dst,dst (opcode 0x33) |
| // instead of a MOV. ins_cost is 50 and the flags register is declared killed. |
| instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ |
| match(Set dst src); |
| effect(KILL cr); |
| |
| ins_cost(50); |
| format %{ "XOR $dst,$dst" %} |
| opcode(0x33); /* + rd */ |
| ins_encode( OpcP, RegReg( dst, dst ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Load a pointer constant into a pointer register (MOV, opcode 0xB8, encoded by LdImmP). |
| instruct loadConP(eRegP dst, immP src) %{ |
| match(Set dst src); |
| |
| format %{ "MOV $dst,$src" %} |
| opcode(0xB8); /* + rd */ |
| ins_encode( LdImmP(dst, src) ); |
| ins_pipe( ialu_reg_fat ); |
| %} |
| |
| // Load a long constant into a register pair: one immediate MOV for the low |
| // word (LdImmL_Lo) and one for the high word (LdImmL_Hi). |
| instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ |
| match(Set dst src); |
| effect(KILL cr); |
| ins_cost(200); |
| format %{ "MOV $dst.lo,$src.lo\n\t" |
| "MOV $dst.hi,$src.hi" %} |
| opcode(0xB8); |
| ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); |
| ins_pipe( ialu_reg_long_fat ); |
| %} |
| |
| // Load long constant zero: both halves of the register pair are cleared |
| // with XOR (opcode 0x33). Its ins_cost of 150 is lower than loadConL's 200. |
| instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ |
| match(Set dst src); |
| effect(KILL cr); |
| ins_cost(150); |
| format %{ "XOR $dst.lo,$dst.lo\n\t" |
| "XOR $dst.hi,$dst.hi" %} |
| opcode(0x33,0x33); |
| ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Load a float constant into an x87 register: FLD_S from the constant |
| // table, then pop into dst. |
| // The instruction usage is guarded by predicate in operand immFPR(). |
| instruct loadConFPR(regFPR dst, immFPR con) %{ |
| match(Set dst con); |
| ins_cost(125); |
| format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" |
| "FSTP $dst" %} |
| ins_encode %{ |
| __ fld_s($constantaddress($con)); |
| __ fstp_d($dst$$reg); |
| %} |
| ins_pipe(fpu_reg_con); |
| %} |
| |
| // Load float constant 0.0 into an x87 register: FLDZ, then pop into dst |
| // (no constant-table access). |
| // The instruction usage is guarded by predicate in operand immFPR0(). |
| instruct loadConFPR0(regFPR dst, immFPR0 con) %{ |
| match(Set dst con); |
| ins_cost(125); |
| format %{ "FLDZ ST\n\t" |
| "FSTP $dst" %} |
| ins_encode %{ |
| __ fldz(); |
| __ fstp_d($dst$$reg); |
| %} |
| ins_pipe(fpu_reg_con); |
| %} |
| |
| // Load float constant 1.0 into an x87 register: FLD1, then pop into dst |
| // (no constant-table access). |
| // The instruction usage is guarded by predicate in operand immFPR1(). |
| instruct loadConFPR1(regFPR dst, immFPR1 con) %{ |
| match(Set dst con); |
| ins_cost(125); |
| format %{ "FLD1 ST\n\t" |
| "FSTP $dst" %} |
| ins_encode %{ |
| __ fld1(); |
| __ fstp_d($dst$$reg); |
| %} |
| ins_pipe(fpu_reg_con); |
| %} |
| |
| // Load a float constant into an XMM register from the constant table. |
| // The instruction usage is guarded by predicate in operand immF(). |
| instruct loadConF(regF dst, immF con) %{ |
| match(Set dst con); |
| ins_cost(125); |
| format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} |
| ins_encode %{ |
| __ movflt($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // Load float constant 0.0 into an XMM register by XORing it with itself |
| // (XORPS). Its ins_cost of 100 is lower than loadConF's 125. |
| // The instruction usage is guarded by predicate in operand immF0(). |
| instruct loadConF0(regF dst, immF0 src) %{ |
| match(Set dst src); |
| ins_cost(100); |
| format %{ "XORPS $dst,$dst\t# float 0.0" %} |
| ins_encode %{ |
| __ xorps($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // Load a double constant into an x87 register: FLD_D from the constant |
| // table, then pop into dst. |
| // The instruction usage is guarded by predicate in operand immDPR(). |
| instruct loadConDPR(regDPR dst, immDPR con) %{ |
| match(Set dst con); |
| ins_cost(125); |
| |
| format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" |
| "FSTP $dst" %} |
| ins_encode %{ |
| __ fld_d($constantaddress($con)); |
| __ fstp_d($dst$$reg); |
| %} |
| ins_pipe(fpu_reg_con); |
| %} |
| |
| // Load the immDPR0 double constant without touching the constant table: |
| // FLDZ produces the value in ST, then FSTP stores it into $dst. |
| // The instruction usage is guarded by predicate in operand immDPR0(). |
| instruct loadConDPR0(regDPR dst, immDPR0 con) %{ |
| match(Set dst con); |
| ins_cost(125); |
| |
| format %{ "FLDZ ST\n\t" |
| "FSTP $dst" %} |
| ins_encode %{ |
| __ fldz(); |
| __ fstp_d($dst$$reg); |
| %} |
| ins_pipe(fpu_reg_con); |
| %} |
| |
| // Load the immDPR1 double constant without touching the constant table: |
| // FLD1 produces the value in ST, then FSTP stores it into $dst. |
| // The instruction usage is guarded by predicate in operand immDPR1(). |
| instruct loadConDPR1(regDPR dst, immDPR1 con) %{ |
| match(Set dst con); |
| ins_cost(125); |
| |
| format %{ "FLD1 ST\n\t" |
| "FSTP $dst" %} |
| ins_encode %{ |
| __ fld1(); |
| __ fstp_d($dst$$reg); |
| %} |
| ins_pipe(fpu_reg_con); |
| %} |
| |
| // Load a double constant from the constant table into an XMM register |
| // with a single movdbl (shown as MOVSD in the format). |
| // The instruction usage is guarded by predicate in operand immD(). |
| instruct loadConD(regD dst, immD con) %{ |
| match(Set dst con); |
| ins_cost(125); |
| format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} |
| ins_encode %{ |
| __ movdbl($dst$$XMMRegister, $constantaddress($con)); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // Materialize double 0.0 in an XMM register by XORing the register with |
| // itself (XORPD); no memory access, and cheaper (cost 100) than loadConD. |
| // The instruction usage is guarded by predicate in operand immD0(). |
| instruct loadConD0(regD dst, immD0 src) %{ |
| match(Set dst src); |
| ins_cost(100); |
| format %{ "XORPD $dst,$dst\t# double 0.0" %} |
| ins_encode %{ |
| __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Load Stack Slot (int): reload an int stack slot into an integer |
| // register with a single MOV (opcode 0x8B). |
| instruct loadSSI(rRegI dst, stackSlotI src) %{ |
| match(Set dst src); |
| ins_cost(125); |
| |
| format %{ "MOV $dst,$src" %} |
| opcode(0x8B); |
| ins_encode( OpcP, RegMem(dst,src)); |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| // Load Stack Slot (long): reload a long stack slot into a register pair |
| // using two MOVs (opcode 0x8B each), one per 32-bit half as shown in the |
| // format; RegMem_Hi encodes the second (high-half) move. |
| instruct loadSSL(eRegL dst, stackSlotL src) %{ |
| match(Set dst src); |
| |
| ins_cost(200); |
| format %{ "MOV $dst,$src.lo\n\t" |
| "MOV $dst+4,$src.hi" %} |
| opcode(0x8B, 0x8B); |
| ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); |
| ins_pipe( ialu_mem_long_reg ); |
| %} |
| |
| // Load Stack Slot (pointer): reload a pointer stack slot into a pointer |
| // register with a single MOV (opcode 0x8B); same encoding as loadSSI. |
| instruct loadSSP(eRegP dst, stackSlotP src) %{ |
| match(Set dst src); |
| ins_cost(125); |
| |
| format %{ "MOV $dst,$src" %} |
| opcode(0x8B); |
| ins_encode( OpcP, RegMem(dst,src)); |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| // Load Stack Slot (float, x87): FLD_S the 32-bit slot (D9 /0), then |
| // Pop_Reg_FPR stores the loaded value into $dst (FSTP in the format). |
| instruct loadSSF(regFPR dst, stackSlotF src) %{ |
| match(Set dst src); |
| ins_cost(125); |
| |
| format %{ "FLD_S $src\n\t" |
| "FSTP $dst" %} |
| opcode(0xD9); /* D9 /0, FLD m32real */ |
| ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), |
| Pop_Reg_FPR(dst) ); |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // Load Stack Slot (double, x87): FLD_D the 64-bit slot (DD /0), then |
| // Pop_Reg_DPR stores the loaded value into $dst (FSTP in the format). |
| instruct loadSSD(regDPR dst, stackSlotD src) %{ |
| match(Set dst src); |
| ins_cost(125); |
| |
| format %{ "FLD_D $src\n\t" |
| "FSTP $dst" %} |
| opcode(0xDD); /* DD /0, FLD m64real */ |
| ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), |
| Pop_Reg_DPR(dst) ); |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // Prefetch instructions. |
| // Must be safe to execute with invalid address (cannot fault). |
| |
| // PrefetchRead when SSE is disabled and 3DNow! prefetch is unsupported: |
| // the node is matched at zero cost and zero size and emits no code. |
| instruct prefetchr0( memory mem ) %{ |
| predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch()); |
| match(PrefetchRead mem); |
| ins_cost(0); |
| size(0); |
| format %{ "PREFETCHR (non-SSE is empty encoding)" %} |
| ins_encode(); |
| ins_pipe(empty); |
| %} |
| |
| // PrefetchRead via PREFETCHR (__ prefetchr). Selected either when SSE is |
| // disabled and 3DNow! prefetch is supported, or whenever ReadPrefetchInstr==3. |
| // The && clause is parenthesized so the grouping is explicit; && already |
| // binds tighter than ||, so the predicate's value is unchanged. |
| instruct prefetchr( memory mem ) %{ |
| predicate((UseSSE==0 && VM_Version::supports_3dnow_prefetch()) || ReadPrefetchInstr==3); |
| match(PrefetchRead mem); |
| ins_cost(100); |
| |
| format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %} |
| ins_encode %{ |
| __ prefetchr($mem$$Address); |
| %} |
| ins_pipe(ialu_mem); |
| %} |
| |
| // PrefetchRead via PREFETCHNTA; chosen when SSE is available and |
| // ReadPrefetchInstr==0. |
| instruct prefetchrNTA( memory mem ) %{ |
| predicate(UseSSE>=1 && ReadPrefetchInstr==0); |
| match(PrefetchRead mem); |
| ins_cost(100); |
| |
| format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %} |
| ins_encode %{ |
| __ prefetchnta($mem$$Address); |
| %} |
| ins_pipe(ialu_mem); |
| %} |
| |
| // PrefetchRead via PREFETCHT0; chosen when SSE is available and |
| // ReadPrefetchInstr==1. |
| instruct prefetchrT0( memory mem ) %{ |
| predicate(UseSSE>=1 && ReadPrefetchInstr==1); |
| match(PrefetchRead mem); |
| ins_cost(100); |
| |
| format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %} |
| ins_encode %{ |
| __ prefetcht0($mem$$Address); |
| %} |
| ins_pipe(ialu_mem); |
| %} |
| |
| // PrefetchRead via PREFETCHT2; chosen when SSE is available and |
| // ReadPrefetchInstr==2. |
| instruct prefetchrT2( memory mem ) %{ |
| predicate(UseSSE>=1 && ReadPrefetchInstr==2); |
| match(PrefetchRead mem); |
| ins_cost(100); |
| |
| format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %} |
| ins_encode %{ |
| __ prefetcht2($mem$$Address); |
| %} |
| ins_pipe(ialu_mem); |
| %} |
| |
| // PrefetchWrite when SSE is disabled and 3DNow! prefetch is unsupported: |
| // the node is matched at zero cost and zero size and emits no code. |
| instruct prefetchw0( memory mem ) %{ |
| predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch()); |
| match(PrefetchWrite mem); |
| ins_cost(0); |
| size(0); |
| format %{ "Prefetch (non-SSE is empty encoding)" %} |
| ins_encode(); |
| ins_pipe(empty); |
| %} |
| |
| // PrefetchWrite via PREFETCHW; chosen when SSE is disabled but 3DNow! |
| // prefetch is supported. |
| instruct prefetchw( memory mem ) %{ |
| predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch()); |
| match( PrefetchWrite mem ); |
| ins_cost(100); |
| |
| format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %} |
| ins_encode %{ |
| __ prefetchw($mem$$Address); |
| %} |
| ins_pipe(ialu_mem); |
| %} |
| |
| // PrefetchWrite via PREFETCHNTA; used for every SSE-enabled configuration |
| // (the predicate checks only UseSSE>=1). |
| instruct prefetchwNTA( memory mem ) %{ |
| predicate(UseSSE>=1); |
| match(PrefetchWrite mem); |
| ins_cost(100); |
| |
| format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %} |
| ins_encode %{ |
| __ prefetchnta($mem$$Address); |
| %} |
| ins_pipe(ialu_mem); |
| %} |
| |
| // Prefetch instructions for allocation. |
| |
| // PrefetchAllocation when SSE is disabled and AllocatePrefetchInstr is not 3: |
| // the node is matched at zero cost and zero size and emits no code. |
| instruct prefetchAlloc0( memory mem ) %{ |
| predicate(UseSSE==0 && AllocatePrefetchInstr!=3); |
| match(PrefetchAllocation mem); |
| ins_cost(0); |
| size(0); |
| format %{ "Prefetch allocation (non-SSE is empty encoding)" %} |
| ins_encode(); |
| ins_pipe(empty); |
| %} |
| |
| // PrefetchAllocation via PREFETCHW; chosen when AllocatePrefetchInstr==3, |
| // independent of the UseSSE level. |
| instruct prefetchAlloc( memory mem ) %{ |
| predicate(AllocatePrefetchInstr==3); |
| match( PrefetchAllocation mem ); |
| ins_cost(100); |
| |
| format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} |
| ins_encode %{ |
| __ prefetchw($mem$$Address); |
| %} |
| ins_pipe(ialu_mem); |
| %} |
| |
| // PrefetchAllocation via PREFETCHNTA; chosen when SSE is available and |
| // AllocatePrefetchInstr==0. |
| instruct prefetchAllocNTA( memory mem ) %{ |
| predicate(UseSSE>=1 && AllocatePrefetchInstr==0); |
| match(PrefetchAllocation mem); |
| ins_cost(100); |
| |
| format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} |
| ins_encode %{ |
| __ prefetchnta($mem$$Address); |
| %} |
| ins_pipe(ialu_mem); |
| %} |
| |
| // PrefetchAllocation via PREFETCHT0; chosen when SSE is available and |
| // AllocatePrefetchInstr==1. |
| instruct prefetchAllocT0( memory mem ) %{ |
| predicate(UseSSE>=1 && AllocatePrefetchInstr==1); |
| match(PrefetchAllocation mem); |
| ins_cost(100); |
| |
| format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} |
| ins_encode %{ |
| __ prefetcht0($mem$$Address); |
| %} |
| ins_pipe(ialu_mem); |
| %} |
| |
| // PrefetchAllocation via PREFETCHT2; chosen when SSE is available and |
| // AllocatePrefetchInstr==2. |
| instruct prefetchAllocT2( memory mem ) %{ |
| predicate(UseSSE>=1 && AllocatePrefetchInstr==2); |
| match(PrefetchAllocation mem); |
| ins_cost(100); |
| |
| format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %} |
| ins_encode %{ |
| __ prefetcht2($mem$$Address); |
| %} |
| ins_pipe(ialu_mem); |
| %} |
| |
| //----------Store Instructions------------------------------------------------- |
| |
| // Store Byte: 8-bit MOV (opcode 0x88) from a register to memory. |
| // The source uses the xRegI operand class rather than rRegI. |
| // NOTE(review): presumably xRegI is limited to byte-addressable registers - confirm. |
| instruct storeB(memory mem, xRegI src) %{ |
| match(Set mem (StoreB mem src)); |
| |
| ins_cost(125); |
| format %{ "MOV8 $mem,$src" %} |
| opcode(0x88); |
| ins_encode( OpcP, RegMem( src, mem ) ); |
| ins_pipe( ialu_mem_reg ); |
| %} |
| |
| // Store Char/Short: 16-bit MOV from a register to memory. |
| // The secondary opcode byte 0x66 is emitted first (OpcS), ahead of the |
| // primary 0x89 (OpcP), so it acts as a prefix to the 32-bit store opcode. |
| instruct storeC(memory mem, rRegI src) %{ |
| match(Set mem (StoreC mem src)); |
| |
| ins_cost(125); |
| format %{ "MOV16 $mem,$src" %} |
| opcode(0x89, 0x66); |
| ins_encode( OpcS, OpcP, RegMem( src, mem ) ); |
| ins_pipe( ialu_mem_reg ); |
| %} |
| |
| // Store Integer: 32-bit MOV (opcode 0x89) from a register to memory. |
| instruct storeI(memory mem, rRegI src) %{ |
| match(Set mem (StoreI mem src)); |
| |
| ins_cost(125); |
| format %{ "MOV $mem,$src" %} |
| opcode(0x89); |
| ins_encode( OpcP, RegMem( src, mem ) ); |
| ins_pipe( ialu_mem_reg ); |
| %} |
| |
| // Store Long (non-atomic): only matches StoreL nodes that do not require |
| // atomic access, so the value can be written as two separate 32-bit MOVs |
| // (low half to $mem, high half to $mem+4, per the format). |
| instruct storeL(long_memory mem, eRegL src) %{ |
| predicate(!((StoreLNode*)n)->require_atomic_access()); |
| match(Set mem (StoreL mem src)); |
| |
| ins_cost(200); |
| format %{ "MOV $mem,$src.lo\n\t" |
| "MOV $mem+4,$src.hi" %} |
| opcode(0x89, 0x89); |
| ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); |
| ins_pipe( ialu_mem_long_reg ); |
| %} |
| |
| // Store Long to Integer: folds the ConvL2I into the store by writing |
| // $src$$Register with a single movl; the format shows this as $src.lo. |
| instruct storeL2I(memory mem, eRegL src) %{ |
| match(Set mem (StoreI mem (ConvL2I src))); |
| |
| format %{ "MOV $mem,$src.lo\t# long -> int" %} |
| ins_encode %{ |
| __ movl($mem$$Address, $src$$Register); |
| %} |
| ins_pipe(ialu_mem_reg); |
| %} |
| |
| // Volatile Store Long. Must be atomic, so move it into |
| // the FP TOS and then do a 64-bit FIST. Has to probe the |
| // target address before the store (for null-ptr checks) |
| // so the memory operand is used twice in the encoding. |
| // Used when UseSSE<=1; the source must already be in a stack slot. |
| // The probe is a CMP (opcode 0x3B) against EAX, hence the KILL of the flags. |
| instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ |
| predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); |
| match(Set mem (StoreL mem src)); |
| effect( KILL cr ); |
| ins_cost(400); |
| format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" |
| "FILD $src\n\t" |
| "FISTp $mem\t # 64-bit atomic volatile long store" %} |
| opcode(0x3B); |
| ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // Volatile (atomic) Store Long with SSE2, source in a stack slot: |
| // probe the target address with a CMP (which is why flags are killed), |
| // load the 64-bit slot into an XMM temp, then write it with one MOVSD. |
| instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ |
| predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); |
| match(Set mem (StoreL mem src)); |
| effect( TEMP tmp, KILL cr ); |
| ins_cost(380); |
| format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" |
| "MOVSD $tmp,$src\n\t" |
| "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} |
| ins_encode %{ |
| __ cmpl(rax, $mem$$Address); |
| __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); |
| __ movdbl($mem$$Address, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Volatile (atomic) Store Long with SSE2, source in a register pair: |
| // probe the target address with a CMP (flags killed), move the low and |
| // high halves into two XMM temps with MOVD, merge them with PUNPCKLDQ, |
| // then write the combined 64-bit value with one MOVSD. |
| // Cheaper (cost 360) than the stack-slot variant storeLX_volatile (380). |
| instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ |
| predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); |
| match(Set mem (StoreL mem src)); |
| effect( TEMP tmp2 , TEMP tmp, KILL cr ); |
| ins_cost(360); |
| format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" |
| "MOVD $tmp,$src.lo\n\t" |
| "MOVD $tmp2,$src.hi\n\t" |
| "PUNPCKLDQ $tmp,$tmp2\n\t" |
| "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} |
| ins_encode %{ |
| __ cmpl(rax, $mem$$Address); |
| __ movdl($tmp$$XMMRegister, $src$$Register); |
| __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); |
| __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister); |
| __ movdbl($mem$$Address, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Store Pointer; for storing unknown oops and raw pointers. |
| // 32-bit MOV (opcode 0x89) from any pointer register to memory. |
| instruct storeP(memory mem, anyRegP src) %{ |
| match(Set mem (StoreP mem src)); |
| |
| ins_cost(125); |
| format %{ "MOV $mem,$src" %} |
| opcode(0x89); |
| ins_encode( OpcP, RegMem( src, mem ) ); |
| ins_pipe( ialu_mem_reg ); |
| %} |
| |
| // Store Integer Immediate: MOV m32, imm32 (C7 /0) with the constant |
| // emitted as a 32-bit immediate (Con32). |
| instruct storeImmI(memory mem, immI src) %{ |
| match(Set mem (StoreI mem src)); |
| |
| ins_cost(150); |
| format %{ "MOV $mem,$src" %} |
| opcode(0xC7); /* C7 /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // Store Short/Char Immediate: same C7 /0 opcode as the 32-bit immediate |
| // store, but preceded by SizePrefix and followed by a 16-bit immediate |
| // (Con16). Only enabled when the UseStoreImmI16 flag is set. |
| instruct storeImmI16(memory mem, immI16 src) %{ |
| predicate(UseStoreImmI16); |
| match(Set mem (StoreC mem src)); |
| |
| ins_cost(150); |
| format %{ "MOV16 $mem,$src" %} |
| opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ |
| ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // Store Pointer Immediate; null pointers or constant oops that do not |
| // need card-mark barriers. |
| // Encoded like storeImmI: MOV m32, imm32 (C7 /0) with a Con32 immediate. |
| instruct storeImmP(memory mem, immP src) %{ |
| match(Set mem (StoreP mem src)); |
| |
| ins_cost(150); |
| format %{ "MOV $mem,$src" %} |
| opcode(0xC7); /* C7 /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // Store Byte Immediate: MOV m8, imm8 (C6 /0) for an immI8 constant. |
| instruct storeImmB(memory mem, immI8 src) %{ |
| match(Set mem (StoreB mem src)); |
| |
| ins_cost(150); |
| format %{ "MOV8 $mem,$src" %} |
| opcode(0xC6); /* C6 /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // Store CMS card-mark Immediate: matches StoreCM (not StoreB) but uses |
| // the same byte-immediate encoding as storeImmB (C6 /0). |
| instruct storeImmCM(memory mem, immI8 src) %{ |
| match(Set mem (StoreCM mem src)); |
| |
| ins_cost(150); |
| format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} |
| opcode(0xC6); /* C6 /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // Store Double (x87 path, UseSSE<=1): FST_D (DD /2) from the regDPR1 |
| // source operand to memory. |
| instruct storeDPR( memory mem, regDPR1 src) %{ |
| predicate(UseSSE<=1); |
| match(Set mem (StoreD mem src)); |
| |
| ins_cost(100); |
| format %{ "FST_D $mem,$src" %} |
| opcode(0xDD); /* DD /2 */ |
| ins_encode( enc_FPR_store(mem,src) ); |
| ins_pipe( fpu_mem_reg ); |
| %} |
| |
| // Store double does rounding on x86, so a RoundDouble feeding a StoreD |
| // is folded into the store: same encoding as storeDPR. |
| instruct storeDPR_rounded( memory mem, regDPR1 src) %{ |
| predicate(UseSSE<=1); |
| match(Set mem (StoreD mem (RoundDouble src))); |
| |
| ins_cost(100); |
| format %{ "FST_D $mem,$src\t# round" %} |
| opcode(0xDD); /* DD /2 */ |
| ins_encode( enc_FPR_store(mem,src) ); |
| ins_pipe( fpu_mem_reg ); |
| %} |
| |
| // Store XMM register to memory (double-precision floating points) |
| // MOVSD instruction; requires UseSSE>=2. Slightly cheaper (cost 95) |
| // than the x87 store (cost 100). |
| instruct storeD(memory mem, regD src) %{ |
| predicate(UseSSE>=2); |
| match(Set mem (StoreD mem src)); |
| ins_cost(95); |
| format %{ "MOVSD $mem,$src" %} |
| ins_encode %{ |
| __ movdbl($mem$$Address, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Store XMM register to memory (single-precision floating point) |
| // MOVSS instruction; requires UseSSE>=1. Slightly cheaper (cost 95) |
| // than the x87 store (cost 100). |
| instruct storeF(memory mem, regF src) %{ |
| predicate(UseSSE>=1); |
| match(Set mem (StoreF mem src)); |
| ins_cost(95); |
| format %{ "MOVSS $mem,$src" %} |
| ins_encode %{ |
| __ movflt($mem$$Address, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Store Float (x87 path, UseSSE==0): FST_S (D9 /2) from the regFPR1 |
| // source operand to memory. |
| instruct storeFPR( memory mem, regFPR1 src) %{ |
| predicate(UseSSE==0); |
| match(Set mem (StoreF mem src)); |
| |
| ins_cost(100); |
| format %{ "FST_S $mem,$src" %} |
| opcode(0xD9); /* D9 /2 */ |
| ins_encode( enc_FPR_store(mem,src) ); |
| ins_pipe( fpu_mem_reg ); |
| %} |
| |
| // Store Float does rounding on x86, so a RoundFloat feeding a StoreF |
| // is folded into the store: same encoding as storeFPR. |
| instruct storeFPR_rounded( memory mem, regFPR1 src) %{ |
| predicate(UseSSE==0); |
| match(Set mem (StoreF mem (RoundFloat src))); |
| |
| ins_cost(100); |
| format %{ "FST_S $mem,$src\t# round" %} |
| opcode(0xD9); /* D9 /2 */ |
| ins_encode( enc_FPR_store(mem,src) ); |
| ins_pipe( fpu_mem_reg ); |
| %} |
| |
| // Store Float does rounding on x86, so a ConvD2F feeding a StoreF is |
| // folded into the store: the double register (regDPR1) is written with |
| // the single-precision FST_S (D9 /2). Applies while UseSSE<=1. |
| instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ |
| predicate(UseSSE<=1); |
| match(Set mem (StoreF mem (ConvD2F src))); |
| |
| ins_cost(100); |
| format %{ "FST_S $mem,$src\t# D-round" %} |
| opcode(0xD9); /* D9 /2 */ |
| ins_encode( enc_FPR_store(mem,src) ); |
| ins_pipe( fpu_mem_reg ); |
| %} |
| |
| // Store immediate Float value (it is faster than store from FPU register) |
| // The instruction usage is guarded by predicate in operand immFPR(). |
| // The constant is written with the integer MOV m32, imm32 (C7 /0); |
| // Con32FPR_as_bits supplies the 32-bit immediate for the float constant. |
| instruct storeFPR_imm( memory mem, immFPR src) %{ |
| match(Set mem (StoreF mem src)); |
| |
| ins_cost(50); |
| format %{ "MOV $mem,$src\t# store float" %} |
| opcode(0xC7); /* C7 /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // Store immediate Float value (it is faster than store from XMM register) |
| // The instruction usage is guarded by predicate in operand immF(). |
| // The constant is written with the integer MOV m32, imm32 (C7 /0); |
| // Con32F_as_bits supplies the 32-bit immediate for the float constant. |
| instruct storeF_imm( memory mem, immF src) %{ |
| match(Set mem (StoreF mem src)); |
| |
| ins_cost(50); |
| format %{ "MOV $mem,$src\t# store float" %} |
| opcode(0xC7); /* C7 /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // Store Integer to stack slot: 32-bit MOV (opcode 0x89) of an integer |
| // register into an int stack slot. |
| instruct storeSSI(stackSlotI dst, rRegI src) %{ |
| match(Set dst src); |
| |
| ins_cost(100); |
| format %{ "MOV $dst,$src" %} |
| opcode(0x89); |
| ins_encode( OpcPRegSS( dst, src ) ); |
| ins_pipe( ialu_mem_reg ); |
| %} |
| |
| // Store Pointer to stack slot: 32-bit MOV (opcode 0x89) of a pointer |
| // register into a pointer stack slot; same encoding as storeSSI. |
| instruct storeSSP(stackSlotP dst, eRegP src) %{ |
| match(Set dst src); |
| |
| ins_cost(100); |
| format %{ "MOV $dst,$src" %} |
| opcode(0x89); |
| ins_encode( OpcPRegSS( dst, src ) ); |
| ins_pipe( ialu_mem_reg ); |
| %} |
| |
| // Store Long to stack slot: two 32-bit MOVs (opcode 0x89 each), low half |
| // to $dst and high half to $dst+4 as shown in the format. |
| instruct storeSSL(stackSlotL dst, eRegL src) %{ |
| match(Set dst src); |
| |
| ins_cost(200); |
| format %{ "MOV $dst,$src.lo\n\t" |
| "MOV $dst+4,$src.hi" %} |
| opcode(0x89, 0x89); |
| ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); |
| ins_pipe( ialu_mem_long_reg ); |
| %} |
| |
| //----------MemBar Instructions----------------------------------------------- |
| // Memory barrier flavors |
| |
| // Acquire barrier (MemBarAcquire and LoadFence): matched with an empty, |
| // zero-size encoding, so no instruction is emitted for it. |
| instruct membar_acquire() %{ |
| match(MemBarAcquire); |
| match(LoadFence); |
| ins_cost(400); |
| |
| size(0); |
| format %{ "MEMBAR-acquire ! (empty encoding)" %} |
| ins_encode(); |
| ins_pipe(empty); |
| %} |
| |
| // Acquire barrier attached to a lock (MemBarAcquireLock): empty encoding |
| // at zero cost; per the format, the prior CMPXCHG in FastLock suffices. |
| instruct membar_acquire_lock() %{ |
| match(MemBarAcquireLock); |
| ins_cost(0); |
| |
| size(0); |
| format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} |
| ins_encode( ); |
| ins_pipe(empty); |
| %} |
| |
| // Release barrier (MemBarRelease and StoreFence): matched with an empty, |
| // zero-size encoding, so no instruction is emitted for it. |
| instruct membar_release() %{ |
| match(MemBarRelease); |
| match(StoreFence); |
| ins_cost(400); |
| |
| size(0); |
| format %{ "MEMBAR-release ! (empty encoding)" %} |
| ins_encode( ); |
| ins_pipe(empty); |
| %} |
| |
| // Release barrier attached to an unlock (MemBarReleaseLock): empty |
| // encoding at zero cost; per the format, a FastUnlock follows. |
| instruct membar_release_lock() %{ |
| match(MemBarReleaseLock); |
| ins_cost(0); |
| |
| size(0); |
| format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} |
| ins_encode( ); |
| ins_pipe(empty); |
| %} |
| |
| // Volatile barrier (MemBarVolatile): emits __ membar(Assembler::StoreLoad). |
| // Per the format this is a LOCK ADDL on [ESP] on MP systems and nothing on |
| // uniprocessors; flags are declared killed because of that ADDL. |
| instruct membar_volatile(eFlagsReg cr) %{ |
| match(MemBarVolatile); |
| effect(KILL cr); |
| ins_cost(400); |
| |
| format %{ |
| $$template |
| if (os::is_MP()) { |
| $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" |
| } else { |
| $$emit$$"MEMBAR-volatile ! (empty encoding)" |
| } |
| %} |
| ins_encode %{ |
| __ membar(Assembler::StoreLoad); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // Volatile barrier that can be elided: when Matcher::post_store_load_barrier(n) |
| // holds, MemBarVolatile is matched at zero cost with an empty encoding, |
| // which beats membar_volatile (cost 400). |
| instruct unnecessary_membar_volatile() %{ |
| match(MemBarVolatile); |
| predicate(Matcher::post_store_load_barrier(n)); |
| ins_cost(0); |
| |
| size(0); |
| format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} |
| ins_encode( ); |
| ins_pipe(empty); |
| %} |
| |
| // Store-store barrier (MemBarStoreStore): empty, zero-size encoding at |
| // zero cost; no instruction is emitted. |
| instruct membar_storestore() %{ |
| match(MemBarStoreStore); |
| ins_cost(0); |
| |
| size(0); |
| format %{ "MEMBAR-storestore (empty encoding)" %} |
| ins_encode( ); |
| ins_pipe(empty); |
| %} |
| |
| //----------Move Instructions-------------------------------------------------- |
| // CastX2P (integer to pointer): empty encoding at zero cost. |
| // NOTE(review): dst and src use the eAXRegP/eAXRegI classes, presumably |
| // pinning both to EAX so that no move is needed - confirm against the |
| // operand definitions. |
| instruct castX2P(eAXRegP dst, eAXRegI src) %{ |
| match(Set dst (CastX2P src)); |
| format %{ "# X2P $dst, $src" %} |
| ins_encode( /*empty encoding*/ ); |
| ins_cost(0); |
| ins_pipe(empty); |
| %} |
| |
| // CastP2X (pointer to integer): a register-to-register copy via enc_Copy, |
| // shown as MOV in the format. |
| instruct castP2X(rRegI dst, eRegP src ) %{ |
| match(Set dst (CastP2X src)); |
| ins_cost(50); |
| format %{ "MOV $dst, $src\t# CastP2X" %} |
| ins_encode( enc_Copy( dst, src) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| //----------Conditional Move--------------------------------------------------- |
| // Conditional move (int, signed flags) for CPUs without CMOV: emulated |
| // with a short branch on the inverted condition that skips a plain MOV. |
| instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ |
| predicate(!VM_Version::supports_cmov() ); |
| match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "J$cop,us skip\t# signed cmove\n\t" |
| "MOV $dst,$src\n" |
| "skip:" %} |
| ins_encode %{ |
| Label Lskip; |
| // Invert sense of branch from sense of CMOV |
| __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); |
| __ movl($dst$$Register, $src$$Register); |
| __ bind(Lskip); |
| %} |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Conditional move (int, unsigned flags) for CPUs without CMOV: same |
| // branch-around-MOV emulation as jmovI_reg, using cmpOpU/eFlagsRegU. |
| instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ |
| predicate(!VM_Version::supports_cmov() ); |
| match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "J$cop,us skip\t# unsigned cmove\n\t" |
| "MOV $dst,$src\n" |
| "skip:" %} |
| ins_encode %{ |
| Label Lskip; |
| // Invert sense of branch from sense of CMOV |
| __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); |
| __ movl($dst$$Register, $src$$Register); |
| __ bind(Lskip); |
| %} |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Conditional move (int, signed flags, register source) using the |
| // hardware CMOVcc instruction (0x0F 0x40 base opcode, condition supplied |
| // through enc_cmov). Requires CMOV support. |
| instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ |
| predicate(VM_Version::supports_cmov() ); |
| match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "CMOV$cop $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cop), RegReg( dst, src ) ); |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Conditional move (int, unsigned flags, register source) using the |
| // hardware CMOVcc instruction. Requires CMOV support. |
| instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ |
| predicate(VM_Version::supports_cmov() ); |
| match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "CMOV$cop $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cop), RegReg( dst, src ) ); |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Conditional move (int) for the cmpOpUCF/eFlagsRegUCF operand flavor: |
| // has no encoding of its own and simply expands to cmovI_regU. |
| instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ |
| predicate(VM_Version::supports_cmov() ); |
| match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| cmovI_regU(cop, cr, dst, src); |
| %} |
| %} |
| |
| // Conditional move (int, signed flags) with the LoadI folded into the |
| // CMOVcc memory operand; costs more (250) than the register form (200). |
| instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ |
| predicate(VM_Version::supports_cmov() ); |
| match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); |
| ins_cost(250); |
| format %{ "CMOV$cop $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cop), RegMem( dst, src ) ); |
| ins_pipe( pipe_cmov_mem ); |
| %} |
| |
| // Conditional move (int, unsigned flags) with the LoadI folded into the |
| // CMOVcc memory operand; costs more (250) than the register form (200). |
| instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ |
| predicate(VM_Version::supports_cmov() ); |
| match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); |
| ins_cost(250); |
| format %{ "CMOV$cop $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cop), RegMem( dst, src ) ); |
| ins_pipe( pipe_cmov_mem ); |
| %} |
| |
| // Conditional move (int, memory source) for the cmpOpUCF/eFlagsRegUCF |
| // operand flavor: has no encoding of its own and expands to cmovI_memU. |
| instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ |
| predicate(VM_Version::supports_cmov() ); |
| match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); |
| ins_cost(250); |
| expand %{ |
| cmovI_memU(cop, cr, dst, src); |
| %} |
| %} |
| |
| // Conditional move (pointer, signed flags, register source) using the |
| // hardware CMOVcc instruction. Requires CMOV support. |
| instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ |
| predicate(VM_Version::supports_cmov() ); |
| match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "CMOV$cop $dst,$src\t# ptr" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cop), RegReg( dst, src ) ); |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Conditional move (non-P6 version) |
| // Note: a CMoveP is generated for stubs and native wrappers |
| // regardless of whether we are on a P6, so we |
| // emulate a cmov here |
| // There is no predicate, so this rule is always available; its higher |
| // cost (300 vs. 200) lets cmovP_reg win whenever CMOV is supported. |
| // NOTE(review): the 0x2 passed to enc_cmov_branch is presumably the byte |
| // length of the MOV being skipped - confirm against the encoding class. |
| instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ |
| match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); |
| ins_cost(300); |
| format %{ "Jn$cop skip\n\t" |
| "MOV $dst,$src\t# pointer\n" |
| "skip:" %} |
| opcode(0x8b); |
| ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Conditional move (pointer, unsigned flags, register source) using the |
| // hardware CMOVcc instruction. Requires CMOV support. |
| instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ |
| predicate(VM_Version::supports_cmov() ); |
| match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "CMOV$cop $dst,$src\t# ptr" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cop), RegReg( dst, src ) ); |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Conditional move (pointer) for the cmpOpUCF/eFlagsRegUCF operand |
| // flavor: has no encoding of its own and expands to cmovP_regU. |
| instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ |
| predicate(VM_Version::supports_cmov() ); |
| match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| cmovP_regU(cop, cr, dst, src); |
| %} |
| %} |
| |
| // DISABLED: Requires the ADLC to emit a bottom_type call that |
| // correctly meets the two pointer arguments; one is an incoming |
| // register but the other is a memory operand. ALSO appears to |
| // be buggy with implicit null checks. |
| // |
| //// Conditional move |
| //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ |
| // predicate(VM_Version::supports_cmov() ); |
| // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); |
| // ins_cost(250); |
| // format %{ "CMOV$cop $dst,$src\t# ptr" %} |
| // opcode(0x0F,0x40); |
| // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); |
| // ins_pipe( pipe_cmov_mem ); |
| //%} |
| // |
| //// Conditional move |
| //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ |
| // predicate(VM_Version::supports_cmov() ); |
| // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); |
| // ins_cost(250); |
| // format %{ "CMOV$cop $dst,$src\t# ptr" %} |
| // opcode(0x0F,0x40); |
| // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); |
| // ins_pipe( pipe_cmov_mem ); |
| //%} |
| |
| // Conditional move (double, x87, unsigned flags): FCMOVcc (opcode 0xDA) |
| // into the regDPR1 destination. Used while UseSSE<=1. |
| instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "FCMOV$cop $dst,$src\t# double" %} |
| opcode(0xDA); |
| ins_encode( enc_cmov_dpr(cop,src) ); |
| ins_pipe( pipe_cmovDPR_reg ); |
| %} |
| |
| // Conditional move (float, x87, unsigned flags): FCMOVcc (opcode 0xDA) |
| // into the regFPR1 destination. Used only when UseSSE==0. It shares the |
| // enc_cmov_dpr encoding and pipe_cmovDPR_reg pipeline with the double form. |
| instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ |
| predicate(UseSSE==0); |
| match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "FCMOV$cop $dst,$src\t# float" %} |
| opcode(0xDA); |
| ins_encode( enc_cmov_dpr(cop,src) ); |
| ins_pipe( pipe_cmovDPR_reg ); |
| %} |
| |
| // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. |
| // So the signed double case (x87, UseSSE<=1) is emulated with a branch |
| // that skips a push of $src followed by a store-and-pop into $dst. |
| // NOTE(review): the 0x4 passed to enc_cmov_branch is presumably the byte |
| // length of the skipped push/pop pair - confirm against the encoding class. |
| instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "Jn$cop skip\n\t" |
| "MOV $dst,$src\t# double\n" |
| "skip:" %} |
| opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ |
| ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); |
| ins_pipe( pipe_cmovDPR_reg ); |
| %} |
| |
| // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. |
| // So the signed float case (x87, UseSSE==0) is emulated with a branch |
| // that skips a push of $src followed by a store-and-pop into $dst. |
| // NOTE(review): the 0x4 passed to enc_cmov_branch is presumably the byte |
| // length of the skipped push/pop pair - confirm against the encoding class. |
| instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ |
| predicate(UseSSE==0); |
| match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "Jn$cop skip\n\t" |
| "MOV $dst,$src\t# float\n" |
| "skip:" %} |
| opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ |
| ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); |
| ins_pipe( pipe_cmovDPR_reg ); |
| %} |
| |
| // No CMOVE with SSE/SSE2 |
| // Signed-flags float conditional move on XMM registers (UseSSE>=1): |
| // a short branch on the inverted condition skips a MOVSS. |
| instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ |
| predicate (UseSSE>=1); |
| match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "Jn$cop skip\n\t" |
| "MOVSS $dst,$src\t# float\n" |
| "skip:" %} |
| ins_encode %{ |
| Label skip; |
| // Invert sense of branch from sense of CMOV |
| __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); |
| __ movflt($dst$$XMMRegister, $src$$XMMRegister); |
| __ bind(skip); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // No CMOVE with SSE/SSE2 |
| // Signed-flags double conditional move on XMM registers (UseSSE>=2): |
| // a short branch on the inverted condition skips a MOVSD. |
| // The format annotation reads "# double" to match the MOVSD it describes. |
| instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ |
| predicate (UseSSE>=2); |
| match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "Jn$cop skip\n\t" |
| "MOVSD $dst,$src\t# double\n" |
| "skip:" %} |
| ins_encode %{ |
| Label skip; |
| // Invert sense of branch from sense of CMOV |
| __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); |
| __ movdbl($dst$$XMMRegister, $src$$XMMRegister); |
| __ bind(skip); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // unsigned version |
| // Unsigned-flags float conditional move on XMM registers (UseSSE>=1): |
| // a short branch on the inverted condition skips a MOVSS. |
| instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ |
| predicate (UseSSE>=1); |
| match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "Jn$cop skip\n\t" |
| "MOVSS $dst,$src\t# float\n" |
| "skip:" %} |
| ins_encode %{ |
| Label skip; |
| // Invert sense of branch from sense of CMOV |
| __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); |
| __ movflt($dst$$XMMRegister, $src$$XMMRegister); |
| __ bind(skip); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Float conditional move on XMM registers for the cmpOpUCF/eFlagsRegUCF |
| // operand flavor: has no encoding of its own and expands to fcmovF_regU. |
| instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ |
| predicate (UseSSE>=1); |
| match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovF_regU(cop, cr, dst, src); |
| %} |
| %} |
| |
| // unsigned version |
| // Unsigned-flags double conditional move on XMM registers (UseSSE>=2): |
| // a short branch on the inverted condition skips a MOVSD. |
| // The format annotation reads "# double" to match the MOVSD it describes. |
| instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ |
| predicate (UseSSE>=2); |
| match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "Jn$cop skip\n\t" |
| "MOVSD $dst,$src\t# double\n" |
| "skip:" %} |
| ins_encode %{ |
| Label skip; |
| // Invert sense of branch from sense of CMOV |
| __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); |
| __ movdbl($dst$$XMMRegister, $src$$XMMRegister); |
| __ bind(skip); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Double conditional move on XMM registers for the cmpOpUCF/eFlagsRegUCF |
| // operand flavor: has no encoding of its own and expands to fcmovD_regU. |
| instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ |
| predicate (UseSSE>=2); |
| match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovD_regU(cop, cr, dst, src); |
| %} |
| %} |
| |
| // Conditional move (long, signed flags): two CMOVcc instructions with the |
| // same condition, one for the low halves and one for the high halves of |
| // the register pairs. Requires CMOV support. |
| instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ |
| predicate(VM_Version::supports_cmov() ); |
| match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "CMOV$cop $dst.lo,$src.lo\n\t" |
| "CMOV$cop $dst.hi,$src.hi" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); |
| ins_pipe( pipe_cmov_reg_long ); |
| %} |
| |
| // Conditional move (long, unsigned flags): two CMOVcc instructions with |
| // the same condition, one for the low halves and one for the high halves |
| // of the register pairs. Requires CMOV support. |
| instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ |
| predicate(VM_Version::supports_cmov() ); |
| match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| format %{ "CMOV$cop $dst.lo,$src.lo\n\t" |
| "CMOV$cop $dst.hi,$src.hi" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); |
| ins_pipe( pipe_cmov_reg_long ); |
| %} |
| |
| // Conditional move (long) for the cmpOpUCF/eFlagsRegUCF operand flavor: |
| // has no encoding of its own and expands to cmovL_regU. |
| instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ |
| predicate(VM_Version::supports_cmov() ); |
| match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| cmovL_regU(cop, cr, dst, src); |
| %} |
| %} |
| |
| //----------Arithmetic Instructions-------------------------------------------- |
| //----------Addition Instructions---------------------------------------------- |
| |
| // Integer Addition Instructions |
| // Register-register add: ADD (opcode 0x03), destructive on $dst, two |
| // bytes long, and it clobbers the flags (KILL cr). |
| instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (AddI dst src)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "ADD $dst,$src" %} |
| opcode(0x03); |
| ins_encode( OpcP, RegReg( dst, src) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // Register-immediate integer add: ADD with opcode 0x81 /0, destructive on |
| // $dst and clobbering the flags. The immediate is emitted by Con8or32. |
| // NOTE(review): OpcSErm/Con8or32 presumably select the short sign-extended |
| // imm8 form when the constant fits - confirm against the encoding classes. |
| instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ |
| match(Set dst (AddI dst src)); |
| effect(KILL cr); |
| |
| format %{ "ADD $dst,$src" %} |
| opcode(0x81, 0x00); /* /0 id */ |
| ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Add of the constant 1 (immI1) to a register, formatted as a one-byte INC. |
| // Only eligible when the UseIncDec flag is set; flags are killed. |
| // NOTE(review): Opc_plus presumably adds the register number to the primary |
| // opcode 0x40 - confirm in the encode block. |
| instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{ |
| predicate(UseIncDec); |
| match(Set dst (AddI dst src)); |
| effect(KILL cr); |
| |
| size(1); |
| format %{ "INC $dst" %} |
| opcode(0x40); /* */ |
| ins_encode( Opc_plus( primary, dst ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Three-operand integer add of register + immediate via LEA: the result |
| // register dst is separate from the input src0. The instruct has no flags |
| // operand and declares no KILL effect. Cost 110. |
| instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ |
| match(Set dst (AddI src0 src1)); |
| ins_cost(110); |
| |
| format %{ "LEA $dst,[$src0 + $src1]" %} |
| opcode(0x8D); /* 0x8D /r */ |
| ins_encode( OpcP, RegLea( dst, src0, src1 ) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // Pointer counterpart of leaI_eReg_immI: matches AddP of a pointer register |
| // and an immediate, formatted as LEA into a separate dst. No flags operand. |
| instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ |
| match(Set dst (AddP src0 src1)); |
| ins_cost(110); |
| |
| format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} |
| opcode(0x8D); /* 0x8D /r */ |
| ins_encode( OpcP, RegLea( dst, src0, src1 ) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // Add of the constant -1 (immI_M1) to a register, formatted as a one-byte |
| // DEC. Only eligible when the UseIncDec flag is set; flags are killed. |
| instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ |
| predicate(UseIncDec); |
| match(Set dst (AddI dst src)); |
| effect(KILL cr); |
| |
| size(1); |
| format %{ "DEC $dst" %} |
| opcode(0x48); /* */ |
| ins_encode( Opc_plus( primary, dst ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Pointer + integer register add: dst = dst + src, matched on AddP. |
| // Uses the same opcode (0x03) and 2-byte size as addI_eReg; flags are killed. |
| instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (AddP dst src)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "ADD $dst,$src" %} |
| opcode(0x03); |
| ins_encode( OpcP, RegReg( dst, src) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // Pointer + immediate add: dst = dst + src, matched on AddP. |
| // Uses the same encode classes as addI_eReg_imm; flags are killed. |
| instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ |
| match(Set dst (AddP dst src)); |
| effect(KILL cr); |
| |
| format %{ "ADD $dst,$src" %} |
| opcode(0x81,0x00); /* Opcode 81 /0 id */ |
| // ins_encode( RegImm( dst, src) ); |
| ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Integer add with a memory source: dst = dst + LoadI(src). The load is |
| // folded into the ADD. Cost 125; flags are killed. |
| instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ |
| match(Set dst (AddI dst (LoadI src))); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "ADD $dst,$src" %} |
| opcode(0x03); |
| ins_encode( OpcP, RegMem( dst, src) ); |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| // Read-modify-write integer add to memory: matches a StoreI to dst of |
| // (LoadI dst) + src, where the load and the store name the same memory |
| // operand. Note the RegMem argument order (src, dst): the register is the |
| // source, memory is the destination. Cost 150; flags are killed. |
| instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (AddI (LoadI dst) src))); |
| effect(KILL cr); |
| |
| ins_cost(150); |
| format %{ "ADD $dst,$src" %} |
| opcode(0x01); /* Opcode 01 /r */ |
| ins_encode( OpcP, RegMem( src, dst ) ); |
| ins_pipe( ialu_mem_reg ); |
| %} |
| |
| // Add Memory with Immediate |
| // Read-modify-write form: StoreI to dst of (LoadI dst) + immediate. |
| // The /0 opcode extension is passed directly to RMopc_Mem. Flags are killed. |
| instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (AddI (LoadI dst) src))); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "ADD $dst,$src" %} |
| opcode(0x81); /* Opcode 81 /0 id */ |
| ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // Increment an int in memory: StoreI to dst of (LoadI dst) + 1 (immI1), |
| // formatted as INC with opcode FF /0. Unlike incI_eReg, this instruct has no |
| // UseIncDec predicate. Flags are killed. |
| instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (AddI (LoadI dst) src))); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "INC $dst" %} |
| opcode(0xFF); /* Opcode FF /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,dst)); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // Decrement an int in memory: StoreI to dst of (LoadI dst) + (-1) (immI_M1), |
| // formatted as DEC with opcode FF /1. Unlike decI_eReg, this instruct has no |
| // UseIncDec predicate. Flags are killed. |
| instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (AddI (LoadI dst) src))); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "DEC $dst" %} |
| opcode(0xFF); /* Opcode FF /1 */ |
| ins_encode( OpcP, RMopc_Mem(0x01,dst)); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| |
| // CheckCastPP on a pointer register. Emits no machine code: the encoding is |
| // empty and the size is declared as 0. dst is both input and result. |
| instruct checkCastPP( eRegP dst ) %{ |
| match(Set dst (CheckCastPP dst)); |
| |
| size(0); |
| format %{ "#checkcastPP of $dst" %} |
| ins_encode( /*empty encoding*/ ); |
| ins_pipe( empty ); |
| %} |
| |
| // CastPP on a pointer register. Empty encoding, so no machine code is |
| // emitted; dst is both input and result. |
| instruct castPP( eRegP dst ) %{ |
| match(Set dst (CastPP dst)); |
| format %{ "#castPP of $dst" %} |
| ins_encode( /*empty encoding*/ ); |
| ins_pipe( empty ); |
| %} |
| |
| // CastII on an int register. Empty encoding with an explicit cost of 0; |
| // dst is both input and result. |
| instruct castII( rRegI dst ) %{ |
| match(Set dst (CastII dst)); |
| format %{ "#castII of $dst" %} |
| ins_encode( /*empty encoding*/ ); |
| ins_cost(0); |
| ins_pipe( empty ); |
| %} |
| |
| |
| // Load-locked - same as a regular pointer load when used with compare-swap |
| // Matches LoadPLocked and encodes it as an ordinary MOV from memory |
| // (opcode 0x8B); no lock prefix is part of this encoding. |
| instruct loadPLocked(eRegP dst, memory mem) %{ |
| match(Set dst (LoadPLocked mem)); |
| |
| ins_cost(125); |
| format %{ "MOV $dst,$mem\t# Load ptr. locked" %} |
| opcode(0x8B); |
| ins_encode( OpcP, RegMem(dst,mem)); |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| // Conditional-store of the updated heap-top. |
| // Used during allocation of the shared heap. |
| // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. |
| // The result of the match is the flags register cr. oldval is constrained to |
| // operand class eAXRegP. The encoding is lock_prefix followed by opcode bytes |
| // 0F B1 and a reg/mem operand pair (newval, heap_top_ptr). |
| instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ |
| match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); |
| // EAX is killed if there is contention, but then it's also unused. |
| // In the common case of no contention, EAX holds the new oop address. |
| format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} |
| ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); |
| ins_pipe( pipe_cmpxchg ); |
| %} |
| |
| // Conditional-store of an int value. |
| // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. |
| // The result of the match is the flags register cr. oldval is constrained to |
| // operand class eAXRegI and is declared killed. Same byte sequence as |
| // storePConditional: lock_prefix, 0F B1, reg/mem (newval, mem). |
| instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ |
| match(Set cr (StoreIConditional mem (Binary oldval newval))); |
| effect(KILL oldval); |
| format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} |
| ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); |
| ins_pipe( pipe_cmpxchg ); |
| %} |
| |
| // Conditional-store of a long value. |
| // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. |
| // The result of the match is the flags register cr. oldval uses operand class |
| // eADXRegL and is declared killed; newval uses eBCXRegL. The emitted sequence |
| // is XCHG EBX,ECX / [LOCK] CMPXCHG8 / XCHG EBX,ECX, so EBX and ECX hold their |
| // original contents again afterwards. The lock prefix is emitted only when |
| // os::is_MP() is true. |
| instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ |
| match(Set cr (StoreLConditional mem (Binary oldval newval))); |
| effect(KILL oldval); |
| format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t" |
| "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t" |
| "XCHG EBX,ECX" |
| %} |
| ins_encode %{ |
| // Note: we need to swap rbx, and rcx before and after the |
| // cmpxchg8 instruction because the instruction uses |
| // rcx as the high order word of the new value to store but |
| // our register encoding uses rbx. |
| __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); |
| if( os::is_MP() ) |
| __ lock(); |
| __ cmpxchg8($mem$$Address); |
| __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); |
| %} |
| ins_pipe( pipe_cmpxchg ); |
| %} |
| |
| // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them |
| |
| // Long compare-and-swap producing an int result in res. Per the format, res |
| // is set to 0, and then to 1 unless the JNE skips that move, so res is 1 when |
| // the compare succeeded. Only eligible when VM_Version::supports_cx8() holds. |
| // mem_ptr is constrained to operand class eSIRegP; flags and oldval are |
| // declared killed. |
| instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ |
| predicate(VM_Version::supports_cx8()); |
| match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); |
| effect(KILL cr, KILL oldval); |
| format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" |
| "MOV $res,0\n\t" |
| "JNE,s fail\n\t" |
| "MOV $res,1\n" |
| "fail:" %} |
| ins_encode( enc_cmpxchg8(mem_ptr), |
| enc_flags_ne_to_boolean(res) ); |
| ins_pipe( pipe_cmpxchg ); |
| %} |
| |
| // Pointer compare-and-swap producing an int result in res (0, then 1 unless |
| // the JNE skips the second move, per the format). oldval is constrained to |
| // eAXRegP and newval to eCXRegP; flags and oldval are declared killed. |
| instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ |
| match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); |
| effect(KILL cr, KILL oldval); |
| format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" |
| "MOV $res,0\n\t" |
| "JNE,s fail\n\t" |
| "MOV $res,1\n" |
| "fail:" %} |
| ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); |
| ins_pipe( pipe_cmpxchg ); |
| %} |
| |
| // Int compare-and-swap producing an int result in res (0, then 1 unless the |
| // JNE skips the second move, per the format). oldval is constrained to |
| // eAXRegI and newval to eCXRegI; flags and oldval are declared killed. |
| // Uses the same encode classes as compareAndSwapP. |
| instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ |
| match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); |
| effect(KILL cr, KILL oldval); |
| format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" |
| "MOV $res,0\n\t" |
| "JNE,s fail\n\t" |
| "MOV $res,1\n" |
| "fail:" %} |
| ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); |
| ins_pipe( pipe_cmpxchg ); |
| %} |
| |
| // GetAndAddI whose result is not used (see predicate): emitted as an ADDL of |
| // a constant to memory instead of XADD. The lock prefix is emitted only when |
| // os::is_MP() is true. The match result goes to a placeholder Universe |
| // operand; flags are killed. |
| instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ |
| predicate(n->as_LoadStore()->result_not_used()); |
| match(Set dummy (GetAndAddI mem add)); |
| effect(KILL cr); |
| format %{ "ADDL [$mem],$add" %} |
| ins_encode %{ |
| if (os::is_MP()) { __ lock(); } |
| __ addl($mem$$Address, $add$$constant); |
| %} |
| ins_pipe( pipe_cmpxchg ); |
| %} |
| |
| // GetAndAddI with the result used: newval is both the addend and the register |
| // that receives the match result. Emitted as XADDL, with a lock prefix only |
| // when os::is_MP() is true. Flags are killed. |
| instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ |
| match(Set newval (GetAndAddI mem newval)); |
| effect(KILL cr); |
| format %{ "XADDL [$mem],$newval" %} |
| ins_encode %{ |
| if (os::is_MP()) { __ lock(); } |
| __ xaddl($mem$$Address, $newval$$Register); |
| %} |
| ins_pipe( pipe_cmpxchg ); |
| %} |
| |
| // GetAndSetI: exchanges newval with the int at mem via XCHGL; newval receives |
| // the match result. No explicit lock prefix is emitted and no flags operand |
| // is declared (per the Intel SDM, XCHG with a memory operand is implicitly |
| // locked and does not modify flags). |
| instruct xchgI( memory mem, rRegI newval) %{ |
| match(Set newval (GetAndSetI mem newval)); |
| format %{ "XCHGL $newval,[$mem]" %} |
| ins_encode %{ |
| __ xchgl($newval$$Register, $mem$$Address); |
| %} |
| ins_pipe( pipe_cmpxchg ); |
| %} |
| |
| // GetAndSetP: pointer counterpart of xchgI. Uses the same 32-bit XCHGL with a |
| // pRegP operand; newval receives the match result. No explicit lock prefix |
| // and no flags operand. |
| instruct xchgP( memory mem, pRegP newval) %{ |
| match(Set newval (GetAndSetP mem newval)); |
| format %{ "XCHGL $newval,[$mem]" %} |
| ins_encode %{ |
| __ xchgl($newval$$Register, $mem$$Address); |
| %} |
| ins_pipe( pipe_cmpxchg ); |
| %} |
| |
| //----------Subtraction Instructions------------------------------------------- |
| |
| // Integer Subtraction Instructions |
| // Register-register integer subtract: dst = dst - src. Declared as 2 bytes; |
| // flags are killed. |
| instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (SubI dst src)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "SUB $dst,$src" %} |
| opcode(0x2B); |
| ins_encode( OpcP, RegReg( dst, src) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // Integer subtract of an immediate from a register: dst = dst - src. |
| // Uses the same encode classes as addI_eReg_imm with the /5 opcode extension; |
| // flags are killed. |
| instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ |
| match(Set dst (SubI dst src)); |
| effect(KILL cr); |
| |
| format %{ "SUB $dst,$src" %} |
| opcode(0x81,0x05); /* Opcode 81 /5 */ |
| // ins_encode( RegImm( dst, src) ); |
| ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Integer subtract with a memory source: dst = dst - LoadI(src). The load is |
| // folded into the SUB. Cost 125; flags are killed. |
| instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ |
| match(Set dst (SubI dst (LoadI src))); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "SUB $dst,$src" %} |
| opcode(0x2B); |
| ins_encode( OpcP, RegMem( dst, src) ); |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| // Read-modify-write integer subtract: StoreI to dst of (LoadI dst) - src. |
| // RegMem is given (src, dst): the register is the source and memory is the |
| // destination. Cost 150; flags are killed. |
| instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (SubI (LoadI dst) src))); |
| effect(KILL cr); |
| |
| ins_cost(150); |
| format %{ "SUB $dst,$src" %} |
| opcode(0x29); /* Opcode 29 /r */ |
| ins_encode( OpcP, RegMem( src, dst ) ); |
| ins_pipe( ialu_mem_reg ); |
| %} |
| |
| // Subtract from a pointer |
| // Matches the ideal shape AddP dst (SubI 0 src), i.e. adding a negated int to |
| // a pointer, and emits it as a single SUB dst,src. The zero operand exists |
| // only to match the constant 0 (immI0). Flags are killed. |
| instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ |
| match(Set dst (AddP dst (SubI zero src))); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "SUB $dst,$src" %} |
| opcode(0x2B); |
| ins_encode( OpcP, RegReg( dst, src) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // Integer negate: matches SubI 0 dst (zero is immI0) and emits NEG dst. |
| // Declared as 2 bytes; flags are killed. |
| instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ |
| match(Set dst (SubI zero dst)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "NEG $dst" %} |
| opcode(0xF7,0x03); // Opcode F7 /3 |
| ins_encode( OpcP, RegOpc( dst ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| //----------Multiplication/Division Instructions------------------------------- |
| // Integer Multiplication Instructions |
| // Multiply Register |
| // Register-register integer multiply: dst = dst * src. The opcode is declared |
| // as primary 0xAF with secondary 0x0F, and the encoding lists OpcS before |
| // OpcP. Declared as 3 bytes, cost 300; flags are killed. |
| instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (MulI dst src)); |
| effect(KILL cr); |
| |
| size(3); |
| ins_cost(300); |
| format %{ "IMUL $dst,$src" %} |
| opcode(0xAF, 0x0F); |
| ins_encode( OpcS, OpcP, RegReg( dst, src) ); |
| ins_pipe( ialu_reg_reg_alu0 ); |
| %} |
| |
| // Multiply 32-bit Immediate |
| // Three-operand multiply: dst = src * imm, with dst separate from src. |
| // Cost 300; flags are killed. |
| instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ |
| match(Set dst (MulI src imm)); |
| effect(KILL cr); |
| |
| ins_cost(300); |
| format %{ "IMUL $dst,$src,$imm" %} |
| opcode(0x69); /* 69 /r id */ |
| ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); |
| ins_pipe( ialu_reg_reg_alu0 ); |
| %} |
| |
| // Materializes a long constant of class immL32 into an eADXRegL_low_only |
| // operand; per the format only the low word is written (MOV EAX,imm). |
| // The cost is deliberately inflated, and mulI_imm_high and |
| // mulI_imm_RShift_high subtract 150 from their own costs to compensate. |
| instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ |
| match(Set dst src); |
| effect(KILL cr); |
| |
| // Note that this is artificially increased to make it more expensive than loadConL |
| ins_cost(250); |
| format %{ "MOV EAX,$src\t// low word only" %} |
| opcode(0xB8); |
| ins_encode( LdImmL_Lo(dst, src) ); |
| ins_pipe( ialu_reg_fat ); |
| %} |
| |
| // Multiply by 32-bit Immediate, taking the shifted high order results |
| // (special case for shift by 32) |
| // Matches (int)(((long)src1 * src2) >> cnt) where cnt is of class immI_32. |
| // The predicate walks the match tree to the MulL's second input and requires |
| // it to be a ConL whose value lies in [min_jint, max_jint]. |
| // The result is constrained to eDXRegI; flags are killed. |
| instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ |
| match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); |
| predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && |
| _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && |
| _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); |
| effect(USE src1, KILL cr); |
| |
| // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only |
| ins_cost(0*100 + 1*400 - 150); |
| format %{ "IMUL EDX:EAX,$src1" %} |
| ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Multiply by 32-bit Immediate, taking the shifted high order results |
| // Same match shape and predicate as mulI_imm_high, but cnt is of class |
| // immI_32_63. The format adds a SAR of EDX by (cnt - 32) after the IMUL, and |
| // the cost includes one extra 100 for it. Same encode class as mulI_imm_high. |
| instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ |
| match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); |
| predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && |
| _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && |
| _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); |
| effect(USE src1, KILL cr); |
| |
| // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only |
| ins_cost(1*100 + 1*400 - 150); |
| format %{ "IMUL EDX:EAX,$src1\n\t" |
| "SAR EDX,$cnt-32" %} |
| ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Multiply Memory 32-bit Immediate |
| // Three-operand multiply with a memory source: dst = LoadI(src) * imm. |
| // The load is folded into the IMUL. Cost 300; flags are killed. |
| instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ |
| match(Set dst (MulI (LoadI src) imm)); |
| effect(KILL cr); |
| |
| ins_cost(300); |
| format %{ "IMUL $dst,$src,$imm" %} |
| opcode(0x69); /* 69 /r id */ |
| ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); |
| ins_pipe( ialu_reg_mem_alu0 ); |
| %} |
| |
| // Multiply Memory |
| // Two-operand multiply with a memory source: dst = dst * LoadI(src). |
| // Same opcode pair as mulI_eReg (0x0F, 0xAF) with a reg/mem operand. |
| // Cost 350; flags are killed. |
| instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ |
| match(Set dst (MulI dst (LoadI src))); |
| effect(KILL cr); |
| |
| ins_cost(350); |
| format %{ "IMUL $dst,$src" %} |
| opcode(0xAF, 0x0F); |
| ins_encode( OpcS, OpcP, RegMem( dst, src) ); |
| ins_pipe( ialu_reg_mem_alu0 ); |
| %} |
| |
| // Multiply Register Int to Long |
| // Widening signed multiply of two ints into a long. src is constrained to |
| // eAXRegI, the result to eADXRegL, and src1 to nadxRegI. Only dst and src1 |
| // are passed to the encode class. Cost 300; flags are killed. |
| instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ |
| // Basic Idea: long = (long)int * (long)int |
| match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); |
| effect(DEF dst, USE src, USE src1, KILL flags); |
| |
| ins_cost(300); |
| format %{ "IMUL $dst,$src1" %} |
| |
| ins_encode( long_int_multiply( dst, src1 ) ); |
| ins_pipe( ialu_reg_reg_alu0 ); |
| %} |
| |
| // Widening multiply of two ints that are each masked with the same immL_32bits |
| // constant after conversion to long; formatted as MUL rather than IMUL. |
| // Register constraints mirror mulI2L (src in eAXRegI, dst in eADXRegL). |
| // Cost 300; flags are killed. |
| instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ |
| // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) |
| match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); |
| effect(KILL flags); |
| |
| ins_cost(300); |
| format %{ "MUL $dst,$src1" %} |
| |
| ins_encode( long_uint_multiply(dst, src1) ); |
| ins_pipe( ialu_reg_reg_alu0 ); |
| %} |
| |
| // Multiply Register Long |
| // General 64x64 -> 64 multiply: dst (in EDX:EAX) = dst * src. Needs one int |
| // temporary to accumulate the two cross products before the final MUL. |
| // The cost models 4 simple ops plus 3 multiplies; flags are killed. |
| instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ |
| match(Set dst (MulL dst src)); |
| effect(KILL cr, TEMP tmp); |
| ins_cost(4*100+3*400); |
| // Basic idea: lo(result) = lo(x_lo * y_lo) |
| // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) |
| format %{ "MOV $tmp,$src.lo\n\t" |
| "IMUL $tmp,EDX\n\t" |
| "MOV EDX,$src.hi\n\t" |
| "IMUL EDX,EAX\n\t" |
| "ADD $tmp,EDX\n\t" |
| "MUL EDX:EAX,$src.lo\n\t" |
| "ADD EDX,$tmp" %} |
| ins_encode( long_multiply( dst, src, tmp ) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Multiply Register Long where the left operand's high 32 bits are zero |
| // Cheaper long multiply chosen when is_operand_hi32_zero() holds for the |
| // node's first input: the x_hi * y_lo cross product is skipped. |
| // Emits: tmp = src.hi; tmp *= EAX; EDX:EAX = EAX * src.lo; EDX += tmp. |
| instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ |
| predicate(is_operand_hi32_zero(n->in(1))); |
| match(Set dst (MulL dst src)); |
| effect(KILL cr, TEMP tmp); |
| ins_cost(2*100+2*400); |
| // Basic idea: lo(result) = lo(x_lo * y_lo) |
| // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 |
| format %{ "MOV $tmp,$src.hi\n\t" |
| "IMUL $tmp,EAX\n\t" |
| "MUL EDX:EAX,$src.lo\n\t" |
| "ADD EDX,$tmp" %} |
| ins_encode %{ |
| __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); |
| __ imull($tmp$$Register, rax); |
| __ mull($src$$Register); |
| __ addl(rdx, $tmp$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Multiply Register Long where the right operand's high 32 bits are zero |
| // Cheaper long multiply chosen when is_operand_hi32_zero() holds for the |
| // node's second input: the x_lo * y_hi cross product is skipped. |
| // Emits: tmp = src.lo; tmp *= EDX; EDX:EAX = EAX * src.lo; EDX += tmp. |
| instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ |
| predicate(is_operand_hi32_zero(n->in(2))); |
| match(Set dst (MulL dst src)); |
| effect(KILL cr, TEMP tmp); |
| ins_cost(2*100+2*400); |
| // Basic idea: lo(result) = lo(x_lo * y_lo) |
| // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 |
| format %{ "MOV $tmp,$src.lo\n\t" |
| "IMUL $tmp,EDX\n\t" |
| "MUL EDX:EAX,$src.lo\n\t" |
| "ADD EDX,$tmp" %} |
| ins_encode %{ |
| __ movl($tmp$$Register, $src$$Register); |
| __ imull($tmp$$Register, rdx); |
| __ mull($src$$Register); |
| __ addl(rdx, $tmp$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Multiply Register Long where the left and the right operands' high 32 bits are zero |
| // Cheapest long multiply, chosen when is_operand_hi32_zero() holds for both |
| // inputs: a single MUL by src.lo, with no temporary needed. |
| // Cost is one multiply; flags are killed. |
| instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ |
| predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); |
| match(Set dst (MulL dst src)); |
| effect(KILL cr); |
| ins_cost(1*400); |
| // Basic idea: lo(result) = lo(x_lo * y_lo) |
| // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 |
| format %{ "MUL EDX:EAX,$src.lo\n\t" %} |
| ins_encode %{ |
| __ mull($src$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Multiply Register Long by small constant |
| // Long multiply by a constant of class immL_127: dst (EDX:EAX) = dst * src. |
| // Uses one int temporary for the src * EDX cross product. The size is |
| // declared as a fixed 12 bytes; flags are killed. |
| instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ |
| match(Set dst (MulL dst src)); |
| effect(KILL cr, TEMP tmp); |
| ins_cost(2*100+2*400); |
| size(12); |
| // Basic idea: lo(result) = lo(src * EAX) |
| // hi(result) = hi(src * EAX) + lo(src * EDX) |
| format %{ "IMUL $tmp,EDX,$src\n\t" |
| "MOV EDX,$src\n\t" |
| "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" |
| "ADD EDX,$tmp" %} |
| ins_encode( long_multiply_con( dst, src, tmp ) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Integer DIV with Register |
| // Signed int divide: rax = rax / div, with the divisor constrained to |
| // eCXRegI. Per the format, the sequence first checks for the dividend |
| // 0x80000000 with divisor -1 and, in that case, zeroes EDX and skips the IDIV. |
| // Otherwise it sign-extends with CDQ and divides. EDX and flags are killed. |
| instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ |
| match(Set rax (DivI rax div)); |
| effect(KILL rdx, KILL cr); |
| size(26); |
| ins_cost(30*100+10*100); |
| format %{ "CMP EAX,0x80000000\n\t" |
| "JNE,s normal\n\t" |
| "XOR EDX,EDX\n\t" |
| "CMP ECX,-1\n\t" |
| "JE,s done\n" |
| "normal: CDQ\n\t" |
| "IDIV $div\n\t" |
| "done:" %} |
| opcode(0xF7, 0x7); /* Opcode F7 /7 */ |
| ins_encode( cdq_enc, OpcP, RegOpc(div) ); |
| ins_pipe( ialu_reg_reg_alu0 ); |
| %} |
| |
| // Divide Register Long |
| // General long divide. Per the format this is an out-of-line call: both |
| // operands are pushed (high word first), SharedRuntime::ldiv is called, and |
| // the 16 bytes of arguments are popped. The result is in eADXRegL; flags, |
| // ECX and EBX are declared killed. Cost 10000. |
| instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ |
| match(Set dst (DivL src1 src2)); |
| effect( KILL cr, KILL cx, KILL bx ); |
| ins_cost(10000); |
| format %{ "PUSH $src1.hi\n\t" |
| "PUSH $src1.lo\n\t" |
| "PUSH $src2.hi\n\t" |
| "PUSH $src2.lo\n\t" |
| "CALL SharedRuntime::ldiv\n\t" |
| "ADD ESP,16" %} |
| ins_encode( long_div(src1,src2) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Integer DIVMOD with Register, both quotient and mod results |
| // Combined divide/modulo: matches DivModI, which has no single "Set" result. |
| // Same guarded CDQ/IDIV sequence and encoding as divI_eReg, but EDX is not |
| // declared killed here; only the flags are. |
| instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ |
| match(DivModI rax div); |
| effect(KILL cr); |
| size(26); |
| ins_cost(30*100+10*100); |
| format %{ "CMP EAX,0x80000000\n\t" |
| "JNE,s normal\n\t" |
| "XOR EDX,EDX\n\t" |
| "CMP ECX,-1\n\t" |
| "JE,s done\n" |
| "normal: CDQ\n\t" |
| "IDIV $div\n\t" |
| "done:" %} |
| opcode(0xF7, 0x7); /* Opcode F7 /7 */ |
| ins_encode( cdq_enc, OpcP, RegOpc(div) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Integer MOD with Register |
| // Signed int remainder: rdx = rax % div, with the divisor constrained to |
| // eCXRegI. EAX and the flags are declared killed. |
| // This instruct uses the same encoding (cdq_enc, F7 /7) and the same 26-byte |
| // size as divI_eReg, so the format lists the same guarded sequence rather |
| // than a bare CDQ/IDIV pair. |
| instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ |
| match(Set rdx (ModI rax div)); |
| effect(KILL rax, KILL cr); |
| |
| size(26); |
| ins_cost(300); |
| format %{ "CMP EAX,0x80000000\n\t" |
| "JNE,s normal\n\t" |
| "XOR EDX,EDX\n\t" |
| "CMP ECX,-1\n\t" |
| "JE,s done\n" |
| "normal: CDQ\n\t" |
| "IDIV $div\n\t" |
| "done:" %} |
| opcode(0xF7, 0x7); /* Opcode F7 /7 */ |
| ins_encode( cdq_enc, OpcP, RegOpc(div) ); |
| ins_pipe( ialu_reg_reg_alu0 ); |
| %} |
| |
| // Remainder Register Long |
| // General long remainder. Per the format this is an out-of-line call: both |
| // operands are pushed (high word first), SharedRuntime::lrem is called, and |
| // the 16 bytes of arguments are popped. The result is in eADXRegL; flags, |
| // ECX and EBX are declared killed. Cost 10000. |
| instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ |
| match(Set dst (ModL src1 src2)); |
| effect( KILL cr, KILL cx, KILL bx ); |
| ins_cost(10000); |
| format %{ "PUSH $src1.hi\n\t" |
| "PUSH $src1.lo\n\t" |
| "PUSH $src2.hi\n\t" |
| "PUSH $src2.lo\n\t" |
| "CALL SharedRuntime::lrem\n\t" |
| "ADD ESP,16" %} |
| ins_encode( long_mod(src1,src2) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Divide Register Long (no special case since divisor != -1) |
| // Inline long divide of EDX:EAX by a constant of class immL32. |
| // The code divides by abs(imm) using unsigned DIV and negates the 64-bit |
| // result at the end when the constant is negative. Three paths: |
| // fast - abs(imm) is (unsigned) above the high word: one DIV, and tmp2 |
| // stays 0 as the high word of the quotient. |
| // pos - otherwise, with a non-negative dividend: the high word is divided |
| // first, then the low word together with the remainder. |
| // neg - negative dividend: negate it, do the two-step divide, and negate |
| // the quotient back. |
| // The assert requires the constant to be none of 0, -1 and min_jint. |
| // Two int temporaries are used; flags are killed. |
| instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ |
| match(Set dst (DivL dst imm)); |
| effect( TEMP tmp, TEMP tmp2, KILL cr ); |
| ins_cost(1000); |
| format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" |
| "XOR $tmp2,$tmp2\n\t" |
| "CMP $tmp,EDX\n\t" |
| "JA,s fast\n\t" |
| "MOV $tmp2,EAX\n\t" |
| "MOV EAX,EDX\n\t" |
| "MOV EDX,0\n\t" |
| "JLE,s pos\n\t" |
| "LNEG EAX : $tmp2\n\t" |
| "DIV $tmp # unsigned division\n\t" |
| "XCHG EAX,$tmp2\n\t" |
| "DIV $tmp\n\t" |
| "LNEG $tmp2 : EAX\n\t" |
| "JMP,s done\n" |
| "pos:\n\t" |
| "DIV $tmp\n\t" |
| "XCHG EAX,$tmp2\n" |
| "fast:\n\t" |
| "DIV $tmp\n" |
| "done:\n\t" |
| "MOV EDX,$tmp2\n\t" |
| "NEG EDX:EAX # if $imm < 0" %} |
| ins_encode %{ |
| int con = (int)$imm$$constant; |
| assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); |
| int pcon = (con > 0) ? con : -con; |
| Label Lfast, Lpos, Ldone; |
| |
| __ movl($tmp$$Register, pcon); |
| __ xorl($tmp2$$Register,$tmp2$$Register); |
| __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); |
| __ jccb(Assembler::above, Lfast); // result fits into 32 bit |
| |
| __ movl($tmp2$$Register, $dst$$Register); // save |
| __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); |
| __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags |
| __ jccb(Assembler::lessEqual, Lpos); // result is positive |
| |
| // Negative dividend. |
| // convert value to positive to use unsigned division |
| __ lneg($dst$$Register, $tmp2$$Register); |
| __ divl($tmp$$Register); |
| __ xchgl($dst$$Register, $tmp2$$Register); |
| __ divl($tmp$$Register); |
| // revert result back to negative |
| __ lneg($tmp2$$Register, $dst$$Register); |
| __ jmpb(Ldone); |
| |
| __ bind(Lpos); |
| __ divl($tmp$$Register); // Use unsigned division |
| __ xchgl($dst$$Register, $tmp2$$Register); |
| // Fall through to the final divide; tmp2 holds the high 32 bits of the result |
| |
| __ bind(Lfast); |
| // fast path: src is positive |
| __ divl($tmp$$Register); // Use unsigned division |
| |
| __ bind(Ldone); |
| __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); |
| if (con < 0) { |
| __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Remainder Register Long (remainder fits into 32 bits) |
| // Inline long remainder of EDX:EAX by a constant of class immL32. |
| // The code divides by abs(imm) using unsigned DIV and uses the same |
| // fast / pos / negative-dividend paths as divL_eReg_imm32. On the negative |
| // path the 32-bit remainder in EDX is negated. At the end the remainder is |
| // moved into the low word and the high word is filled with its sign (SAR 31). |
| // The sign of the constant is not used after abs(). |
| // The assert requires the constant to be none of 0, -1 and min_jint. |
| instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ |
| match(Set dst (ModL dst imm)); |
| effect( TEMP tmp, TEMP tmp2, KILL cr ); |
| ins_cost(1000); |
| format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" |
| "CMP $tmp,EDX\n\t" |
| "JA,s fast\n\t" |
| "MOV $tmp2,EAX\n\t" |
| "MOV EAX,EDX\n\t" |
| "MOV EDX,0\n\t" |
| "JLE,s pos\n\t" |
| "LNEG EAX : $tmp2\n\t" |
| "DIV $tmp # unsigned division\n\t" |
| "MOV EAX,$tmp2\n\t" |
| "DIV $tmp\n\t" |
| "NEG EDX\n\t" |
| "JMP,s done\n" |
| "pos:\n\t" |
| "DIV $tmp\n\t" |
| "MOV EAX,$tmp2\n" |
| "fast:\n\t" |
| "DIV $tmp\n" |
| "done:\n\t" |
| "MOV EAX,EDX\n\t" |
| "SAR EDX,31\n\t" %} |
| ins_encode %{ |
| int con = (int)$imm$$constant; |
| assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); |
| int pcon = (con > 0) ? con : -con; |
| Label Lfast, Lpos, Ldone; |
| |
| __ movl($tmp$$Register, pcon); |
| __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); |
| __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit |
| |
| __ movl($tmp2$$Register, $dst$$Register); // save |
| __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); |
| __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags |
| __ jccb(Assembler::lessEqual, Lpos); // result is positive |
| |
| // Negative dividend. |
| // convert value to positive to use unsigned division |
| __ lneg($dst$$Register, $tmp2$$Register); |
| __ divl($tmp$$Register); |
| __ movl($dst$$Register, $tmp2$$Register); |
| __ divl($tmp$$Register); |
| // revert remainder back to negative |
| __ negl(HIGH_FROM_LOW($dst$$Register)); |
| __ jmpb(Ldone); |
| |
| __ bind(Lpos); |
| __ divl($tmp$$Register); |
| __ movl($dst$$Register, $tmp2$$Register); |
| |
| __ bind(Lfast); |
| // fast path: src is positive |
| __ divl($tmp$$Register); |
| |
| __ bind(Ldone); |
| __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); |
| __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign |
| |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Integer Shift Instructions |
| // Shift Left by one |
| // dst = dst << 1, matched on the constant 1 (immI1). Uses the shift-by-one |
| // opcode D1 /4, so no immediate byte is encoded. 2 bytes; flags are killed. |
| instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ |
| match(Set dst (LShiftI dst shift)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "SHL $dst,$shift" %} |
| opcode(0xD1, 0x4); /* D1 /4 */ |
| ins_encode( OpcP, RegOpc( dst ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Shift Left by 8-bit immediate |
| // dst = dst << shift for an 8-bit immediate count (C1 /4 ib). |
| // 3 bytes; flags are killed. |
| instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ |
| match(Set dst (LShiftI dst shift)); |
| effect(KILL cr); |
| |
| size(3); |
| format %{ "SHL $dst,$shift" %} |
| opcode(0xC1, 0x4); /* C1 /4 ib */ |
| ins_encode( RegOpcImm( dst, shift) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Shift Left by variable |
| // dst = dst << shift with a variable count; the count operand is constrained |
| // to eCXRegI and is not part of the encoding (D3 /4). 2 bytes; flags killed. |
| instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ |
| match(Set dst (LShiftI dst shift)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "SHL $dst,$shift" %} |
| opcode(0xD3, 0x4); /* D3 /4 */ |
| ins_encode( OpcP, RegOpc( dst ) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // Arithmetic shift right by one |
| // dst = dst >> 1 (RShiftI), matched on the constant 1 (immI1). Uses the |
| // shift-by-one opcode D1 /7. 2 bytes; flags are killed. |
| instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ |
| match(Set dst (RShiftI dst shift)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "SAR $dst,$shift" %} |
| opcode(0xD1, 0x7); /* D1 /7 */ |
| ins_encode( OpcP, RegOpc( dst ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Arithmetic shift right by one |
| // Read-modify-write form on memory: StoreI to dst of (LoadI dst) >> 1. |
| // The /7 opcode extension is passed to RMopc_Mem as `secondary`. |
| // Flags are killed. |
| instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); |
| effect(KILL cr); |
| format %{ "SAR $dst,$shift" %} |
| opcode(0xD1, 0x7); /* D1 /7 */ |
| ins_encode( OpcP, RMopc_Mem(secondary,dst) ); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // Arithmetic Shift Right by 8-bit immediate |
| // dst = dst >> shift (RShiftI) for an 8-bit immediate count (C1 /7 ib). |
| // 3 bytes; flags are killed. This is a register-only operation, so it uses |
| // the register pipeline class, matching salI_eReg_imm and shrI_eReg_imm. |
| instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ |
| match(Set dst (RShiftI dst shift)); |
| effect(KILL cr); |
| |
| size(3); |
| format %{ "SAR $dst,$shift" %} |
| opcode(0xC1, 0x7); /* C1 /7 ib */ |
| ins_encode( RegOpcImm( dst, shift ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Arithmetic Shift Right by 8-bit immediate |
| // Read-modify-write form on memory: StoreI to dst of (LoadI dst) >> shift |
| // for an 8-bit immediate count. The /7 opcode extension is passed to |
| // RMopc_Mem as `secondary`. Flags are killed. |
| instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); |
| effect(KILL cr); |
| |
| format %{ "SAR $dst,$shift" %} |
| opcode(0xC1, 0x7); /* C1 /7 ib */ |
| ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // Arithmetic Shift Right by variable |
| // dst = dst >> shift (RShiftI) with a variable count; the count operand is |
| // constrained to eCXRegI and is not part of the encoding (D3 /7). |
| // 2 bytes; flags are killed. |
| instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ |
| match(Set dst (RShiftI dst shift)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "SAR $dst,$shift" %} |
| opcode(0xD3, 0x7); /* D3 /7 */ |
| ins_encode( OpcP, RegOpc( dst ) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // Logical shift right by one |
| // dst = dst >>> 1 (URShiftI), matched on the constant 1 (immI1). Uses the |
| // shift-by-one opcode D1 /5. 2 bytes; flags are killed. |
| instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ |
| match(Set dst (URShiftI dst shift)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "SHR $dst,$shift" %} |
| opcode(0xD1, 0x5); /* D1 /5 */ |
| ins_encode( OpcP, RegOpc( dst ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Logical Shift Right by 8-bit immediate |
| // dst = dst >>> shift (URShiftI) for an 8-bit immediate count (C1 /5 ib). |
| // 3 bytes; flags are killed. |
| instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ |
| match(Set dst (URShiftI dst shift)); |
| effect(KILL cr); |
| |
| size(3); |
| format %{ "SHR $dst,$shift" %} |
| opcode(0xC1, 0x5); /* C1 /5 ib */ |
| ins_encode( RegOpcImm( dst, shift) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| |
| // Shift Left by 24, followed by Arithmetic Shift Right by 24. |
| // This idiom is used by the compiler for the i2b bytecode. |
| // The shift pair is replaced by a single byte-to-int sign-extending move |
| // (movsbl). The source is constrained to operand class xRegI. The instruct |
| // has no flags operand and declares no KILL effect. |
| instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ |
| match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); |
| |
| size(3); |
| format %{ "MOVSX $dst,$src :8" %} |
| ins_encode %{ |
| __ movsbl($dst$$Register, $src$$Register); |
| %} |
| ins_pipe(ialu_reg_reg); |
| %} |
| |
| // Shift Left by 16, followed by Arithmetic Shift Right by 16. |
| // This idiom is used by the compiler for the i2s bytecode. |
| // The shift pair is replaced by a single word-to-int sign-extending move |
| // (movswl). The instruct has no flags operand and declares no KILL effect. |
| instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ |
| match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); |
| |
| size(3); |
| format %{ "MOVSX $dst,$src :16" %} |
| ins_encode %{ |
| __ movswl($dst$$Register, $src$$Register); |
| %} |
| ins_pipe(ialu_reg_reg); |
| %} |
| |
| |
| // Logical Shift Right by variable |
| // dst = dst >>> shift (URShiftI) with a variable count; the count operand is |
| // constrained to eCXRegI and is not part of the encoding (D3 /5). |
| // 2 bytes; flags are killed. |
| instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ |
| match(Set dst (URShiftI dst shift)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "SHR $dst,$shift" %} |
| opcode(0xD3, 0x5); /* D3 /5 */ |
| ins_encode( OpcP, RegOpc( dst ) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| |
| //----------Logical Instructions----------------------------------------------- |
| //----------Integer Logical Instructions--------------------------------------- |
| // And Instructions |
| // And Register with Register |
| // dst = dst & src, two-operand form. Declared as 2 bytes; flags are killed. |
| instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (AndI dst src)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "AND $dst,$src" %} |
| opcode(0x23); |
| ins_encode( OpcP, RegReg( dst, src) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // And Register with Immediate |
| // dst = dst & immediate. Uses the same encode classes as addI_eReg_imm with |
| // the /4 opcode extension; flags are killed. |
| instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ |
| match(Set dst (AndI dst src)); |
| effect(KILL cr); |
| |
| format %{ "AND $dst,$src" %} |
| opcode(0x81,0x04); /* Opcode 81 /4 */ |
| // ins_encode( RegImm( dst, src) ); |
| ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // And Register with Memory |
| // dst = dst & LoadI(src). The load is folded into the AND. Cost 125; flags |
| // are killed. |
| instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ |
| match(Set dst (AndI dst (LoadI src))); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "AND $dst,$src" %} |
| opcode(0x23); |
| ins_encode( OpcP, RegMem( dst, src) ); |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| // And Memory with Register |
| // Read-modify-write AND: StoreI to dst of (LoadI dst) & src. RegMem is given |
| // (src, dst): the register is the source and memory is the destination. |
| // Cost 150; flags are killed. |
| instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (AndI (LoadI dst) src))); |
| effect(KILL cr); |
| |
| ins_cost(150); |
| format %{ "AND $dst,$src" %} |
| opcode(0x21); /* Opcode 21 /r */ |
| ins_encode( OpcP, RegMem( src, dst ) ); |
| ins_pipe( ialu_mem_reg ); |
| %} |
| |
| // And Memory with Immediate |
| instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (AndI (LoadI dst) src))); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "AND $dst,$src" %} |
| opcode(0x81, 0x4); /* Opcode 81 /4 id */ |
| // ins_encode( MemImm( dst, src) ); |
| ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // BMI1 instructions |
| // ANDN, register form: matches (src1 ^ minus_1) & src2, i.e. ~src1 & src2 |
| // (immI_M1 is presumably the constant -1), and emits a single andnl into a |
| // separate dst. Only selectable when UseBMI1Instructions is set. |
| instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ |
| match(Set dst (AndI (XorI src1 minus_1) src2)); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr); |
| |
| format %{ "ANDNL $dst, $src1, $src2" %} |
| |
| ins_encode %{ |
| __ andnl($dst$$Register, $src1$$Register, $src2$$Register); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // ANDN, memory form: same ~src1 & x pattern, with the second AND input being |
| // an int load that is folded into the andnl as a memory operand. |
| instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ |
| match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "ANDNL $dst, $src1, $src2" %} |
| |
| ins_encode %{ |
| __ andnl($dst$$Register, $src1$$Register, $src2$$Address); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // BLSI, register form: matches (imm_zero - src) & src, i.e. (-src) & src, |
| // which isolates the lowest set bit of src. Emitted as a single blsil. |
| instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{ |
| match(Set dst (AndI (SubI imm_zero src) src)); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr); |
| |
| format %{ "BLSIL $dst, $src" %} |
| |
| ins_encode %{ |
| __ blsil($dst$$Register, $src$$Register); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // BLSI, memory form: the match tree names (LoadI src) for both uses of the |
| // value; the encoding issues one blsil with a memory source operand. |
| instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{ |
| match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "BLSIL $dst, $src" %} |
| |
| ins_encode %{ |
| __ blsil($dst$$Register, $src$$Address); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // BLSMSK, register form: matches (src + minus_1) ^ src, i.e. (src - 1) ^ src, |
| // which yields a mask of all bits up to and including the lowest set bit. |
| instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) |
| %{ |
| match(Set dst (XorI (AddI src minus_1) src)); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr); |
| |
| format %{ "BLSMSKL $dst, $src" %} |
| |
| ins_encode %{ |
| __ blsmskl($dst$$Register, $src$$Register); |
| %} |
| |
| ins_pipe(ialu_reg); |
| %} |
| |
| // BLSMSK, memory form: same (x - 1) ^ x pattern where x is an int load; |
| // emitted as one blsmskl with a memory source operand. |
| instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) |
| %{ |
| match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "BLSMSKL $dst, $src" %} |
| |
| ins_encode %{ |
| __ blsmskl($dst$$Register, $src$$Address); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // BLSR, register form: matches (src + minus_1) & src, i.e. (src - 1) & src, |
| // which clears the lowest set bit of src. Emitted as a single blsrl. |
| instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) |
| %{ |
| match(Set dst (AndI (AddI src minus_1) src) ); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr); |
| |
| format %{ "BLSRL $dst, $src" %} |
| |
| ins_encode %{ |
| __ blsrl($dst$$Register, $src$$Register); |
| %} |
| |
| ins_pipe(ialu_reg); |
| %} |
| |
| // BLSR, memory form: same (x - 1) & x pattern where x is an int load; |
| // emitted as one blsrl with a memory source operand. |
| instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) |
| %{ |
| match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "BLSRL $dst, $src" %} |
| |
| ins_encode %{ |
| __ blsrl($dst$$Register, $src$$Address); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Or Instructions |
| // Or Register with Register |
| instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (OrI dst src)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "OR $dst,$src" %} |
| opcode(0x0B); |
| ins_encode( OpcP, RegReg( dst, src) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // Or Register with a pointer register viewed as an integer (CastP2X). |
| // Uses the same opcode 0x0B register-register encoding as orI_eReg. |
| instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ |
| match(Set dst (OrI dst (CastP2X src))); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "OR $dst,$src" %} |
| opcode(0x0B); |
| ins_encode( OpcP, RegReg( dst, src) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| |
| // Or Register with Immediate |
| instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ |
| match(Set dst (OrI dst src)); |
| effect(KILL cr); |
| |
| format %{ "OR $dst,$src" %} |
| opcode(0x81,0x01); /* Opcode 81 /1 id */ |
| // ins_encode( RegImm( dst, src) ); |
| ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Or Register with Memory |
| instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ |
| match(Set dst (OrI dst (LoadI src))); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "OR $dst,$src" %} |
| opcode(0x0B); |
| ins_encode( OpcP, RegMem( dst, src) ); |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| // Or Memory with Register |
| instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (OrI (LoadI dst) src))); |
| effect(KILL cr); |
| |
| ins_cost(150); |
| format %{ "OR $dst,$src" %} |
| opcode(0x09); /* Opcode 09 /r */ |
| ins_encode( OpcP, RegMem( src, dst ) ); |
| ins_pipe( ialu_mem_reg ); |
| %} |
| |
| // Or Memory with Immediate |
| instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (OrI (LoadI dst) src))); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "OR $dst,$src" %} |
| opcode(0x81,0x1); /* Opcode 81 /1 id */ |
| // ins_encode( MemImm( dst, src) ); |
| ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| // ROL/ROR |
| // ROL expand |
| // The three rolI_eReg_imm1/imm8/CL instructs carry no match rule; they are |
| // encoding-only helpers referenced from the expand rules of the rolI_eReg_* |
| // matchers that follow. |
| // Rotate left by one: opcode D1 /0 applied to $dst in place. |
| instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ |
| effect(USE_DEF dst, USE shift, KILL cr); |
| |
| format %{ "ROL $dst, $shift" %} |
| opcode(0xD1, 0x0); /* Opcode D1 /0 */ |
| ins_encode( OpcP, RegOpc( dst )); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Rotate left by an 8-bit immediate count (expand helper, no match rule). |
| instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ |
| effect(USE_DEF dst, USE shift, KILL cr); |
| |
| format %{ "ROL $dst, $shift" %} |
| opcode(0xC1, 0x0); /* Opcode C1 /0 ib */ |
| ins_encode( RegOpcImm(dst, shift) ); |
| ins_pipe(ialu_reg); |
| %} |
| |
| // Rotate left by a variable count (expand helper, no match rule). The count |
| // operand is typed eCXRegI and dst is typed ncxRegI, so the two cannot share |
| // a register. |
| instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ |
| effect(USE_DEF dst, USE shift, KILL cr); |
| |
| format %{ "ROL $dst, $shift" %} |
| opcode(0xD3, 0x0); /* Opcode D3 /0 */ |
| ins_encode(OpcP, RegOpc(dst)); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| // end of ROL expand |
| |
| // ROL 32bit by one once |
| // Matches (dst << lshift) | (dst >>> rshift) with lshift typed immI1 and rshift |
| // typed immI_M1 (presumably the constants 1 and -1; an int shift count of -1 |
| // behaves as 31), and expands to the one-bit ROL helper. |
| instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ |
| match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); |
| |
| expand %{ |
| rolI_eReg_imm1(dst, lshift, cr); |
| %} |
| %} |
| |
| // ROL 32bit var by imm8 once |
| // Matches (dst << lshift) | (dst >>> rshift) for constant counts. The predicate |
| // accepts the pattern only when the two shift constants sum to 0 modulo 32, |
| // which is what makes the OR of the two shifts a rotate by lshift. |
| instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ |
| predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); |
| match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); |
| |
| expand %{ |
| rolI_eReg_imm8(dst, lshift, cr); |
| %} |
| %} |
| |
| // ROL 32bit var by var once |
| // Matches (dst << shift) | (dst >>> (zero - shift)), the "0 - shift" spelling of |
| // a variable rotate left, and expands to ROL by the count register. |
| instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ |
| match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); |
| |
| expand %{ |
| rolI_eReg_CL(dst, shift, cr); |
| %} |
| %} |
| |
| // ROL 32bit var by var once |
| // Matches (dst << shift) | (dst >>> (c32 - shift)), the "32 - shift" spelling of |
| // a variable rotate left, and expands to ROL by the count register. |
| instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ |
| match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift)))); |
| |
| expand %{ |
| rolI_eReg_CL(dst, shift, cr); |
| %} |
| %} |
| |
| // ROR expand |
| // The three rorI_eReg_imm1/imm8/CL instructs carry no match rule; they are |
| // encoding-only helpers referenced from the expand rules of the rorI_eReg_* |
| // matchers that follow. |
| // Rotate right by one: opcode D1 /1 applied to $dst in place. |
| instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ |
| effect(USE_DEF dst, USE shift, KILL cr); |
| |
| format %{ "ROR $dst, $shift" %} |
| opcode(0xD1,0x1); /* Opcode D1 /1 */ |
| ins_encode( OpcP, RegOpc( dst ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Rotate right by an 8-bit immediate count (expand helper, no match rule). |
| instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ |
| effect (USE_DEF dst, USE shift, KILL cr); |
| |
| format %{ "ROR $dst, $shift" %} |
| opcode(0xC1, 0x1); /* Opcode C1 /1 ib */ |
| ins_encode( RegOpcImm(dst, shift) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Rotate right by a variable count (expand helper, no match rule). The count |
| // operand is typed eCXRegI and dst is typed ncxRegI, so the two cannot share |
| // a register. |
| instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ |
| effect(USE_DEF dst, USE shift, KILL cr); |
| |
| format %{ "ROR $dst, $shift" %} |
| opcode(0xD3, 0x1); /* Opcode D3 /1 */ |
| ins_encode(OpcP, RegOpc(dst)); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| // end of ROR expand |
| |
| // ROR right once |
| // Matches (dst >>> rshift) | (dst << lshift) with rshift typed immI1 and lshift |
| // typed immI_M1 (presumably the constants 1 and -1; an int shift count of -1 |
| // behaves as 31), and expands to the one-bit ROR helper. |
| instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ |
| match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); |
| |
| expand %{ |
| rorI_eReg_imm1(dst, rshift, cr); |
| %} |
| %} |
| |
| // ROR 32bit by immI8 once |
| // Matches (dst >>> rshift) | (dst << lshift) for constant counts. The predicate |
| // accepts the pattern only when the two shift constants sum to 0 modulo 32, |
| // which is what makes the OR of the two shifts a rotate by rshift. |
| instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ |
| predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); |
| match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); |
| |
| expand %{ |
| rorI_eReg_imm8(dst, rshift, cr); |
| %} |
| %} |
| |
| // ROR 32bit var by var once |
| // Matches (dst >>> shift) | (dst << (zero - shift)), the "0 - shift" spelling of |
| // a variable rotate right, and expands to ROR by the count register. |
| instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ |
| match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); |
| |
| expand %{ |
| rorI_eReg_CL(dst, shift, cr); |
| %} |
| %} |
| |
| // ROR 32bit var by var once |
| // Matches (dst >>> shift) | (dst << (c32 - shift)), the "32 - shift" spelling of |
| // a variable rotate right, and expands to ROR by the count register. |
| instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ |
| match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); |
| |
| expand %{ |
| rorI_eReg_CL(dst, shift, cr); |
| %} |
| %} |
| |
| // Xor Instructions |
| // Xor Register with Register |
| instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (XorI dst src)); |
| effect(KILL cr); |
| |
| size(2); |
| format %{ "XOR $dst,$src" %} |
| opcode(0x33); |
| ins_encode( OpcP, RegReg( dst, src) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // Xor Register with Immediate -1 |
| // dst ^ -1 is a bitwise complement, so this is emitted as NOT on dst. |
| // Unlike the other XOR forms here, no eFlagsReg operand or KILL effect is |
| // declared (presumably because NOT leaves the flags untouched -- confirm |
| // against the instruction reference). |
| instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ |
| match(Set dst (XorI dst imm)); |
| |
| size(2); |
| format %{ "NOT $dst" %} |
| ins_encode %{ |
| __ notl($dst$$Register); |
| %} |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Xor Register with Immediate |
| instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ |
| match(Set dst (XorI dst src)); |
| effect(KILL cr); |
| |
| format %{ "XOR $dst,$src" %} |
| opcode(0x81,0x06); /* Opcode 81 /6 id */ |
| // ins_encode( RegImm( dst, src) ); |
| ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); |
| ins_pipe( ialu_reg ); |
| %} |
| |
| // Xor Register with Memory |
| instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ |
| match(Set dst (XorI dst (LoadI src))); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "XOR $dst,$src" %} |
| opcode(0x33); |
| ins_encode( OpcP, RegMem(dst, src) ); |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| // Xor Memory with Register |
| instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (XorI (LoadI dst) src))); |
| effect(KILL cr); |
| |
| ins_cost(150); |
| format %{ "XOR $dst,$src" %} |
| opcode(0x31); /* Opcode 31 /r */ |
| ins_encode( OpcP, RegMem( src, dst ) ); |
| ins_pipe( ialu_mem_reg ); |
| %} |
| |
| // Xor Memory with Immediate |
| instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ |
| match(Set dst (StoreI dst (XorI (LoadI dst) src))); |
| effect(KILL cr); |
| |
| ins_cost(125); |
| format %{ "XOR $dst,$src" %} |
| opcode(0x81,0x6); /* Opcode 81 /6 id */ |
| ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); |
| ins_pipe( ialu_mem_imm ); |
| %} |
| |
| //----------Convert Int to Boolean--------------------------------------------- |
| |
| // Plain int register-to-register move with no match rule; used as the first |
| // step of the convI2B expansion. |
| instruct movI_nocopy(rRegI dst, rRegI src) %{ |
| effect( DEF dst, USE src ); |
| format %{ "MOV $dst,$src" %} |
| ins_encode( enc_Copy( dst, src) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // Second step of the convI2B expansion (no match rule). Expects dst to hold a |
| // copy of src on entry: NEG dst sets the carry flag iff the value is non-zero, |
| // and ADC dst,src then computes (-src) + src + carry, leaving 0 or 1 in dst. |
| instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ |
| effect( USE_DEF dst, USE src, KILL cr ); |
| |
| size(4); |
| format %{ "NEG $dst\n\t" |
| "ADC $dst,$src" %} |
| ins_encode( neg_reg(dst), |
| OpcRegReg(0x13,dst,src) ); |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // Conv2B of an int: expands to MOV dst,src followed by the NEG/ADC pair in |
| // ci2b, producing 1 for a non-zero src and 0 otherwise without a branch. |
| instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ |
| match(Set dst (Conv2B src)); |
| |
| expand %{ |
| movI_nocopy(dst,src); |
| ci2b(dst,src,cr); |
| %} |
| %} |
| |
| // Pointer-source variant of movI_nocopy: copies a pointer register into an int |
| // register; used as the first step of the convP2B expansion. |
| instruct movP_nocopy(rRegI dst, eRegP src) %{ |
| effect( DEF dst, USE src ); |
| format %{ "MOV $dst,$src" %} |
| ins_encode( enc_Copy( dst, src) ); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| // Pointer-source variant of ci2b: the same NEG dst / ADC dst,src sequence, |
| // leaving 1 in dst for a non-null src and 0 otherwise. Expects dst to already |
| // hold a copy of src. |
| instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ |
| effect( USE_DEF dst, USE src, KILL cr ); |
| format %{ "NEG $dst\n\t" |
| "ADC $dst,$src" %} |
| ins_encode( neg_reg(dst), |
| OpcRegReg(0x13,dst,src) ); |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // Conv2B of a pointer: expands to MOV dst,src followed by the NEG/ADC pair in |
| // cp2b, producing 1 for a non-null src and 0 otherwise without a branch. |
| instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ |
| match(Set dst (Conv2B src)); |
| |
| expand %{ |
| movP_nocopy(dst,src); |
| cp2b(dst,src,cr); |
| %} |
| %} |
| |
| // CmpLTMask of two registers: dst becomes all ones (-1) when p < q (signed) |
| // and zero otherwise. dst is cleared, SETlt writes 0/1 into its low byte and |
| // NEG turns that into 0/-1. p and q are typed ncxRegI while dst is eCXRegI, |
| // presumably so that clearing dst first can never destroy an input. |
| instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ |
| match(Set dst (CmpLTMask p q)); |
| effect(KILL cr); |
| ins_cost(400); |
| |
| // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination |
| format %{ "XOR $dst,$dst\n\t" |
| "CMP $p,$q\n\t" |
| "SETlt $dst\n\t" |
| "NEG $dst" %} |
| ins_encode %{ |
| Register Rp = $p$$Register; |
| Register Rq = $q$$Register; |
| Register Rd = $dst$$Register; |
| // Clear dst before the compare: an XOR after CMP would overwrite the flags |
| // that SETlt reads. |
| __ xorl(Rd, Rd); |
| __ cmpl(Rp, Rq); |
| __ setb(Assembler::less, Rd); |
| __ negl(Rd); |
| %} |
| |
| ins_pipe(pipe_slow); |
| %} |
| |
| // CmpLTMask against zero, in place: (dst < 0) ? -1 : 0 is just the sign bit |
| // replicated across the word, so a single arithmetic shift right by 31 does it. |
| instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{ |
| match(Set dst (CmpLTMask dst zero)); |
| effect(DEF dst, KILL cr); |
| ins_cost(100); |
| |
| format %{ "SAR $dst,31\t# cmpLTMask0" %} |
| ins_encode %{ |
| __ sarl($dst$$Register, 31); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| /* better to save a register than avoid a branch */ |
| // Conditional add: p = (p - q) + ((p < q) ? y : 0). Rather than building the |
| // CmpLTMask value in a register, the flags of the subtraction are branched on. |
| instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ |
| match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); |
| effect(KILL cr); |
| ins_cost(400); |
| format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" |
| "JGE done\n\t" |
| "ADD $p,$y\n" |
| "done: " %} |
| ins_encode %{ |
| Label skip_add; |
| // SUB leaves the signed p-vs-q comparison in the flags. |
| __ subl($p$$Register, $q$$Register); |
| // p >= q: the mask would be zero, so y is not added. |
| __ jccb(Assembler::greaterEqual, skip_add); |
| __ addl($p$$Register, $y$$Register); |
| __ bind(skip_add); |
| %} |
| |
| ins_pipe(pipe_cmplt); |
| %} |
| |
| /* better to save a register than avoid a branch */ |
| // Masked keep: y = (p < q) ? y : 0. y is left untouched when p < q and is |
| // zeroed otherwise, using a short branch instead of a materialized mask. |
| instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ |
| match(Set y (AndI (CmpLTMask p q) y)); |
| effect(KILL cr); |
| |
| ins_cost(300); |
| |
| format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" |
| "JLT done\n\t" |
| "XORL $y, $y\n" |
| "done: " %} |
| ins_encode %{ |
| Label keep_y; |
| __ cmpl($p$$Register, $q$$Register); |
| // p < q: the mask would be all ones, so y passes through unchanged. |
| __ jccb(Assembler::less, keep_y); |
| __ xorl($y$$Register, $y$$Register); |
| __ bind(keep_y); |
| %} |
| |
| ins_pipe(pipe_cmplt); |
| %} |
| |
| /* If I enable this, I encourage spilling in the inner loop of compress. |
| instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ |
| match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); |
| */ |
| //----------Overflow Math Instructions----------------------------------------- |
| |
| // Int add overflow check: defines the flags from op1 + op2. The ADD is done |
| // in place on op1, which is why op1 is declared USE_KILL. |
| instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) |
| %{ |
| match(Set cr (OverflowAddI op1 op2)); |
| effect(DEF cr, USE_KILL op1, USE op2); |
| |
| format %{ "ADD $op1, $op2\t# overflow check int" %} |
| |
| ins_encode %{ |
| __ addl($op1$$Register, $op2$$Register); |
| %} |
| ins_pipe(ialu_reg_reg); |
| %} |
| |
| // Int add overflow check against an immediate: defines the flags from |
| // op1 + constant. The ADD overwrites op1, hence USE_KILL. |
| instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) |
| %{ |
| match(Set cr (OverflowAddI op1 op2)); |
| effect(DEF cr, USE_KILL op1, USE op2); |
| |
| format %{ "ADD $op1, $op2\t# overflow check int" %} |
| |
| ins_encode %{ |
| __ addl($op1$$Register, $op2$$constant); |
| %} |
| ins_pipe(ialu_reg_reg); |
| %} |
| |
| // Int subtract overflow check: defines the flags from op1 - op2 using CMP, |
| // so neither operand is modified and no kill effect is declared. |
| instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) |
| %{ |
| match(Set cr (OverflowSubI op1 op2)); |
| |
| format %{ "CMP $op1, $op2\t# overflow check int" %} |
| ins_encode %{ |
| __ cmpl($op1$$Register, $op2$$Register); |
| %} |
| ins_pipe(ialu_reg_reg); |
| %} |
| |
| // Int subtract overflow check against an immediate: defines the flags from |
| // op1 - constant using CMP, leaving op1 unmodified. |
| instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) |
| %{ |
| match(Set cr (OverflowSubI op1 op2)); |
| |
| format %{ "CMP $op1, $op2\t# overflow check int" %} |
| ins_encode %{ |
| __ cmpl($op1$$Register, $op2$$constant); |
| %} |
| ins_pipe(ialu_reg_reg); |
| %} |
| |
| // Int negate overflow check: matches the OverflowSubI shape (zero - op2) and |
| // defines the flags with a NEG done in place on op2, hence USE_KILL op2. |
| instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2) |
| %{ |
| match(Set cr (OverflowSubI zero op2)); |
| effect(DEF cr, USE_KILL op2); |
| |
| format %{ "NEG $op2\t# overflow check int" %} |
| ins_encode %{ |
| __ negl($op2$$Register); |
| %} |
| ins_pipe(ialu_reg_reg); |
| %} |
| |
| // Int multiply overflow check: defines the flags from op1 * op2 using the |
| // two-operand IMUL, which overwrites op1 (hence USE_KILL op1). |
| instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) |
| %{ |
| match(Set cr (OverflowMulI op1 op2)); |
| effect(DEF cr, USE_KILL op1, USE op2); |
| |
| format %{ "IMUL $op1, $op2\t# overflow check int" %} |
| ins_encode %{ |
| __ imull($op1$$Register, $op2$$Register); |
| %} |
| ins_pipe(ialu_reg_reg_alu0); |
| %} |
| |
| // Int multiply overflow check against an immediate: uses the three-operand |
| // IMUL, writing the product to the TEMP register tmp so op1 is only read. |
| instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) |
| %{ |
| match(Set cr (OverflowMulI op1 op2)); |
| effect(DEF cr, TEMP tmp, USE op1, USE op2); |
| |
| format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} |
| ins_encode %{ |
| __ imull($tmp$$Register, $op1$$Register, $op2$$constant); |
| %} |
| ins_pipe(ialu_reg_reg_alu0); |
| %} |
| |
| //----------Long Instructions------------------------------------------------ |
| // Add Long Register with Register |
| instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ |
| match(Set dst (AddL dst src)); |
| effect(KILL cr); |
| ins_cost(200); |
| format %{ "ADD $dst.lo,$src.lo\n\t" |
| "ADC $dst.hi,$src.hi" %} |
| opcode(0x03, 0x13); |
| ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // Add Long Register with Immediate |
| instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ |
| match(Set dst (AddL dst src)); |
| effect(KILL cr); |
| format %{ "ADD $dst.lo,$src.lo\n\t" |
| "ADC $dst.hi,$src.hi" %} |
| opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ |
| ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Add Long Register with Memory |
| instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ |
| match(Set dst (AddL dst (LoadL mem))); |
| effect(KILL cr); |
| ins_cost(125); |
| format %{ "ADD $dst.lo,$mem\n\t" |
| "ADC $dst.hi,$mem+4" %} |
| opcode(0x03, 0x13); |
| ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); |
| ins_pipe( ialu_reg_long_mem ); |
| %} |
| |
| // Subtract Long Register with Register. |
| instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ |
| match(Set dst (SubL dst src)); |
| effect(KILL cr); |
| ins_cost(200); |
| format %{ "SUB $dst.lo,$src.lo\n\t" |
| "SBB $dst.hi,$src.hi" %} |
| opcode(0x2B, 0x1B); |
| ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // Subtract Long Register with Immediate |
| instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ |
| match(Set dst (SubL dst src)); |
| effect(KILL cr); |
| format %{ "SUB $dst.lo,$src.lo\n\t" |
| "SBB $dst.hi,$src.hi" %} |
| opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ |
| ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Subtract Long Register with Memory |
| instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ |
| match(Set dst (SubL dst (LoadL mem))); |
| effect(KILL cr); |
| ins_cost(125); |
| format %{ "SUB $dst.lo,$mem\n\t" |
| "SBB $dst.hi,$mem+4" %} |
| opcode(0x2B, 0x1B); |
| ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); |
| ins_pipe( ialu_reg_long_mem ); |
| %} |
| |
| // Negate Long Register: matches (zero - dst) in place. Per the format string |
| // the sequence is NEG hi, NEG lo, SBB hi,0 -- the SBB takes the borrow from |
| // negating the low word out of the high word. The bytes themselves come from |
| // the neg_long encoding class defined elsewhere in this file. |
| instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ |
| match(Set dst (SubL zero dst)); |
| effect(KILL cr); |
| ins_cost(300); |
| format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} |
| ins_encode( neg_long(dst) ); |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // And Long Register with Register |
| instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ |
| match(Set dst (AndL dst src)); |
| effect(KILL cr); |
| format %{ "AND $dst.lo,$src.lo\n\t" |
| "AND $dst.hi,$src.hi" %} |
| opcode(0x23,0x23); |
| ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // And Long Register with Immediate |
| instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ |
| match(Set dst (AndL dst src)); |
| effect(KILL cr); |
| format %{ "AND $dst.lo,$src.lo\n\t" |
| "AND $dst.hi,$src.hi" %} |
| opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ |
| ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // And Long Register with Memory |
| instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ |
| match(Set dst (AndL dst (LoadL mem))); |
| effect(KILL cr); |
| ins_cost(125); |
| format %{ "AND $dst.lo,$mem\n\t" |
| "AND $dst.hi,$mem+4" %} |
| opcode(0x23, 0x23); |
| ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); |
| ins_pipe( ialu_reg_long_mem ); |
| %} |
| |
| // BMI1 instructions |
| // 64-bit ANDN on register pairs: matches (src1 ^ minus_1) & src2, i.e. |
| // ~src1 & src2, and emits one andnl per 32-bit half. dst is declared TEMP, |
| // presumably so it is not allocated on top of either source pair, since the |
| // low half of dst is written before the high halves of the sources are read. |
| instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ |
| match(Set dst (AndL (XorL src1 minus_1) src2)); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr, TEMP dst); |
| |
| format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" |
| "ANDNL $dst.hi, $src1.hi, $src2.hi" |
| %} |
| |
| ins_encode %{ |
| // Low words. |
| __ andnl($dst$$Register, $src1$$Register, $src2$$Register); |
| // High words. |
| __ andnl(HIGH_FROM_LOW($dst$$Register), |
| HIGH_FROM_LOW($src1$$Register), |
| HIGH_FROM_LOW($src2$$Register)); |
| %} |
| ins_pipe(ialu_reg_reg_long); |
| %} |
| |
| // 64-bit ANDN with a long load as the second AND input: one andnl per 32-bit |
| // half, the high half reading from the same address with displacement + 4. |
| instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ |
| match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr, TEMP dst); |
| |
| ins_cost(125); |
| format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" |
| "ANDNL $dst.hi, $src1.hi, $src2+4" |
| %} |
| |
| ins_encode %{ |
| Register Rdst = $dst$$Register; |
| Register Rsrc1 = $src1$$Register; |
| // Address of the high 32 bits: same base/index/scale, displacement + 4. |
| Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); |
| |
| __ andnl(Rdst, Rsrc1, $src2$$Address); |
| __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // 64-bit BLSI on a register pair: matches (imm_zero - src) & src, i.e. isolate |
| // the lowest set bit of a long. dst.hi is pre-cleared and BLSI is applied to |
| // the low words; if that result is non-zero the bit was in the low word and |
| // the answer is complete, otherwise BLSI is applied to the high words. |
| // dst is TEMP, presumably so it does not share registers with src. |
| instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ |
| match(Set dst (AndL (SubL imm_zero src) src)); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr, TEMP dst); |
| |
| format %{ "MOVL $dst.hi, 0\n\t" |
| "BLSIL $dst.lo, $src.lo\n\t" |
| "JNZ done\n\t" |
| "BLSIL $dst.hi, $src.hi\n" |
| "done:" |
| %} |
| |
| ins_encode %{ |
| Label done; |
| Register Rdst = $dst$$Register; |
| Register Rsrc = $src$$Register; |
| __ movl(HIGH_FROM_LOW(Rdst), 0); |
| __ blsil(Rdst, Rsrc); |
| // Non-zero low result: high word stays 0. |
| __ jccb(Assembler::notZero, done); |
| __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); |
| __ bind(done); |
| %} |
| ins_pipe(ialu_reg); |
| %} |
| |
| // 64-bit BLSI with a long in memory: same low-then-high scheme as the register |
| // form, reading the low word at $src and the high word at displacement + 4. |
| instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ |
| match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr, TEMP dst); |
| |
| ins_cost(125); |
| format %{ "MOVL $dst.hi, 0\n\t" |
| "BLSIL $dst.lo, $src\n\t" |
| "JNZ done\n\t" |
| "BLSIL $dst.hi, $src+4\n" |
| "done:" |
| %} |
| |
| ins_encode %{ |
| Label done; |
| Register Rdst = $dst$$Register; |
| // Address of the high 32 bits: same base/index/scale, displacement + 4. |
| Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); |
| |
| __ movl(HIGH_FROM_LOW(Rdst), 0); |
| __ blsil(Rdst, $src$$Address); |
| // Non-zero low result: high word stays 0. |
| __ jccb(Assembler::notZero, done); |
| __ blsil(HIGH_FROM_LOW(Rdst), src_hi); |
| __ bind(done); |
| %} |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // 64-bit BLSMSK on a register pair: matches (src + minus_1) ^ src, the mask of |
| // all bits up to and including the lowest set bit. dst.hi is pre-cleared and |
| // BLSMSK is applied to the low words; the high words are processed only when |
| // that instruction leaves carry set (per the BLSMSK definition carry signals a |
| // zero source, i.e. the mask must extend into the high word). |
| instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) |
| %{ |
| match(Set dst (XorL (AddL src minus_1) src)); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr, TEMP dst); |
| |
| format %{ "MOVL $dst.hi, 0\n\t" |
| "BLSMSKL $dst.lo, $src.lo\n\t" |
| "JNC done\n\t" |
| "BLSMSKL $dst.hi, $src.hi\n" |
| "done:" |
| %} |
| |
| ins_encode %{ |
| Label done; |
| Register Rdst = $dst$$Register; |
| Register Rsrc = $src$$Register; |
| __ movl(HIGH_FROM_LOW(Rdst), 0); |
| __ blsmskl(Rdst, Rsrc); |
| // Carry clear: the mask ends inside the low word, high word stays 0. |
| __ jccb(Assembler::carryClear, done); |
| __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); |
| __ bind(done); |
| %} |
| |
| ins_pipe(ialu_reg); |
| %} |
| |
| // 64-bit BLSMSK with a long in memory: same low-then-high scheme as the |
| // register form, reading the low word at $src and the high word at |
| // displacement + 4. |
| instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) |
| %{ |
| match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr, TEMP dst); |
| |
| ins_cost(125); |
| format %{ "MOVL $dst.hi, 0\n\t" |
| "BLSMSKL $dst.lo, $src\n\t" |
| "JNC done\n\t" |
| "BLSMSKL $dst.hi, $src+4\n" |
| "done:" |
| %} |
| |
| ins_encode %{ |
| Label done; |
| Register Rdst = $dst$$Register; |
| // Address of the high 32 bits: same base/index/scale, displacement + 4. |
| Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); |
| |
| __ movl(HIGH_FROM_LOW(Rdst), 0); |
| __ blsmskl(Rdst, $src$$Address); |
| // Carry clear: the mask ends inside the low word, high word stays 0. |
| __ jccb(Assembler::carryClear, done); |
| __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); |
| __ bind(done); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // 64-bit BLSR on a register pair: matches (src + minus_1) & src, i.e. clear |
| // the lowest set bit of a long. dst.hi is pre-loaded with src.hi so it is |
| // already correct when the cleared bit lives in the low word; the high-word |
| // BLSR runs only when the low-word BLSR leaves carry set (per the BLSR |
| // definition carry signals a zero source, so the bit must be in the high word). |
| instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) |
| %{ |
| match(Set dst (AndL (AddL src minus_1) src) ); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr, TEMP dst); |
| |
| format %{ "MOVL $dst.hi, $src.hi\n\t" |
| "BLSRL $dst.lo, $src.lo\n\t" |
| "JNC done\n\t" |
| "BLSRL $dst.hi, $src.hi\n" |
| "done:" |
| %} |
| |
| ins_encode %{ |
| Label done; |
| Register Rdst = $dst$$Register; |
| Register Rsrc = $src$$Register; |
| __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); |
| __ blsrl(Rdst, Rsrc); |
| // Carry clear: a low-word bit was cleared, the copied high word is final. |
| __ jccb(Assembler::carryClear, done); |
| __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); |
| __ bind(done); |
| %} |
| |
| ins_pipe(ialu_reg); |
| %} |
| |
| // 64-bit BLSR with a long in memory: same scheme as the register form, with |
| // the high word loaded from and re-read at displacement + 4. |
| instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) |
| %{ |
| match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); |
| predicate(UseBMI1Instructions); |
| effect(KILL cr, TEMP dst); |
| |
| ins_cost(125); |
| format %{ "MOVL $dst.hi, $src+4\n\t" |
| "BLSRL $dst.lo, $src\n\t" |
| "JNC done\n\t" |
| "BLSRL $dst.hi, $src+4\n" |
| "done:" |
| %} |
| |
| ins_encode %{ |
| Label done; |
| Register Rdst = $dst$$Register; |
| // Address of the high 32 bits: same base/index/scale, displacement + 4. |
| Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); |
| __ movl(HIGH_FROM_LOW(Rdst), src_hi); |
| __ blsrl(Rdst, $src$$Address); |
| // Carry clear: a low-word bit was cleared, the loaded high word is final. |
| __ jccb(Assembler::carryClear, done); |
| __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); |
| __ bind(done); |
| %} |
| |
| ins_pipe(ialu_reg_mem); |
| %} |
| |
| // Or Long Register with Register |
| instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ |
| match(Set dst (OrL dst src)); |
| effect(KILL cr); |
| format %{ "OR $dst.lo,$src.lo\n\t" |
| "OR $dst.hi,$src.hi" %} |
| opcode(0x0B,0x0B); |
| ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // Or Long Register with Immediate |
| instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ |
| match(Set dst (OrL dst src)); |
| effect(KILL cr); |
| format %{ "OR $dst.lo,$src.lo\n\t" |
| "OR $dst.hi,$src.hi" %} |
| opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ |
| ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Or Long Register with Memory |
| instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ |
| match(Set dst (OrL dst (LoadL mem))); |
| effect(KILL cr); |
| ins_cost(125); |
| format %{ "OR $dst.lo,$mem\n\t" |
| "OR $dst.hi,$mem+4" %} |
| opcode(0x0B,0x0B); |
| ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); |
| ins_pipe( ialu_reg_long_mem ); |
| %} |
| |
| // Xor Long Register with Register |
| instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ |
| match(Set dst (XorL dst src)); |
| effect(KILL cr); |
| format %{ "XOR $dst.lo,$src.lo\n\t" |
| "XOR $dst.hi,$src.hi" %} |
| opcode(0x33,0x33); |
| ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // Xor Long Register with Immediate -1 |
| // dst ^ -1 is a bitwise complement, emitted as NOT on each 32-bit half. |
| // No eFlagsReg operand or KILL effect is declared here, unlike the other |
| // long XOR forms (presumably because NOT leaves the flags untouched). |
| instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ |
| match(Set dst (XorL dst imm)); |
| format %{ "NOT $dst.lo\n\t" |
| "NOT $dst.hi" %} |
| ins_encode %{ |
| __ notl($dst$$Register); |
| __ notl(HIGH_FROM_LOW($dst$$Register)); |
| %} |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Xor Long Register with Immediate |
| instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ |
| match(Set dst (XorL dst src)); |
| effect(KILL cr); |
| format %{ "XOR $dst.lo,$src.lo\n\t" |
| "XOR $dst.hi,$src.hi" %} |
| opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ |
| ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Xor Long Register with Memory |
| instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ |
| match(Set dst (XorL dst (LoadL mem))); |
| effect(KILL cr); |
| ins_cost(125); |
| format %{ "XOR $dst.lo,$mem\n\t" |
| "XOR $dst.hi,$mem+4" %} |
| opcode(0x33,0x33); |
| ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); |
| ins_pipe( ialu_reg_long_mem ); |
| %} |
| |
| // Shift Left Long by 1 |
| // Only selectable when UseNewLongLShift is set. The shift is done as a 64-bit |
| // doubling: ADD lo,lo produces the carry out of the low word and ADC hi,hi |
| // doubles the high word while shifting that carry in. |
| instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ |
| predicate(UseNewLongLShift); |
| match(Set dst (LShiftL dst cnt)); |
| effect(KILL cr); |
| ins_cost(100); |
| format %{ "ADD $dst.lo,$dst.lo\n\t" |
| "ADC $dst.hi,$dst.hi" %} |
| ins_encode %{ |
| __ addl($dst$$Register,$dst$$Register); |
| __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); |
| %} |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Shift Left Long by 2 |
| // Only selectable when UseNewLongLShift is set. Implemented as two 64-bit |
| // doublings, each an ADD on the low word followed by an ADC on the high word. |
| instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ |
| predicate(UseNewLongLShift); |
| match(Set dst (LShiftL dst cnt)); |
| effect(KILL cr); |
| ins_cost(100); |
| format %{ "ADD $dst.lo,$dst.lo\n\t" |
| "ADC $dst.hi,$dst.hi\n\t" |
| "ADD $dst.lo,$dst.lo\n\t" |
| "ADC $dst.hi,$dst.hi" %} |
| ins_encode %{ |
| Register Rlo = $dst$$Register; |
| Register Rhi = HIGH_FROM_LOW(Rlo); |
| // First doubling. |
| __ addl(Rlo, Rlo); |
| __ adcl(Rhi, Rhi); |
| // Second doubling. |
| __ addl(Rlo, Rlo); |
| __ adcl(Rhi, Rhi); |
| %} |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Shift Left Long by 3 |
| // Only selectable when UseNewLongLShift is set. Implemented as three 64-bit |
| // doublings, each an ADD on the low word followed by an ADC on the high word. |
| instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ |
| predicate(UseNewLongLShift); |
| match(Set dst (LShiftL dst cnt)); |
| effect(KILL cr); |
| ins_cost(100); |
| format %{ "ADD $dst.lo,$dst.lo\n\t" |
| "ADC $dst.hi,$dst.hi\n\t" |
| "ADD $dst.lo,$dst.lo\n\t" |
| "ADC $dst.hi,$dst.hi\n\t" |
| "ADD $dst.lo,$dst.lo\n\t" |
| "ADC $dst.hi,$dst.hi" %} |
| ins_encode %{ |
| Register Rlo = $dst$$Register; |
| Register Rhi = HIGH_FROM_LOW(Rlo); |
| // First doubling. |
| __ addl(Rlo, Rlo); |
| __ adcl(Rhi, Rhi); |
| // Second doubling. |
| __ addl(Rlo, Rlo); |
| __ adcl(Rhi, Rhi); |
| // Third doubling. |
| __ addl(Rlo, Rlo); |
| __ adcl(Rhi, Rhi); |
| %} |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Shift Left Long by 1-31 |
| // Per the format: SHLD shifts bits from the low word into the high word, |
| // then SHL shifts the low word. The byte emission is delegated to the |
| // move_long_small_shift encoding class, which is not defined in this section. |
| instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ |
| match(Set dst (LShiftL dst cnt)); |
| effect(KILL cr); |
| ins_cost(200); |
| format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" |
| "SHL $dst.lo,$cnt" %} |
| opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ |
| ins_encode( move_long_small_shift(dst,cnt) ); |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Shift Left Long by 32-63 |
| // Per the format: the low word is copied into the high word, the high word |
| // is shifted left by (cnt - 32), and the low word is cleared with XOR. |
| instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ |
| match(Set dst (LShiftL dst cnt)); |
| effect(KILL cr); |
| ins_cost(300); |
| format %{ "MOV $dst.hi,$dst.lo\n" |
| "\tSHL $dst.hi,$cnt-32\n" |
| "\tXOR $dst.lo,$dst.lo" %} |
| opcode(0xC1, 0x4); /* C1 /4 ib */ |
| ins_encode( move_long_big_shift_clr(dst,cnt) ); |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Shift Left Long by variable |
| // The shift count is an eCXRegI operand. Per the format: when bit 32 of the |
| // count is set, the low word is first moved into the high word and the low |
| // word zeroed; the SHLD/SHL pair at "small" then runs in both cases. |
| instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ |
| match(Set dst (LShiftL dst shift)); |
| effect(KILL cr); |
| ins_cost(500+200); |
| size(17); // declared size of the emitted sequence, in bytes |
| format %{ "TEST $shift,32\n\t" |
| "JEQ,s small\n\t" |
| "MOV $dst.hi,$dst.lo\n\t" |
| "XOR $dst.lo,$dst.lo\n" |
| "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" |
| "SHL $dst.lo,$shift" %} |
| ins_encode( shift_left_long( dst, shift ) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Shift Right Long by 1-31 |
| // Logical (unsigned) right shift, matching URShiftL. Per the format: SHRD |
| // shifts bits from the high word into the low word, then SHR shifts the |
| // high word. |
| instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ |
| match(Set dst (URShiftL dst cnt)); |
| effect(KILL cr); |
| ins_cost(200); |
| format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" |
| "SHR $dst.hi,$cnt" %} |
| opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ |
| ins_encode( move_long_small_shift(dst,cnt) ); |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Shift Right Long by 32-63 |
| // Logical (unsigned) right shift, matching URShiftL. Per the format: the |
| // high word is copied into the low word, the low word is shifted right by |
| // (cnt - 32), and the high word is cleared with XOR. |
| instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ |
| match(Set dst (URShiftL dst cnt)); |
| effect(KILL cr); |
| ins_cost(300); |
| format %{ "MOV $dst.lo,$dst.hi\n" |
| "\tSHR $dst.lo,$cnt-32\n" |
| "\tXOR $dst.hi,$dst.hi" %} |
| opcode(0xC1, 0x5); /* C1 /5 ib */ |
| ins_encode( move_long_big_shift_clr(dst,cnt) ); |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Shift Right Long by variable |
| // Logical (unsigned) right shift with the count in an eCXRegI operand. |
| // Per the format: when bit 32 of the count is set, the high word is first |
| // moved into the low word and the high word zeroed; the SHRD/SHR pair at |
| // "small" then runs in both cases. |
| instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ |
| match(Set dst (URShiftL dst shift)); |
| effect(KILL cr); |
| ins_cost(600); |
| size(17); // declared size of the emitted sequence, in bytes |
| format %{ "TEST $shift,32\n\t" |
| "JEQ,s small\n\t" |
| "MOV $dst.lo,$dst.hi\n\t" |
| "XOR $dst.hi,$dst.hi\n" |
| "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" |
| "SHR $dst.hi,$shift" %} |
| ins_encode( shift_right_long( dst, shift ) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Shift Right arithmetic Long by 1-31 |
| // Signed right shift, matching RShiftL. Per the format: SHRD shifts bits |
| // from the high word into the low word, then SAR shifts the high word so |
| // the sign is preserved. |
| instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ |
| match(Set dst (RShiftL dst cnt)); |
| effect(KILL cr); |
| ins_cost(200); |
| format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" |
| "SAR $dst.hi,$cnt" %} |
| opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ |
| ins_encode( move_long_small_shift(dst,cnt) ); |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Shift Right arithmetic Long by 32-63 |
| // Signed right shift, matching RShiftL. Per the format: the high word is |
| // copied into the low word and shifted right arithmetically by (cnt - 32); |
| // the high word is then filled with the sign via SAR by 31. |
| instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ |
| match(Set dst (RShiftL dst cnt)); |
| effect(KILL cr); |
| ins_cost(300); |
| format %{ "MOV $dst.lo,$dst.hi\n" |
| "\tSAR $dst.lo,$cnt-32\n" |
| "\tSAR $dst.hi,31" %} |
| opcode(0xC1, 0x7); /* C1 /7 ib */ |
| ins_encode( move_long_big_shift_sign(dst,cnt) ); |
| ins_pipe( ialu_reg_long ); |
| %} |
| |
| // Shift Right arithmetic Long by variable |
| // Signed right shift with the count in an eCXRegI operand. Per the format: |
| // when bit 32 of the count is set, the high word is first moved into the |
| // low word and the high word sign-filled (SAR by 31); the SHRD/SAR pair at |
| // "small" then runs in both cases. |
| instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ |
| match(Set dst (RShiftL dst shift)); |
| effect(KILL cr); |
| ins_cost(600); |
| size(18); // declared size of the emitted sequence, in bytes |
| format %{ "TEST $shift,32\n\t" |
| "JEQ,s small\n\t" |
| "MOV $dst.lo,$dst.hi\n\t" |
| "SAR $dst.hi,31\n" |
| "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" |
| "SAR $dst.hi,$shift" %} |
| ins_encode( shift_right_arith_long( dst, shift ) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| //----------Double Instructions------------------------------------------------ |
| // Double Math |
| |
| // Compare & branch |
| |
| // P6 version of double compare on the x87 stack, sets condition codes in EFLAGS. |
| // Requires CMOV support and UseSSE<=1 (see predicate). Per the format, an |
| // unordered result (NaN) is patched through AH/SAHF so CF ends up set, |
| // which is why EAX is killed. |
| instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ |
| predicate(VM_Version::supports_cmov() && UseSSE <=1); |
| match(Set cr (CmpD src1 src2)); |
| effect(KILL rax); |
| ins_cost(150); |
| format %{ "FLD $src1\n\t" |
| "FUCOMIP ST,$src2 // P6 instruction\n\t" |
| "JNP exit\n\t" |
| "MOV ah,1 // saw a NaN, set CF\n\t" |
| "SAHF\n" |
| "exit:\tNOP // avoid branch to branch" %} |
| opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ |
| ins_encode( Push_Reg_DPR(src1), |
| OpcP, RegOpc(src2), |
| cmpF_P6_fixup ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Variant of cmpDPR_cc_P6 that produces an eFlagsRegUCF result: the same |
| // FLD/FUCOMIP pair, but without the NaN fixup sequence and without |
| // killing EAX. |
| instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ |
| predicate(VM_Version::supports_cmov() && UseSSE <=1); |
| match(Set cr (CmpD src1 src2)); |
| ins_cost(150); |
| format %{ "FLD $src1\n\t" |
| "FUCOMIP ST,$src2 // P6 instruction" %} |
| opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ |
| ins_encode( Push_Reg_DPR(src1), |
| OpcP, RegOpc(src2)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Compare & branch |
| // x87 double compare (UseSSE<=1) without a CMOV requirement. Per the format: |
| // FCOMp sets the FPU status word, FNSTSW copies it to AX, and an unordered |
| // result is forced to look like "less than" before SAHF loads EFLAGS. |
| // EAX is killed because the status word passes through it. |
| instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ |
| predicate(UseSSE<=1); |
| match(Set cr (CmpD src1 src2)); |
| effect(KILL rax); |
| ins_cost(200); |
| format %{ "FLD $src1\n\t" |
| "FCOMp $src2\n\t" |
| "FNSTSW AX\n\t" |
| "TEST AX,0x400\n\t" |
| "JZ,s flags\n\t" |
| "MOV AH,1\t# unordered treat as LT\n" |
| "flags:\tSAHF" %} |
| opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ |
| ins_encode( Push_Reg_DPR(src1), |
| OpcP, RegOpc(src2), |
| fpu_flags); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Compare vs zero into -1,0,1 |
| // x87 three-way compare of a double against the constant zero (CmpD3). |
| // The encoding pushes src1, emits the secondary then the primary opcode |
| // byte (0xD9 0xE4, presumably FTST), pops the FPU stack and lets |
| // CmpF_Result materialize the integer result in dst. |
| instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (CmpD3 src1 zero)); |
| effect(KILL cr, KILL rax); |
| ins_cost(280); |
| format %{ "FTSTD $dst,$src1" %} |
| opcode(0xE4, 0xD9); |
| ins_encode( Push_Reg_DPR(src1), |
| OpcS, OpcP, PopFPU, |
| CmpF_Result(dst)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Compare into -1,0,1 |
| // x87 three-way compare of two doubles (CmpD3). Pushes src1, compares |
| // against src2 with D8 /3, then CmpF_Result materializes the integer |
| // result in dst. EAX and EFLAGS are killed. |
| instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (CmpD3 src1 src2)); |
| effect(KILL cr, KILL rax); |
| ins_cost(300); |
| format %{ "FCMPD $dst,$src1,$src2" %} |
| opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ |
| ins_encode( Push_Reg_DPR(src1), |
| OpcP, RegOpc(src2), |
| CmpF_Result(dst)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // double compare and set condition codes in EFLAGS by XMM regs |
| // SSE2 path (UseSSE>=2). UCOMISD is followed by emit_cmpfp_fixup; per the |
| // format, the fixup only runs when parity signals an unordered (NaN) result |
| // and rewrites the saved flags on the stack. |
| instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ |
| predicate(UseSSE>=2); |
| match(Set cr (CmpD src1 src2)); |
| ins_cost(145); |
| format %{ "UCOMISD $src1,$src2\n\t" |
| "JNP,s exit\n\t" |
| "PUSHF\t# saw NaN, set CF\n\t" |
| "AND [rsp], #0xffffff2b\n\t" |
| "POPF\n" |
| "exit:" %} |
| ins_encode %{ |
| __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); |
| emit_cmpfp_fixup(_masm); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Variant of cmpD_cc that produces an eFlagsRegUCF result: a bare UCOMISD |
| // with no NaN fixup, and therefore a lower cost (100 vs 145). |
| instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ |
| predicate(UseSSE>=2); |
| match(Set cr (CmpD src1 src2)); |
| ins_cost(100); |
| format %{ "UCOMISD $src1,$src2" %} |
| ins_encode %{ |
| __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // double compare and set condition codes in EFLAGS by XMM regs |
| // Same as the register-register SSE2 compare, but the second operand is |
| // loaded straight from memory (LoadD folded into UCOMISD). The NaN fixup |
| // from emit_cmpfp_fixup follows the compare. |
| instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ |
| predicate(UseSSE>=2); |
| match(Set cr (CmpD src1 (LoadD src2))); |
| ins_cost(145); |
| format %{ "UCOMISD $src1,$src2\n\t" |
| "JNP,s exit\n\t" |
| "PUSHF\t# saw NaN, set CF\n\t" |
| "AND [rsp], #0xffffff2b\n\t" |
| "POPF\n" |
| "exit:" %} |
| ins_encode %{ |
| __ ucomisd($src1$$XMMRegister, $src2$$Address); |
| emit_cmpfp_fixup(_masm); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Memory-operand compare producing an eFlagsRegUCF result: a bare UCOMISD |
| // against the folded LoadD, with no NaN fixup. |
| instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ |
| predicate(UseSSE>=2); |
| match(Set cr (CmpD src1 (LoadD src2))); |
| ins_cost(100); |
| format %{ "UCOMISD $src1,$src2" %} |
| ins_encode %{ |
| __ ucomisd($src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Compare into -1,0,1 in XMM |
| // SSE2 three-way double compare (CmpD3). Per the format: dst starts at -1 |
| // and stays there for unordered (JP) or below (JB); otherwise SETNE/MOVZB |
| // leave 0 for equal and 1 for greater. |
| instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ |
| predicate(UseSSE>=2); |
| match(Set dst (CmpD3 src1 src2)); |
| effect(KILL cr); |
| ins_cost(255); |
| format %{ "UCOMISD $src1, $src2\n\t" |
| "MOV $dst, #-1\n\t" |
| "JP,s done\n\t" |
| "JB,s done\n\t" |
| "SETNE $dst\n\t" |
| "MOVZB $dst, $dst\n" |
| "done:" %} |
| ins_encode %{ |
| __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); |
| emit_cmpfp3(_masm, $dst$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Compare into -1,0,1 in XMM and memory |
| // SSE2 three-way double compare (CmpD3) with the second operand loaded |
| // from memory. The result is materialized by emit_cmpfp3 exactly as in the |
| // format: -1 for unordered or below, 0 for equal, 1 for greater. |
| instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ |
| predicate(UseSSE>=2); |
| match(Set dst (CmpD3 src1 (LoadD src2))); |
| effect(KILL cr); |
| ins_cost(275); |
| format %{ "UCOMISD $src1, $src2\n\t" |
| "MOV $dst, #-1\n\t" |
| "JP,s done\n\t" |
| "JB,s done\n\t" |
| "SETNE $dst\n\t" |
| "MOVZB $dst, $dst\n" |
| "done:" %} |
| ins_encode %{ |
| __ ucomisd($src1$$XMMRegister, $src2$$Address); |
| emit_cmpfp3(_masm, $dst$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| // x87 double subtract, two-address form: dst = dst - src (UseSSE<=1). |
| // src is pushed on the FPU stack, then the DE /5 form subtracts it from |
| // dst and pops. |
| instruct subDPR_reg(regDPR dst, regDPR src) %{ |
| predicate (UseSSE <=1); |
| match(Set dst (SubD dst src)); |
| |
| format %{ "FLD $src\n\t" |
| "DSUBp $dst,ST" %} |
| opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ |
| ins_cost(150); |
| ins_encode( Push_Reg_DPR(src), |
| OpcP, RegOpc(dst) ); |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // x87 double subtract whose result is rounded to double by storing it to |
| // a stack slot: matches RoundDouble(SubD src1 src2). src2 is pushed, |
| // combined with src1 via D8 /5, and the result popped into dst. |
| instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ |
| predicate (UseSSE <=1); |
| match(Set dst (RoundDouble (SubD src1 src2))); |
| ins_cost(250); |
| |
| format %{ "FLD $src2\n\t" |
| "DSUB ST,$src1\n\t" |
| "FSTP_D $dst\t# D-round" %} |
| opcode(0xD8, 0x5); |
| ins_encode( Push_Reg_DPR(src2), |
| OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); |
| ins_pipe( fpu_mem_reg_reg ); |
| %} |
| |
| |
| // x87 double subtract with a memory operand: dst = dst - [src]. |
| // The tertiary opcode (DD /0) loads the double from memory onto the FPU |
| // stack; the primary opcode then subtracts it from dst and pops. |
| instruct subDPR_reg_mem(regDPR dst, memory src) %{ |
| predicate (UseSSE <=1); |
| match(Set dst (SubD dst (LoadD src))); |
| ins_cost(150); |
| |
| format %{ "FLD $src\n\t" |
| "DSUBp $dst,ST" %} |
| opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), |
| OpcP, RegOpc(dst) ); |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // x87 double absolute value. Both operands are regDPR1, so the encoding |
| // is just the two opcode bytes, secondary first (0xD9 0xE1), for FABS. |
| instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ |
| predicate (UseSSE<=1); |
| match(Set dst (AbsD src)); |
| ins_cost(100); |
| format %{ "FABS" %} |
| opcode(0xE1, 0xD9); |
| ins_encode( OpcS, OpcP ); |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // x87 double negate. Both operands are regDPR1, so the encoding is just |
| // the two opcode bytes, secondary first (0xD9 0xE0), for FCHS. |
| instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (NegD src)); |
| ins_cost(100); |
| format %{ "FCHS" %} |
| opcode(0xE0, 0xD9); |
| ins_encode( OpcS, OpcP ); |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // x87 double add, two-address form: dst = dst + src (UseSSE<=1). |
| // src is pushed on the FPU stack, then the DE /0 form adds it into dst. |
| // NOTE(review): sibling instructions print the popping form as "DADDp"; |
| // the "DADD" mnemonic in this format may simply be missing the "p". |
| instruct addDPR_reg(regDPR dst, regDPR src) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (AddD dst src)); |
| format %{ "FLD $src\n\t" |
| "DADD $dst,ST" %} |
| size(4); // declared size of the emitted sequence, in bytes |
| ins_cost(150); |
| opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ |
| ins_encode( Push_Reg_DPR(src), |
| OpcP, RegOpc(dst) ); |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| |
| // x87 double add whose result is rounded to double by storing it to a |
| // stack slot: matches RoundDouble(AddD src1 src2). src2 is pushed, src1 |
| // is added to the top of stack (D8 /0), and the sum is popped into dst. |
| instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (RoundDouble (AddD src1 src2))); |
| ins_cost(250); |
| |
| format %{ "FLD $src2\n\t" |
| "DADD ST,$src1\n\t" |
| "FSTP_D $dst\t# D-round" %} |
| opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ |
| ins_encode( Push_Reg_DPR(src2), |
| OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); |
| ins_pipe( fpu_mem_reg_reg ); |
| %} |
| |
| |
| // x87 double add with a memory operand: dst = dst + [src]. |
| // The tertiary opcode (DD /0) loads the double from memory onto the FPU |
| // stack; the primary opcode then adds it into dst and pops. |
| instruct addDPR_reg_mem(regDPR dst, memory src) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (AddD dst (LoadD src))); |
| ins_cost(150); |
| |
| format %{ "FLD $src\n\t" |
| "DADDp $dst,ST" %} |
| opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), |
| OpcP, RegOpc(dst) ); |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // add-to-memory |
| // Read-modify-write on a double in memory: loads [dst] (DD /0), adds src |
| // to the top of stack (D8 /0), and stores the sum back to [dst] (DD /3). |
| // The store doubles as the RoundDouble in the match rule. |
| // set_instruction_start is emitted before the final store; its purpose is |
| // defined outside this section. |
| instruct addDPR_mem_reg(memory dst, regDPR src) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); |
| ins_cost(150); |
| |
| format %{ "FLD_D $dst\n\t" |
| "DADD ST,$src\n\t" |
| "FST_D $dst" %} |
| opcode(0xDD, 0x0); |
| ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), |
| Opcode(0xD8), RegOpc(src), |
| set_instruction_start, |
| Opcode(0xDD), RMopc_Mem(0x03,dst) ); |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // x87 double add of the constant 1.0: dst = dst + 1.0. |
| // Uses FLD1 to push the constant instead of loading it from the constant |
| // table, then adds it into dst and pops. |
| instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (AddD dst con)); |
| ins_cost(125); |
| format %{ "FLD1\n\t" |
| "DADDp $dst,ST" %} |
| ins_encode %{ |
| __ fld1(); |
| __ faddp($dst$$reg); |
| %} |
| ins_pipe(fpu_reg); |
| %} |
| |
| // x87 double add of a general constant: dst = dst + con. |
| // The predicate excludes the constants 0.0 and 1.0. The constant is loaded |
| // from the constant table, added into dst, and popped. |
| instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ |
| predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); |
| match(Set dst (AddD dst con)); |
| ins_cost(200); |
| format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" |
| "DADDp $dst,ST" %} |
| ins_encode %{ |
| __ fld_d($constantaddress($con)); |
| __ faddp($dst$$reg); |
| %} |
| ins_pipe(fpu_reg_mem); |
| %} |
| |
| // x87 double add of a general constant with the result rounded by a store |
| // to a stack slot: matches RoundDouble(AddD src con). The predicate looks |
| // one level deeper (_kids[0]->_kids[1]) because the AddD is nested under |
| // RoundDouble; it excludes the constants 0.0 and 1.0. |
| instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ |
| predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); |
| match(Set dst (RoundDouble (AddD src con))); |
| ins_cost(200); |
| format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" |
| "DADD ST,$src\n\t" |
| "FSTP_D $dst\t# D-round" %} |
| ins_encode %{ |
| __ fld_d($constantaddress($con)); |
| __ fadd($src$$reg); |
| // The stack slot is addressed relative to rsp by its displacement. |
| __ fstp_d(Address(rsp, $dst$$disp)); |
| %} |
| ins_pipe(fpu_mem_reg_con); |
| %} |
| |
| // x87 double multiply, two-address form: dst = dst * src (UseSSE<=1). |
| // src is pushed on the FPU stack, then the DE /1 form multiplies it into |
| // dst and pops. |
| instruct mulDPR_reg(regDPR dst, regDPR src) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (MulD dst src)); |
| format %{ "FLD $src\n\t" |
| "DMULp $dst,ST" %} |
| opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ |
| ins_cost(150); |
| ins_encode( Push_Reg_DPR(src), |
| OpcP, RegOpc(dst) ); |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // Strict FP instruction biases argument before multiply then |
| // biases result to avoid double rounding of subnormals. |
| // |
| // scale arg1 by multiplying arg1 by 2^(-15360) |
| // load arg2 |
| // multiply scaled arg1 by arg2 |
| // rescale product by 2^(15360) |
| // |
| // Applies only on the x87 path (UseSSE<=1) when the current compilation |
| // has a method and that method is strict. dst is constrained to regDPR1 |
| // and src to regnotDPR1. |
| instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ |
| predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); |
| match(Set dst (MulD dst src)); |
| ins_cost(1); // Select this instruction for all strict FP double multiplies |
| |
| format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" |
| "DMULp $dst,ST\n\t" |
| "FLD $src\n\t" |
| "DMULp $dst,ST\n\t" |
| "FLD StubRoutines::_fpu_subnormal_bias2\n\t" |
| "DMULp $dst,ST\n\t" %} |
| opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ |
| ins_encode( strictfp_bias1(dst), |
| Push_Reg_DPR(src), |
| OpcP, RegOpc(dst), |
| strictfp_bias2(dst) ); |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // x87 double multiply by a general constant: dst = dst * con. |
| // The predicate excludes the constants 0.0 and 1.0. The constant is loaded |
| // from the constant table, multiplied into dst, and popped. |
| instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ |
| predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); |
| match(Set dst (MulD dst con)); |
| ins_cost(200); |
| format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" |
| "DMULp $dst,ST" %} |
| ins_encode %{ |
| __ fld_d($constantaddress($con)); |
| __ fmulp($dst$$reg); |
| %} |
| ins_pipe(fpu_reg_mem); |
| %} |
| |
| |
| // x87 double multiply with a memory operand: dst = dst * [src]. |
| // The tertiary opcode (DD /0) loads the double from memory onto the FPU |
| // stack; the primary opcode then multiplies it into dst and pops. |
| instruct mulDPR_reg_mem(regDPR dst, memory src) %{ |
| predicate( UseSSE<=1 ); |
| match(Set dst (MulD dst (LoadD src))); |
| ins_cost(200); |
| format %{ "FLD_D $src\n\t" |
| "DMULp $dst,ST" %} |
| opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), |
| OpcP, RegOpc(dst) ); |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // |
| // Cisc-alternate to reg-reg multiply |
| // Three-address form: dst = src * [mem]. The double is loaded from memory |
| // onto the FPU stack, multiplied by src (D8 C8+i), and popped into dst. |
| // The memory load must be the 64-bit form (DD /0, FLD m64fp) because the |
| // match rule folds a LoadD; D9 /0 would load a 32-bit float instead. |
| instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ |
| predicate( UseSSE<=1 ); |
| match(Set dst (MulD src (LoadD mem))); |
| ins_cost(250); |
| format %{ "FLD_D $mem\n\t" |
| "DMUL ST,$src\n\t" |
| "FSTP_D $dst" %} |
| opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */ /* LoadD DD /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), |
| OpcReg_FPR(src), |
| Pop_Reg_DPR(dst) ); |
| ins_pipe( fpu_reg_reg_mem ); |
| %} |
| |
| |
| // MACRO3 -- addDPR a mulDPR |
| // This instruction is a '2-address' instruction in that the result goes |
| // back to src2. This eliminates a move from the macro; possibly the |
| // register allocator will have to add it back (and maybe not). |
| // Computes src2 = (src0 * src1) + src2 on the x87 stack. |
| // NOTE(review): none of the encodings listed here takes the opcode() |
| // value as an explicit argument -- confirm whether opcode(0xDD) is used. |
| instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ |
| predicate( UseSSE<=1 ); |
| match(Set src2 (AddD (MulD src0 src1) src2)); |
| format %{ "FLD $src0\t# ===MACRO3d===\n\t" |
| "DMUL ST,$src1\n\t" |
| "DADDp $src2,ST" %} |
| ins_cost(250); |
| opcode(0xDD); /* LoadD DD /0 */ |
| ins_encode( Push_Reg_FPR(src0), |
| FMul_ST_reg(src1), |
| FAddP_reg_ST(src2) ); |
| ins_pipe( fpu_reg_reg_reg ); |
| %} |
| |
| |
| // MACRO3 -- subDPR a mulDPR |
| // Two-address form: src2 = (src0 * src1) - src2 on the x87 stack. |
| // The product is built on the top of stack and then combined with src2 |
| // by the reverse-subtract-and-pop form (DE E0+i), as the format shows. |
| instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ |
| predicate( UseSSE<=1 ); |
| match(Set src2 (SubD (MulD src0 src1) src2)); |
| format %{ "FLD $src0\t# ===MACRO3d===\n\t" |
| "DMUL ST,$src1\n\t" |
| "DSUBRp $src2,ST" %} |
| ins_cost(250); |
| ins_encode( Push_Reg_FPR(src0), |
| FMul_ST_reg(src1), |
| Opcode(0xDE), Opc_plus(0xE0,src2)); |
| ins_pipe( fpu_reg_reg_reg ); |
| %} |
| |
| |
| // x87 double divide, two-address form: dst = dst / src (UseSSE<=1). |
| // src is pushed on the FPU stack, then the DE /7 form divides dst by it |
| // and pops. |
| instruct divDPR_reg(regDPR dst, regDPR src) %{ |
| predicate( UseSSE<=1 ); |
| match(Set dst (DivD dst src)); |
| |
| format %{ "FLD $src\n\t" |
| "FDIVp $dst,ST" %} |
| opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ |
| ins_cost(150); |
| ins_encode( Push_Reg_DPR(src), |
| OpcP, RegOpc(dst) ); |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // Strict FP instruction biases argument before division then |
| // biases result, to avoid double rounding of subnormals. |
| // |
| // scale dividend by multiplying dividend by 2^(-15360) |
| // load divisor |
| // divide scaled dividend by divisor |
| // rescale quotient by 2^(15360) |
| // |
| // Applies only on the x87 path (UseSSE<=1) when the current compilation |
| // has a method and that method is strict. dst is constrained to regDPR1 |
| // and src to regnotDPR1. A single predicate is given; an earlier, weaker |
| // "UseSSE<=1" predicate was redundant with it and has been dropped. |
| instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ |
| predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); |
| match(Set dst (DivD dst src)); |
| ins_cost(1); // Select this instruction for all strict FP double divides |
| |
| format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" |
| "DMULp $dst,ST\n\t" |
| "FLD $src\n\t" |
| "FDIVp $dst,ST\n\t" |
| "FLD StubRoutines::_fpu_subnormal_bias2\n\t" |
| "DMULp $dst,ST\n\t" %} |
| opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ |
| ins_encode( strictfp_bias1(dst), |
| Push_Reg_DPR(src), |
| OpcP, RegOpc(dst), |
| strictfp_bias2(dst) ); |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // x87 double divide whose result is rounded to double by storing it to a |
| // stack slot: matches RoundDouble(DivD src1 src2). The predicate excludes |
| // strict methods. src1 is pushed, divided by src2 (D8 /6), and the |
| // quotient is popped into dst. |
| instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ |
| predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); |
| match(Set dst (RoundDouble (DivD src1 src2))); |
| |
| format %{ "FLD $src1\n\t" |
| "FDIV ST,$src2\n\t" |
| "FSTP_D $dst\t# D-round" %} |
| opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ |
| ins_encode( Push_Reg_DPR(src1), |
| OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); |
| ins_pipe( fpu_mem_reg_reg ); |
| %} |
| |
| |
| // x87 double remainder, two-address form: dst = dst mod src (UseSSE<=1). |
| // The operands are pushed by Push_Reg_Mod_DPR, the remainder is computed |
| // by emitModDPR, and the result is moved back into dst by the last two |
| // encodings. |
| instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (ModD dst src)); |
| effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS |
| |
| format %{ "DMOD $dst,$src" %} |
| ins_cost(250); |
| ins_encode(Push_Reg_Mod_DPR(dst, src), |
| emitModDPR(), |
| Push_Result_Mod_DPR(src), |
| Pop_Reg_DPR(dst)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // SSE2 double remainder (UseSSE>=2): dst = src0 mod src1. |
| // Per the format, both XMM operands are spilled through an 8-byte stack |
| // temporary onto the x87 stack, FPREM is repeated until the status word |
| // (read via AX/SAHF) no longer signals a partial result, and the remainder |
| // is moved back to dst through the same temporary. The final FSTP ST0 |
| // removes the remaining x87 stack entry. EAX and EFLAGS are killed. |
| instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ |
| predicate(UseSSE>=2); |
| match(Set dst (ModD src0 src1)); |
| effect(KILL rax, KILL cr); |
| |
| format %{ "SUB ESP,8\t # DMOD\n" |
| "\tMOVSD [ESP+0],$src1\n" |
| "\tFLD_D [ESP+0]\n" |
| "\tMOVSD [ESP+0],$src0\n" |
| "\tFLD_D [ESP+0]\n" |
| "loop:\tFPREM\n" |
| "\tFWAIT\n" |
| "\tFNSTSW AX\n" |
| "\tSAHF\n" |
| "\tJP loop\n" |
| "\tFSTP_D [ESP+0]\n" |
| "\tMOVSD $dst,[ESP+0]\n" |
| "\tADD ESP,8\n" |
| "\tFSTP ST0\t # Restore FPU Stack" |
| %} |
| ins_cost(250); |
| ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // x87 double sine. Both operands are regDPR1, so the encoding is just the |
| // two opcode bytes 0xD9 0xFE. |
| instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ |
| predicate (UseSSE<=1); |
| match(Set dst (SinD src)); |
| ins_cost(1800); |
| format %{ "DSIN $dst" %} |
| opcode(0xD9, 0xFE); |
| ins_encode( OpcP, OpcS ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // SSE2 double sine, computed in place on dst (UseSSE>=2). |
| // The XMM value is pushed onto the x87 stack, the opcode bytes 0xD9 0xFE |
| // are emitted, and the result is moved back into dst. |
| instruct sinD_reg(regD dst, eFlagsReg cr) %{ |
| predicate (UseSSE>=2); |
| match(Set dst (SinD dst)); |
| effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" |
| ins_cost(1800); |
| format %{ "DSIN $dst" %} |
| opcode(0xD9, 0xFE); |
| ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // x87 double cosine. Both operands are regDPR1, so the encoding is just |
| // the two opcode bytes 0xD9 0xFF. |
| instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ |
| predicate (UseSSE<=1); |
| match(Set dst (CosD src)); |
| ins_cost(1800); |
| format %{ "DCOS $dst" %} |
| opcode(0xD9, 0xFF); |
| ins_encode( OpcP, OpcS ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // SSE2 double cosine, computed in place on dst (UseSSE>=2). |
| // The XMM value is pushed onto the x87 stack, the opcode bytes 0xD9 0xFF |
| // are emitted, and the result is moved back into dst. |
| instruct cosD_reg(regD dst, eFlagsReg cr) %{ |
| predicate (UseSSE>=2); |
| match(Set dst (CosD dst)); |
| effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" |
| ins_cost(1800); |
| format %{ "DCOS $dst" %} |
| opcode(0xD9, 0xFF); |
| ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // x87 double tangent on the top of the FPU stack (regDPR1). |
| // FPTAN is followed by "fstp st", which pops the extra value the |
| // instruction leaves on top of the stack. |
| instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ |
| predicate (UseSSE<=1); |
| match(Set dst(TanD src)); |
| format %{ "DTAN $dst" %} |
| ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan |
| Opcode(0xDD), Opcode(0xD8)); // fstp st |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // SSE2 double tangent, computed in place on dst (UseSSE>=2). |
| // The XMM value is pushed onto the x87 stack, FPTAN and "fstp st" are |
| // emitted, and the result is moved back into dst. |
| instruct tanD_reg(regD dst, eFlagsReg cr) %{ |
| predicate (UseSSE>=2); |
| match(Set dst(TanD dst)); |
| effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" |
| format %{ "DTAN $dst" %} |
| ins_encode( Push_SrcD(dst), |
| Opcode(0xD9), Opcode(0xF2), // fptan |
| Opcode(0xDD), Opcode(0xD8), // fstp st |
| Push_ResultD(dst) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // x87 double arctangent of two operands, two-address form (UseSSE<=1). |
| // src is pushed on the FPU stack and the opcode bytes 0xD9 0xF3 are |
| // emitted, followed by RegOpc(dst). The format mnemonic is spelled DATAN |
| // to line up with DSIN/DCOS/DTAN. |
| instruct atanDPR_reg(regDPR dst, regDPR src) %{ |
| predicate (UseSSE<=1); |
| match(Set dst(AtanD dst src)); |
| format %{ "DATAN $dst,$src" %} |
| opcode(0xD9, 0xF3); |
| ins_encode( Push_Reg_DPR(src), |
| OpcP, OpcS, RegOpc(dst) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // SSE2 double arctangent of two operands (UseSSE>=2). |
| // src is pushed onto the x87 stack, the opcode bytes 0xD9 0xF3 are |
| // emitted, and the result is moved back into dst. The format mnemonic is |
| // spelled DATAN to line up with DSIN/DCOS/DTAN. |
| instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ |
| predicate (UseSSE>=2); |
| match(Set dst(AtanD dst src)); |
| effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" |
| format %{ "DATAN $dst,$src" %} |
| opcode(0xD9, 0xF3); |
| ins_encode( Push_SrcD(src), |
| OpcP, OpcS, Push_ResultD(dst) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // x87 double square root: dst = sqrt(src) (UseSSE<=1). |
| // src is pushed on the FPU stack, the opcode bytes are emitted secondary |
| // first (0xD9 0xFA), and the result is popped into dst. |
| instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ |
| predicate (UseSSE<=1); |
| match(Set dst (SqrtD src)); |
| format %{ "DSQRT $dst,$src" %} |
| opcode(0xFA, 0xD9); |
| ins_encode( Push_Reg_DPR(src), |
| OpcS, OpcP, Pop_Reg_DPR(dst) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // x87 double power: Y = X ^ Y, with Y constrained to regDPR1 (UseSSE<=1). |
| // EAX, EDX, ECX and EFLAGS are killed around the fast_pow() helper. |
| instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ |
| predicate (UseSSE<=1); |
| match(Set Y (PowD X Y)); // Raise X to the Yth power |
| effect(KILL rax, KILL rdx, KILL rcx, KILL cr); |
| format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %} |
| ins_encode %{ |
| // Reserve 8 bytes of stack around the call to fast_pow(). |
| __ subptr(rsp, 8); |
| // Push X onto the FPU stack; "$X$$reg - 1" presumably converts the |
| // allocator's register number into an ST(i) index -- TODO confirm. |
| __ fld_s($X$$reg - 1); |
| __ fast_pow(); |
| __ addptr(rsp, 8); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // SSE2 double power: dst = src0 ^ src1 (UseSSE>=2). |
| // Both XMM operands are transferred to the x87 stack through an 8-byte |
| // stack temporary, fast_pow() runs, and the result is moved back to dst |
| // through the same temporary. EAX, EDX, ECX and EFLAGS are killed. |
| instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ |
| predicate (UseSSE>=2); |
| match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power |
| effect(KILL rax, KILL rdx, KILL rcx, KILL cr); |
| format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} |
| ins_encode %{ |
| __ subptr(rsp, 8); |
| // Push src1 first, then src0, so src0 ends up on top of the FPU stack. |
| __ movdbl(Address(rsp, 0), $src1$$XMMRegister); |
| __ fld_d(Address(rsp, 0)); |
| __ movdbl(Address(rsp, 0), $src0$$XMMRegister); |
| __ fld_d(Address(rsp, 0)); |
| __ fast_pow(); |
| // Move the x87 result back into the XMM destination. |
| __ fstp_d(Address(rsp, 0)); |
| __ movdbl($dst$$XMMRegister, Address(rsp, 0)); |
| __ addptr(rsp, 8); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| // x87 double exponential, computed in place on regDPR1 (UseSSE<=1). |
| // The whole operation is delegated to fast_exp(); EAX, ECX, EDX and |
| // EFLAGS are killed. |
| instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ |
| predicate (UseSSE<=1); |
| match(Set dpr1 (ExpD dpr1)); |
| effect(KILL rax, KILL rcx, KILL rdx, KILL cr); |
| format %{ "fast_exp $dpr1 -> $dpr1 // KILL $rax, $rcx, $rdx" %} |
| ins_encode %{ |
| __ fast_exp(); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // SSE2 double exponential: dst = exp(src) (UseSSE>=2). |
| // src is transferred to the x87 stack through an 8-byte stack temporary, |
| // fast_exp() runs, and the result is moved back to dst through the same |
| // temporary. EAX, ECX, EDX and EFLAGS are killed. |
| // The format prints "$src -> $dst" to match the data flow in the code. |
| instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ |
| predicate (UseSSE>=2); |
| match(Set dst (ExpD src)); |
| effect(KILL rax, KILL rcx, KILL rdx, KILL cr); |
| format %{ "fast_exp $src -> $dst // KILL $rax, $rcx, $rdx" %} |
| ins_encode %{ |
| __ subptr(rsp, 8); |
| // Push src onto the FPU stack via the stack temporary. |
| __ movdbl(Address(rsp, 0), $src$$XMMRegister); |
| __ fld_d(Address(rsp, 0)); |
| __ fast_exp(); |
| // Move the x87 result back into the XMM destination. |
| __ fstp_d(Address(rsp, 0)); |
| __ movdbl($dst$$XMMRegister, Address(rsp, 0)); |
| __ addptr(rsp, 8); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // x87 double base-10 logarithm on the top of the FPU stack (UseSSE<=1). |
| // Both operands are regDPR1; the three-instruction sequence below is |
| // emitted as raw opcode bytes. |
| instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ |
| predicate (UseSSE<=1); |
| // The source Double operand on FPU stack |
| match(Set dst (Log10D src)); |
| // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number |
| // fxch ; swap ST(0) with ST(1) |
| // fyl2x ; compute log_10(2) * log_2(x) |
| format %{ "FLDLG2 \t\t\t#Log10\n\t" |
| "FXCH \n\t" |
| "FYL2X \t\t\t# Q=Log10*Log_2(x)" |
| %} |
| ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 |
| Opcode(0xD9), Opcode(0xC9), // fxch |
| Opcode(0xD9), Opcode(0xF1)); // fyl2x |
| |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // SSE2 double base-10 logarithm: dst = log10(src) (UseSSE>=2). |
| // The constant is pushed first and src second, so no FXCH is needed |
| // before FYL2X; the result is then moved back into dst. The format omits |
| // the push/pop of the XMM operands. |
| instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ |
| predicate (UseSSE>=2); |
| effect(KILL cr); |
| match(Set dst (Log10D src)); |
| // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number |
| // fyl2x ; compute log_10(2) * log_2(x) |
| format %{ "FLDLG2 \t\t\t#Log10\n\t" |
| "FYL2X \t\t\t# Q=Log10*Log_2(x)" |
| %} |
| ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 |
| Push_SrcD(src), |
| Opcode(0xD9), Opcode(0xF1), // fyl2x |
| Push_ResultD(dst)); |
| |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // x87 double natural logarithm on the top of the FPU stack (UseSSE<=1). |
| // Both operands are regDPR1; the three-instruction sequence below is |
| // emitted as raw opcode bytes. |
| instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{ |
| predicate (UseSSE<=1); |
| // The source Double operand on FPU stack |
| match(Set dst (LogD src)); |
| // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number |
| // fxch ; swap ST(0) with ST(1) |
| // fyl2x ; compute log_e(2) * log_2(x) |
| format %{ "FLDLN2 \t\t\t#Log_e\n\t" |
| "FXCH \n\t" |
| "FYL2X \t\t\t# Q=Log_e*Log_2(x)" |
| %} |
| ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 |
| Opcode(0xD9), Opcode(0xC9), // fxch |
| Opcode(0xD9), Opcode(0xF1)); // fyl2x |
| |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // SSE2 double natural logarithm: dst = log(src) (UseSSE>=2). |
| // The constant is pushed first and src second, so no FXCH is needed |
| // before FYL2X; the result is then moved back into dst. The format omits |
| // the push/pop of the XMM operands. |
| instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{ |
| predicate (UseSSE>=2); |
| effect(KILL cr); |
| // The source and result Double operands in XMM registers |
| match(Set dst (LogD src)); |
| // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number |
| // fyl2x ; compute log_e(2) * log_2(x) |
| format %{ "FLDLN2 \t\t\t#Log_e\n\t" |
| "FYL2X \t\t\t# Q=Log_e*Log_2(x)" |
| %} |
| ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 |
| Push_SrcD(src), |
| Opcode(0xD9), Opcode(0xF1), // fyl2x |
| Push_ResultD(dst)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| //-------------Float Instructions------------------------------- |
| // Float Math |
| |
| // Code for float compare: |
| // fcompp(); |
| // fwait(); fnstsw_ax(); |
| // sahf(); |
| // movl(dst, unordered_result); |
| // jcc(Assembler::parity, exit); |
| // movl(dst, less_result); |
| // jcc(Assembler::below, exit); |
| // movl(dst, equal_result); |
| // jcc(Assembler::equal, exit); |
| // movl(dst, greater_result); |
| // exit: |
| |
| // P6 version of float compare, sets condition codes in EFLAGS |
| // x87 path used only when UseSSE == 0 and CMOV is supported. Per the |
| // format, an unordered result (NaN) is patched through AH/SAHF so CF ends |
| // up set (treated as less-than), which is why EAX is killed. |
| instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ |
| predicate(VM_Version::supports_cmov() && UseSSE == 0); |
| match(Set cr (CmpF src1 src2)); |
| effect(KILL rax); |
| ins_cost(150); |
| format %{ "FLD $src1\n\t" |
| "FUCOMIP ST,$src2 // P6 instruction\n\t" |
| "JNP exit\n\t" |
| "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" |
| "SAHF\n" |
| "exit:\tNOP // avoid branch to branch" %} |
| opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ |
| ins_encode( Push_Reg_DPR(src1), |
| OpcP, RegOpc(src2), |
| cmpF_P6_fixup ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Variant of cmpFPR_cc_P6 that produces an eFlagsRegUCF result: the same |
| // FLD/FUCOMIP pair, but without the NaN fixup sequence and without |
| // killing EAX. |
| instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ |
| predicate(VM_Version::supports_cmov() && UseSSE == 0); |
| match(Set cr (CmpF src1 src2)); |
| ins_cost(100); |
| format %{ "FLD $src1\n\t" |
| "FUCOMIP ST,$src2 // P6 instruction" %} |
| opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ |
| ins_encode( Push_Reg_DPR(src1), |
| OpcP, RegOpc(src2)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| // Compare & branch |
| // x87 float compare (UseSSE == 0) without a CMOV requirement. Per the |
| // format: FCOMp sets the FPU status word, FNSTSW copies it to AX, and an |
| // unordered result is forced to look like "less than" before SAHF loads |
| // EFLAGS. EAX is killed because the status word passes through it. |
| instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ |
| predicate(UseSSE == 0); |
| match(Set cr (CmpF src1 src2)); |
| effect(KILL rax); |
| ins_cost(200); |
| format %{ "FLD $src1\n\t" |
| "FCOMp $src2\n\t" |
| "FNSTSW AX\n\t" |
| "TEST AX,0x400\n\t" |
| "JZ,s flags\n\t" |
| "MOV AH,1\t# unordered treat as LT\n" |
| "flags:\tSAHF" %} |
| opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ |
| ins_encode( Push_Reg_DPR(src1), |
| OpcP, RegOpc(src2), |
| fpu_flags); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Compare vs zero into -1,0,1 |
| // x87 three-way compare of a float against the constant zero (CmpF3). |
| // The encoding pushes src1, emits the secondary then the primary opcode |
| // byte (0xD9 0xE4, presumably FTST), pops the FPU stack and lets |
| // CmpF_Result materialize the integer result in dst. |
| instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ |
| predicate(UseSSE == 0); |
| match(Set dst (CmpF3 src1 zero)); |
| effect(KILL cr, KILL rax); |
| ins_cost(280); |
| format %{ "FTSTF $dst,$src1" %} |
| opcode(0xE4, 0xD9); |
| ins_encode( Push_Reg_DPR(src1), |
| OpcS, OpcP, PopFPU, |
| CmpF_Result(dst)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Compare into -1,0,1 |
| // x87 three-way compare of two floats (CmpF3). Pushes src1, compares |
| // against src2 with D8 /3, then CmpF_Result materializes the integer |
| // result in dst. EAX and EFLAGS are killed. |
| instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ |
| predicate(UseSSE == 0); |
| match(Set dst (CmpF3 src1 src2)); |
| effect(KILL cr, KILL rax); |
| ins_cost(300); |
| format %{ "FCMPF $dst,$src1,$src2" %} |
| opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ |
| ins_encode( Push_Reg_DPR(src1), |
| OpcP, RegOpc(src2), |
| CmpF_Result(dst)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // float compare and set condition codes in EFLAGS by XMM regs |
| // SSE path (UseSSE>=1). UCOMISS is followed by emit_cmpfp_fixup; per the |
| // format, the fixup only runs when parity signals an unordered (NaN) result |
| // and rewrites the saved flags on the stack. |
| instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ |
| predicate(UseSSE>=1); |
| match(Set cr (CmpF src1 src2)); |
| ins_cost(145); |
| format %{ "UCOMISS $src1,$src2\n\t" |
| "JNP,s exit\n\t" |
| "PUSHF\t# saw NaN, set CF\n\t" |
| "AND [rsp], #0xffffff2b\n\t" |
| "POPF\n" |
| "exit:" %} |
| ins_encode %{ |
| __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); |
| emit_cmpfp_fixup(_masm); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Variant of cmpF_cc that produces an eFlagsRegUCF result: a bare UCOMISS |
| // with no NaN fixup, and therefore a lower cost (100 vs 145). |
| instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ |
| predicate(UseSSE>=1); |
| match(Set cr (CmpF src1 src2)); |
| ins_cost(100); |
| format %{ "UCOMISS $src1,$src2" %} |
| ins_encode %{ |
| __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // float compare and set condition codes in EFLAGS by XMM regs |
| // Same as the register-register SSE compare, but the second operand is |
| // loaded straight from memory (LoadF folded into UCOMISS). The NaN fixup |
| // from emit_cmpfp_fixup follows the compare. |
| instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ |
| predicate(UseSSE>=1); |
| match(Set cr (CmpF src1 (LoadF src2))); |
| ins_cost(165); |
| format %{ "UCOMISS $src1,$src2\n\t" |
| "JNP,s exit\n\t" |
| "PUSHF\t# saw NaN, set CF\n\t" |
| "AND [rsp], #0xffffff2b\n\t" |
| "POPF\n" |
| "exit:" %} |
| ins_encode %{ |
| __ ucomiss($src1$$XMMRegister, $src2$$Address); |
| emit_cmpfp_fixup(_masm); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ |
| predicate(UseSSE>=1); /* register-vs-memory float compare into an eFlagsRegUCF operand; no NaN fixup is emitted */ |
| match(Set cr (CmpF src1 (LoadF src2))); |
| ins_cost(100); |
| format %{ "UCOMISS $src1,$src2" %} |
| ins_encode %{ |
| __ ucomiss($src1$$XMMRegister, $src2$$Address); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Three-way float compare of two XMM registers: (CmpF3 src1 src2) is compared into -1,0,1 in an integer register |
| instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ |
| predicate(UseSSE>=1); /* eligible when UseSSE is at least 1 */ |
| match(Set dst (CmpF3 src1 src2)); |
| effect(KILL cr); /* EFLAGS is overwritten by the compare */ |
| ins_cost(255); |
| format %{ "UCOMISS $src1, $src2\n\t" |
| "MOV $dst, #-1\n\t" |
| "JP,s done\n\t" |
| "JB,s done\n\t" |
| "SETNE $dst\n\t" |
| "MOVZB $dst, $dst\n" |
| "done:" %} |
| ins_encode %{ |
| __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); |
| emit_cmpfp3(_masm, $dst$$Register); /* helper defined outside this chunk; presumably emits the MOV/JP/JB/SETNE/MOVZB tail listed in the format text */ |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Three-way float compare of an XMM register against a float loaded from memory; result -1,0,1 goes to an integer register |
| instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ |
| predicate(UseSSE>=1); /* eligible when UseSSE is at least 1 */ |
| match(Set dst (CmpF3 src1 (LoadF src2))); /* the LoadF is folded into the compare */ |
| effect(KILL cr); /* EFLAGS is overwritten by the compare */ |
| ins_cost(275); |
| format %{ "UCOMISS $src1, $src2\n\t" |
| "MOV $dst, #-1\n\t" |
| "JP,s done\n\t" |
| "JB,s done\n\t" |
| "SETNE $dst\n\t" |
| "MOVZB $dst, $dst\n" |
| "done:" %} |
| ins_encode %{ |
| __ ucomiss($src1$$XMMRegister, $src2$$Address); |
| emit_cmpfp3(_masm, $dst$$Register); /* helper defined outside this chunk; presumably emits the tail listed in the format text */ |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // x87 float subtract (src1 - src2) for 24-bit mode: the result is spilled to a float stack slot to obtain 24-bit precision |
| instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ |
| predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, and only when 24-bit instruction selection is on */ |
| match(Set dst (SubF src1 src2)); |
| |
| format %{ "FSUB $dst,$src1 - $src2" %} |
| opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ |
| ins_encode( Push_Reg_FPR(src1), |
| OpcReg_FPR(src2), |
| Pop_Mem_FPR(dst) ); /* encoding classes are defined outside this chunk; by name: push src1, operate with src2, pop to memory */ |
| ins_pipe( fpu_mem_reg_reg ); |
| %} |
| // x87 float subtract, two-address form: dst = dst - src with the result kept in an FPU register. |
| // This instruction does not round to 24-bits |
| instruct subFPR_reg(regFPR dst, regFPR src) %{ |
| predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (SubF dst src)); /* dst is both the left input and the result */ |
| |
| format %{ "FSUB $dst,$src" %} |
| opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ |
| ins_encode( Push_Reg_FPR(src), |
| OpcP, RegOpc(dst) ); /* encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // x87 float add (src1 + src2) for 24-bit mode: the result is spilled to a float stack slot to obtain 24-bit precision |
| instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ |
| predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, and only when 24-bit instruction selection is on */ |
| match(Set dst (AddF src1 src2)); |
| |
| format %{ "FADD $dst,$src1,$src2" %} |
| opcode(0xD8, 0x0); /* D8 C0+i */ |
| ins_encode( Push_Reg_FPR(src2), |
| OpcReg_FPR(src1), |
| Pop_Mem_FPR(dst) ); /* src2 is pushed first here; encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_mem_reg_reg ); |
| %} |
| // x87 float add, two-address form: dst = dst + src with the result kept in an FPU register. |
| // This instruction does not round to 24-bits |
| instruct addFPR_reg(regFPR dst, regFPR src) %{ |
| predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (AddF dst src)); /* dst is both an input and the result */ |
| |
| format %{ "FLD $src\n\t" |
| "FADDp $dst,ST" %} |
| opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ |
| ins_encode( Push_Reg_FPR(src), |
| OpcP, RegOpc(dst) ); /* encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ |
| predicate(UseSSE==0); /* x87 float absolute value; both operands are constrained to the regFPR1 class */ |
| match(Set dst (AbsF src)); |
| ins_cost(100); |
| format %{ "FABS" %} |
| opcode(0xE1, 0xD9); /* primary 0xE1, secondary 0xD9 */ |
| ins_encode( OpcS, OpcP ); /* secondary is listed before primary, so presumably the bytes emitted are D9 E1 */ |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ |
| predicate(UseSSE==0); /* x87 float negate; both operands are constrained to the regFPR1 class */ |
| match(Set dst (NegF src)); |
| ins_cost(100); |
| format %{ "FCHS" %} |
| opcode(0xE0, 0xD9); /* primary 0xE0, secondary 0xD9 */ |
| ins_encode( OpcS, OpcP ); /* secondary is listed before primary, so presumably the bytes emitted are D9 E0 */ |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| // Cisc-alternate to addFPR_reg: float add of a register and a float loaded from memory |
| // Spill to obtain 24-bit precision |
| instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ |
| predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (AddF src1 (LoadF src2))); /* the LoadF is folded into this instruction */ |
| |
| format %{ "FLD $src2\n\t" |
| "FADD ST,$src1\n\t" |
| "FSTP_S $dst" %} |
| opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), |
| OpcReg_FPR(src1), |
| Pop_Mem_FPR(dst) ); /* tertiary opcode 0xD9 with /0 loads src2 first, per the opcode comment */ |
| ins_pipe( fpu_mem_reg_mem ); |
| %} |
| // Two-address float add of an FPU register and a float loaded from memory: dst = dst + [src]. |
| // Cisc-alternate to addFPR_reg |
| // This instruction does not round to 24-bits |
| instruct addFPR_reg_mem(regFPR dst, memory src) %{ |
| predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (AddF dst (LoadF src))); /* dst is both an input and the result */ |
| |
| format %{ "FADD $dst,$src" %} |
| opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), |
| OpcP, RegOpc(dst) ); /* tertiary opcode 0xD9 with /0 loads src first, per the opcode comment */ |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // // Following two instructions for _222_mpegaudio |
| // Spill to obtain 24-bit precision. Float add of a memory operand (src1) and an FPU register (src2) into a stack slot. |
| instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ |
| predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (AddF src1 src2)); /* NOTE(review): src1 is a memory operand but no LoadF appears in the match tree, unlike addFPR24_reg_mem; confirm this is intended */ |
| |
| format %{ "FADD $dst,$src1,$src2" %} |
| opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1), |
| OpcReg_FPR(src2), |
| Pop_Mem_FPR(dst) ); /* tertiary opcode 0xD9 with /0 loads src1 first, per the opcode comment */ |
| ins_pipe( fpu_mem_reg_mem ); |
| %} |
| |
| // Cisc-spill variant: float add where both inputs are memory operands and the result goes to a stack slot |
| // Spill to obtain 24-bit precision |
| instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ |
| predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (AddF src1 (LoadF src2))); /* only src2 is wrapped in LoadF; NOTE(review): src1 is also a memory operand, confirm intended */ |
| |
| format %{ "FADD $dst,$src1,$src2 cisc" %} |
| opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), |
| set_instruction_start, |
| OpcP, RMopc_Mem(secondary,src1), |
| Pop_Mem_FPR(dst) ); /* load src2, then primary opcode with the secondary value as the ModRM opcode field on src1, then pop to dst */ |
| ins_pipe( fpu_mem_mem_mem ); |
| %} |
| |
| // Float add of two memory operands for 24-bit mode; the result is spilled to a stack slot to obtain 24-bit precision |
| instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ |
| predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (AddF src1 src2)); /* NOTE(review): both inputs are memory operands with no LoadF in the match tree; confirm intended */ |
| |
| format %{ "FADD $dst,$src1,$src2" %} |
| opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), |
| set_instruction_start, |
| OpcP, RMopc_Mem(secondary,src1), |
| Pop_Mem_FPR(dst) ); /* load src2, then primary opcode with the secondary value as the ModRM opcode field on src1, then pop to dst */ |
| ins_pipe( fpu_mem_mem_mem ); |
| %} |
| |
| |
| // Float add of an FPU register and a float constant for 24-bit mode; the result is spilled to a stack slot to obtain 24-bit precision |
| instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ |
| predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (AddF src con)); |
| format %{ "FLD $src\n\t" |
| "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" |
| "FSTP_S $dst" %} |
| ins_encode %{ |
| __ fld_s($src$$reg - 1); // FLD ST(i-1): push a copy of src onto the FPU stack |
| __ fadd_s($constantaddress($con)); /* add the constant read from the constant table */ |
| __ fstp_s(Address(rsp, $dst$$disp)); /* store and pop into the stack slot at the displacement of dst from rsp */ |
| %} |
| ins_pipe(fpu_mem_reg_con); |
| %} |
| // Float add of an FPU register and a float constant with the result kept in an FPU register. |
| // This instruction does not round to 24-bits |
| instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ |
| predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (AddF src con)); |
| format %{ "FLD $src\n\t" |
| "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" |
| "FSTP $dst" %} |
| ins_encode %{ |
| __ fld_s($src$$reg - 1); // FLD ST(i-1): push a copy of src onto the FPU stack |
| __ fadd_s($constantaddress($con)); /* add the constant read from the constant table */ |
| __ fstp_d($dst$$reg); /* pop the sum into the dst register */ |
| %} |
| ins_pipe(fpu_reg_reg_con); |
| %} |
| |
| // x87 float multiply (src1 * src2) for 24-bit mode: the result is spilled to a float stack slot to obtain 24-bit precision |
| instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ |
| predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (MulF src1 src2)); |
| |
| format %{ "FLD $src1\n\t" |
| "FMUL $src2\n\t" |
| "FSTP_S $dst" %} |
| opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ |
| ins_encode( Push_Reg_FPR(src1), |
| OpcReg_FPR(src2), |
| Pop_Mem_FPR(dst) ); /* encoding classes are defined outside this chunk; by name: push src1, operate with src2, pop to memory */ |
| ins_pipe( fpu_mem_reg_reg ); |
| %} |
| // |
| // This instruction does not round to 24-bits |
| instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ |
| // x87 float multiply with the result kept in an FPU register (no 24-bit rounding spill). |
| // Eligible only when UseSSE is 0 and 24-bit instruction selection is off. |
| predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); |
| match(Set dst (MulF src1 src2)); |
| |
| // The listing mirrors the encoding below: src2 is pushed, multiplied by src1, |
| // and the product is popped into the dst register (not stored as a single to memory). |
| format %{ "FLD $src2\n\t" |
| "FMUL $src1\n\t" |
| "FSTP $dst" %} |
| opcode(0xD8, 0x1); /* D8 C8+i */ |
| ins_encode( Push_Reg_FPR(src2), |
| OpcReg_FPR(src1), |
| Pop_Reg_FPR(dst) ); |
| ins_pipe( fpu_reg_reg_reg ); |
| %} |
| |
| |
| // Spill to obtain 24-bit precision |
| // Cisc-alternate to reg-reg multiply: float multiply of a register and a float loaded from memory |
| instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ |
| predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (MulF src1 (LoadF src2))); /* the LoadF is folded into this instruction */ |
| |
| format %{ "FLD_S $src2\n\t" |
| "FMUL $src1\n\t" |
| "FSTP_S $dst" %} |
| opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), |
| OpcReg_FPR(src1), |
| Pop_Mem_FPR(dst) ); /* tertiary opcode 0xD9 with /0 loads src2 first, per the opcode comment */ |
| ins_pipe( fpu_mem_reg_mem ); |
| %} |
| // Float multiply of an FPU register and a float loaded from memory with the result kept in an FPU register. |
| // This instruction does not round to 24-bits |
| // Cisc-alternate to reg-reg multiply |
| instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ |
| predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (MulF src1 (LoadF src2))); /* the LoadF is folded into this instruction */ |
| |
| format %{ "FMUL $dst,$src1,$src2" %} |
| opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), |
| OpcReg_FPR(src1), |
| Pop_Reg_FPR(dst) ); /* tertiary opcode 0xD9 with /0 loads src2 first, per the opcode comment */ |
| ins_pipe( fpu_reg_reg_mem ); |
| %} |
| |
| // Float multiply of two memory operands for 24-bit mode; the result is spilled to a stack slot to obtain 24-bit precision |
| instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ |
| predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (MulF src1 src2)); /* NOTE(review): both inputs are memory operands with no LoadF in the match tree; confirm intended */ |
| |
| format %{ "FMUL $dst,$src1,$src2" %} |
| opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), |
| set_instruction_start, |
| OpcP, RMopc_Mem(secondary,src1), |
| Pop_Mem_FPR(dst) ); /* load src2, then primary opcode with the secondary value as the ModRM opcode field on src1, then pop to dst */ |
| ins_pipe( fpu_mem_mem_mem ); |
| %} |
| |
| // Float multiply of an FPU register by a float constant for 24-bit mode; the result is spilled to a stack slot to obtain 24-bit precision |
| instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ |
| predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (MulF src con)); |
| |
| format %{ "FLD $src\n\t" |
| "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" |
| "FSTP_S $dst" %} |
| ins_encode %{ |
| __ fld_s($src$$reg - 1); // FLD ST(i-1): push a copy of src onto the FPU stack |
| __ fmul_s($constantaddress($con)); /* multiply by the constant read from the constant table */ |
| __ fstp_s(Address(rsp, $dst$$disp)); /* store and pop into the stack slot at the displacement of dst from rsp */ |
| %} |
| ins_pipe(fpu_mem_reg_con); |
| %} |
| // Float multiply of an FPU register by a float constant with the result kept in an FPU register. |
| // This instruction does not round to 24-bits |
| instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ |
| predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (MulF src con)); |
| |
| format %{ "FLD $src\n\t" |
| "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" |
| "FSTP $dst" %} |
| ins_encode %{ |
| __ fld_s($src$$reg - 1); // FLD ST(i-1): push a copy of src onto the FPU stack |
| __ fmul_s($constantaddress($con)); /* multiply by the constant read from the constant table */ |
| __ fstp_d($dst$$reg); /* pop the product into the dst register */ |
| %} |
| ins_pipe(fpu_reg_reg_con); |
| %} |
| |
| |
| // Float multiply whose left input is a float loaded from memory; the load is folded into the instruction. |
| // MACRO1 -- subsume unshared load into mulFPR |
| // This instruction does not round to 24-bits |
| instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ |
| predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (MulF (LoadF mem1) src)); |
| |
| format %{ "FLD $mem1 ===MACRO1===\n\t" |
| "FMUL ST,$src\n\t" |
| "FSTP $dst" %} |
| opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ |
| ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), |
| OpcReg_FPR(src), |
| Pop_Reg_FPR(dst) ); /* tertiary opcode 0xD9 with /0 loads mem1 first, per the opcode comment */ |
| ins_pipe( fpu_reg_reg_mem ); |
| %} |
| // Fused pattern: dst = ([mem1] * src1) + src2, matched as one instruction with the load folded in. |
| // MACRO2 -- addFPR a mulFPR which subsumed an unshared load |
| // This instruction does not round to 24-bits |
| instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ |
| predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); |
| ins_cost(95); |
| |
| format %{ "FLD $mem1 ===MACRO2===\n\t" |
| "FMUL ST,$src1 subsume mulFPR left load\n\t" |
| "FADD ST,$src2\n\t" |
| "FSTP $dst" %} |
| opcode(0xD9); /* LoadF D9 /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,mem1), |
| FMul_ST_reg(src1), |
| FAdd_ST_reg(src2), |
| Pop_Reg_FPR(dst) ); /* load, multiply, add, pop: same order as the format text; encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_reg_mem_reg_reg ); |
| %} |
| |
| // MACRO3 -- addFPR a mulFPR: src2 = (src0 * src1) + src2 |
| // This instruction does not round to 24-bits. It is a '2-address' |
| // instruction in that the result goes back to src2. This eliminates |
| // a move from the macro; possibly the register allocator will have |
| // to add it back (and maybe not). |
| instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ |
| predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set src2 (AddF (MulF src0 src1) src2)); /* src2 is both the addend and the result */ |
| |
| format %{ "FLD $src0 ===MACRO3===\n\t" |
| "FMUL ST,$src1\n\t" |
| "FADDP $src2,ST" %} |
| opcode(0xD9); /* LoadF D9 /0 */ |
| ins_encode( Push_Reg_FPR(src0), |
| FMul_ST_reg(src1), |
| FAddP_reg_ST(src2) ); /* push, multiply, add-and-pop: same order as the format text */ |
| ins_pipe( fpu_reg_reg_reg ); |
| %} |
| |
| // MACRO4 -- divFPR subFPR: dst = (src2 - src1) / src3 matched as one instruction |
| // This instruction does not round to 24-bits |
| instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ |
| predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (DivF (SubF src2 src1) src3)); /* note the operand order: src2 is the minuend */ |
| |
| format %{ "FLD $src2 ===MACRO4===\n\t" |
| "FSUB ST,$src1\n\t" |
| "FDIV ST,$src3\n\t" |
| "FSTP $dst" %} |
| opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ |
| ins_encode( Push_Reg_FPR(src2), |
| subFPR_divFPR_encode(src1,src3), |
| Pop_Reg_FPR(dst) ); /* encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_reg_reg_reg_reg ); |
| %} |
| |
| // x87 float divide (src1 / src2) for 24-bit mode: the result is spilled to a float stack slot to obtain 24-bit precision |
| instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ |
| predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (DivF src1 src2)); |
| |
| format %{ "FDIV $dst,$src1,$src2" %} |
| opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ |
| ins_encode( Push_Reg_FPR(src1), |
| OpcReg_FPR(src2), |
| Pop_Mem_FPR(dst) ); /* encoding classes are defined outside this chunk; by name: push src1, operate with src2, pop to memory */ |
| ins_pipe( fpu_mem_reg_reg ); |
| %} |
| // x87 float divide, two-address form: dst = dst / src with the result kept in an FPU register. |
| // This instruction does not round to 24-bits |
| instruct divFPR_reg(regFPR dst, regFPR src) %{ |
| predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (DivF dst src)); /* dst is both the dividend and the result */ |
| |
| format %{ "FDIV $dst,$src" %} |
| opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ |
| ins_encode( Push_Reg_FPR(src), |
| OpcP, RegOpc(dst) ); /* encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| |
| // x87 float remainder (ModF src1 src2) for 24-bit mode; the result is spilled to a stack slot to obtain 24-bit precision |
| instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ |
| predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (ModF src1 src2)); |
| effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS |
| |
| format %{ "FMOD $dst,$src1,$src2" %} |
| ins_encode( Push_Reg_Mod_DPR(src1, src2), |
| emitModDPR(), |
| Push_Result_Mod_DPR(src2), |
| Pop_Mem_FPR(dst)); /* the double-precision Mod encoding classes are reused for floats here; they are defined outside this chunk */ |
| ins_pipe( pipe_slow ); |
| %} |
| // x87 float remainder, two-address form: dst = dst mod src with the result kept in an FPU register. |
| // This instruction does not round to 24-bits |
| instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ |
| predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (ModF dst src)); /* dst is both the left input and the result */ |
| effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS |
| |
| format %{ "FMOD $dst,$src" %} |
| ins_encode(Push_Reg_Mod_DPR(dst, src), |
| emitModDPR(), |
| Push_Result_Mod_DPR(src), |
| Pop_Reg_FPR(dst)); /* the double-precision Mod encoding classes are reused for floats here; they are defined outside this chunk */ |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ |
| predicate(UseSSE>=1); /* float remainder for XMM operands; per the format text the values go through the stack to the x87 unit and FPREM loops until done */ |
| match(Set dst (ModF src0 src1)); |
| effect(KILL rax, KILL cr); /* the format text shows FNSTSW AX and SAHF, which overwrite EAX and EFLAGS */ |
| format %{ "SUB ESP,4\t # FMOD\n" |
| "\tMOVSS [ESP+0],$src1\n" |
| "\tFLD_S [ESP+0]\n" |
| "\tMOVSS [ESP+0],$src0\n" |
| "\tFLD_S [ESP+0]\n" |
| "loop:\tFPREM\n" |
| "\tFWAIT\n" |
| "\tFNSTSW AX\n" |
| "\tSAHF\n" |
| "\tJP loop\n" |
| "\tFSTP_S [ESP+0]\n" |
| "\tMOVSS $dst,[ESP+0]\n" |
| "\tADD ESP,4\n" |
| "\tFSTP ST0\t # Restore FPU Stack" |
| %} |
| ins_cost(250); |
| ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); /* encoding classes are defined outside this chunk; 0x4 presumably is the 4-byte stack adjustment shown in the format */ |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| //----------Arithmetic Conversion Instructions--------------------------------- |
| // The conversions operations are all Alpha sorted. Please keep it that way! |
| |
| instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ |
| predicate(UseSSE==0); /* x87 only: RoundFloat is implemented by storing the FPU register to a float stack slot */ |
| match(Set dst (RoundFloat src)); |
| ins_cost(125); |
| format %{ "FST_S $dst,$src\t# F-round" %} |
| ins_encode( Pop_Mem_Reg_FPR(dst, src) ); /* encoding class defined outside this chunk */ |
| ins_pipe( fpu_mem_reg ); |
| %} |
| |
| instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ |
| predicate(UseSSE<=1); /* doubles live on the x87 stack when UseSSE is 0 or 1; RoundDouble stores the register to a double stack slot */ |
| match(Set dst (RoundDouble src)); |
| ins_cost(125); |
| format %{ "FST_D $dst,$src\t# D-round" %} |
| ins_encode( Pop_Mem_Reg_DPR(dst, src) ); /* encoding class defined outside this chunk */ |
| ins_pipe( fpu_mem_reg ); |
| %} |
| |
| // Double to float on x87: force rounding to single precision (24-bit significand, 8-bit exponent) by storing to a float stack slot |
| instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ |
| predicate(UseSSE==0); /* x87 only */ |
| match(Set dst (ConvD2F src)); |
| format %{ "FST_S $dst,$src\t# F-round" %} |
| expand %{ |
| roundFloat_mem_reg(dst,src); /* implemented by expanding to the roundFloat_mem_reg rule */ |
| %} |
| %} |
| |
| // Double (x87 register) to float (XMM register): round to single precision (24-bit significand, 8-bit exponent) through a 4-byte stack temporary |
| instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ |
| predicate(UseSSE==1); /* only when floats are in XMM but doubles are still on the x87 stack */ |
| match(Set dst (ConvD2F src)); |
| effect( KILL cr ); /* EFLAGS is declared clobbered; the encoding adjusts the stack pointer arithmetically */ |
| format %{ "SUB ESP,4\n\t" |
| "FST_S [ESP],$src\t# F-round\n\t" |
| "MOVSS $dst,[ESP]\n\t" |
| "ADD ESP,4" %} |
| ins_encode %{ |
| __ subptr(rsp, 4); /* reserve a 4-byte temporary */ |
| if ($src$$reg != FPR1L_enc) { |
| __ fld_s($src$$reg-1); /* src is not the FPR1L encoding: push a copy, then store-and-pop it */ |
| __ fstp_s(Address(rsp, 0)); |
| } else { |
| __ fst_s(Address(rsp, 0)); /* src has the FPR1L encoding: store without popping */ |
| } |
| __ movflt($dst$$XMMRegister, Address(rsp, 0)); /* reload the rounded single into the XMM destination */ |
| __ addptr(rsp, 4); /* release the temporary */ |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Double to float between XMM registers: CVTSD2SS rounds double precision to single precision |
| instruct convD2F_reg(regF dst, regD src) %{ |
| predicate(UseSSE>=2); /* both floats and doubles are in XMM registers */ |
| match(Set dst (ConvD2F src)); |
| format %{ "CVTSD2SS $dst,$src\t# F-round" %} |
| ins_encode %{ |
| __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ |
| predicate(UseSSE==0); /* x87 only: float to double conversion between FPU registers */ |
| match(Set dst (ConvF2D src)); |
| format %{ "FST_S $dst,$src\t# D-round" %} |
| ins_encode( Pop_Reg_Reg_DPR(dst, src)); /* encoding class defined outside this chunk; NOTE(review): the format text says FST_S although dst is a register operand, confirm the listing */ |
| ins_pipe( fpu_reg_reg ); |
| %} |
| |
| instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ |
| predicate(UseSSE==1); /* float to double with an x87 float source and a double stack slot result */ |
| match(Set dst (ConvF2D src)); |
| format %{ "FST_D $dst,$src\t# D-round" %} |
| expand %{ |
| roundDouble_mem_reg(dst,src); /* expands to roundDouble_mem_reg; NOTE(review): that rule declares a regDPR source while src here is regFPR, confirm intended */ |
| %} |
| %} |
| |
| instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ |
| predicate(UseSSE==1); /* float in an XMM register to double in an x87 register, through a 4-byte stack temporary */ |
| match(Set dst (ConvF2D src)); |
| effect( KILL cr ); /* EFLAGS is declared clobbered; the encoding adjusts the stack pointer arithmetically */ |
| format %{ "SUB ESP,4\n\t" |
| "MOVSS [ESP] $src\n\t" |
| "FLD_S [ESP]\n\t" |
| "ADD ESP,4\n\t" |
| "FSTP $dst\t# D-round" %} |
| ins_encode %{ |
| __ subptr(rsp, 4); /* reserve a 4-byte temporary */ |
| __ movflt(Address(rsp, 0), $src$$XMMRegister); /* spill the XMM float */ |
| __ fld_s(Address(rsp, 0)); /* load it onto the x87 stack */ |
| __ addptr(rsp, 4); /* release the temporary */ |
| __ fstp_d($dst$$reg); /* pop into the destination FPU register */ |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct convF2D_reg(regD dst, regF src) %{ |
| predicate(UseSSE>=2); /* float to double between XMM registers */ |
| match(Set dst (ConvF2D src)); |
| format %{ "CVTSS2SD $dst,$src\t# D-round" %} |
| ins_encode %{ |
| __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Convert a double (x87 register) to an int in EAX. If the double is a NAN, stuff a zero in instead. |
| instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ |
| predicate(UseSSE<=1); /* doubles live on the x87 stack when UseSSE is 0 or 1 */ |
| match(Set dst (ConvD2I src)); |
| effect( KILL tmp, KILL cr ); /* EDX (tmp) and EFLAGS are declared clobbered */ |
| format %{ "FLD $src\t# Convert double to int \n\t" |
| "FLDCW trunc mode\n\t" |
| "SUB ESP,4\n\t" |
| "FISTp [ESP + #0]\n\t" |
| "FLDCW std/24-bit mode\n\t" |
| "POP EAX\n\t" |
| "CMP EAX,0x80000000\n\t" |
| "JNE,s fast\n\t" |
| "FLD_D $src\n\t" |
| "CALL d2i_wrapper\n" |
| "fast:" %} |
| ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); /* encoding classes are defined outside this chunk; the format text shows the slow path is taken when the stored result is 0x80000000 */ |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Convert a double (XMM register) to an int in EAX. If the double is a NAN, stuff a zero in instead. |
| instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ |
| predicate(UseSSE>=2); /* doubles are in XMM registers */ |
| match(Set dst (ConvD2I src)); |
| effect( KILL tmp, KILL cr ); /* EDX (tmp) and EFLAGS are declared clobbered */ |
| format %{ "CVTTSD2SI $dst, $src\n\t" |
| "CMP $dst,0x80000000\n\t" |
| "JNE,s fast\n\t" |
| "SUB ESP, 8\n\t" |
| "MOVSD [ESP], $src\n\t" |
| "FLD_D [ESP]\n\t" |
| "ADD ESP, 8\n\t" |
| "CALL d2i_wrapper\n" |
| "fast:" %} |
| ins_encode %{ |
| Label fast; |
| __ cvttsd2sil($dst$$Register, $src$$XMMRegister); /* truncating conversion */ |
| __ cmpl($dst$$Register, 0x80000000); /* 0x80000000 is the value that selects the slow path */ |
| __ jccb(Assembler::notEqual, fast); |
| __ subptr(rsp, 8); /* slow path: pass the operand on the x87 stack */ |
| __ movdbl(Address(rsp, 0), $src$$XMMRegister); |
| __ fld_d(Address(rsp, 0)); |
| __ addptr(rsp, 8); |
| __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); /* stub defined elsewhere; presumably leaves the corrected result in EAX */ |
| __ bind(fast); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ |
| predicate(UseSSE<=1); /* double (x87 register) to long; the result operand is the eADXRegL class */ |
| match(Set dst (ConvD2L src)); |
| effect( KILL cr ); /* EFLAGS is declared clobbered */ |
| format %{ "FLD $src\t# Convert double to long\n\t" |
| "FLDCW trunc mode\n\t" |
| "SUB ESP,8\n\t" |
| "FISTp [ESP + #0]\n\t" |
| "FLDCW std/24-bit mode\n\t" |
| "POP EAX\n\t" |
| "POP EDX\n\t" |
| "CMP EDX,0x80000000\n\t" |
| "JNE,s fast\n\t" |
| "TEST EAX,EAX\n\t" |
| "JNE,s fast\n\t" |
| "FLD $src\n\t" |
| "CALL d2l_wrapper\n" |
| "fast:" %} |
| ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); /* encoding classes are defined outside this chunk; the format text shows the slow path is taken when EDX:EAX is 0x80000000:0 */ |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // XMM lacks a float/double->long conversion, so use the old FPU stack: double in an XMM register to long in EDX:EAX. |
| instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ |
| predicate (UseSSE>=2); /* doubles are in XMM registers */ |
| match(Set dst (ConvD2L src)); |
| effect( KILL cr ); /* EFLAGS is declared clobbered */ |
| format %{ "SUB ESP,8\t# Convert double to long\n\t" |
| "MOVSD [ESP],$src\n\t" |
| "FLD_D [ESP]\n\t" |
| "FLDCW trunc mode\n\t" |
| "FISTp [ESP + #0]\n\t" |
| "FLDCW std/24-bit mode\n\t" |
| "POP EAX\n\t" |
| "POP EDX\n\t" |
| "CMP EDX,0x80000000\n\t" |
| "JNE,s fast\n\t" |
| "TEST EAX,EAX\n\t" |
| "JNE,s fast\n\t" |
| "SUB ESP,8\n\t" |
| "MOVSD [ESP],$src\n\t" |
| "FLD_D [ESP]\n\t" |
| "ADD ESP,8\n\t" |
| "CALL d2l_wrapper\n" |
| "fast:" %} |
| ins_encode %{ |
| Label fast; |
| __ subptr(rsp, 8); /* 8-byte temporary, reused for the integer result */ |
| __ movdbl(Address(rsp, 0), $src$$XMMRegister); |
| __ fld_d(Address(rsp, 0)); /* move the operand onto the x87 stack */ |
| __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); /* switch the FPU control word to the truncating setting */ |
| __ fistp_d(Address(rsp, 0)); /* store the 64-bit integer over the temporary and pop */ |
| // Restore the rounding mode, mask the exception |
| if (Compile::current()->in_24_bit_fp_mode()) { |
| __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); |
| } else { |
| __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); |
| } |
| // Load the converted long, adjust CPU stack |
| __ pop(rax); /* low word */ |
| __ pop(rdx); /* high word */ |
| __ cmpl(rdx, 0x80000000); /* slow path only when the result is exactly 0x80000000:00000000 */ |
| __ jccb(Assembler::notEqual, fast); |
| __ testl(rax, rax); |
| __ jccb(Assembler::notEqual, fast); |
| __ subptr(rsp, 8); /* slow path: reload the operand onto the x87 stack for the stub */ |
| __ movdbl(Address(rsp, 0), $src$$XMMRegister); |
| __ fld_d(Address(rsp, 0)); |
| __ addptr(rsp, 8); |
| __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); /* stub defined elsewhere; presumably leaves the corrected result in EDX:EAX */ |
| __ bind(fast); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Convert a float to an int. Java semantics require we do complex |
| // manglations in the corner cases. So we set the rounding mode to |
| // 'zero', store the darned float down as an int, and reset the |
| // rounding mode to 'nearest'. The hardware stores a flag value down |
| // if we would overflow or converted a NAN; we check for this |
| // and go the slow path if needed. |
| instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ |
| predicate(UseSSE==0); /* x87 only */ |
| match(Set dst (ConvF2I src)); |
| effect( KILL tmp, KILL cr ); /* EDX (tmp) and EFLAGS are declared clobbered */ |
| format %{ "FLD $src\t# Convert float to int \n\t" |
| "FLDCW trunc mode\n\t" |
| "SUB ESP,4\n\t" |
| "FISTp [ESP + #0]\n\t" |
| "FLDCW std/24-bit mode\n\t" |
| "POP EAX\n\t" |
| "CMP EAX,0x80000000\n\t" |
| "JNE,s fast\n\t" |
| "FLD $src\n\t" |
| "CALL d2i_wrapper\n" |
| "fast:" %} |
| // DPR2I_encoding works for FPR2I |
| ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); /* encoding classes are defined outside this chunk */ |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Convert a float in xmm to an int reg (EAX); a result of 0x80000000 sends the operand to the d2i_wrapper slow path. |
| instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ |
| predicate(UseSSE>=1); /* floats are in XMM registers */ |
| match(Set dst (ConvF2I src)); |
| effect( KILL tmp, KILL cr ); /* EDX (tmp) and EFLAGS are declared clobbered */ |
| format %{ "CVTTSS2SI $dst, $src\n\t" |
| "CMP $dst,0x80000000\n\t" |
| "JNE,s fast\n\t" |
| "SUB ESP, 4\n\t" |
| "MOVSS [ESP], $src\n\t" |
| "FLD [ESP]\n\t" |
| "ADD ESP, 4\n\t" |
| "CALL d2i_wrapper\n" |
| "fast:" %} |
| ins_encode %{ |
| Label fast; |
| __ cvttss2sil($dst$$Register, $src$$XMMRegister); /* truncating conversion */ |
| __ cmpl($dst$$Register, 0x80000000); /* 0x80000000 is the value that selects the slow path */ |
| __ jccb(Assembler::notEqual, fast); |
| __ subptr(rsp, 4); /* slow path: pass the operand on the x87 stack */ |
| __ movflt(Address(rsp, 0), $src$$XMMRegister); |
| __ fld_s(Address(rsp, 0)); |
| __ addptr(rsp, 4); |
| __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper()))); /* stub defined elsewhere; presumably leaves the corrected result in EAX */ |
| __ bind(fast); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ |
| predicate(UseSSE==0); /* x87 only: float (FPU register) to long; the result operand is the eADXRegL class */ |
| match(Set dst (ConvF2L src)); |
| effect( KILL cr ); /* EFLAGS is declared clobbered */ |
| format %{ "FLD $src\t# Convert float to long\n\t" |
| "FLDCW trunc mode\n\t" |
| "SUB ESP,8\n\t" |
| "FISTp [ESP + #0]\n\t" |
| "FLDCW std/24-bit mode\n\t" |
| "POP EAX\n\t" |
| "POP EDX\n\t" |
| "CMP EDX,0x80000000\n\t" |
| "JNE,s fast\n\t" |
| "TEST EAX,EAX\n\t" |
| "JNE,s fast\n\t" |
| "FLD $src\n\t" |
| "CALL d2l_wrapper\n" |
| "fast:" %} |
| // DPR2L_encoding works for FPR2L |
| ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); /* encoding classes are defined outside this chunk */ |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // XMM lacks a float/double->long conversion, so use the old FPU stack: float in an XMM register to long in EDX:EAX. |
| instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ |
| predicate (UseSSE>=1); /* floats are in XMM registers */ |
| match(Set dst (ConvF2L src)); |
| effect( KILL cr ); /* EFLAGS is declared clobbered */ |
| format %{ "SUB ESP,8\t# Convert float to long\n\t" |
| "MOVSS [ESP],$src\n\t" |
| "FLD_S [ESP]\n\t" |
| "FLDCW trunc mode\n\t" |
| "FISTp [ESP + #0]\n\t" |
| "FLDCW std/24-bit mode\n\t" |
| "POP EAX\n\t" |
| "POP EDX\n\t" |
| "CMP EDX,0x80000000\n\t" |
| "JNE,s fast\n\t" |
| "TEST EAX,EAX\n\t" |
| "JNE,s fast\n\t" |
| "SUB ESP,4\t# Convert float to long\n\t" |
| "MOVSS [ESP],$src\n\t" |
| "FLD_S [ESP]\n\t" |
| "ADD ESP,4\n\t" |
| "CALL d2l_wrapper\n" |
| "fast:" %} |
| ins_encode %{ |
| Label fast; |
| __ subptr(rsp, 8); /* 8 bytes: 4 are used for the float, all 8 for the integer result */ |
| __ movflt(Address(rsp, 0), $src$$XMMRegister); |
| __ fld_s(Address(rsp, 0)); /* move the operand onto the x87 stack */ |
| __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc())); /* switch the FPU control word to the truncating setting */ |
| __ fistp_d(Address(rsp, 0)); /* store the 64-bit integer over the temporary and pop */ |
| // Restore the rounding mode, mask the exception |
| if (Compile::current()->in_24_bit_fp_mode()) { |
| __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); |
| } else { |
| __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); |
| } |
| // Load the converted long, adjust CPU stack |
| __ pop(rax); /* low word */ |
| __ pop(rdx); /* high word */ |
| __ cmpl(rdx, 0x80000000); /* slow path only when the result is exactly 0x80000000:00000000 */ |
| __ jccb(Assembler::notEqual, fast); |
| __ testl(rax, rax); |
| __ jccb(Assembler::notEqual, fast); |
| __ subptr(rsp, 4); /* slow path: reload the operand onto the x87 stack for the stub */ |
| __ movflt(Address(rsp, 0), $src$$XMMRegister); |
| __ fld_s(Address(rsp, 0)); |
| __ addptr(rsp, 4); |
| __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper()))); /* stub defined elsewhere; presumably leaves the corrected result in EDX:EAX */ |
| __ bind(fast); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ |
| predicate( UseSSE<=1 ); /* int to double on x87: the int is read from a stack slot, per the FILD in the format text */ |
| match(Set dst (ConvI2D src)); |
| format %{ "FILD $src\n\t" |
| "FSTP $dst" %} |
| opcode(0xDB, 0x0); /* DB /0 */ |
| ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); /* encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| instruct convI2D_reg(regD dst, rRegI src) %{ |
| predicate( UseSSE>=2 && !UseXmmI2D ); /* int register to XMM double via CVTSI2SD; chosen when UseXmmI2D is off */ |
| match(Set dst (ConvI2D src)); |
| format %{ "CVTSI2SD $dst,$src" %} |
| ins_encode %{ |
| __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct convI2D_mem(regD dst, memory mem) %{ |
| predicate( UseSSE>=2 ); /* int loaded from memory to XMM double; the LoadI is folded into CVTSI2SD */ |
| match(Set dst (ConvI2D (LoadI mem))); |
| format %{ "CVTSI2SD $dst,$mem" %} |
| ins_encode %{ |
| __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct convXI2D_reg(regD dst, rRegI src) |
| %{ |
| predicate( UseSSE>=2 && UseXmmI2D ); /* alternative int to double sequence, chosen when UseXmmI2D is on */ |
| match(Set dst (ConvI2D src)); |
| |
| format %{ "MOVD $dst,$src\n\t" |
| "CVTDQ2PD $dst,$dst\t# i2d" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); /* move the int into the XMM register first */ |
| __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); /* then convert in place */ |
| %} |
| ins_pipe(pipe_slow); // XXX |
| %} |
| |
| instruct convI2DPR_mem(regDPR dst, memory mem) %{ |
| predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); /* int loaded from memory to x87 double; 24-bit instruction selection must be off */ |
| match(Set dst (ConvI2D (LoadI mem))); /* the LoadI is folded into the FILD */ |
| format %{ "FILD $mem\n\t" |
| "FSTP $dst" %} |
| opcode(0xDB); /* DB /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,mem), |
| Pop_Reg_DPR(dst)); /* encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // Convert a byte to a float; no rounding step needed. Applies when the int input is the result of an AndI with the constant 255. |
| instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ |
| predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); /* x87 only; input node must be AndI with constant 255 */ |
| match(Set dst (ConvI2F src)); |
| format %{ "FILD $src\n\t" |
| "FSTP $dst" %} |
| |
| opcode(0xDB, 0x0); /* DB /0 */ |
| ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); /* encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // Int (stack slot) to float in 24-bit mode: force exponent rounding by storing back out to a float stack slot |
| instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ |
| predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (ConvI2F src)); |
| ins_cost(200); |
| format %{ "FILD $src\n\t" |
| "FSTP_S $dst" %} |
| opcode(0xDB, 0x0); /* DB /0 */ |
| ins_encode( Push_Mem_I(src), |
| Pop_Mem_FPR(dst)); /* encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_mem_mem ); |
| %} |
| |
| // Int (loaded from memory) to float in 24-bit mode: force exponent rounding by storing back out to a float stack slot |
| instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ |
| predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection on */ |
| match(Set dst (ConvI2F (LoadI mem))); /* the LoadI is folded into the FILD */ |
| ins_cost(200); |
| format %{ "FILD $mem\n\t" |
| "FSTP_S $dst" %} |
| opcode(0xDB); /* DB /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,mem), |
| Pop_Mem_FPR(dst)); /* encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_mem_mem ); |
| %} |
| |
| // Int (stack slot) to float in an FPU register. This instruction does not round to 24-bits |
| instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ |
| predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (ConvI2F src)); |
| format %{ "FILD $src\n\t" |
| "FSTP $dst" %} |
| opcode(0xDB, 0x0); /* DB /0 */ |
| ins_encode( Push_Mem_I(src), |
| Pop_Reg_FPR(dst)); /* encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // Int (loaded from memory) to float in an FPU register. This instruction does not round to 24-bits |
| instruct convI2FPR_mem(regFPR dst, memory mem) %{ |
| predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); /* x87 only, 24-bit instruction selection off */ |
| match(Set dst (ConvI2F (LoadI mem))); /* the LoadI is folded into the FILD */ |
| format %{ "FILD $mem\n\t" |
| "FSTP $dst" %} |
| opcode(0xDB); /* DB /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,mem), |
| Pop_Reg_FPR(dst)); /* encoding classes are defined outside this chunk */ |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // Convert an int register to a float in xmm with CVTSI2SS; no rounding step needed. |
| instruct convI2F_reg(regF dst, rRegI src) %{ |
| predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); /* parsed as UseSSE==1 or (UseSSE>=2 and not UseXmmI2F), since && binds tighter than the or operator */ |
| match(Set dst (ConvI2F src)); |
| format %{ "CVTSI2SS $dst, $src" %} |
| ins_encode %{ |
| __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| instruct convXI2F_reg(regF dst, rRegI src) |
| %{ |
| predicate( UseSSE>=2 && UseXmmI2F ); /* alternative int to float sequence, chosen when UseXmmI2F is on */ |
| match(Set dst (ConvI2F src)); |
| |
| format %{ "MOVD $dst,$src\n\t" |
| "CVTDQ2PS $dst,$dst\t# i2f" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); /* move the int into the XMM register first */ |
| __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); /* then convert in place */ |
| %} |
| ins_pipe(pipe_slow); // XXX |
| %} |
| |
| instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ |
| match(Set dst (ConvI2L src)); /* sign-extending int to long: per the format text the high half is a copy of src shifted right arithmetically by 31 */ |
| effect(KILL cr); /* EFLAGS is declared clobbered */ |
| ins_cost(375); |
| format %{ "MOV $dst.lo,$src\n\t" |
| "MOV $dst.hi,$src\n\t" |
| "SAR $dst.hi,31" %} |
| ins_encode(convert_int_long(dst,src)); /* encoding class defined outside this chunk */ |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // Zero-extend convert int to long |
| // Matches (ConvI2L src) ANDed with an immL_32bits mask and replaces the sign |
| // extension with a copy into the low half plus an XOR that clears the high half. |
| // NOTE(review): immL_32bits is presumably the constant 0xFFFFFFFF - confirm against |
| // the operand definition. |
| instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ |
| match(Set dst (AndL (ConvI2L src) mask) ); |
| effect( KILL flags ); |
| ins_cost(250); |
| format %{ "MOV $dst.lo,$src\n\t" |
| "XOR $dst.hi,$dst.hi" %} |
| opcode(0x33); // XOR |
| ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // Zero-extend long |
| // Matches a long ANDed with an immL_32bits mask: copies the low half of src into |
| // dst and clears the high half of dst with XOR. Kills flags. |
| // NOTE(review): the format string ends with a trailing "\n\t", unlike the otherwise |
| // identical convI2L_reg_zex - looks cosmetic only; confirm before changing. |
| instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ |
| match(Set dst (AndL src mask) ); |
| effect( KILL flags ); |
| ins_cost(250); |
| format %{ "MOV $dst.lo,$src.lo\n\t" |
| "XOR $dst.hi,$dst.hi\n\t" %} |
| opcode(0x33); // XOR |
| ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // Long-to-double on the x87 path (UseSSE<=1). Per the format text: both halves of |
| // src are pushed, loaded with FILD, the stack is popped, and the value is stored |
| // to the double stack slot dst with FSTP_D. Kills flags. |
| instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ |
| predicate (UseSSE<=1); |
| match(Set dst (ConvL2D src)); |
| effect( KILL cr ); |
| format %{ "PUSH $src.hi\t# Convert long to double\n\t" |
| "PUSH $src.lo\n\t" |
| "FILD ST,[ESP + #0]\n\t" |
| "ADD ESP,8\n\t" |
| "FSTP_D $dst\t# D-round" %} |
| opcode(0xDF, 0x5); /* DF /5 */ |
| ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Long-to-double with an XMM result (UseSSE>=2). Per the format text the conversion |
| // itself still goes through x87 (FILD_D / FSTP_D on the pushed value), and the |
| // result is then loaded into dst with MOVSD before the stack is popped. Kills flags. |
| instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ |
| predicate (UseSSE>=2); |
| match(Set dst (ConvL2D src)); |
| effect( KILL cr ); |
| format %{ "PUSH $src.hi\t# Convert long to double\n\t" |
| "PUSH $src.lo\n\t" |
| "FILD_D [ESP]\n\t" |
| "FSTP_D [ESP]\n\t" |
| "MOVSD $dst,[ESP]\n\t" |
| "ADD ESP,8" %} |
| opcode(0xDF, 0x5); /* DF /5 */ |
| ins_encode(convert_long_double2(src), Push_ResultD(dst)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Long-to-float with an XMM result (UseSSE>=1). Per the format text the pushed long |
| // is loaded with FILD_D, stored back as a single with FSTP_S, and moved into dst |
| // with MOVSS; ESP is then adjusted by 8. Kills flags. |
| instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ |
| predicate (UseSSE>=1); |
| match(Set dst (ConvL2F src)); |
| effect( KILL cr ); |
| format %{ "PUSH $src.hi\t# Convert long to single float\n\t" |
| "PUSH $src.lo\n\t" |
| "FILD_D [ESP]\n\t" |
| "FSTP_S [ESP]\n\t" |
| "MOVSS $dst,[ESP]\n\t" |
| "ADD ESP,8" %} |
| opcode(0xDF, 0x5); /* DF /5 */ |
| ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Long-to-float with the result stored to a float stack slot (FSTP_S). Kills flags. |
| // This rule has no predicate, so it is not gated on UseSSE the way convL2F_reg is. |
| instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ |
| match(Set dst (ConvL2F src)); |
| effect( KILL cr ); |
| format %{ "PUSH $src.hi\t# Convert long to single float\n\t" |
| "PUSH $src.lo\n\t" |
| "FILD ST,[ESP + #0]\n\t" |
| "ADD ESP,8\n\t" |
| "FSTP_S $dst\t# F-round" %} |
| opcode(0xDF, 0x5); /* DF /5 */ |
| ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Long-to-int narrowing: copies the low half of src into dst. |
| instruct convL2I_reg( rRegI dst, eRegL src ) %{ |
| match(Set dst (ConvL2I src)); |
| effect( DEF dst, USE src ); |
| format %{ "MOV $dst,$src.lo" %} |
| ins_encode(enc_CopyL_Lo(dst,src)); |
| ins_pipe( ialu_reg_reg ); |
| %} |
| |
| |
| // MoveF2I from a float stack slot into a general register: a plain 32-bit MOV from |
| // [rsp + src displacement]; no conversion instruction is emitted. |
| instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ |
| match(Set dst (MoveF2I src)); |
| effect( DEF dst, USE src ); |
| ins_cost(100); |
| format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} |
| ins_encode %{ |
| __ movl($dst$$Register, Address(rsp, $src$$disp)); |
| %} |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| // MoveF2I from an x87 register to an int stack slot (UseSSE==0): stores src as a |
| // single-precision value (FST_S) into dst. |
| instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ |
| predicate(UseSSE==0); |
| match(Set dst (MoveF2I src)); |
| effect( DEF dst, USE src ); |
| |
| ins_cost(125); |
| format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} |
| ins_encode( Pop_Mem_Reg_FPR(dst, src) ); |
| ins_pipe( fpu_mem_reg ); |
| %} |
| |
| // MoveF2I from an XMM register to an int stack slot (UseSSE>=1): stores the float |
| // to [rsp + dst displacement] via movflt. |
| instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ |
| predicate(UseSSE>=1); |
| match(Set dst (MoveF2I src)); |
| effect( DEF dst, USE src ); |
| |
| ins_cost(95); |
| format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} |
| ins_encode %{ |
| __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // MoveF2I directly from an XMM register to a general register with MOVD |
| // (UseSSE>=2). Lowest ins_cost (85) among the MoveF2I rules visible here. |
| instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ |
| predicate(UseSSE>=2); |
| match(Set dst (MoveF2I src)); |
| effect( DEF dst, USE src ); |
| ins_cost(85); |
| format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} |
| ins_encode %{ |
| __ movdl($dst$$Register, $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // MoveI2F from a general register to a float stack slot: a plain 32-bit MOV to |
| // [rsp + dst displacement]; no conversion instruction is emitted. |
| instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ |
| match(Set dst (MoveI2F src)); |
| effect( DEF dst, USE src ); |
| |
| ins_cost(100); |
| format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} |
| ins_encode %{ |
| __ movl(Address(rsp, $dst$$disp), $src$$Register); |
| %} |
| ins_pipe( ialu_mem_reg ); |
| %} |
| |
| |
| // MoveI2F from an int stack slot into an x87 register (UseSSE==0): loads the slot |
| // as a 32-bit real (FLD_S) and pops it into dst. |
| instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ |
| predicate(UseSSE==0); |
| match(Set dst (MoveI2F src)); |
| effect(DEF dst, USE src); |
| |
| ins_cost(125); |
| format %{ "FLD_S $src\n\t" |
| "FSTP $dst\t# MoveI2F_stack_reg" %} |
| opcode(0xD9); /* D9 /0, FLD m32real */ |
| ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), |
| Pop_Reg_FPR(dst) ); |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| // MoveI2F from an int stack slot into an XMM register (UseSSE>=1): loads from |
| // [rsp + src displacement] via movflt. |
| instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ |
| predicate(UseSSE>=1); |
| match(Set dst (MoveI2F src)); |
| effect( DEF dst, USE src ); |
| |
| ins_cost(95); |
| format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} |
| ins_encode %{ |
| __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // MoveI2F directly from a general register to an XMM register with MOVD |
| // (UseSSE>=2). Lowest ins_cost (85) among the MoveI2F rules visible here. |
| instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ |
| predicate(UseSSE>=2); |
| match(Set dst (MoveI2F src)); |
| effect( DEF dst, USE src ); |
| |
| ins_cost(85); |
| format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // MoveD2L from a double stack slot into a long register pair: two 32-bit loads |
| // (opcode 0x8B twice), the second from $src+4 into the high half. |
| instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ |
| match(Set dst (MoveD2L src)); |
| effect(DEF dst, USE src); |
| |
| ins_cost(250); |
| format %{ "MOV $dst.lo,$src\n\t" |
| "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} |
| opcode(0x8B, 0x8B); |
| ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); |
| ins_pipe( ialu_mem_long_reg ); |
| %} |
| |
| // MoveD2L from an x87 register to a long stack slot (UseSSE<=1): stores src as a |
| // double (FST_D) into dst. |
| instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (MoveD2L src)); |
| effect(DEF dst, USE src); |
| |
| ins_cost(125); |
| format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} |
| ins_encode( Pop_Mem_Reg_DPR(dst, src) ); |
| ins_pipe( fpu_mem_reg ); |
| %} |
| |
| // MoveD2L from an XMM register to a long stack slot (UseSSE>=2): stores the double |
| // to [rsp + dst displacement] via movdbl. |
| instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ |
| predicate(UseSSE>=2); |
| match(Set dst (MoveD2L src)); |
| effect(DEF dst, USE src); |
| ins_cost(95); |
| format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} |
| ins_encode %{ |
| __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // MoveD2L from an XMM register to a long register pair without touching memory |
| // (UseSSE>=2). The low half is read with MOVD; the high half is obtained by |
| // shuffling src into the XMM temp (PSHUFLW 0x4E) and reading that with MOVD. |
| instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ |
| predicate(UseSSE>=2); |
| match(Set dst (MoveD2L src)); |
| effect(DEF dst, USE src, TEMP tmp); |
| ins_cost(85); |
| format %{ "MOVD $dst.lo,$src\n\t" |
| "PSHUFLW $tmp,$src,0x4E\n\t" |
| "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} |
| ins_encode %{ |
| __ movdl($dst$$Register, $src$$XMMRegister); |
| __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); |
| // HIGH_FROM_LOW maps the low register of the pair to its high partner. |
| __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // MoveL2D from a long register pair to a double stack slot: two 32-bit stores |
| // (opcode 0x89 twice), the second writing the high half to $dst+4. |
| instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ |
| match(Set dst (MoveL2D src)); |
| effect(DEF dst, USE src); |
| |
| ins_cost(200); |
| format %{ "MOV $dst,$src.lo\n\t" |
| "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} |
| opcode(0x89, 0x89); |
| ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); |
| ins_pipe( ialu_mem_long_reg ); |
| %} |
| |
| |
| // MoveL2D from a long stack slot into an x87 register (UseSSE<=1): loads the slot |
| // as a 64-bit real (FLD_D) and pops it into dst. |
| instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ |
| predicate(UseSSE<=1); |
| match(Set dst (MoveL2D src)); |
| effect(DEF dst, USE src); |
| ins_cost(125); |
| |
| format %{ "FLD_D $src\n\t" |
| "FSTP $dst\t# MoveL2D_stack_reg" %} |
| opcode(0xDD); /* DD /0, FLD m64real */ |
| ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), |
| Pop_Reg_DPR(dst) ); |
| ins_pipe( fpu_reg_mem ); |
| %} |
| |
| |
| // MoveL2D from a long stack slot into an XMM register, used when UseSSE>=2 and |
| // UseXmmLoadAndClearUpper is set. Loads from [rsp + src displacement] via movdbl. |
| instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ |
| predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); |
| match(Set dst (MoveL2D src)); |
| effect(DEF dst, USE src); |
| |
| ins_cost(95); |
| format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} |
| ins_encode %{ |
| __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Counterpart of MoveL2D_stack_reg_sse for UseSSE>=2 with UseXmmLoadAndClearUpper |
| // off. The encoding is the same movdbl call; only the predicate and the format |
| // mnemonic (MOVLPD) differ. |
| // NOTE(review): presumably movdbl itself picks MOVLPD vs MOVSD from the flag - |
| // confirm in the macro assembler. |
| instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ |
| predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); |
| match(Set dst (MoveL2D src)); |
| effect(DEF dst, USE src); |
| |
| ins_cost(95); |
| format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} |
| ins_encode %{ |
| __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // MoveL2D from a long register pair into an XMM register without touching memory |
| // (UseSSE>=2): MOVD the low half into dst, MOVD the high half into tmp, then |
| // PUNPCKLDQ to interleave them into dst. |
| instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ |
| predicate(UseSSE>=2); |
| match(Set dst (MoveL2D src)); |
| // dst is declared TEMP (not DEF): it is written before src's high half is read. |
| effect(TEMP dst, USE src, TEMP tmp); |
| ins_cost(85); |
| format %{ "MOVD $dst,$src.lo\n\t" |
| "MOVD $tmp,$src.hi\n\t" |
| "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} |
| ins_encode %{ |
| __ movdl($dst$$XMMRegister, $src$$Register); |
| __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); |
| __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| // ======================================================================= |
| // fast clearing of an array |
| // ClearArray rule used when UseFastStosb is off. cnt and base are consumed and |
| // clobbered (USE_KILL); the zero register and flags are killed. The code itself |
| // is produced by the clear_mem helper; the format text describes it as |
| // XOR / SHL ECX,1 / REP STOS. |
| instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ |
| predicate(!UseFastStosb); |
| match(Set dummy (ClearArray cnt base)); |
| effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); |
| format %{ "XOR EAX,EAX\t# ClearArray:\n\t" |
| "SHL ECX,1\t# Convert doublewords to words\n\t" |
| "REP STOS\t# store EAX into [EDI++] while ECX--" %} |
| ins_encode %{ |
| __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ClearArray rule used when UseFastStosb is on. Operands, effects and the |
| // clear_mem call are identical to rep_stos; only the predicate and the format |
| // text (SHL ECX,3 / REP STOSB) differ. |
| // NOTE(review): presumably clear_mem checks UseFastStosb itself to pick the byte |
| // variant - confirm in the macro assembler. |
| instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ |
| predicate(UseFastStosb); |
| match(Set dummy (ClearArray cnt base)); |
| effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); |
| format %{ "XOR EAX,EAX\t# ClearArray:\n\t" |
| "SHL ECX,3\t# Convert doublewords to bytes\n\t" |
| "REP STOSB\t# store EAX into [EDI++] while ECX--" %} |
| ins_encode %{ |
| __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // StrComp intrinsic: delegates to the string_compare assembler helper. Both string |
| // pointers and both counts are consumed and clobbered (USE_KILL), tmp1 is an XMM |
| // temp, and flags are killed. |
| instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, |
| eAXRegI result, regD tmp1, eFlagsReg cr) %{ |
| match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); |
| effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); |
| |
| format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} |
| ins_encode %{ |
| // Note the helper takes (str1, str2, cnt1, cnt2, ...), not the match order. |
| __ string_compare($str1$$Register, $str2$$Register, |
| $cnt1$$Register, $cnt2$$Register, $result$$Register, |
| $tmp1$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // fast string equals |
| // StrEquals intrinsic: delegates to char_arrays_equals with its first argument |
| // false (array_equals passes true). str1, str2 and cnt are consumed and clobbered; |
| // tmp1/tmp2 are XMM temps; tmp3 and flags are killed. |
| instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, |
| regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ |
| match(Set result (StrEquals (Binary str1 str2) cnt)); |
| effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); |
| |
| format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} |
| ins_encode %{ |
| __ char_arrays_equals(false, $str1$$Register, $str2$$Register, |
| $cnt$$Register, $result$$Register, $tmp3$$Register, |
| $tmp1$$XMMRegister, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // fast search of substring with known size. |
| // StrIndexOf intrinsic for a substring whose length is a compile-time constant |
| // (int_cnt2). Requires UseSSE42Intrinsics. cnt2 is not an input here; it is only a |
| // killed scratch register handed to the helper. Constant lengths >= 8 use |
| // string_indexofC8, shorter ones use string_indexof with the constant length. |
| instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, |
| eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ |
| predicate(UseSSE42Intrinsics); |
| match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); |
| effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); |
| |
| format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} |
| ins_encode %{ |
| int icnt2 = (int)$int_cnt2$$constant; |
| if (icnt2 >= 8) { |
| // IndexOf for constant substrings with size >= 8 elements |
| // which don't need to be loaded through stack. |
| __ string_indexofC8($str1$$Register, $str2$$Register, |
| $cnt1$$Register, $cnt2$$Register, |
| icnt2, $result$$Register, |
| $vec$$XMMRegister, $tmp$$Register); |
| } else { |
| // Small strings are loaded through stack if they cross page boundary. |
| __ string_indexof($str1$$Register, $str2$$Register, |
| $cnt1$$Register, $cnt2$$Register, |
| icnt2, $result$$Register, |
| $vec$$XMMRegister, $tmp$$Register); |
| } |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // StrIndexOf intrinsic for a substring whose length is only known at run time |
| // (cnt2 is a register input). Requires UseSSE42Intrinsics. Passes -1 in the |
| // constant-length argument slot of string_indexof. |
| instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, |
| eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ |
| predicate(UseSSE42Intrinsics); |
| match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); |
| effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); |
| |
| format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} |
| ins_encode %{ |
| __ string_indexof($str1$$Register, $str2$$Register, |
| $cnt1$$Register, $cnt2$$Register, |
| (-1), $result$$Register, |
| $vec$$XMMRegister, $tmp$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // fast array equals |
| // AryEq intrinsic: delegates to char_arrays_equals with its first argument true |
| // (string_equals passes false). There is no count input; tmp3 is a killed scratch |
| // register passed in the helper's count position. ary1/ary2 are consumed and |
| // clobbered; tmp1/tmp2 are XMM temps; tmp4 and flags are killed. |
| instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, |
| regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) |
| %{ |
| match(Set result (AryEq ary1 ary2)); |
| effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); |
| //ins_cost(300); |
| |
| format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} |
| ins_encode %{ |
| __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register, |
| $tmp3$$Register, $result$$Register, $tmp4$$Register, |
| $tmp1$$XMMRegister, $tmp2$$XMMRegister); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // encode char[] to byte[] in ISO_8859_1 |
| // EncodeISOArray intrinsic: delegates to the encode_iso_array assembler helper. |
| // src, dst and len are consumed and clobbered (USE_KILL); tmp1..tmp4 are XMM |
| // temps; tmp5 and flags are killed. |
| instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, |
| regD tmp1, regD tmp2, regD tmp3, regD tmp4, |
| eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ |
| match(Set result (EncodeISOArray src (Binary dst len))); |
| effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); |
| |
| format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} |
| ins_encode %{ |
| __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, |
| $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, |
| $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| //----------Control Flow Instructions------------------------------------------ |
| // Signed compare Instructions |
| // Signed int compare, register against register: CMP (opcode 3B /r) setting cr. |
| instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ |
| match(Set cr (CmpI op1 op2)); |
| effect( DEF cr, USE op1, USE op2 ); |
| format %{ "CMP $op1,$op2" %} |
| opcode(0x3B); /* Opcode 3B /r */ |
| ins_encode( OpcP, RegReg( op1, op2) ); |
| ins_pipe( ialu_cr_reg_reg ); |
| %} |
| |
| // Signed int compare, register against immediate: CMP (opcode 81 /7) setting cr. |
| // NOTE(review): OpcSErm/Con8or32 presumably choose the sign-extended 8-bit form |
| // when the constant fits - confirm against the enc_class definitions. |
| instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ |
| match(Set cr (CmpI op1 op2)); |
| effect( DEF cr, USE op1 ); |
| format %{ "CMP $op1,$op2" %} |
| opcode(0x81,0x07); /* Opcode 81 /7 */ |
| // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ |
| ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); |
| ins_pipe( ialu_cr_reg_imm ); |
| %} |
| |
| // Cisc-spilled version of cmpI_eReg |
| // Signed int compare of a register against an int loaded from memory; the LoadI |
| // is folded into the CMP memory operand (opcode 3B /r). |
| instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ |
| match(Set cr (CmpI op1 (LoadI op2))); |
| |
| format %{ "CMP $op1,$op2" %} |
| ins_cost(500); |
| opcode(0x3B); /* Opcode 3B /r */ |
| ins_encode( OpcP, RegMem( op1, op2) ); |
| ins_pipe( ialu_cr_reg_mem ); |
| %} |
| |
| // Signed int compare against the constant zero, emitted as TEST src,src |
| // (opcode 0x85) instead of a CMP with an immediate. |
| instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ |
| match(Set cr (CmpI src zero)); |
| effect( DEF cr, USE src ); |
| |
| format %{ "TEST $src,$src" %} |
| opcode(0x85); |
| ins_encode( OpcP, RegReg( src, src ) ); |
| ins_pipe( ialu_cr_reg_imm ); |
| %} |
| |
| // Folds (src & con) compared against zero into a single TEST src,con |
| // (opcode F7 /0 with a 32-bit immediate). |
| instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ |
| match(Set cr (CmpI (AndI src con) zero)); |
| |
| format %{ "TEST $src,$con" %} |
| opcode(0xF7,0x00); |
| ins_encode( OpcP, RegOpc(src), Con32(con) ); |
| ins_pipe( ialu_cr_reg_imm ); |
| %} |
| |
| // Folds (src & mem) compared against zero into a single TEST src,mem |
| // (opcode 0x85). |
| // NOTE(review): the match rule uses the memory operand directly, with no explicit |
| // LoadI wrapper as in compI_eReg_mem - confirm this is intended. |
| instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ |
| match(Set cr (CmpI (AndI src mem) zero)); |
| |
| format %{ "TEST $src,$mem" %} |
| opcode(0x85); |
| ins_encode( OpcP, RegMem( src, mem ) ); |
| ins_pipe( ialu_cr_reg_mem ); |
| %} |
| |
| // Unsigned compare Instructions; really, same as signed except they |
| // produce an eFlagsRegU instead of eFlagsReg. |
| // Unsigned int compare, register against register. Same CMP encoding as |
| // compI_eReg (opcode 3B /r); the result is typed eFlagsRegU. |
| instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ |
| match(Set cr (CmpU op1 op2)); |
| |
| format %{ "CMPu $op1,$op2" %} |
| opcode(0x3B); /* Opcode 3B /r */ |
| ins_encode( OpcP, RegReg( op1, op2) ); |
| ins_pipe( ialu_cr_reg_reg ); |
| %} |
| |
| // Unsigned int compare, register against immediate. Same encoding as |
| // compI_eReg_imm (opcode 81 /7); the result is typed eFlagsRegU. |
| instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ |
| match(Set cr (CmpU op1 op2)); |
| |
| format %{ "CMPu $op1,$op2" %} |
| opcode(0x81,0x07); /* Opcode 81 /7 */ |
| ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); |
| ins_pipe( ialu_cr_reg_imm ); |
| %} |
| |
| // Cisc-spilled version of compU_eReg: unsigned compare of a register against an |
| // int loaded from memory, with the LoadI folded into the CMP (opcode 3B /r). |
| instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ |
| match(Set cr (CmpU op1 (LoadI op2))); |
| |
| format %{ "CMPu $op1,$op2" %} |
| ins_cost(500); |
| opcode(0x3B); /* Opcode 3B /r */ |
| ins_encode( OpcP, RegMem( op1, op2) ); |
| ins_pipe( ialu_cr_reg_mem ); |
| %} |
| |
| // // Cisc-spilled version of cmpU_eReg |
| //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ |
| // match(Set cr (CmpU (LoadI op1) op2)); |
| // |
| // format %{ "CMPu $op1,$op2" %} |
| // ins_cost(500); |
| // opcode(0x39); /* Opcode 39 /r */ |
| // ins_encode( OpcP, RegMem( op1, op2) ); |
| //%} |
| |
| // Unsigned int compare against the constant zero, emitted as TEST src,src |
| // (opcode 0x85); the result is typed eFlagsRegU. |
| instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{ |
| match(Set cr (CmpU src zero)); |
| |
| format %{ "TESTu $src,$src" %} |
| opcode(0x85); |
| ins_encode( OpcP, RegReg( src, src ) ); |
| ins_pipe( ialu_cr_reg_imm ); |
| %} |
| |
| // Unsigned pointer compare Instructions |
| // Pointer compare, register against register: CMP (opcode 3B /r) producing |
| // unsigned flags. |
| instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ |
| match(Set cr (CmpP op1 op2)); |
| |
| format %{ "CMPu $op1,$op2" %} |
| opcode(0x3B); /* Opcode 3B /r */ |
| ins_encode( OpcP, RegReg( op1, op2) ); |
| ins_pipe( ialu_cr_reg_reg ); |
| %} |
| |
| // Pointer compare, register against a pointer immediate: CMP (opcode 81 /7) |
| // producing unsigned flags. |
| instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ |
| match(Set cr (CmpP op1 op2)); |
| |
| format %{ "CMPu $op1,$op2" %} |
| opcode(0x81,0x07); /* Opcode 81 /7 */ |
| ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); |
| ins_pipe( ialu_cr_reg_imm ); |
| %} |
| |
| // Cisc-spilled version of compP_eReg: pointer compare of a register against a |
| // pointer loaded from memory, with the LoadP folded into the CMP (opcode 3B /r). |
| instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ |
| match(Set cr (CmpP op1 (LoadP op2))); |
| |
| format %{ "CMPu $op1,$op2" %} |
| ins_cost(500); |
| opcode(0x3B); /* Opcode 3B /r */ |
| ins_encode( OpcP, RegMem( op1, op2) ); |
| ins_pipe( ialu_cr_reg_mem ); |
| %} |
| |
| // // Cisc-spilled version of cmpP_eReg |
| //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ |
| // match(Set cr (CmpP (LoadP op1) op2)); |
| // |
| // format %{ "CMPu $op1,$op2" %} |
| // ins_cost(500); |
| // opcode(0x39); /* Opcode 39 /r */ |
| // ins_encode( OpcP, RegMem( op1, op2) ); |
| //%} |
| |
| // Compare raw pointer (used in out-of-heap check). |
| // Only works because non-oop pointers must be raw pointers |
| // and raw pointers have no anti-dependencies. |
| // Same match pattern and encoding as compP_eReg_mem, but with no explicit |
| // ins_cost and gated by a predicate requiring relocInfo::none. |
| instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ |
| // n->in(2) is the LoadP; NOTE(review): its in(2) is presumably the load's address |
| // input - confirm. The rule applies only when that node's type needs no relocation. |
| predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); |
| match(Set cr (CmpP op1 (LoadP op2))); |
| |
| format %{ "CMPu $op1,$op2" %} |
| opcode(0x3B); /* Opcode 3B /r */ |
| ins_encode( OpcP, RegMem( op1, op2) ); |
| ins_pipe( ialu_cr_reg_mem ); |
| %} |
| |
| // |
| // This will generate a signed flags result. This should be ok |
| // since any compare to a zero should be eq/neq. |
| // Pointer compare against null, emitted as TEST src,src (opcode 0x85). Note the |
| // result is typed eFlagsReg (signed flags), unlike the other CmpP rules. |
| instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ |
| match(Set cr (CmpP src zero)); |
| |
| format %{ "TEST $src,$src" %} |
| opcode(0x85); |
| ins_encode( OpcP, RegReg( src, src ) ); |
| ins_pipe( ialu_cr_reg_imm ); |
| %} |
| |
| // Cisc-spilled version of testP_reg |
| // This will generate a signed flags result. This should be ok |
| // since any compare to a zero should be eq/neq. |
| // Null check of a pointer loaded from memory, emitted as TEST mem,0xFFFFFFFF |
| // (opcode F7 /0 with a 32-bit all-ones immediate). |
| // NOTE(review): zero is declared immI0 here while testP_reg uses immP0 for the |
| // same CmpP-against-zero pattern - confirm this is intended. |
| instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ |
| match(Set cr (CmpP (LoadP op) zero)); |
| |
| format %{ "TEST $op,0xFFFFFFFF" %} |
| ins_cost(500); |
| opcode(0xF7); /* Opcode F7 /0 */ |
| ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); |
| ins_pipe( ialu_cr_reg_imm ); |
| %} |
| |
| // Yanked all unsigned pointer compare operations. |
| // Pointer compares are done with CmpP which is already unsigned. |
| |
| //----------Max and Min-------------------------------------------------------- |
| // Min Instructions |
| //// |
| // *** Min and Max using the conditional move are slower than the |
| // *** branch version on a Pentium III. |
| // // Conditional move for min |
| //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ |
| // effect( USE_DEF op2, USE op1, USE cr ); |
| // format %{ "CMOVlt $op2,$op1\t! min" %} |
| // opcode(0x4C,0x0F); |
| // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); |
| // ins_pipe( pipe_cmov_reg ); |
| //%} |
| // |
| //// Min Register with Register (P6 version) |
| //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ |
| // predicate(VM_Version::supports_cmov() ); |
| // match(Set op2 (MinI op1 op2)); |
| // ins_cost(200); |
| // expand %{ |
| // eFlagsReg cr; |
| // compI_eReg(cr,op1,op2); |
| // cmovI_reg_lt(op2,op1,cr); |
| // %} |
| //%} |
| |
| // Min Register with Register (generic version) |
| // Two-address MinI: dst = min(dst, src), encoded by the min_enc encoding class. |
| // Kills flags. |
| // NOTE(review): opcode(0xCC) is not passed to min_enc here; whether the encoding |
| // reads it cannot be seen from this rule - confirm. |
| instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ |
| match(Set dst (MinI dst src)); |
| effect(KILL flags); |
| ins_cost(300); |
| |
| format %{ "MIN $dst,$src" %} |
| opcode(0xCC); |
| ins_encode( min_enc(dst,src) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Max Register with Register |
| // *** Min and Max using the conditional move are slower than the |
| // *** branch version on a Pentium III. |
| // // Conditional move for max |
| //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ |
| // effect( USE_DEF op2, USE op1, USE cr ); |
| // format %{ "CMOVgt $op2,$op1\t! max" %} |
| // opcode(0x4F,0x0F); |
| // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); |
| // ins_pipe( pipe_cmov_reg ); |
| //%} |
| // |
| // // Max Register with Register (P6 version) |
| //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ |
| // predicate(VM_Version::supports_cmov() ); |
| // match(Set op2 (MaxI op1 op2)); |
| // ins_cost(200); |
| // expand %{ |
| // eFlagsReg cr; |
| // compI_eReg(cr,op1,op2); |
| // cmovI_reg_gt(op2,op1,cr); |
| // %} |
| //%} |
| |
| // Max Register with Register (generic version) |
| // Two-address MaxI: dst = max(dst, src), encoded by the max_enc encoding class. |
| // Kills flags. |
| // NOTE(review): opcode(0xCC) is not passed to max_enc here; whether the encoding |
| // reads it cannot be seen from this rule - confirm. |
| instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ |
| match(Set dst (MaxI dst src)); |
| effect(KILL flags); |
| ins_cost(300); |
| |
| format %{ "MAX $dst,$src" %} |
| opcode(0xCC); |
| ins_encode( max_enc(dst,src) ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ============================================================================ |
| // Counted Loop limit node which represents exact final iterator value. |
| // Note: the resulting value should fit into integer range since |
| // counted loops have limit check on overflow. |
| instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ |
| // Computes the exact final iterator value of a counted loop in place in limit: |
| // limit = init + stride * ((limit - init + stride -/+ 1) / stride) |
| // The subtraction, rounding add and division use a 64-bit intermediate held in |
| // limit_hi:limit. limit_hi and tmp are temps; flags are killed. Strides of 1 and |
| // -1 are asserted never to reach this rule. |
| match(Set limit (LoopLimit (Binary init limit) stride)); |
| effect(TEMP limit_hi, TEMP tmp, KILL flags); |
| ins_cost(300); |
| |
| format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} |
| ins_encode %{ |
| int strd = (int)$stride$$constant; |
| assert(strd != 1 && strd != -1, "sanity"); |
| // Convert limit to long (EAX:EDX) |
| __ cdql(); |
| // Convert init to long (init:tmp) |
| __ movl($tmp$$Register, $init$$Register); |
| __ sarl($tmp$$Register, 31); |
| // $limit - $init |
| __ subl($limit$$Register, $init$$Register); |
| __ sbbl($limit_hi$$Register, $tmp$$Register); |
| // + ($stride - 1) |
| if (strd > 0) { |
| __ addl($limit$$Register, (strd - 1)); |
| __ adcl($limit_hi$$Register, 0); |
| __ movl($tmp$$Register, strd); |
| } else { |
| // Negative stride: add (stride + 1), then negate the 64-bit value so the |
| // division below is by the positive magnitude of the stride. |
| __ addl($limit$$Register, (strd + 1)); |
| __ adcl($limit_hi$$Register, -1); |
| __ lneg($limit_hi$$Register, $limit$$Register); |
| __ movl($tmp$$Register, -strd); |
| } |
| // signed division: (EAX:EDX) / pos_stride |
| __ idivl($tmp$$Register); |
| if (strd < 0) { |
| // restore sign |
| __ negl($tmp$$Register); |
| } |
| // (EAX) * stride |
| __ mull($tmp$$Register); |
| // + init (ignore upper bits) |
| __ addl($limit$$Register, $init$$Register); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ============================================================================ |
| // Branch Instructions |
| // Jump Table |
| // Jump-table dispatch: indirect JMP through the table at $constantaddress, |
| // indexed by switch_val with scale 1 (Address::times_1). |
| instruct jumpXtnd(rRegI switch_val) %{ |
| match(Jump switch_val); |
| ins_cost(350); |
| format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} |
| ins_encode %{ |
| // Jump to Address(table_base + switch_reg) |
| Address index(noreg, $switch_val$$Register, Address::times_1); |
| __ jump(ArrayAddress($constantaddress, index)); |
| %} |
| ins_pipe(pipe_jmp); |
| %} |
| |
| // Jump Direct - Label defines a relative address from JMP+1 |
| // Unconditional Goto, always emitted in the long form; the declared size is |
| // 5 bytes. jmpDir_short is the short-branch replacement. |
| instruct jmpDir(label labl) %{ |
| match(Goto); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ "JMP $labl" %} |
| size(5); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jmp(*L, false); // Always long jump |
| %} |
| ins_pipe( pipe_jmp ); |
| %} |
| |
| // Jump Direct Conditional - Label defines a relative address from Jcc+1 |
| // Conditional branch for an If on signed flags, always emitted in the long form; |
| // the declared size is 6 bytes. jmpCon_short is the short-branch replacement. |
| instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ |
| match(If cop cr); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ "J$cop $labl" %} |
| size(6); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump |
| %} |
| ins_pipe( pipe_jcc ); |
| %} |
| |
| // Jump Direct Conditional - Label defines a relative address from Jcc+1 |
| // Back-branch of a counted loop (CountedLoopEnd) on signed flags, always emitted |
| // in the long form; the declared size is 6 bytes. |
| instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ |
| match(CountedLoopEnd cop cr); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ "J$cop $labl\t# Loop end" %} |
| size(6); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump |
| %} |
| ins_pipe( pipe_jcc ); |
| %} |
| |
| // Jump Direct Conditional - Label defines a relative address from Jcc+1 |
| // Back-branch of a counted loop (CountedLoopEnd) on unsigned flags (cmpOpU / |
| // eFlagsRegU), always emitted in the long form; the declared size is 6 bytes. |
| instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ |
| match(CountedLoopEnd cop cmp); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ "J$cop,u $labl\t# Loop end" %} |
| size(6); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump |
| %} |
| ins_pipe( pipe_jcc ); |
| %} |
| |
| // CountedLoopEnd back-branch for the cmpOpUCF / eFlagsRegUCF operand pair. Same |
| // encoding as jmpLoopEndU but with a lower ins_cost (200 instead of 300). |
| instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ |
| match(CountedLoopEnd cop cmp); |
| effect(USE labl); |
| |
| ins_cost(200); |
| format %{ "J$cop,u $labl\t# Loop end" %} |
| size(6); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump |
| %} |
| ins_pipe( pipe_jcc ); |
| %} |
| |
| // Jump Direct Conditional - using unsigned comparison |
| // Conditional branch for an If on unsigned flags (cmpOpU / eFlagsRegU), always |
| // emitted in the long form; the declared size is 6 bytes. |
| instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ |
| match(If cop cmp); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ "J$cop,u $labl" %} |
| size(6); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump |
| %} |
| ins_pipe(pipe_jcc); |
| %} |
| |
| // Conditional branch for an If with the cmpOpUCF / eFlagsRegUCF operand pair. |
| // Same encoding as jmpConU but with a lower ins_cost (200 instead of 300). |
| instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ |
| match(If cop cmp); |
| effect(USE labl); |
| |
| ins_cost(200); |
| format %{ "J$cop,u $labl" %} |
| size(6); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump |
| %} |
| ins_pipe(pipe_jcc); |
| %} |
| |
| // Conditional branch for an If with a cmpOpUCF2 condition. Only equal and notEqual |
| // are handled (anything else hits ShouldNotReachHere), and each emits two jumps |
| // because the parity flag is tested as well. No size() is declared. |
| // NOTE(review): parity presumably flags an unordered floating-point compare - |
| // confirm against the producers of eFlagsRegUCF. |
| instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ |
| match(If cop cmp); |
| effect(USE labl); |
| |
| ins_cost(200); |
| format %{ $$template |
| if ($cop$$cmpcode == Assembler::notEqual) { |
| $$emit$$"JP,u $labl\n\t" |
| $$emit$$"J$cop,u $labl" |
| } else { |
| $$emit$$"JP,u done\n\t" |
| $$emit$$"J$cop,u $labl\n\t" |
| $$emit$$"done:" |
| } |
| %} |
| ins_encode %{ |
| Label* l = $labl$$label; |
| if ($cop$$cmpcode == Assembler::notEqual) { |
| // notEqual: take the branch if parity is set OR the values differ. |
| __ jcc(Assembler::parity, *l, false); |
| __ jcc(Assembler::notEqual, *l, false); |
| } else if ($cop$$cmpcode == Assembler::equal) { |
| // equal: skip the branch when parity is set, otherwise branch on equal. |
| Label done; |
| __ jccb(Assembler::parity, done); |
| __ jcc(Assembler::equal, *l, false); |
| __ bind(done); |
| } else { |
| ShouldNotReachHere(); |
| } |
| %} |
| ins_pipe(pipe_jcc); |
| %} |
| |
| // ============================================================================ |
| // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass |
| // array for an instance of the superklass. Set a hidden internal cache on a |
| // hit (cache is checked with exposed code in gen_subtype_check()). Return |
| // NZ for a miss or zero for a hit. The encoding ALSO sets flags. |
| // Value-producing form of the secondary-supers scan. Kills rcx and flags. |
| // opcode(0x1) asks the shared enc_PartialSubtypeCheck encoding to zero the |
| // result register on a hit; compare partialSubtypeCheck_vs_Zero, which uses 0x0. |
| instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ |
| match(Set result (PartialSubtypeCheck sub super)); |
| effect( KILL rcx, KILL cr ); |
| |
| ins_cost(1100); // slightly larger than the next version |
| format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" |
| "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" |
| "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" |
| "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" |
| "JNE,s miss\t\t# Missed: EDI not-zero\n\t" |
| "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" |
| "XOR $result,$result\t\t Hit: EDI zero\n\t" |
| "miss:\t" %} |
| |
| opcode(0x1); // Force a XOR of EDI |
| ins_encode( enc_PartialSubtypeCheck() ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Flags-producing form: matches the PartialSubtypeCheck result compared against |
| // null, so only the flags are needed. The result register and rcx are killed. |
| // Cheaper (ins_cost 1000) than partialSubtypeCheck because the final XOR is |
| // skipped (opcode 0x0). |
| instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{ |
| match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); |
| effect( KILL rcx, KILL result ); |
| |
| ins_cost(1000); |
| format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" |
| "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" |
| "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" |
| "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" |
| "JNE,s miss\t\t# Missed: flags NZ\n\t" |
| "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" |
| "miss:\t" %} |
| |
| opcode(0x0); // No need to XOR EDI |
| ins_encode( enc_PartialSubtypeCheck() ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // ============================================================================ |
| // Branch Instructions -- short offset versions |
| // |
| // These instructions are used to replace jumps of a long offset (the default |
| // match) with jumps of a shorter offset. These instructions are all tagged |
| // with the ins_short_branch attribute, which causes the ADLC to suppress the |
| // match rules in general matching. Instead, the ADLC generates a conversion |
| // method in the MachNode which can be used to do in-place replacement of the |
| // long variant with the shorter variant. The compiler determines whether the |
| // short form can be used via the is_short_branch_offset() predicate in the |
| // machine-specific code section of the file. |
| |
| // Jump Direct - Label defines a relative address from JMP+1. |
| // Short replacement for the unconditional Goto: a single jmpb, declared as |
| // 2 bytes by size(2) and tagged with ins_short_branch. |
| instruct jmpDir_short(label labl) %{ |
| match(Goto); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ "JMP,s $labl" %} |
| size(2); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jmpb(*L); |
| %} |
| ins_pipe( pipe_jmp ); |
| ins_short_branch(1); |
| %} |
| |
| // Jump Direct Conditional - Label defines a relative address from Jcc+1. |
| // Short replacement for an If on cmpOp/eFlagsReg: one 2-byte jccb whose |
| // condition is the cmpcode carried by $cop. |
| instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{ |
| match(If cop cr); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ "J$cop,s $labl" %} |
| size(2); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jccb((Assembler::Condition)($cop$$cmpcode), *L); |
| %} |
| ins_pipe( pipe_jcc ); |
| ins_short_branch(1); |
| %} |
| |
| // Counted Loop End, short form - Label defines a relative address from Jcc+1. |
| // Same 2-byte jccb as jmpCon_short, but matched against CountedLoopEnd. |
| instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{ |
| match(CountedLoopEnd cop cr); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ "J$cop,s $labl\t# Loop end" %} |
| size(2); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jccb((Assembler::Condition)($cop$$cmpcode), *L); |
| %} |
| ins_pipe( pipe_jcc ); |
| ins_short_branch(1); |
| %} |
| |
| // Counted Loop End, short form, for the cmpOpU/eFlagsRegU operand pair - |
| // Label defines a relative address from Jcc+1. Emits one 2-byte jccb. |
| instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ |
| match(CountedLoopEnd cop cmp); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ "J$cop,us $labl\t# Loop end" %} |
| size(2); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jccb((Assembler::Condition)($cop$$cmpcode), *L); |
| %} |
| ins_pipe( pipe_jcc ); |
| ins_short_branch(1); |
| %} |
| |
| // Counted Loop End, short form, for the cmpOpUCF/eFlagsRegUCF operand pair. |
| // The emitted code is the same single 2-byte jccb as jmpLoopEndU_short. |
| instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ |
| match(CountedLoopEnd cop cmp); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ "J$cop,us $labl\t# Loop end" %} |
| size(2); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jccb((Assembler::Condition)($cop$$cmpcode), *L); |
| %} |
| ins_pipe( pipe_jcc ); |
| ins_short_branch(1); |
| %} |
| |
| // Jump Direct Conditional - using unsigned comparison. |
| // Short replacement for an If on cmpOpU/eFlagsRegU: one 2-byte jccb whose |
| // condition is the cmpcode carried by $cop. |
| instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ |
| match(If cop cmp); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ "J$cop,us $labl" %} |
| size(2); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jccb((Assembler::Condition)($cop$$cmpcode), *L); |
| %} |
| ins_pipe( pipe_jcc ); |
| ins_short_branch(1); |
| %} |
| |
| // Short replacement for an If on the cmpOpUCF/eFlagsRegUCF operand pair. |
| // The emitted code is the same single 2-byte jccb as jmpConU_short. |
| instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ |
| match(If cop cmp); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ "J$cop,us $labl" %} |
| size(2); |
| ins_encode %{ |
| Label* L = $labl$$label; |
| __ jccb((Assembler::Condition)($cop$$cmpcode), *L); |
| %} |
| ins_pipe( pipe_jcc ); |
| ins_short_branch(1); |
| %} |
| |
| // Short replacement for an If on cmpOpUCF2: two 2-byte jccb instructions |
| // (hence size(4)), the first of which tests the parity flag. |
| // notEqual: both the parity branch and the notEqual branch go to $labl. |
| // equal: the parity branch skips over the equal branch to a local label. |
| // Any other condition code is rejected with ShouldNotReachHere(). |
| instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ |
| match(If cop cmp); |
| effect(USE labl); |
| |
| ins_cost(300); |
| format %{ $$template |
| if ($cop$$cmpcode == Assembler::notEqual) { |
| $$emit$$"JP,u,s $labl\n\t" |
| $$emit$$"J$cop,u,s $labl" |
| } else { |
| $$emit$$"JP,u,s done\n\t" |
| $$emit$$"J$cop,u,s $labl\n\t" |
| $$emit$$"done:" |
| } |
| %} |
| size(4); |
| ins_encode %{ |
| Label* l = $labl$$label; |
| if ($cop$$cmpcode == Assembler::notEqual) { |
| __ jccb(Assembler::parity, *l); |
| __ jccb(Assembler::notEqual, *l); |
| } else if ($cop$$cmpcode == Assembler::equal) { |
| Label done; |
| __ jccb(Assembler::parity, done); |
| __ jccb(Assembler::equal, *l); |
| __ bind(done); |
| } else { |
| ShouldNotReachHere(); |
| } |
| %} |
| ins_pipe(pipe_jcc); |
| ins_short_branch(1); |
| %} |
| |
| // ============================================================================ |
| // Long Compare |
| // |
| // Currently we hold longs in 2 registers. Comparing such values efficiently |
| // is tricky. The flavor of compare used depends on whether we are testing |
| // for LT, LE, or EQ. For a simple LT test we can check just the sign bit. |
| // The GE test is the negated LT test. The LE test can be had by commuting |
| // the operands (yielding a GE test) and then negating; negate again for the |
| // GT test. The EQ test is done by ORcc'ing the high and low halves, and the |
| // NE test is negated from that. |
| |
| // Due to a shortcoming in the ADLC, it mixes up expressions like: |
| // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the |
| // difference between 'Y' and '0L'. The tree-matches for the CmpI sections |
| // are collapsed internally in the ADLC's dfa-gen code. The match for |
| // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the |
| // foo match ends up with the wrong leaf. One fix is to not match both |
| // reg-reg and reg-zero forms of long-compare. This is unfortunate because |
| // both forms beat the trinary form of long-compare and both are very useful |
| // on Intel which has so few registers. |
| |
| // Manifest a CmpL result in an integer register. Very painful. |
| // This is the test to avoid. |
| // $dst is zeroed first. The high halves are compared with the signed |
| // less/greater conditions; only if they are equal are the low halves compared, |
| // using the unsigned 'below' condition. $dst ends up decremented (src1 < src2), |
| // untouched at zero (equal) or incremented (src1 > src2). Flags are killed. |
| instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ |
| match(Set dst (CmpL3 src1 src2)); |
| effect( KILL flags ); |
| ins_cost(1000); |
| format %{ "XOR $dst,$dst\n\t" |
| "CMP $src1.hi,$src2.hi\n\t" |
| "JLT,s m_one\n\t" |
| "JGT,s p_one\n\t" |
| "CMP $src1.lo,$src2.lo\n\t" |
| "JB,s m_one\n\t" |
| "JEQ,s done\n" |
| "p_one:\tINC $dst\n\t" |
| "JMP,s done\n" |
| "m_one:\tDEC $dst\n" |
| "done:" %} |
| ins_encode %{ |
| Label p_one, m_one, done; |
| __ xorptr($dst$$Register, $dst$$Register); |
| __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); |
| __ jccb(Assembler::less, m_one); |
| __ jccb(Assembler::greater, p_one); |
| __ cmpl($src1$$Register, $src2$$Register); |
| __ jccb(Assembler::below, m_one); |
| __ jccb(Assembler::equal, done); |
| __ bind(p_one); |
| __ incrementl($dst$$Register); |
| __ jmpb(done); |
| __ bind(m_one); |
| __ decrementl($dst$$Register); |
| __ bind(done); |
| %} |
| ins_pipe( pipe_slow ); |
| %} |
| |
| //====== |
| // Manifest a CmpL result in the normal flags. Only good for LT or GE |
| // compares. Can be used for LE or GT compares by reversing arguments. |
| // NOT GOOD FOR EQ/NE tests. |
| // Compare-against-zero form: a single TEST (opcode 0x85) of the high word |
| // against itself; the low word is not examined. |
| instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ |
| match( Set flags (CmpL src zero )); |
| ins_cost(100); |
| format %{ "TEST $src.hi,$src.hi" %} |
| opcode(0x85); |
| ins_encode( OpcP, RegReg_Hi2( src, src ) ); |
| ins_pipe( ialu_cr_reg_reg ); |
| %} |
| |
| // Manifest a CmpL result in the normal flags. Only good for LT or GE |
| // compares. Can be used for LE or GT compares by reversing arguments. |
| // NOT GOOD FOR EQ/NE tests. |
| // Register-register form: per the format text, the low halves are compared, |
| // then the high half of src1 is copied into $tmp and src2's high half is |
| // subtracted from it with borrow, so neither source register is modified. |
| instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ |
| match( Set flags (CmpL src1 src2 )); |
| effect( TEMP tmp ); |
| ins_cost(300); |
| format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" |
| "MOV $tmp,$src1.hi\n\t" |
| "SBB $tmp,$src2.hi\t! Compute flags for long compare" %} |
| ins_encode( long_cmp_flags2( src1, src2, tmp ) ); |
| ins_pipe( ialu_cr_reg_reg ); |
| %} |
| |
| // Long compares reg < zero/reg OR reg >= zero/reg. |
| // Just a wrapper for a normal branch, plus the predicate test. |
| // The predicate restricts this rule to Bool tests of kind lt or ge, the only |
| // ones for which flagsReg_long_LTGE flags are meaningful. |
| instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ |
| match(If cmp flags); |
| effect(USE labl); |
| predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); |
| expand %{ |
| jmpCon(cmp,flags,labl); // JLT or JGE... |
| %} |
| %} |
| |
| // Compare 2 longs and CMOVE longs. |
| // Register source; requires CMOV support and an lt/ge test. The same |
| // condition is encoded twice: once for the low halves, once for the high. |
| instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{ |
| match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); |
| ins_cost(400); |
| format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" |
| "CMOV$cmp $dst.hi,$src.hi" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); |
| ins_pipe( pipe_cmov_reg_long ); |
| %} |
| |
| // Compare 2 longs and CMOVE longs, with the source loaded from memory |
| // (LoadL src). Requires CMOV support and an lt/ge test. Two conditional |
| // moves are encoded: RegMem for the first half and RegMem_Hi for the second. |
| instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{ |
| match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); |
| ins_cost(500); |
| format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" |
| "CMOV$cmp $dst.hi,$src.hi" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); |
| ins_pipe( pipe_cmov_reg_long ); |
| %} |
| |
| // Compare 2 longs and CMOVE ints. |
| // Register source; requires CMOV support and an lt/ge test on the long flags. |
| instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); |
| match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| format %{ "CMOV$cmp $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Compare 2 longs and CMOVE ints, with the source loaded from memory |
| // (LoadI src). Requires CMOV support and an lt/ge test on the long flags. |
| instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); |
| match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); |
| ins_cost(250); |
| format %{ "CMOV$cmp $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); |
| ins_pipe( pipe_cmov_mem ); |
| %} |
| |
| // Compare 2 longs and CMOVE ptrs. |
| // Register source; requires CMOV support and an lt/ge test on the long flags. |
| instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{ |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); |
| match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| format %{ "CMOV$cmp $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Compare 2 longs and CMOVE doubles (regDPR operands, selected when UseSSE<=1). |
| // '&&' binds tighter than '||', so the two BoolTest alternatives are |
| // parenthesized; otherwise a GE test would match regardless of UseSSE. |
| // Expands to fcmovDPR_regS. |
| instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{ |
| predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); |
| match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovDPR_regS(cmp,flags,dst,src); |
| %} |
| %} |
| |
| // Compare 2 longs and CMOVE doubles (regD operands, selected when UseSSE>=2). |
| // '&&' binds tighter than '||', so the two BoolTest alternatives are |
| // parenthesized; otherwise a GE test would match regardless of UseSSE. |
| // Expands to fcmovD_regS. |
| instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ |
| predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); |
| match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovD_regS(cmp,flags,dst,src); |
| %} |
| %} |
| |
| // Compare 2 longs and CMOVE floats (regFPR operands, selected when UseSSE==0). |
| // '&&' binds tighter than '||', so the two BoolTest alternatives are |
| // parenthesized; otherwise a GE test would match regardless of UseSSE. |
| // Expands to fcmovFPR_regS. |
| instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{ |
| predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); |
| match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovFPR_regS(cmp,flags,dst,src); |
| %} |
| %} |
| |
| // Compare 2 longs and CMOVE floats (regF operands, selected when UseSSE>=1). |
| // '&&' binds tighter than '||', so the two BoolTest alternatives are |
| // parenthesized; otherwise a GE test would match regardless of UseSSE. |
| // Expands to fcmovF_regS. |
| instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ |
| predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) ); |
| match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovF_regS(cmp,flags,dst,src); |
| %} |
| %} |
| |
| //====== |
| // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. |
| // Compare-against-zero form: per the format text, the low half is copied into |
| // $tmp and OR'ed with the high half, so the result is zero only when both |
| // halves are zero. The source register is not modified. |
| instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{ |
| match( Set flags (CmpL src zero )); |
| effect(TEMP tmp); |
| ins_cost(200); |
| format %{ "MOV $tmp,$src.lo\n\t" |
| "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %} |
| ins_encode( long_cmp_flags0( src, tmp ) ); |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. |
| // Register-register form: per the format text, the low halves are compared |
| // first and the high-half compare is skipped when they already differ. |
| // No temporary register is needed. |
| instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{ |
| match( Set flags (CmpL src1 src2 )); |
| ins_cost(200+300); |
| format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" |
| "JNE,s skip\n\t" |
| "CMP $src1.hi,$src2.hi\n\t" |
| "skip:\t" %} |
| ins_encode( long_cmp_flags1( src1, src2 ) ); |
| ins_pipe( ialu_cr_reg_reg ); |
| %} |
| |
| // Long compare reg == zero/reg OR reg != zero/reg |
| // Just a wrapper for a normal branch, plus the predicate test. |
| // The predicate restricts this rule to Bool tests of kind eq or ne, the only |
| // ones for which flagsReg_long_EQNE flags are meaningful. |
| instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{ |
| match(If cmp flags); |
| effect(USE labl); |
| predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); |
| expand %{ |
| jmpCon(cmp,flags,labl); // JEQ or JNE... |
| %} |
| %} |
| |
| // Compare 2 longs and CMOVE longs. |
| // Register source; requires CMOV support and an eq/ne test. The same |
| // condition is encoded twice: once for the low halves, once for the high. |
| instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ |
| match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); |
| ins_cost(400); |
| format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" |
| "CMOV$cmp $dst.hi,$src.hi" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); |
| ins_pipe( pipe_cmov_reg_long ); |
| %} |
| |
| // Compare 2 longs and CMOVE longs, with the source loaded from memory |
| // (LoadL src). Requires CMOV support and an eq/ne test. Two conditional |
| // moves are encoded: RegMem for the first half and RegMem_Hi for the second. |
| instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ |
| match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); |
| ins_cost(500); |
| format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" |
| "CMOV$cmp $dst.hi,$src.hi" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); |
| ins_pipe( pipe_cmov_reg_long ); |
| %} |
| |
| // Compare 2 longs and CMOVE ints. |
| // Register source; requires CMOV support and an eq/ne test on the long flags. |
| instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{ |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); |
| match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| format %{ "CMOV$cmp $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Compare 2 longs and CMOVE ints, with the source loaded from memory |
| // (LoadI src). Requires CMOV support and an eq/ne test on the long flags. |
| instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{ |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); |
| match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); |
| ins_cost(250); |
| format %{ "CMOV$cmp $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); |
| ins_pipe( pipe_cmov_mem ); |
| %} |
| |
| // Compare 2 longs and CMOVE ptrs. |
| // Register source; requires CMOV support and an eq/ne test on the long flags. |
| instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{ |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); |
| match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| format %{ "CMOV$cmp $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Compare 2 longs and CMOVE doubles (regDPR operands, selected when UseSSE<=1). |
| // '&&' binds tighter than '||', so the two BoolTest alternatives are |
| // parenthesized; otherwise an NE test would match regardless of UseSSE. |
| // Expands to fcmovDPR_regS. |
| instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{ |
| predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); |
| match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovDPR_regS(cmp,flags,dst,src); |
| %} |
| %} |
| |
| // Compare 2 longs and CMOVE doubles (regD operands, selected when UseSSE>=2). |
| // '&&' binds tighter than '||', so the two BoolTest alternatives are |
| // parenthesized; otherwise an NE test would match regardless of UseSSE. |
| // Expands to fcmovD_regS. |
| instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ |
| predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); |
| match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovD_regS(cmp,flags,dst,src); |
| %} |
| %} |
| |
| // Compare 2 longs and CMOVE floats (regFPR operands, selected when UseSSE==0). |
| // '&&' binds tighter than '||', so the two BoolTest alternatives are |
| // parenthesized; otherwise an NE test would match regardless of UseSSE. |
| // Expands to fcmovFPR_regS. |
| instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ |
| predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); |
| match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovFPR_regS(cmp,flags,dst,src); |
| %} |
| %} |
| |
| // Compare 2 longs and CMOVE floats (regF operands, selected when UseSSE>=1). |
| // '&&' binds tighter than '||', so the two BoolTest alternatives are |
| // parenthesized; otherwise an NE test would match regardless of UseSSE. |
| // Expands to fcmovF_regS. |
| instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ |
| predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) ); |
| match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovF_regS(cmp,flags,dst,src); |
| %} |
| %} |
| |
| //====== |
| // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. |
| // Same as cmpL_reg_flags_LEGT except must negate src |
| // Per the format text, $tmp is zeroed, compared against the low half and then |
| // has the high half subtracted with borrow, i.e. the flags describe 0 - src. |
| // Consumers must therefore use the commuted test. |
| instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{ |
| match( Set flags (CmpL src zero )); |
| effect( TEMP tmp ); |
| ins_cost(300); |
| format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t" |
| "CMP $tmp,$src.lo\n\t" |
| "SBB $tmp,$src.hi\n\t" %} |
| ins_encode( long_cmp_flags3(src, tmp) ); |
| ins_pipe( ialu_reg_reg_long ); |
| %} |
| |
| // Manifest a CmpL result in the normal flags. Only good for LE or GT compares. |
| // Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands |
| // requires a commuted test to get the same result. |
| // The swap is visible in the encoding call: long_cmp_flags2 receives |
| // (src2, src1, tmp) rather than (src1, src2, tmp). |
| instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{ |
| match( Set flags (CmpL src1 src2 )); |
| effect( TEMP tmp ); |
| ins_cost(300); |
| format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t" |
| "MOV $tmp,$src2.hi\n\t" |
| "SBB $tmp,$src1.hi\t! Compute flags for long compare" %} |
| ins_encode( long_cmp_flags2( src2, src1, tmp ) ); |
| ins_pipe( ialu_cr_reg_reg ); |
| %} |
| |
| // Long compares reg <= zero/reg OR reg > zero/reg. |
| // Just a wrapper for a normal branch, plus the predicate test. |
| // The predicate restricts this rule to Bool tests of kind gt or le. The |
| // condition operand is cmpOp_commute because the LEGT flag producers compare |
| // with swapped operands. |
| instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{ |
| match(If cmp flags); |
| effect(USE labl); |
| predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); |
| ins_cost(300); |
| expand %{ |
| jmpCon(cmp,flags,labl); // JGT or JLE... |
| %} |
| %} |
| |
| // Compare 2 longs and CMOVE longs. |
| // Register source; requires CMOV support and an le/gt test, with the |
| // condition taken from a cmpOp_commute operand. The same condition is encoded |
| // twice: once for the low halves, once for the high. |
| instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{ |
| match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); |
| ins_cost(400); |
| format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" |
| "CMOV$cmp $dst.hi,$src.hi" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); |
| ins_pipe( pipe_cmov_reg_long ); |
| %} |
| |
| // Compare 2 longs and CMOVE longs, with the source loaded from memory |
| // (LoadL src). Requires CMOV support and an le/gt test, with the condition |
| // taken from a cmpOp_commute operand. Two conditional moves are encoded: |
| // RegMem for the first half and RegMem_Hi for the second. |
| // NOTE(review): the format text shows "$src.hi+4" here while the LTGE/EQNE |
| // siblings show "$src.hi"; this only affects the printed listing. |
| instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{ |
| match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); |
| ins_cost(500); |
| format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" |
| "CMOV$cmp $dst.hi,$src.hi+4" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) ); |
| ins_pipe( pipe_cmov_reg_long ); |
| %} |
| |
| // Compare 2 longs and CMOVE ints. |
| // Register source; requires CMOV support and an le/gt test, with the |
| // condition taken from a cmpOp_commute operand. |
| instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{ |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); |
| match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| format %{ "CMOV$cmp $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Compare 2 longs and CMOVE ints, with the source loaded from memory |
| // (LoadI src). Requires CMOV support and an le/gt test, with the condition |
| // taken from a cmpOp_commute operand. |
| instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{ |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); |
| match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); |
| ins_cost(250); |
| format %{ "CMOV$cmp $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegMem( dst, src ) ); |
| ins_pipe( pipe_cmov_mem ); |
| %} |
| |
| // Compare 2 longs and CMOVE ptrs. |
| // Register source; requires CMOV support and an le/gt test, with the |
| // condition taken from a cmpOp_commute operand. |
| instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{ |
| predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); |
| match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| format %{ "CMOV$cmp $dst,$src" %} |
| opcode(0x0F,0x40); |
| ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); |
| ins_pipe( pipe_cmov_reg ); |
| %} |
| |
| // Compare 2 longs and CMOVE doubles (regDPR operands, selected when UseSSE<=1). |
| // '&&' binds tighter than '||', so the two BoolTest alternatives are |
| // parenthesized; otherwise a GT test would match regardless of UseSSE. |
| // Expands to fcmovDPR_regS. |
| instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{ |
| predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) ); |
| match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovDPR_regS(cmp,flags,dst,src); |
| %} |
| %} |
| |
| // Compare 2 longs and CMOVE doubles (regD operands, selected when UseSSE>=2). |
| // '&&' binds tighter than '||', so the two BoolTest alternatives are |
| // parenthesized; otherwise a GT test would match regardless of UseSSE. |
| // Expands to fcmovD_regS. |
| instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ |
| predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) ); |
| match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovD_regS(cmp,flags,dst,src); |
| %} |
| %} |
| |
| // Compare 2 longs and CMOVE floats (regFPR operands, selected when UseSSE==0). |
| // '&&' binds tighter than '||', so the two BoolTest alternatives are |
| // parenthesized; otherwise a GT test would match regardless of UseSSE. |
| // Expands to fcmovFPR_regS. |
| instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{ |
| predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) ); |
| match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovFPR_regS(cmp,flags,dst,src); |
| %} |
| %} |
| |
| |
| // Compare 2 longs and CMOVE floats (regF operands, selected when UseSSE>=1). |
| // '&&' binds tighter than '||', so the two BoolTest alternatives are |
| // parenthesized; otherwise a GT test would match regardless of UseSSE. |
| // Expands to fcmovF_regS. |
| instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ |
| predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) ); |
| match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); |
| ins_cost(200); |
| expand %{ |
| fcmovF_regS(cmp,flags,dst,src); |
| %} |
| %} |
| |
| |
| // ============================================================================ |
| // Procedure Call/Return Instructions |
| // Call Java Static Instruction |
| // Note: If this code changes, the corresponding ret_addr_offset() and |
| // compute_padding() functions will have to be adjusted. |
| // Encoded as the sequence pre_call_resets, Java_Static_Call, call_epilog, |
| // post_call_FPU with primary opcode 0xE8, and aligned via ins_alignment(4). |
| instruct CallStaticJavaDirect(method meth) %{ |
| match(CallStaticJava); |
| effect(USE meth); |
| |
| ins_cost(300); |
| format %{ "CALL,static " %} |
| opcode(0xE8); /* E8 cd */ |
| ins_encode( pre_call_resets, |
| Java_Static_Call( meth ), |
| call_epilog, |
| post_call_FPU ); |
| ins_pipe( pipe_slow ); |
| ins_alignment(4); |
| %} |
| |
| // Call Java Dynamic Instruction |
| // Note: If this code changes, the corresponding ret_addr_offset() and |
| // compute_padding() functions will have to be adjusted. |
| // Per the format text a MOV of a placeholder oop into EAX precedes the call. |
| // Encoded as pre_call_resets, Java_Dynamic_Call, call_epilog, post_call_FPU |
| // with primary opcode 0xE8, and aligned via ins_alignment(4). |
| instruct CallDynamicJavaDirect(method meth) %{ |
| match(CallDynamicJava); |
| effect(USE meth); |
| |
| ins_cost(300); |
| format %{ "MOV EAX,(oop)-1\n\t" |
| "CALL,dynamic" %} |
| opcode(0xE8); /* E8 cd */ |
| ins_encode( pre_call_resets, |
| Java_Dynamic_Call( meth ), |
| call_epilog, |
| post_call_FPU ); |
| ins_pipe( pipe_slow ); |
| ins_alignment(4); |
| %} |
| |
| // Call Runtime Instruction |
| // Encoded as pre_call_resets, FFree_Float_Stack_All, Java_To_Runtime, |
| // post_call_FPU. Unlike the Java call rules there is no call_epilog step and |
| // no ins_alignment requirement. |
| instruct CallRuntimeDirect(method meth) %{ |
| match(CallRuntime ); |
| effect(USE meth); |
| |
| ins_cost(300); |
| format %{ "CALL,runtime " %} |
| opcode(0xE8); /* E8 cd */ |
| // Use FFREEs to clear entries in float stack |
| ins_encode( pre_call_resets, |
| FFree_Float_Stack_All, |
| Java_To_Runtime( meth ), |
| post_call_FPU ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Call runtime without safepoint |
| // Same encoding sequence as CallRuntimeDirect, with an additional |
| // Verify_FPU_For_Leaf step between the call and post_call_FPU. |
| instruct CallLeafDirect(method meth) %{ |
| match(CallLeaf); |
| effect(USE meth); |
| |
| ins_cost(300); |
| format %{ "CALL_LEAF,runtime " %} |
| opcode(0xE8); /* E8 cd */ |
| ins_encode( pre_call_resets, |
| FFree_Float_Stack_All, |
| Java_To_Runtime( meth ), |
| Verify_FPU_For_Leaf, post_call_FPU ); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| // Leaf runtime call for CallLeafNoFP nodes. Unlike CallLeafDirect, the |
| // encoding consists of Java_To_Runtime alone: there are no pre_call_resets, |
| // FFree_Float_Stack_All, Verify_FPU_For_Leaf or post_call_FPU steps. |
| instruct CallLeafNoFPDirect(method meth) %{ |
| match(CallLeafNoFP); |
| effect(USE meth); |
| |
| ins_cost(300); |
| format %{ "CALL_LEAF_NOFP,runtime " %} |
| opcode(0xE8); /* E8 cd */ |
| ins_encode(Java_To_Runtime(meth)); |
| ins_pipe( pipe_slow ); |
| %} |
| |
| |
| // Return Instruction |
| // Remove the return address & jump to it. |
| // Encoded as the single primary opcode byte 0xC3 (OpcP); takes no operands. |
| instruct Ret() %{ |
| match(Return); |
| format %{ "RET" %} |
| opcode(0xC3); |
| ins_encode(OpcP); |
| ins_pipe( pipe_jmp ); |
| %} |
| |
| // Tail Call; Jump from runtime stub to Java code. |
| // Also known as an 'interprocedural jump'. |
| // Target of jump will eventually return to caller. |
| // TailJump below removes the return address. |
| // method_oop is pinned to EBX by its operand class and is not referenced by |
| // the encoding; only the indirect JMP (FF /4) through $jump_target is emitted. |
| instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{ |
| match(TailCall jump_target method_oop ); |
| ins_cost(300); |
| format %{ "JMP $jump_target \t# EBX holds method oop" %} |
| opcode(0xFF, 0x4); /* Opcode FF /4 */ |
| ins_encode( OpcP, RegOpc(jump_target) ); |
| ins_pipe( pipe_jmp ); |
| %} |
| |
| |
| // Tail Jump; remove the return address; jump to target. |
| // TailCall above leaves the return address around. |
| // The return address is discarded by the enc_pop_rdx step (shown as POP EDX |
| // in the format text) before the indirect JMP (FF /4). ex_oop is pinned to |
| // EAX by its operand class and is not referenced by the encoding. |
| instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ |
| match( TailJump jump_target ex_oop ); |
| ins_cost(300); |
| format %{ "POP EDX\t# pop return address into dummy\n\t" |
| "JMP $jump_target " %} |
| opcode(0xFF, 0x4); /* Opcode FF /4 */ |
| ins_encode( enc_pop_rdx, |
| OpcP, RegOpc(jump_target) ); |
| ins_pipe( pipe_jmp ); |
| %} |
| |
| // Create exception oop: created by stack-crawling runtime code. |
| // Created exception is now available to this handler, and is setup |
| // just prior to jumping to this handler. No code emitted. |
| // The rule only tells the matcher that the CreateEx result is in EAX: |
| // size(0) and the empty ins_encode() mean no bytes are generated. |
| instruct CreateException( eAXRegP ex_oop ) |
| %{ |
| match(Set ex_oop (CreateEx)); |
| |
| size(0); |
| // use the following format syntax |
| format %{ "# exception oop is in EAX; no code emitted" %} |
| ins_encode(); |
| ins_pipe( empty ); |
| %} |
| |
| |
| // Rethrow exception: |
| // The exception oop will come in the first argument position. |
| // Then JUMP (not call) to the rethrow stub code. |
| // Takes no operands; the jump itself is produced by the enc_rethrow encoding. |
| instruct RethrowException() |
| %{ |
| match(Rethrow); |
| |
| // use the following format syntax |
| format %{ "JMP rethrow_stub" %} |
| ins_encode(enc_rethrow); |
| ins_pipe( pipe_jmp ); |
| %} |
| |
| // inlined locking and unlocking |
| |
| // Inlined fast-path lock, RTM variant: selected when the current compilation |
| // uses RTM. The result is delivered in the flags register. $box is consumed |
| // and destroyed (USE_KILL); $tmp, $scr, $cx1 and $cx2 are temporaries. |
| // fast_lock receives the RTM counters, the method's MethodData, use_rtm = true |
| // and the compilation's profile_rtm() setting. |
| instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{ |
| predicate(Compile::current()->use_rtm()); |
| match(Set cr (FastLock object box)); |
| effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box); |
| ins_cost(300); |
| format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %} |
| ins_encode %{ |
| __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, |
| $scr$$Register, $cx1$$Register, $cx2$$Register, |
| _counters, _rtm_counters, _stack_rtm_counters, |
| ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), |
| true, ra_->C->profile_rtm()); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // Inlined fast-path lock, non-RTM variant: selected when the current |
| // compilation does not use RTM. The result is delivered in the flags register. |
| // $box is consumed and destroyed (USE_KILL); $tmp and $scr are temporaries. |
| // The RTM-only arguments of fast_lock are passed as noreg/NULL/false. |
| instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{ |
| predicate(!Compile::current()->use_rtm()); |
| match(Set cr (FastLock object box)); |
| effect(TEMP tmp, TEMP scr, USE_KILL box); |
| ins_cost(300); |
| format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %} |
| ins_encode %{ |
| __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, |
| $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| // Inlined fast-path unlock. The result is delivered in the flags register. |
| // $box is consumed and destroyed (USE_KILL); $tmp is a temporary. A single |
| // rule serves both modes: the RTM setting is passed to fast_unlock at |
| // code-emission time instead of being tested in a predicate. |
| instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ |
| match(Set cr (FastUnlock object box)); |
| effect(TEMP tmp, USE_KILL box); |
| ins_cost(300); |
| format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %} |
| ins_encode %{ |
| __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm()); |
| %} |
| ins_pipe(pipe_slow); |
| %} |
| |
| |
| |
| // ============================================================================ |
| // Safepoint Instruction |
| // Safepoint poll: declared as a fixed 6-byte instruction (size(6)) that |
| // clobbers the flags. The poll itself is produced by the Safepoint_Poll |
| // encoding; the format text shows it as a test against the polling address. |
| instruct safePoint_poll(eFlagsReg cr) %{ |
| match(SafePoint); |
| effect(KILL cr); |
| |
| // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page. |
| // On SPARC that might be acceptable as we can generate the address with |
| // just a sethi, saving an or. By polling at offset 0 we can end up |
| // putting additional pressure on the index-0 in the D$. Because of |
| // alignment (just like the situation at hand) the lower indices tend |
| // to see more traffic. It'd be better to change the polling address |
| // to offset 0 of the last $line in the polling page. |
| |
| format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %} |
| ins_cost(125); |
| size(6) ; |
| ins_encode( Safepoint_Poll() ); |
| ins_pipe( ialu_reg_mem ); |
| %} |
| |
| |
| // ============================================================================ |
| // This name is KNOWN by the ADLC and cannot be changed. |
| // The ADLC forces a 'TypeRawPtr::BOTTOM' output type |
| // for this guy. |
| // Loads the current thread pointer into $dst (matches the ThreadLocal |
| // ideal node). The flags register is declared as killed. |
| instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{ |
| match(Set dst (ThreadLocal)); |
| effect(DEF dst, KILL cr); |
| |
| format %{ "MOV $dst, Thread::current()" %} |
| ins_encode %{ |
| // Hand the allocated destination register straight to the assembler helper. |
| __ get_thread($dst$$Register); |
| %} |
| ins_pipe( ialu_reg_fat ); |
| %} |
| |
| |
| |
| //----------PEEPHOLE RULES----------------------------------------------------- |
| // These must follow all instruction definitions as they use the names |
| // defined in the instructions definitions. |
| // |
| // peepmatch ( root_instr_name [preceding_instruction]* ); |
| // |
| // peepconstraint ( |
| // instruction_number.operand_name relational_op instruction_number.operand_name |
| // [, ...] ); |
| // // instruction numbers are zero-based using left to right order in peepmatch |
| // |
| // peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); |
| // // provide an instruction_number.operand_name for each operand that appears |
| // // in the replacement instruction's match rule |
| // |
| // ---------VM FLAGS--------------------------------------------------------- |
| // |
| // All peephole optimizations can be turned off using -XX:-OptoPeephole |
| // |
| // Each peephole rule is given an identifying number starting with zero and |
| // increasing by one in the order seen by the parser. An individual peephole |
| // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# |
| // on the command-line. |
| // |
| // ---------CURRENT LIMITATIONS---------------------------------------------- |
| // |
| // Only match adjacent instructions in same basic block |
| // Only equality constraints |
| // Only constraints between operands, not (0.dest_reg == EAX_enc) |
| // Only one replacement instruction |
| // |
| // ---------EXAMPLE---------------------------------------------------------- |
| // |
| // // pertinent parts of existing instructions in architecture description |
| // instruct movI(rRegI dst, rRegI src) %{ |
| // match(Set dst (CopyI src)); |
| // %} |
| // |
| // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{ |
| // match(Set dst (AddI dst src)); |
| // effect(KILL cr); |
| // %} |
| // |
| // // Change (inc mov) to lea |
| // peephole %{ |
| // // increment preceded by register-register move |
| // peepmatch ( incI_eReg movI ); |
| // // require that the destination register of the increment |
| // // match the destination register of the move |
| // peepconstraint ( 0.dst == 1.dst ); |
| // // construct a replacement instruction that sets |
| // // the destination to ( move's source register + one ) |
| // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); |
| // %} |
| // |
| // Implementation no longer uses movX instructions since |
| // machine-independent system no longer uses CopyX nodes. |
| // |
| // peephole %{ |
| // peepmatch ( incI_eReg movI ); |
| // peepconstraint ( 0.dst == 1.dst ); |
| // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); |
| // %} |
| // |
| // peephole %{ |
| // peepmatch ( decI_eReg movI ); |
| // peepconstraint ( 0.dst == 1.dst ); |
| // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); |
| // %} |
| // |
| // peephole %{ |
| // peepmatch ( addI_eReg_imm movI ); |
| // peepconstraint ( 0.dst == 1.dst ); |
| // peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); |
| // %} |
| // |
| // peephole %{ |
| // peepmatch ( addP_eReg_imm movP ); |
| // peepconstraint ( 0.dst == 1.dst ); |
| // peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); |
| // %} |
| |
| // // Change load of spilled value to only a spill |
| // instruct storeI(memory mem, rRegI src) %{ |
| // match(Set mem (StoreI mem src)); |
| // %} |
| // |
| // instruct loadI(rRegI dst, memory mem) %{ |
| // match(Set dst (LoadI mem)); |
| // %} |
| // |
| // Peephole: a loadI (instruction 0, the root) immediately preceded by a |
| // storeI (instruction 1) is collapsed into the store alone. |
| peephole %{ |
| peepmatch ( loadI storeI ); |
| // Applies only when the store's source register is the load's destination |
| // register and both instructions use the same memory operand. |
| peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); |
| // The replacement is built entirely from the store's own operands. |
| peepreplace ( storeI( 1.mem 1.mem 1.src ) ); |
| %} |
| |
| //----------SMARTSPILL RULES--------------------------------------------------- |
| // These must follow all instruction definitions as they use the names |
| // defined in the instructions definitions. |