/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.inline.hpp"
#include "gc_interface/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/cardTableModRefBS.hpp"
#include "memory/resourceArea.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/interfaceSupport.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#if INCLUDE_ALL_GCS
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
#include "gc_implementation/g1/heapRegion.hpp"
#endif // INCLUDE_ALL_GCS
#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
int AbstractAssembler::code_fill_byte() {
return 0x00; // illegal instruction 0x00000000
}
void Assembler::print_instruction(int inst) {
Unimplemented();
}
// Patch instruction `inst' at offset `inst_pos' to refer to
// `dest_pos' and return the resulting instruction. We should have
// pcs, not offsets, but since all is relative, it will work out fine.
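// Illustrative example (not from the original source): a "b" instruction
// assembled at offset 0x100 that should branch to offset 0x80 needs the
// displacement 0x80 - 0x100 = -0x80; patched_branch masks out the old LI
// field and inserts li(-0x80), leaving the opcode and the AA/LK bits intact.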
int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
int m = 0; // mask for displacement field
int v = 0; // new value for displacement field
switch (inv_op_ppc(inst)) {
case b_op: m = li(-1); v = li(disp(dest_pos, inst_pos)); break;
case bc_op: m = bd(-1); v = bd(disp(dest_pos, inst_pos)); break;
default: ShouldNotReachHere();
}
return (inst & ~m) | v;
}
// Return the offset, relative to _code_begin, of the destination of
// the branch inst at offset pos.
int Assembler::branch_destination(int inst, int pos) {
int r = 0;
switch (inv_op_ppc(inst)) {
case b_op: r = bxx_destination_offset(inst, pos); break;
case bc_op: r = inv_bd_field(inst, pos); break;
default: ShouldNotReachHere();
}
return r;
}
// Low-level one-instruction andi macro.
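// Illustrative cases (not from the original source):
//   andi(a, s, 0x00ff);  // 2^8 - 1 -> emitted as clrldi(a, s, 56): keep the low 8 bits
//   andi(a, s, 0x0100);  // 2^8     -> emitted as rlwinm(a, s, 0, 23, 23): isolate bit 8
//   andi(a, s, 0x1234);  // other   -> falls back to the record form andi_(a, s, 0x1234)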
void Assembler::andi(Register a, Register s, const int ui16) {
assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
if (is_power_of_2_long(((jlong) ui16)+1)) {
// pow2minus1
clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
} else if (is_power_of_2_long((jlong) ui16)) {
// pow2
rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
} else if (is_power_of_2_long((jlong)-ui16)) {
// negpow2
clrrdi(a, s, log2_long((jlong)-ui16));
} else {
andi_(a, s, ui16);
}
}
// RegisterOrConstant version.
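// Illustrative usage (not from the original source): these overloads pick the
// cheapest encoding for the RegisterOrConstant operand, e.g.
//   ld(R3, RegisterOrConstant(R4), R5);        // register       -> ldx R3, R4, R5
//   ld(R3, RegisterOrConstant(8), R5);         // small constant -> ld  R3, 8(R5)
//   ld(R3, RegisterOrConstant(1L << 20), R5);  // large constant -> load_const_optimized + ldx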
void Assembler::ld(Register d, RegisterOrConstant roc, Register s1) {
if (roc.is_constant()) {
if (s1 == noreg) {
int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
Assembler::ld(d, simm16_rest, d);
} else if (is_simm(roc.as_constant(), 16)) {
Assembler::ld(d, roc.as_constant(), s1);
} else {
load_const_optimized(d, roc.as_constant());
Assembler::ldx(d, d, s1);
}
} else {
if (s1 == noreg)
Assembler::ld(d, 0, roc.as_register());
else
Assembler::ldx(d, roc.as_register(), s1);
}
}
void Assembler::lwa(Register d, RegisterOrConstant roc, Register s1) {
if (roc.is_constant()) {
if (s1 == noreg) {
int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
Assembler::lwa(d, simm16_rest, d);
} else if (is_simm(roc.as_constant(), 16)) {
Assembler::lwa(d, roc.as_constant(), s1);
} else {
load_const_optimized(d, roc.as_constant());
Assembler::lwax(d, d, s1);
}
} else {
if (s1 == noreg)
Assembler::lwa(d, 0, roc.as_register());
else
Assembler::lwax(d, roc.as_register(), s1);
}
}
void Assembler::lwz(Register d, RegisterOrConstant roc, Register s1) {
if (roc.is_constant()) {
if (s1 == noreg) {
int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
Assembler::lwz(d, simm16_rest, d);
} else if (is_simm(roc.as_constant(), 16)) {
Assembler::lwz(d, roc.as_constant(), s1);
} else {
load_const_optimized(d, roc.as_constant());
Assembler::lwzx(d, d, s1);
}
} else {
if (s1 == noreg)
Assembler::lwz(d, 0, roc.as_register());
else
Assembler::lwzx(d, roc.as_register(), s1);
}
}
void Assembler::lha(Register d, RegisterOrConstant roc, Register s1) {
if (roc.is_constant()) {
if (s1 == noreg) {
int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
Assembler::lha(d, simm16_rest, d);
} else if (is_simm(roc.as_constant(), 16)) {
Assembler::lha(d, roc.as_constant(), s1);
} else {
load_const_optimized(d, roc.as_constant());
Assembler::lhax(d, d, s1);
}
} else {
if (s1 == noreg)
Assembler::lha(d, 0, roc.as_register());
else
Assembler::lhax(d, roc.as_register(), s1);
}
}
void Assembler::lhz(Register d, RegisterOrConstant roc, Register s1) {
if (roc.is_constant()) {
if (s1 == noreg) {
int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
Assembler::lhz(d, simm16_rest, d);
} else if (is_simm(roc.as_constant(), 16)) {
Assembler::lhz(d, roc.as_constant(), s1);
} else {
load_const_optimized(d, roc.as_constant());
Assembler::lhzx(d, d, s1);
}
} else {
if (s1 == noreg)
Assembler::lhz(d, 0, roc.as_register());
else
Assembler::lhzx(d, roc.as_register(), s1);
}
}
void Assembler::lbz(Register d, RegisterOrConstant roc, Register s1) {
if (roc.is_constant()) {
if (s1 == noreg) {
int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
Assembler::lbz(d, simm16_rest, d);
} else if (is_simm(roc.as_constant(), 16)) {
Assembler::lbz(d, roc.as_constant(), s1);
} else {
load_const_optimized(d, roc.as_constant());
Assembler::lbzx(d, d, s1);
}
} else {
if (s1 == noreg)
Assembler::lbz(d, 0, roc.as_register());
else
Assembler::lbzx(d, roc.as_register(), s1);
}
}
void Assembler::std(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
if (roc.is_constant()) {
if (s1 == noreg) {
guarantee(tmp != noreg, "Need tmp reg to encode large constants");
int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
Assembler::std(d, simm16_rest, tmp);
} else if (is_simm(roc.as_constant(), 16)) {
Assembler::std(d, roc.as_constant(), s1);
} else {
guarantee(tmp != noreg, "Need tmp reg to encode large constants");
load_const_optimized(tmp, roc.as_constant());
Assembler::stdx(d, tmp, s1);
}
} else {
if (s1 == noreg)
Assembler::std(d, 0, roc.as_register());
else
Assembler::stdx(d, roc.as_register(), s1);
}
}
void Assembler::stw(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
if (roc.is_constant()) {
if (s1 == noreg) {
guarantee(tmp != noreg, "Need tmp reg to encode large constants");
int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
Assembler::stw(d, simm16_rest, tmp);
} else if (is_simm(roc.as_constant(), 16)) {
Assembler::stw(d, roc.as_constant(), s1);
} else {
guarantee(tmp != noreg, "Need tmp reg to encode large constants");
load_const_optimized(tmp, roc.as_constant());
Assembler::stwx(d, tmp, s1);
}
} else {
if (s1 == noreg)
Assembler::stw(d, 0, roc.as_register());
else
Assembler::stwx(d, roc.as_register(), s1);
}
}
void Assembler::sth(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
if (roc.is_constant()) {
if (s1 == noreg) {
guarantee(tmp != noreg, "Need tmp reg to encode large constants");
int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
Assembler::sth(d, simm16_rest, tmp);
} else if (is_simm(roc.as_constant(), 16)) {
Assembler::sth(d, roc.as_constant(), s1);
} else {
guarantee(tmp != noreg, "Need tmp reg to encode large constants");
load_const_optimized(tmp, roc.as_constant());
Assembler::sthx(d, tmp, s1);
}
} else {
if (s1 == noreg)
Assembler::sth(d, 0, roc.as_register());
else
Assembler::sthx(d, roc.as_register(), s1);
}
}
void Assembler::stb(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
if (roc.is_constant()) {
if (s1 == noreg) {
guarantee(tmp != noreg, "Need tmp reg to encode large constants");
int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
Assembler::stb(d, simm16_rest, tmp);
} else if (is_simm(roc.as_constant(), 16)) {
Assembler::stb(d, roc.as_constant(), s1);
} else {
guarantee(tmp != noreg, "Need tmp reg to encode large constants");
load_const_optimized(tmp, roc.as_constant());
Assembler::stbx(d, tmp, s1);
}
} else {
if (s1 == noreg)
Assembler::stb(d, 0, roc.as_register());
else
Assembler::stbx(d, roc.as_register(), s1);
}
}
void Assembler::add(Register d, RegisterOrConstant roc, Register s1) {
if (roc.is_constant()) {
intptr_t c = roc.as_constant();
assert(is_simm(c, 16), "too big");
addi(d, s1, (int)c);
}
else add(d, roc.as_register(), s1);
}
void Assembler::subf(Register d, RegisterOrConstant roc, Register s1) {
if (roc.is_constant()) {
intptr_t c = roc.as_constant();
assert(is_simm(-c, 16), "too big");
addi(d, s1, (int)-c);
}
else subf(d, roc.as_register(), s1);
}
void Assembler::cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1) {
if (roc.is_constant()) {
intptr_t c = roc.as_constant();
assert(is_simm(c, 16), "too big");
cmpdi(d, s1, (int)c);
}
else cmpd(d, roc.as_register(), s1);
}
// Load a 64-bit constant. Patchable.
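// Illustrative example (not from the original source): with tmp == noreg,
// load_const(d, 0x1122334455667788) emits the fixed, patchable sequence
//   lis  d, 0x1122       // d = 0x0000000011220000
//   ori  d, d, 0x3344    // d = 0x0000000011223344
//   sldi d, d, 32        // d = 0x1122334400000000
//   oris d, d, 0x5566    // d = 0x1122334455660000
//   ori  d, d, 0x7788    // d = 0x1122334455667788
// With a tmp register, the two 32-bit halves are built in parallel and merged
// with insrdi, which shortens the dependency chain.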
void Assembler::load_const(Register d, long x, Register tmp) {
// 64-bit value: x = xa xb xc xd
int xa = (x >> 48) & 0xffff;
int xb = (x >> 32) & 0xffff;
int xc = (x >> 16) & 0xffff;
int xd = (x >> 0) & 0xffff;
if (tmp == noreg) {
Assembler::lis( d, (int)(short)xa);
Assembler::ori( d, d, (unsigned int)xb);
Assembler::sldi(d, d, 32);
Assembler::oris(d, d, (unsigned int)xc);
Assembler::ori( d, d, (unsigned int)xd);
} else {
// exploit instruction level parallelism if we have a tmp register
assert_different_registers(d, tmp);
Assembler::lis(tmp, (int)(short)xa);
Assembler::lis(d, (int)(short)xc);
Assembler::ori(tmp, tmp, (unsigned int)xb);
Assembler::ori(d, d, (unsigned int)xd);
Assembler::insrdi(d, tmp, 32, 0);
}
}
// Load a 64-bit constant, optimized, not identifiable.
// Tmp can be used to increase ILP. Set return_simm16_rest = true to get a
// 16-bit immediate offset which the caller must add (e.g. as the
// displacement of a following load or store).
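// Illustrative usage (not from the original source), mirroring the
// RegisterOrConstant load helpers above:
//   int rest = load_const_optimized(d, value, noreg, /*return_simm16_rest=*/true);
//   ld(d, rest, d);  // the 16-bit remainder becomes the load's displacement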
int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
// Avoid accidentally trying to use R0 for indexed addressing.
assert(d != R0, "R0 not allowed");
assert_different_registers(d, tmp);
short xa, xb, xc, xd; // Four 16-bit chunks of const.
long rem = x; // Remaining part of const.
xd = rem & 0xFFFF; // Lowest 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensate for sign extension.
if (rem == 0) { // opt 1: simm16
li(d, xd);
return 0;
}
xc = rem & 0xFFFF; // Next 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensate for sign extension.
if (rem == 0) { // opt 2: simm32
lis(d, xc);
} else { // High 32 bits needed.
if (tmp != noreg) { // opt 3: We have a temp reg.
// No carry propagation between xc and higher chunks here (use logical instructions).
xa = (x >> 48) & 0xffff;
xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
bool load_xa = (xa != 0) || (xb < 0);
bool return_xd = false;
if (load_xa) { lis(tmp, xa); }
if (xc) { lis(d, xc); }
if (load_xa) {
if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
} else {
li(tmp, xb); // non-negative
}
if (xc) {
if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
else if (xd) { addi(d, d, xd); }
} else {
li(d, xd);
}
insrdi(d, tmp, 32, 0);
return return_xd ? xd : 0; // non-negative
}
xb = rem & 0xFFFF; // Next 16-bit chunk.
rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensate for sign extension.
xa = rem & 0xFFFF; // Highest 16-bit chunk.
// opt 4: avoid adding 0
if (xa) { // Highest 16 bits needed?
lis(d, xa);
if (xb) { addi(d, d, xb); }
} else {
li(d, xb);
}
sldi(d, d, 32);
if (xc) { addis(d, d, xc); }
}
// opt 5: Return offset to be inserted into following instruction.
if (return_simm16_rest) return xd;
if (xd) { addi(d, d, xd); }
return 0;
}
#ifndef PRODUCT
// Test of the PPC assembler.
void Assembler::test_asm() {
// PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
addi( R0, R1, 10);
addis( R5, R2, 11);
addic_( R3, R31, 42);
subfic( R21, R12, 2112);
add( R3, R2, R1);
add_( R11, R22, R30);
subf( R7, R6, R5);
subf_( R8, R9, R4);
addc( R11, R12, R13);
addc_( R14, R14, R14);
subfc( R15, R16, R17);
subfc_( R18, R20, R19);
adde( R20, R22, R24);
adde_( R29, R27, R26);
subfe( R28, R1, R0);
subfe_( R21, R11, R29);
neg( R21, R22);
neg_( R13, R23);
mulli( R0, R11, -31);
mulld( R1, R18, R21);
mulld_( R2, R17, R22);
mullw( R3, R16, R23);
mullw_( R4, R15, R24);
divd( R5, R14, R25);
divd_( R6, R13, R26);
divw( R7, R12, R27);
divw_( R8, R11, R28);
li( R3, -4711);
// PPC 1, section 3.3.9, Fixed-Point Compare Instructions
cmpi( CCR7, 0, R27, 4711);
cmp( CCR0, 1, R14, R11);
cmpli( CCR5, 1, R17, 45);
cmpl( CCR3, 0, R9, R10);
cmpwi( CCR7, R27, 4711);
cmpw( CCR0, R14, R11);
cmplwi( CCR5, R17, 45);
cmplw( CCR3, R9, R10);
cmpdi( CCR7, R27, 4711);
cmpd( CCR0, R14, R11);
cmpldi( CCR5, R17, 45);
cmpld( CCR3, R9, R10);
// PPC 1, section 3.3.11, Fixed-Point Logical Instructions
andi_( R4, R5, 0xff);
andis_( R12, R13, 0x7b51);
ori( R1, R4, 13);
oris( R3, R5, 177);
xori( R7, R6, 51);
xoris( R29, R0, 1);
andr( R17, R21, R16);
and_( R3, R5, R15);
orr( R2, R1, R9);
or_( R17, R15, R11);
xorr( R19, R18, R10);
xor_( R31, R21, R11);
nand( R5, R7, R3);
nand_( R3, R1, R0);
nor( R2, R3, R5);
nor_( R3, R6, R8);
andc( R25, R12, R11);
andc_( R24, R22, R21);
orc( R20, R10, R12);
orc_( R22, R2, R13);
nop();
// PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
sld( R5, R6, R8);
sld_( R3, R5, R9);
slw( R2, R1, R10);
slw_( R6, R26, R16);
srd( R16, R24, R8);
srd_( R21, R14, R7);
srw( R22, R25, R29);
srw_( R5, R18, R17);
srad( R7, R11, R0);
srad_( R9, R13, R1);
sraw( R7, R15, R2);
sraw_( R4, R17, R3);
sldi( R3, R18, 63);
sldi_( R2, R20, 30);
slwi( R1, R21, 30);
slwi_( R7, R23, 8);
srdi( R0, R19, 2);
srdi_( R12, R24, 5);
srwi( R13, R27, 6);
srwi_( R14, R29, 7);
sradi( R15, R30, 9);
sradi_( R16, R31, 19);
srawi( R17, R31, 15);
srawi_( R18, R31, 12);
clrrdi( R3, R30, 5);
clrldi( R9, R10, 11);
rldicr( R19, R20, 13, 15);
rldicr_(R20, R20, 16, 14);
rldicl( R21, R21, 30, 33);
rldicl_(R22, R1, 20, 25);
rlwinm( R23, R2, 25, 10, 11);
rlwinm_(R24, R3, 12, 13, 14);
// PPC 1, section 3.3.2 Fixed-Point Load Instructions
lwzx( R3, R5, R7);
lwz( R11, 0, R1);
lwzu( R31, -4, R11);
lwax( R3, R5, R7);
lwa( R31, -4, R11);
lhzx( R3, R5, R7);
lhz( R31, -4, R11);
lhzu( R31, -4, R11);
lhax( R3, R5, R7);
lha( R31, -4, R11);
lhau( R11, 0, R1);
lbzx( R3, R5, R7);
lbz( R31, -4, R11);
lbzu( R11, 0, R1);
ld( R31, -4, R11);
ldx( R3, R5, R7);
ldu( R31, -4, R11);
// PPC 1, section 3.3.3 Fixed-Point Store Instructions
stwx( R3, R5, R7);
stw( R31, -4, R11);
stwu( R11, 0, R1);
sthx( R3, R5, R7 );
sth( R31, -4, R11);
sthu( R31, -4, R11);
stbx( R3, R5, R7);
stb( R31, -4, R11);
stbu( R31, -4, R11);
std( R31, -4, R11);
stdx( R3, R5, R7);
stdu( R31, -4, R11);
// PPC 1, section 3.3.13 Move To/From System Register Instructions
mtlr( R3);
mflr( R3);
mtctr( R3);
mfctr( R3);
mtcrf( 0xff, R15);
mtcr( R15);
mtcrf( 0x03, R15);
mtcr( R15);
mfcr( R15);
// PPC 1, section 2.4.1 Branch Instructions
Label lbl1, lbl2, lbl3;
bind(lbl1);
b(pc());
b(pc() - 8);
b(lbl1);
b(lbl2);
b(lbl3);
bl(pc() - 8);
bl(lbl1);
bl(lbl2);
bcl(4, 10, pc() - 8);
bcl(4, 10, lbl1);
bcl(4, 10, lbl2);
bclr( 4, 6, 0);
bclrl(4, 6, 0);
bind(lbl2);
bcctr( 4, 6, 0);
bcctrl(4, 6, 0);
blt(CCR0, lbl2);
bgt(CCR1, lbl2);
beq(CCR2, lbl2);
bso(CCR3, lbl2);
bge(CCR4, lbl2);
ble(CCR5, lbl2);
bne(CCR6, lbl2);
bns(CCR7, lbl2);
bltl(CCR0, lbl2);
bgtl(CCR1, lbl2);
beql(CCR2, lbl2);
bsol(CCR3, lbl2);
bgel(CCR4, lbl2);
blel(CCR5, lbl2);
bnel(CCR6, lbl2);
bnsl(CCR7, lbl2);
blr();
sync();
icbi( R1, R2);
dcbst(R2, R3);
// FLOATING-POINT instructions (PPC).
// PPC 1, section 4.6.2 Floating-Point Load Instructions
lfs( F1, -11, R3);
lfsu(F2, 123, R4);
lfsx(F3, R5, R6);
lfd( F4, 456, R7);
lfdu(F5, 789, R8);
lfdx(F6, R10, R11);
// PPC 1, section 4.6.3 Floating-Point Store Instructions
stfs( F7, 876, R12);
stfsu( F8, 543, R13);
stfsx( F9, R14, R15);
stfd( F10, 210, R16);
stfdu( F11, 111, R17);
stfdx( F12, R18, R19);
// PPC 1, section 4.6.4 Floating-Point Move Instructions
fmr( F13, F14);
fmr_( F14, F15);
fneg( F16, F17);
fneg_( F18, F19);
fabs( F20, F21);
fabs_( F22, F23);
fnabs( F24, F25);
fnabs_(F26, F27);
// PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic
// Instructions
fadd( F28, F29, F30);
fadd_( F31, F0, F1);
fadds( F2, F3, F4);
fadds_(F5, F6, F7);
fsub( F8, F9, F10);
fsub_( F11, F12, F13);
fsubs( F14, F15, F16);
fsubs_(F17, F18, F19);
fmul( F20, F21, F22);
fmul_( F23, F24, F25);
fmuls( F26, F27, F28);
fmuls_(F29, F30, F31);
fdiv( F0, F1, F2);
fdiv_( F3, F4, F5);
fdivs( F6, F7, F8);
fdivs_(F9, F10, F11);
// PPC 1, section 4.6.6 Floating-Point Rounding and Conversion
// Instructions
frsp( F12, F13);
fctid( F14, F15);
fctidz(F16, F17);
fctiw( F18, F19);
fctiwz(F20, F21);
fcfid( F22, F23);
// PPC 1, section 4.6.7 Floating-Point Compare Instructions
fcmpu( CCR7, F24, F25);
tty->print_cr("\ntest_asm disassembly (0x%lx 0x%lx):", p2i(code()->insts_begin()), p2i(code()->insts_end()));
code()->decode();
}
#endif // !PRODUCT