| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224 |
- //
- // Generated by NVIDIA NVVM Compiler
- //
- // Compiler Build ID: UNKNOWN
- // Unknown Toolkit Version
- // Based on NVVM 7.0.1
- //
- .version 8.1
- .target sm_86, texmode_independent
- .address_size 32
- // .globl DynamicKernel_nop_fsub_fsub
- //
- // out[idx] = fuzzysub(fuzzysub(a, b), c)
- //
- // "fuzzysub(x, y)" (inlined twice below) computes x - y, except that it
- // returns +0.0 when x and y are same-signed, non-zero, and closer than a
- // relative tolerance (difference below 2^-48 of both magnitudes, with an
- // escape when all magnitudes are exactly-representable integers), and NaN
- // operands are first replaced with 0.0.
- // NOTE(review): this matches an approximate-equality-then-subtract pattern
- // emitted by the generating framework -- confirm against the producing
- // host source ("DynamicKernel_nop_fsub_fsub" suggests nop(a) fsub b fsub c).
- //
- .entry DynamicKernel_nop_fsub_fsub(
- .param .u32 .ptr .global .align 8 DynamicKernel_nop_fsub_fsub_param_0, // out: 8-byte-aligned f64 array in global memory (32-bit pointer)
- .param .f64 DynamicKernel_nop_fsub_fsub_param_1, // a
- .param .f64 DynamicKernel_nop_fsub_fsub_param_2, // b
- .param .f64 DynamicKernel_nop_fsub_fsub_param_3 // c
- )
- {
- .reg .pred %p<61>; // predicate temporaries
- .reg .b32 %r<14>; // 32-bit integers (index / address math, exponent checks)
- .reg .f64 %fd<42>; // f64 temporaries
- .reg .b64 %rd<7>; // 64-bit integers for float->integer exactness checks
- ld.param.u32 %r2, [DynamicKernel_nop_fsub_fsub_param_0]; // r2 = out base address
- ld.param.f64 %fd14, [DynamicKernel_nop_fsub_fsub_param_1]; // fd14 = a
- ld.param.f64 %fd15, [DynamicKernel_nop_fsub_fsub_param_2]; // fd15 = b
- ld.param.f64 %fd16, [DynamicKernel_nop_fsub_fsub_param_3]; // fd16 = c
- // Global element index: idx = ntid.x*ctaid.x + tid.x + %envreg3.
- // %envreg3 is a driver-supplied environment register -- presumably a
- // base element offset set by the launcher; confirm with the host code.
- mov.b32 %r3, %envreg3;
- mov.u32 %r4, %ctaid.x;
- mov.u32 %r5, %ntid.x;
- mov.u32 %r6, %tid.x;
- add.s32 %r7, %r6, %r3;
- mad.lo.s32 %r1, %r5, %r4, %r7; // r1 = idx
- // NaN-scrub c and b: setp.gtu (greater-than OR unordered) against +Inf
- // is true only when the value is NaN, since |x| can never exceed +Inf.
- abs.f64 %fd17, %fd16;
- setp.gtu.f64 %p1, %fd17, 0d7FF0000000000000; // p1 = isnan(c)
- selp.f64 %fd1, 0d0000000000000000, %fd16, %p1; // fd1 = isnan(c) ? 0.0 : c
- abs.f64 %fd18, %fd15;
- setp.gtu.f64 %p2, %fd18, 0d7FF0000000000000; // p2 = isnan(b)
- selp.f64 %fd2, 0d0000000000000000, %fd15, %p2; // fd2 = isnan(b) ? 0.0 : b
- abs.f64 %fd3, %fd14; // fd3 = |a|
- setp.gtu.f64 %p3, %fd3, 0d7FF0000000000000; // p3 = isnan(a)
- @%p3 bra $L__BB0_15; // NaN a is treated as 0.0 below
- bra.uni $L__BB0_1;
- $L__BB0_15: // a is NaN: first result is 0.0 - b
- mov.f64 %fd29, 0d0000000000000000;
- sub.f64 %fd40, %fd29, %fd2; // fd40 = 0.0 - b
- bra.uni $L__BB0_16;
- $L__BB0_1: // --- fuzzysub #1: fd40 = fuzzysub(a, b) ---
- setp.lt.f64 %p4, %fd14, 0d0000000000000000;
- setp.lt.f64 %p5, %fd2, 0d0000000000000000;
- and.pred %p6, %p4, %p5; // both strictly negative?
- @%p6 bra $L__BB0_3;
- setp.leu.f64 %p7, %fd14, 0d0000000000000000; // a <= 0 (or unordered)?
- setp.leu.f64 %p8, %fd2, 0d0000000000000000; // b <= 0 (or unordered)?
- or.pred %p9, %p7, %p8;
- @%p9 bra $L__BB0_14; // mixed signs / zero involved: plain subtract
- $L__BB0_3: // here a and b have the same sign
- setp.eq.f64 %p10, %fd2, %fd14;
- mov.f64 %fd40, 0d0000000000000000; // provisional result 0.0 ("equal")
- @%p10 bra $L__BB0_16; // exactly equal -> 0.0
- setp.eq.f64 %p11, %fd14, 0d0000000000000000;
- setp.eq.f64 %p12, %fd2, 0d0000000000000000;
- or.pred %p13, %p11, %p12;
- @%p13 bra $L__BB0_14; // either operand zero: plain subtract
- sub.f64 %fd20, %fd14, %fd2;
- abs.f64 %fd4, %fd20; // fd4 = |a - b|
- {
- .reg .b32 %temp;
- mov.b64 {%temp, %r8}, %fd4; // r8 = high 32 bits of |a - b|
- }
- and.b32 %r9, %r8, 2146435072; // 2146435072 = 0x7FF00000, exponent-field mask
- setp.eq.s32 %p14, %r9, 2146435072; // p14 = |a - b| is Inf or NaN
- mul.f64 %fd21, %fd3, 0d3D30000000000000; // 0d3D30... = 2^-44 (relative tolerance)
- setp.gt.f64 %p15, %fd4, %fd21; // diff > |a| * 2^-44 ?
- or.pred %p16, %p15, %p14;
- @%p16 bra $L__BB0_14; // too far apart (or diff non-finite): subtract
- abs.f64 %fd5, %fd2; // fd5 = |b|
- mul.f64 %fd22, %fd5, 0d3D30000000000000;
- setp.gt.f64 %p17, %fd4, %fd22; // diff > |b| * 2^-44 ?
- @%p17 bra $L__BB0_14;
- // Integer-exactness escape: if |a - b|, |a| and |b| are all integers
- // exactly representable in f64 (< 2^53), take the plain subtract
- // instead of snapping the result to zero.
- setp.gtu.f64 %p18, %fd4, 0d433FFFFFFFFFFFFF; // 0d433F... = largest f64 below 2^53
- @%p18 bra $L__BB0_13;
- cvt.rzi.s64.f64 %rd1, %fd4;
- setp.gt.s64 %p19, %rd1, 9007199254740991; // 9007199254740991 = 2^53 - 1
- @%p19 bra $L__BB0_13;
- cvt.rn.f64.s64 %fd23, %rd1;
- setp.ne.f64 %p20, %fd4, %fd23; // |a - b| not an exact integer?
- setp.gtu.f64 %p21, %fd3, 0d433FFFFFFFFFFFFF;
- or.pred %p22, %p21, %p20;
- @%p22 bra $L__BB0_13;
- cvt.rzi.s64.f64 %rd2, %fd3;
- setp.gt.s64 %p23, %rd2, 9007199254740991;
- @%p23 bra $L__BB0_13;
- cvt.rn.f64.s64 %fd24, %rd2;
- setp.ne.f64 %p24, %fd3, %fd24; // |a| not an exact integer?
- setp.gtu.f64 %p25, %fd5, 0d433FFFFFFFFFFFFF;
- or.pred %p26, %p24, %p25;
- @%p26 bra $L__BB0_13;
- cvt.rzi.s64.f64 %rd5, %fd5;
- setp.lt.s64 %p27, %rd5, 9007199254740992; // 9007199254740992 = 2^53
- cvt.rn.f64.s64 %fd25, %rd5;
- setp.equ.f64 %p28, %fd5, %fd25; // |b| an exact integer?
- and.pred %p29, %p27, %p28;
- @%p29 bra $L__BB0_14; // all three exact integers: plain subtract
- $L__BB0_13: // snap to 0.0 only if diff < 2^-48 of BOTH magnitudes
- mul.f64 %fd27, %fd3, 0d3CF0000000000000; // 0d3CF0... = 2^-48
- setp.lt.f64 %p30, %fd4, %fd27;
- mul.f64 %fd28, %fd5, 0d3CF0000000000000;
- setp.lt.f64 %p31, %fd4, %fd28;
- and.pred %p32, %p30, %p31;
- @%p32 bra $L__BB0_16; // approximately equal: fd40 stays 0.0
- $L__BB0_14:
- sub.f64 %fd40, %fd14, %fd2; // plain subtract: fd40 = a - b
- $L__BB0_16: // --- fuzzysub #2: fd41 = fuzzysub(fd40, c') -- same scheme as #1,
- // with fd40 as the left operand and fd1 (NaN-scrubbed c) as the right.
- setp.lt.f64 %p33, %fd1, 0d0000000000000000;
- setp.lt.f64 %p34, %fd40, 0d0000000000000000;
- and.pred %p35, %p33, %p34; // both strictly negative?
- @%p35 bra $L__BB0_18;
- setp.leu.f64 %p36, %fd40, 0d0000000000000000;
- setp.leu.f64 %p37, %fd1, 0d0000000000000000;
- or.pred %p38, %p37, %p36;
- @%p38 bra $L__BB0_30; // mixed signs / zero involved: plain subtract
- $L__BB0_18:
- setp.eq.f64 %p39, %fd40, %fd1;
- mov.f64 %fd41, 0d0000000000000000; // provisional result 0.0
- @%p39 bra $L__BB0_31; // exactly equal -> 0.0
- setp.eq.f64 %p40, %fd40, 0d0000000000000000;
- setp.eq.f64 %p41, %fd1, 0d0000000000000000;
- or.pred %p42, %p41, %p40;
- @%p42 bra $L__BB0_30; // either operand zero: plain subtract
- sub.f64 %fd31, %fd40, %fd1;
- abs.f64 %fd9, %fd31; // fd9 = |fd40 - c'|
- {
- .reg .b32 %temp;
- mov.b64 {%temp, %r10}, %fd9; // r10 = high 32 bits of the difference
- }
- and.b32 %r11, %r10, 2146435072; // exponent-field mask (0x7FF00000)
- setp.eq.s32 %p43, %r11, 2146435072; // difference is Inf or NaN?
- @%p43 bra $L__BB0_30;
- abs.f64 %fd10, %fd40;
- mul.f64 %fd32, %fd10, 0d3D30000000000000; // 2^-44 relative tolerance
- setp.gt.f64 %p44, %fd9, %fd32;
- @%p44 bra $L__BB0_30;
- abs.f64 %fd11, %fd1;
- mul.f64 %fd33, %fd11, 0d3D30000000000000;
- setp.gt.f64 %p45, %fd9, %fd33;
- @%p45 bra $L__BB0_30;
- // Integer-exactness escape, as in fuzzysub #1.
- setp.gtu.f64 %p46, %fd9, 0d433FFFFFFFFFFFFF;
- @%p46 bra $L__BB0_29;
- cvt.rzi.s64.f64 %rd3, %fd9;
- setp.gt.s64 %p47, %rd3, 9007199254740991;
- @%p47 bra $L__BB0_29;
- cvt.rn.f64.s64 %fd34, %rd3;
- setp.ne.f64 %p48, %fd9, %fd34;
- setp.gtu.f64 %p49, %fd10, 0d433FFFFFFFFFFFFF;
- or.pred %p50, %p48, %p49;
- @%p50 bra $L__BB0_29;
- cvt.rzi.s64.f64 %rd4, %fd10;
- setp.gt.s64 %p51, %rd4, 9007199254740991;
- @%p51 bra $L__BB0_29;
- cvt.rn.f64.s64 %fd35, %rd4;
- setp.ne.f64 %p52, %fd10, %fd35;
- setp.gtu.f64 %p53, %fd11, 0d433FFFFFFFFFFFFF;
- or.pred %p54, %p52, %p53;
- @%p54 bra $L__BB0_29;
- cvt.rzi.s64.f64 %rd6, %fd11;
- setp.lt.s64 %p55, %rd6, 9007199254740992;
- cvt.rn.f64.s64 %fd36, %rd6;
- setp.equ.f64 %p56, %fd11, %fd36;
- and.pred %p57, %p55, %p56;
- @%p57 bra $L__BB0_30; // all exact integers: plain subtract
- $L__BB0_29: // snap to 0.0 only if diff < 2^-48 of both magnitudes
- mul.f64 %fd38, %fd10, 0d3CF0000000000000;
- setp.lt.f64 %p58, %fd9, %fd38;
- mul.f64 %fd39, %fd11, 0d3CF0000000000000;
- setp.lt.f64 %p59, %fd9, %fd39;
- and.pred %p60, %p58, %p59;
- @%p60 bra $L__BB0_31; // approximately equal: fd41 stays 0.0
- $L__BB0_30:
- sub.f64 %fd41, %fd40, %fd1; // plain subtract
- $L__BB0_31:
- // Store: out[idx] = fd41 using 32-bit address arithmetic.
- // NOTE(review): no bounds check on idx -- the launcher must guarantee
- // every launched thread maps to a valid element of out.
- shl.b32 %r12, %r1, 3; // byte offset = idx * sizeof(f64)
- add.s32 %r13, %r2, %r12;
- st.global.f64 [%r13], %fd41;
- ret;
- }
- ��
|