// Listing of _nop_fsub_fsubcae63ff924034fa148b6b64425ec67-154403DFDAAEC7869A45D983AA0425BC.bin (5.5 KB)
  1. //
  2. // Generated by NVIDIA NVVM Compiler
  3. //
  4. // Compiler Build ID: UNKNOWN
  5. // Unknown Toolkit Version
  6. // Based on NVVM 7.0.1
  7. //
  8. .version 8.1
  9. .target sm_86, texmode_independent
  10. .address_size 32
  11. // .globl DynamicKernel_nop_fsub_fsub
  // ---------------------------------------------------------------------------
  // DynamicKernel_nop_fsub_fsub
  //
  // Each thread computes  (param_1 - param_2) - param_3  in f64 and stores the
  // result at byte offset idx*8 from param_0, where
  //     idx = %ntid.x * %ctaid.x + %tid.x + %envreg3.
  //
  // NaN inputs are treated as 0.0. Each of the two subtractions x - y is
  // "snapped": when x and y have the same strict sign, the result is replaced
  // by exactly 0.0 if x == y, or if |x - y| is below 2^-48 of both |x| and |y|
  // and the operands are not all exactly-representable integers (<= 2^53 - 1).
  // In every other case the plain sub.f64 result is used.
  //
  // Parameters:
  //   param_0 : 32-bit pointer (.global, align 8) to the f64 output buffer
  //   param_1 : f64 minuend of the first subtraction
  //   param_2 : f64 subtrahend of the first subtraction
  //   param_3 : f64 subtrahend of the second subtraction
  //
  // Key registers:
  //   %r1   output element index         %r2   output base address
  //   %fd14 param_1 (raw)                %fd3  |param_1|
  //   %fd2  param_2 with NaN -> 0        %fd1  param_3 with NaN -> 0
  //   %fd40 result of first subtraction  %fd41 final result
  //
  // Constants:
  //   0d7FF0000000000000 = +inf          0d433FFFFFFFFFFFFF = 2^53 - 1
  //   0d3D30000000000000 = 2^-44         0d3CF0000000000000 = 2^-48
  //   2146435072 = 0x7FF00000 (f64 exponent mask in the high 32-bit word)
  // ---------------------------------------------------------------------------
  12. .entry DynamicKernel_nop_fsub_fsub(
  13. .param .u32 .ptr .global .align 8 DynamicKernel_nop_fsub_fsub_param_0,
  14. .param .f64 DynamicKernel_nop_fsub_fsub_param_1,
  15. .param .f64 DynamicKernel_nop_fsub_fsub_param_2,
  16. .param .f64 DynamicKernel_nop_fsub_fsub_param_3
  17. )
  18. {
  19. .reg .pred %p<61>;
  20. .reg .b32 %r<14>;
  21. .reg .f64 %fd<42>;
  22. .reg .b64 %rd<7>;
  // --- Load kernel arguments ---
  23. ld.param.u32 %r2, [DynamicKernel_nop_fsub_fsub_param_0];
  24. ld.param.f64 %fd14, [DynamicKernel_nop_fsub_fsub_param_1];
  25. ld.param.f64 %fd15, [DynamicKernel_nop_fsub_fsub_param_2];
  26. ld.param.f64 %fd16, [DynamicKernel_nop_fsub_fsub_param_3];
  // --- Output index: %r1 = %ntid.x * %ctaid.x + (%tid.x + %envreg3) ---
  // NOTE(review): %envreg3 is a driver-defined environment register; presumably
  // it carries the global work offset for dimension x - confirm.
  27. mov.b32 %r3, %envreg3;
  28. mov.u32 %r4, %ctaid.x;
  29. mov.u32 %r5, %ntid.x;
  30. mov.u32 %r6, %tid.x;
  31. add.s32 %r7, %r6, %r3;
  32. mad.lo.s32 %r1, %r5, %r4, %r7;
  // --- Sanitize inputs: |x| >(unordered) +inf is true only for NaN ---
  33. abs.f64 %fd17, %fd16;
  34. setp.gtu.f64 %p1, %fd17, 0d7FF0000000000000;
  35. selp.f64 %fd1, 0d0000000000000000, %fd16, %p1; // %fd1 = isnan(param_3) ? 0 : param_3
  36. abs.f64 %fd18, %fd15;
  37. setp.gtu.f64 %p2, %fd18, 0d7FF0000000000000;
  38. selp.f64 %fd2, 0d0000000000000000, %fd15, %p2; // %fd2 = isnan(param_2) ? 0 : param_2
  39. abs.f64 %fd3, %fd14;
  40. setp.gtu.f64 %p3, %fd3, 0d7FF0000000000000; // %p3 = isnan(param_1)
  41. @%p3 bra $L__BB0_15;
  42. bra.uni $L__BB0_1;
  // param_1 is NaN: treat it as 0 and use a plain subtraction, %fd40 = 0 - %fd2.
  43. $L__BB0_15:
  44. mov.f64 %fd29, 0d0000000000000000;
  45. sub.f64 %fd40, %fd29, %fd2;
  46. bra.uni $L__BB0_16;
  // === Stage 1: %fd40 = snapped(%fd14 - %fd2) ===
  // Only operands with the same strict sign take the snapping path ($L__BB0_3);
  // everything else goes to the plain subtraction at $L__BB0_14.
  47. $L__BB0_1:
  48. setp.lt.f64 %p4, %fd14, 0d0000000000000000;
  49. setp.lt.f64 %p5, %fd2, 0d0000000000000000;
  50. and.pred %p6, %p4, %p5; // both strictly negative
  51. @%p6 bra $L__BB0_3;
  52. setp.leu.f64 %p7, %fd14, 0d0000000000000000;
  53. setp.leu.f64 %p8, %fd2, 0d0000000000000000;
  54. or.pred %p9, %p7, %p8; // not both strictly positive
  55. @%p9 bra $L__BB0_14;
  56. $L__BB0_3:
  57. setp.eq.f64 %p10, %fd2, %fd14;
  58. mov.f64 %fd40, 0d0000000000000000; // default result 0.0, kept when snapping applies
  59. @%p10 bra $L__BB0_16; // equal operands -> exactly 0
  60. setp.eq.f64 %p11, %fd14, 0d0000000000000000;
  61. setp.eq.f64 %p12, %fd2, 0d0000000000000000;
  62. or.pred %p13, %p11, %p12; // either operand is zero -> plain subtraction
  63. @%p13 bra $L__BB0_14;
  64. sub.f64 %fd20, %fd14, %fd2;
  65. abs.f64 %fd4, %fd20; // %fd4 = |a - b|
  66. {
  67. .reg .b32 %temp;
  68. mov.b64 {%temp, %r8}, %fd4; // %r8 = high 32 bits of |a - b|
  69. }
  70. and.b32 %r9, %r8, 2146435072;
  71. setp.eq.s32 %p14, %r9, 2146435072; // exponent all ones: difference is inf or NaN
  72. mul.f64 %fd21, %fd3, 0d3D30000000000000; // |a| * 2^-44
  73. setp.gt.f64 %p15, %fd4, %fd21;
  74. or.pred %p16, %p15, %p14;
  75. @%p16 bra $L__BB0_14; // difference not tiny relative to |a|, or non-finite
  76. abs.f64 %fd5, %fd2; // %fd5 = |b|
  77. mul.f64 %fd22, %fd5, 0d3D30000000000000; // |b| * 2^-44
  78. setp.gt.f64 %p17, %fd4, %fd22;
  79. @%p17 bra $L__BB0_14; // difference not tiny relative to |b|
  // Integer test: if |a - b|, |a| and |b| are all whole numbers <= 2^53 - 1,
  // the subtraction is kept as is ($L__BB0_14). Any failure falls to $L__BB0_13.
  80. setp.gtu.f64 %p18, %fd4, 0d433FFFFFFFFFFFFF;
  81. @%p18 bra $L__BB0_13;
  82. cvt.rzi.s64.f64 %rd1, %fd4;
  83. setp.gt.s64 %p19, %rd1, 9007199254740991;
  84. @%p19 bra $L__BB0_13;
  85. cvt.rn.f64.s64 %fd23, %rd1;
  86. setp.ne.f64 %p20, %fd4, %fd23; // |a - b| has a fractional part
  87. setp.gtu.f64 %p21, %fd3, 0d433FFFFFFFFFFFFF;
  88. or.pred %p22, %p21, %p20;
  89. @%p22 bra $L__BB0_13;
  90. cvt.rzi.s64.f64 %rd2, %fd3;
  91. setp.gt.s64 %p23, %rd2, 9007199254740991;
  92. @%p23 bra $L__BB0_13;
  93. cvt.rn.f64.s64 %fd24, %rd2;
  94. setp.ne.f64 %p24, %fd3, %fd24; // |a| has a fractional part
  95. setp.gtu.f64 %p25, %fd5, 0d433FFFFFFFFFFFFF;
  96. or.pred %p26, %p24, %p25;
  97. @%p26 bra $L__BB0_13;
  98. cvt.rzi.s64.f64 %rd5, %fd5;
  99. setp.lt.s64 %p27, %rd5, 9007199254740992;
  100. cvt.rn.f64.s64 %fd25, %rd5;
  101. setp.equ.f64 %p28, %fd5, %fd25; // |b| equals its truncation (or unordered)
  102. and.pred %p29, %p27, %p28;
  103. @%p29 bra $L__BB0_14; // all three are integers -> plain subtraction
  // Snap to 0 when |a - b| < 2^-48 * |a| and |a - b| < 2^-48 * |b|.
  104. $L__BB0_13:
  105. mul.f64 %fd27, %fd3, 0d3CF0000000000000;
  106. setp.lt.f64 %p30, %fd4, %fd27;
  107. mul.f64 %fd28, %fd5, 0d3CF0000000000000;
  108. setp.lt.f64 %p31, %fd4, %fd28;
  109. and.pred %p32, %p30, %p31;
  110. @%p32 bra $L__BB0_16; // %fd40 is still 0.0 from above
  111. $L__BB0_14:
  112. sub.f64 %fd40, %fd14, %fd2; // plain subtraction
  // === Stage 2: %fd41 = snapped(%fd40 - %fd1), same scheme as stage 1 ===
  113. $L__BB0_16:
  114. setp.lt.f64 %p33, %fd1, 0d0000000000000000;
  115. setp.lt.f64 %p34, %fd40, 0d0000000000000000;
  116. and.pred %p35, %p33, %p34; // both strictly negative
  117. @%p35 bra $L__BB0_18;
  118. setp.leu.f64 %p36, %fd40, 0d0000000000000000;
  119. setp.leu.f64 %p37, %fd1, 0d0000000000000000;
  120. or.pred %p38, %p37, %p36; // not both strictly positive
  121. @%p38 bra $L__BB0_30;
  122. $L__BB0_18:
  123. setp.eq.f64 %p39, %fd40, %fd1;
  124. mov.f64 %fd41, 0d0000000000000000; // default result 0.0, kept when snapping applies
  125. @%p39 bra $L__BB0_31; // equal operands -> exactly 0
  126. setp.eq.f64 %p40, %fd40, 0d0000000000000000;
  127. setp.eq.f64 %p41, %fd1, 0d0000000000000000;
  128. or.pred %p42, %p41, %p40; // either operand is zero -> plain subtraction
  129. @%p42 bra $L__BB0_30;
  130. sub.f64 %fd31, %fd40, %fd1;
  131. abs.f64 %fd9, %fd31; // %fd9 = |x - c| with x = %fd40, c = %fd1
  132. {
  133. .reg .b32 %temp;
  134. mov.b64 {%temp, %r10}, %fd9; // %r10 = high 32 bits of |x - c|
  135. }
  136. and.b32 %r11, %r10, 2146435072;
  137. setp.eq.s32 %p43, %r11, 2146435072; // difference is inf or NaN
  138. @%p43 bra $L__BB0_30;
  139. abs.f64 %fd10, %fd40; // %fd10 = |x|
  140. mul.f64 %fd32, %fd10, 0d3D30000000000000; // |x| * 2^-44
  141. setp.gt.f64 %p44, %fd9, %fd32;
  142. @%p44 bra $L__BB0_30;
  143. abs.f64 %fd11, %fd1; // %fd11 = |c|
  144. mul.f64 %fd33, %fd11, 0d3D30000000000000; // |c| * 2^-44
  145. setp.gt.f64 %p45, %fd9, %fd33;
  146. @%p45 bra $L__BB0_30;
  // Integer test on |x - c|, |x| and |c| (whole numbers <= 2^53 - 1).
  147. setp.gtu.f64 %p46, %fd9, 0d433FFFFFFFFFFFFF;
  148. @%p46 bra $L__BB0_29;
  149. cvt.rzi.s64.f64 %rd3, %fd9;
  150. setp.gt.s64 %p47, %rd3, 9007199254740991;
  151. @%p47 bra $L__BB0_29;
  152. cvt.rn.f64.s64 %fd34, %rd3;
  153. setp.ne.f64 %p48, %fd9, %fd34; // |x - c| has a fractional part
  154. setp.gtu.f64 %p49, %fd10, 0d433FFFFFFFFFFFFF;
  155. or.pred %p50, %p48, %p49;
  156. @%p50 bra $L__BB0_29;
  157. cvt.rzi.s64.f64 %rd4, %fd10;
  158. setp.gt.s64 %p51, %rd4, 9007199254740991;
  159. @%p51 bra $L__BB0_29;
  160. cvt.rn.f64.s64 %fd35, %rd4;
  161. setp.ne.f64 %p52, %fd10, %fd35; // |x| has a fractional part
  162. setp.gtu.f64 %p53, %fd11, 0d433FFFFFFFFFFFFF;
  163. or.pred %p54, %p52, %p53;
  164. @%p54 bra $L__BB0_29;
  165. cvt.rzi.s64.f64 %rd6, %fd11;
  166. setp.lt.s64 %p55, %rd6, 9007199254740992;
  167. cvt.rn.f64.s64 %fd36, %rd6;
  168. setp.equ.f64 %p56, %fd11, %fd36; // |c| equals its truncation (or unordered)
  169. and.pred %p57, %p55, %p56;
  170. @%p57 bra $L__BB0_30; // all three are integers -> plain subtraction
  // Snap to 0 when |x - c| < 2^-48 * |x| and |x - c| < 2^-48 * |c|.
  171. $L__BB0_29:
  172. mul.f64 %fd38, %fd10, 0d3CF0000000000000;
  173. setp.lt.f64 %p58, %fd9, %fd38;
  174. mul.f64 %fd39, %fd11, 0d3CF0000000000000;
  175. setp.lt.f64 %p59, %fd9, %fd39;
  176. and.pred %p60, %p58, %p59;
  177. @%p60 bra $L__BB0_31; // %fd41 is still 0.0 from above
  178. $L__BB0_30:
  179. sub.f64 %fd41, %fd40, %fd1; // plain subtraction
  // --- Store: address = param_0 + idx * 8 (32-bit address arithmetic) ---
  180. $L__BB0_31:
  181. shl.b32 %r12, %r1, 3;
  182. add.s32 %r13, %r2, %r12;
  183. st.global.f64 [%r13], %fd41;
  184. ret;
  185. }
  186.