Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / drivers / gpu / drm / nouveau / nvkm / engine / sec / fuc / g98.fuc0s
diff --git a/kernel/drivers/gpu/drm/nouveau/nvkm/engine/sec/fuc/g98.fuc0s b/kernel/drivers/gpu/drm/nouveau/nvkm/engine/sec/fuc/g98.fuc0s
new file mode 100644 (file)
index 0000000..06ee060
--- /dev/null
@@ -0,0 +1,698 @@
+/*
+ *  fuc microcode for g98 psec engine
+ *  Copyright (C) 2010  Marcin Koƛcielnicki
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+.section #g98_psec_data
+
+ctx_dma:
+ctx_dma_query:         .b32 0
+ctx_dma_src:           .b32 0
+ctx_dma_dst:           .b32 0
+.equ #dma_count 3
+ctx_query_address_high:        .b32 0
+ctx_query_address_low: .b32 0
+ctx_query_counter:     .b32 0
+ctx_cond_address_high: .b32 0
+ctx_cond_address_low:  .b32 0
+ctx_cond_off:          .b32 0
+ctx_src_address_high:  .b32 0
+ctx_src_address_low:   .b32 0
+ctx_dst_address_high:  .b32 0
+ctx_dst_address_low:   .b32 0
+ctx_mode:              .b32 0
+.align 16
+ctx_key:               .skip 16
+ctx_iv:                        .skip 16
+
+.align 0x80
+swap:
+.skip 32
+
+.align 8
+common_cmd_dtable:
+.b32 #ctx_query_address_high + 0x20000 ~0xff
+.b32 #ctx_query_address_low + 0x20000 ~0xfffffff0
+.b32 #ctx_query_counter + 0x20000 ~0xffffffff
+.b32 #cmd_query_get + 0x00000 ~1
+.b32 #ctx_cond_address_high + 0x20000 ~0xff
+.b32 #ctx_cond_address_low + 0x20000 ~0xfffffff0
+.b32 #cmd_cond_mode + 0x00000 ~7
+.b32 #cmd_wrcache_flush + 0x00000 ~0
+.equ #common_cmd_max 0x88
+
+
+.align 8
+engine_cmd_dtable:
+.b32 #ctx_key + 0x0 + 0x20000 ~0xffffffff
+.b32 #ctx_key + 0x4 + 0x20000 ~0xffffffff
+.b32 #ctx_key + 0x8 + 0x20000 ~0xffffffff
+.b32 #ctx_key + 0xc + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0x0 + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0x4 + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0x8 + 0x20000 ~0xffffffff
+.b32 #ctx_iv + 0xc + 0x20000 ~0xffffffff
+.b32 #ctx_src_address_high + 0x20000 ~0xff
+.b32 #ctx_src_address_low + 0x20000 ~0xfffffff0
+.b32 #ctx_dst_address_high + 0x20000 ~0xff
+.b32 #ctx_dst_address_low + 0x20000 ~0xfffffff0
+.b32 #sec_cmd_mode + 0x00000 ~0xf
+.b32 #sec_cmd_length + 0x10000 ~0x0ffffff0
+.equ #engine_cmd_max 0xce
+
+.align 4
+sec_dtable:
+.b16 #sec_copy_prep #sec_do_inout
+.b16 #sec_store_prep #sec_do_out
+.b16 #sec_ecb_e_prep #sec_do_inout
+.b16 #sec_ecb_d_prep #sec_do_inout
+.b16 #sec_cbc_e_prep #sec_do_inout
+.b16 #sec_cbc_d_prep #sec_do_inout
+.b16 #sec_pcbc_e_prep #sec_do_inout
+.b16 #sec_pcbc_d_prep #sec_do_inout
+.b16 #sec_cfb_e_prep #sec_do_inout
+.b16 #sec_cfb_d_prep #sec_do_inout
+.b16 #sec_ofb_prep #sec_do_inout
+.b16 #sec_ctr_prep #sec_do_inout
+.b16 #sec_cbc_mac_prep #sec_do_in
+.b16 #sec_cmac_finish_complete_prep #sec_do_in
+.b16 #sec_cmac_finish_partial_prep #sec_do_in
+
+.align 0x100
+
+.section #g98_psec_code
+
+       // $r0 is always set to 0 in our code - this allows some space savings.
+       clear b32 $r0
+
+       // set up the interrupt handler
+       mov $r1 #ih
+       mov $iv0 $r1
+
+       // init stack pointer
+       mov $sp $r0
+
+       // set interrupt dispatch - route timer, fifo, ctxswitch to i0, others to host
+       movw $r1 0xfff0
+       sethi $r1 0
+       mov $r2 0x400
+       iowr I[$r2 + 0x300] $r1
+
+       // enable the interrupts
+       or $r1 0xc
+       iowr I[$r2] $r1
+
+       // enable fifo access and context switching
+       mov $r1 3
+       mov $r2 0x1200
+       iowr I[$r2] $r1
+
+       // enable i0 delivery
+       bset $flags ie0
+
+       // sleep forver, waking only for interrupts.
+       bset $flags $p0
+       spin:
+       sleep $p0
+       bra #spin
+
+// i0 handler
+ih:
+       // see which interrupts we got
+       iord $r1 I[$r0 + 0x200]
+
+       and $r2 $r1 0x8
+       cmpu b32 $r2 0
+       bra e #noctx
+
+               // context switch... prepare the regs for xfer
+               mov $r2 0x7700
+               mov $xtargets $r2
+               mov $xdbase $r0
+               // 128-byte context.
+               mov $r2 0
+               sethi $r2 0x50000
+
+               // read current channel
+               mov $r3 0x1400
+               iord $r4 I[$r3]
+               // if bit 30 set, it's active, so we have to unload it first.
+               shl b32 $r5 $r4 1
+               cmps b32 $r5 0
+               bra nc #ctxload
+
+                       // unload the current channel - save the context
+                       xdst $r0 $r2
+                       xdwait
+                       // and clear bit 30, then write back
+                       bclr $r4 0x1e
+                       iowr I[$r3] $r4
+                       // tell PFIFO we unloaded
+                       mov $r4 1
+                       iowr I[$r3 + 0x200] $r4
+
+               bra #noctx
+
+               ctxload:
+                       // no channel loaded - perhaps we're requested to load one
+                       iord $r4 I[$r3 + 0x100]
+                       shl b32 $r15 $r4 1
+                       cmps b32 $r15 0
+                       // if bit 30 of next channel not set, probably PFIFO is just
+                       // killing a context. do a faux load, without the active bit.
+                       bra nc #dummyload
+
+                               // ok, do a real context load.
+                               xdld $r0 $r2
+                               xdwait
+                               mov $r5 #ctx_dma
+                               mov $r6 #dma_count - 1
+                               ctxload_dma_loop:
+                                       ld b32 $r7 D[$r5 + $r6 * 4]
+                                       add b32 $r8 $r6 0x180
+                                       shl b32 $r8 8
+                                       iowr I[$r8] $r7
+                                       sub b32 $r6 1
+                               bra nc #ctxload_dma_loop
+
+                       dummyload:
+                       // tell PFIFO we're done
+                       mov $r5 2
+                       iowr I[$r3 + 0x200] $r5
+
+       noctx:
+       and $r2 $r1 0x4
+       cmpu b32 $r2 0
+       bra e #nocmd
+
+               // incoming fifo command.
+               mov $r3 0x1900
+               iord $r2 I[$r3 + 0x100]
+               iord $r3 I[$r3]
+               // extract the method
+               and $r4 $r2 0x7ff
+               // shift the addr to proper position if we need to interrupt later
+               shl b32 $r2 0x10
+
+               // mthd 0 and 0x100 [NAME, NOP]: ignore
+               and $r5 $r4 0x7bf
+               cmpu b32 $r5 0
+               bra e #cmddone
+
+               mov $r5 #engine_cmd_dtable - 0xc0 * 8
+               mov $r6 #engine_cmd_max
+               cmpu b32 $r4 0xc0
+               bra nc #dtable_cmd
+               mov $r5 #common_cmd_dtable - 0x80 * 8
+               mov $r6 #common_cmd_max
+               cmpu b32 $r4 0x80
+               bra nc #dtable_cmd
+               cmpu b32 $r4 0x60
+               bra nc #dma_cmd
+               cmpu b32 $r4 0x50
+               bra ne #illegal_mthd
+
+                       // mthd 0x140: PM_TRIGGER
+                       mov $r2 0x2200
+                       clear b32 $r3
+                       sethi $r3 0x20000
+                       iowr I[$r2] $r3
+                       bra #cmddone
+
+               dma_cmd:
+                       // mthd 0x180...: DMA_*
+                       cmpu b32 $r4 0x60+#dma_count
+                       bra nc #illegal_mthd
+                       shl b32 $r5 $r4 2
+                       add b32 $r5 ((#ctx_dma - 0x60 * 4) & 0xffff)
+                       bset $r3 0x1e
+                       st b32 D[$r5] $r3
+                       add b32 $r4 0x180 - 0x60
+                       shl b32 $r4 8
+                       iowr I[$r4] $r3
+                       bra #cmddone
+
+               dtable_cmd:
+                       cmpu b32 $r4 $r6
+                       bra nc #illegal_mthd
+                       shl b32 $r4 3
+                       add b32 $r4 $r5
+                       ld b32 $r5 D[$r4 + 4]
+                       and $r5 $r3
+                       cmpu b32 $r5 0
+                       bra ne #invalid_bitfield
+                       ld b16 $r5 D[$r4]
+                       ld b16 $r6 D[$r4 + 2]
+                       cmpu b32 $r6 2
+                       bra e #cmd_setctx
+                       ld b32 $r7 D[$r0 + #ctx_cond_off]
+                       and $r6 $r7
+                       cmpu b32 $r6 1
+                       bra e #cmddone
+                       call $r5
+                       bra $p1 #dispatch_error
+                       bra #cmddone
+
+               cmd_setctx:
+                       st b32 D[$r5] $r3
+                       bra #cmddone
+
+
+               invalid_bitfield:
+                       or $r2 1
+               dispatch_error:
+               illegal_mthd:
+                       mov $r4 0x1000
+                       iowr I[$r4] $r2
+                       iowr I[$r4 + 0x100] $r3
+                       mov $r4 0x40
+                       iowr I[$r0] $r4
+
+                       im_loop:
+                               iord $r4 I[$r0 + 0x200]
+                               and $r4 0x40
+                               cmpu b32 $r4 0
+                       bra ne #im_loop
+
+               cmddone:
+               // remove the command from FIFO
+               mov $r3 0x1d00
+               mov $r4 1
+               iowr I[$r3] $r4
+
+       nocmd:
+       // ack the processed interrupts
+       and $r1 $r1 0xc
+       iowr I[$r0 + 0x100] $r1
+iret
+
+cmd_query_get:
+       // if bit 0 of param set, trigger interrupt afterwards.
+       setp $p1 $r3
+       or $r2 3
+
+       // read PTIMER, beware of races...
+       mov $r4 0xb00
+       ptimer_retry:
+               iord $r6 I[$r4 + 0x100]
+               iord $r5 I[$r4]
+               iord $r7 I[$r4 + 0x100]
+               cmpu b32 $r6 $r7
+       bra ne #ptimer_retry
+
+       // prepare the query structure
+       ld b32 $r4 D[$r0 + #ctx_query_counter]
+       st b32 D[$r0 + #swap + 0x0] $r4
+       st b32 D[$r0 + #swap + 0x4] $r0
+       st b32 D[$r0 + #swap + 0x8] $r5
+       st b32 D[$r0 + #swap + 0xc] $r6
+
+       // will use target 0, DMA_QUERY.
+       mov $xtargets $r0
+
+       ld b32 $r4 D[$r0 + #ctx_query_address_high]
+       shl b32 $r4 0x18
+       mov $xdbase $r4
+
+       ld b32 $r4 D[$r0 + #ctx_query_address_low]
+       mov $r5 #swap
+       sethi $r5 0x20000
+       xdst $r4 $r5
+       xdwait
+
+       ret
+
+cmd_cond_mode:
+       // if >= 5, INVALID_ENUM
+       bset $flags $p1
+       or $r2 2
+       cmpu b32 $r3 5
+       bra nc #return
+
+       // otherwise, no error.
+       bclr $flags $p1
+
+       // if < 2, no QUERY object is involved
+       cmpu b32 $r3 2
+       bra nc #cmd_cond_mode_queryful
+
+               xor $r3 1
+               st b32 D[$r0 + #ctx_cond_off] $r3
+       return:
+               ret
+
+       cmd_cond_mode_queryful:
+       // ok, will need to pull a QUERY object, prepare offsets
+       ld b32 $r4 D[$r0 + #ctx_cond_address_high]
+       ld b32 $r5 D[$r0 + #ctx_cond_address_low]
+       and $r6 $r5 0xff
+       shr b32 $r5 8
+       shl b32 $r4 0x18
+       or $r4 $r5
+       mov $xdbase $r4
+       mov $xtargets $r0
+
+       // pull the first one
+       mov $r5 #swap
+       sethi $r5 0x20000
+       xdld $r6 $r5
+
+       // if == 2, only a single QUERY is involved...
+       cmpu b32 $r3 2
+       bra ne #cmd_cond_mode_double
+
+               xdwait
+               ld b32 $r4 D[$r0 + #swap + 4]
+               cmpu b32 $r4 0
+               xbit $r4 $flags z
+               st b32 D[$r0 + #ctx_cond_off] $r4
+               ret
+
+       // ok, we'll need to pull second one too
+       cmd_cond_mode_double:
+       add b32 $r6 0x10
+       add b32 $r5 0x10
+       xdld $r6 $r5
+       xdwait
+
+       // compare COUNTERs
+       ld b32 $r5 D[$r0 + #swap + 0x00]
+       ld b32 $r6 D[$r0 + #swap + 0x10]
+       cmpu b32 $r5 $r6
+       xbit $r4 $flags z
+
+       // compare RESen
+       ld b32 $r5 D[$r0 + #swap + 0x04]
+       ld b32 $r6 D[$r0 + #swap + 0x14]
+       cmpu b32 $r5 $r6
+       xbit $r5 $flags z
+       and $r4 $r5
+
+       // and negate or not, depending on mode
+       cmpu b32 $r3 3
+       xbit $r5 $flags z
+       xor $r4 $r5
+       st b32 D[$r0 + #ctx_cond_off] $r4
+       ret
+
+cmd_wrcache_flush:
+       bclr $flags $p1
+       mov $r2 0x2200
+       clear b32 $r3
+       sethi $r3 0x10000
+       iowr I[$r2] $r3
+       ret
+
+sec_cmd_mode:
+       // if >= 0xf, INVALID_ENUM
+       bset $flags $p1
+       or $r2 2
+       cmpu b32 $r3 0xf
+       bra nc #sec_cmd_mode_return
+
+               bclr $flags $p1
+               st b32 D[$r0 + #ctx_mode] $r3
+
+       sec_cmd_mode_return:
+       ret
+
+sec_cmd_length:
+       // nop if length == 0
+       cmpu b32 $r3 0
+       bra e #sec_cmd_mode_return
+
+       // init key, IV
+       cxset 3
+       mov $r4 #ctx_key
+       sethi $r4 0x70000
+       xdst $r0 $r4
+       mov $r4 #ctx_iv
+       sethi $r4 0x60000
+       xdst $r0 $r4
+       xdwait
+       ckeyreg $c7
+
+       // prepare the targets
+       mov $r4 0x2100
+       mov $xtargets $r4
+
+       // prepare src address
+       ld b32 $r4 D[$r0 + #ctx_src_address_high]
+       ld b32 $r5 D[$r0 + #ctx_src_address_low]
+       shr b32 $r8 $r5 8
+       shl b32 $r4 0x18
+       or $r4 $r8
+       and $r5 $r5 0xff
+
+       // prepare dst address
+       ld b32 $r6 D[$r0 + #ctx_dst_address_high]
+       ld b32 $r7 D[$r0 + #ctx_dst_address_low]
+       shr b32 $r8 $r7 8
+       shl b32 $r6 0x18
+       or $r6 $r8
+       and $r7 $r7 0xff
+
+       // find the proper prep & do functions
+       ld b32 $r8 D[$r0 + #ctx_mode]
+       shl b32 $r8 2
+
+       // run prep
+       ld b16 $r9 D[$r8 + #sec_dtable]
+       call $r9
+
+       // do it
+       ld b16 $r9 D[$r8 + #sec_dtable + 2]
+       call $r9
+       cxset 1
+       xdwait
+       cxset 0x61
+       xdwait
+       xdwait
+
+       // update src address
+       shr b32 $r8 $r4 0x18
+       shl b32 $r9 $r4 8
+       add b32 $r9 $r5
+       adc b32 $r8 0
+       st b32 D[$r0 + #ctx_src_address_high] $r8
+       st b32 D[$r0 + #ctx_src_address_low] $r9
+
+       // update dst address
+       shr b32 $r8 $r6 0x18
+       shl b32 $r9 $r6 8
+       add b32 $r9 $r7
+       adc b32 $r8 0
+       st b32 D[$r0 + #ctx_dst_address_high] $r8
+       st b32 D[$r0 + #ctx_dst_address_low] $r9
+
+       // pull updated IV
+       cxset 2
+       mov $r4 #ctx_iv
+       sethi $r4 0x60000
+       xdld $r0 $r4
+       xdwait
+
+       ret
+
+
+sec_copy_prep:
+       cs0begin 2
+               cxsin $c0
+               cxsout $c0
+       ret
+
+sec_store_prep:
+       cs0begin 1
+               cxsout $c6
+       ret
+
+sec_ecb_e_prep:
+       cs0begin 3
+               cxsin $c0
+               cenc $c0 $c0
+               cxsout $c0
+       ret
+
+sec_ecb_d_prep:
+       ckexp $c7 $c7
+       cs0begin 3
+               cxsin $c0
+               cdec $c0 $c0
+               cxsout $c0
+       ret
+
+sec_cbc_e_prep:
+       cs0begin 4
+               cxsin $c0
+               cxor $c6 $c0
+               cenc $c6 $c6
+               cxsout $c6
+       ret
+
+sec_cbc_d_prep:
+       ckexp $c7 $c7
+       cs0begin 5
+               cmov $c2 $c6
+               cxsin $c6
+               cdec $c0 $c6
+               cxor $c0 $c2
+               cxsout $c0
+       ret
+
+sec_pcbc_e_prep:
+       cs0begin 5
+               cxsin $c0
+               cxor $c6 $c0
+               cenc $c6 $c6
+               cxsout $c6
+               cxor $c6 $c0
+       ret
+
+sec_pcbc_d_prep:
+       ckexp $c7 $c7
+       cs0begin 5
+               cxsin $c0
+               cdec $c1 $c0
+               cxor $c6 $c1
+               cxsout $c6
+               cxor $c6 $c0
+       ret
+
+sec_cfb_e_prep:
+       cs0begin 4
+               cenc $c6 $c6
+               cxsin $c0
+               cxor $c6 $c0
+               cxsout $c6
+       ret
+
+sec_cfb_d_prep:
+       cs0begin 4
+               cenc $c0 $c6
+               cxsin $c6
+               cxor $c0 $c6
+               cxsout $c0
+       ret
+
+sec_ofb_prep:
+       cs0begin 4
+               cenc $c6 $c6
+               cxsin $c0
+               cxor $c0 $c6
+               cxsout $c0
+       ret
+
+sec_ctr_prep:
+       cs0begin 5
+               cenc $c1 $c6
+               cadd $c6 1
+               cxsin $c0
+               cxor $c0 $c1
+               cxsout $c0
+       ret
+
+sec_cbc_mac_prep:
+       cs0begin 3
+               cxsin $c0
+               cxor $c6 $c0
+               cenc $c6 $c6
+       ret
+
+sec_cmac_finish_complete_prep:
+       cs0begin 7
+               cxsin $c0
+               cxor $c6 $c0
+               cxor $c0 $c0
+               cenc $c0 $c0
+               cprecmac $c0 $c0
+               cxor $c6 $c0
+               cenc $c6 $c6
+       ret
+
+sec_cmac_finish_partial_prep:
+       cs0begin 8
+               cxsin $c0
+               cxor $c6 $c0
+               cxor $c0 $c0
+               cenc $c0 $c0
+               cprecmac $c0 $c0
+               cprecmac $c0 $c0
+               cxor $c6 $c0
+               cenc $c6 $c6
+       ret
+
+// TODO
+sec_do_in:
+       add b32 $r3 $r5
+       mov $xdbase $r4
+       mov $r9 #swap
+       sethi $r9 0x20000
+       sec_do_in_loop:
+               xdld $r5 $r9
+               xdwait
+               cxset 0x22
+               xdst $r0 $r9
+               cs0exec 1
+               xdwait
+               add b32 $r5 0x10
+               cmpu b32 $r5 $r3
+       bra ne #sec_do_in_loop
+       cxset 1
+       xdwait
+       ret
+
+sec_do_out:
+       add b32 $r3 $r7
+       mov $xdbase $r6
+       mov $r9 #swap
+       sethi $r9 0x20000
+       sec_do_out_loop:
+               cs0exec 1
+               cxset 0x61
+               xdld $r7 $r9
+               xdst $r7 $r9
+               cxset 1
+               xdwait
+               add b32 $r7 0x10
+               cmpu b32 $r7 $r3
+       bra ne #sec_do_out_loop
+       ret
+
+sec_do_inout:
+       add b32 $r3 $r5
+       mov $r9 #swap
+       sethi $r9 0x20000
+       sec_do_inout_loop:
+               mov $xdbase $r4
+               xdld $r5 $r9
+               xdwait
+               cxset 0x21
+               xdst $r0 $r9
+               cs0exec 1
+               cxset 0x61
+               mov $xdbase $r6
+               xdld $r7 $r9
+               xdst $r7 $r9
+               cxset 1
+               xdwait
+               add b32 $r5 0x10
+               add b32 $r7 0x10
+               cmpu b32 $r5 $r3
+       bra ne #sec_do_inout_loop
+       ret
+
+.align 0x100