123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389 |
- /*
- * copy_page, __copy_user_page, __copy_user implementation of SuperH
- *
- * Copyright (C) 2001 Niibe Yutaka & Kaz Kojima
- * Copyright (C) 2002 Toshinobu Sugioka
- * Copyright (C) 2006 Paul Mundt
- */
- #include <linux/linkage.h>
- #include <asm/page.h>
- /*
- * copy_page
- * @to: P1 address
- * @from: P1 address
- *
- * void copy_page(void *to, void *from)
- */
- /*
- * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
- * r8 --- from + PAGE_SIZE
- * r9 --- not used
- * r10 --- to
- * r11 --- from
- */
ENTRY(copy_page)
	/* r8, r10 and r11 are used as loop state; keep the caller's values. */
	mov.l	r8,@-r15
	mov.l	r10,@-r15
	mov.l	r11,@-r15

	mov	r5,r11			! r11 = read cursor (from)
	mov	r4,r10			! r10 = write cursor (to)

	/*
	 * r8 = from + PAGE_SIZE. The page size is rebuilt from a small
	 * immediate: (PAGE_SIZE >> 10) shifted left by 8 + 2 = 10 bits.
	 */
	mov	#(PAGE_SIZE >> 10), r8
	shll8	r8
	shll2	r8
	add	r5,r8

.L_copy_page_line:
	/* Load eight longwords (32 bytes) through the read cursor. */
	mov.l	@r11+,r0
	mov.l	@r11+,r1
	mov.l	@r11+,r2
	mov.l	@r11+,r3
	mov.l	@r11+,r4
	mov.l	@r11+,r5
	mov.l	@r11+,r6
	mov.l	@r11+,r7

	/*
	 * Store them at offsets 0..28 from the write cursor. On SH-4 the
	 * first store of each 32-byte group is issued as movca.l.
	 */
#ifdef CONFIG_CPU_SH4
	movca.l	r0,@r10
#else
	mov.l	r0,@r10
#endif
	mov.l	r1,@(4,r10)
	mov.l	r2,@(8,r10)
	mov.l	r3,@(12,r10)
	mov.l	r4,@(16,r10)
	mov.l	r5,@(20,r10)
	mov.l	r6,@(24,r10)
	mov.l	r7,@(28,r10)

	cmp/eq	r11,r8			! T = read cursor reached from + page size
	bf/s	.L_copy_page_line
	 add	#32,r10			! delay slot: step the write cursor

	/* Restore the saved registers in reverse push order. */
	mov.l	@r15+,r11
	mov.l	@r15+,r10
	mov.l	@r15+,r8
	rts
	 nop
- /*
- * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
- * Return the number of bytes NOT copied
- */
/*
 * Exception-table wrappers for instructions that touch the caller-supplied
 * buffers.
 *
 * EX_FIXUP_AT(fixup, insn) emits insn at local label 9999 and records the
 * pair (address of insn, fixup) in the __ex_table section, then switches
 * back to the previous section.
 *
 * EX()        pairs the instruction with fixup label 6000 (forward reference).
 * EX_NO_POP() pairs the instruction with fixup label 6005 (forward reference).
 */
#define EX_FIXUP_AT(fixup, ...)			\
	9999: __VA_ARGS__ ;			\
	.section __ex_table, "a";		\
	.long 9999b, fixup ;			\
	.previous

#define EX(...)		EX_FIXUP_AT(6000f, __VA_ARGS__)
#define EX_NO_POP(...)	EX_FIXUP_AT(6005f, __VA_ARGS__)
/*
 * Entry: r4 = to, r5 = from, r6 = n.
 * r3 is set to to + n and kept for the fixup handlers, which report
 * r3 - r4 as the number of bytes not copied.
 */
ENTRY(__copy_user)
	mov	r4,r3
	mov	#11,r0
	cmp/gt	r0,r6			! T = (n > 11)
	bf/s	.L_cleanup_loop_no_pop	! 11 bytes or fewer: plain byte loop
	 add	r6,r3			! delay slot: r3 = to + n

	/* The bulk paths use r8-r11; they are popped again on the exit path. */
	mov.l	r11,@-r15
	mov.l	r10,@-r15
	mov.l	r9,@-r15
	mov.l	r8,@-r15

	/* r0 = (4 - from) & 3 = bytes needed to longword align the source. */
	neg	r5,r0
	add	#4,r0
	and	#3,r0
	tst	r0,r0
	bt	.L_src_aligned

.L_align_src:
	/* Byte copy until the source pointer is longword aligned. */
EX(	mov.b	@r5+,r1		)
	add	#-1,r6			! one byte fewer to copy
	dt	r0			! T = alignment bytes exhausted
EX(	mov.b	r1,@r4		)
	bf/s	.L_align_src
	 add	#1,r4

.L_src_aligned:
	/*
	 * Dispatch on the low two bits of the destination.
	 * r2 = n >> 2 (longwords left), r1 = (to & 3) * 4 (table offset).
	 */
	mov	r6, r2
	shlr2	r2
	mov	#3,r1
	and	r4,r1
	shll2	r1
	mova	.L_jump_tbl,r0
	mov.l	@(r0,r1),r1
	jmp	@r1
	 nop

	.align 2
.L_jump_tbl:
	.long	.L_dest00		! (to & 3) == 0
	.long	.L_dest01		! (to & 3) == 1
	.long	.L_dest10		! (to & 3) == 2
	.long	.L_dest11		! (to & 3) == 3
/*
 * Come here if there are fewer than 12 bytes to copy.
 *
 * Keep this branch target close to the bf/s at the top of __copy_user so
 * that the branch displacement does not overflow, which would result in a
 * more expensive branch being inserted. This is the fast path for small
 * copies; the jump via the jump table will hit the default slow-path
 * cleanup. -PFM.
 */
.L_cleanup_loop_no_pop:
	tst	r6,r6			! zero-length request: nothing to copy
	bt	.L_no_pop_done

.L_no_pop_byte:
	/* Byte-at-a-time copy; r6 counts down to zero. */
EX_NO_POP(	mov.b	@r5+,r0		)
	dt	r6
EX_NO_POP(	mov.b	r0,@r4		)
	bf/s	.L_no_pop_byte
	 add	#1,r4

.L_no_pop_done:
	mov	#0,r0			! normal return: 0 bytes left uncopied
5000:
	/* Nothing was pushed on this path, so return directly. */
	rts
	 nop

	/*
	 * Fixup for faults inside the byte loop above: return to label 5000
	 * with r0 = r3 - r4 (end of destination minus current destination).
	 */
	.section .fixup, "ax"
6005:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0			! delay slot: r0 = bytes not copied
	.align	2
8000:	.long	5000b
	.previous
/*
 * Destination = 00: source and destination are both longword aligned.
 * r6 = bytes left; r0, r1, r2, r7-r11 carry data in the 32-byte loop.
 */
.L_dest00:
	/* Fewer than 60 bytes left: skip the 32-byte loop entirely. */
	mov	#(32+32-4), r0
	cmp/gt	r6, r0			! T = (60 > r6)
	bt	.L_dest00_tail

	/* r0 = (32 - to) & 31 = bytes up to the next 32-byte boundary. */
	neg	r4,r0
	add	#0x20, r0
	and	#0x1f, r0
	tst	r0, r0
	bt	.L_dest00_line

	sub	r0, r6			! account for the bytes copied below
	shlr2	r0			! r0 = longwords up to the boundary
.L_dest00_head:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	.L_dest00_head
	 add	#4,r4

.L_dest00_line:
	/* Move 32 bytes: eight loads, then eight stores. */
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r2		)
EX(	mov.l	@r5+,r7		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.l	@r5+,r11	)
#ifdef CONFIG_CPU_SH4
EX(	movca.l	r0,@r4		)
#else
EX(	mov.l	r0,@r4		)
#endif
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r2,@(8,r4)	)
EX(	mov.l	r7,@(12,r4)	)
EX(	mov.l	r8,@(16,r4)	)
EX(	mov.l	r9,@(20,r4)	)
EX(	mov.l	r10,@(24,r4)	)
EX(	mov.l	r11,@(28,r4)	)

	/* Loop bookkeeping; r0 is free again once it has been stored. */
	add	#-32, r6
	mov	#32, r0
	cmp/gt	r6, r0			! T = (32 > r6)
	bf/s	.L_dest00_line		! at least 32 bytes left: go again
	 add	#32,r4

.L_dest00_tail:
	/* Copy the remaining whole longwords, if any. */
	mov	r6, r0
	shlr2	r0
	tst	r0, r0
	bt	.L_cleanup

.L_dest00_long:
EX(	mov.l	@r5+,r1		)
	dt	r0
EX(	mov.l	r1,@r4		)
	bf/s	.L_dest00_long
	 add	#4,r4

	bra	.L_cleanup
	 nop
/*
 * Destination = 10: the source is longword aligned and the destination is
 * word aligned but not longword aligned ((to & 3) == 2).
 *
 * In:   r2 = longwords left (n >> 2), r4 = to, r5 = from
 * Uses: r7 = number of 32-byte chunks, r0, r1, r8, r9, r10 = data
 *
 * Each 32-byte chunk is read as eight longwords and written as
 * word + seven longwords + word. xtrct Rm,Rn forms
 * Rn = (Rm << 16) | (Rn >> 16), i.e. the longword that straddles two
 * neighbouring source longwords.
 *
 * Every access to the source or destination buffer must be wrapped in
 * EX() so that a fault is redirected to the fixup handler.
 */
.L_dest10:
	mov	r2,r7
	shlr2	r7
	shlr	r7			! r7 = longwords / 8
	tst	r7,r7
	mov	#7,r0
	bt/s	.L_dest10_long		! no complete 32-byte chunk
	 and	r0,r2			! delay slot: r2 = longwords % 8

.L_dest10_chunk:
	dt	r7			! T is consumed by the bf/s below
#ifdef CONFIG_CPU_LITTLE_ENDIAN
	/* Little endian: work upwards through the chunk. */
EX(	mov.l	@r5+,r0		)
EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r9		)
EX(	mov.l	@r5+,r10	)
EX(	mov.w	r0,@r4		)
	add	#2,r4			! r4 is longword aligned from here on
	xtrct	r1,r0
	xtrct	r8,r1
	xtrct	r9,r8
	xtrct	r10,r9

EX(	mov.l	r0,@r4		)
EX(	mov.l	r1,@(4,r4)	)
EX(	mov.l	r8,@(8,r4)	)
EX(	mov.l	r9,@(12,r4)	)

EX(	mov.l	@r5+,r1		)
EX(	mov.l	@r5+,r8		)
EX(	mov.l	@r5+,r0		)
	xtrct	r1,r10
	xtrct	r8,r1
	xtrct	r0,r8
	shlr16	r0			! upper word of the last source longword
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	r1,@(20,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.w	r0,@(28,r4)	)
	bf/s	.L_dest10_chunk
	 add	#30,r4
#else
	/*
	 * Big endian: work downwards through the chunk, starting with the
	 * last source longword.
	 *
	 * NOTE(review): between "add #-2,r4" and "add #34,r4" r4 points two
	 * bytes below the start of the chunk, so a fault in that window makes
	 * the fixup (r3 - r4) report two bytes more than were outstanding at
	 * the start of the chunk. Confirm that callers tolerate this.
	 */
EX(	mov.l	@(28,r5),r0	)
EX(	mov.l	@(24,r5),r8	)
EX(	mov.l	@(20,r5),r9	)
EX(	mov.l	@(16,r5),r10	)
EX(	mov.w	r0,@(30,r4)	)
	add	#-2,r4			! r4 is longword aligned from here on
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(28,r4)	)
EX(	mov.l	r8,@(24,r4)	)
EX(	mov.l	r9,@(20,r4)	)

EX(	mov.l	@(12,r5),r0	)
EX(	mov.l	@(8,r5),r8	)
	xtrct	r0,r10
EX(	mov.l	@(4,r5),r9	)
	/*
	 * This destination store was previously emitted without EX(), so a
	 * fault on it had no exception-table entry. It can be the first
	 * access to a lower destination page because this path stores
	 * downwards.
	 */
EX(	mov.l	r10,@(16,r4)	)
EX(	mov.l	@r5,r10		)
	xtrct	r8,r0
	xtrct	r9,r8
	xtrct	r10,r9
EX(	mov.l	r0,@(12,r4)	)
EX(	mov.l	r8,@(8,r4)	)
	swap.w	r10,r0			! low word of r0 = first two source bytes
EX(	mov.l	r9,@(4,r4)	)
EX(	mov.w	r0,@(2,r4)	)

	add	#32,r5
	bf/s	.L_dest10_chunk
	 add	#34,r4
#endif
	tst	r2,r2			! any longwords left after the chunks?
	bt	.L_cleanup

.L_dest10_long:
	/* Read one longword, write it as two words per iteration. */
EX(	mov.l	@r5+,r0		)
	dt	r2
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.w	r0,@(2,r4)	)
#else
EX(	mov.w	r0,@(2,r4)	)
	shlr16	r0
EX(	mov.w	r0,@r4		)
#endif
	bf/s	.L_dest10_long
	 add	#4,r4

	bra	.L_cleanup
	 nop
/*
 * Destination = 01 or 11: the destination address is odd.
 * In: r2 = longwords left, r4 = to, r5 = from (longword aligned).
 * Each iteration reads one longword and writes it as byte, word, byte.
 * Falls through into .L_cleanup when r2 reaches zero.
 */
.L_dest01:
.L_dest11:
.L_dest_odd_loop:
EX(	mov.l	@r5+,r0		)
#ifdef CONFIG_CPU_LITTLE_ENDIAN
EX(	mov.b	r0,@r4		)
	add	#1,r4			! r4 is now word aligned
	shlr8	r0
EX(	mov.w	r0,@r4		)
	shlr16	r0
EX(	mov.b	r0,@(2,r4)	)
	dt	r2
	bf/s	.L_dest_odd_loop
	 add	#3,r4
#else
EX(	mov.b	r0,@(3,r4)	)
	shlr8	r0
	swap.w	r0,r7			! low byte of r7 = top byte of the longword
EX(	mov.b	r7,@r4		)
	add	#1,r4			! r4 is now word aligned
EX(	mov.w	r0,@r4		)
	dt	r2
	bf/s	.L_dest_odd_loop
	 add	#3,r4
#endif
/*
 * Common tail of the bulk paths: copy the last (r6 & 3) bytes one at a
 * time, then restore r8-r11 and return 0.
 */
.L_cleanup:
	mov	#3,r0
	and	r6,r0			! r0 = r6 & 3
	tst	r0,r0
	bt	.L_exit
	mov	r0,r6			! r6 = trailing byte count
.L_cleanup_loop:
EX(	mov.b	@r5+,r0		)
	dt	r6
EX(	mov.b	r0,@r4		)
	bf/s	.L_cleanup_loop
	 add	#1,r4

.L_exit:
	mov	#0,r0			! normal return: 0 bytes left uncopied
5000:
	/* Pop in reverse push order; the last pop sits in the rts delay slot. */
	mov.l	@r15+,r8
	mov.l	@r15+,r9
	mov.l	@r15+,r10
	rts
	 mov.l	@r15+,r11

	/*
	 * Fixup for faults in instructions wrapped with the 6000 fixup label:
	 * resume at label 5000 (the register restore above) with
	 * r0 = r3 - r4 (end of destination minus current destination).
	 */
	.section .fixup, "ax"
6000:
	mov.l	8000f,r1
	mov	r3,r0
	jmp	@r1
	 sub	r4,r0			! delay slot: r0 = bytes not copied
	.align	2
8000:	.long	5000b
	.previous
|