1 | ;*************************************************************************** |
---|
2 | ;* Copyright (C) 2005 by Prakash Punnoor * |
---|
3 | ;* prakash@punnoor.de * |
---|
4 | ;* * |
---|
5 | ;* This program is free software; you can redistribute it and/or modify * |
---|
6 | ;* it under the terms of the GNU Library General Public License as * |
---|
7 | ;* published by the Free Software Foundation; either version 2 of the * |
---|
8 | ;* License, or (at your option) any later version. * |
---|
9 | ;* * |
---|
10 | ;* This program is distributed in the hope that it will be useful, * |
---|
11 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of * |
---|
12 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
---|
13 | ;* GNU General Public License for more details. * |
---|
14 | ;* * |
---|
15 | ;* You should have received a copy of the GNU Library General Public * |
---|
16 | ;* License along with this program; if not, write to the * |
---|
17 | ;* Free Software Foundation, Inc., * |
---|
18 | ;* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * |
---|
19 | ;*************************************************************************** |
---|
20 | |
---|
;-----------------------------------------------------------------------
; void _alMMXmemcpy(void* dst, void* src, unsigned int n);
;
; Copies n bytes from src to dst. x86 32 bit only!
; All three arguments are read from the stack.
;
; The copy runs in three phases:
;   1. head: byte copy until dst is 8-byte aligned (0..7 bytes, never
;            more than n)
;   2. bulk: 64 bytes per iteration through mm0..mm7
;   3. tail: byte copy of the remaining 0..63 bytes
;
; Any n is accepted, including n < 8 and n == 0.
; The copy always walks forward (the direction flag is cleared first).
;
; Saved/restored: edi, esi, flags (pushf/popf)
; Clobbered:      ecx, edx, mm0..mm7 (emms is issued after MMX use)
;
; The routine is exported under two names that differ only in the
; number of leading underscores; both labels mark the same entry point.
;-----------------------------------------------------------------------
global __alMMXmemcpy
global _alMMXmemcpy

__alMMXmemcpy:
_alMMXmemcpy:

        ; Save the registers affected
        pushf
        push    edi
        push    esi

        cld                             ; string ops must count upwards

        ; 3 saved dwords + return address sit below the arguments
        mov     edi, [esp + 16]         ; char* dst
        mov     esi, [esp + 20]         ; char* src
        mov     edx, [esp + 24]         ; unsigned int n

        ; head: bytes needed to bring dst to the next 8-byte boundary
        mov     ecx, edi
        neg     ecx
        and     ecx, 7                  ; ecx = (-dst) & 7, i.e. 0..7
        cmp     ecx, edx
        jbe     .head_ready             ; unsigned: head fits inside n
        mov     ecx, edx                ; n is smaller: copy only n bytes
.head_ready:
        sub     edx, ecx                ; edx = bytes left after the head
        ; copying first dwords and then
        ; remaining bytes wasn't faster
        rep movsb                       ; no-op when ecx == 0

        ; split the rest into 64-byte chunks and a tail
        mov     ecx, edx
        and     ecx, 63                 ; ecx = tail bytes (0..63)
        shr     edx, 6                  ; edx = number of 64-byte chunks
        test    edx, edx
        jz      .bulk_done              ; nothing for the MMX loop

        ; bulk: load 64 bytes, then store 64 bytes
.bulk_loop:
        movq    mm0, [esi]
        movq    mm1, [esi + 8]
        movq    mm2, [esi + 16]
        movq    mm3, [esi + 24]
        movq    mm4, [esi + 32]
        movq    mm5, [esi + 40]
        movq    mm6, [esi + 48]
        movq    mm7, [esi + 56]
        movq    [edi], mm0
        movq    [edi + 8], mm1
        movq    [edi + 16], mm2
        movq    [edi + 24], mm3
        movq    [edi + 32], mm4
        movq    [edi + 40], mm5
        movq    [edi + 48], mm6
        movq    [edi + 56], mm7
        add     esi, 64
        add     edi, 64
        dec     edx
        jnz     .bulk_loop
        emms                            ; leave MMX state so x87 code works again
.bulk_done:

        ; tail: ecx still holds the 0..63 leftover bytes
        rep movsb

        ; Restore registers
        pop     esi
        pop     edi
        popf
        ret
---|
94 | |
---|
; prevent executable stack
; The empty .note.GNU-stack section is only meaningful for ELF objects.
; The 32-bit ELF format can be selected under the name "elf" or "elf32",
; and __OUTPUT_FORMAT__ reflects the name in use, so both are checked.
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
---|