1 | /*************************************************************************** |
---|
2 | * MMX routine * |
---|
3 | * Copyright (C) 2005 by Prakash Punnoor * |
---|
4 | * prakash@punnoor.de * |
---|
5 | * * |
---|
6 | * This program is free software; you can redistribute it and/or modify * |
---|
7 | * it under the terms of the GNU Library General Public License as * |
---|
8 | * published by the Free Software Foundation; either version 2 of the * |
---|
9 | * License, or (at your option) any later version. * |
---|
10 | * * |
---|
11 | * This program is distributed in the hope that it will be useful, * |
---|
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * |
---|
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * |
---|
14 | * GNU General Public License for more details. * |
---|
15 | * * |
---|
16 | * You should have received a copy of the GNU Library General Public * |
---|
17 | * License along with this program; if not, write to the * |
---|
18 | * Free Software Foundation, Inc., * |
---|
19 | * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * |
---|
20 | ***************************************************************************/ |
---|
21 | #include "al_siteconfig.h" |
---|
22 | |
---|
23 | #include <AL/al.h> |
---|
24 | #include "al_cpu_caps.h" |
---|
25 | #include "x86_simd_support_prk.h" |
---|
26 | |
---|
27 | /* Fixed-point precision of the gain: sa is multiplied by 2^SCALING_POWER before use. The MMX routine needs 16 because it replaces the ">> SCALING_POWER" step with pmulhw, which keeps the upper 16 bits of each 16x16 product. */ |
---|
28 | #define SCALING_POWER 16 |
---|
29 | #define SCALING_FACTOR (1 << SCALING_POWER) |
---|
30 | |
---|
31 | void _alFloatMul(ALshort *bpt, ALfloat sa, ALuint len); |
---|
32 | |
---|
33 | void _alFloatMul(ALshort *bpt, ALfloat sa, ALuint len) { |
---|
34 | ALint scaled_sa = sa * SCALING_FACTOR; |
---|
35 | ALint iter; |
---|
36 | |
---|
37 | #ifdef __MMX__ |
---|
38 | if (_alHaveMMX()) { |
---|
39 | union { |
---|
40 | short s[4]; |
---|
41 | v4hi v; |
---|
42 | } ALIGN16(v_sa); |
---|
43 | ALuint samples_main; |
---|
44 | ALuint samples_pre; |
---|
45 | ALuint samples_post; |
---|
46 | v4hi temp; |
---|
47 | |
---|
48 | |
---|
49 | samples_pre = MMX_ALIGN - (aint)bpt % MMX_ALIGN; |
---|
50 | samples_pre /= sizeof(ALshort); |
---|
51 | samples_main = len - samples_pre; |
---|
52 | samples_post = samples_main % 8; |
---|
53 | samples_main = samples_main / 8; |
---|
54 | len = samples_post; |
---|
55 | |
---|
56 | while(samples_pre--) { |
---|
57 | iter = *bpt; |
---|
58 | iter *= scaled_sa; |
---|
59 | iter >>= SCALING_POWER; |
---|
60 | *bpt = iter; |
---|
61 | ++bpt; |
---|
62 | } |
---|
63 | |
---|
64 | if (scaled_sa < (1 << 15)) { |
---|
65 | /* we do signed multiplication, so 1 << 15 is the max */ |
---|
66 | v_sa.s[0] = scaled_sa; |
---|
67 | v_sa.s[1] = v_sa.s[0]; |
---|
68 | v_sa.s[2] = scaled_sa; |
---|
69 | v_sa.s[3] = v_sa.s[0]; |
---|
70 | |
---|
71 | while (samples_main--) { |
---|
72 | *(v4hi*)bpt = __builtin_ia32_pmulhw(*(v4hi*)bpt, v_sa.v); |
---|
73 | bpt += 4; |
---|
74 | *(v4hi*)bpt = __builtin_ia32_pmulhw(*(v4hi*)bpt, v_sa.v); |
---|
75 | bpt += 4; |
---|
76 | } |
---|
77 | } else { |
---|
78 | /* we lose 1 bit here, but well... */ |
---|
79 | v_sa.s[0] = scaled_sa >> 1; |
---|
80 | v_sa.s[1] = v_sa.s[0]; |
---|
81 | v_sa.s[2] = v_sa.s[0]; |
---|
82 | v_sa.s[3] = v_sa.s[0]; |
---|
83 | |
---|
84 | while (samples_main--) { |
---|
85 | temp = __builtin_ia32_pmulhw(*(v4hi*)bpt, v_sa.v); |
---|
86 | *(v4hi*)bpt = __builtin_ia32_psllw(temp, 1LL); |
---|
87 | bpt += 4; |
---|
88 | temp = __builtin_ia32_pmulhw(*(v4hi*)bpt, v_sa.v); |
---|
89 | *(v4hi*)bpt = __builtin_ia32_psllw(temp, 1LL); |
---|
90 | bpt += 4; |
---|
91 | } |
---|
92 | } |
---|
93 | __builtin_ia32_emms(); |
---|
94 | } |
---|
95 | #endif /* __MMX__ */ |
---|
96 | |
---|
97 | while(len--) { |
---|
98 | iter = *bpt; |
---|
99 | iter *= scaled_sa; |
---|
100 | iter >>= SCALING_POWER; |
---|
101 | *bpt = iter; |
---|
102 | ++bpt; |
---|
103 | } |
---|
104 | } |
---|