1 | /* |
---|
2 | ----------------------------------------------------------------------------- |
---|
3 | This source file is part of OGRE |
---|
4 | (Object-oriented Graphics Rendering Engine) |
---|
5 | For the latest info, see http://www.ogre3d.org/ |
---|
6 | |
---|
7 | Copyright (c) 2000-2006 Torus Knot Software Ltd |
---|
8 | Also see acknowledgements in Readme.html |
---|
9 | |
---|
10 | This program is free software; you can redistribute it and/or modify it under |
---|
11 | the terms of the GNU Lesser General Public License as published by the Free Software |
---|
12 | Foundation; either version 2 of the License, or (at your option) any later |
---|
13 | version. |
---|
14 | |
---|
15 | This program is distributed in the hope that it will be useful, but WITHOUT |
---|
16 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
---|
17 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. |
---|
18 | |
---|
19 | You should have received a copy of the GNU Lesser General Public License along with |
---|
20 | this program; if not, write to the Free Software Foundation, Inc., 59 Temple |
---|
21 | Place - Suite 330, Boston, MA 02111-1307, USA, or go to |
---|
22 | http://www.gnu.org/copyleft/lesser.txt. |
---|
23 | |
---|
24 | You may alternatively use this source under the terms of a specific version of |
---|
25 | the OGRE Unrestricted License provided you have obtained such a license from |
---|
26 | Torus Knot Software Ltd. |
---|
27 | ----------------------------------------------------------------------------- |
---|
28 | */ |
---|
29 | |
---|
30 | |
---|
31 | /** |
---|
32 | A number of invaluable references were used to put together this ps.1.x compiler for ATI_fragment_shader execution |
---|
33 | |
---|
34 | References: |
---|
35 | 1. MSDN: DirectX 8.1 Reference |
---|
36 | 2. Wolfgang F. Engel "Fundamentals of Pixel Shaders - Introduction to Shader Programming Part III" on gamedev.net |
---|
37 | 3. Martin Ecker - XEngine |
---|
38 | 4. Shawn Kirst - ps14toATIfs |
---|
39 | 5. Jason L. Mitchell "Real-Time 3D Graphics With Pixel Shaders" |
---|
40 | 6. Jason L. Mitchell "1.4 Pixel Shaders" |
---|
41 | 7. Jason L. Mitchell and Evan Hart "Hardware Shading with EXT_vertex_shader and ATI_fragment_shader" |
---|
42 | 8. ATI 8500 SDK |
---|
43 | 9. GL_ATI_fragment_shader extension reference |
---|
44 | |
---|
45 | */ |
---|
46 | //--------------------------------------------------------------------------- |
---|
47 | #ifndef ps_1_4H |
---|
48 | #define ps_1_4H |
---|
49 | |
---|
50 | #include <stdio.h> |
---|
51 | #include <stdlib.h> |
---|
52 | #include <string.h> |
---|
53 | |
---|
54 | #include "OgreGLPrerequisites.h" |
---|
55 | #include "Compiler2Pass.h" |
---|
56 | |
---|
57 | |
---|
58 | //--------------------------------------------------------------------------- |
---|
/// Number of elements in a statically sized array.
/// The argument is parenthesized before it is indexed so that an expression
/// argument (for example a dereferenced pointer-to-array cast) is indexed as a
/// whole rather than having `[0]` bind to its last sub-expression.
/// Only meaningful for true arrays: passing a pointer compiles but yields
/// sizeof(pointer) / sizeof(element), which is not an element count.
#define ARRAYSIZE(array) (sizeof(array) / sizeof((array)[0]))

// Bit masks named for the alpha channel (0x08) and the RGB channels (0x07);
// together they cover the low four bits.
#define ALPHA_BIT 0x08
#define RGB_BITS  0x07

// Context key patterns: one distinct bit per pixel shader context so that
// several contexts can be combined into a single pattern.
#define ckp_PS_BASE 0x1
#define ckp_PS_1_1  0x2
#define ckp_PS_1_2  0x4
#define ckp_PS_1_3  0x8
#define ckp_PS_1_4  0x10

// Combined pattern: base context plus the ps.1.4 context (value 0x11).
// Bitwise OR is used because the operands are bit flags.
#define ckp_PS_1_4_BASE (ckp_PS_BASE | ckp_PS_1_4)
---|
73 | |
---|
74 | |
---|
75 | |
---|
76 | |
---|
77 | /** Subclasses Compiler2Pass to provide a ps_1_x compiler that takes DirectX pixel shader assembly |
---|
78 | and converts it to a form that can be used by ATI_fragment_shader OpenGL API |
---|
79 | @remarks |
---|
80 | all ps_1_1, ps_1_2, ps_1_3, ps_1_4 assembly instructions are recognized but not all are passed |
---|
81 | on to ATI_fragment_shader. ATI_fragment_shader does not have an equivelant directive for |
---|
82 | texkill or texdepth instructions. |
---|
83 | |
---|
84 | The user must provide the GL binding interfaces. |
---|
85 | |
---|
86 | A Test method is provided to verify the basic operation of the compiler which outputs the test |
---|
87 | results to a file. |
---|
88 | |
---|
89 | |
---|
90 | */ |
---|
91 | class PS_1_4 : public Compiler2Pass{ |
---|
92 | private: |
---|
93 | enum RWAflags {rwa_NONE = 0, rwa_READ = 1, rwa_WRITE = 2}; |
---|
94 | |
---|
95 | enum MachineInstID {mi_COLOROP1, mi_COLOROP2, mi_COLOROP3, mi_ALPHAOP1, mi_ALPHAOP2, |
---|
96 | mi_ALPHAOP3, mi_SETCONSTANTS, mi_PASSTEXCOORD, mi_SAMPLEMAP, mi_TEX, |
---|
97 | mi_TEXCOORD, mi_TEXREG2RGB, mi_NOP |
---|
98 | }; |
---|
99 | |
---|
100 | struct TokenInstType{ |
---|
101 | char* Name; |
---|
102 | GLuint ID; |
---|
103 | |
---|
104 | }; |
---|
105 | |
---|
106 | struct RegisterUsage { |
---|
107 | bool Phase1Write; |
---|
108 | bool Phase2Write; |
---|
109 | }; |
---|
110 | |
---|
111 | // Token ID enumeration |
---|
112 | enum SymbolID { |
---|
113 | // Terminal Tokens section |
---|
114 | |
---|
115 | // DirectX pixel shader source formats |
---|
116 | sid_PS_1_4, sid_PS_1_1, sid_PS_1_2, sid_PS_1_3, |
---|
117 | |
---|
118 | // PS_BASE |
---|
119 | sid_C0, sid_C1, sid_C2, sid_C3, sid_C4, sid_C5, sid_C6, sid_C7, |
---|
120 | sid_V0, sid_V1, |
---|
121 | sid_ADD, sid_SUB, sid_MUL, sid_MAD, sid_LRP, sid_MOV, sid_CMP, sid_CND, |
---|
122 | sid_DP3, sid_DP4, sid_DEF, |
---|
123 | sid_R, sid_RA, sid_G, sid_GA, sid_B, sid_BA, sid_A, sid_RGBA, sid_RGB, |
---|
124 | sid_RG, sid_RGA, sid_RB, sid_RBA, sid_GB, sid_GBA, |
---|
125 | sid_RRRR, sid_GGGG, sid_BBBB, sid_AAAA, |
---|
126 | sid_X2, sid_X4, sid_D2, sid_SAT, |
---|
127 | sid_BIAS, sid_INVERT, sid_NEGATE, sid_BX2, |
---|
128 | sid_COMMA, sid_VALUE, |
---|
129 | |
---|
130 | //PS_1_4 sid |
---|
131 | sid_R0, sid_R1, sid_R2, sid_R3, sid_R4, sid_R5, |
---|
132 | sid_T0, sid_T1, sid_T2, sid_T3, sid_T4, sid_T5, |
---|
133 | sid_DP2ADD, |
---|
134 | sid_X8, sid_D8, sid_D4, |
---|
135 | sid_TEXCRD, sid_TEXLD, |
---|
136 | sid_STR, sid_STQ, |
---|
137 | sid_STRDR, sid_STQDQ, |
---|
138 | sid_BEM, |
---|
139 | sid_PHASE, |
---|
140 | |
---|
141 | //PS_1_1 sid |
---|
142 | sid_1R0, sid_1R1, sid_1T0, sid_1T1, sid_1T2, sid_1T3, |
---|
143 | sid_TEX, sid_TEXCOORD, sid_TEXM3X2PAD, |
---|
144 | sid_TEXM3X2TEX, sid_TEXM3X3PAD, sid_TEXM3X3TEX, sid_TEXM3X3SPEC, sid_TEXM3X3VSPEC, |
---|
145 | sid_TEXREG2AR, sid_TEXREG2GB, |
---|
146 | |
---|
147 | //PS_1_2 side |
---|
148 | sid_TEXREG2RGB, sid_TEXDP3, sid_TEXDP3TEX, |
---|
149 | |
---|
150 | // common |
---|
151 | sid_SKIP, sid_PLUS, |
---|
152 | |
---|
153 | // non-terminal tokens section |
---|
154 | sid_PROGRAM, sid_PROGRAMTYPE, sid_DECLCONSTS, sid_DEFCONST, |
---|
155 | sid_CONSTANT, sid_COLOR, |
---|
156 | sid_TEXSWIZZLE, sid_UNARYOP, |
---|
157 | sid_NUMVAL, sid_SEPERATOR, sid_ALUOPS, sid_TEXMASK, sid_TEXOP_PS1_1_3, |
---|
158 | sid_TEXOP_PS1_4, |
---|
159 | sid_ALU_STATEMENT, sid_DSTMODSAT, sid_UNARYOP_ARGS, sid_REG_PS1_4, |
---|
160 | sid_TEX_PS1_4, sid_REG_PS1_1_3, sid_TEX_PS1_1_3, sid_DSTINFO, |
---|
161 | sid_SRCINFO, sid_BINARYOP_ARGS, sid_TERNARYOP_ARGS, sid_TEMPREG, |
---|
162 | sid_DSTMASK, sid_PRESRCMOD, sid_SRCNAME, sid_SRCREP, sid_POSTSRCMOD, |
---|
163 | sid_DSTMOD, sid_DSTSAT, sid_BINARYOP, sid_TERNARYOP, |
---|
164 | sid_TEXOPS_PHASE1, sid_COISSUE, sid_PHASEMARKER, sid_TEXOPS_PHASE2, |
---|
165 | sid_TEXREG_PS1_4, sid_TEXOPS_PS1_4, sid_TEXOPS_PS1_1_3, sid_TEXCISCOP_PS1_1_3, |
---|
166 | |
---|
167 | |
---|
168 | // last token |
---|
169 | sid_INVALID = BAD_TOKEN // must be last in enumeration |
---|
170 | }; |
---|
171 | |
---|
172 | /// structure used to keep track of arguments and instruction parameters |
---|
173 | struct OpParram { |
---|
174 | GLuint Arg; // type of argument |
---|
175 | bool Filled; // has it been filled yet |
---|
176 | GLuint MaskRep; // Mask/Replicator flags |
---|
177 | GLuint Mod; // argument modifier |
---|
178 | }; |
---|
179 | |
---|
180 | typedef std::vector<uint> MachineInstContainer; |
---|
181 | //typedef MachineInstContainer::iterator MachineInstIterator; |
---|
182 | |
---|
183 | |
---|
184 | // there are 2 phases with 2 subphases each |
---|
185 | enum PhaseType {ptPHASE1TEX, ptPHASE1ALU, ptPHASE2TEX, ptPHASE2ALU }; |
---|
186 | |
---|
187 | struct RegModOffset { |
---|
188 | uint MacroOffset; |
---|
189 | uint RegisterBase; |
---|
190 | uint OpParramsIndex; |
---|
191 | }; |
---|
192 | |
---|
193 | struct MacroRegModify { |
---|
194 | TokenInst * Macro; |
---|
195 | uint MacroSize; |
---|
196 | RegModOffset * RegMods; |
---|
197 | uint RegModSize; |
---|
198 | |
---|
199 | }; |
---|
200 | |
---|
201 | #define R_BASE (sid_R0 - GL_REG_0_ATI) |
---|
202 | #define C_BASE (sid_C0 - GL_CON_0_ATI) |
---|
203 | #define T_BASE (sid_1T0 - GL_REG_0_ATI) |
---|
204 | |
---|
205 | // static library database for tokens and BNF rules |
---|
206 | static SymbolDef PS_1_4_SymbolTypeLib[]; |
---|
207 | static TokenRule PS_1_x_RulePath[]; |
---|
208 | static bool LibInitialized; |
---|
209 | |
---|
210 | // Static Macro database for ps.1.1 ps.1.2 ps.1.3 instructions |
---|
211 | |
---|
212 | static TokenInst texreg2ar[]; |
---|
213 | static RegModOffset texreg2xx_RegMods[]; |
---|
214 | static MacroRegModify texreg2ar_MacroMods; |
---|
215 | |
---|
216 | static TokenInst texreg2gb[]; |
---|
217 | static MacroRegModify texreg2gb_MacroMods; |
---|
218 | |
---|
219 | static TokenInst texdp3[]; |
---|
220 | static RegModOffset texdp3_RegMods[]; |
---|
221 | static MacroRegModify texdp3_MacroMods; |
---|
222 | |
---|
223 | static TokenInst texdp3tex[]; |
---|
224 | static RegModOffset texdp3tex_RegMods[]; |
---|
225 | static MacroRegModify texdp3tex_MacroMods; |
---|
226 | |
---|
227 | static TokenInst texm3x2pad[]; |
---|
228 | static RegModOffset texm3xxpad_RegMods[]; |
---|
229 | static MacroRegModify texm3x2pad_MacroMods; |
---|
230 | |
---|
231 | static TokenInst texm3x2tex[]; |
---|
232 | static RegModOffset texm3xxtex_RegMods[]; |
---|
233 | static MacroRegModify texm3x2tex_MacroMods; |
---|
234 | |
---|
235 | static TokenInst texm3x3pad[]; |
---|
236 | static MacroRegModify texm3x3pad_MacroMods; |
---|
237 | |
---|
238 | static TokenInst texm3x3tex[]; |
---|
239 | static MacroRegModify texm3x3tex_MacroMods; |
---|
240 | |
---|
241 | static TokenInst texm3x3spec[]; |
---|
242 | static RegModOffset texm3x3spec_RegMods[]; |
---|
243 | static MacroRegModify texm3x3spec_MacroMods; |
---|
244 | |
---|
245 | static TokenInst texm3x3vspec[]; |
---|
246 | static RegModOffset texm3x3vspec_RegMods[]; |
---|
247 | static MacroRegModify texm3x3vspec_MacroMods; |
---|
248 | |
---|
249 | |
---|
250 | MachineInstContainer mPhase1TEX_mi; /// machine instructions for phase one texture section |
---|
251 | MachineInstContainer mPhase1ALU_mi; /// machine instructions for phase one ALU section |
---|
252 | MachineInstContainer mPhase2TEX_mi; /// machine instructions for phase two texture section |
---|
253 | MachineInstContainer mPhase2ALU_mi; /// machine instructions for phase two ALU section |
---|
254 | |
---|
255 | MachineInstContainer* mActivePhaseMachineInstructions; |
---|
256 | // vars used during pass 2 |
---|
257 | MachineInstID mOpType; |
---|
258 | uint mOpInst; |
---|
259 | bool mDo_Alpha; |
---|
260 | PhaseType mInstructionPhase; |
---|
261 | int mArgCnt; |
---|
262 | int mConstantsPos; |
---|
263 | |
---|
264 | #define MAXOPPARRAMS 5 // max number of parrams bound to an instruction |
---|
265 | |
---|
266 | OpParram mOpParrams[MAXOPPARRAMS]; |
---|
267 | |
---|
268 | /// keeps track of which registers are written to in each phase |
---|
269 | /// if a register is read from but has not been written to in phase 2 |
---|
270 | /// then if it was written to in phase 1 perform a register pass function |
---|
271 | /// at the begining of phase2 so that the register has something worthwhile in it |
---|
272 | /// NB: check ALU and TEX section of phase 1 and phase 2 |
---|
273 | /// there are 6 temp registers r0 to r5 to keep track off |
---|
274 | /// checks are performed in pass 2 when building machine instructions |
---|
275 | RegisterUsage Phase_RegisterUsage[6]; |
---|
276 | |
---|
277 | bool mMacroOn; // if true then put all ALU instructions in phase 1 |
---|
278 | |
---|
279 | uint mTexm3x3padCount; // keep track of how many texm3x3pad instructions are used so know which mask to use |
---|
280 | |
---|
281 | size_t mLastInstructionPos; // keep track of last phase 2 ALU instruction to check for R0 setting |
---|
282 | size_t mSecondLastInstructionPos; |
---|
283 | |
---|
284 | // keep track if phase marker found: determines which phase the ALU instructions go into |
---|
285 | bool mPhaseMarkerFound; |
---|
286 | |
---|
287 | #ifdef _DEBUG |
---|
288 | FILE* fp; |
---|
289 | // full compiler test with output results going to a text file |
---|
290 | void testCompile(char* testname, char* teststr, SymbolID* testresult, |
---|
291 | uint testresultsize, GLuint* MachinInstResults = NULL, uint MachinInstResultsSize = 0); |
---|
292 | #endif // _DEBUG |
---|
293 | |
---|
294 | |
---|
295 | /** attempt to build a machine instruction using current tokens |
---|
296 | determines what phase machine insturction should be in and if an Alpha Op is required |
---|
297 | calls expandMachineInstruction() to expand the token into machine instructions |
---|
298 | */ |
---|
299 | bool BuildMachineInst(); |
---|
300 | |
---|
301 | void clearMachineInstState(); |
---|
302 | |
---|
303 | bool setOpParram(const SymbolDef* symboldef); |
---|
304 | |
---|
305 | /** optimizes machine instructions depending on pixel shader context |
---|
306 | only applies to ps.1.1 ps.1.2 and ps.1.3 since they use CISC instructions |
---|
307 | that must be transformed into RISC instructions |
---|
308 | */ |
---|
309 | void optimize(); |
---|
310 | |
---|
311 | // the method is expected to be recursive to allow for inline expansion of instructions if required |
---|
312 | bool Pass2scan(const TokenInst * Tokens, const size_t size); |
---|
313 | |
---|
314 | // supply virtual functions for Compiler2Pass |
---|
315 | /// Pass 1 is completed so now take tokens generated and build machine instructions |
---|
316 | bool doPass2(); |
---|
317 | |
---|
318 | /** Build a machine instruction from token and ready it for expansion |
---|
319 | will expand CISC tokens using macro database |
---|
320 | |
---|
321 | */ |
---|
322 | bool bindMachineInstInPassToFragmentShader(const MachineInstContainer & PassMachineInstructions); |
---|
323 | |
---|
324 | /** Expand CISC tokens into PS1_4 token equivalents |
---|
325 | |
---|
326 | */ |
---|
327 | bool expandMacro(const MacroRegModify & MacroMod); |
---|
328 | |
---|
329 | /** Expand Machine instruction into operation type and arguments and put into proper machine |
---|
330 | instruction container |
---|
331 | also expands scaler alpha machine instructions if required |
---|
332 | |
---|
333 | */ |
---|
334 | bool expandMachineInstruction(); |
---|
335 | |
---|
336 | // mainly used by tests - too slow for use in binding |
---|
337 | size_t getMachineInst(size_t Idx); |
---|
338 | |
---|
339 | size_t getMachineInstCount(); |
---|
340 | |
---|
341 | void addMachineInst(PhaseType phase, const uint inst); |
---|
342 | |
---|
343 | void clearAllMachineInst(); |
---|
344 | |
---|
345 | void updateRegisterWriteState(const PhaseType phase); |
---|
346 | |
---|
347 | bool isRegisterReadValid(const PhaseType phase, const int param); |
---|
348 | |
---|
349 | public: |
---|
350 | |
---|
351 | /// constructor |
---|
352 | PS_1_4(); |
---|
353 | |
---|
354 | /// binds machine instructions generated in Pass 2 to the ATI GL fragment shader |
---|
355 | bool bindAllMachineInstToFragmentShader(); |
---|
356 | |
---|
357 | #ifdef _DEBUG |
---|
358 | /// perform compiler tests - only available in _DEBUG mode |
---|
359 | void test(); |
---|
360 | void testbinder(); |
---|
361 | |
---|
362 | #endif |
---|
363 | }; |
---|
364 | |
---|
365 | |
---|
366 | #endif |
---|
367 | |
---|