1 | /* |
---|
2 | ----------------------------------------------------------------------------- |
---|
3 | This source file is part of OGRE |
---|
4 | (Object-oriented Graphics Rendering Engine) |
---|
5 | For the latest info, see http://www.ogre3d.org/ |
---|
6 | |
---|
7 | Copyright (c) 2000-2006 Torus Knot Software Ltd |
---|
8 | Also see acknowledgements in Readme.html |
---|
9 | |
---|
10 | This program is free software; you can redistribute it and/or modify it under |
---|
11 | the terms of the GNU Lesser General Public License as published by the Free Software |
---|
12 | Foundation; either version 2 of the License, or (at your option) any later |
---|
13 | version. |
---|
14 | |
---|
15 | This program is distributed in the hope that it will be useful, but WITHOUT |
---|
16 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
---|
17 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. |
---|
18 | |
---|
19 | You should have received a copy of the GNU Lesser General Public License along with |
---|
20 | this program; if not, write to the Free Software Foundation, Inc., 59 Temple |
---|
21 | Place - Suite 330, Boston, MA 02111-1307, USA, or go to |
---|
22 | http://www.gnu.org/copyleft/lesser.txt. |
---|
23 | |
---|
24 | You may alternatively use this source under the terms of a specific version of |
---|
25 | the OGRE Unrestricted License provided you have obtained such a license from |
---|
26 | Torus Knot Software Ltd. |
---|
27 | ----------------------------------------------------------------------------- |
---|
28 | */ |
---|
29 | #ifndef OGREIMAGERESAMPLER_H |
---|
30 | #define OGREIMAGERESAMPLER_H |
---|
31 | |
---|
32 | #include <algorithm> |
---|
33 | |
---|
34 | // this file is inlined into OgreImage.cpp! |
---|
35 | // do not include anywhere else. |
---|
36 | namespace Ogre { |
---|
37 | |
---|
38 | // define uint64 type |
---|
39 | #if OGRE_COMPILER == OGRE_COMPILER_MSVC |
---|
40 | typedef unsigned __int64 uint64; |
---|
41 | #else |
---|
42 | typedef unsigned long long uint64; |
---|
43 | #endif |
---|
44 | |
---|
45 | // variable name hints: |
---|
46 | // sx_48 = 16/48-bit fixed-point x-position in source |
---|
47 | // stepx = difference between adjacent sx_48 values |
---|
48 | // sx1 = lower-bound integer x-position in source |
---|
49 | // sx2 = upper-bound integer x-position in source |
---|
50 | // sxf = fractional weight between sx1 and sx2 |
---|
51 | // x,y,z = location of output pixel in destination |
---|
52 | |
---|
53 | // nearest-neighbor resampler, does not convert formats. |
---|
54 | // templated on bytes-per-pixel to allow compiler optimizations, such |
---|
55 | // as simplifying memcpy() and replacing multiplies with bitshifts |
---|
56 | template<unsigned int elemsize> struct NearestResampler { |
---|
57 | static void scale(const PixelBox& src, const PixelBox& dst) { |
---|
58 | // assert(src.format == dst.format); |
---|
59 | |
---|
60 | // srcdata stays at beginning, pdst is a moving pointer |
---|
61 | uchar* srcdata = (uchar*)src.data; |
---|
62 | uchar* pdst = (uchar*)dst.data; |
---|
63 | |
---|
64 | // sx_48,sy_48,sz_48 represent current position in source |
---|
65 | // using 16/48-bit fixed precision, incremented by steps |
---|
66 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth(); |
---|
67 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight(); |
---|
68 | uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth(); |
---|
69 | |
---|
70 | // note: ((stepz>>1) - 1) is an extra half-step increment to adjust |
---|
71 | // for the center of the destination pixel, not the top-left corner |
---|
72 | uint64 sz_48 = (stepz >> 1) - 1; |
---|
73 | for (size_t z = dst.front; z < dst.back; z++, sz_48 += stepz) { |
---|
74 | size_t srczoff = (size_t)(sz_48 >> 48) * src.slicePitch; |
---|
75 | |
---|
76 | uint64 sy_48 = (stepy >> 1) - 1; |
---|
77 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48 += stepy) { |
---|
78 | size_t srcyoff = (size_t)(sy_48 >> 48) * src.rowPitch; |
---|
79 | |
---|
80 | uint64 sx_48 = (stepx >> 1) - 1; |
---|
81 | for (size_t x = dst.left; x < dst.right; x++, sx_48 += stepx) { |
---|
82 | uchar* psrc = srcdata + |
---|
83 | elemsize*((size_t)(sx_48 >> 48) + srcyoff + srczoff); |
---|
84 | memcpy(pdst, psrc, elemsize); |
---|
85 | pdst += elemsize; |
---|
86 | } |
---|
87 | pdst += elemsize*dst.getRowSkip(); |
---|
88 | } |
---|
89 | pdst += elemsize*dst.getSliceSkip(); |
---|
90 | } |
---|
91 | } |
---|
92 | }; |
---|
93 | |
---|
94 | |
---|
95 | // default floating-point linear resampler, does format conversion |
---|
96 | struct LinearResampler { |
---|
97 | static void scale(const PixelBox& src, const PixelBox& dst) { |
---|
98 | size_t srcelemsize = PixelUtil::getNumElemBytes(src.format); |
---|
99 | size_t dstelemsize = PixelUtil::getNumElemBytes(dst.format); |
---|
100 | |
---|
101 | // srcdata stays at beginning, pdst is a moving pointer |
---|
102 | uchar* srcdata = (uchar*)src.data; |
---|
103 | uchar* pdst = (uchar*)dst.data; |
---|
104 | |
---|
105 | // sx_48,sy_48,sz_48 represent current position in source |
---|
106 | // using 16/48-bit fixed precision, incremented by steps |
---|
107 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth(); |
---|
108 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight(); |
---|
109 | uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth(); |
---|
110 | |
---|
111 | // temp is 16/16 bit fixed precision, used to adjust a source |
---|
112 | // coordinate (x, y, or z) backwards by half a pixel so that the |
---|
113 | // integer bits represent the first sample (eg, sx1) and the |
---|
114 | // fractional bits are the blend weight of the second sample |
---|
115 | unsigned int temp; |
---|
116 | |
---|
117 | // note: ((stepz>>1) - 1) is an extra half-step increment to adjust |
---|
118 | // for the center of the destination pixel, not the top-left corner |
---|
119 | uint64 sz_48 = (stepz >> 1) - 1; |
---|
120 | for (size_t z = dst.front; z < dst.back; z++, sz_48+=stepz) { |
---|
121 | temp = sz_48 >> 32; |
---|
122 | temp = (temp > 0x8000)? temp - 0x8000 : 0; |
---|
123 | size_t sz1 = temp >> 16; // src z, sample #1 |
---|
124 | size_t sz2 = std::min(sz1+1,src.getDepth()-1);// src z, sample #2 |
---|
125 | float szf = (temp & 0xFFFF) / 65536.f; // weight of sample #2 |
---|
126 | |
---|
127 | uint64 sy_48 = (stepy >> 1) - 1; |
---|
128 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) { |
---|
129 | temp = sy_48 >> 32; |
---|
130 | temp = (temp > 0x8000)? temp - 0x8000 : 0; |
---|
131 | size_t sy1 = temp >> 16; // src y #1 |
---|
132 | size_t sy2 = std::min(sy1+1,src.getHeight()-1);// src y #2 |
---|
133 | float syf = (temp & 0xFFFF) / 65536.f; // weight of #2 |
---|
134 | |
---|
135 | uint64 sx_48 = (stepx >> 1) - 1; |
---|
136 | for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) { |
---|
137 | temp = sx_48 >> 32; |
---|
138 | temp = (temp > 0x8000)? temp - 0x8000 : 0; |
---|
139 | size_t sx1 = temp >> 16; // src x #1 |
---|
140 | size_t sx2 = std::min(sx1+1,src.getWidth()-1);// src x #2 |
---|
141 | float sxf = (temp & 0xFFFF) / 65536.f; // weight of #2 |
---|
142 | |
---|
143 | ColourValue x1y1z1, x2y1z1, x1y2z1, x2y2z1; |
---|
144 | ColourValue x1y1z2, x2y1z2, x1y2z2, x2y2z2; |
---|
145 | |
---|
146 | #define UNPACK(dst,x,y,z) PixelUtil::unpackColour(&dst, src.format, \ |
---|
147 | srcdata + srcelemsize*((x)+(y)*src.rowPitch+(z)*src.slicePitch)) |
---|
148 | |
---|
149 | UNPACK(x1y1z1,sx1,sy1,sz1); UNPACK(x2y1z1,sx2,sy1,sz1); |
---|
150 | UNPACK(x1y2z1,sx1,sy2,sz1); UNPACK(x2y2z1,sx2,sy2,sz1); |
---|
151 | UNPACK(x1y1z2,sx1,sy1,sz2); UNPACK(x2y1z2,sx2,sy1,sz2); |
---|
152 | UNPACK(x1y2z2,sx1,sy2,sz2); UNPACK(x2y2z2,sx2,sy2,sz2); |
---|
153 | #undef UNPACK |
---|
154 | |
---|
155 | ColourValue accum = |
---|
156 | x1y1z1 * ((1.0f - sxf)*(1.0f - syf)*(1.0f - szf)) + |
---|
157 | x2y1z1 * ( sxf *(1.0f - syf)*(1.0f - szf)) + |
---|
158 | x1y2z1 * ((1.0f - sxf)* syf *(1.0f - szf)) + |
---|
159 | x2y2z1 * ( sxf * syf *(1.0f - szf)) + |
---|
160 | x1y1z2 * ((1.0f - sxf)*(1.0f - syf)* szf ) + |
---|
161 | x2y1z2 * ( sxf *(1.0f - syf)* szf ) + |
---|
162 | x1y2z2 * ((1.0f - sxf)* syf * szf ) + |
---|
163 | x2y2z2 * ( sxf * syf * szf ); |
---|
164 | |
---|
165 | PixelUtil::packColour(accum, dst.format, pdst); |
---|
166 | |
---|
167 | pdst += dstelemsize; |
---|
168 | } |
---|
169 | pdst += dstelemsize*dst.getRowSkip(); |
---|
170 | } |
---|
171 | pdst += dstelemsize*dst.getSliceSkip(); |
---|
172 | } |
---|
173 | } |
---|
174 | }; |
---|
175 | |
---|
176 | |
---|
177 | // float32 linear resampler, converts FLOAT32_RGB/FLOAT32_RGBA only. |
---|
178 | // avoids overhead of pixel unpack/repack function calls |
---|
179 | struct LinearResampler_Float32 { |
---|
180 | static void scale(const PixelBox& src, const PixelBox& dst) { |
---|
181 | size_t srcchannels = PixelUtil::getNumElemBytes(src.format) / sizeof(float); |
---|
182 | size_t dstchannels = PixelUtil::getNumElemBytes(dst.format) / sizeof(float); |
---|
183 | // assert(srcchannels == 3 || srcchannels == 4); |
---|
184 | // assert(dstchannels == 3 || dstchannels == 4); |
---|
185 | |
---|
186 | // srcdata stays at beginning, pdst is a moving pointer |
---|
187 | float* srcdata = (float*)src.data; |
---|
188 | float* pdst = (float*)dst.data; |
---|
189 | |
---|
190 | // sx_48,sy_48,sz_48 represent current position in source |
---|
191 | // using 16/48-bit fixed precision, incremented by steps |
---|
192 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth(); |
---|
193 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight(); |
---|
194 | uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth(); |
---|
195 | |
---|
196 | // temp is 16/16 bit fixed precision, used to adjust a source |
---|
197 | // coordinate (x, y, or z) backwards by half a pixel so that the |
---|
198 | // integer bits represent the first sample (eg, sx1) and the |
---|
199 | // fractional bits are the blend weight of the second sample |
---|
200 | unsigned int temp; |
---|
201 | |
---|
202 | // note: ((stepz>>1) - 1) is an extra half-step increment to adjust |
---|
203 | // for the center of the destination pixel, not the top-left corner |
---|
204 | uint64 sz_48 = (stepz >> 1) - 1; |
---|
205 | for (size_t z = dst.front; z < dst.back; z++, sz_48+=stepz) { |
---|
206 | temp = sz_48 >> 32; |
---|
207 | temp = (temp > 0x8000)? temp - 0x8000 : 0; |
---|
208 | size_t sz1 = temp >> 16; // src z, sample #1 |
---|
209 | size_t sz2 = std::min(sz1+1,src.getDepth()-1);// src z, sample #2 |
---|
210 | float szf = (temp & 0xFFFF) / 65536.f; // weight of sample #2 |
---|
211 | |
---|
212 | uint64 sy_48 = (stepy >> 1) - 1; |
---|
213 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) { |
---|
214 | temp = sy_48 >> 32; |
---|
215 | temp = (temp > 0x8000)? temp - 0x8000 : 0; |
---|
216 | size_t sy1 = temp >> 16; // src y #1 |
---|
217 | size_t sy2 = std::min(sy1+1,src.getHeight()-1);// src y #2 |
---|
218 | float syf = (temp & 0xFFFF) / 65536.f; // weight of #2 |
---|
219 | |
---|
220 | uint64 sx_48 = (stepx >> 1) - 1; |
---|
221 | for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) { |
---|
222 | temp = sx_48 >> 32; |
---|
223 | temp = (temp > 0x8000)? temp - 0x8000 : 0; |
---|
224 | size_t sx1 = temp >> 16; // src x #1 |
---|
225 | size_t sx2 = std::min(sx1+1,src.getWidth()-1);// src x #2 |
---|
226 | float sxf = (temp & 0xFFFF) / 65536.f; // weight of #2 |
---|
227 | |
---|
228 | // process R,G,B,A simultaneously for cache coherence? |
---|
229 | float accum[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; |
---|
230 | |
---|
231 | #define ACCUM3(x,y,z,factor) \ |
---|
232 | { float f = factor; \ |
---|
233 | size_t off = (x+y*src.rowPitch+z*src.slicePitch)*srcchannels; \ |
---|
234 | accum[0]+=srcdata[off+0]*f; accum[1]+=srcdata[off+1]*f; \ |
---|
235 | accum[2]+=srcdata[off+2]*f; } |
---|
236 | |
---|
237 | #define ACCUM4(x,y,z,factor) \ |
---|
238 | { float f = factor; \ |
---|
239 | size_t off = (x+y*src.rowPitch+z*src.slicePitch)*srcchannels; \ |
---|
240 | accum[0]+=srcdata[off+0]*f; accum[1]+=srcdata[off+1]*f; \ |
---|
241 | accum[2]+=srcdata[off+2]*f; accum[3]+=srcdata[off+3]*f; } |
---|
242 | |
---|
243 | if (srcchannels == 3 || dstchannels == 3) { |
---|
244 | // RGB, no alpha |
---|
245 | ACCUM3(sx1,sy1,sz1,(1.0f-sxf)*(1.0f-syf)*(1.0f-szf)); |
---|
246 | ACCUM3(sx2,sy1,sz1, sxf *(1.0f-syf)*(1.0f-szf)); |
---|
247 | ACCUM3(sx1,sy2,sz1,(1.0f-sxf)* syf *(1.0f-szf)); |
---|
248 | ACCUM3(sx2,sy2,sz1, sxf * syf *(1.0f-szf)); |
---|
249 | ACCUM3(sx1,sy1,sz2,(1.0f-sxf)*(1.0f-syf)* szf ); |
---|
250 | ACCUM3(sx2,sy1,sz2, sxf *(1.0f-syf)* szf ); |
---|
251 | ACCUM3(sx1,sy2,sz2,(1.0f-sxf)* syf * szf ); |
---|
252 | ACCUM3(sx2,sy2,sz2, sxf * syf * szf ); |
---|
253 | accum[3] = 1.0f; |
---|
254 | } else { |
---|
255 | // RGBA |
---|
256 | ACCUM4(sx1,sy1,sz1,(1.0f-sxf)*(1.0f-syf)*(1.0f-szf)); |
---|
257 | ACCUM4(sx2,sy1,sz1, sxf *(1.0f-syf)*(1.0f-szf)); |
---|
258 | ACCUM4(sx1,sy2,sz1,(1.0f-sxf)* syf *(1.0f-szf)); |
---|
259 | ACCUM4(sx2,sy2,sz1, sxf * syf *(1.0f-szf)); |
---|
260 | ACCUM4(sx1,sy1,sz2,(1.0f-sxf)*(1.0f-syf)* szf ); |
---|
261 | ACCUM4(sx2,sy1,sz2, sxf *(1.0f-syf)* szf ); |
---|
262 | ACCUM4(sx1,sy2,sz2,(1.0f-sxf)* syf * szf ); |
---|
263 | ACCUM4(sx2,sy2,sz2, sxf * syf * szf ); |
---|
264 | } |
---|
265 | |
---|
266 | memcpy(pdst, accum, sizeof(float)*dstchannels); |
---|
267 | |
---|
268 | #undef ACCUM3 |
---|
269 | #undef ACCUM4 |
---|
270 | |
---|
271 | pdst += dstchannels; |
---|
272 | } |
---|
273 | pdst += dstchannels*dst.getRowSkip(); |
---|
274 | } |
---|
275 | pdst += dstchannels*dst.getSliceSkip(); |
---|
276 | } |
---|
277 | } |
---|
278 | }; |
---|
279 | |
---|
280 | |
---|
281 | |
---|
282 | // byte linear resampler, does not do any format conversions. |
---|
283 | // only handles pixel formats that use 1 byte per color channel. |
---|
284 | // 2D only; punts 3D pixelboxes to default LinearResampler (slow). |
---|
285 | // templated on bytes-per-pixel to allow compiler optimizations, such |
---|
286 | // as unrolling loops and replacing multiplies with bitshifts |
---|
287 | template<unsigned int channels> struct LinearResampler_Byte { |
---|
288 | static void scale(const PixelBox& src, const PixelBox& dst) { |
---|
289 | // assert(src.format == dst.format); |
---|
290 | |
---|
291 | // only optimized for 2D |
---|
292 | if (src.getDepth() > 1 || dst.getDepth() > 1) { |
---|
293 | LinearResampler::scale(src, dst); |
---|
294 | return; |
---|
295 | } |
---|
296 | |
---|
297 | // srcdata stays at beginning of slice, pdst is a moving pointer |
---|
298 | uchar* srcdata = (uchar*)src.data; |
---|
299 | uchar* pdst = (uchar*)dst.data; |
---|
300 | |
---|
301 | // sx_48,sy_48 represent current position in source |
---|
302 | // using 16/48-bit fixed precision, incremented by steps |
---|
303 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth(); |
---|
304 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight(); |
---|
305 | |
---|
306 | // bottom 28 bits of temp are 16/12 bit fixed precision, used to |
---|
307 | // adjust a source coordinate backwards by half a pixel so that the |
---|
308 | // integer bits represent the first sample (eg, sx1) and the |
---|
309 | // fractional bits are the blend weight of the second sample |
---|
310 | unsigned int temp; |
---|
311 | |
---|
312 | uint64 sy_48 = (stepy >> 1) - 1; |
---|
313 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) { |
---|
314 | temp = sy_48 >> 36; |
---|
315 | temp = (temp > 0x800)? temp - 0x800: 0; |
---|
316 | unsigned int syf = temp & 0xFFF; |
---|
317 | size_t sy1 = temp >> 12; |
---|
318 | size_t sy2 = std::min(sy1+1, src.bottom-src.top-1); |
---|
319 | size_t syoff1 = sy1 * src.rowPitch; |
---|
320 | size_t syoff2 = sy2 * src.rowPitch; |
---|
321 | |
---|
322 | uint64 sx_48 = (stepx >> 1) - 1; |
---|
323 | for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) { |
---|
324 | temp = sx_48 >> 36; |
---|
325 | temp = (temp > 0x800)? temp - 0x800 : 0; |
---|
326 | unsigned int sxf = temp & 0xFFF; |
---|
327 | size_t sx1 = temp >> 12; |
---|
328 | size_t sx2 = std::min(sx1+1, src.right-src.left-1); |
---|
329 | |
---|
330 | unsigned int sxfsyf = sxf*syf; |
---|
331 | for (unsigned int k = 0; k < channels; k++) { |
---|
332 | unsigned int accum = |
---|
333 | srcdata[(sx1 + syoff1)*channels+k]*(0x1000000-(sxf<<12)-(syf<<12)+sxfsyf) + |
---|
334 | srcdata[(sx2 + syoff1)*channels+k]*((sxf<<12)-sxfsyf) + |
---|
335 | srcdata[(sx1 + syoff2)*channels+k]*((syf<<12)-sxfsyf) + |
---|
336 | srcdata[(sx2 + syoff2)*channels+k]*sxfsyf; |
---|
337 | // accum is computed using 8/24-bit fixed-point math |
---|
338 | // (maximum is 0xFF000000; rounding will not cause overflow) |
---|
339 | *pdst++ = (accum + 0x800000) >> 24; |
---|
340 | } |
---|
341 | } |
---|
342 | pdst += channels*dst.getRowSkip(); |
---|
343 | } |
---|
344 | } |
---|
345 | }; |
---|
346 | |
---|
347 | } |
---|
348 | |
---|
349 | #endif |
---|