[1] | 1 | /* |
---|
| 2 | ----------------------------------------------------------------------------- |
---|
| 3 | This source file is part of OGRE |
---|
| 4 | (Object-oriented Graphics Rendering Engine) |
---|
| 5 | For the latest info, see http://www.ogre3d.org/ |
---|
| 6 | |
---|
| 7 | Copyright (c) 2000-2006 Torus Knot Software Ltd |
---|
| 8 | Also see acknowledgements in Readme.html |
---|
| 9 | |
---|
| 10 | This program is free software; you can redistribute it and/or modify it under |
---|
| 11 | the terms of the GNU Lesser General Public License as published by the Free Software |
---|
| 12 | Foundation; either version 2 of the License, or (at your option) any later |
---|
| 13 | version. |
---|
| 14 | |
---|
| 15 | This program is distributed in the hope that it will be useful, but WITHOUT |
---|
| 16 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
---|
| 17 | FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. |
---|
| 18 | |
---|
| 19 | You should have received a copy of the GNU Lesser General Public License along with |
---|
| 20 | this program; if not, write to the Free Software Foundation, Inc., 59 Temple |
---|
| 21 | Place - Suite 330, Boston, MA 02111-1307, USA, or go to |
---|
| 22 | http://www.gnu.org/copyleft/lesser.txt. |
---|
| 23 | |
---|
| 24 | You may alternatively use this source under the terms of a specific version of |
---|
| 25 | the OGRE Unrestricted License provided you have obtained such a license from |
---|
| 26 | Torus Knot Software Ltd. |
---|
| 27 | ----------------------------------------------------------------------------- |
---|
| 28 | */ |
---|
| 29 | #ifndef OGREIMAGERESAMPLER_H |
---|
| 30 | #define OGREIMAGERESAMPLER_H |
---|
| 31 | |
---|
| 32 | #include <algorithm> |
---|
| 33 | |
---|
| 34 | // this file is inlined into OgreImage.cpp! |
---|
| 35 | // do not include anywhere else. |
---|
| 36 | namespace Ogre { |
---|
| 37 | |
---|
| 38 | // define uint64 type: the unsigned 64-bit integer that holds the 16/48-bit fixed-point source positions and steps used by the resamplers in this file (spelled unsigned __int64 when OGRE_COMPILER is OGRE_COMPILER_MSVC, unsigned long long otherwise) |
---|
| 39 | #if OGRE_COMPILER == OGRE_COMPILER_MSVC |
---|
| 40 | typedef unsigned __int64 uint64; |
---|
| 41 | #else |
---|
| 42 | typedef unsigned long long uint64; |
---|
| 43 | #endif |
---|
| 44 | |
---|
| 45 | // variable name hints: |
---|
| 46 | // sx_48 = 16/48-bit fixed-point x-position in source |
---|
| 47 | // stepx = difference between adjacent sx_48 values |
---|
| 48 | // sx1 = lower-bound integer x-position in source |
---|
| 49 | // sx2 = upper-bound integer x-position in source |
---|
| 50 | // sxf = fractional weight between sx1 and sx2 |
---|
| 51 | // x,y,z = location of output pixel in destination |
---|
| 52 | |
---|
| 53 | // nearest-neighbor resampler, does not convert formats. |
---|
| 54 | // templated on bytes-per-pixel to allow compiler optimizations, such |
---|
| 55 | // as simplifying memcpy() and replacing multiplies with bitshifts |
---|
| 56 | template<unsigned int elemsize> struct NearestResampler { |
---|
| 57 | static void scale(const PixelBox& src, const PixelBox& dst) { |
---|
| 58 | // assert(src.format == dst.format); |
---|
| 59 | |
---|
| 60 | // srcdata stays at beginning, pdst is a moving pointer |
---|
| 61 | uchar* srcdata = (uchar*)src.data; |
---|
| 62 | uchar* pdst = (uchar*)dst.data; |
---|
| 63 | |
---|
| 64 | // sx_48,sy_48,sz_48 represent current position in source |
---|
| 65 | // using 16/48-bit fixed precision, incremented by steps |
---|
| 66 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth(); |
---|
| 67 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight(); |
---|
| 68 | uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth(); |
---|
| 69 | |
---|
| 70 | // note: ((stepz>>1) - 1) is an extra half-step increment to adjust |
---|
| 71 | // for the center of the destination pixel, not the top-left corner |
---|
| 72 | uint64 sz_48 = (stepz >> 1) - 1; |
---|
| 73 | for (size_t z = dst.front; z < dst.back; z++, sz_48 += stepz) { |
---|
| 74 | size_t srczoff = (size_t)(sz_48 >> 48) * src.slicePitch; |
---|
| 75 | |
---|
| 76 | uint64 sy_48 = (stepy >> 1) - 1; |
---|
| 77 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48 += stepy) { |
---|
| 78 | size_t srcyoff = (size_t)(sy_48 >> 48) * src.rowPitch; |
---|
| 79 | |
---|
| 80 | uint64 sx_48 = (stepx >> 1) - 1; |
---|
| 81 | for (size_t x = dst.left; x < dst.right; x++, sx_48 += stepx) { |
---|
| 82 | uchar* psrc = srcdata + |
---|
| 83 | elemsize*((size_t)(sx_48 >> 48) + srcyoff + srczoff); |
---|
| 84 | memcpy(pdst, psrc, elemsize); |
---|
| 85 | pdst += elemsize; |
---|
| 86 | } |
---|
| 87 | pdst += elemsize*dst.getRowSkip(); |
---|
| 88 | } |
---|
| 89 | pdst += elemsize*dst.getSliceSkip(); |
---|
| 90 | } |
---|
| 91 | } |
---|
| 92 | }; |
---|
| 93 | |
---|
| 94 | |
---|
| 95 | // default floating-point linear resampler, does format conversion |
---|
| 96 | struct LinearResampler { |
---|
| 97 | static void scale(const PixelBox& src, const PixelBox& dst) { |
---|
| 98 | size_t srcelemsize = PixelUtil::getNumElemBytes(src.format); |
---|
| 99 | size_t dstelemsize = PixelUtil::getNumElemBytes(dst.format); |
---|
| 100 | |
---|
| 101 | // srcdata stays at beginning, pdst is a moving pointer |
---|
| 102 | uchar* srcdata = (uchar*)src.data; |
---|
| 103 | uchar* pdst = (uchar*)dst.data; |
---|
| 104 | |
---|
| 105 | // sx_48,sy_48,sz_48 represent current position in source |
---|
| 106 | // using 16/48-bit fixed precision, incremented by steps |
---|
| 107 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth(); |
---|
| 108 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight(); |
---|
| 109 | uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth(); |
---|
| 110 | |
---|
| 111 | // temp is 16/16 bit fixed precision, used to adjust a source |
---|
| 112 | // coordinate (x, y, or z) backwards by half a pixel so that the |
---|
| 113 | // integer bits represent the first sample (eg, sx1) and the |
---|
| 114 | // fractional bits are the blend weight of the second sample |
---|
| 115 | unsigned int temp; |
---|
| 116 | |
---|
| 117 | // note: ((stepz>>1) - 1) is an extra half-step increment to adjust |
---|
| 118 | // for the center of the destination pixel, not the top-left corner |
---|
| 119 | uint64 sz_48 = (stepz >> 1) - 1; |
---|
| 120 | for (size_t z = dst.front; z < dst.back; z++, sz_48+=stepz) { |
---|
| 121 | temp = sz_48 >> 32; |
---|
| 122 | temp = (temp > 0x8000)? temp - 0x8000 : 0; |
---|
| 123 | size_t sz1 = temp >> 16; // src z, sample #1 |
---|
| 124 | size_t sz2 = std::min(sz1+1,src.getDepth()-1);// src z, sample #2 |
---|
| 125 | float szf = (temp & 0xFFFF) / 65536.f; // weight of sample #2 |
---|
| 126 | |
---|
| 127 | uint64 sy_48 = (stepy >> 1) - 1; |
---|
| 128 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) { |
---|
| 129 | temp = sy_48 >> 32; |
---|
| 130 | temp = (temp > 0x8000)? temp - 0x8000 : 0; |
---|
| 131 | size_t sy1 = temp >> 16; // src y #1 |
---|
| 132 | size_t sy2 = std::min(sy1+1,src.getHeight()-1);// src y #2 |
---|
| 133 | float syf = (temp & 0xFFFF) / 65536.f; // weight of #2 |
---|
| 134 | |
---|
| 135 | uint64 sx_48 = (stepx >> 1) - 1; |
---|
| 136 | for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) { |
---|
| 137 | temp = sx_48 >> 32; |
---|
| 138 | temp = (temp > 0x8000)? temp - 0x8000 : 0; |
---|
| 139 | size_t sx1 = temp >> 16; // src x #1 |
---|
| 140 | size_t sx2 = std::min(sx1+1,src.getWidth()-1);// src x #2 |
---|
| 141 | float sxf = (temp & 0xFFFF) / 65536.f; // weight of #2 |
---|
| 142 | |
---|
| 143 | ColourValue x1y1z1, x2y1z1, x1y2z1, x2y2z1; |
---|
| 144 | ColourValue x1y1z2, x2y1z2, x1y2z2, x2y2z2; |
---|
| 145 | |
---|
| 146 | #define UNPACK(dst,x,y,z) PixelUtil::unpackColour(&dst, src.format, \ |
---|
| 147 | srcdata + srcelemsize*((x)+(y)*src.rowPitch+(z)*src.slicePitch)) |
---|
| 148 | |
---|
| 149 | UNPACK(x1y1z1,sx1,sy1,sz1); UNPACK(x2y1z1,sx2,sy1,sz1); |
---|
| 150 | UNPACK(x1y2z1,sx1,sy2,sz1); UNPACK(x2y2z1,sx2,sy2,sz1); |
---|
| 151 | UNPACK(x1y1z2,sx1,sy1,sz2); UNPACK(x2y1z2,sx2,sy1,sz2); |
---|
| 152 | UNPACK(x1y2z2,sx1,sy2,sz2); UNPACK(x2y2z2,sx2,sy2,sz2); |
---|
| 153 | #undef UNPACK |
---|
| 154 | |
---|
| 155 | ColourValue accum = |
---|
| 156 | x1y1z1 * ((1.0f - sxf)*(1.0f - syf)*(1.0f - szf)) + |
---|
| 157 | x2y1z1 * ( sxf *(1.0f - syf)*(1.0f - szf)) + |
---|
| 158 | x1y2z1 * ((1.0f - sxf)* syf *(1.0f - szf)) + |
---|
| 159 | x2y2z1 * ( sxf * syf *(1.0f - szf)) + |
---|
| 160 | x1y1z2 * ((1.0f - sxf)*(1.0f - syf)* szf ) + |
---|
| 161 | x2y1z2 * ( sxf *(1.0f - syf)* szf ) + |
---|
| 162 | x1y2z2 * ((1.0f - sxf)* syf * szf ) + |
---|
| 163 | x2y2z2 * ( sxf * syf * szf ); |
---|
| 164 | |
---|
| 165 | PixelUtil::packColour(accum, dst.format, pdst); |
---|
| 166 | |
---|
| 167 | pdst += dstelemsize; |
---|
| 168 | } |
---|
| 169 | pdst += dstelemsize*dst.getRowSkip(); |
---|
| 170 | } |
---|
| 171 | pdst += dstelemsize*dst.getSliceSkip(); |
---|
| 172 | } |
---|
| 173 | } |
---|
| 174 | }; |
---|
| 175 | |
---|
| 176 | |
---|
| 177 | // float32 linear resampler, converts FLOAT32_RGB/FLOAT32_RGBA only. |
---|
| 178 | // avoids overhead of pixel unpack/repack function calls |
---|
| 179 | struct LinearResampler_Float32 { |
---|
| 180 | static void scale(const PixelBox& src, const PixelBox& dst) { |
---|
| 181 | size_t srcchannels = PixelUtil::getNumElemBytes(src.format) / sizeof(float); |
---|
| 182 | size_t dstchannels = PixelUtil::getNumElemBytes(dst.format) / sizeof(float); |
---|
| 183 | // assert(srcchannels == 3 || srcchannels == 4); |
---|
| 184 | // assert(dstchannels == 3 || dstchannels == 4); |
---|
| 185 | |
---|
| 186 | // srcdata stays at beginning, pdst is a moving pointer |
---|
| 187 | float* srcdata = (float*)src.data; |
---|
| 188 | float* pdst = (float*)dst.data; |
---|
| 189 | |
---|
| 190 | // sx_48,sy_48,sz_48 represent current position in source |
---|
| 191 | // using 16/48-bit fixed precision, incremented by steps |
---|
| 192 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth(); |
---|
| 193 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight(); |
---|
| 194 | uint64 stepz = ((uint64)src.getDepth() << 48) / dst.getDepth(); |
---|
| 195 | |
---|
| 196 | // temp is 16/16 bit fixed precision, used to adjust a source |
---|
| 197 | // coordinate (x, y, or z) backwards by half a pixel so that the |
---|
| 198 | // integer bits represent the first sample (eg, sx1) and the |
---|
| 199 | // fractional bits are the blend weight of the second sample |
---|
| 200 | unsigned int temp; |
---|
| 201 | |
---|
| 202 | // note: ((stepz>>1) - 1) is an extra half-step increment to adjust |
---|
| 203 | // for the center of the destination pixel, not the top-left corner |
---|
| 204 | uint64 sz_48 = (stepz >> 1) - 1; |
---|
| 205 | for (size_t z = dst.front; z < dst.back; z++, sz_48+=stepz) { |
---|
| 206 | temp = sz_48 >> 32; |
---|
| 207 | temp = (temp > 0x8000)? temp - 0x8000 : 0; |
---|
| 208 | size_t sz1 = temp >> 16; // src z, sample #1 |
---|
| 209 | size_t sz2 = std::min(sz1+1,src.getDepth()-1);// src z, sample #2 |
---|
| 210 | float szf = (temp & 0xFFFF) / 65536.f; // weight of sample #2 |
---|
| 211 | |
---|
| 212 | uint64 sy_48 = (stepy >> 1) - 1; |
---|
| 213 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) { |
---|
| 214 | temp = sy_48 >> 32; |
---|
| 215 | temp = (temp > 0x8000)? temp - 0x8000 : 0; |
---|
| 216 | size_t sy1 = temp >> 16; // src y #1 |
---|
| 217 | size_t sy2 = std::min(sy1+1,src.getHeight()-1);// src y #2 |
---|
| 218 | float syf = (temp & 0xFFFF) / 65536.f; // weight of #2 |
---|
| 219 | |
---|
| 220 | uint64 sx_48 = (stepx >> 1) - 1; |
---|
| 221 | for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) { |
---|
| 222 | temp = sx_48 >> 32; |
---|
| 223 | temp = (temp > 0x8000)? temp - 0x8000 : 0; |
---|
| 224 | size_t sx1 = temp >> 16; // src x #1 |
---|
| 225 | size_t sx2 = std::min(sx1+1,src.getWidth()-1);// src x #2 |
---|
| 226 | float sxf = (temp & 0xFFFF) / 65536.f; // weight of #2 |
---|
| 227 | |
---|
| 228 | // process R,G,B,A simultaneously for cache coherence? |
---|
| 229 | float accum[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; |
---|
| 230 | |
---|
| 231 | #define ACCUM3(x,y,z,factor) \ |
---|
| 232 | { float f = factor; \ |
---|
| 233 | size_t off = (x+y*src.rowPitch+z*src.slicePitch)*srcchannels; \ |
---|
| 234 | accum[0]+=srcdata[off+0]*f; accum[1]+=srcdata[off+1]*f; \ |
---|
| 235 | accum[2]+=srcdata[off+2]*f; } |
---|
| 236 | |
---|
| 237 | #define ACCUM4(x,y,z,factor) \ |
---|
| 238 | { float f = factor; \ |
---|
| 239 | size_t off = (x+y*src.rowPitch+z*src.slicePitch)*srcchannels; \ |
---|
| 240 | accum[0]+=srcdata[off+0]*f; accum[1]+=srcdata[off+1]*f; \ |
---|
| 241 | accum[2]+=srcdata[off+2]*f; accum[3]+=srcdata[off+3]*f; } |
---|
| 242 | |
---|
| 243 | if (srcchannels == 3 || dstchannels == 3) { |
---|
| 244 | // RGB, no alpha |
---|
| 245 | ACCUM3(sx1,sy1,sz1,(1.0f-sxf)*(1.0f-syf)*(1.0f-szf)); |
---|
| 246 | ACCUM3(sx2,sy1,sz1, sxf *(1.0f-syf)*(1.0f-szf)); |
---|
| 247 | ACCUM3(sx1,sy2,sz1,(1.0f-sxf)* syf *(1.0f-szf)); |
---|
| 248 | ACCUM3(sx2,sy2,sz1, sxf * syf *(1.0f-szf)); |
---|
| 249 | ACCUM3(sx1,sy1,sz2,(1.0f-sxf)*(1.0f-syf)* szf ); |
---|
| 250 | ACCUM3(sx2,sy1,sz2, sxf *(1.0f-syf)* szf ); |
---|
| 251 | ACCUM3(sx1,sy2,sz2,(1.0f-sxf)* syf * szf ); |
---|
| 252 | ACCUM3(sx2,sy2,sz2, sxf * syf * szf ); |
---|
| 253 | accum[3] = 1.0f; |
---|
| 254 | } else { |
---|
| 255 | // RGBA |
---|
| 256 | ACCUM4(sx1,sy1,sz1,(1.0f-sxf)*(1.0f-syf)*(1.0f-szf)); |
---|
| 257 | ACCUM4(sx2,sy1,sz1, sxf *(1.0f-syf)*(1.0f-szf)); |
---|
| 258 | ACCUM4(sx1,sy2,sz1,(1.0f-sxf)* syf *(1.0f-szf)); |
---|
| 259 | ACCUM4(sx2,sy2,sz1, sxf * syf *(1.0f-szf)); |
---|
| 260 | ACCUM4(sx1,sy1,sz2,(1.0f-sxf)*(1.0f-syf)* szf ); |
---|
| 261 | ACCUM4(sx2,sy1,sz2, sxf *(1.0f-syf)* szf ); |
---|
| 262 | ACCUM4(sx1,sy2,sz2,(1.0f-sxf)* syf * szf ); |
---|
| 263 | ACCUM4(sx2,sy2,sz2, sxf * syf * szf ); |
---|
| 264 | } |
---|
| 265 | |
---|
| 266 | memcpy(pdst, accum, sizeof(float)*dstchannels); |
---|
| 267 | |
---|
| 268 | #undef ACCUM3 |
---|
| 269 | #undef ACCUM4 |
---|
| 270 | |
---|
| 271 | pdst += dstchannels; |
---|
| 272 | } |
---|
| 273 | pdst += dstchannels*dst.getRowSkip(); |
---|
| 274 | } |
---|
| 275 | pdst += dstchannels*dst.getSliceSkip(); |
---|
| 276 | } |
---|
| 277 | } |
---|
| 278 | }; |
---|
| 279 | |
---|
| 280 | |
---|
| 281 | |
---|
| 282 | // byte linear resampler, does not do any format conversions. |
---|
| 283 | // only handles pixel formats that use 1 byte per color channel. |
---|
| 284 | // 2D only; punts 3D pixelboxes to default LinearResampler (slow). |
---|
| 285 | // templated on bytes-per-pixel to allow compiler optimizations, such |
---|
| 286 | // as unrolling loops and replacing multiplies with bitshifts |
---|
| 287 | template<unsigned int channels> struct LinearResampler_Byte { |
---|
| 288 | static void scale(const PixelBox& src, const PixelBox& dst) { |
---|
| 289 | // assert(src.format == dst.format); |
---|
| 290 | |
---|
| 291 | // only optimized for 2D |
---|
| 292 | if (src.getDepth() > 1 || dst.getDepth() > 1) { |
---|
| 293 | LinearResampler::scale(src, dst); |
---|
| 294 | return; |
---|
| 295 | } |
---|
| 296 | |
---|
| 297 | // srcdata stays at beginning of slice, pdst is a moving pointer |
---|
| 298 | uchar* srcdata = (uchar*)src.data; |
---|
| 299 | uchar* pdst = (uchar*)dst.data; |
---|
| 300 | |
---|
| 301 | // sx_48,sy_48 represent current position in source |
---|
| 302 | // using 16/48-bit fixed precision, incremented by steps |
---|
| 303 | uint64 stepx = ((uint64)src.getWidth() << 48) / dst.getWidth(); |
---|
| 304 | uint64 stepy = ((uint64)src.getHeight() << 48) / dst.getHeight(); |
---|
| 305 | |
---|
| 306 | // bottom 28 bits of temp are 16/12 bit fixed precision, used to |
---|
| 307 | // adjust a source coordinate backwards by half a pixel so that the |
---|
| 308 | // integer bits represent the first sample (eg, sx1) and the |
---|
| 309 | // fractional bits are the blend weight of the second sample |
---|
| 310 | unsigned int temp; |
---|
| 311 | |
---|
| 312 | uint64 sy_48 = (stepy >> 1) - 1; |
---|
| 313 | for (size_t y = dst.top; y < dst.bottom; y++, sy_48+=stepy) { |
---|
| 314 | temp = sy_48 >> 36; |
---|
| 315 | temp = (temp > 0x800)? temp - 0x800: 0; |
---|
| 316 | unsigned int syf = temp & 0xFFF; |
---|
| 317 | size_t sy1 = temp >> 12; |
---|
| 318 | size_t sy2 = std::min(sy1+1, src.bottom-src.top-1); |
---|
| 319 | size_t syoff1 = sy1 * src.rowPitch; |
---|
| 320 | size_t syoff2 = sy2 * src.rowPitch; |
---|
| 321 | |
---|
| 322 | uint64 sx_48 = (stepx >> 1) - 1; |
---|
| 323 | for (size_t x = dst.left; x < dst.right; x++, sx_48+=stepx) { |
---|
| 324 | temp = sx_48 >> 36; |
---|
| 325 | temp = (temp > 0x800)? temp - 0x800 : 0; |
---|
| 326 | unsigned int sxf = temp & 0xFFF; |
---|
| 327 | size_t sx1 = temp >> 12; |
---|
| 328 | size_t sx2 = std::min(sx1+1, src.right-src.left-1); |
---|
| 329 | |
---|
| 330 | unsigned int sxfsyf = sxf*syf; |
---|
| 331 | for (unsigned int k = 0; k < channels; k++) { |
---|
| 332 | unsigned int accum = |
---|
| 333 | srcdata[(sx1 + syoff1)*channels+k]*(0x1000000-(sxf<<12)-(syf<<12)+sxfsyf) + |
---|
| 334 | srcdata[(sx2 + syoff1)*channels+k]*((sxf<<12)-sxfsyf) + |
---|
| 335 | srcdata[(sx1 + syoff2)*channels+k]*((syf<<12)-sxfsyf) + |
---|
| 336 | srcdata[(sx2 + syoff2)*channels+k]*sxfsyf; |
---|
| 337 | // accum is computed using 8/24-bit fixed-point math |
---|
| 338 | // (maximum is 0xFF000000; rounding will not cause overflow) |
---|
| 339 | *pdst++ = (accum + 0x800000) >> 24; |
---|
| 340 | } |
---|
| 341 | } |
---|
| 342 | pdst += channels*dst.getRowSkip(); |
---|
| 343 | } |
---|
| 344 | } |
---|
| 345 | }; |
---|
| 346 | |
---|
| 347 | } |
---|
| 348 | |
---|
| 349 | #endif |
---|