1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 | /++ Colour conversion and comparison + Authors: Mio Iwakura, mio.iwakura@gmail.com +/ module treecount.colours; import core.stdc.stdio; import std.math, std.range, std.stdio; import gl3n.linalg; import treecount.coords, treecount.globals, treecount.math; /++ The CIEDE2000 algorithm for computing the difference between two colours + + Adjusting the weighting factors is not recommended unless you know what you + are doing. For our purposes, the defaults of 1.0 are currently used. + + If you compile with the debug tag `deltaE_00`, intermediate values will be + printed to standard out for every invocation of the function for debugging + purposes. + Standards: Conforms to CIEDE2000 specification + Returns: The difference between the two input colours as defined by the + CIEDE2000 algorithm. Small values mean the colours are close, + big values mean they are far away. The range of values seems to + be around [0, 100] but I'm not an expert on this algorithm and did + not find clear documentation regarding the range of values produced + by the algorithm. + Params: + colour1 = The first input colour + colour2 = The second input colour + kL = A weighting factor for lightness + kC = A weighting factor for chroma + kH = A weighting factor for hue + Bugs: The unit tests all pass with phobos's default std.math.approxEqual, + but I've noticed small discrepancies, a few of which are documented + in code comments. If you have any insights or suggestions, please + don't hesitate to submit an issue and/or open a pull request! + Github issues is the prefered platform for discussion, but if + nothing else, you can contact the author of this module by email. + See_Also: + $(LINK http://www.ece.rochester.edu/~gsharma/ciede2000/ciede2000noteCRNA.pdf) +/ float deltaE_00(in Lab colour1, in Lab colour2, in float kL = 1.0, in float kC = 1.0, in float kH = 1.0) pure nothrow @nogc @trusted { /* The number comments correspond to sections in * "The CIEDE2000 Color-Difference Formula: Implementation Notes, * Supplementary Test Data, and Mathematical Observations" */ /* 1. */ //(2, 3) immutable avgC = (hypot(colour1.a, colour1.b) + hypot(colour2.a, colour2.b)) / 2; //(4) immutable G = 0.5 * (1 - sqrt(avgC ^^ 7 / (avgC ^^ 7 + 25L ^^ 7))); auto a(float colourA) pure nothrow @nogc @safe { immutable result = (1 + G) * colourA; return result; } //(5) immutable a1 = a(colour1.a); immutable a2 = a(colour2.a); //BUG: pair 22 a2 -> 4.9449 instead of 4.9450 // 4.94492, these bugs aren't just bankers rounding! //NOTE: equations 6-7 are Lab -> LCh //(6) immutable C1 = hypot(a1, colour1.b); immutable C2 = hypot(a2, colour2.b); //BUG: pair 21 C2 -> 4.7955 instead of 4.7954 //BUG: pair 22 C2 -> 4.9449 instead of 4.9450 auto h(float a, float b) pure nothrow @nogc @safe { //article does not mention floating point quirks, //but I assume accounting for it is necessary immutable result = a.approxEqual(0) && b.approxEqual(0) ? 0 : atan2(b, a).toDegrees(); return result; } //(7) immutable h1 = h(a1, colour1.b); //BUG: various values are slightly off immutable h2 = h(a2, colour2.b); /* 2. */ //(8) immutable deltaL = colour2.L - colour1.L; //(9) immutable deltaC = C2 - C1; //(10) immutable C1C2 = C1 * C2; float deltah = h2 - h1; if (C1C2.approxEqual(0.0)) deltah = 0.0; else if (deltah > 180.0) deltah -= 360.0; else if (deltah < -180.0) deltah += 360.0; //(11) immutable deltaH = 2 * sqrt(C1C2) * sin((deltah / 2).toRadians()); /* 3. */ //(12) immutable avgL = (colour1.L + colour2.L) / 2; //(13) immutable avgNewC = (C1 + C2) / 2; //(14) float avgh; { immutable h1ph2 = h1 + h2; if (C1C2.approxEqual(0.0)) avgh = h1ph2; else if (abs(h1 - h2) <= 180.0) avgh = h1ph2 / 2.0; else if (h1ph2 < 360.0) avgh = (h1ph2 + 360.0) / 2.0; else avgh = (h1ph2 - 360.0) / 2.0; } //(15) immutable T = 1 - 0.17 * cos((avgh - 30).toRadians()) + 0.24 * cos((2 * avgh).toRadians()) + 0.32 * cos( (3 * avgh + 6).toRadians()) - 0.20 * cos((4 * avgh - 63).toRadians()); //(16) immutable deltaTheta = 30 * exp(-1 * ((avgh - 275) / 25) ^^ 2); //(17) immutable RC = 2 * sqrt(avgNewC ^^ 7 / (avgNewC ^^ 7 + 25L ^^ 7)); //(18) immutable SL = 1 + (0.015 * (avgL - 50) ^^ 2) / (sqrt(20 + (avgL - 50) ^^ 2)); //(19) immutable SC = 1 + 0.045 * avgNewC; //(20) immutable SH = 1 + 0.015 * avgNewC * T; //(21) immutable RT = -1 * sin((2 * deltaTheta).toRadians()) * RC; //(22) immutable dCdkCSC = deltaC / (kC * SC); immutable dHdkHSH = deltaH / (kH * SH); debug (deltaE_00) { printf( "%.4f\t%.4f\t%.4f\t%.4Lf\t%.4Lf\t%.4lf\t%.4lf\t" ~ "%.4Lf\t%.4lf\t%.4Lf\t%.4Lf\t%.4Lf\t%.4Lf\t%.4Lf\n", colour1.L, colour1.a, colour1.b, a1, C1, h1, avgh, G, T, SL, SC, SH, RT, sqrt((deltaL / (kL * SL)) ^^ 2 + dCdkCSC ^^ 2 + dHdkHSH ^^ 2 + RT * dCdkCSC * dHdkHSH)); printf("%.4f\t%.4f\t%.4f\t%.4Lf\t%.4Lf\t%.4lf\n", colour2.L, colour2.a, colour2.b, a2, C2, h2); } immutable result = sqrt((deltaL / (kL * SL)) ^^ 2 + dCdkCSC ^^ 2 + dHdkHSH ^^ 2 + RT * dCdkCSC * dHdkHSH); return result; } /// unittest { immutable Lab[34] inputA = [{L: 50.0000, a : 2.6772, b : -79.7751}, {L: 50.0000, a : 3.1571, b : -77.2803}, {L: 50.0000, a : 2.8361, b : -74.0200}, {L: 50.0000, a : -1.3802, b : -84.2814}, {L: 50.0000, a : -1.1848, b : -84.8006}, {L: 50.0000, a : -0.9009, b : -85.5211}, {L: 50.0000, a : 0.0000, b : 0.0000}, {L: 50.0000, a : -1.0000, b : 2.0000}, {L: 50.0000, a : 2.4900, b : -0.0010}, {L: 50.0000, a : 2.4900, b : -0.0010}, {L: 50.0000, a : 2.4900, b : -0.0010}, {L: 50.0000, a : 2.4900, b : -0.0010}, {L: 50.0000, a : -0.0010, b : 2.4900}, {L: 50.0000, a : -0.0010, b : 2.4900}, {L: 50.0000, a : -0.0010, b : 2.4900}, {L: 50.0000, a : 2.5000, b : 0.0000}, {L: 50.0000, a : 2.5000, b : 0.0000}, {L: 50.0000, a : 2.5000, b : 0.0000}, {L: 50.0000, a : 2.5000, b : 0.0000}, {L: 50.0000, a : 2.5000, b : 0.0000}, {L: 50.0000, a : 2.5000, b : 0.0000}, {L: 50.0000, a : 2.5000, b : 0.0000}, {L: 50.0000, a : 2.5000, b : 0.0000}, {L: 50.0000, a : 2.5000, b : 0.0000}, {L: 60.2574, a : -34.0099, b : 36.2677}, {L: 63.0109, a : -31.0961, b : -5.8663}, {L: 61.2901, a : 3.7196, b : -5.3901}, {L: 35.0831, a : -44.1164, b : 3.7933}, {L: 22.7233, a : 20.0904, b : -46.6940}, {L: 36.4612, a : 47.8580, b : 18.3852}, {L: 90.8027, a : -2.0831, b : 1.4410}, {L: 90.9257, a : -0.5406, b : -0.9208}, {L: 6.7747, a : -0.2908, b : -2.4247}, {L: 2.0776, a : 0.0795, b : -1.1350}]; immutable Lab[34] inputB = [{L: 50.0000, a : 0.0000, b : -82.7485}, {L: 50.0000, a : 0.0000, b : -82.7485}, {L: 50.0000, a : 0.0000, b : -82.7485}, {L: 50.0000, a : 0.0000, b : -82.7485}, {L: 50.0000, a : 0.0000, b : -82.7485}, {L: 50.0000, a : 0.0000, b : -82.7485}, {L: 50.0000, a : -1.0000, b : 2.0000}, {L: 50.0000, a : 0.0000, b : 0.0000}, {L: 50.0000, a : -2.4900, b : 0.0009}, {L: 50.0000, a : -2.4900, b : 0.0010}, {L: 50.0000, a : -2.4900, b : 0.0011}, {L: 50.0000, a : -2.4900, b : 0.0012}, {L: 50.0000, a : 0.0009, b : -2.4900}, {L: 50.0000, a : 0.0010, b : -2.4900}, {L: 50.0000, a : 0.0011, b : -2.4900}, {L: 50.0000, a : 0.0000, b : -2.5000}, {L: 73.0000, a : 25.0000, b : -18.0000}, {L: 61.0000, a : -5.0000, b : 29.0000}, {L: 56.0000, a : -27.0000, b : -3.0000}, {L: 58.0000, a : 24.0000, b : 15.0000}, {L: 50.0000, a : 3.1736, b : 0.5854}, {L: 50.0000, a : 3.2972, b : 0.0000}, {L: 50.0000, a : 1.8634, b : 0.5757}, {L: 50.0000, a : 3.2592, b : 0.3350}, {L: 60.4626, a : -34.1751, b : 39.4387}, {L: 62.8187, a : -29.7946, b : -4.0864}, {L: 61.4292, a : 2.2480, b : -4.9620}, {L: 35.0232, a : -40.0716, b : 1.5901}, {L: 23.0331, a : 14.9730, b : -42.5619}, {L: 36.2715, a : 50.5065, b : 21.2231}, {L: 91.1528, a : -1.6435, b : 0.0447}, {L: 88.6381, a : -0.8985, b : -0.7239}, {L: 5.8714, a : -0.0985, b : -2.2286}, {L: 0.9033, a : -0.0636, b : -0.5514}]; immutable float[34] expectedResults = [ 2.0425, 2.8615, 3.4412, 1.0000, 1.0000, 1.0000, 2.3669, 2.3669, 7.1792, 7.1792, 7.2195, 7.2195, 4.8045, 4.8045, 4.7461, 4.3065, 27.1492, 22.8977, 31.9030, 19.4535, 1.0000, 1.0000, 1.0000, 1.0000, 1.2644, 1.2630, 1.8731, 1.8645, 2.0373, 1.4146, 1.4441, 1.5381, 0.6377, 0.9082 ]; float[] actualResultA; float[] actualResultB; debug (deltaE_00) { foreach (ref a, ref b; lockstep(inputA[], inputB[])) { actualResultA ~= deltaE_00(a, b); } } else { writeln("[deltaE_00 test]"); foreach (ref a, ref b; lockstep(inputA[], inputB[])) { actualResultA ~= deltaE_00(a, b); actualResultB ~= deltaE_00(b, a); } foreach (size_t i, ref a, ref b, ref e; lockstep(actualResultA[], actualResultB[], expectedResults[])) { writefln("(%s) ΔE₀₀¹²: %.4f\tΔE₀₀²¹: %.4f\tΔE₀₀: %.4f", i + 1, a, b, e); assert(a.approxEqual(e) && b.approxEqual(e)); } } } /++ Calculates the average colour of every pixel of the sample + in Lab space + Returns: the average colour + Params: + rect = the sample to average +/ auto calculateAvgColour(in vec4 rect) in { assert(!imageLab.empty); } body { /** This naive implementation suffers from floating-point error. * I recommend using pairwise summation because it could be sped up with * SIMD, which is probably the best option to get better speed and accuracy. * OpenCL would be too much overhead, because our samples are small, although * task parallelism could be viable if samples are averaged in batch. */ immutable sample = rect.asPixelCoords(); auto average = Lab(0.0f, 0.0f, 0.0f); for (int i = sample.y; i < sample.y + sample.q; ++i) for (int j = sample.x; j < sample.x + sample.p; ++j) { immutable idx = j + i * originalImageDimensions.x; average.L += imageLab[idx].L; average.a += imageLab[idx].a; average.b += imageLab[idx].b; } average.L /= sample.p * sample.q; average.a /= sample.p * sample.q; average.b /= sample.p * sample.q; return average; } ///A colour in the CIE L*a*b* (CIELAB) colour space struct Lab { float L; ///L* component representing lightness float a; ///a* component representing position between red/magenta and green float b; ///b* component representing position between yellow and blue private float _p; //padding /* The padding is because OpenCL uses 32bpp images, and we want to be able * to cast the output of our rgb to lab kernel to Lab[] */ } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 | /// Converts raster images from RGB space to Lab space module treecount.rgbtolab; import std.algorithm, std.array, std.math, std.range, std.stdio; import derelict.opencl.cl, derelict.sdl2.sdl; import treecount.cl.program, treecount.colours, treecount.globals, treecount.init; /++ Wrapper function for calling the rgb to Lab kernel in OpenCL + + Returns: The raster as an array of `treecount.colours.Lab` + Params: + imageSurface = An input raster in SDL_PIXELFORMAT_ABGR8888 +/ auto rgbRasterToLab(SDL_Surface* imageSurface) in { assert(imageSurface); assert(imageSurface.format.format == SDL_PIXELFORMAT_ABGR8888); } out (result) { assert(!result.empty); } body { auto program = createProgram!"rgbToLab.cl"(Device.GPU); scope (exit) clReleaseProgram(program); auto rgbToLabKernel = clCreateKernel(program, "clRGBToLab".ptr, &err); scope (exit) clReleaseKernel(rgbToLabKernel); assert(rgbToLabKernel); Lab[] result; result.length = imageSurface.w * imageSurface.h; /+ We need to chunk this computation up because the image might be larger than + the supported texture memory. + + The amount of memory we need to allocate on the host is equal to + chunkSize^2 * 4 bytes-per-pixel for the input image + chunkSize^2 * 16 bytes-per-pixel for the output image + + Solving 16chunkSize^2 = gpuMaxAlloc for chunkSize gives us + chunkSize = sqrt(gpuMaxAlloc/16) + + This ensures we will be able to allocate the mem on the GPU, but we may + need to go even smaller if the max texture size is less +/ auto chunkSize = sqrt(gpuMaxAlloc / 16.0).lround(); { auto textureSize = min(gpuMaxWidth, gpuMaxHeight); if (chunkSize > textureSize) chunkSize = textureSize; } //TODO: use math~ size_t widthInChunks = 0; while (widthInChunks * chunkSize < imageSurface.w) ++widthInChunks; size_t heightInChunks = 0; while (heightInChunks * chunkSize < imageSurface.h) ++heightInChunks; for (size_t i = 0; i < heightInChunks; ++i) for (size_t j = 0; j < widthInChunks; ++j) { /+ The chunks on the right and bottom sides of the image might have + smaller width/height than the others because they hit the side of + the image +/ size_t width = j * chunkSize + chunkSize <= imageSurface.w ? chunkSize : imageSurface.w - j * chunkSize; size_t height = i * chunkSize + chunkSize <= imageSurface.h ? chunkSize : imageSurface.h - i * chunkSize; size_t inputOffset = 4 * (chunkSize * j + imageSurface.w * chunkSize * i); cl_image_format inputImageFormat; inputImageFormat.image_channel_order = CL_RGBA; inputImageFormat.image_channel_data_type = CL_UNSIGNED_INT8; cl_image_desc inputImageDesc; inputImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; inputImageDesc.image_width = width; inputImageDesc.image_height = height; inputImageDesc.image_depth = 1; inputImageDesc.image_row_pitch = imageSurface.pitch; inputImageDesc.image_slice_pitch = 0; inputImageDesc.num_mip_levels = 0; inputImageDesc.num_samples = 0; inputImageDesc.buffer = null; cl_mem clImageInput = clCreateImage(gpuContext, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, &inputImageFormat, &inputImageDesc, cast(ubyte*) imageSurface.pixels + inputOffset, &err); assert(err == CL_SUCCESS); assert(clImageInput); scope (exit) { err = clReleaseMemObject(clImageInput); assert(err == CL_SUCCESS); } err = clSetKernelArg(rgbToLabKernel, 0, cl_mem.sizeof, &clImageInput); assert(err == CL_SUCCESS); size_t outputOffset = 4 * (chunkSize * j + imageSurface.w * chunkSize * i); cl_image_format outputImageFormat; outputImageFormat.image_channel_order = CL_RGBA; outputImageFormat.image_channel_data_type = CL_FLOAT; cl_image_desc outputImageDesc; outputImageDesc.image_type = CL_MEM_OBJECT_IMAGE2D; outputImageDesc.image_width = width; outputImageDesc.image_height = height; outputImageDesc.image_depth = 1; outputImageDesc.image_row_pitch = 0; outputImageDesc.image_slice_pitch = 0; outputImageDesc.num_mip_levels = 0; outputImageDesc.num_samples = 0; outputImageDesc.buffer = null; /+ I'm not using CL_MEM_USE_HOST_PTR here because I currently don't + know exactly how to properly map and unmap the output. + Do you need to unmap before you free the object? + I can replace the read call with a map call, but is that the + right place to map? Are you supposed to map before you run the + kernel, do a read, and then unmap? + Until I know the answers to these questions, it is simpler to just + allocate on the device and read the output back to host. +/ cl_mem clImageOutput = clCreateImage(gpuContext, CL_MEM_WRITE_ONLY, &outputImageFormat, &outputImageDesc, null, &err); assert(err == CL_SUCCESS); assert(clImageOutput); scope (exit) { err = clReleaseMemObject(clImageOutput); assert(err == CL_SUCCESS); } err = clSetKernelArg(rgbToLabKernel, 1, cl_mem.sizeof, &clImageOutput); assert(err == CL_SUCCESS); size_t[2] rgbToLabWorkSize = [width, height]; cl_event rgbToLabKernelEvent; err = clEnqueueNDRangeKernel(gpuQueue, rgbToLabKernel, 2, null, rgbToLabWorkSize.ptr, null, 0, null, &rgbToLabKernelEvent); assert(err == CL_SUCCESS); size_t[3] readImageOrigin = [0, 0, 0]; size_t[3] readImageRegion = [width, height, 1]; size_t readImageRowPitch = float.sizeof * 4 * imageSurface.w; cl_event enqueueReadImageEvent; err = clEnqueueReadImage(gpuQueue, clImageOutput, CL_TRUE, readImageOrigin.ptr, readImageRegion.ptr, readImageRowPitch, 0, cast(float*) result.ptr + outputOffset, 1, &rgbToLabKernelEvent, &enqueueReadImageEvent); assert(err == CL_SUCCESS); } clFlush(gpuQueue); clFinish(gpuQueue); return result; } /// unittest { writeln("[rgbRasterToLab test]"); initOpenCL(); scope (exit) closeOpenCL(); /* pretend we have 2x2px texture memory so rgbRasterToLab has to chunk up * the computation into multiple kernel runs. */ gpuMaxHeight = 2; gpuMaxWidth = 2; initSDL2(); scope (exit) closeSDL2(); /+ It was simpler to just type the structured art in the source than open and + parse an image file. There are redundant computations in this test because + the image is rectangular (like real data). +/ immutable ubyte[] input = [ 0x00, 0x00, 0x00, 0xFF, //black 0xFF, 0xFF, 0xFF, 0xFF, //white 0xFF, 0xFF, 0x00, 0xFF, //yellow 0x00, 0xFF, 0x00, 0xFF, //green 0x00, 0x00, 0x00, 0xFF, //black 0xFF, 0xFF, 0xFF, 0xFF, //white 0xFF, 0xFF, 0xFF, 0xFF, //white 0xFF, 0xFF, 0xFF, 0xFF, //white 0xFF, 0xFF, 0xFF, 0xFF, //white 0x00, 0x00, 0x00, 0xFF, //black 0xFF, 0x00, 0xFF, 0xFF, //magenta 0xFF, 0xFF, 0xFF, 0xFF, //white 0x00, 0xFF, 0xFF, 0xFF, //cyan 0xFF, 0xFF, 0xFF, 0xFF, //white 0x00, 0x00, 0x00, 0xFF, //black 0xFF, 0x00, 0x00, 0xFF, //red 0xFF, 0xFF, 0xFF, 0xFF, //white 0xFF, 0xFF, 0xFF, 0xFF, //white 0x00, 0x00, 0xFF, 0xFF, //blue 0x00, 0x00, 0x00, 0xFF, //black 0x00, 0x00, 0x00, 0xFF, //black 0x00, 0x00, 0x00, 0xFF, //black 0x00, 0x00, 0x00, 0xFF, //black 0x00, 0x00, 0x00, 0xFF, //black 0x00, 0x00, 0x00, 0xFF ]; //black auto surface = SDL_CreateRGBSurfaceFrom(cast(void*) input.ptr, //pixels 5, //width 5, //height 32, //depth 20, //pitch 0x000000FF, //rmask 0x0000FF00, //gmask 0x00FF0000, //bmask 0xFF000000); //amask assert(surface); scope (exit) { SDL_FreeSurface(surface); } auto result = rgbRasterToLab(surface); immutable Lab[] expected = [{L: 0.0, a : 0.0, b : 0.0}, //black {L: 100.0000, a : 0.0052, b : -0.0104}, //white {L: 97.1382, a : -21.5559, b : 94.4825}, //yellow {L: 87.7370, a : -86.1846, b : 83.1812}, //green {L: 0.0, a : 0.0, b : 0.0}, //black {L: 100.0000, a : 0.0052, b : -0.0104}, //white {L: 100.0000, a : 0.0052, b : -0.0104}, //white {L: 100.0000, a : 0.0052, b : -0.0104}, //white {L: 100.0000, a : 0.0052, b : -0.0104}, //white {L: 0.0, a : 0.0, b : 0.0}, //black {L: 60.3199, a : 98.2542, b : -60.8430}, //magenta {L: 100.0000, a : 0.0052, b : -0.0104}, //white {L: 91.1165, a : -48.0796, b : -14.1381}, //cyan {L: 100.0000, a : 0.0052, b : -0.0104}, //white {L: 0.0, a : 0.0, b : 0.0}, //black {L: 53.2329, a : 80.1093, b : 67.2200}, //red {L: 100.0000, a : 0.0052, b : -0.0104}, //white {L: 100.0000, a : 0.0052, b : -0.0104}, //white {L: 32.3026, a : 79.1967, b : -107.8637}, //blue {L: 0.0, a : 0.0, b : 0.0}, //black {L: 0.0, a : 0.0, b : 0.0}, //black {L: 0.0, a : 0.0, b : 0.0}, //black {L: 0.0, a : 0.0, b : 0.0}, //black {L: 0.0, a : 0.0, b : 0.0}, //black {L: 0.0, a : 0.0, b : 0.0}]; //black foreach (size_t i, ref a, ref e; lockstep(result[], expected[])) { writefln("(%s) Actual: (%.4f, %.4f, %.4f)\tExpected: (%.4f, %.4f, %.4f)", i + 1, a.L, a.a, a.b, e.L, e.a, e.b); assert(a.L.approxEqual(e.L) && a.a.approxEqual(e.a) && a.b.approxEqual(e.b)); } } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | ///Functionality relating to building OpenCL programs module treecount.cl.program; import std.stdio; import derelict.opencl.cl; import treecount.globals; /++ Wrapper to create and build an OpenCL 1.2 program from source + + Returns: The program (if it built successfully) + Params: + source = The file name of the source. Must be known at compile-time as + this function embeds the file contents into the executable. + deviceType = The type of device to build for (CPU or GPU) +/ cl_program createProgram(string source)(Device deviceType) { auto sourcePtr = import(source).ptr; auto context = (deviceType == Device.CPU) ? cpuContext : gpuContext; auto device = (deviceType == Device.CPU) ? cpuDevice : gpuDevice; auto program = clCreateProgramWithSource(context, 1, &sourcePtr, null, &err); assert(program); err = clBuildProgram(program, 1, &device, "-Werror -cl-std=CL1.2".ptr, null, null); assert(err == CL_SUCCESS); cl_build_status programBuildStatus; err = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_STATUS, cl_build_status.sizeof, &programBuildStatus, null); assert(err == CL_SUCCESS); debug if (programBuildStatus != CL_BUILD_SUCCESS) { size_t logLength; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, null, &logLength); char[] log; log.length = logLength; clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logLength, log.ptr, null); stderr.writeln(log); assert(false); } return program; } ///Device types for creating programs enum Device { CPU, /// GPU /// } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 | float4 normalizeRGB(const uint4); float4 normalizeRGB(const uint4 c) { float4 result = (float4)(c.x/255.0,c.y/255.0,c.z/255.0,c.w/255.0); return result; } float XYZc(const float); float XYZc(const float channel) { if(channel <= 0.04045) return channel/12.92; return pow((channel + 0.055)/1.055, 2.4) * 100.0; } float4 rgbToXYZ(const float4); float4 rgbToXYZ(const float4 c) { const float r = XYZc(c.x); const float g = XYZc(c.y); const float b = XYZc(c.z); const float4 result = (float4)((r*0.4124 + g*0.3576 + b*0.1805), (r*0.2126 + g*0.7152 + b*0.0722), (r*0.0193 + g*0.1192 + b*0.9505), c.w); return result; } float Labf(const float); float Labf(const float v) { const float epsilon = 0.008856; const float kappa = 903.3; if(v > epsilon) return pow((float)v, (float)(1.0/3.0)); return (kappa*v + 16.0)/116.0; } float4 XYZToLab(const float4); float4 XYZToLab(const float4 c) { const float4 D65 = (float4)(95.047, 100.0, 108.883, 0.0); const float4 normalizedColour = (float4)(c.x/D65.x, c.y/D65.y, c.z/D65.z, c.w); float4 result = (float4)(116.0*Labf(normalizedColour.y) - 16.0, 500.0*(Labf(normalizedColour.x) - Labf(normalizedColour.y)), 200.0*(Labf(normalizedColour.y) - Labf(normalizedColour.z)), c.w); return result; } __kernel void clRGBToLab(__read_only image2d_t src, __write_only image2d_t dest) { const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |CLK_ADDRESS_CLAMP |CLK_FILTER_NEAREST; const int x = get_global_id(0); const int y = get_global_id(1); const int2 idx = (int2)(x, y); write_imagef( dest, idx, XYZToLab( rgbToXYZ( normalizeRGB( read_imageui( src, sampler, idx))))); } |