/+
    Alpha blended blitting routine.

    The per-pixel work (read, convert, blend, write) is expressed as template
    mixins that are mixed into blit().  Template mixins may only contain
    declarations, so every "statement" is written as the initializer of a
    throw-away variable (the *_dummyN names); the assignment inside the
    initializer is the actual effect.  The mixins therefore rely on names that
    exist in the scope they are mixed into (si, di, srgb, drgb, alpha, source,
    dest, sourceAMask, sourceIncrement, destIncrement, ...).
+/

import std.stdio;
import std.c.string; // strlen, used when building the SDL error message

version = SDL;

version( SDL )
{
    import derelict.sdl.sdl;
}

// Pixel formats understood by the blitter.
// TODO: RGB32?
enum : uint
{
    INVALID = 0,
    RGBA32,
    RGB24,
    RGB16_555,
    RGB16_565,
    RGBA8_I32, // indexed to 32 bit values
    A8,
}

/+
    Reads the source pixel at index si.
    Formats that carry alpha store it in `alpha` and strip it from `srgb`;
    A8 only produces `alpha`.
    NOTE(review): rgbaTable is not declared anywhere in the visible source.
+/
private template readSource( uint RGBA )
{
    static if ( RGBA == RGBA32 )
    {
        uint readS_dummy1 = srgb = source[si];
        uint readS_dummy2 = alpha = srgb & sourceAMask;
        uint readS_dummy3 = srgb = srgb & ~sourceAMask;
    }
    else static if ( RGBA == RGB24 )
    {
        // There is no such thing as an array with 24-bit elements, so we have
        // to use pointers.
        uint readS_dummy1 = srgb = *(cast(uint*)(source + si));
    }
    else static if ( RGBA == RGB16_555 || RGBA == RGB16_565 )
    {
        // cast(uint) is not necessary in all cases, only if dest is 32 bpp
        uint readS_dummy1 = srgb = cast(uint)source[si];
    }
    else static if ( RGBA == RGBA8_I32 )
    {
        uint readS_dummy1 = srgb = rgbaTable[source[si]];
        uint readS_dummy2 = alpha = srgb & sourceAMask;
        uint readS_dummy3 = srgb = srgb & ~sourceAMask;
    }
    else static if ( RGBA == A8 )
    {
        uint readS_dummy1 = alpha = cast(uint)source[si];
    }
    else
    {
        pragma(msg,"Invalid source RGBA format for reading.");
        static assert(0);
    }
}

/+
    Reads the destination pixel at index di into `drgb`.
    Where a write will touch more bits than the pixel owns (24 bpp, or one half
    of a 16 bpp pair) the original word is remembered in `drgbOriginal` so the
    write can restore the bits that must not change.
+/
private template readDestination( uint RGBA, ubyte half16bpp = NOT_APPLICABLE )
{
    static if ( RGBA == RGBA32 )
    {
        uint readD_dummy1 = drgb = dest[di];
    }
    else static if ( RGBA == RGB24 )
    {
        // There is no such thing as an array with 24-bit elements, so we have
        // to use pointers.
        uint readD_dummy1 = drgb = *(cast(uint*)(dest + di));

        // Since we can't write 24 bits, we can either write 3 bytes (slow),
        // or we can overwrite 8 bits of the next pixel.  The latter is
        // faster and can be done safely if we overwrite those 8 bits with
        // their previous contents.
        uint drgbOriginal = drgb;
    }
    else static if ( RGBA == RGB16_555 || RGBA == RGB16_565 )
    {
        uint readD_dummy1 = drgb = dest[di];

        static if ( half16bpp == LOW_ADDRESS_HALF ||
                    half16bpp == HIGH_ADDRESS_HALF )
        {
            // Store the original values of both pixels being read.
            // When reading and writing 2 pixels at a time, it is impossible
            // to prevent overwriting a pixel that we don't want to.  At
            // least not without some rather complicated code.  So instead,
            // we just make sure that the pixel we don't want to overwrite
            // is overwritten with its original value.  The original value
            // is stored here.
            // NOTE(review): destReadResult is not declared in the visible
            // source; drgb may have been intended - confirm.
            uint drgbOriginal = destReadResult;
        }
    }
    else static if ( RGBA == RGBA8_I32 )
    {
        uint readD_dummy1 = drgb = rgbaTable[dest[di]];
    }
    else
    {
        pragma(msg,"Invalid destination RGBA format for reading.");
        static assert(0);
    }
}

// Reads one source pixel and one destination pixel.
private template read( uint sourceRGBA, uint destRGBA,
                       ubyte half16bpp = NOT_APPLICABLE )
{
    mixin readSource!( sourceRGBA );
    mixin readDestination!( destRGBA, half16bpp );
}

/+
    Converts the pixel just read from the source format into the destination
    format.
    NOTE(review): sourceReadResult is not declared in the visible source.
+/
private template convert( uint sourceRGBA, uint destRGBA )
{
    static if ( sourceRGBA == RGBA32 ||
                sourceRGBA == RGB24 ||
                sourceRGBA == RGBA8_I32 )
    {
        static if ( destRGBA == RGBA32 || destRGBA == RGB24 )
        {
            alias sourceReadResult srgb; // do nothing
        }
        // This must be an "else static if": without the else, a 32/24 bit
        // destination took the branch above and then still fell into the
        // static assert(0) below.
        else static if ( destRGBA == RGB16_565 )
        {
            // Here we must shrink a 32 bit pixel from the source into a
            // 16 bit pixel.
            // in this situation we write the 16 bit resultant pixels one at
            // a time so the extra 16 bits will be safely discarded.
            uint convert_dummy1 = srgb =
                ((0xf800 & (sourceReadResult >> 8 )) +
                 (0x07e0 & (sourceReadResult >> 5 )) +
                 (0x001f & (sourceReadResult >> 3 )));
        }
        else
            static assert(0);
    }
    else static if ( sourceRGBA == RGB16_565 )
    {
        static if ( destRGBA == RGBA32 || destRGBA == RGB24 )
        {
            // Here we must expand a 16 bit pixel from the source into a
            // 32 bit pixel.
            // In this situation we read the 16 bit pixels one at a time
            // so the extra 16 bits can be safely discarded.
            uint convert_dummy1 = srgb =
                (((sourceReadResult & 0xf800) << 8 ) +
                 ((sourceReadResult & 0x07e0) << 5 ) +
                 ((sourceReadResult & 0x001f) << 3 ));
        }
        else static if ( destRGBA == RGB16_565 )
        {
            //alias sourceReadResult srgb; // do nothing
        }
        else
            static assert(0);
    }
    else static if ( sourceRGBA == A8 )
    {
        //alias srcColor srgb;
    }
    else
        static assert(0);
}

/+
    Blends srgb onto drgb using `alpha`, leaving the result in drgb.
    RGBA is the destination format.
+/
private template blend( uint RGBA )
{
    // Note that this will get it right regardless of which color is in which
    // channel.  Of course, the channels' placements must be correct.
    // It also preserves the destination's alpha channel, if present.
    static if ( RGBA == RGBA32 || RGBA == RGB24 || RGBA == RGBA8_I32 ||
                RGBA == RGB16_565 || RGBA == RGB16_555 )
    {
        static if ( RGBA == RGBA32 || RGBA == RGB24 || RGBA == RGBA8_I32 )
        {
            const shift = 8;
            const evenMask = 0x00ff00ff;
        }
        else
        {
            // For 16bpp formats:
            // alpha must be a 5 bit value (the 3 hi bits MUST be clear)
            // this does 2 16bit pixels at a time in one 32 bit word.
            // endianness doesn't matter on 565 formats due to symmetry
            // TODO: take into account endianness on 555 formats
            //   (probably only noticeable on big endian machines)
            const shift = 5;
            const evenMask = 0x07e0f81f;
        }

        const oddMask = ~evenMask;

        static if ( RGBA == RGBA32 || RGBA == RGBA8_I32 )
            uint originalDestAlpha = drgb & sourceAMask;

        // NOTE(review): blit() also shifts alpha right by 3 once for 16 bpp
        // destinations; for sources that never re-read alpha this shift is
        // then applied again for every pixel - confirm which one is intended.
        static if ( destbpp == 16 )
            uint blend_dummy1 = alpha = alpha >> 3;

        static if ( destbpp == 16 && sourceRGBA == A8 )
        {
            // Extract the middle channel and shift it into the high 16 bits, giving
            // at least 5 bits above it to hold the multiplication overflow, and at
            // least 5 bits below it to hold the high channel's multiplication overflow.
            uint sourceChannels = ((srgb << 16) | srgb) & evenMask;
            uint destChannels   = ((drgb << 16) | drgb) & evenMask;

            // do the blending
            uint blend_temp =
                (((sourceChannels - destChannels) * alpha) >> shift) + destChannels;

            // Now we move the middle channel from the high 16 bits, back into its
            // rightful place in the middle.
            uint blend_dummy2 = drgb =
                (blend_temp & (evenMask & 0x0000ffff)) |
                ((blend_temp & (evenMask & 0xffff0000)) >> 16 );
        }
        else
        {
            uint blend_dummy2 = drgb =
                ((((((srgb & evenMask)-(drgb & evenMask)) * alpha) >> shift) + drgb) & evenMask) |
                ((((((srgb & oddMask )-(drgb & oddMask )) >> shift) * alpha) + drgb) & oddMask);
        }

        static if ( RGBA == RGBA32 || RGBA == RGBA8_I32 ) // preserve alpha
            uint blend_dummy3 = drgb = (drgb & ~sourceAMask) | originalDestAlpha;
    }
    else
    {
        pragma(msg,"Invalid RGBA format for alpha blending.");
        static assert(0);
    }
}

/+
    Writes drgb to the destination at index di, then advances si and di by
    sourceIncrement / destIncrement.
+/
private template write( uint RGBA, ubyte half16bpp = NOT_APPLICABLE )
{
    static if ( RGBA == RGBA32 )
    {
        uint write_dummy1 = dest[di] = drgb;
    }
    else static if ( RGBA == RGB24 )
    {
        // A full 32 bit word is written; the byte that belongs to the
        // neighbouring pixel is restored from drgbOriginal.
        uint* address = cast(uint*)(dest + di);

        version ( BigEndian )
            uint write_dummy1 = *address = (drgb & 0xffffff00) | (drgbOriginal & 0x000000ff);
        else
            uint write_dummy1 = *address = (drgb & 0x00ffffff) | (drgbOriginal & 0xff000000);
    }
    else static if ( RGBA == RGB16_565 || RGBA == RGB16_555 )
    {
        // for selecting the lowest or highest pixel in terms of
        //   address in memory rather than place in the word/register
        version ( BigEndian )
            const writeMask = 0x0000ffff;
        else
            const writeMask = 0xffff0000;

        static if ( half16bpp == HIGH_ADDRESS_HALF )
            uint write_dummy1 = dest[di] = (drgb & writeMask) | (drgbOriginal & ~writeMask);
        else static if ( half16bpp == LOW_ADDRESS_HALF )
            uint write_dummy1 = dest[di] = (drgb & ~writeMask) | (drgbOriginal & writeMask);
        else
            uint write_dummy1 = dest[di] = drgb;
    }
    // TODO: writing RGBA8_I32.  needs an algo to reverse a 32 bpp value into
    //   an 8 bit indexed value.
    else
    {
        // The message used to be a copy of blend's ("alpha blending").
        pragma(msg,"Invalid destination RGBA format for writing.");
        static assert(0);
    }

    uint write_dummy2 = si = si + sourceIncrement;
    uint write_dummy3 = di = di + destIncrement;
}

// Selects which pixel of a 16 bpp pair a read/write applies to.
private enum : ubyte
{
    NOT_APPLICABLE = 0,
    LOW_ADDRESS_HALF,
    HIGH_ADDRESS_HALF,
}

// One complete pixel step: read, convert, blend, write.
private template innerLoop( uint sourceRGBA, uint destRGBA,
                            ubyte half16bpp = NOT_APPLICABLE )
{
    static if ( !(destRGBA == RGB16_565 || destRGBA == RGB16_555) &&
                half16bpp > 0 )
    {
        pragma(msg,"The half16bpp argument is only to be used when the "
                   "destination format is 16 bits per pixel.");
        static assert(0);
    }

    mixin read!(sourceRGBA,destRGBA,half16bpp);
    mixin convert!(sourceRGBA,destRGBA);
    mixin blend!(destRGBA);
    mixin write!(destRGBA,half16bpp);
}

/+
    Declares the pixel array/pointer and scanline padding for one surface.
    Mixed in twice by blit(): with isSource = true it yields `source` and
    `spadding`, with isSource = false it yields `dest` and `dpadding`.
+/
private template calculatePaddingAndArrays( bool isSource )
{
    static if ( isSource )
    {
        alias srcSurface surface;
        alias sourcebpp bpp;
        alias destbpp otherbpp;
    }
    else
    {
        alias dstSurface surface;
        alias destbpp bpp;
        alias sourcebpp otherbpp;
    }

    // Padding is the amount of extra data at the end of a scanline used to
    // ensure that the end of the scanline lines up on a 32 bit boundary.
    // spadding = source padding
    // dpadding = dest padding
    // In this case, the units padding is measured in change depending on
    //   the source and destination format.
    // The amount of data that is handled in each iteration also changes,
    //   and is reflected by the different types of arrays.
    static if ( bpp == 32 )
    {
        auto padding = 0;
        uint[] pixelData = cast(uint[])surface.pixels;
    }
    else static if ( bpp == 24 )
    {
        // padding measured in bytes
        auto padding = surface.pitch - (surface.width * 3);
        ubyte* pixelData = surface.pixels.ptr;
    }
    else static if ( bpp == 16 )
    {
        static if ( otherbpp != 16 /+otherbpp == 32 || otherbpp == 24 || otherbpp == 8+/ )
        {
            // padding measured in shorts
            auto padding = (surface.pitch >> 1) - surface.width;
            ushort[] pixelData = cast(ushort[])surface.pixels;
        }
        else
        {
            auto padding = 0;
            uint[] pixelData = cast(uint[])surface.pixels;
        }
    }
    else static if ( bpp == 8 )
    {
        auto padding = surface.pitch - surface.width; // padding measured in bytes
        ubyte[] pixelData = surface.pixels;
    }
    else
        static assert(0);

    static if ( isSource )
    {
        alias padding spadding;
        alias pixelData source;
    }
    else
    {
        alias padding dpadding;
        alias pixelData dest;
    }
}

/+
    Alpha blends a width x height rectangle of srcSurface, starting at
    (sourceX, sourceY), onto dstSurface at (destX, destY).

    sourceRGBA / destRGBA are the compile-time pixel formats of the surfaces.
    srcColor is the color used when the source is A8; alpha is the blend
    factor for source formats that do not carry their own alpha.

    This function shall do no clipping.
+/
void blit( uint sourceRGBA, uint destRGBA )
    ( short sourceX, short sourceY,
      short destX, short destY,
      short width, short height,
      inout Surface srcSurface, inout Surface dstSurface,
      uint srcColor, uint alpha )
{
    // this stuff just determines the bits per pixel of the source and
    //   destination surfaces
    static if ( sourceRGBA == RGBA32 )
        const sourcebpp = 32;
    else static if ( sourceRGBA == RGB24 )
        const sourcebpp = 24;
    else static if ( sourceRGBA == RGB16_565 || sourceRGBA == RGB16_555 )
        const sourcebpp = 16;
    else
        const sourcebpp = 8;

    static if ( destRGBA == RGBA32 )
        const destbpp = 32;
    else static if ( destRGBA == RGB24 )
        const destbpp = 24;
    else static if ( destRGBA == RGB16_565 || destRGBA == RGB16_555 )
        const destbpp = 16;
    else
        const destbpp = 8;
    //

    static if ( (sourcebpp == 32 || sourcebpp == 24) && destbpp == 16 )
        const convert32to16 = true;
    else
        const convert32to16 = false;

    static if ( sourcebpp == 16 && (destbpp == 32 || destbpp == 24) )
        const convert16to32 = true;
    else
        const convert16to32 = false;

    static if ( (destRGBA == RGB16_565 || destRGBA == RGB16_555) &&
                sourceRGBA != A8 )
        srcColor |= (srcColor << 16);

    static if ( destbpp == 16 )
        alpha >>= 3;

    // note that the padding quantities are necessarily zero if
    //   unitWidth = width / 2;
    // that's important because they have different units of measurement!
    mixin calculatePaddingAndArrays!( true );
    mixin calculatePaddingAndArrays!( false );

    static if ( destbpp == 24 )
    {
        uint lineWidth = width * 3;

        static if ( sourcebpp == 24 )
        {
            // same as: unitSrcSurfaceWidth = srcSurface.width * 3;
            uint unitSrcSurfaceWidth = srcSurface.pitch - spadding;
            uint unitSrcWidth = lineWidth;
        }
        else
        {
            uint unitSrcSurfaceWidth = srcSurface.width;
            uint unitSrcWidth = width;
        }

        uint unitDstSurfaceWidth = dstSurface.pitch - dpadding;
        uint unitDstWidth = lineWidth;
    }
    else static if ( sourcebpp == 16 && destbpp == 16 )
    {
        uint lineWidth = width / 2; // because we do 2 pixels at a time

        // The +(width & 1) part is used to make the division round up.
        uint unitSrcSurfaceWidth = (srcSurface.width / 2) + (srcSurface.width & 1);
        uint unitDstSurfaceWidth = (dstSurface.width / 2) + (dstSurface.width & 1);

        // The lineWidth variable rounds down on division, so it may be
        //   missing a pixel.  That is desirable since we don't want alphablend
        //   onto the pixel next to the missing pixel.  Of course, we will
        //   handle the missing pixel individually, but it is still useful to
        //   have access to a rounded-up version of the blit's width.
        uint unitSrcWidth = lineWidth + (width & 1);
        uint unitDstWidth = unitSrcWidth;
    }
    else
    {
        uint lineWidth = width;
        uint unitSrcWidth = width;
        uint unitDstWidth = width;
        uint unitSrcSurfaceWidth = srcSurface.width;
        uint unitDstSurfaceWidth = dstSurface.width;
    }

    uint sourceAMask = srcSurface.alphaMask;

    version( SDL )
    {
        // The lock flags and the scope(exit) handlers live at function scope
        // (a version block does not open a new scope).  Placing scope(exit)
        // inside the if-body would unlock the surface as soon as the if-body
        // ends, i.e. before any pixel is touched.
        auto sourceSdlSurface = srcSurface.sdl_surface;
        bool srcLocked = false;
        if ( sourceSdlSurface !is null )
            srcLocked = lock( sourceSdlSurface );
        scope(exit)
        {
            if ( srcLocked )
                SDL_UnlockSurface( sourceSdlSurface );
        }

        // The destination surface comes from dstSurface (this used to read
        // srcSurface, so the destination was never locked).
        auto destSdlSurface = dstSurface.sdl_surface;
        bool dstLocked = false;
        if ( destSdlSurface !is null )
            dstLocked = lock( destSdlSurface );
        scope(exit)
        {
            if ( dstLocked )
                SDL_UnlockSurface( destSdlSurface );
        }
    }

    static if ( sourcebpp == 24 )
        uint sourceIncrement = 3;
    else
        uint sourceIncrement = 1;

    static if ( destbpp == 24 )
        uint destIncrement = 3;
    else
        uint destIncrement = 1;

    // Distance, in index units, from the start of one scanline to the start
    // of the next: the visible part of the scanline plus its padding.
    uint srcStride = unitSrcSurfaceWidth + spadding;
    uint dstStride = unitDstSurfaceWidth + dpadding;

    // Since we are not necessarily blitting across the entire width of the
    //   destination surface or source surface, we have to skip some of the
    //   pixels on the end of the current scanline and on the beginning of
    //   the next scanline.
    // Add that to the padding (which is explained above), and the result
    //   is these source/dest LineExtra variables.
    int sLineExtra = srcStride - unitSrcWidth;
    int dLineExtra = dstStride - unitDstWidth;

    // initialize the index variables
    // si = source index
    // di = destination index
    // Rows are stepped by the full stride so that padded surfaces start on
    // the requested row.
    int si = (sourceX * sourceIncrement) + (srcStride * sourceY);
    int di = (destX * destIncrement) + (dstStride * destY);

    // nextLine is always ahead of di by the amount of pixels left in one line
    //   of the blit.
    int nextLine;

    // endi is the index to stop at.  di advances by one stride per row, so
    // the end must be measured in strides as well or padded surfaces would
    // stop before `height` rows have been processed.
    int endi = di + (dstStride * height);

    assert( lineWidth + dLineExtra == dstStride );

    //
    uint srgb;
    uint drgb;

    static if ( sourceRGBA == A8 )
        srgb = srcColor;

    while( di < endi )
    {
        nextLine = di + lineWidth;

        // NOTE(review): for convert16to32 the destination is not 16 bpp, and
        // innerLoop rejects a half16bpp argument in that case - confirm the
        // intended condition.
        static if ( convert32to16 || convert16to32 )
            mixin innerLoop!(sourceRGBA,destRGBA,HIGH_ADDRESS_HALF);

        while( di < nextLine )
        {
            mixin innerLoop!(sourceRGBA,destRGBA);
        }

        static if ( convert32to16 || convert16to32 )
            mixin innerLoop!(sourceRGBA,destRGBA,LOW_ADDRESS_HALF);

        si += sLineExtra;
        di += dLineExtra;
    }
}

version( SDL )
{
    /+
        Locks the surface if it requires locking and is not already locked.
        Returns: true if this call locked the surface (the caller must then
                 unlock it), false otherwise.
    +/
    private bool lock( SDL_Surface* surface )
    {
        if ( SDL_MUSTLOCK( surface ) && !surface.locked )
        {
            safe_SDL_LockSurface( surface );
            return true;
        }

        return false;
    }

    // automatically throw errors resulting from the SDL_LockSurface function.
    private void safe_SDL_LockSurface( SDL_Surface* surface )
    {
        if ( SDL_LockSurface( surface ) != 0 )
        {
            char* sdlError = SDL_GetError();
            char[] error = sdlError[0..std.c.string.strlen(sdlError)];
            throw new Exception(
                "SDL_LockSurface failed to lock a surface: "~error );
        }
    }
}

// Describes a block of pixel memory that blit() can read from or write to.
struct Surface
{
    ubyte[] pixels;
    uint alphaMask = 0;
    ushort width = 0xffff;
    ushort height = 0xffff;
    ushort pitch = 0xffff; /// width of a scanline in bytes.
    ushort RGBAformat = INVALID;

    /// width and height are in pixels.
    static Surface opCall( ubyte[] pixels, uint alphaMask,
                           ushort width, ushort height,
                           ushort pitch, ushort RGBAformat )
    {
        Surface result;
        assert( pixels !is null );
        result.pixels = pixels;
        result.width = width;
        result.height = height;
        result.pitch = pitch;
        result.RGBAformat = RGBAformat;
        result.alphaMask = alphaMask;
        return result;
    }

    version ( SDL )
    {
        SDL_Surface* sdl_surface = null;

        /// Wraps an SDL surface; the pixel slice covers pitch * h bytes.
        static Surface opCall( SDL_Surface* surface, ushort RGBAformat )
        {
            Surface result;
            result.pixels = cast(ubyte[])surface.pixels[0..
                                surface.pitch * surface.h];
            result.width = surface.w;
            result.height = surface.h;
            result.pitch = surface.pitch;
            assert ( RGBAformat != INVALID );
            result.RGBAformat = RGBAformat;
            result.alphaMask = surface.format.Amask;
            result.sdl_surface = surface;
            return result;
        }
    }
}