/* $XConsortium: cir_textblt.s,v 1.4 95/01/05 20:47:57 kaleb Exp $ */
/* $XFree86: xc/programs/Xserver/hw/xfree86/vga256/drivers/cirrus/cir_textblt.s,v 3.6 1995/01/28 17:08:23 dawes Exp $ */
/*
 *
 * Copyright 1993 by H. Hanemaayer, Utrecht, The Netherlands
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of H. Hanemaayer not be used in
 * advertising or publicity pertaining to distribution of the software without
 * specific, written prior permission.  H. Hanemaayer makes no representations
 * about the suitability of this software for any purpose.  It is provided
 * "as is" without express or implied warranty.
 *
 * H. HANEMAAYER DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL H. HANEMAAYER BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 *
 * Author:  H. Hanemaayer,
 *
 */

/*
 * This low-level routine writes a text string bitmap to video memory for the
 * blitter, which must be set up for system-memory-to-video-memory BLT.
 * The video address where the data is written doesn't matter. Each bitmap
 * scanline transmitted is padded to a byte boundary; the bitmap is
 * transferred in 16-bit words. This means that bytes from different
 * scanlines have to be combined if necessary.
 *
 * This function is used by the 5426 and 5428.
 *
 * Prototype:
 * CirrusTransferText( int nglyph, int height, unsigned long **glyphp,
 *   int glyphwidth, void *vaddr )
 *
 * nglyph is the number of characters
 * height is the height of the area.
 * glyphp is an array of pointers to character bitmaps (stored as one 32-bit
 * word per line)
 * glyphwidth is the width of the font in pixels.
 * vaddr is a video memory address (doesn't really matter).
 *
 * Optimized for 486 pipeline (somewhat out-of-place instructions are
 * labeled with 'blend').
 */

#include "assyntax.h"

	FILE("cir_textblt.S")

	AS_BEGIN

/* Definition of stack frame function arguments (offsets from EBP). */

#define nglyph_arg	REGOFF(8,EBP)
#define height_arg	REGOFF(12,EBP)
#define glyphp_arg	REGOFF(16,EBP)
#define glyphwidth_arg	REGOFF(20,EBP)
#define vaddr_arg	REGOFF(24,EBP)

/* 256-entry lookup table, defined elsewhere, indexed by byte value. */
#define BYTE_REVERSED GLNAME(byte_reversed)

/* I assume %eax and %edx can be trashed. */
/* Saving %ebx and %ecx may be unnecessary. */

	SEG_TEXT

#if 0	/* Unused. The 1994 databook specifies 32-bit transfers for 5426/8. */

/*
 * Register roles inside CirrusTransferText:
 *   EAX = dworddata (bit accumulator)	EDX = current scanline
 *   CL  = shift (bits held in EAX)	CH  = glyphwidth
 *   EDI = walking glyph pointer	ESI = &glyphp[nglyph] (end bound)
 *   EBX = scratch / table index
 */

	ALIGNTEXT4

	GLOBL GLNAME(CirrusTransferText)
GLNAME(CirrusTransferText):

	PUSH_L	(EBP)
	MOV_L	(ESP,EBP)
	PUSH_L	(EBX)
	PUSH_L	(ECX)
	PUSH_L	(ESI)
	PUSH_L	(EDI)

	XOR_L	(EDX,EDX)		/* line = 0 */
	MOV_B	(DL,CL)			/* shift = 0 */
	MOV_B	(glyphwidth_arg,CH)
	MOV_L	(EDX,EAX)		/* dworddata = 0 */

.line_loop:
	CMP_L	(height_arg,EDX)
	JGE	(.finished)

	MOV_L	(glyphp_arg,EDI)	/* glyphp */
	MOV_L	(nglyph_arg,ESI)
	LEA_L	(REGBISD(EDI,ESI,4,0),ESI) /* &(glyphp[nglyph]) */

.char_loop:
	CMP_L	(ESI,EDI)
	JGE	(.line_finished)

	MOV_L	(REGIND(EDI),EBX)	/* glyphp[chari] */
	ADD_L	(CONST(4),EDI)		/* advance to next glyph pointer (4 bytes) */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX) /* data = glyphp[chari][line] */
	SHL_L	(CL,EBX)
	ADD_L	(EBX,EAX)		/* dworddata += data << shift */
	ADD_B	(CH,CL)			/* shift += glyphwidth */
	CMP_B	(CONST(16),CL)		/* shift < 16? */
	JL	(.char_loop)

	/* Write a 16-bit word; each byte is first mapped through BYTE_REVERSED. */
	XOR_L	(EBX,EBX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	MOV_L	(vaddr_arg,EBX)
	SUB_B	(CONST(16),CL)		/* shift -= 16 (blend) */
	MOV_W	(AX,REGIND(EBX))	/* *(short)vaddr = dworddata */
	SHR_L	(CONST(16),EAX)		/* dworddata >>= 16 */
	JMP	(.char_loop)

.line_finished:
	INC_L	(EDX)			/* line++ */
	AND_B	(CL,CL)
	JZ	(.line_loop)
	/* Make sure last bits of scanline are padded to a byte boundary. */
	ADD_B	(CONST(7),CL)
	AND_B	(CONST(24),CL)		/* round shift up to a multiple of 8 */
	CMP_B	(CONST(16),CL)		/* extra 16-bit word to write? */
	JL	(.line_loop)

	/* Write a 16-bit word. */
	XOR_L	(EBX,EBX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	MOV_L	(vaddr_arg,EBX)
	SUB_B	(CONST(16),CL)		/* shift -= 16 */
	MOV_W	(AX,REGIND(EBX))	/* *(short)vaddr = dworddata */
	SHR_L	(CONST(16),EAX)		/* dworddata >>= 16 */
	JMP	(.line_loop)

.finished:
	/* Handle the last few bits and alignment. */
	/* DX receives the table-mapped low word of the leftover data. */
	XOR_L	(EBX,EBX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),DL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),DH)
	/* bytes = ((nglyph * glyphwidth + 7) >> 3) * h; */
	MOV_L	(nglyph_arg,EAX)
	IMUL_L	(glyphwidth_arg,EAX)
	ADD_L	(CONST(7),EAX)
	SHR_L	(CONST(3),EAX)
	IMUL_L	(height_arg,EAX)
	AND_L	(CONST(0x000000ff),ECX)	/* keep only CL (shift); drops glyphwidth in CH */
	ADD_B	(CONST(7),CL)
	SHR_B	(CONST(3),CL)		/* ((shift + 7) >> 3) */
	SUB_L	(ECX,EAX)		/* bytes - ((shift + 7) >> 3) */
	/* Make sure we transfer a multiple of 4 bytes in total. */
	TEST_B	(CONST(2),AL)
	JZ	(.skipword)
	MOV_L	(vaddr_arg,EBX)		/* write 16-bit word */
	MOV_W	(DX,REGIND(EBX))
	JMP	(.end)
.skipword:
	AND_B	(CL,CL)
	JZ	(.end)			/* if shift != 0 */
	MOV_L	(vaddr_arg,EBX)
	MOV_L	(EDX,REGIND(EBX))	/* then write 32-bit word */
.end:
	POP_L	(EDI)
	POP_L	(ESI)
	POP_L	(ECX)
	POP_L	(EBX)
	POP_L	(EBP)
	RET

#endif	/* Unused. */


/*
 * This is a version of the above function that exclusively does 32-bit
 * transfers, as required by the 543x (also works on the 5426). It can
 * handle font widths up to 32.
*/

/*
 * Register roles inside CirrusTransferText32bit:
 *   EAX = dworddata (bit accumulator)	EDX = current scanline
 *   CL  = shift (bits held in EAX)	CH  = glyphwidth
 *   EDI = walking glyph pointer	ESI = copy of the current glyph data
 *   EBX = scratch / table index
 *   bound_var (stack local) = &glyphp[nglyph]
 * Every dword is written to the same address (vaddr); each of its bytes is
 * first mapped through the BYTE_REVERSED table.
 */

#define bound_var REGIND(ESP)

	ALIGNTEXT4

	GLOBL GLNAME(CirrusTransferText32bit)
GLNAME(CirrusTransferText32bit):

	PUSH_L	(EBP)
	MOV_L	(ESP,EBP)
	PUSH_L	(EBX)
	PUSH_L	(ECX)
	PUSH_L	(ESI)
	PUSH_L	(EDI)
	SUB_L	(CONST(4),ESP)		/* one local variable */

	XOR_L	(EDX,EDX)		/* line = 0 */
	MOV_B	(CONST(0),CL)		/* shift = 0 */
	MOV_B	(glyphwidth_arg,CH)
	MOV_L	(EDX,EAX)		/* dworddata = 0 */

.line_loop2:
	CMP_L	(height_arg,EDX)
	JGE	(.finished2)

	MOV_L	(glyphp_arg,EDI)	/* glyphp */
	MOV_L	(nglyph_arg,ESI)
	LEA_L	(REGBISD(EDI,ESI,4,0),ESI) /* &(glyphp[nglyph]) */
	MOV_L	(ESI,bound_var)

.char_loop2:
	CMP_L	(bound_var,EDI)
	JGE	(.line_finished2)

	MOV_L	(REGIND(EDI),EBX)	/* glyphp[chari] */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX) /* data = glyphp[chari][line] */
	MOV_L	(EBX,ESI)		/* keep unshifted copy for the carry */
	SHL_L	(CL,EBX)
	ADD_L	(EBX,EAX)		/* dworddata += low(data << shift) */
	ADD_B	(CH,CL)			/* shift += glyphwidth */
	ADD_L	(CONST(4),EDI)		/* advance to next glyph pointer (4 bytes) */
	CMP_B	(CONST(32),CL)		/* shift < 32? */
	JL	(.char_loop2)

	/* At this point, high32(data << old_shift) is equal to */
	/* ESI >> (32 - old_shift), with old_shift = shift - glyphwidth. */
	/* old_shift is in 0..31, so the count can be 32. The CPU masks */
	/* variable shift counts to 5 bits, which would turn a shift by 32 */
	/* into a shift by 0 and carry the whole glyph over (glyphwidth 32). */
	/* Shift by 1 and then by (31 - old_shift) instead. */
	MOV_B	(CL,BL)			/* save new shift */
	SUB_B	(CH,CL)			/* old_shift */
	NEG_B	(CL)
	ADD_B	(CONST(31),CL)		/* 31 - old_shift, in 0..31 */
	SHR_L	(CONST(1),ESI)
	SHR_L	(CL,ESI)		/* ESI = bits that did not fit in EAX */
	MOV_B	(BL,CL)			/* restore new shift */

	/* Write a 32-bit word. */
	XOR_L	(EBX,EBX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	ROL_L	(CONST(16),EAX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	MOV_L	(vaddr_arg,EBX)
	ROL_L	(CONST(16),EAX)
	SUB_B	(CONST(32),CL)		/* shift -= 32 (blend) */
	MOV_L	(EAX,REGIND(EBX))	/* *vaddr = dworddata */
	MOV_L	(ESI,EAX)		/* dworddata = high(data << shift) */
	JMP	(.char_loop2)

.line_finished2:
	INC_L	(EDX)			/* line++ */
	AND_B	(CL,CL)
	JZ	(.line_loop2)
	/* Make sure last bits of scanline are padded to a byte boundary. */
	ADD_B	(CONST(7),CL)
	AND_B	(CONST(56),CL)		/* round shift up to a multiple of 8 */
	CMP_B	(CONST(32),CL)		/* extra 32-bit word to write? */
	JL	(.line_loop2)

	/* Write a 32-bit word. */
	XOR_L	(EBX,EBX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	ROL_L	(CONST(16),EAX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	MOV_L	(vaddr_arg,EBX)
	ROL_L	(CONST(16),EAX)
	SUB_B	(CONST(32),CL)		/* shift -= 32 (blend) */
	MOV_L	(EAX,REGIND(EBX))	/* *vaddr = dworddata */
	MOV_L	(CONST(0),EAX)		/* dworddata = 0 */
	JMP	(.line_loop2)

.finished2:
	/* Handle the last few bits and alignment. */
	TEST_B	(CL,CL)
	JZ	(.end2)			/* nothing pending */
	/* Build the table-mapped dword in EDX (EDX no longer holds the line). */
	XOR_L	(EBX,EBX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),DL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),DH)
	SHR_L	(CONST(16),EAX)		/* shr preferred over rol */
	ROL_L	(CONST(16),EDX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),DL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),DH)
	ROL_L	(CONST(16),EDX)
	/* No need to check for alignment, multiple of 32-bit words is */
	/* guaranteed. */
	MOV_L	(vaddr_arg,EBX)
	MOV_L	(EDX,REGIND(EBX))	/* write 32-bit word */
.end2:
	ADD_L	(CONST(4),ESP)		/* release the local variable */
	POP_L	(EDI)
	POP_L	(ESI)
	POP_L	(ECX)
	POP_L	(EBX)
	POP_L	(EBP)
	RET


/*
 * This is a version of the 16-bit (5426/8) text transfer routine that
 * assumes 32-bit font data is stored with the leftmost pixel at the highest
 * bit (MSB First). This way the bit order reversal is not required. Fonts can
 * be 'cached' and converted to MSB in system memory, and this function can
 * then be used for writing.
 *
 */

#if 0	/* Unused.
*/

/*
 * Register roles inside CirrusTransferTextMSB:
 *   EAX = dworddata (bit accumulator)	EDX = current scanline
 *   CL  = shift			CH  = glyphwidth
 *   EDI = walking glyph pointer	ESI = &glyphp[nglyph] (end bound)
 */

	ALIGNTEXT4

	GLOBL GLNAME(CirrusTransferTextMSB)
GLNAME(CirrusTransferTextMSB):

	PUSH_L	(EBP)
	MOV_L	(ESP,EBP)
	PUSH_L	(EBX)
	PUSH_L	(ECX)
	PUSH_L	(ESI)
	PUSH_L	(EDI)

	XOR_L	(EDX,EDX)		/* line = 0 */
	MOV_B	(DL,CL)			/* shift = 0 */
	MOV_B	(glyphwidth_arg,CH)
	MOV_L	(EDX,EAX)		/* dworddata = 0 */

.line_loop3:
	CMP_L	(height_arg,EDX)
	JGE	(.finished3)

	MOV_L	(glyphp_arg,EDI)	/* glyphp */
	MOV_L	(nglyph_arg,ESI)
	LEA_L	(REGBISD(EDI,ESI,4,0),ESI) /* &(glyphp[nglyph]) */

.char_loop3:
	CMP_L	(ESI,EDI)
	JGE	(.line_finished3)

	MOV_L	(REGIND(EDI),EBX)	/* glyphp[chari] */
	ADD_L	(CONST(4),EDI)		/* advance to next glyph pointer (4 bytes) */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX) /* data = glyphp[chari][line] */
	SHR_L	(CL,EBX)
	ADD_L	(EBX,EAX)		/* dworddata += data >> shift */
	ADD_B	(CH,CL)			/* shift += glyphwidth */
	CMP_B	(CONST(16),CL)		/* shift < 16? */
	JL	(.char_loop3)

	/* Write a 16-bit word: bring the high word down and swap its bytes. */
	ROL_L	(CONST(16),EAX)
	MOV_L	(vaddr_arg,EBX)
	XCHG_B	(AL,AH)
	SUB_B	(CONST(16),CL)		/* shift -= 16 (blend) */
	MOV_W	(AX,REGIND(EBX))	/* *(short)vaddr = dworddata */
	JMP	(.char_loop3)

.line_finished3:
	INC_L	(EDX)			/* line++ */
	AND_B	(CL,CL)
	JZ	(.line_loop3)
	/* Make sure last bits of scanline are padded to a byte boundary. */
	ADD_B	(CONST(7),CL)
	AND_B	(CONST(24),CL)		/* round shift up to a multiple of 8 */
	CMP_B	(CONST(16),CL)		/* extra 16-bit word to write? */
	JL	(.line_loop3)

	/* Write a 16-bit word. */
	ROL_L	(CONST(16),EAX)
	MOV_L	(vaddr_arg,EBX)
	XCHG_B	(AL,AH)
	SUB_B	(CONST(16),CL)		/* shift -= 16 */
	MOV_W	(AX,REGIND(EBX))	/* *(short)vaddr = dworddata */
	JMP	(.line_loop3)

.finished3:
	/* Handle the last few bits and alignment. */
	ROL_L	(CONST(16),EAX)
	MOV_B	(AL,DH)			/* DX = byte-swapped AX */
	MOV_B	(AH,DL)
	/* bytes = ((nglyph * glyphwidth + 7) >> 3) * h; */
	MOV_L	(nglyph_arg,EAX)
	IMUL_L	(glyphwidth_arg,EAX)
	ADD_L	(CONST(7),EAX)
	SHR_L	(CONST(3),EAX)
	IMUL_L	(height_arg,EAX)
	AND_L	(CONST(0x000000ff),ECX)	/* keep only CL (shift) */
	ADD_B	(CONST(7),CL)
	SHR_B	(CONST(3),CL)		/* ((shift + 7) >> 3) */
	SUB_L	(ECX,EAX)		/* bytes - ((shift + 7) >> 3) */
	/* Make sure we transfer a multiple of 4 bytes in total. */
	TEST_B	(CONST(2),AL)
	JZ	(.skipword3)
	MOV_L	(vaddr_arg,EBX)		/* write 16-bit word */
	MOV_W	(DX,REGIND(EBX))
	JMP	(.end3)
.skipword3:
	AND_B	(CL,CL)
	JZ	(.end3)			/* if shift != 0 */
	MOV_L	(vaddr_arg,EBX)
	MOV_L	(EDX,REGIND(EBX))	/* then write 32-bit word */
.end3:
	POP_L	(EDI)
	POP_L	(ESI)
	POP_L	(ECX)
	POP_L	(EBX)
	POP_L	(EBP)
	RET

#endif	/* Unused. */


/*
 * This is the MSB version of the 32-bit (5434) text transfer routine.
 */

#if 0	/* Unused. */

	ALIGNTEXT4

	GLOBL GLNAME(CirrusTransferText32bitMSB)
GLNAME(CirrusTransferText32bitMSB):

	PUSH_L	(EBP)
	MOV_L	(ESP,EBP)
	PUSH_L	(EBX)
	PUSH_L	(ECX)
	PUSH_L	(ESI)
	PUSH_L	(EDI)
	SUB_L	(CONST(4),ESP)		/* one local variable */

	XOR_L	(EDX,EDX)		/* line = 0 */
	MOV_B	(CONST(0),CL)		/* shift = 0 */
	MOV_B	(glyphwidth_arg,CH)
	MOV_L	(EDX,EAX)		/* dworddata = 0 */

.line_loop4:
	CMP_L	(height_arg,EDX)
	JGE	(.finished4)

	MOV_L	(glyphp_arg,EDI)	/* glyphp */
	MOV_L	(nglyph_arg,ESI)
	LEA_L	(REGBISD(EDI,ESI,4,0),ESI) /* &(glyphp[nglyph]) */
	MOV_L	(ESI,bound_var)

.char_loop4:
	CMP_L	(bound_var,EDI)
	JGE	(.line_finished4)

	MOV_L	(REGIND(EDI),EBX)	/* glyphp[chari] */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX) /* data = glyphp[chari][line] */
	MOV_L	(EBX,ESI)		/* keep unshifted copy for the overflow */
	SHR_L	(CL,EBX)
	ADD_L	(EBX,EAX)		/* dworddata += data >> shift */
	ADD_B	(CH,CL)			/* shift += glyphwidth */
	ADD_L	(CONST(4),EDI)		/* advance to next glyph pointer (4 bytes) */
	CMP_B	(CONST(32),CL)		/* shift < 32? */
	JL	(.char_loop4)

	/* At this point, overflow(data >> shift) is equal to */
	/* ESI << (32 - (shift - glyphwidth)) */
	MOV_B	(CL,BL)			/* save new shift */
	SUB_B	(CH,CL)			/* shift - glyphwidth */
	NEG_B	(CL)
	MOV_B	(AL,BH)		/* instr. blended in -- belongs to byteswap */
	ADD_B	(CONST(32),CL)
	SHL_L	(CL,ESI)
	MOV_B	(BL,CL)			/* restore new shift */

	/* Write a 32-bit word. */
	MOV_B	(AH,BL)			/* do a 32-bit byteswap */
	SHLD_L	(CONST(16),EAX,EBX)
	XCHG_B	(BL,BH)
	MOV_L	(vaddr_arg,EAX)
	SUB_B	(CONST(32),CL)		/* shift -= 32 (blend) */
	MOV_L	(EBX,REGIND(EAX))	/* *vaddr = dworddata */
	MOV_L	(ESI,EAX)		/* dworddata = overflow(data << shift) */
	JMP	(.char_loop4)

.line_finished4:
	INC_L	(EDX)			/* line++ */
	AND_B	(CL,CL)
	JZ	(.line_loop4)
	/* Make sure last bits of scanline are padded to a byte boundary. */
	ADD_B	(CONST(7),CL)
	AND_B	(CONST(56),CL)		/* round shift up to a multiple of 8 */
	CMP_B	(CONST(32),CL)		/* extra 32-bit word to write? */
	JL	(.line_loop4)

	/* Write a 32-bit word. */
	/* First do a 32-bit byteswap. Btw. there is an i486 instruction */
	/* that does exactly that, but the 386 doesn't have it. */
	MOV_B	(AL,BH)
	MOV_B	(AH,BL)
	SHLD_L	(CONST(16),EAX,EBX)
	XCHG_B	(BL,BH)
	MOV_L	(vaddr_arg,EAX)
	SUB_B	(CONST(32),CL)		/* shift -= 32 (blend) */
	MOV_L	(EBX,REGIND(EAX))	/* *vaddr = dworddata */
	MOV_L	(CONST(0),EAX)		/* dworddata = 0 */
	JMP	(.line_loop4)

.finished4:
	/* Handle the last few bits and alignment. */
	TEST_B	(CL,CL)
	JZ	(.end4)			/* nothing pending */
	MOV_B	(AL,DH)			/* do a 32-bit byteswap */
	MOV_B	(AH,DL)
	SHLD_L	(CONST(16),EAX,EDX)
	XCHG_B	(DL,DH)
	/* No need to check for alignment, multiple of 32-bit words is */
	/* guaranteed. */
	MOV_L	(vaddr_arg,EBX)
	MOV_L	(EDX,REGIND(EBX))	/* write 32-bit word */
.end4:
	ADD_L	(CONST(4),ESP)		/* release the local variable */
	POP_L	(EDI)
	POP_L	(ESI)
	POP_L	(ECX)
	POP_L	(EBX)
	POP_L	(EBP)
	RET

#endif	/* Unused. */


/*
 * This is a 32-bit transfer function that checks for special font widths,
 * with fontwidth-specific routines that write stretches of characters
 * efficiently.
*/

/*
 * Register roles inside CirrusTransferText32bitSpecial:
 *   EAX = dworddata (bit accumulator)	EDX = current scanline
 *   CL  = shift (bits held in EAX)	CH  = glyphwidth
 *   EDI = walking glyph pointer	ESI, EBX = scratch
 * Stack locals:
 *   stretchsize_var = minimum number of glyphs a width-specific routine needs
 *   widthcode_var   = address of that routine
 *   glyphsleft_var  = glyphs left on this scanline, including the current one
 *
 * Whenever shift == 0 and at least stretchsize glyphs remain, control is
 * passed to the width-specific routine; everything else goes through the
 * generic one-glyph-at-a-time path at .continue_loop5.  Font widths without
 * a dedicated routine use the generic path only.
 *
 * The width 6, 8, 14 and 16 routines index glyph data (or bytes of it)
 * directly into BYTE_REVERSED; this presumably relies on glyph data having
 * no bits set above the font width -- confirm against the callers.
 */

#define stretchsize_var REGIND(ESP)
#define widthcode_var REGOFF(4,ESP)
#define glyphsleft_var REGOFF(8,ESP)

	ALIGNTEXT4

	GLOBL GLNAME(CirrusTransferText32bitSpecial)
GLNAME(CirrusTransferText32bitSpecial):

	PUSH_L	(EBP)
	MOV_L	(ESP,EBP)
	PUSH_L	(EBX)
	PUSH_L	(ECX)
	PUSH_L	(ESI)
	PUSH_L	(EDI)
	SUB_L	(CONST(12),ESP)		/* three local variables */

	XOR_L	(EDX,EDX)		/* line = 0 */
	MOV_B	(CONST(0),CL)		/* shift = 0 */
	MOV_B	(glyphwidth_arg,CH)
	MOV_L	(EDX,EAX)		/* dworddata = 0 */

	/* Default for widths without a dedicated routine: a stretch size */
	/* that can never be reached, so .dostretch is never taken and the */
	/* (uninitialized) widthcode_var is never used. */
	MOV_L	(CONST(0x7fffffff),stretchsize_var)

	CMP_B	(CONST(8),CH)
	JZ	(.fontwidth8)
	CMP_B	(CONST(16),CH)
	JZ	(.fontwidth16)
	CMP_B	(CONST(32),CH)
	JZ	(.fontwidth32)
	CMP_B	(CONST(6),CH)
	JZ	(.fontwidth6)
	CMP_B	(CONST(14),CH)
	JZ	(.fontwidth14)
#if 0
	CMP_B	(CONST(12),CH)
	JZ	(.fontwidth12)
	CMP_B	(CONST(24),CH)
	JZ	(.fontwidth24)
#endif
	/* No special routine for this width: generic path only. */
	JMP	(.line_loop5)

.fontwidth6:
	MOV_L	(CONST(16),stretchsize_var)
	MOV_L	(CONST(.width6code),widthcode_var)
	JMP	(.line_loop5)

.fontwidth8:
	MOV_L	(CONST(4),stretchsize_var)
	MOV_L	(CONST(.width8code),widthcode_var)
	JMP	(.line_loop5)

.fontwidth16:
	MOV_L	(CONST(2),stretchsize_var)
	MOV_L	(CONST(.width16code),widthcode_var)
	JMP	(.line_loop5)

.fontwidth32:
	MOV_L	(CONST(1),stretchsize_var)
	/* Must be the 32-wide routine: it consumes one glyph per dword. */
	MOV_L	(CONST(.width32code),widthcode_var)
	JMP	(.line_loop5)

.fontwidth14:
	MOV_L	(CONST(5),stretchsize_var)
	MOV_L	(CONST(.width14code),widthcode_var)
	JMP	(.line_loop5)

.line_loop5:
	CMP_L	(height_arg,EDX)
	JGE	(.finished5)

	MOV_L	(glyphp_arg,EDI)	/* glyphp */
	MOV_L	(nglyph_arg,ESI)
#if 0
	LEA_L	(REGBISD(EDI,ESI,4,0),EBX) /* &(glyphp[nglyph]) */
	MOV_L	(EBX,bound_var)
#endif
	INC_L	(ESI)			/* pre-incremented: .char_loop5 decrements first */
	MOV_L	(ESI,glyphsleft_var)

.char_loop5:
	MOV_L	(glyphsleft_var,EBX)
	DEC_L	(EBX)
	JZ	(.line_finished5)
	MOV_L	(EBX,glyphsleft_var)	/* glyphs left, including this one */

	TEST_B	(CL,CL)
	JNZ	(.continue_loop5)
	/* If shift == 0 */
	CMP_L	(stretchsize_var,EBX)	/* check for stretch. */
	JGE	(.dostretch)

.continue_loop5:
	MOV_L	(REGIND(EDI),EBX)	/* glyphp[chari] */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX) /* data = glyphp[chari][line] */
	MOV_L	(EBX,ESI)		/* keep unshifted copy for the carry */
	SHL_L	(CL,EBX)
	ADD_L	(EBX,EAX)		/* dworddata += low(data << shift) */
	ADD_B	(CH,CL)			/* shift += glyphwidth */
	ADD_L	(CONST(4),EDI)		/* advance to next glyph pointer (4 bytes) */
	CMP_B	(CONST(32),CL)		/* shift < 32? */
	JL	(.char_loop5)

	/* At this point, high32(data << old_shift) is equal to */
	/* ESI >> (32 - (shift - glyphwidth)) */
	/* (old_shift is never 0 here: width 32 always takes .width32code.) */
	MOV_B	(CL,BL)			/* save new shift */
	SUB_B	(CH,CL)			/* shift - glyphwidth */
	NEG_B	(CL)
	ADD_B	(CONST(32),CL)
	SHR_L	(CL,ESI)
	MOV_B	(BL,CL)			/* restore new shift */

	/* Write a 32-bit word. */
	XOR_L	(EBX,EBX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	ROL_L	(CONST(16),EAX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	MOV_L	(vaddr_arg,EBX)
	ROL_L	(CONST(16),EAX)
	SUB_B	(CONST(32),CL)		/* shift -= 32 (blend) */
	MOV_L	(EAX,REGIND(EBX))	/* *vaddr = dworddata */
	MOV_L	(ESI,EAX)		/* dworddata = high(data << shift) */
	JMP	(.char_loop5)

.line_finished5:
	INC_L	(EDX)			/* line++ */
	AND_B	(CL,CL)
	JZ	(.line_loop5)
	/* Make sure last bits of scanline are padded to a byte boundary. */
	ADD_B	(CONST(7),CL)
	AND_B	(CONST(56),CL)		/* round shift up to a multiple of 8 */
	CMP_B	(CONST(32),CL)		/* extra 32-bit word to write? */
	JL	(.line_loop5)

	/* Write a 32-bit word. */
	XOR_L	(EBX,EBX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	ROL_L	(CONST(16),EAX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	MOV_L	(vaddr_arg,EBX)
	ROL_L	(CONST(16),EAX)
	SUB_B	(CONST(32),CL)		/* shift -= 32 (blend) */
	MOV_L	(EAX,REGIND(EBX))	/* *vaddr = dworddata */
	MOV_L	(CONST(0),EAX)		/* dworddata = 0 */
	JMP	(.line_loop5)

.finished5:
	/* Handle the last few bits and alignment. */
	TEST_B	(CL,CL)
	JZ	(.end5)			/* nothing pending */
	/* Build the table-mapped dword in EDX (EDX no longer holds the line). */
	XOR_L	(EBX,EBX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),DL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),DH)
	SHR_L	(CONST(16),EAX)		/* shr preferred over rol */
	ROL_L	(CONST(16),EDX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),DL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),DH)
	ROL_L	(CONST(16),EDX)
	/* No need to check for alignment, multiple of 32-bit words is */
	/* guaranteed. */
	MOV_L	(vaddr_arg,EBX)
	MOV_L	(EDX,REGIND(EBX))	/* write 32-bit word */
.end5:
	ADD_L	(CONST(12),ESP)		/* release the three locals */
	POP_L	(EDI)
	POP_L	(ESI)
	POP_L	(ECX)
	POP_L	(EBX)
	POP_L	(EBP)
	RET

.dostretch:
	JMP	(CODEPTR(widthcode_var))

/* Routines for specific font widths; ECX can be used as long as it is */
/* properly restored. */

.width8code:	/* Speedup ~ 80% */
	MOV_L	(glyphsleft_var,ESI)
.width8loop:
	/* Width 8; process 4 chars. */
	MOV_L	(REGIND(EDI),EBX)	/* glyphp[chari] */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX) /* data = glyphp[chari][line] */
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_L	(REGOFF(4,EDI),EBX)	/* Next char */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	SHL_L	(CONST(16),EAX)
	MOV_L	(REGOFF(8,EDI),EBX)	/* Next char */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	ADD_L	(CONST(16),EDI)		/* 4 glyph pointers consumed (blend) */
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_L	(REGOFF(-4,EDI),EBX)	/* Next char (EDI already advanced) */
	MOV_L	(vaddr_arg,ECX)		/* blend */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	SUB_L	(CONST(4),ESI)		/* blend */
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	ROL_L	(CONST(16),EAX)
	MOV_L	(EAX,REGIND(ECX))	/* Write dword. */
	CMP_L	(CONST(4),ESI)
	JGE	(.width8loop)
	MOV_W	(CONST(0x0800),CX)	/* shift = 0, glyphwidth = 8 */
	XOR_L	(EAX,EAX)		/* dworddata = 0 */
	/* Ugly loop semantics. */
	TEST_L	(ESI,ESI)
	JZ	(.line_finished5)
	MOV_L	(ESI,glyphsleft_var)
	JMP	(.continue_loop5)

.width16code:
	MOV_L	(glyphsleft_var,ESI)
.width16loop:
	/* Width 16; process 2 chars. */
	XOR_L	(ECX,ECX)
	MOV_L	(REGIND(EDI),EBX)	/* glyphp[chari] */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX) /* data = glyphp[chari][line] */
	MOV_B	(BL,CL)
	MOV_B	(REGOFF(BYTE_REVERSED,ECX),AL)
	MOV_B	(BH,CL)
	MOV_B	(REGOFF(BYTE_REVERSED,ECX),AH)
	SHL_L	(CONST(16),EAX)
	MOV_L	(REGOFF(4,EDI),EBX)	/* Next char: the adjacent pointer */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(BL,CL)
	MOV_B	(REGOFF(BYTE_REVERSED,ECX),AL)
	MOV_B	(BH,CL)
	MOV_B	(REGOFF(BYTE_REVERSED,ECX),AH)
	ROL_L	(CONST(16),EAX)
	MOV_L	(vaddr_arg,EBX)
	MOV_L	(EAX,REGIND(EBX))	/* Write dword. */
	ADD_L	(CONST(8),EDI)		/* 2 glyph pointers consumed */
	SUB_L	(CONST(2),ESI)
	CMP_L	(CONST(2),ESI)
	JGE	(.width16loop)
	MOV_W	(CONST(0x1000),CX)	/* shift = 0, glyphwidth = 16 */
	XOR_L	(EAX,EAX)		/* dworddata = 0 */
	/* Ugly loop semantics. */
	TEST_L	(ESI,ESI)
	JZ	(.line_finished5)
	MOV_L	(ESI,glyphsleft_var)
	JMP	(.continue_loop5)

.width32code:
	MOV_L	(glyphsleft_var,ESI)
.width32loop:
	/* Width 32; process 1 char per iteration until none are left. */
	XOR_L	(ECX,ECX)
	MOV_L	(REGIND(EDI),EBX)	/* glyphp[chari] */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX) /* data = glyphp[chari][line] */
	MOV_B	(BL,CL)
	MOV_B	(REGOFF(BYTE_REVERSED,ECX),AL)
	MOV_B	(BH,CL)
	MOV_B	(REGOFF(BYTE_REVERSED,ECX),AH)
	SHL_L	(CONST(16),EAX)
	SHR_L	(CONST(16),EBX)		/* upper half of the glyph data */
	MOV_B	(BL,CL)
	MOV_B	(REGOFF(BYTE_REVERSED,ECX),AL)
	MOV_B	(BH,CL)
	MOV_B	(REGOFF(BYTE_REVERSED,ECX),AH)
	ROL_L	(CONST(16),EAX)
	MOV_L	(vaddr_arg,EBX)
	MOV_L	(EAX,REGIND(EBX))	/* Write dword. */
	ADD_L	(CONST(4),EDI)		/* 1 glyph pointer consumed */
	DEC_L	(ESI)
	JNZ	(.width32loop)
	MOV_W	(CONST(0x2000),CX)	/* shift = 0, glyphwidth = 32 */
	XOR_L	(EAX,EAX)		/* dworddata = 0 */
	JMP	(.line_finished5)

.width6code:	/* Speedup ~ 35% */
	MOV_L	(glyphsleft_var,ESI)
.width6loop:
	/* Width 6; process 16 chars (96 bits = three dwords). */
	/* "byteN" below is BYTE_REVERSED[data of the Nth char]. */
	MOV_L	(REGIND(EDI),EBX)	/* glyphp[chari] */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX) /* data = glyphp[chari][line] */
	MOV_L	(ECX,EAX)		/* dworddata = 0 */
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)

	MOV_L	(REGOFF(4,EDI),EBX)	/* Next char */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	MOV_B	(CL,BL)
	SHR_L	(CONST(6),ECX)		/* byte2 >> 6 */
	ADD_L	(ECX,EAX)
	AND_B	(CONST(0x3c),BL)
	SHL_L	(CONST(10),EBX)		/* (byte2 & 0x3c) << 10 */
	ADD_L	(EBX,EAX)

	MOV_L	(REGOFF(8,EDI),EBX)	/* Next char */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	MOV_B	(CL,BL)
	AND_B	(CONST(0xf0),CL)
	SHL_L	(CONST(4),ECX)		/* (byte3 & 0xf0) << 4 */
	ADD_L	(ECX,EAX)
	AND_B	(CONST(0x0c),BL)
	SHL_L	(CONST(20),EBX)		/* (byte3 & 0x0c) << 20 */
	ADD_L	(EBX,EAX)

	MOV_L	(REGOFF(12,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	SHL_L	(CONST(14),ECX)		/* byte4 << 14 */
	ADD_L	(ECX,EAX)

	MOV_L	(REGOFF(16,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	SHL_L	(CONST(24),ECX)		/* byte5 << 24 */
	ADD_L	(ECX,EAX)

	MOV_L	(REGOFF(20,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	MOV_B	(CL,BL)
	AND_B	(CONST(0x0c0),CL)
	SHL_L	(CONST(18),ECX)		/* (byte6 & 0xc0) << 18 */
	ADD_L	(ECX,EAX)

	MOV_L	(vaddr_arg,ECX)
	MOV_L	(EAX,REGIND(ECX))	/* Write dword. */

	AND_B	(CONST(0x3c),BL)
	SHL_L	(CONST(2),EBX)		/* (byte6 & 0x3c) << 2 */
	MOV_L	(EBX,EAX)

	MOV_L	(REGOFF(24,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	MOV_B	(CL,BL)
	AND_B	(CONST(0xf0),CL)
	SHR_L	(CONST(4),ECX)		/* (byte7 & 0xf0) >> 4 */
	ADD_L	(ECX,EAX)
	AND_B	(CONST(0x0c),BL)
	SHL_L	(CONST(12),EBX)		/* (byte7 & 0x0c) << 12 */
	ADD_L	(EBX,EAX)

	MOV_L	(REGOFF(28,EDI),EBX)	/* Next char */
	SUB_L	(CONST(16),ESI)		/* blend */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	SHL_L	(CONST(6),ECX)		/* byte8 << 6 */
	ADD_L	(ECX,EAX)

	MOV_L	(REGOFF(32,EDI),EBX)	/* Next char */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	XOR_L	(ECX,ECX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	SHL_L	(CONST(16),ECX)		/* byte9 << 16 */
	ADD_L	(ECX,EAX)

	MOV_L	(REGOFF(36,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	MOV_B	(CL,BL)
	AND_B	(CONST(0xc0),CL)
	SHL_L	(CONST(10),ECX)		/* (byte10 & 0xc0) << 10 */
	ADD_L	(ECX,EAX)
	AND_B	(CONST(0x3c),BL)
	SHL_L	(CONST(26),EBX)		/* (byte10 & 0x3c) << 26 */
	ADD_L	(EBX,EAX)

	MOV_L	(REGOFF(40,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	/* get four bits */
	MOV_B	(CL,BL)
	AND_B	(CONST(0xf0),CL)
	SHL_L	(CONST(20),ECX)		/* (byte11 & 0xf0) << 20 */
	ADD_L	(ECX,EAX)

	MOV_L	(vaddr_arg,ECX)
	MOV_L	(EAX,REGIND(ECX))	/* Write dword. */

	/* get two bits */
	AND_B	(CONST(0x0c),BL)
	SHL_L	(CONST(4),EBX)		/* (byte11 & 0x0c) << 4 */
	MOV_L	(EBX,EAX)

	MOV_L	(REGOFF(44,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	SHR_L	(CONST(2),ECX)		/* byte12 >> 2 */
	ADD_L	(ECX,EAX)

	MOV_L	(REGOFF(48,EDI),EBX)	/* Next char */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	SHL_L	(CONST(8),ECX)		/* byte13 << 8 */
	ADD_L	(ECX,EAX)

	MOV_L	(REGOFF(52,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	/* get two bits */
	MOV_B	(CL,BL)
	AND_B	(CONST(0xc0),CL)
	SHL_L	(CONST(2),ECX)		/* (byte14 & 0xc0) << 2 */
	ADD_L	(ECX,EAX)
	/* get four bits */
	AND_B	(CONST(0x3c),BL)
	SHL_L	(CONST(18),EBX)
	ADD_L	(EBX,EAX)		/* (byte14 & 0x3c) << 18 */

	MOV_L	(REGOFF(56,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	/* get four bits */
	MOV_B	(CL,BL)
	AND_B	(CONST(0xf0),CL)
	SHL_L	(CONST(12),ECX)		/* (byte15 & 0xf0) << 12 */
	ADD_L	(ECX,EAX)
	/* get two bits */
	AND_B	(CONST(0x0c),BL)
	SHL_L	(CONST(28),EBX)		/* (byte15 & 0x0c) << 28 */
	ADD_L	(EBX,EAX)

	MOV_L	(REGOFF(60,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	ADD_L	(CONST(64),EDI)		/* (blend) 16 glyph pointers consumed */
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	SHL_L	(CONST(22),ECX)		/* byte16 << 22 */
	ADD_L	(ECX,EAX)

	MOV_L	(vaddr_arg,EBX)
	CMP_L	(CONST(16),ESI)		/* blend; the MOV below keeps the flags */
	MOV_L	(EAX,REGIND(EBX))	/* Write dword. */
	JGE	(.width6loop)

	XOR_L	(EAX,EAX)		/* dworddata = 0 */
	MOV_W	(CONST(0x0600),CX)	/* shift = 0, glyphwidth = 6 */
	CMP_L	(CONST(6),ESI)
	JL	(.width6end)

	/* Process another 6 chars for good measure: one dword is written */
	/* and four bits of the sixth char stay pending in EAX. */
	MOV_L	(REGIND(EDI),EBX)	/* glyphp[chari] */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX) /* data = glyphp[chari][line] */
	MOV_L	(ECX,EAX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)

	MOV_L	(REGOFF(4,EDI),EBX)	/* Next char */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	MOV_B	(CL,BL)
	SHR_L	(CONST(6),ECX)		/* byte2 >> 6 */
	ADD_L	(ECX,EAX)
	AND_B	(CONST(0x3c),BL)
	SHL_L	(CONST(10),EBX)		/* (byte2 & 0x3c) << 10 */
	ADD_L	(EBX,EAX)

	MOV_L	(REGOFF(8,EDI),EBX)	/* Next char */
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	MOV_B	(CL,BL)
	AND_B	(CONST(0xf0),CL)
	SHL_L	(CONST(4),ECX)		/* (byte3 & 0xf0) << 4 */
	ADD_L	(ECX,EAX)
	AND_B	(CONST(0x0c),BL)
	SHL_L	(CONST(20),EBX)		/* (byte3 & 0x0c) << 20 */
	ADD_L	(EBX,EAX)

	MOV_L	(REGOFF(12,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	SHL_L	(CONST(14),ECX)		/* byte4 << 14 */
	ADD_L	(ECX,EAX)

	MOV_L	(REGOFF(16,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),CL)
	SHL_L	(CONST(24),ECX)		/* byte5 << 24 */
	ADD_L	(ECX,EAX)

	MOV_L	(REGOFF(20,EDI),EBX)	/* Next char */
	XOR_L	(ECX,ECX)
	MOV_L	(REGBISD(EBX,EDX,4,0),EBX)
	MOV_L	(EBX,ECX)		/* save lsb byte6 bits (unreversed data) */
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),BL)
	AND_B	(CONST(0xc0),BL)
	SHL_L	(CONST(18),EBX)		/* (byte6 & 0xc0) << 18 */
	ADD_L	(EBX,EAX)

	MOV_L	(vaddr_arg,EBX)
	MOV_L	(EAX,REGIND(EBX))	/* Write dword. */

	MOV_L	(ECX,EAX)
	SHR_L	(CONST(2),EAX)		/* drop the two bits already written */
	ADD_L	(CONST(24),EDI)		/* 6 glyph pointers consumed */
	SUB_L	(CONST(6),ESI)
	MOV_W	(CONST(0x0604),CX)	/* shift = 4, glyphwidth = 6 */

.width6end:
	/* Ugly loop semantics. */
	TEST_L	(ESI,ESI)
	JZ	(.line_finished5)
	MOV_L	(ESI,glyphsleft_var)
	JMP	(.continue_loop5)

.width14code:	/* Speedup ~25% */
	/* Width 14; process 5 chars, no loop.  Two dwords are written and */
	/* six bits of the fifth char stay pending in EAX. */
	MOV_L	(REGIND(EDI),EBX)	/* glyphp[chari] */
	MOV_L	(REGBISD(EBX,EDX,4,0),EAX) /* data = glyphp[chari][line] */
	MOV_L	(REGOFF(4,EDI),EBX)	/* Next char */
	MOV_L	(REGBISD(EBX,EDX,4,0),ECX)
	SHL_L	(CONST(14),ECX)
	MOV_L	(REGOFF(8,EDI),EBX)	/* Next char */
	ADD_L	(ECX,EAX)
	MOV_L	(REGBISD(EBX,EDX,4,0),ECX)
	MOV_L	(ECX,EBX)
	SHL_L	(CONST(28),EBX)		/* low 4 bits of the third char */
	ADD_L	(EBX,EAX)

	/* write dword */
	XOR_L	(EBX,EBX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	ROL_L	(CONST(16),EAX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	MOV_L	(vaddr_arg,EBX)
	ROL_L	(CONST(16),EAX)
	MOV_L	(EAX,REGIND(EBX))	/* Write dword. */

	SHR_L	(CONST(4),ECX)		/* remaining 10 bits of the third char */
	MOV_L	(ECX,EAX)
	MOV_L	(REGOFF(12,EDI),EBX)	/* Next char */
	MOV_L	(REGBISD(EBX,EDX,4,0),ECX)
	SHL_L	(CONST(10),ECX)
	ADD_L	(ECX,EAX)
	MOV_L	(REGOFF(16,EDI),EBX)	/* Next char */
	MOV_L	(REGBISD(EBX,EDX,4,0),ECX)
	MOV_L	(ECX,EBX)
	SHL_L	(CONST(24),EBX)		/* (was 22) low 8 bits of the fifth char */
	ADD_L	(EBX,EAX)

	/* write dword */
	XOR_L	(EBX,EBX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	ROL_L	(CONST(16),EAX)
	MOV_B	(AL,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AL)
	MOV_B	(AH,BL)
	MOV_B	(REGOFF(BYTE_REVERSED,EBX),AH)
	MOV_L	(vaddr_arg,EBX)
	ROL_L	(CONST(16),EAX)
	MOV_L	(EAX,REGIND(EBX))	/* Write dword. */

	SHR_L	(CONST(8),ECX)		/* remaining 6 bits of the fifth char */
	MOV_L	(ECX,EAX)
	ADD_L	(CONST(20),EDI)		/* 5 glyph pointers consumed */
	MOV_L	(glyphsleft_var,ESI)
	SUB_L	(CONST(5),ESI)
	MOV_W	(CONST(0x0e06),CX)	/* shift = 6, glyphwidth = 14 */
	/* Ugly loop semantics. */
	TEST_L	(ESI,ESI)
	JZ	(.line_finished5)
	MOV_L	(ESI,glyphsleft_var)
	JMP	(.continue_loop5)