src/hermes/x86p_16.asm
branch SDL-1.2
changeset 5884 d832552bc9e5
parent 5392 30e4d724fe98
equal deleted inserted replaced
5883:739ad55fe50d 5884:d832552bc9e5
    28     ; check short
    28     ; check short
    29     cmp ecx,BYTE 16
    29     cmp ecx,BYTE 16
    30     ja .L3
    30     ja .L3
    31 
    31 
    32 
    32 
    33 .L1 ; short loop
    33 .L1: ; short loop
    34     mov al,[esi]
    34     mov al,[esi]
    35     mov ah,[esi+1]
    35     mov ah,[esi+1]
    36     mov ebx,eax
    36     mov ebx,eax
    37     mov edx,eax
    37     mov edx,eax
    38     shr eax,11
    38     shr eax,11
    45     mov [edi+1],ah
    45     mov [edi+1],ah
    46     add esi,BYTE 2
    46     add esi,BYTE 2
    47     add edi,BYTE 2
    47     add edi,BYTE 2
    48     dec ecx
    48     dec ecx
    49     jnz .L1
    49     jnz .L1
    50 .L2
    50 .L2:
    51     retn
    51     retn
    52 
    52 
    53 .L3 ; head
    53 .L3: ; head
    54     mov eax,edi
    54     mov eax,edi
    55     and eax,BYTE 11b
    55     and eax,BYTE 11b
    56     jz .L4
    56     jz .L4
    57     mov al,[esi]
    57     mov al,[esi]
    58     mov ah,[esi+1]
    58     mov ah,[esi+1]
    68     mov [edi+1],ah
    68     mov [edi+1],ah
    69     add esi,BYTE 2
    69     add esi,BYTE 2
    70     add edi,BYTE 2
    70     add edi,BYTE 2
    71     dec ecx
    71     dec ecx
    72 
    72 
    73 .L4 ; save count
    73 .L4: ; save count
    74     push ecx
    74     push ecx
    75 
    75 
    76     ; unroll twice
    76     ; unroll twice
    77     shr ecx,1
    77     shr ecx,1
    78     
    78     
    82 
    82 
    83     ; negative counter 
    83     ; negative counter 
    84     neg ecx
    84     neg ecx
    85     jmp SHORT .L6
    85     jmp SHORT .L6
    86                               
    86                               
    87 .L5     mov [edi+ecx*4-4],eax
    87 .L5:    mov [edi+ecx*4-4],eax
    88 .L6     mov eax,[esi+ecx*4]
    88 .L6:    mov eax,[esi+ecx*4]
    89 
    89 
    90         mov ebx,[esi+ecx*4]
    90         mov ebx,[esi+ecx*4]
    91         and eax,07E007E0h         
    91         and eax,07E007E0h         
    92 
    92 
    93         mov edx,[esi+ecx*4]
    93         mov edx,[esi+ecx*4]
   123     mov [edi],al
   123     mov [edi],al
   124     mov [edi+1],ah
   124     mov [edi+1],ah
   125     add esi,BYTE 2
   125     add esi,BYTE 2
   126     add edi,BYTE 2
   126     add edi,BYTE 2
   127 
   127 
   128 .L7
   128 .L7:
   129     retn
   129     retn
   130 
   130 
   131 
   131 
   132 
   132 
   133 
   133 
   138     ; check short
   138     ; check short
   139     cmp ecx,BYTE 32
   139     cmp ecx,BYTE 32
   140     ja .L3
   140     ja .L3
   141 
   141 
   142 
   142 
   143 .L1 ; short loop
   143 .L1: ; short loop
   144     mov al,[esi]
   144     mov al,[esi]
   145     mov ah,[esi+1]
   145     mov ah,[esi+1]
   146     mov ebx,eax
   146     mov ebx,eax
   147     shr ebx,1
   147     shr ebx,1
   148     and ebx,     0111111111100000b
   148     and ebx,     0111111111100000b
   152     mov [edi+1],ah
   152     mov [edi+1],ah
   153     add esi,BYTE 2
   153     add esi,BYTE 2
   154     add edi,BYTE 2
   154     add edi,BYTE 2
   155     dec ecx
   155     dec ecx
   156     jnz .L1
   156     jnz .L1
   157 .L2
   157 .L2:
   158     retn
   158     retn
   159 
   159 
   160 .L3 ; head
   160 .L3: ; head
   161     mov eax,edi
   161     mov eax,edi
   162     and eax,BYTE 11b
   162     and eax,BYTE 11b
   163     jz .L4
   163     jz .L4
   164     mov al,[esi]
   164     mov al,[esi]
   165     mov ah,[esi+1]
   165     mov ah,[esi+1]
   172     mov [edi+1],ah
   172     mov [edi+1],ah
   173     add esi,BYTE 2
   173     add esi,BYTE 2
   174     add edi,BYTE 2
   174     add edi,BYTE 2
   175     dec ecx
   175     dec ecx
   176 
   176 
   177 .L4 ; save ebp
   177 .L4: ; save ebp
   178     push ebp
   178     push ebp
   179 
   179 
   180     ; save count
   180     ; save count
   181     push ecx
   181     push ecx
   182 
   182 
   189 
   189 
   190     ; negative counter 
   190     ; negative counter 
   191     xor ebp,ebp
   191     xor ebp,ebp
   192     sub ebp,ecx
   192     sub ebp,ecx
   193 
   193 
   194 .L5     mov eax,[esi+ebp*8]        ; agi?
   194 .L5:    mov eax,[esi+ebp*8]        ; agi?
   195         mov ecx,[esi+ebp*8+4]
   195         mov ecx,[esi+ebp*8+4]
   196        
   196        
   197         mov ebx,eax
   197         mov ebx,eax
   198         mov edx,ecx
   198         mov edx,ecx
   199 
   199 
   215         inc ebp
   215         inc ebp
   216         jnz .L5                 
   216         jnz .L5                 
   217 
   217 
   218     ; tail
   218     ; tail
   219     pop ecx
   219     pop ecx
   220 .L6 and ecx,BYTE 11b
   220 .L6: and ecx,BYTE 11b
   221     jz .L7
   221     jz .L7
   222     mov al,[esi]
   222     mov al,[esi]
   223     mov ah,[esi+1]
   223     mov ah,[esi+1]
   224     mov ebx,eax
   224     mov ebx,eax
   225     shr ebx,1
   225     shr ebx,1
   231     add esi,BYTE 2
   231     add esi,BYTE 2
   232     add edi,BYTE 2
   232     add edi,BYTE 2
   233     dec ecx
   233     dec ecx
   234     jmp SHORT .L6
   234     jmp SHORT .L6
   235 
   235 
   236 .L7 pop ebp
   236 .L7: pop ebp
   237     retn
   237     retn
   238 
   238 
   239 
   239 
   240 
   240 
   241 
   241 
   246     ; check short
   246     ; check short
   247     cmp ecx,BYTE 16
   247     cmp ecx,BYTE 16
   248     ja .L3
   248     ja .L3
   249 
   249 
   250 	
   250 	
   251 .L1 ; short loop
   251 .L1: ; short loop
   252     mov al,[esi]
   252     mov al,[esi]
   253     mov ah,[esi+1]
   253     mov ah,[esi+1]
   254     mov ebx,eax
   254     mov ebx,eax
   255     mov edx,eax
   255     mov edx,eax
   256     shr eax,11
   256     shr eax,11
   265     mov [edi+1],ah
   265     mov [edi+1],ah
   266     add esi,BYTE 2
   266     add esi,BYTE 2
   267     add edi,BYTE 2
   267     add edi,BYTE 2
   268     dec ecx
   268     dec ecx
   269     jnz .L1
   269     jnz .L1
   270 .L2
   270 .L2:
   271     retn
   271     retn
   272 
   272 
   273 .L3 ; head
   273 .L3: ; head
   274     mov eax,edi
   274     mov eax,edi
   275     and eax,BYTE 11b
   275     and eax,BYTE 11b
   276     jz .L4
   276     jz .L4
   277     mov al,[esi]
   277     mov al,[esi]
   278     mov ah,[esi+1]
   278     mov ah,[esi+1]
   290     mov [edi+1],ah
   290     mov [edi+1],ah
   291     add esi,BYTE 2
   291     add esi,BYTE 2
   292     add edi,BYTE 2
   292     add edi,BYTE 2
   293     dec ecx
   293     dec ecx
   294 
   294 
   295 .L4 ; save count
   295 .L4: ; save count
   296     push ecx
   296     push ecx
   297 
   297 
   298     ; unroll twice
   298     ; unroll twice
   299     shr ecx,1
   299     shr ecx,1
   300     
   300     
   304 
   304 
   305     ; negative counter 
   305     ; negative counter 
   306     neg ecx
   306     neg ecx
   307     jmp SHORT .L6
   307     jmp SHORT .L6
   308                               
   308                               
   309 .L5     mov [edi+ecx*4-4],eax
   309 .L5:     mov [edi+ecx*4-4],eax
   310 .L6     mov eax,[esi+ecx*4]
   310 .L6:     mov eax,[esi+ecx*4]
   311 
   311 
   312         shr eax,1
   312         shr eax,1
   313         mov ebx,[esi+ecx*4]
   313         mov ebx,[esi+ecx*4]
   314         
   314         
   315         and eax,03E003E0h         
   315         and eax,03E003E0h         
   349     mov [edi],al
   349     mov [edi],al
   350     mov [edi+1],ah
   350     mov [edi+1],ah
   351     add esi,BYTE 2
   351     add esi,BYTE 2
   352     add edi,BYTE 2
   352     add edi,BYTE 2
   353 
   353 
   354 .L7
   354 .L7:
   355     retn
   355     retn
   356 
   356 
   357 
   357 
   358 
   358 
   359 
   359 
   364     ; check short
   364     ; check short
   365     cmp ecx,BYTE 16
   365     cmp ecx,BYTE 16
   366     ja .L3
   366     ja .L3
   367 
   367 
   368 
   368 
   369 .L1 ; short loop
   369 .L1: ; short loop
   370     mov al,[esi+0]
   370     mov al,[esi+0]
   371     mov ah,[esi+1]
   371     mov ah,[esi+1]
   372     mov ebx,eax
   372     mov ebx,eax
   373     mov edx,eax
   373     mov edx,eax
   374     and eax,BYTE 11000b         ; blue
   374     and eax,BYTE 11000b         ; blue
   382     mov [edi],al
   382     mov [edi],al
   383     add esi,BYTE 2
   383     add esi,BYTE 2
   384     inc edi
   384     inc edi
   385     dec ecx
   385     dec ecx
   386     jnz .L1
   386     jnz .L1
   387 .L2
   387 .L2:
   388     retn
   388     retn
   389 
   389 
   390 .L3 mov eax,edi
   390 .L3: mov eax,edi
   391     and eax,BYTE 11b
   391     and eax,BYTE 11b
   392     jz .L4
   392     jz .L4
   393     mov al,[esi+0]
   393     mov al,[esi+0]
   394     mov ah,[esi+1]
   394     mov ah,[esi+1]
   395     mov ebx,eax
   395     mov ebx,eax
   406     add esi,BYTE 2
   406     add esi,BYTE 2
   407     inc edi
   407     inc edi
   408     dec ecx
   408     dec ecx
   409     jmp SHORT .L3
   409     jmp SHORT .L3
   410 
   410 
   411 .L4 ; save ebp
   411 .L4: ; save ebp
   412     push ebp
   412     push ebp
   413 
   413 
   414     ; save count
   414     ; save count
   415     push ecx
   415     push ecx
   416 
   416 
   420     ; prestep
   420     ; prestep
   421     mov dl,[esi+0]
   421     mov dl,[esi+0]
   422     mov bl,[esi+1]
   422     mov bl,[esi+1]
   423     mov dh,[esi+2]
   423     mov dh,[esi+2]
   424         
   424         
   425 .L5     shl edx,16
   425 .L5:     shl edx,16
   426         mov bh,[esi+3]
   426         mov bh,[esi+3]
   427         
   427         
   428         shl ebx,16
   428         shl ebx,16
   429         mov dl,[esi+4]
   429         mov dl,[esi+4]
   430 
   430 
   461     ; check tail
   461     ; check tail
   462     pop ecx
   462     pop ecx
   463     and ecx,BYTE 11b
   463     and ecx,BYTE 11b
   464     jz .L7
   464     jz .L7
   465 
   465 
   466 .L6 ; tail
   466 .L6: ; tail
   467     mov al,[esi+0]
   467     mov al,[esi+0]
   468     mov ah,[esi+1]
   468     mov ah,[esi+1]
   469     mov ebx,eax
   469     mov ebx,eax
   470     mov edx,eax
   470     mov edx,eax
   471     and eax,BYTE 11000b         ; blue
   471     and eax,BYTE 11000b         ; blue
   480     add esi,BYTE 2
   480     add esi,BYTE 2
   481     inc edi
   481     inc edi
   482     dec ecx
   482     dec ecx
   483     jnz .L6
   483     jnz .L6
   484 
   484 
   485 .L7 pop ebp
   485 .L7: pop ebp
   486     retn
   486     retn
   487 
   487 
   488 %ifidn __OUTPUT_FORMAT__,elf32
   488 %ifidn __OUTPUT_FORMAT__,elf32
   489 section .note.GNU-stack noalloc noexec nowrite progbits
   489 section .note.GNU-stack noalloc noexec nowrite progbits
   490 %endif
   490 %endif