2012-08-16 83 views
5

爲了學習 ARM 彙編,我編寫了一個簡單的測試項目,使用內聯彙編和 NEON 指令對圖像進行縮小。經過一番努力,我終於讓它跑起來了,皆大歡喜。問題是:爲什麼 Clang 的優化會破壞我的內聯彙編代碼?

https://github.com/rmaz/NEON-Image-Downscaling

你可以在上面的鏈接看到這個項目。不過,它只在低於 -O2 的優化級別下才能正常工作。我查看了生成的彙編,但看不出爲什麼會出現這種情況。誰能提供一些見解?下面是負責內聯彙編部分的函數:

/*
 * Downscale one pair of adjacent source rows by 2x in each direction using
 * NEON inline assembly.
 *
 * dst          - destination pointer; 4 pixels (16 bytes) are stored per
 *                loop iteration.
 * src          - pointer to the top source row; the bottom row is taken to
 *                start pixelsPerRow elements after it.
 * pixelsPerRow - number of 32-bit pixels in a source row; rounded down to a
 *                multiple of 8 before the loop.
 *
 * Each iteration loads 8 pixels from each of the two rows, averages them
 * per byte (vhadd.u8) first vertically and then horizontally, and stores
 * the 4 resulting pixels.
 *
 * The loop body runs before the count is tested, so a pixelsPerRow that
 * rounds down to 0 does not skip the loop: subs then yields a non-zero
 * value and bne is taken.
 */
static void inline resizeRow(uint32_t *dst, uint32_t *src, uint32_t pixelsPerRow) 
{ 
    // second source row: pixelsPerRow elements past the start of the first
    const uint32_t * rowB = src + pixelsPerRow; 

    // force the number of pixels per row to a multiple of 8 
    pixelsPerRow = 8 * (pixelsPerRow/8);  

    // operands: %0 = dst, %1 = src, %2 = rowB, %3 = pixelsPerRow
    __asm__ volatile("Lresizeloop:      \n" // start loop 
        "vld1.32  {d0-d3}, [%1]!  \n" // load 8 pixels from the top row (post-increments %1)
        "vld1.32  {d4-d7}, [%2]!  \n" // load 8 pixels from the bottom row (post-increments %2)
        "vhadd.u8  q0, q0, q2   \n" // average the pixels vertically 
        "vhadd.u8  q1, q1, q3   \n" 
        "vtrn.32  q0, q2    \n" // transpose to put the horizontally adjacent pixels in different registers 
        "vtrn.32  q1, q3    \n" 
        "vhadd.u8  q0, q0, q2   \n" // average the pixels horizontally 
        "vhadd.u8  q1, q1, q3   \n" 
        "vtrn.32  d0, d1    \n" // fill the registers with pixels 
        "vtrn.32  d2, d3    \n" 
        "vswp   d1, d2    \n" 
        "vst1.64  {d0-d1}, [%0]!  \n" // store the result (post-increments %0)
        "subs   %3, %3, #8   \n" // subtract 8 from the pixel count (updates the condition flags)
        "bne   Lresizeloop   \n" // repeat until the row is complete 
        : "=r"(dst), "=r"(src), "=r"(rowB), "=r"(pixelsPerRow) // outputs: all four operands are written by the asm
        : "0"(dst), "1"(src), "2"(rowB), "3"(pixelsPerRow) // inputs: tied to the matching output operands
        : "q0", "q1", "q2", "q3" // clobbers declared: NEON registers q0-q3
        ); 
} 

在 -O1 下(可正常運作)爲外圍函數和循環生成的輸出如下:

.align 2 
    .code 16      @ @"\01-[BDPViewController downscaleImageNeon:]" 
    .thumb_func "-[BDPViewController downscaleImageNeon:]" 
"-[BDPViewController downscaleImageNeon:]": 
    .cfi_startproc 
Lfunc_begin4: 
    .loc 1 86 0     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:0 
@ BB#0: 
    .loc 1 86 1 prologue_end  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:1 
    push {r4, r5, r6, r7, lr} 
    add r7, sp, #12 
    push.w {r8, r10, r11} 
    sub sp, #20 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R2+0 
    .loc 1 88 20     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:88:20 
Ltmp41: 
    movw r0, :lower16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4)) 
Ltmp42: 
    mov r6, r2 
Ltmp43: 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R6+0 
    movt r0, :upper16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4)) 
LPC4_0: 
    add r0, pc 
    ldr.w r11, [r0] 
    mov r0, r6 
    blx _objc_retain 
    mov r4, r0 
    mov r0, r6 
    mov r1, r11 
Ltmp44: 
    blx _objc_msgSend 
    blx _CGImageGetWidth 
    mov r5, r0 
Ltmp45: 
    @DEBUG_VALUE: width <- R5+0 
    .loc 1 89 21     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:89:21 
    mov r0, r6 
    mov r1, r11 
    str r5, [sp, #16]   @ 4-byte Spill 
    blx _objc_msgSend 
    blx _CGImageGetHeight 
    mov r10, r0 
Ltmp46: 
    @DEBUG_VALUE: height <- R10+0 
    .loc 1 90 26     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:90:26 
    mov r0, r6 
    mov r1, r11 
    blx _objc_msgSend 
    blx _CGImageGetBytesPerRow 
    str r0, [sp, #12]   @ 4-byte Spill 
Ltmp47: 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    .loc 1 91 35     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:91:35 
    mov r0, r6 
    mov r1, r11 
    blx _objc_msgSend 
    blx _CGImageGetAlphaInfo 
    str r0, [sp, #4]   @ 4-byte Spill 
Ltmp48: 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    .loc 1 94 45     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45 
    mov r0, r6 
    mov r1, r11 
    blx _objc_msgSend 
    mov r6, r0 
Ltmp49: 
    mov r0, r4 
    blx _objc_release 
    mov r0, r6 
    .loc 1 98 29     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29 
    mul r8, r10, r5 
Ltmp50: 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    .loc 1 94 45     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45 
    blx _CGImageGetDataProvider 
    blx _CGDataProviderCopyData 
Ltmp51: 
    @DEBUG_VALUE: data <- R0+0 
    str r0, [sp, #8]   @ 4-byte Spill 
Ltmp52: 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    .loc 1 95 29     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:95:29 
    blx _CFDataGetBytePtr 
    mov r4, r0 
Ltmp53: 
    @DEBUG_VALUE: buffer <- R4+0 
    .loc 1 98 29     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29 
    lsr.w r0, r8, #2 
    movs r1, #4 
    blx _calloc 
    mov r5, r0 
Ltmp54: 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    mov r0, r10 
Ltmp55: 
    @DEBUG_VALUE: height <- R0+0 
    .loc 1 101 29    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29 
    cmp r0, #0 
Ltmp56: 
    @DEBUG_VALUE: rowIndex <- 0+0 
    beq LBB4_3 
@ BB#1:         @ %.lr.ph 
Ltmp57: 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    @DEBUG_VALUE: height <- R0+0 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    @DEBUG_VALUE: buffer <- R4+0 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    @DEBUG_VALUE: rowIndex <- 0+0 
    ldr r1, [sp, #12]   @ 4-byte Reload 
Ltmp58: 
    @DEBUG_VALUE: bytesPerRow <- R1+0 
    mov.w r8, #0 
    lsl.w r11, r1, #1 
    .loc 1 104 74    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:104:74 
Ltmp59: 
    lsr.w r10, r1, #1 
Ltmp60: 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
LBB4_2:         @ =>This Inner Loop Header: Depth=1 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    @DEBUG_VALUE: height <- R0+0 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    @DEBUG_VALUE: rowIndex <- 0+0 
    lsr.w r1, r8, #1 
Ltmp61: 
    mov r6, r0 
Ltmp62: 
    @DEBUG_VALUE: height <- R6+0 
    mla r0, r1, r10, r5 
Ltmp63: 
    @DEBUG_VALUE: destRow <- R1+0 
    .loc 1 105 9     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:105:9 
    ldr r2, [sp, #16]   @ 4-byte Reload 
    mov r1, r4 
Ltmp64: 
    bl _resizeRow 
    mov r0, r6 
Ltmp65: 
    @DEBUG_VALUE: height <- R0+0 
    .loc 1 101 50    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:50 
    add.w r8, r8, #2 
Ltmp66: 
    @DEBUG_VALUE: rowIndex <- R8+0 
    .loc 1 101 29    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29 
    add r4, r11 
    cmp r8, r0 
    blo LBB4_2 
Ltmp67: 
LBB4_3:         @ %._crit_edge 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    @DEBUG_VALUE: height <- R0+0 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    .loc 1 109 28    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:109:28 
    ldr r1, [sp, #4]   @ 4-byte Reload 
Ltmp68: 
    lsrs r2, r0, #1 
    str r1, [sp] 
    mov r6, r5 
Ltmp69: 
    @DEBUG_VALUE: outputBuffer <- R6+0 
    ldr r1, [sp, #16]   @ 4-byte Reload 
    ldr r0, [sp, #12]   @ 4-byte Reload 
Ltmp70: 
    lsrs r1, r1, #1 
    lsrs r3, r0, #1 
    mov r0, r5 
    bl _createBitmapContext 
    mov r4, r0 
Ltmp71: 
    @DEBUG_VALUE: context <- R4+0 
    .loc 1 110 30    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30 
    blx _CGBitmapContextCreateImage 
    .loc 1 111 66    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66 
    movw r1, :lower16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4)) 
    .loc 1 110 30    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30 
    mov r5, r0 
Ltmp72: 
    @DEBUG_VALUE: scaledImage <- R5+0 
    .loc 1 111 66    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66 
    movt r1, :upper16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4)) 
    movw r0, :lower16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4)) 
    movt r0, :upper16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4)) 
LPC4_1: 
    add r1, pc 
LPC4_2: 
    add r0, pc 
    mov r2, r5 
    ldr r1, [r1] 
    ldr r0, [r0] 
    blx _objc_msgSend 
Ltmp73: 
    @DEBUG_VALUE: returnImage <- R0+0 
    @ InlineAsm Start 
    mov r7, r7  @ marker for objc_retainAutoreleaseReturnValue 
    @ InlineAsm End 
    blx _objc_retainAutoreleasedReturnValue 
Ltmp74: 
    mov r8, r0 
    .loc 1 112 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:112:5 
    mov r0, r5 
    blx _CGImageRelease 
    .loc 1 113 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:113:5 
    mov r0, r4 
    blx _CGContextRelease 
    .loc 1 114 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:114:5 
    ldr r0, [sp, #8]   @ 4-byte Reload 
    blx _CFRelease 
    .loc 1 115 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:115:5 
    mov r0, r6 
    blx _free 
Ltmp75: 
    .loc 1 118 1     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:118:1 
    mov r0, r8 
    add sp, #20 
    pop.w {r8, r10, r11} 
    pop.w {r4, r5, r6, r7, lr} 
Ltmp76: 
    b.w _objc_autoreleaseReturnValue 
Ltmp77: 
Lfunc_end4: 
    .cfi_endproc 

    .align 2 
    .code 16      @ @resizeRow 
    .thumb_func _resizeRow 
_resizeRow: 
    .cfi_startproc 
Lfunc_begin5: 
    .loc 1 26 0     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:26:0 
@ BB#0: 
    @DEBUG_VALUE: resizeRow:dst <- R0+0 
    @DEBUG_VALUE: resizeRow:src <- R1+0 
    @DEBUG_VALUE: resizeRow:pixelsPerRow <- R2+0 
    .loc 1 27 47 prologue_end @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:27:47 
    add.w r3, r1, r2, lsl #2 
Ltmp78: 
    @DEBUG_VALUE: rowB <- R3+0 
    .loc 1 30 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:30:5 
    bic r2, r2, #7 
Ltmp79: 
    .loc 1 32 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:32:5 
    @ InlineAsm Start 
    Lresizeloop:      
vld1.32  {d0-d3}, [r1]!  
vld1.32  {d4-d7}, [r3]!  
vhadd.u8  q0, q0, q2   
vhadd.u8  q1, q1, q3   
vtrn.32  q0, q2    
vtrn.32  q1, q3    
vhadd.u8  q0, q0, q2   
vhadd.u8  q1, q1, q3   
vtrn.32  d0, d1    
vtrn.32  d2, d3    
vswp   d1, d2    
vst1.64  {d0-d1}, [r0]!  
subs   r2, r2, #8   
bne   Lresizeloop   

    @ InlineAsm End 
Ltmp80: 
    .loc 1 51 1     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:51:1 
    bx lr 
Ltmp81: 
Lfunc_end5: 
    .cfi_endproc 

而在 -O2 下(無法正常運作)生成的輸出如下:

.align 2 
    .code 16      @ @"\01-[BDPViewController downscaleImageNeon:]" 
    .thumb_func "-[BDPViewController downscaleImageNeon:]" 
"-[BDPViewController downscaleImageNeon:]": 
    .cfi_startproc 
Lfunc_begin4: 
    .loc 1 86 0     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:0 
@ BB#0: 
    .loc 1 86 1 prologue_end  @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:86:1 
    push {r4, r5, r6, r7, lr} 
    add r7, sp, #12 
    push.w {r8, r10, r11} 
    sub sp, #20 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R2+0 
    .loc 1 88 20     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:88:20 
Ltmp41: 
    movw r0, :lower16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4)) 
Ltmp42: 
    mov r6, r2 
Ltmp43: 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:image <- R6+0 
    movt r0, :upper16:(L_OBJC_SELECTOR_REFERENCES_2-(LPC4_0+4)) 
LPC4_0: 
    add r0, pc 
    ldr.w r11, [r0] 
    mov r0, r6 
    blx _objc_retain 
    mov r4, r0 
    mov r0, r6 
    mov r1, r11 
Ltmp44: 
    blx _objc_msgSend 
    blx _CGImageGetWidth 
    mov r5, r0 
Ltmp45: 
    @DEBUG_VALUE: width <- R5+0 
    .loc 1 89 21     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:89:21 
    mov r0, r6 
    mov r1, r11 
    str r5, [sp, #16]   @ 4-byte Spill 
    blx _objc_msgSend 
    blx _CGImageGetHeight 
    mov r10, r0 
Ltmp46: 
    @DEBUG_VALUE: height <- R10+0 
    .loc 1 90 26     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:90:26 
    mov r0, r6 
    mov r1, r11 
    blx _objc_msgSend 
    blx _CGImageGetBytesPerRow 
    str r0, [sp, #12]   @ 4-byte Spill 
Ltmp47: 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    .loc 1 91 35     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:91:35 
    mov r0, r6 
    mov r1, r11 
    blx _objc_msgSend 
    blx _CGImageGetAlphaInfo 
    str r0, [sp, #4]   @ 4-byte Spill 
Ltmp48: 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    .loc 1 94 45     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45 
    mov r0, r6 
    mov r1, r11 
    blx _objc_msgSend 
    mov r6, r0 
Ltmp49: 
    mov r0, r4 
    blx _objc_release 
    mov r0, r6 
    .loc 1 98 29     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29 
    mul r8, r10, r5 
Ltmp50: 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    .loc 1 94 45     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:94:45 
    blx _CGImageGetDataProvider 
    blx _CGDataProviderCopyData 
Ltmp51: 
    @DEBUG_VALUE: data <- R0+0 
    str r0, [sp, #8]   @ 4-byte Spill 
Ltmp52: 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    .loc 1 95 29     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:95:29 
    blx _CFDataGetBytePtr 
    mov r4, r0 
Ltmp53: 
    @DEBUG_VALUE: buffer <- R4+0 
    .loc 1 98 29     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:98:29 
    lsr.w r0, r8, #2 
    movs r1, #4 
    blx _calloc 
    mov r5, r0 
Ltmp54: 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    mov r0, r10 
Ltmp55: 
    @DEBUG_VALUE: height <- R0+0 
    .loc 1 101 29    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29 
    cmp r0, #0 
Ltmp56: 
    @DEBUG_VALUE: rowIndex <- 0+0 
    beq LBB4_3 
@ BB#1:         @ %.lr.ph 
Ltmp57: 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    @DEBUG_VALUE: height <- R0+0 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    @DEBUG_VALUE: buffer <- R4+0 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    @DEBUG_VALUE: rowIndex <- 0+0 
    ldr r1, [sp, #12]   @ 4-byte Reload 
Ltmp58: 
    @DEBUG_VALUE: bytesPerRow <- R1+0 
    mov.w r8, #0 
    lsl.w r11, r1, #1 
    .loc 1 104 74    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:104:74 
Ltmp59: 
    lsr.w r10, r1, #1 
Ltmp60: 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
LBB4_2:         @ =>This Inner Loop Header: Depth=1 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    @DEBUG_VALUE: height <- R0+0 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    @DEBUG_VALUE: rowIndex <- 0+0 
    lsr.w r1, r8, #1 
Ltmp61: 
    mov r6, r0 
Ltmp62: 
    @DEBUG_VALUE: height <- R6+0 
    mla r0, r1, r10, r5 
Ltmp63: 
    @DEBUG_VALUE: destRow <- R1+0 
    .loc 1 105 9     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:105:9 
    ldr r2, [sp, #16]   @ 4-byte Reload 
    mov r1, r4 
Ltmp64: 
    bl _resizeRow 
    mov r0, r6 
Ltmp65: 
    @DEBUG_VALUE: height <- R0+0 
    .loc 1 101 50    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:50 
    add.w r8, r8, #2 
Ltmp66: 
    @DEBUG_VALUE: rowIndex <- R8+0 
    .loc 1 101 29    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29 
    add r4, r11 
    cmp r8, r0 
    blo LBB4_2 
Ltmp67: 
LBB4_3:         @ %._crit_edge 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:self <- R0+0 
    @DEBUG_VALUE: -[BDPViewController downscaleImageNeon:]:_cmd <- R1+0 
    @DEBUG_VALUE: width <- [sp+#16]+#0 
    @DEBUG_VALUE: height <- R0+0 
    @DEBUG_VALUE: bytesPerRow <- [sp+#12]+#0 
    @DEBUG_VALUE: imageAlpha <- [sp+#4]+#0 
    @DEBUG_VALUE: data <- [sp+#8]+#0 
    @DEBUG_VALUE: outputBuffer <- R5+0 
    .loc 1 109 28    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:109:28 
    ldr r1, [sp, #4]   @ 4-byte Reload 
Ltmp68: 
    lsrs r2, r0, #1 
    str r1, [sp] 
    mov r6, r5 
Ltmp69: 
    @DEBUG_VALUE: outputBuffer <- R6+0 
    ldr r1, [sp, #16]   @ 4-byte Reload 
    ldr r0, [sp, #12]   @ 4-byte Reload 
Ltmp70: 
    lsrs r1, r1, #1 
    lsrs r3, r0, #1 
    mov r0, r5 
    bl _createBitmapContext 
    mov r4, r0 
Ltmp71: 
    @DEBUG_VALUE: context <- R4+0 
    .loc 1 110 30    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30 
    blx _CGBitmapContextCreateImage 
    .loc 1 111 66    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66 
    movw r1, :lower16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4)) 
    .loc 1 110 30    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:110:30 
    mov r5, r0 
Ltmp72: 
    @DEBUG_VALUE: scaledImage <- R5+0 
    .loc 1 111 66    @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:111:66 
    movt r1, :upper16:(L_OBJC_SELECTOR_REFERENCES_4-(LPC4_1+4)) 
    movw r0, :lower16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4)) 
    movt r0, :upper16:(L_OBJC_CLASSLIST_REFERENCES_$_-(LPC4_2+4)) 
LPC4_1: 
    add r1, pc 
LPC4_2: 
    add r0, pc 
    mov r2, r5 
    ldr r1, [r1] 
    ldr r0, [r0] 
    blx _objc_msgSend 
Ltmp73: 
    @DEBUG_VALUE: returnImage <- R0+0 
    @ InlineAsm Start 
    mov r7, r7  @ marker for objc_retainAutoreleaseReturnValue 
    @ InlineAsm End 
    blx _objc_retainAutoreleasedReturnValue 
Ltmp74: 
    mov r8, r0 
    .loc 1 112 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:112:5 
    mov r0, r5 
    blx _CGImageRelease 
    .loc 1 113 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:113:5 
    mov r0, r4 
    blx _CGContextRelease 
    .loc 1 114 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:114:5 
    ldr r0, [sp, #8]   @ 4-byte Reload 
    blx _CFRelease 
    .loc 1 115 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:115:5 
    mov r0, r6 
    blx _free 
Ltmp75: 
    .loc 1 118 1     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:118:1 
    mov r0, r8 
    add sp, #20 
    pop.w {r8, r10, r11} 
    pop.w {r4, r5, r6, r7, lr} 
Ltmp76: 
    b.w _objc_autoreleaseReturnValue 
Ltmp77: 
Lfunc_end4: 
    .cfi_endproc 

    .align 2 
    .code 16      @ @resizeRow 
    .thumb_func _resizeRow 
_resizeRow: 
    .cfi_startproc 
Lfunc_begin5: 
    .loc 1 26 0     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:26:0 
@ BB#0: 
    @DEBUG_VALUE: resizeRow:dst <- R0+0 
    @DEBUG_VALUE: resizeRow:src <- R1+0 
    @DEBUG_VALUE: resizeRow:pixelsPerRow <- R2+0 
    .loc 1 27 47 prologue_end @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:27:47 
    add.w r3, r1, r2, lsl #2 
Ltmp78: 
    @DEBUG_VALUE: rowB <- R3+0 
    .loc 1 30 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:30:5 
    bic r2, r2, #7 
Ltmp79: 
    .loc 1 32 5     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:32:5 
    @ InlineAsm Start 
    Lresizeloop:      
vld1.32  {d0-d3}, [r1]!  
vld1.32  {d4-d7}, [r3]!  
vhadd.u8  q0, q0, q2   
vhadd.u8  q1, q1, q3   
vtrn.32  q0, q2    
vtrn.32  q1, q3    
vhadd.u8  q0, q0, q2   
vhadd.u8  q1, q1, q3   
vtrn.32  d0, d1    
vtrn.32  d2, d3    
vswp   d1, d2    
vst1.64  {d0-d1}, [r0]!  
subs   r2, r2, #8   
bne   Lresizeloop   

    @ InlineAsm End 
Ltmp80: 
    .loc 1 51 1     @ NEON-Image-Downscaling/ImageResize/BDPViewController.m:51:1 
    bx lr 
Ltmp81: 
Lfunc_end5: 
    .cfi_endproc 
+1

爲什麼不發佈生成的代碼? – 2012-08-16 13:20:37

+0

這兩個看起來完全相同。這是編譯器的彙編輸出嗎?嘗試使用objdump從兩個不同編譯的二進制文件中獲取程序集。 – auselen 2012-08-17 21:49:39

回答

13

下面是我用 -O2 構建該 Xcode 項目時得到的彙編代碼片段。(用 -O1 構建時編譯器不會去內聯這個函數,所以它能正常工作我並不意外。)

Ltmp55: 
    @DEBUG_VALUE: rowIndex <- R3+0 
    .loc 1 101 29    @ /tmp/NEON-Image-Downscaling/ImageResize/BDPViewController.m:101:29 
    add r8, r12 
    cmp r3, r11 
    .loc 1 32 5     @ /tmp/NEON-Image-Downscaling/ImageResize/BDPViewController.m:32:5 
Ltmp56: 
    @ InlineAsm Start 
    Lresizeloop:      
vld1.32  {d0-d3}, [r4]!  
vld1.32  {d4-d7}, [r5]!  
vhadd.u8  q0, q0, q2   
vhadd.u8  q1, q1, q3   
vtrn.32  q0, q2    
vtrn.32  q1, q3    
vhadd.u8  q0, q0, q2   
vhadd.u8  q1, q1, q3   
vtrn.32  d0, d1    
vtrn.32  d2, d3    
vswp   d1, d2    
vst1.64  {d0-d1}, [r6]!  
subs   r2, r2, #8   
bne   Lresizeloop   

    @ InlineAsm End 
Ltmp57: 
    blo LBB2_2 

看到最後一行的 blo(branch if lower,低於則分支)指令了嗎?它使用的是內聯彙編塊之前的 cmp r3, r11 所設置的條件碼。但是你的內聯彙編代碼(其中的 subs 指令)已經把條件碼寄存器完全覆蓋掉了。那麼這是編譯器的錯誤嗎?……不是!你只是忘了告訴編譯器你的內聯彙編代碼會破壞條件碼。請把下面的第一段替換爲第二段:

    : "=r"(dst), "=r"(src), "=r"(rowB), "=r"(pixelsPerRow) 
       : "0"(dst), "1"(src), "2"(rowB), "3"(pixelsPerRow) 
       : "q0", "q1", "q2", "q3" 
       ); 

    : "=r"(dst), "=r"(src), "=r"(rowB), "=r"(pixelsPerRow) 
       : "0"(dst), "1"(src), "2"(rowB), "3"(pixelsPerRow) 
       : "q0", "q1", "q2", "q3", "cc" 
       ); 

這樣彙編輸出就會自行修正。我沒有運行該應用程序,但我敢打賭,你會發現它現在正常多了。 :)

+0

幹得好,quux,這解決了問題,給你滿分。 – Tark 2012-08-30 21:36:18