2017-02-23 152 views
0

我有一個16字節的排列掩碼uint8_t[16]和一個16字節的數據陣列uint32_t[4]。我想 「洗牌」 這個數據陣列使用vtbl這樣的:排序與ARM混淆霓虹燈vtbx

  0 1 2 3 4 5 6 7 8 9 A B C D E F 
    Data ||0x0,0x0,0x1,0x2|0x0,0x3,0x0,0x4||0x5,0x6, 0x7, 0x8| 0x0, 0x0, 0x0, 0x9|| 

SMask ||0x2,0x3,0x5,0x6|0x7,0x8,0x9,0xA||0xB,0xF,0x10,0x10|0x10,0x10,0x10,0x10|| 

Result ||0x1,0x2,0x3,0x0|0x4,0x5,0x6,0x7||0x8,0x9, 0x0, 0x0| 0x0, 0x0, 0x0, 0x0|| 

這是我到目前爲止的代碼:

#include <iostream> 
#include <arm_neon.h> 

inline uint8x16_t Shuffle(const uint8x16_t & src, const uint8x16_t & shuffle) { 
    return vcombine_u8(
      vtbl2_u8(
      (const uint8x8x2_t &)src, 
      vget_low_u8(shuffle) 
     ), 
      vtbl2_u8(
      (const uint8x8x2_t &)src, 
      vget_high_u8(shuffle) 
     ) 
     ); 
} 

int main() { 
    uint32_t* data32 = new uint32_t[4]; 
    data32[0] = 258;    // [0x00 0x00 0x01 0x02] 
    data32[1] = 196612;   // [0x00 0x03 0x00 0x04] 
    data32[2] = 84281096;  // [0x05 0x06 0x07 0x08] 
    data32[3] = 9;    // [0x00 0x00 0x00 0x09]  
    /*load structure*/ 
    uint32x4_t data32Vec = vld1q_u32(data32); 

    uint8_t* sMask = new uint8_t[16]; 
    sMask[0] = 2; 
    sMask[1] = 3; 
    sMask[2] = 5; 
    sMask[3] = 6; 
    sMask[4] = 7; 
    sMask[5] = 8; 
    sMask[6] = 9; 
    sMask[7] = 10; 
    sMask[8] = 11; 
    sMask[9] = 15; 
    sMask[10] = 16; 
    sMask[11] = 16; 
    sMask[12] = 16; 
    sMask[13] = 16; 
    sMask[14] = 16; 
    sMask[15] = 16; 
    /*load permutationmask into vector register*/ 
    uint8x16_t shuffleMask = vld1q_u8(sMask); 

    uint8_t* comprData = new uint8_t[16]; 
    /*shuffle the data with the mask and store it into an uint8_t[16]*/ 
    vst1q_u8(comprData, Shuffle(vreinterpretq_u8_u32(data32Vec),shuffleMask)); 
    for(int i = 0; i < 16; ++i) { 
     std::cout << (unsigned)comprData[i] << " " ; 
    } 
    std::cout << std::endl; 
    delete[] comprData; 
    delete[] sMask; 
    delete[] data32; 
    return 0; 
} 

輸出類似於如下:

0 0 0 3 0 8 7 6 5 0 0 0 0 0 0 0 

它應該是這樣的:

1 2 3 0 4 5 6 7 8 9 0 0 0 0 0 0 

我認爲它與排序有關,但只是看不到問題。有沒有人有提示?

我更新了有關ErmIg答案的代碼。主要的問題是,我混淆了vtbx和vtbl。

真誠

回答

1

可能是它可以幫助你(我使用這些功能洗牌ARM的NEON矢量內字節):

inline uint8x16_t Shuffle(const uint8x16_t & src, const uint8x16_t & shuffle) 
    { 
     return vcombine_u8(
      vtbl2_u8((const uint8x8x2_t &)src, vget_low_u8(shuffle)), 
      vtbl2_u8((const uint8x8x2_t &)src, vget_high_u8(shuffle))); 
    } 

    inline uint8x16_t Shuffle(const uint8x16x2_t & src, const uint8x16_t & shuffle) 
    { 
     return vcombine_u8(
      vtbl4_u8((const uint8x8x4_t &)src, vget_low_u8(shuffle)), 
      vtbl4_u8((const uint8x8x4_t &)src, vget_high_u8(shuffle))); 
    } 
+0

我想vtbl2需要在第一個參數一個目標寄存器?但這看起來不錯。事實上,我認爲我混淆了vtbl和vtbx的行爲。 Vtbl是我需要的,因此如果索引超出範圍,它將插入ZERO。我初步認爲vtbx會有這種行爲:( – Hymir