2008-10-28 85 views
14

編輯:這裏的代碼仍然有一些錯誤,性能上也還有改進的餘地。不過我沒有繼續自己修補,而是把這個問題提交給了英特爾討論組,並得到了很多很好的意見。如果一切順利,經過打磨的原子浮點數版本將包含在英特爾線程構建模塊(TBB)近期的發佈中。原問題:這個原子浮點數(Atomic float)的C++實現安全嗎?

好,這是一個棘手的問題:我希望有一個原子浮點數,不是爲了超快的圖形性能,而是作爲類的數據成員日常使用。而且我不想爲這些類付出加鎖的代價,因爲鎖對我的需求沒有任何額外的好處。

現在,我看到intel的tbb和其他原子庫支持整數類型,但不支持浮點。因此,我繼續實施了一個,並且它可以工作......但我不確定它是否真的有效,或者我真的很幸運。

這裏有人知道這算不算某種形式的線程異端嗎?

typedef unsigned int uint_32; 

    struct AtomicFloat 
    { 
    private: 
    tbb::atomic<uint_32> atomic_value_; 

    public: 
    template<memory_semantics M> 
    float fetch_and_store(float value) 
    { 
     const uint_32 value_ = atomic_value_.tbb::atomic<uint_32>::fetch_and_store<M>((uint_32&)value); 
     return reinterpret_cast<const float&>(value_); 
    } 

    float fetch_and_store(float value) 
    { 
     const uint_32 value_ = atomic_value_.tbb::atomic<uint_32>::fetch_and_store((uint_32&)value); 
     return reinterpret_cast<const float&>(value_); 
    } 

    template<memory_semantics M> 
    float compare_and_swap(float value, float comparand) 
    { 
     const uint_32 value_ = atomic_value_.tbb::atomic<uint_32>::compare_and_swap<M>((uint_32&)value,(uint_32&)compare); 
     return reinterpret_cast<const float&>(value_); 
    } 

    float compare_and_swap(float value, float compare) 
    { 
     const uint_32 value_ = atomic_value_.tbb::atomic<uint_32>::compare_and_swap((uint_32&)value,(uint_32&)compare); 
     return reinterpret_cast<const float&>(value_); 
    } 

    operator float() const volatile // volatile qualifier here for backwards compatibility 
    { 
     const uint_32 value_ = atomic_value_; 
     return reinterpret_cast<const float&>(value_); 
    } 

    float operator=(float value) 
    { 
     const uint_32 value_ = atomic_value_.tbb::atomic<uint_32>::operator =((uint_32&)value); 
     return reinterpret_cast<const float&>(value_); 
    } 

    float operator+=(float value) 
    { 
     volatile float old_value_, new_value_; 
     do 
     { 
      old_value_ = reinterpret_cast<float&>(atomic_value_); 
      new_value_ = old_value_ + value; 
     } while(compare_and_swap(new_value_,old_value_) != old_value_); 
     return (new_value_); 
    } 

    float operator*=(float value) 
    { 
     volatile float old_value_, new_value_; 
     do 
     { 
      old_value_ = reinterpret_cast<float&>(atomic_value_); 
      new_value_ = old_value_ * value; 
     } while(compare_and_swap(new_value_,old_value_) != old_value_); 
     return (new_value_); 
    } 

    float operator/=(float value) 
    { 
     volatile float old_value_, new_value_; 
     do 
     { 
      old_value_ = reinterpret_cast<float&>(atomic_value_); 
      new_value_ = old_value_/value; 
     } while(compare_and_swap(new_value_,old_value_) != old_value_); 
     return (new_value_); 
    } 

    float operator-=(float value) 
    { 
     return this->operator+=(-value); 
    } 

    float operator++() 
    { 
     return this->operator+=(1); 
    } 

    float operator--() 
    { 
     return this->operator+=(-1); 
    } 

    float fetch_and_add(float addend) 
    { 
     return this->operator+=(-addend); 
    } 

    float fetch_and_increment() 
    { 
     return this->operator+=(1); 
    } 

    float fetch_and_decrement() 
    { 
     return this->operator+=(-1); 
    } 
    }; 

謝謝!

編輯:改變爲size_t到uint32_t的格雷戈·羅傑斯建議,這樣它更輕便

編輯:增加了對整個事情上市,一些修正。

更多編輯:就性能而言,在我的機器上用100個線程對加鎖的浮點數執行5.000.000次 += 操作需要3.6s,而我的原子浮點數即使帶着那個愚蠢的do-while,完成同樣的工作也只需要0.2s。所以超過30倍的性能提升意味着它是值得的——前提(這正是關鍵所在)是它是正確的。

甚至更​​多編輯:由於Awgn指出我的fetch_and_xxxx部分都是錯誤的。修復並刪除了我不確定的部分API(模板化內存模型)。並按照運算符+ =執行其他操作以避免代碼重複

已添加:添加了運算符* =和運算符/ =,因爲如果沒有它們,浮點數將不會浮點。由於Peterchen的評論說,這是發現

編輯:最新的代碼版本如下(我會離開舊版本以供參考雖然)

#include <tbb/atomic.h> 
    typedef unsigned int  uint_32; 
    typedef __TBB_LONG_LONG  uint_64; 

    template<typename FLOATING_POINT,typename MEMORY_BLOCK> 
    struct atomic_float_ 
    { 
    /* CRC Card ----------------------------------------------------- 
    | Class:   atmomic float template class 
    | 
    | Responsability: handle integral atomic memory as it were a float, 
    |     but partially bypassing FPU, SSE/MMX, so it is 
    |     slower than a true float, but faster and smaller 
    |     than a locked float. 
    |      *Warning* If your float usage is thwarted by 
    |     the A-B-A problem this class isn't for you 
    |      *Warning* Atomic specification says we return, 
    |     values not l-values. So (i = j) = k doesn't work. 
    | 
    | Collaborators: intel's tbb::atomic handles memory atomicity 
    ----------------------------------------------------------------*/ 
    typedef typename atomic_float_<FLOATING_POINT,MEMORY_BLOCK> self_t; 

    tbb::atomic<MEMORY_BLOCK> atomic_value_; 

    template<memory_semantics M> 
    FLOATING_POINT fetch_and_store(FLOATING_POINT value) 
    { 
     const MEMORY_BLOCK value_ = 
      atomic_value_.tbb::atomic<MEMORY_BLOCK>::fetch_and_store<M>((MEMORY_BLOCK&)value); 
     //atomic specification requires returning old value, not new one 
     return reinterpret_cast<const FLOATING_POINT&>(value_); 
    } 

    FLOATING_POINT fetch_and_store(FLOATING_POINT value) 
    { 
     const MEMORY_BLOCK value_ = 
      atomic_value_.tbb::atomic<MEMORY_BLOCK>::fetch_and_store((MEMORY_BLOCK&)value); 
     //atomic specification requires returning old value, not new one 
     return reinterpret_cast<const FLOATING_POINT&>(value_); 
    } 

    template<memory_semantics M> 
    FLOATING_POINT compare_and_swap(FLOATING_POINT value, FLOATING_POINT comparand) 
    { 
     const MEMORY_BLOCK value_ = 
      atomic_value_.tbb::atomic<MEMORY_BLOCK>::compare_and_swap<M>((MEMORY_BLOCK&)value,(MEMORY_BLOCK&)compare); 
     //atomic specification requires returning old value, not new one 
     return reinterpret_cast<const FLOATING_POINT&>(value_); 
    } 

    FLOATING_POINT compare_and_swap(FLOATING_POINT value, FLOATING_POINT compare) 
    { 
     const MEMORY_BLOCK value_ = 
      atomic_value_.tbb::atomic<MEMORY_BLOCK>::compare_and_swap((MEMORY_BLOCK&)value,(MEMORY_BLOCK&)compare); 
     //atomic specification requires returning old value, not new one 
     return reinterpret_cast<const FLOATING_POINT&>(value_); 
    } 

    operator FLOATING_POINT() const volatile // volatile qualifier here for backwards compatibility 
    { 
     const MEMORY_BLOCK value_ = atomic_value_; 
     return reinterpret_cast<const FLOATING_POINT&>(value_); 
    } 

    //Note: atomic specification says we return the a copy of the base value not an l-value 
    FLOATING_POINT operator=(FLOATING_POINT rhs) 
    { 
     const MEMORY_BLOCK value_ = atomic_value_.tbb::atomic<MEMORY_BLOCK>::operator =((MEMORY_BLOCK&)rhs); 
     return reinterpret_cast<const FLOATING_POINT&>(value_); 
    } 

    //Note: atomic specification says we return an l-value when operating among atomics 
    self_t& operator=(self_t& rhs) 
    { 
     const MEMORY_BLOCK value_ = atomic_value_.tbb::atomic<MEMORY_BLOCK>::operator =((MEMORY_BLOCK&)rhs); 
     return *this; 
    } 

    FLOATING_POINT& _internal_reference() const 
    { 
     return reinterpret_cast<FLOATING_POINT&>(atomic_value_.tbb::atomic<MEMORY_BLOCK>::_internal_reference()); 
    } 

    FLOATING_POINT operator+=(FLOATING_POINT value) 
    { 
     FLOATING_POINT old_value_, new_value_; 
     do 
     { 
      old_value_ = reinterpret_cast<FLOATING_POINT&>(atomic_value_); 
      new_value_ = old_value_ + value; 
     //floating point binary representation is not an issue because 
     //we are using our self's compare and swap, thus comparing floats and floats 
     } while(self_t::compare_and_swap(new_value_,old_value_) != old_value_); 
     return (new_value_); //return resulting value 
    } 

    FLOATING_POINT operator*=(FLOATING_POINT value) 
    { 
     FLOATING_POINT old_value_, new_value_; 
     do 
     { 
      old_value_ = reinterpret_cast<FLOATING_POINT&>(atomic_value_); 
      new_value_ = old_value_ * value; 
     //floating point binary representation is not an issue becaus 
     //we are using our self's compare and swap, thus comparing floats and floats 
     } while(self_t::compare_and_swap(new_value_,old_value_) != old_value_); 
     return (new_value_); //return resulting value 
    } 

    FLOATING_POINT operator/=(FLOATING_POINT value) 
    { 
     FLOATING_POINT old_value_, new_value_; 
     do 
     { 
      old_value_ = reinterpret_cast<FLOATING_POINT&>(atomic_value_); 
      new_value_ = old_value_/value; 
     //floating point binary representation is not an issue because 
     //we are using our self's compare and swap, thus comparing floats and floats 
     } while(self_t::compare_and_swap(new_value_,old_value_) != old_value_); 
     return (new_value_); //return resulting value 
    } 

    FLOATING_POINT operator-=(FLOATING_POINT value) 
    { 
     return this->operator+=(-value); //return resulting value 
    } 

    //Prefix operator 
    FLOATING_POINT operator++() 
    { 
     return this->operator+=(1); //return resulting value 
    } 

    //Prefix operator 
    FLOATING_POINT operator--() 
    { 
     return this->operator+=(-1); //return resulting value 
    } 

    //Postfix operator 
    FLOATING_POINT operator++(int) 
    { 
     const FLOATING_POINT temp = this; 
     this->operator+=(1); 
     return temp//return resulting value 
    } 

    //Postfix operator 
    FLOATING_POINT operator--(int) 
    { 
     const FLOATING_POINT temp = this; 
     this->operator+=(1); 
     return temp//return resulting value 
    } 

    FLOATING_POINT fetch_and_add(FLOATING_POINT addend) 
    { 
     const FLOATING_POINT old_value_ = atomic_value_; 
     this->operator+=(addend); 
     //atomic specification requires returning old value, not new one as in operator x= 
     return old_value_; 
    } 

    FLOATING_POINT fetch_and_increment() 
    { 
     const FLOATING_POINT old_value_ = atomic_value_; 
     this->operator+=(+1); 
     //atomic specification requires returning old value, not new one as in operator x= 
     return old_value_; 
    } 

    FLOATING_POINT fetch_and_decrement() 
    { 
     const FLOATING_POINT old_value_ = atomic_value_; 
     this->operator+=(-1); 
     //atomic specification requires returning old value, not new one as in operator x= 
     return old_value_; 
    } 
    }; 

    typedef atomic_float_<float,uint_32> AtomicFloat; 
    typedef atomic_float_<double,uint_64> AtomicDouble; 
+0

這對操作者略惡=返回一個值,由於在內建類型它的計算結果爲一個左值(T&其中T是類型)。對於那些類型「(i = j)= k」是不常見的但是合法的,並且將k的值賦給i。 – 2008-10-28 14:57:05

+0

一個很好的觀點,並在我的最新版本的代碼中回答。但是返回T而不是運算符=的左值是tbb中原子值的正確行爲。 – 2008-10-29 03:35:11

+0

嘿@RobertGould。非常感謝您實施功能並在此分享。我有兩個問題:(1)時間是否仍然有效?我的意思是,在我的平臺上,當我使用原子版本而不是`std :: mutex`時,我無法加速,(2)這段代碼是否有許可證?如果我借用它並使其在我的項目的標準庫中使用`std :: atomic`,我該怎麼辦? – 2018-02-20 13:43:14

回答

5

我強烈建議不要使用公有繼承。我不知道原子類型的實現是什麼樣的,但我猜它重載了一些把它當作整數類型來使用的運算符,這意味着在許多(也許是大多數)情況下,起作用的會是這些整數提升,而不是你的float。

我看不出有任何理由爲什麼這是行不通的,但像你我有辦法證明......

一點注意:你的operator float()沒有load-acquire(加載-獲取)語義;另外,它難道不應該被標記爲const volatile(或者至少是const)嗎?

編輯:如果你要提供operator--(),就應該同時提供前綴和後綴兩種形式。

3

它看起來像你實現假定sizeof(size_t) == sizeof(float)。對於你的目標平臺,這會永遠如此嗎?

而且我也不會說線程異端這麼多鑄造異端。 :)

+0

好吧不一定,但我打算把靜態聲明比較sizeof(float)== sizeof(size_t)作爲編譯後衛 – 2008-10-28 03:44:45

+0

這是什麼讓你僅僅使用uint32_t? – 2008-10-28 03:46:13

+0

好點我的朋友! – 2008-10-28 03:53:22

0

從我對這段代碼的閱讀來看,如果哪個編譯器爲它生成的彙編不是原子的,我會對這個編譯器非常惱火。

0

讓您的編譯器生成彙編代碼並查看它。如果某個操作不止一條彙編指令,則它不是原子操作,需要加鎖才能在多處理器系統中正常運行。

不幸的是,我不能確定相反的情況也是如此 - 單指令操作保證是原子的。我不知道多處理器編程的細節。我可以爲任何結果提供一個案例。 (如果任何人有一些明確的信息,請隨時在附和。)

1

這是在英特爾論壇上討論之後代碼目前的狀態,但它仍然沒有經過徹底驗證,不能保證在所有情況下都能正確工作。

#include <tbb/atomic.h> 
    typedef unsigned int  uint_32; 
    typedef __TBB_LONG_LONG  uint_64; 

    template<typename FLOATING_POINT,typename MEMORY_BLOCK> 
    struct atomic_float_ 
    { 
    /* CRC Card ----------------------------------------------------- 
    | Class:   atmomic float template class 
    | 
    | Responsability: handle integral atomic memory as it were a float, 
    |     but partially bypassing FPU, SSE/MMX, so it is 
    |     slower than a true float, but faster and smaller 
    |     than a locked float. 
    |      *Warning* If your float usage is thwarted by 
    |     the A-B-A problem this class isn't for you 
    |      *Warning* Atomic specification says we return, 
    |     values not l-values. So (i = j) = k doesn't work. 
    | 
    | Collaborators: intel's tbb::atomic handles memory atomicity 
    ----------------------------------------------------------------*/ 
    typedef typename atomic_float_<FLOATING_POINT,MEMORY_BLOCK> self_t; 

    tbb::atomic<MEMORY_BLOCK> atomic_value_; 

    template<memory_semantics M> 
    FLOATING_POINT fetch_and_store(FLOATING_POINT value) 
    { 
     const MEMORY_BLOCK value_ = 
      atomic_value_.tbb::atomic<MEMORY_BLOCK>::fetch_and_store<M>((MEMORY_BLOCK&)value); 
     //atomic specification requires returning old value, not new one 
     return reinterpret_cast<const FLOATING_POINT&>(value_); 
    } 

    FLOATING_POINT fetch_and_store(FLOATING_POINT value) 
    { 
     const MEMORY_BLOCK value_ = 
      atomic_value_.tbb::atomic<MEMORY_BLOCK>::fetch_and_store((MEMORY_BLOCK&)value); 
     //atomic specification requires returning old value, not new one 
     return reinterpret_cast<const FLOATING_POINT&>(value_); 
    } 

    template<memory_semantics M> 
    FLOATING_POINT compare_and_swap(FLOATING_POINT value, FLOATING_POINT comparand) 
    { 
     const MEMORY_BLOCK value_ = 
      atomic_value_.tbb::atomic<MEMORY_BLOCK>::compare_and_swap<M>((MEMORY_BLOCK&)value,(MEMORY_BLOCK&)compare); 
     //atomic specification requires returning old value, not new one 
     return reinterpret_cast<const FLOATING_POINT&>(value_); 
    } 

    FLOATING_POINT compare_and_swap(FLOATING_POINT value, FLOATING_POINT compare) 
    { 
     const MEMORY_BLOCK value_ = 
      atomic_value_.tbb::atomic<MEMORY_BLOCK>::compare_and_swap((MEMORY_BLOCK&)value,(MEMORY_BLOCK&)compare); 
     //atomic specification requires returning old value, not new one 
     return reinterpret_cast<const FLOATING_POINT&>(value_); 
    } 

    operator FLOATING_POINT() const volatile // volatile qualifier here for backwards compatibility 
    { 
     const MEMORY_BLOCK value_ = atomic_value_; 
     return reinterpret_cast<const FLOATING_POINT&>(value_); 
    } 

    //Note: atomic specification says we return the a copy of the base value not an l-value 
    FLOATING_POINT operator=(FLOATING_POINT rhs) 
    { 
     const MEMORY_BLOCK value_ = atomic_value_.tbb::atomic<MEMORY_BLOCK>::operator =((MEMORY_BLOCK&)rhs); 
     return reinterpret_cast<const FLOATING_POINT&>(value_); 
    } 

    //Note: atomic specification says we return an l-value when operating among atomics 
    self_t& operator=(self_t& rhs) 
    { 
     const MEMORY_BLOCK value_ = atomic_value_.tbb::atomic<MEMORY_BLOCK>::operator =((MEMORY_BLOCK&)rhs); 
     return *this; 
    } 

    FLOATING_POINT& _internal_reference() const 
    { 
     return reinterpret_cast<FLOATING_POINT&>(atomic_value_.tbb::atomic<MEMORY_BLOCK>::_internal_reference()); 
    } 

    FLOATING_POINT operator+=(FLOATING_POINT value) 
    { 
     FLOATING_POINT old_value_, new_value_; 
     do 
     { 
      old_value_ = reinterpret_cast<FLOATING_POINT&>(atomic_value_); 
      new_value_ = old_value_ + value; 
     //floating point binary representation is not an issue because 
     //we are using our self's compare and swap, thus comparing floats and floats 
     } while(self_t::compare_and_swap(new_value_,old_value_) != old_value_); 
     return (new_value_); //return resulting value 
    } 

    FLOATING_POINT operator*=(FLOATING_POINT value) 
    { 
     FLOATING_POINT old_value_, new_value_; 
     do 
     { 
      old_value_ = reinterpret_cast<FLOATING_POINT&>(atomic_value_); 
      new_value_ = old_value_ * value; 
     //floating point binary representation is not an issue becaus 
     //we are using our self's compare and swap, thus comparing floats and floats 
     } while(self_t::compare_and_swap(new_value_,old_value_) != old_value_); 
     return (new_value_); //return resulting value 
    } 

    FLOATING_POINT operator/=(FLOATING_POINT value) 
    { 
     FLOATING_POINT old_value_, new_value_; 
     do 
     { 
      old_value_ = reinterpret_cast<FLOATING_POINT&>(atomic_value_); 
      new_value_ = old_value_/value; 
     //floating point binary representation is not an issue because 
     //we are using our self's compare and swap, thus comparing floats and floats 
     } while(self_t::compare_and_swap(new_value_,old_value_) != old_value_); 
     return (new_value_); //return resulting value 
    } 

    FLOATING_POINT operator-=(FLOATING_POINT value) 
    { 
     return this->operator+=(-value); //return resulting value 
    } 

    //Prefix operator 
    FLOATING_POINT operator++() 
    { 
     return this->operator+=(1); //return resulting value 
    } 

    //Prefix operator 
    FLOATING_POINT operator--() 
    { 
     return this->operator+=(-1); //return resulting value 
    } 

    //Postfix operator 
    FLOATING_POINT operator++(int) 
    { 
     const FLOATING_POINT temp = this; 
     this->operator+=(1); 
     return temp//return resulting value 
    } 

    //Postfix operator 
    FLOATING_POINT operator--(int) 
    { 
     const FLOATING_POINT temp = this; 
     this->operator+=(1); 
     return temp//return resulting value 
    } 

    FLOATING_POINT fetch_and_add(FLOATING_POINT addend) 
    { 
     const FLOATING_POINT old_value_ = atomic_value_; 
     this->operator+=(addend); 
     //atomic specification requires returning old value, not new one as in operator x= 
     return old_value_; 
    } 

    FLOATING_POINT fetch_and_increment() 
    { 
     const FLOATING_POINT old_value_ = atomic_value_; 
     this->operator+=(+1); 
     //atomic specification requires returning old value, not new one as in operator x= 
     return old_value_; 
    } 

    FLOATING_POINT fetch_and_decrement() 
    { 
     const FLOATING_POINT old_value_ = atomic_value_; 
     this->operator+=(-1); 
     //atomic specification requires returning old value, not new one as in operator x= 
     return old_value_; 
    } 
    }; 

    typedef atomic_float_<float,uint_32> AtomicFloat; 
    typedef atomic_float_<double,uint_64> AtomicDouble; 
1

雖然uint32_t的的大小可能相當於一個給定拱浮動的,通過重新解釋從一個鑄造成其他的你都隱含假設原子遞增,遞減和所有對比特的其他操作在語義上等同於兩種類型,這在實際中並不存在。我懷疑它按預期工作。

1

我強烈懷疑你在fetch_and_add等函數中能得到正確的值,因爲浮點數加法與整數加法是不同的。

這是我從這些算術獲得:

1 + 1 = 1.70141e+038 
100 + 1 = -1.46937e-037 
100 + 0.01 = 1.56743e+038 
23 + 42 = -1.31655e-036 

所以是的,它是線程安全的,但結果不是你所期望的。

無鎖算法(運營商+等)應有關原子工作(沒有檢查算法本身..)


其他解決方案: 因爲它是所有的加法和減法,您可能能夠爲每個線程提供自己的實例,然後添加來自多個線程的結果。

1

只是對此補充一點說明(我本想發表評論,但顯然新用戶不允許評論):對引用使用reinterpret_cast,在GCC 4.1 -O3下會生成不正確的代碼。這似乎在4.4中已經修復,因爲在那裏它可以正常工作。把reinterpret_cast改成對指針進行轉換,雖然略微醜陋,但在兩種情況下都能正常工作。