
I want to disable the backward computation of certain convolution layers in Caffe. How can I do this?
I have used the propagate_down setting, but found that it works for fc layers and not for convolution layers. How can I prevent the backward computation of specific layers in Caffe?

Please help!

First update: I set propagate_down: false in the test/pool_proj layer. I do not want it to back-propagate (but the other layers should). However, the log file says that this layer still needs backward computation. Let us denote a deep learning model with two paths from the input layer to the output layer, p1: A->B->C->D and p2: A->B->C1->D, where A is the input layer, D is an fc layer, and the rest are conv layers. When gradients are propagated backward from D to the earlier layers, p1 goes through the normal backward pass, but p2 stops at C1 (the weights of the C1 layer are still updated; it just does not pass its error back to the earlier layer).

prototxt

layer { 
    name: "data" 
    type: "Data" 
    top: "data" 
    top: "label" 
    include { 
    phase: TRAIN 
    } 
    transform_param { 
    mirror: true 
    crop_size: 224 
    mean_value: 104 
    mean_value: 117 
    mean_value: 123 
    } 
    data_param { 
    source: "/media/eric/main/data/ImageNet/ilsvrc12_train_lmdb" 
    batch_size: 32 
    backend: LMDB 
    } 
} 
layer { 
    name: "data" 
    type: "Data" 
    top: "data" 
    top: "label" 
    include { 
    phase: TEST 
    } 
    transform_param { 
    mirror: false 
    crop_size: 224 
    mean_value: 104 
    mean_value: 117 
    mean_value: 123 
    } 
    data_param { 
    source: "/media/eric/main/data/ImageNet/ilsvrc12_val_lmdb" 
    batch_size: 50 
    backend: LMDB 
    } 
} 
layer { 
    name: "conv1/7x7_s2" 
    type: "Convolution" 
    bottom: "data" 
    top: "conv1/7x7_s2" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 64 
    pad: 3 
    kernel_size: 7 
    stride: 2 
    weight_filler { 
     type: "xavier" 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.2 
    } 
    } 
} 
layer { 
    name: "conv1/relu_7x7" 
    type: "ReLU" 
    bottom: "conv1/7x7_s2" 
    top: "conv1/7x7_s2" 
} 
layer { 
    name: "pool1/3x3_s2" 
    type: "Pooling" 
    bottom: "conv1/7x7_s2" 
    top: "pool1/3x3_s2" 
    pooling_param { 
    pool: MAX 
    kernel_size: 3 
    stride: 2 
    } 
} 
layer { 
    name: "pool1/norm1" 
    type: "LRN" 
    bottom: "pool1/3x3_s2" 
    top: "pool1/norm1" 
    lrn_param { 
    local_size: 5 
    alpha: 0.0001 
    beta: 0.75 
    } 
} 
layer { 
    name: "conv2/3x3_reduce" 
    type: "Convolution" 
    bottom: "pool1/norm1" 
    top: "conv2/3x3_reduce" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 64 
    kernel_size: 1 
    weight_filler { 
     type: "xavier" 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.2 
    } 
    } 
} 
layer { 
    name: "conv2/relu_3x3_reduce" 
    type: "ReLU" 
    bottom: "conv2/3x3_reduce" 
    top: "conv2/3x3_reduce" 
} 
layer { 
    name: "conv2/3x3" 
    type: "Convolution" 
    bottom: "conv2/3x3_reduce" 
    top: "conv2/3x3" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 192 
    pad: 1 
    kernel_size: 3 
    weight_filler { 
     type: "xavier" 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.2 
    } 
    } 
} 
layer { 
    name: "conv2/relu_3x3" 
    type: "ReLU" 
    bottom: "conv2/3x3" 
    top: "conv2/3x3" 
} 
layer { 
    name: "conv2/norm2" 
    type: "LRN" 
    bottom: "conv2/3x3" 
    top: "conv2/norm2" 
    lrn_param { 
    local_size: 5 
    alpha: 0.0001 
    beta: 0.75 
    } 
} 
layer { 
    name: "pool2/3x3_s2" 
    type: "Pooling" 
    bottom: "conv2/norm2" 
    top: "pool2/3x3_s2" 
    pooling_param { 
    pool: MAX 
    kernel_size: 3 
    stride: 2 
    } 
} 


layer { 
    name: "test/5x5_reduce" 
    type: "Convolution" 
    bottom: "pool2/3x3_s2" 
    top: "test/5x5_reduce" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 16 
    kernel_size: 1 
    weight_filler { 
     type: "xavier" 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.2 
    } 
    } 
} 
layer { 
    name: "test/relu_5x5_reduce" 
    type: "ReLU" 
    bottom: "test/5x5_reduce" 
    top: "test/5x5_reduce" 
} 
layer { 
    name: "test/5x5" 
    type: "Convolution" 
    bottom: "test/5x5_reduce" 
    top: "test/5x5" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 32 
    pad: 2 
    kernel_size: 5 
    weight_filler { 
     type: "xavier" 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.2 
    } 
    } 
} 
layer { 
    name: "test/relu_5x5" 
    type: "ReLU" 
    bottom: "test/5x5" 
    top: "test/5x5" 
} 
layer { 
    name: "test/pool" 
    type: "Pooling" 
    bottom: "pool2/3x3_s2" 
    top: "test/pool" 
    pooling_param { 
    pool: MAX 
    kernel_size: 3 
    stride: 1 
    pad: 1 
    } 
} 
layer { 
    name: "test/pool_proj" 
    type: "Convolution" 
    bottom: "test/pool" 
    top: "test/pool_proj" 
    propagate_down:false 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 32 
    kernel_size: 1 
    weight_filler { 
     type: "xavier" 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.2 
    } 
    } 
} 
layer { 
    name: "test/relu_pool_proj" 
    type: "ReLU" 
    bottom: "test/pool_proj" 
    top: "test/pool_proj" 
} 
layer { 
    name: "test/output" 
    type: "Concat" 
    bottom: "test/5x5" 
    bottom: "test/pool_proj" 
    top: "test/output" 
} 

layer{ 
    name: "test_output/pool" 
    type: "Pooling" 
    bottom: "test/output" 
    top: "test/output" 
    pooling_param{ 
    pool: MAX 
    kernel_size: 28 
    } 
} 

layer { 
    name: "classifier" 
    type: "InnerProduct" 
    bottom: "test/output" 
    top: "classifier" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    inner_product_param { 
    num_output: 1000 
    weight_filler { 
     type: "xavier" 
    } 
    bias_filler { 
     type: "constant" 
     value: 0 
    } 
    } 
} 

layer { 
    name: "loss3" 
    type: "SoftmaxWithLoss" 
    bottom: "classifier" 
    bottom: "label" 
    top: "loss3" 
    loss_weight: 1 
} 
layer { 
    name: "top-1" 
    type: "Accuracy" 
    bottom: "classifier" 
    bottom: "label" 
    top: "top-1" 
    include { 
    phase: TEST 
    } 
} 
layer { 
    name: "top-5" 
    type: "Accuracy" 
    bottom: "classifier" 
    bottom: "label" 
    top: "top-5" 
    include { 
    phase: TEST 
    } 
    accuracy_param { 
    top_k: 5 
    } 
} 

log

I1116 15:44:04.405261 19358 net.cpp:226] loss3 needs backward computation. 
I1116 15:44:04.405283 19358 net.cpp:226] classifier needs backward computation. 
I1116 15:44:04.405302 19358 net.cpp:226] test_output/pool needs backward computation. 
I1116 15:44:04.405320 19358 net.cpp:226] test/output needs backward computation. 
I1116 15:44:04.405339 19358 net.cpp:226] test/relu_pool_proj needs backward computation. 
I1116 15:44:04.405357 19358 net.cpp:226] test/pool_proj needs backward computation. 
I1116 15:44:04.405375 19358 net.cpp:228] test/pool does not need backward computation. 
I1116 15:44:04.405395 19358 net.cpp:226] test/relu_5x5 needs backward computation. 
I1116 15:44:04.405412 19358 net.cpp:226] test/5x5 needs backward computation. 
I1116 15:44:04.405431 19358 net.cpp:226] test/relu_5x5_reduce needs backward computation. 
I1116 15:44:04.405448 19358 net.cpp:226] test/5x5_reduce needs backward computation. 
I1116 15:44:04.405468 19358 net.cpp:226] pool2/3x3_s2_pool2/3x3_s2_0_split needs backward computation. 
I1116 15:44:04.405485 19358 net.cpp:226] pool2/3x3_s2 needs backward computation. 
I1116 15:44:04.405505 19358 net.cpp:226] conv2/norm2 needs backward computation. 
I1116 15:44:04.405522 19358 net.cpp:226] conv2/relu_3x3 needs backward computation. 
I1116 15:44:04.405542 19358 net.cpp:226] conv2/3x3 needs backward computation. 
I1116 15:44:04.405560 19358 net.cpp:226] conv2/relu_3x3_reduce needs backward computation. 
I1116 15:44:04.405578 19358 net.cpp:226] conv2/3x3_reduce needs backward computation. 
I1116 15:44:04.405596 19358 net.cpp:226] pool1/norm1 needs backward computation. 
I1116 15:44:04.405616 19358 net.cpp:226] pool1/3x3_s2 needs backward computation. 
I1116 15:44:04.405632 19358 net.cpp:226] conv1/relu_7x7 needs backward computation. 
I1116 15:44:04.405652 19358 net.cpp:226] conv1/7x7_s2 needs backward computation. 
I1116 15:44:04.405670 19358 net.cpp:228] data does not need backward computation. 
I1116 15:44:04.405705 19358 net.cpp:270] This network produces output loss3 
I1116 15:44:04.405745 19358 net.cpp:283] Network initialization done. 

'propagate_down' should be the way to prevent the gradient from propagating. (1) What do you mean by "does not work"? (2) Can you post the part of the prototxt with the conv layer? (3) Can you post the relevant ['debug_info' log](http://stackoverflow.com/q/40510706/1714410)? – Shai
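
(For reference, the debug_info log is switched on in the solver, not in the net. A minimal sketch, assuming a standard solver.prototxt for this net, would be to add the line below; Caffe then prints per-layer blob statistics during the forward and backward passes, which shows whether a layer's gradients are actually being computed.)

    # solver.prototxt -- assumed solver file for this net 
    debug_info: true    # print per-layer forward/backward blob statistics 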


Thanks for the suggestion. I have updated the question. – Lehyu

Answer


From Evan Shelhamer (https://groups.google.com/forum/#!topic/caffe-users/54Z-B-CXmLE):

propagate_down is intended to switch off backprop along certain paths from the loss, without turning off the earlier layers in the graph entirely. If the gradient reaches a layer through another path, or if regularization such as weight decay is not disabled, the parameters of those layers will still be updated. I suspect decay is still on for those layers, so you could set decay_mult: 0 for the weights and biases.

Setting lr_mult: 0, on the other hand, fixes the parameters and skips backprop where it is not needed.

You have decay_mult: 1 in some of the earlier layers, so gradients are still being computed there. Set lr_mult: 0 in all the layers whose weights should not be updated.

For example, change the following:

layer { 
    name: "conv1/7x7_s2" 
    type: "Convolution" 
    bottom: "data" 
    top: "conv1/7x7_s2" 
    param { 
    lr_mult: 1 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 64 
    pad: 3 
    kernel_size: 7 
    stride: 2 
    weight_filler { 
     type: "xavier" 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.2 
    } 
    } 
} 
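
As an illustration of the advice above (this is a sketch, not part of the original answer), the same layer with its parameters frozen would have lr_mult and decay_mult set to 0 for both the weight and the bias params; the same change would be applied to every layer that should not be updated:

layer { 
    name: "conv1/7x7_s2" 
    type: "Convolution" 
    bottom: "data" 
    top: "conv1/7x7_s2" 
    param { 
    lr_mult: 0      # weights: no learning, backprop for this param is skipped 
    decay_mult: 0   # no weight decay either, so regularization does not change them 
    } 
    param { 
    lr_mult: 0      # biases: frozen as well 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 64 
    pad: 3 
    kernel_size: 7 
    stride: 2 
    weight_filler { 
     type: "xavier" 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.2 
    } 
    } 
} 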


Also for reference:


Thank you for your answer. Let us denote a deep learning model with two paths from the input layer to the output layer, p1: A->B->C->D and p2: A->B->C1->D, where A is the input layer, D is an fc layer, and the rest are conv layers. When gradients are propagated backward from D to the earlier layers, p1 goes through the normal backward pass, but p2 stops at C1 (the weights of the C1 layer are still updated; it just does not pass its error back to the earlier layer). – Lehyu
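
(For what it is worth, the behaviour described in this comment matches what propagate_down does. A minimal sketch of such a C1 layer, using hypothetical layer and blob names, is shown below: no gradient is sent back to the bottom blob B, so backprop along p2 stops here, while C1's own parameters still receive gradients and are updated because their lr_mult is nonzero.)

layer { 
    name: "C1"                # hypothetical conv layer where p2 should stop 
    type: "Convolution" 
    bottom: "B"               # no gradient is propagated back to this bottom blob 
    top: "C1" 
    propagate_down: false     # switch off backprop to the bottom along p2 
    param { 
    lr_mult: 1                # C1's own weights still learn 
    decay_mult: 1 
    } 
    param { 
    lr_mult: 2 
    decay_mult: 0 
    } 
    convolution_param { 
    num_output: 32 
    kernel_size: 1 
    weight_filler { 
     type: "xavier" 
    } 
    bias_filler { 
     type: "constant" 
     value: 0.2 
    } 
    } 
} 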