2

我試圖使用dlib來使用默認數據集(/dlib-19.0/examples/faces/training_with_face_landmarks.xml)和默認訓練樣本(train_shape_predictor_ex.cpp)訓練形狀預測器。使用默認數據集和訓練的形狀預測器的準確性較低

所以我想訓練一個與默認形狀預測器(shape_predictor_68_face_landmarks.dat)完全一樣的預測器,因爲我使用了同樣的數據集和相同的訓練代碼。但我遇到了一些問題。

訓練結束後,我得到的.dat文件只有16.6MB(而dlib默認的預測器shape_predictor_68_face_landmarks.dat有99.7MB)。測試我的.dat文件(16.6MB)時準確度較低,而測試默認的.dat文件(shape_predictor_68_face_landmarks.dat,99.7MB)時準確度很高。

我的形狀預測器:sp_default_settings.dat。默認的形狀預測器:shape_predictor_68_face_landmarks.dat。

培訓:

#include <QCoreApplication> 

#include <dlib/image_processing.h> 
#include <dlib/data_io.h> 
#include <iostream> 

using namespace dlib; 
using namespace std; 

std::vector<std::vector<double> > get_interocular_distances (
     const std::vector<std::vector<full_object_detection> >& objects 
     ); 

int main(int argc, char *argv[]) 
{ 
    QCoreApplication a(argc, argv); 

    try 
    { 

     const std::string faces_directory = "/home/user/Documents/dlib-19.0/examples/faces/"; 

     dlib::array<array2d<unsigned char> > images_train; 
     std::vector<std::vector<full_object_detection> > faces_train; 

     load_image_dataset(images_train, faces_train, faces_directory+"training_with_face_landmarks.xml"); 

     shape_predictor_trainer trainer; 

     trainer.set_oversampling_amount(300); 

     trainer.set_nu(0.05); 
     trainer.set_tree_depth(2); 

     trainer.be_verbose(); 

     shape_predictor sp = trainer.train(images_train, faces_train); 
     cout << "mean training error: "<< 
       test_shape_predictor(sp, images_train, faces_train, get_interocular_distances(faces_train)) << endl; 

     serialize(faces_directory+"sp_default_settings.dat") << sp; 
    } 
    catch (exception& e) 
    { 
     cout << "\nexception thrown!" << endl; 
     cout << e.what() << endl; 
    } 

    return a.exec(); 
} 

double interocular_distance (
     const full_object_detection& det 
     ) 
{ 
    dlib::vector<double,2> l, r; 
    double cnt = 0; 
    // Find the center of the left eye by averaging the points around 
    // the eye. 
    for (unsigned long i = 36; i <= 41; ++i) 
    { 
     l += det.part(i); 
     ++cnt; 
    } 
    l /= cnt; 

    // Find the center of the right eye by averaging the points around 
    // the eye. 
    cnt = 0; 
    for (unsigned long i = 42; i <= 47; ++i) 
    { 
     r += det.part(i); 
     ++cnt; 
    } 
    r /= cnt; 

    // Now return the distance between the centers of the eyes 
    return length(l-r); 
} 

std::vector<std::vector<double> > get_interocular_distances (
     const std::vector<std::vector<full_object_detection> >& objects 
     ) 
{ 
    std::vector<std::vector<double> > temp(objects.size()); 
    for (unsigned long i = 0; i < objects.size(); ++i) 
    { 
     for (unsigned long j = 0; j < objects[i].size(); ++j) 
     { 
      temp[i].push_back(interocular_distance(objects[i][j])); 
     } 
    } 
    return temp; 
} 

測試:

#include <QCoreApplication> 
#include <dlib/image_processing/frontal_face_detector.h> 
#include <dlib/image_processing/render_face_detections.h> 
#include <dlib/image_processing.h> 
#include <dlib/gui_widgets.h> 
#include <dlib/image_io.h> 
#include <dlib/data_io.h> 
#include <iostream> 

using namespace dlib; 
using namespace std; 

int main(int argc, char *argv[]) 
{ 
    QCoreApplication a(argc, argv); 

    try 
     { 

      // We need a face detector. We will use this to get bounding boxes for 
      // each face in an image. 
      frontal_face_detector detector = get_frontal_face_detector(); 
      // And we also need a shape_predictor. This is the tool that will predict face 
      // landmark positions given an image and face bounding box. Here we are just 
      // loading the model from the shape_predictor_68_face_landmarks.dat file you gave 
      // as a command line argument. 
      shape_predictor sp; 
      deserialize("/home/user/Downloads/muct-master/samples/sp_default_settings.dat") >> sp; 

      string srcDir = "/home/user/Downloads/muct-master/samples/selection/"; 
      string dstDir = "/home/user/Downloads/muct-master/samples/my_results_default/"; 

      std::vector<string> vecOfImg; 

      vecOfImg.push_back("i001qa-mn.jpg"); 
      vecOfImg.push_back("i002ra-mn.jpg"); 
      vecOfImg.push_back("i003ra-fn.jpg"); 
      vecOfImg.push_back("i003sa-fn.jpg"); 
      vecOfImg.push_back("i004qa-mn.jpg"); 
      vecOfImg.push_back("i004ra-mn.jpg"); 
      vecOfImg.push_back("i005ra-fn.jpg"); 
      vecOfImg.push_back("i006ra-mn.jpg"); 
      vecOfImg.push_back("i007qa-fn.jpg"); 
      vecOfImg.push_back("i008ra-mn.jpg"); 
      vecOfImg.push_back("i009qa-mn.jpg"); 
      vecOfImg.push_back("i009ra-mn.jpg"); 
      vecOfImg.push_back("i009sa-mn.jpg"); 
      vecOfImg.push_back("i010qa-mn.jpg"); 
      vecOfImg.push_back("i010sa-mn.jpg"); 
      vecOfImg.push_back("i011qa-mn.jpg"); 
      vecOfImg.push_back("i011ra-mn.jpg"); 
      vecOfImg.push_back("i012ra-mn.jpg"); 
      vecOfImg.push_back("i012sa-mn.jpg"); 
      vecOfImg.push_back("i014qa-fn.jpg"); 

      for(int imgC = 0; imgC < vecOfImg.size(); imgC++){ 

       array2d<rgb_pixel> img; 
       load_image(img, srcDir + vecOfImg.at(imgC)); 
       // Make the image larger so we can detect small faces. 
       pyramid_up(img); 

       // Now tell the face detector to give us a list of bounding boxes 
       // around all the faces in the image. 
       std::vector<rectangle> dets = detector(img); 
       cout << "Number of faces detected: " << dets.size() << endl; 

       // Now we will go ask the shape_predictor to tell us the pose of 
       // each face we detected. 
       std::vector<full_object_detection> shapes; 
       for (unsigned long j = 0; j < dets.size(); ++j) 
       { 
        full_object_detection shape = sp(img, dets[j]); 
        cout << "number of parts: "<< shape.num_parts() << endl; 
        cout << "pixel position of first part: " << shape.part(0) << endl; 
        cout << "pixel position of second part: " << shape.part(1) << endl; 

        for(unsigned long i = 0; i < shape.num_parts(); i++){ 
         draw_solid_circle(img, shape.part(i), 2, rgb_pixel(100,255,100)); 
        } 

        save_jpeg(img, dstDir + vecOfImg.at(imgC)); 
        // You get the idea, you can get all the face part locations if 
        // you want them. Here we just store them in shapes so we can 
        // put them on the screen. 
        shapes.push_back(shape); 
       } 

      } 

     } 
     catch (exception& e) 
     { 
      cout << "\nexception thrown!" << endl; 
      cout << e.what() << endl; 
     } 
    return a.exec(); 
} 

如果我使用相同的默認數據集和示例代碼,我的訓練/測試過程與默認模型的訓練之間有什麼區別?我怎樣才能訓練出一個和shape_predictor_68_face_landmarks.dat一樣的形狀預測器?

+1

即使你在sourceforge頁面上提出了一個問題(但沒有得到答案),那裏仍然有很多信息,很確定這個問題已經被討論過:) –

回答

0

它正在生成一個16.6MB的DAT文件,因爲您要麼使用幾個圖像來訓練,要麼不使用正確的設置。

根據this Github issue,您在列車過程中沒有使用最佳/默認設置。

在您的設置中,訓練器的過採樣量非常高(300),而默認值爲20。您還通過增大正則化(把nu參數調小)以及使用更小的樹深度,降低了模型的容量。

您的nu參數:0.05。默認值是0.1

你的樹深度:2,默認值爲4

通過反覆試錯地調整參數並重新訓練,您會在更小的文件大小下找到最佳的精度。

請記住,每個培訓過程大約需要45分鐘,而且您至少需要一臺16GB的RAM計算機。

1

示例數據集(/dlib-19.0/examples/faces/training_with_face_landmarks.xml)太小而無法訓練高質量模型。這不是dlib自帶的模型訓練出來的。

這些示例使用小數據集是爲了讓示例運行得更快。所有示例的目的是講解dlib的API,而不是提供現成可用的程序。它們只是文檔。需要由您自己利用dlib的API去做有意義的事情。

相關問題