2012-07-29 71 views
0

我想使用 Tesseract 的 Cube 選項,但出現錯誤(Android OCR:在 Tesseract 中使用 Cube 選項時,應用程序崩潰)。我的代碼如下:

package com.datumdroid.android.ocr.simple; 

import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.io.InputStream; 
import java.io.OutputStream; 
import java.util.zip.GZIPInputStream; 

import android.app.Activity; 
import android.content.Intent; 
import android.content.res.AssetManager; 
import android.graphics.Bitmap; 
import android.graphics.BitmapFactory; 
import android.graphics.Matrix; 
import android.media.ExifInterface; 
import android.net.Uri; 
import android.os.Bundle; 
import android.os.Environment; 
import android.provider.MediaStore; 
import android.util.Log; 
import android.view.View; 
import android.widget.Button; 
import android.widget.EditText; 

import com.googlecode.tesseract.android.TessBaseAPI; 

/**
 * Single-screen OCR demo: copies the bundled Tesseract data files to external
 * storage, lets the user take a photo with the camera app, runs Tesseract's
 * cube engine on the photo and appends the recognised text to a text field.
 */
public class SimpleAndroidOCRActivity extends Activity {
    public static final String PACKAGE_NAME = "com.datumdroid.android.ocr.simple";
    public static final String DATA_PATH = Environment
            .getExternalStorageDirectory().toString() + "/SimpleAndroidOCR/";

    // You should have the trained data file in assets folder
    // You can get them at:
    // http://code.google.com/p/tesseract-ocr/downloads/list
    //
    // Must name a language whose cube data is bundled, because the engine is
    // initialised with OEM_CUBE_ONLY. Only ara.cube.* files are shipped; the
    // previous value "eng" made the native library crash (SIGSEGV) during
    // recognition, presumably because no eng.cube.* files exist.
    public static final String lang = "ara";

    private static final String TAG = "SimpleAndroidOCR.java";

    // Sub-directory, both inside assets and under DATA_PATH, holding Tesseract data.
    private static final String TESSDATA_DIR = "tessdata/";

    // Every data file that is copied from assets/tessdata to DATA_PATH/tessdata.
    private static final String[] TESSDATA_FILES = {
        "eng.traineddata",
        "ara.cube.bigrams",
        "ara.cube.fold",
        "ara.cube.lm",
        "ara.cube.nn",
        "ara.cube.params",
        "ara.cube.size",
        "ara.cube.word-freq",
        "ara.traineddata",
    };

    protected Button _button;
    // protected ImageView _image;
    protected EditText _field;
    protected String _path;
    protected boolean _taken;

    protected static final String PHOTO_TAKEN = "photo_taken";

    /**
     * Creates the working directories, installs the Tesseract data files and
     * wires up the capture button.
     */
    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.main);

        String[] paths = new String[] { DATA_PATH, DATA_PATH + TESSDATA_DIR };
        for (String path : paths) {
            File dir = new File(path);
            if (!dir.exists()) {
                if (!dir.mkdirs()) {
                    Log.v(TAG, "ERROR: Creation of directory " + path + " on sdcard failed");
                    return;
                } else {
                    Log.v(TAG, "Created directory " + path + " on sdcard");
                }
            }
        }

        // Tesseract reads its data from the file system, so the files bundled
        // in assets have to be copied out once.
        for (String name : TESSDATA_FILES) {
            copyAssetIfMissing(TESSDATA_DIR + name);
        }

        // _image = (ImageView) findViewById(R.id.image);
        _field = (EditText) findViewById(R.id.field);
        _button = (Button) findViewById(R.id.button);
        _button.setOnClickListener(new ButtonClickHandler());

        // DATA_PATH already ends with '/', so no extra separator is needed.
        _path = DATA_PATH + "ocr.jpg";
    }

    /**
     * Copies one asset to the same relative path under {@link #DATA_PATH}
     * unless the destination file already exists.
     *
     * Failures are logged, not thrown. A partially written destination file is
     * deleted so that the copy is retried on the next launch instead of a
     * truncated file being mistaken for a complete one.
     *
     * @param relativePath path of the asset, relative to the assets root and
     *                     to DATA_PATH (for example "tessdata/ara.cube.lm")
     */
    private void copyAssetIfMissing(String relativePath) {
        File target = new File(DATA_PATH + relativePath);
        if (target.exists()) {
            return;
        }

        InputStream in = null;
        OutputStream out = null;
        boolean complete = false;
        try {
            in = getAssets().open(relativePath);
            out = new FileOutputStream(target);

            // Transfer bytes from in to out
            byte[] buf = new byte[8192];
            int len;
            while ((len = in.read(buf)) != -1) {
                out.write(buf, 0, len);
            }
            out.flush();
            complete = true;

            Log.v(TAG, "Copied " + relativePath);
        } catch (IOException e) {
            Log.e(TAG, "Was unable to copy " + relativePath + " " + e.toString());
        } finally {
            closeQuietly(in);
            closeQuietly(out);
            if (!complete && target.exists() && !target.delete()) {
                Log.w(TAG, "Could not remove incomplete file " + target);
            }
        }
    }

    /** Closes a stream, logging (not propagating) any failure; null is ignored. */
    private static void closeQuietly(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            Log.w(TAG, "Failed to close stream: " + e.toString());
        }
    }

    /** Click listener of the capture button: launches the camera app. */
    public class ButtonClickHandler implements View.OnClickListener {
        public void onClick(View view) {
            Log.v(TAG, "Starting Camera app");
            startCameraActivity();
        }
    }

    // Simple android photo capture:
    // http://labs.makemachine.net/2010/03/simple-android-photo-capture/

    /** Asks the camera app to take a picture and store it at {@link #_path}. */
    protected void startCameraActivity() {
        File file = new File(_path);
        Uri outputFileUri = Uri.fromFile(file);

        final Intent intent = new Intent(MediaStore.ACTION_IMAGE_CAPTURE);
        intent.putExtra(MediaStore.EXTRA_OUTPUT, outputFileUri);

        startActivityForResult(intent, 0);
    }

    /** Runs OCR on the captured photo when the camera app reports success. */
    @Override
    protected void onActivityResult(int requestCode, int resultCode, Intent data) {

        Log.i(TAG, "resultCode: " + resultCode);

        if (resultCode == RESULT_OK) {
            onPhotoTaken();
        } else {
            Log.v(TAG, "User cancelled");
        }
    }

    // NOTE(review): super.onSaveInstanceState()/onRestoreInstanceState() are not
    // called, so the default view state is not saved or restored. Restoring it
    // together with the repeated onPhotoTaken() below would append the text
    // twice - confirm before changing.
    @Override
    protected void onSaveInstanceState(Bundle outState) {
        outState.putBoolean(SimpleAndroidOCRActivity.PHOTO_TAKEN, _taken);
    }

    @Override
    protected void onRestoreInstanceState(Bundle savedInstanceState) {
        Log.i(TAG, "onRestoreInstanceState()");
        if (savedInstanceState.getBoolean(SimpleAndroidOCRActivity.PHOTO_TAKEN)) {
            onPhotoTaken();
        }
    }

    /**
     * Loads the photo at {@link #_path}, rotates it upright, runs Tesseract on
     * it and appends the recognised text to the text field.
     *
     * Returns early (after logging) when the photo cannot be decoded or the
     * OCR engine cannot be initialised.
     */
    protected void onPhotoTaken() {
        _taken = true;

        BitmapFactory.Options options = new BitmapFactory.Options();
        options.inSampleSize = 4;

        // decodeFile() returns null when the file is missing or undecodable.
        Bitmap bitmap = BitmapFactory.decodeFile(_path, options);
        if (bitmap == null) {
            Log.e(TAG, "Couldn't decode photo at " + _path);
            return;
        }

        bitmap = rotateToUpright(bitmap);

        // Convert to ARGB_8888, required by tess. Done outside the EXIF
        // handling so it also happens when the orientation cannot be read.
        bitmap = bitmap.copy(Bitmap.Config.ARGB_8888, true);
        if (bitmap == null) {
            Log.e(TAG, "Couldn't convert photo to ARGB_8888");
            return;
        }

        // _image.setImageBitmap(bitmap);

        Log.v(TAG, "Before baseApi");

        String recognizedText;
        TessBaseAPI baseApi = new TessBaseAPI();
        try {
            baseApi.setDebug(true);
            if (!baseApi.init(DATA_PATH, lang, TessBaseAPI.OEM_CUBE_ONLY)) {
                Log.e(TAG, "Couldn't initialise Tesseract for language " + lang);
                return;
            }
            baseApi.setImage(bitmap);
            recognizedText = baseApi.getUTF8Text();
        } finally {
            // Always release the native engine, even when recognition fails.
            baseApi.end();
        }

        if (recognizedText == null) {
            return;
        }

        // You now have the text in recognizedText var, you can do anything with it.
        // We will display a stripped out trimmed alpha-numeric version of it (if lang is eng)
        // so that garbage doesn't make it to the display.

        Log.v(TAG, "OCRED TEXT: " + recognizedText);

        if (lang.equalsIgnoreCase("eng")) {
            recognizedText = recognizedText.replaceAll("[^a-zA-Z0-9]+", " ");
        }

        recognizedText = recognizedText.trim();

        if (recognizedText.length() != 0) {
            String existing = _field.getText().toString();
            _field.setText(existing.length() == 0
                    ? recognizedText
                    : existing + " " + recognizedText);
            _field.setSelection(_field.getText().toString().length());
        }

        // Cycle done.
    }

    /**
     * Rotates the bitmap according to the EXIF orientation stored in the file
     * at {@link #_path}.
     *
     * @param bitmap decoded photo, never null
     * @return the rotated bitmap, or the given bitmap when no rotation is
     *         needed or the EXIF data cannot be read
     */
    private Bitmap rotateToUpright(Bitmap bitmap) {
        int rotate = 0;
        try {
            ExifInterface exif = new ExifInterface(_path);
            int exifOrientation = exif.getAttributeInt(
                    ExifInterface.TAG_ORIENTATION,
                    ExifInterface.ORIENTATION_NORMAL);

            Log.v(TAG, "Orient: " + exifOrientation);

            switch (exifOrientation) {
            case ExifInterface.ORIENTATION_ROTATE_90:
                rotate = 90;
                break;
            case ExifInterface.ORIENTATION_ROTATE_180:
                rotate = 180;
                break;
            case ExifInterface.ORIENTATION_ROTATE_270:
                rotate = 270;
                break;
            }
        } catch (IOException e) {
            Log.e(TAG, "Couldn't correct orientation: " + e.toString());
            return bitmap;
        }

        Log.v(TAG, "Rotation: " + rotate);

        if (rotate == 0) {
            return bitmap;
        }

        // Setting pre rotate
        Matrix mtx = new Matrix();
        mtx.preRotate(rotate);

        // Rotating Bitmap
        return Bitmap.createBitmap(bitmap, 0, 0,
                bitmap.getWidth(), bitmap.getHeight(), mtx, false);
    }

    // www.Gaut.am was here
    // Thanks for reading!
}

我的logcat如下

07-29 14:04:03.028: I/Choreographer(601): Skipped 36 frames! The application may be doing too much work on its main thread. 
07-29 14:04:03.355: D/dalvikvm(601): Added shared lib /data/data/com.project.ocrutilityv4/lib/liblept.so 0x411e7cb8 
07-29 14:04:03.355: D/dalvikvm(601): Trying to load lib /data/data/com.project.ocrutilityv4/lib/libtess.so 0x411e7cb8 
07-29 14:04:04.735: D/dalvikvm(601): Added shared lib /data/data/com.project.ocrutilityv4/lib/libtess.so 0x411e7cb8 
07-29 14:04:06.595: A/libc(601): Fatal signal 11 (SIGSEGV) at 0x00000000 (code=1), thread 719 (AsyncTask #1) 
07-29 14:04:10.515: I/Choreographer(601): Skipped 948 frames! The application may be doing too much work on its main thread. 
07-29 14:04:12.666: E/Trace(728): error opening trace file: No such file or directory (2) 
07-29 14:04:13.555: W/Resources(728): Converting to int: TypedValue{t=0x3/d=0x35 "1900" a=2 r=0x7f06000f} 
07-29 14:04:13.555: W/Resources(728): Converting to int: TypedValue{t=0x3/d=0x35 "1900" a=2 r=0x7f06000f} 
07-29 14:04:22.065: D/dalvikvm(728): GC_CONCURRENT freed 123K, 3% free 8331K/8519K, paused 80ms+95ms, total 352ms 
07-29 14:04:22.886: I/Choreographer(728): Skipped 32 frames! The application may be doing too much work on its main thread. 
07-29 14:04:22.945: D/gralloc_goldfish(728): Emulator without GPU emulation detected. 

我已將以下文件添加到 tessdata 文件夾中,但沒有幫助。:(

ara.cube.bigrams 
ara.cube.fold 
ara.cube.lm 
ara.cube.nn 
ara.cube.params 
ara.cube.size 
ara.cube.word-freq 
ara.traineddata 

如果不設置 ocrEngineMode = TessBaseAPI.OEM_TESSERACT_CUBE_COMBINED,我的應用程序運行正常。

+0

我已經讀過那個討論,但它對我沒有幫助。:( – Sham 2012-07-31 11:45:28

回答

0

我在我的代碼中發現了錯誤,如下所示。

public static final String lang = "ara"; // 而不是 public static final String lang = "eng";

然後將以下文件添加到assets/tessdata文件夾後,上述代碼完美運行。

ara.cube.bigrams 
ara.cube.fold 
ara.cube.lm 
ara.cube.nn 
ara.cube.params 
ara.cube.size 
ara.cube.word-freq 
ara.traineddata 

感謝所有回答並幫助我的人。:)

+0

我也更新了上面的代碼……只要修正上面提到的錯誤,代碼就能完美運行。:) – Sham 2012-07-31 22:47:43

+0

感謝您的指導…… :) – Sham 2012-08-01 10:39:46

+0

這個問題快把我逼瘋了。謝謝您的回答 :)!不幸的是,當我拍攝一個單詞時,得到的結果卻是一堆亂碼。例如,如果我拍攝 البيت 的照片,得到的結果有很多行,以及許多與 البيت 無關的奇怪單詞。您也遇到過這種情況嗎? – 2015-09-10 13:53:31