VoskAPI: ExpectToken():io-funcs.cc:200) Failed to read token [started at file position 0], expected <TransitionModel>

I have a custom-trained Kaldi model for speech recognition of a few words. When I integrate the model into Vosk in Android Studio, it gives me the following error. Can someone please help?

VoskAPI: ExpectToken():io-funcs.cc:200) Failed to read token [started at file position 0], expected <TransitionModel>

Please click on the link to open my model structure.

Here is my code for the Vosk Android app:

// Copyright 2019 Alpha Cephei Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package org.vosk.demo;

import android.Manifest;
import android.app.Activity;
import android.content.pm.PackageManager;
import android.os.Bundle;
import android.text.method.ScrollingMovementMethod;
import android.widget.Button;
import android.widget.TextView;
import android.widget.Toast;
import android.widget.ToggleButton;

import org.json.JSONException;
import org.json.JSONObject;
import org.vosk.LibVosk;
import org.vosk.LogLevel;
import org.vosk.Model;
import org.vosk.Recognizer;
import org.vosk.android.RecognitionListener;
import org.vosk.android.SpeechService;
import org.vosk.android.SpeechStreamService;
import org.vosk.android.StorageService;

import java.io.IOException;
import java.io.InputStream;

import androidx.annotation.NonNull;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;

public class VoskActivity extends Activity implements RecognitionListener {

static private final int STATE_START = 0;
static private final int STATE_READY = 1;
static private final int STATE_DONE = 2;
static private final int STATE_FILE = 3;
static private final int STATE_MIC = 4;

/* Used to handle permission request */
private static final int PERMISSIONS_REQUEST_RECORD_AUDIO = 1;

private Model model;
private SpeechService speechService;
private SpeechStreamService speechStreamService;
private TextView resultView;

@Override
public void onCreate(Bundle state) {
    super.onCreate(state);
    setContentView(R.layout.main);

    // Setup layout
    resultView = findViewById(R.id.result_text);
    setUiState(STATE_START);

    findViewById(R.id.recognize_mic).setOnClickListener(view -> recognizeMicrophone());
    ((ToggleButton) findViewById(R.id.pause)).setOnCheckedChangeListener((view, isChecked) -> pause(isChecked));

    LibVosk.setLogLevel(LogLevel.INFO);

    // Check if user has given permission to record audio, init the model after permission is granted
    int permissionCheck = ContextCompat.checkSelfPermission(getApplicationContext(), Manifest.permission.RECORD_AUDIO);
    if (permissionCheck != PackageManager.PERMISSION_GRANTED) {
        ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.RECORD_AUDIO}, PERMISSIONS_REQUEST_RECORD_AUDIO);
    } else {
        initModel();
    }
}

private void initModel() {
    StorageService.unpack(this, "model-en-us", "model",
            (model) -> {
                this.model = model;
                setUiState(STATE_READY);
            },
            (exception) -> setErrorState("Failed to unpack the model: " + exception.getMessage()));
}
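
// (Not part of the demo): "model-en-us" above must be a folder under
// app/src/main/assets/ containing the actual model files. If final.mdl was
// copied as a symlink from the Kaldi experiment directory, only the path text
// ends up in the APK, and loading fails with "expected <TransitionModel>".
// A hypothetical quick check, assuming the standard Vosk layout am/final.mdl:
private void checkModelAsset() {
    try (InputStream in = getAssets().open("model-en-us/am/final.mdl")) {
        long size = 0;
        byte[] buf = new byte[4096];
        for (int n; (n = in.read(buf)) != -1; ) size += n;
        // A healthy acoustic model is megabytes in size; a copied symlink
        // is only as long as the path string it contains.
        if (size < 1024) {
            setErrorState("final.mdl is only " + size + " bytes; copied symlink?");
        }
    } catch (IOException e) {
        setErrorState("Cannot read final.mdl from assets: " + e.getMessage());
    }
}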


@Override
public void onRequestPermissionsResult(int requestCode,
                                       @NonNull String[] permissions, @NonNull int[] grantResults) {
    super.onRequestPermissionsResult(requestCode, permissions, grantResults);

    if (requestCode == PERMISSIONS_REQUEST_RECORD_AUDIO) {
        if (grantResults.length > 0 && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
            // Recognizer initialization is time-consuming and involves IO,
            // so we execute it asynchronously
            initModel();
        } else {
            finish();
        }
    }
}

@Override
public void onDestroy() {
    super.onDestroy();

    if (speechService != null) {
        speechService.stop();
        speechService.shutdown();
    }

    if (speechStreamService != null) {
        speechStreamService.stop();
    }
}

@Override
public void onResult(String s) {
    try {
        JSONObject o = new JSONObject(s);
        String spoken = o.getString("text");

        // Map each recognized word to what should be shown: command words
        // as-is, digit words (and common homophones) as digits.
        String display = null;
        switch (spoken) {
            case "yes":
            case "no":
            case "help":
            case "stop":
            case "hey":
                display = spoken;
                break;
            case "one":              display = "1"; break;
            case "two": case "to":   display = "2"; break;
            case "three":            display = "3"; break;
            case "four": case "for": display = "4"; break;
            case "five":             display = "5"; break;
            case "six":              display = "6"; break;
            case "seven":            display = "7"; break;
            case "eight":            display = "8"; break;
            case "nine":             display = "9"; break;
        }

        if (display != null) {
            Toast.makeText(getApplicationContext(), display, Toast.LENGTH_SHORT).show();
        }
    } catch (JSONException e) {
        Toast.makeText(getApplicationContext(), e.getMessage(), Toast.LENGTH_LONG).show();
    }
}

@Override
public void onFinalResult(String hypothesis) {
   // Toast.makeText(getApplicationContext(), hypothesis, Toast.LENGTH_LONG).show();
 //   setUiState(STATE_DONE);
    if (speechStreamService != null) {
        speechStreamService = null;
    }
}

@Override
public void onPartialResult(String hypothesis)
{
}

@Override
public void onError(Exception e) {
    setErrorState(e.getMessage());
}

@Override
public void onTimeout() {
    setUiState(STATE_DONE);
}

private void setUiState(int state) {
    switch (state) {
        case STATE_START:
            resultView.setText(R.string.preparing);
            resultView.setMovementMethod(new ScrollingMovementMethod());
            findViewById(R.id.recognize_mic).setEnabled(false);
            findViewById(R.id.pause).setEnabled(false);
            break;
        case STATE_READY:
            resultView.setText(R.string.ready);
            ((Button) findViewById(R.id.recognize_mic)).setText(R.string.recognize_microphone);
            findViewById(R.id.recognize_mic).setEnabled(true);
            findViewById(R.id.pause).setEnabled(false);
            break;
        case STATE_DONE:
            ((Button) findViewById(R.id.recognize_mic)).setText(R.string.recognize_microphone);
            findViewById(R.id.recognize_mic).setEnabled(true);
            findViewById(R.id.pause).setEnabled(false);
            break;
        case STATE_FILE:
            resultView.setText(getString(R.string.starting));
            findViewById(R.id.recognize_mic).setEnabled(false);
            findViewById(R.id.pause).setEnabled(false);
            break;
        case STATE_MIC:
            ((Button) findViewById(R.id.recognize_mic)).setText(R.string.stop_microphone);
            resultView.setText(getString(R.string.say_something));
            findViewById(R.id.recognize_mic).setEnabled(true);
            findViewById(R.id.pause).setEnabled(true);
            break;
        default:
            throw new IllegalStateException("Unexpected value: " + state);
    }
}

private void setErrorState(String message) {
    resultView.setText(message);
    ((Button) findViewById(R.id.recognize_mic)).setText(R.string.recognize_microphone);
    findViewById(R.id.recognize_mic).setEnabled(false);
}

private void recognizeFile() {
    if (speechStreamService != null) {
        setUiState(STATE_DONE);
        speechStreamService.stop();
        speechStreamService = null;
    } else {
        setUiState(STATE_FILE);
        try {
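            // This JSON string is a grammar: recognition is limited to the
            // listed phrases, and "[unk]" absorbs everything else.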
            Recognizer rec = new Recognizer(model, 16000.f, "[\"one zero zero zero one\", " +
                    "\"oh zero one two three four five six seven eight nine\", \"[unk]\"]");

            InputStream ais = getAssets().open(
                    "10001-90210-01803.wav");
            if (ais.skip(44) != 44) throw new IOException("File too short");

            speechStreamService = new SpeechStreamService(rec, ais, 16000);
            speechStreamService.start(this);
        } catch (IOException e) {
            setErrorState(e.getMessage());
        }
    }
}

private void recognizeMicrophone() {
    if (speechService != null) {
        setUiState(STATE_DONE);
        speechService.stop();
        speechService = null;
    } else {
        setUiState(STATE_MIC);
        try {
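            // The sample rate passed here must match the rate the acoustic
            // model was trained at (16 kHz for most Vosk models); a mismatch
            // does not throw, it just wrecks recognition accuracy.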
            Recognizer rec = new Recognizer(model, 16000.0f);
            speechService = new SpeechService(rec, 16000.0f);
            speechService.startListening(this);
        } catch (IOException e) {
            setErrorState(e.getMessage());
        }
    }
}
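
// (Suggestion, not in the original code): since the custom model only needs a
// few words, the microphone recognizer could be constrained with the same kind
// of JSON grammar recognizeFile() uses, so out-of-list audio maps to "[unk]".
// A sketch, assuming the command words handled in onResult():
//
//     Recognizer rec = new Recognizer(model, 16000.0f,
//             "[\"yes no help stop hey one two three four five six seven eight nine\", \"[unk]\"]");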


private void pause(boolean checked) {
    if (speechService != null) {
        speechService.setPause(checked);
    }
}

}

Did you copy the “final.mdl” symbolic link instead of the file it’s pointing to?
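
If so, that would explain the error: a real final.mdl is a Kaldi binary whose first bytes are the binary marker followed by the <TransitionModel> token, while a copied symlink is just a short path string, so the read fails at file position 0. Here is a minimal sketch to inspect the file on a desktop machine (the path is an assumption based on the standard Vosk model layout, am/final.mdl; adjust it to your structure):

import java.io.FileInputStream;
import java.io.IOException;

// Prints the first bytes of final.mdl: a genuine model shows the
// "<TransitionModel>" token immediately, a copied symlink shows a path.
public class CheckFinalMdl {
    public static void main(String[] args) throws IOException {
        try (FileInputStream in = new FileInputStream("model/am/final.mdl")) {
            byte[] head = new byte[40];
            int n = in.read(head);
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < n; i++) {
                char c = (char) (head[i] & 0xFF);
                sb.append(c >= 32 && c < 127 ? c : '.');  // mask non-printable bytes
            }
            System.out.println(sb);  // expect something like ".B<TransitionModel> ..."
        }
    }
}

When exporting the model from the Kaldi experiment directory, copying with cp -L (dereference) replaces symlinks with the files they point to.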
