AmberScript Transcription Service

Overview

The AmberscriptTranscriptionService uses the AmberScript transcription API to transcribe audio files. The audio is extracted from the video file of an Opencast recording and sent to the AmberScript server for processing. The AmberscriptTranscriptionService then periodically checks for a transcription result. Depending on the audio length and the job type chosen, transcribing can take some time. When the transcription result is ready, the service converts it to VTT format and attaches it to the recording. The recording itself is already available as soon as its own workflow finishes, i.e. before the transcription is ready. Once the transcription has been attached, the video can be played back with the transcription.

Configuration

Step 1: Get AmberScript API key

Step 2: Configure AmberscriptTranscriptionService

Edit opencast/etc/org.opencastproject.transcription.amberscript.AmberscriptTranscriptionService.cfg:

Step 3: Add the amberscript workflows to Opencast

In your Opencast workflow directory (usually /etc/workflows or /etc/opencast/workflows), add the workflows from the dropdowns below.

amberscript-attach-transcription.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Workflow "amberscript-attach-transcripts".

  Attaches a finished AmberScript transcription to a recording, merges it into
  the existing engage publication, updates the default OAI-PMH repository,
  takes a snapshot of the media package and finally cleans up.
  The ${transcriptionJobId} variable is supplied by the transcription service
  when it starts this workflow.
-->
<definition xmlns="http://workflow.opencastproject.org">
  <id>amberscript-attach-transcripts</id>
  <title>Attach caption/transcripts generated by AmberScript</title>
  <tags />
  <description>Attach transcription generated by the AmberScript service.
    This is an internal workflow, started by the Transcription Service.
  </description>
  <configuration_panel />

  <operations>

    <!-- Attach caption/transcript -->

    <operation id="amberscript-attach-transcription"
      fail-on-error="true"
      exception-handler-workflow="partial-error"
      description="Attach captions/transcription">
      <configurations>
        <!-- This is filled out by the transcription service when starting this workflow -->
        <configuration key="transcription-job-id">${transcriptionJobId}</configuration>
        <!-- Caption generated will have the default flavor based on the target-caption-format e.g. captions/vtt -->
        <configuration key="target-caption-format">vtt</configuration>
        <!-- The same tag is used below to select the caption for (re)publication -->
        <configuration key="target-tag">engage-download</configuration>
      </configurations>
    </operation>

    <!-- Merge caption/transcript to existing publication and republish -->
    <operation id="publish-engage"
      fail-on-error="true"
      exception-handler-workflow="partial-error"
      description="Distribute and publish to engage server">
      <configurations>
        <configuration key="download-source-flavors">dublincore/*,security/*</configuration>
        <configuration key="download-source-tags">engage-download</configuration>
        <!-- merge: add to the existing publication instead of replacing it -->
        <configuration key="strategy">merge</configuration>
        <configuration key="check-availability">false</configuration>
      </configurations>
    </operation>

    <!-- Update the OAI-PMH publication. No fail-on-error attribute is set on this operation. -->
    <operation id="republish-oaipmh"
      exception-handler-workflow="partial-error"
      description="Update recording metadata in default OAI-PMH repository">
      <configurations>
        <configuration key="source-flavors">dublincore/*,security/*</configuration>
        <!--
          republish-oaipmh selects elements through source-flavors / source-tags.
          The publish-engage key "download-source-tags" that was used here before
          is not a documented key of this operation, so the caption tagged
          engage-download was not selected by tag.
        -->
        <configuration key="source-tags">engage-download</configuration>
        <configuration key="repository">default</configuration>
      </configurations>
    </operation>

    <!-- Archive media package -->

    <operation id="snapshot"
      fail-on-error="true"
      exception-handler-workflow="partial-error"
      description="Archive media package">
      <configurations>
        <configuration key="source-flavors">*/*</configuration>
      </configurations>
    </operation>

    <!-- Clean up work artifacts -->

    <operation id="cleanup"
      fail-on-error="false"
      description="Remove temporary processing artifacts">
      <configurations>
        <configuration key="delete-external">true</configuration>
        <!-- FixMe Don't clean up ACLs until workflow service no longer looks for them in the WFR. -->
        <configuration key="preserve-flavors">security/*</configuration>
      </configurations>
    </operation>

  </operations>

</definition>
amberscript-start-transcription.xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Workflow "amberscript-start-transcription".

  Three steps: set default values, encode the source media to an mp3 audio
  track tagged "transcript", then start the AmberScript transcription job on
  the track carrying that tag.
-->
<definition xmlns="http://workflow.opencastproject.org">
  <id>amberscript-start-transcription</id>
  <title>Start AmberScript Transcription</title>
  <tags>
    <tag>archive</tag>
  </tags>
  <description>Start the AmberScript transcription</description>

  <operations>

    <!-- Defaults for the variables referenced further down as ${skipFlavor}, ${language} and ${jobtype} -->
    <operation id="defaults"
      description="Applying default values for AmberScript Transcriptions">
      <configurations>
        <configuration key="skipFlavor">captions/vtt</configuration>
        <configuration key="language">en</configuration>
        <configuration key="jobtype">direct</configuration>
      </configurations>
    </operation>

    <!-- Produce the audio track that is handed to the transcription operation -->
    <operation id="encode"
      fail-on-error="true"
      exception-handler-workflow="partial-error"
      description="Encoding audio for transcription">
      <configurations>
        <configuration key="source-flavor">*/source</configuration>
        <configuration key="target-flavor">audio/mp3</configuration>
        <!-- Must match the source-tag of the amberscript-start-transcription operation below -->
        <configuration key="target-tags">transcript</configuration>
        <configuration key="encoding-profile">audio-mp3</configuration>
      </configurations>
    </operation>

    <!-- Start the transcription job for the track tagged "transcript" -->
    <operation id="amberscript-start-transcription"
      max-attempts="3"
      retry-strategy="hold"
      fail-on-error="true"
      exception-handler-workflow="partial-error"
      description="Start AmberScript transcription job">
      <configurations>
        <configuration key="source-tag">transcript</configuration>
        <configuration key="language">${language}</configuration>
        <configuration key="jobtype">${jobtype}</configuration>
        <configuration key="skip-if-flavor-exists">${skipFlavor}</configuration>
      </configurations>
    </operation>

  </operations>

</definition>

Step 4: Include workflow operations into your workflow

Integrate the AmberScript workflow operations by including the provided workflow amberscript-start-transcription (defined in amberscript-start-transcription.xml) in your existing workflow:

<!-- References the workflow definition by its id (amberscript-start-transcription), not by file name -->
<operation id="include" description="Start AmberScript Transcription">
  <configurations>
    <configuration key="workflow-id">amberscript-start-transcription</configuration>
  </configurations>
</operation>

Workflow Operations