AmberScript Transcription Service
Overview
The AmberScriptTranscriptionService uses the AmberScript Transcription API to transcribe audio files. The audio is extracted from an Opencast recording's video file and sent to the AmberScript server for processing. The AmberScriptTranscriptionService then periodically checks for a transcription result. Depending on the audio length and the job type chosen, transcribing will take some time. When the transcription result is ready, the service transforms it to VTT format and attaches it to the recording. The recording itself becomes available as soon as its own workflow finishes, without waiting for the transcription. As soon as the transcription has been attached, the video can be played back with the transcription.
Configuration
Step 1: Get AmberScript API key
- Contact AmberScript via https://www.amberscript.com/en/opencast-integration
Step 2: Configure AmberscriptTranscriptionService
Edit opencast/etc/org.opencastproject.transcription.amberscript.AmberscriptTranscriptionService.cfg:
- Set `enabled=true` to enable the service.
- Set the API key: `client.key=__YOUR-API-KEY__`. This is mandatory.
- Change the other options to your liking.
Step 3: Add the amberscript workflows to Opencast
In your Opencast workflow directory (usually /etc/workflows or /etc/opencast/workflows), add the workflows from the dropdowns below.
amberscript-attach-transcription.xml
<?xml version="1.0" encoding="UTF-8"?>
<definition xmlns="http://workflow.opencastproject.org">

  <id>amberscript-attach-transcripts</id>
  <title>Attach caption/transcripts generated by AmberScript</title>
  <tags/>
  <!-- The description text is left unindented: whitespace inside the element is part of its value. -->
  <description>Attach transcription generated by the AmberScript service.
This is an internal workflow, started by the Transcription Service.
</description>
  <configuration_panel/>

  <operations>

    <!-- Step 1: attach the caption/transcript to the media package -->
    <operation id="amberscript-attach-transcription"
               description="Attach captions/transcription"
               fail-on-error="true"
               exception-handler-workflow="partial-error">
      <configurations>
        <!-- Supplied by the transcription service when it starts this workflow -->
        <configuration key="transcription-job-id">${transcriptionJobId}</configuration>
        <!-- The generated caption gets the default flavor for this format, e.g. captions/vtt -->
        <configuration key="target-caption-format">vtt</configuration>
        <!-- Same tag that publish-engage below selects via download-source-tags -->
        <configuration key="target-tag">engage-download</configuration>
      </configurations>
    </operation>

    <!-- Step 2: merge the caption/transcript into the existing publication and republish -->
    <operation id="publish-engage"
               description="Distribute and publish to engage server"
               fail-on-error="true"
               exception-handler-workflow="partial-error">
      <configurations>
        <configuration key="download-source-flavors">dublincore/*,security/*</configuration>
        <configuration key="download-source-tags">engage-download</configuration>
        <configuration key="strategy">merge</configuration>
        <configuration key="check-availability">false</configuration>
      </configurations>
    </operation>

    <!--
      Step 3: update the recording in the "default" OAI-PMH repository.
      No fail-on-error attribute is set on this operation.
      NOTE(review): the key "download-source-tags" is the one publish-engage uses; confirm that
      republish-oaipmh honors it (its documented key may be "source-tags").
    -->
    <operation id="republish-oaipmh"
               description="Update recording metadata in default OAI-PMH repository"
               exception-handler-workflow="partial-error">
      <configurations>
        <configuration key="source-flavors">dublincore/*,security/*</configuration>
        <configuration key="download-source-tags">engage-download</configuration>
        <configuration key="repository">default</configuration>
      </configurations>
    </operation>

    <!-- Step 4: archive the media package (all flavors) -->
    <operation id="snapshot"
               description="Archive media package"
               fail-on-error="true"
               exception-handler-workflow="partial-error">
      <configurations>
        <configuration key="source-flavors">*/*</configuration>
      </configurations>
    </operation>

    <!-- Step 5: remove work artifacts; a failure here does not fail the workflow -->
    <operation id="cleanup"
               description="Remove temporary processing artifacts"
               fail-on-error="false">
      <configurations>
        <configuration key="delete-external">true</configuration>
        <!-- FIXME: keep ACLs (security/*) until the workflow service no longer looks for them in the WFR -->
        <configuration key="preserve-flavors">security/*</configuration>
      </configurations>
    </operation>

  </operations>

</definition>
amberscript-start-transcription.xml
<?xml version="1.0" encoding="UTF-8"?>
<definition xmlns="http://workflow.opencastproject.org">

  <id>amberscript-start-transcription</id>
  <title>Start AmberScript Transcription</title>
  <tags>
    <tag>archive</tag>
  </tags>
  <description>Start the AmberScript transcription</description>

  <operations>

    <!--
      Default values for the workflow variables skipFlavor, language and jobtype,
      which the amberscript-start-transcription operation below references.
    -->
    <operation id="defaults"
               description="Applying default values for AmberScript Transcriptions">
      <configurations>
        <configuration key="skipFlavor">captions/vtt</configuration>
        <configuration key="language">en</configuration>
        <configuration key="jobtype">direct</configuration>
      </configurations>
    </operation>

    <!-- Encode */source to audio/mp3 and tag the result "transcript" for the next operation -->
    <operation id="encode"
               description="Encoding audio for transcription"
               fail-on-error="true"
               exception-handler-workflow="partial-error">
      <configurations>
        <configuration key="source-flavor">*/source</configuration>
        <configuration key="target-flavor">audio/mp3</configuration>
        <configuration key="target-tags">transcript</configuration>
        <configuration key="encoding-profile">audio-mp3</configuration>
      </configurations>
    </operation>

    <!-- Start the transcription job for the element tagged "transcript" -->
    <operation id="amberscript-start-transcription"
               description="Start AmberScript transcription job"
               fail-on-error="true"
               exception-handler-workflow="partial-error"
               max-attempts="3"
               retry-strategy="hold">
      <configurations>
        <configuration key="source-tag">transcript</configuration>
        <configuration key="language">${language}</configuration>
        <configuration key="jobtype">${jobtype}</configuration>
        <configuration key="skip-if-flavor-exists">${skipFlavor}</configuration>
      </configurations>
    </operation>

  </operations>

</definition>
Step 4: Include workflow operations into your workflow
Integrate the AmberScript workflow operations into your existing workflow by including the provided workflow amberscript-start-transcription (from amberscript-start-transcription.xml):
<!-- Include the workflow with id "amberscript-start-transcription" at this point -->
<operation id="include"
           description="Start AmberScript Transcription">
  <configurations>
    <configuration key="workflow-id">amberscript-start-transcription</configuration>
  </configurations>
</operation>