transcription docs

GetStream · Mar 5, 2024 · 7287778 · 7287778
1 parent b391edf
commit 7287778
Show file tree

Hide file tree

Showing 3 changed files with 497 additions and 0 deletions.
diff --git a/docusaurus/video/docusaurus/docs/api/transcription/_category_.json b/docusaurus/video/docusaurus/docs/api/transcription/_category_.json
@@ -0,0 +1,4 @@
+{
+  "label": "Transcription",
+  "position": 4
+}
diff --git a/docusaurus/video/docusaurus/docs/api/transcription/storage.mdx b/docusaurus/video/docusaurus/docs/api/transcription/storage.mdx
@@ -0,0 +1,306 @@
+---
+id: storage
+sidebar_position: 2
+slug: /transcribing/storage
+title: Storage
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+By default, calls are stored on a S3 bucket managed by Stream. Transcriptions stored on Stream S3 storage are retained for two weeks time, and then automatically deleted. If you want to keep your transcriptions for longer, you can use your own storage.
+
+Stream supports the following external storage providers:
+
+- [Amazon S3](#amazon-s3)
+- [Google Cloud Storage](#google-cloud-storage)
+- [Azure Blob Storage](#azure-blob-storage)
+
+If you need support for a different storage provider, you can participate in the conversation [here](https://github.com/GetStream/protocol/discussions/371).
+
+## Configuring transcribing storage
+
+To use your own storage you need to:
+
+1. Configure a new storage for your Stream application
+2. Configure your call type(s) to use the new storage
+
+Alternatively, you can also have the storage configured and use it for specific calls, while keeping existing calls on Stream storage.
+
+<Tabs groupId="examples">
+<TabItem value="js" label="JavaScript">
+
+```js
+// 1. create a new storage with all the required parameters
+await serverSideClient.createExternalStorage({
+    bucket: 'my-bucket',
+    name: 'my-s3',
+    storage_type: 's3',
+    path: 'directory_name/',
+    aws_s3: {
+      s3_region: 'us-east-1',
+      s3_api_key: 'my-access-key',
+      s3_secret: 'my-secret',
+    },
+});
+// 2. update the call type to use the new storage
+await serverSideClient.updateCallType('my-call-type', {
+    external_storage: "my-s3",
+});
+// 3. alternative, specify the storage when starting call transcribing
+await call.startTranscription({
+    transcription_external_storage: "my-s3",
+});
+```
+</TabItem>
+<TabItem value="py" label="Python">
+
+```py
+# 1. create a new storage with all the required parameters
+aws_s3_config = S3Request(
+    s3_region='us-east-1',
+    s3_api_key='my-access-key',
+    s3_secret='my-secret',
+)
+
+response = client.video.create_external_storage(
+    name='my-s3',
+    storage_type='s3',
+    bucket='my-bucket',
+    path='directory_name/',
+    aws_s3=aws_s3_config
+)
+# 2. update the call type to use the new storage
+client.video.update_call_type(name='allhands', external_storage= "my-s3")
+
+# 3. alternative, specify the storage when starting call transcribing
+call.start_transcription(transcription_external_storage= "my-s3")
+```
+</TabItem>
+<TabItem value="curl" label="cURL">
+
+```bash
+curl -X POST https://video.stream-io-api.com/video/external_storage?api_key=${API_KEY} \
+-H "Authorization: ${JWT_TOKEN}" \
+-H "stream-auth-type: jwt" \
+-d '{
+  "name": "my-storage",
+  "storage_type": "s3",
+  "bucket": "my-bucket",
+  "custom_folder": "my-folder",
+  "aws_s3": {
+    "s3_region": "us-east-1",
+    "s3_api_key": "my-api-key",
+    "s3_secret": "my-secret"
+  }
+}'
+
+
+curl -X PATCH https://video.stream-io-api.com/video/call_types/${CALL_TYPE_ID}?api_key=${API_KEY} \
+-H "Authorization: ${JWT_TOKEN}" -H "stream-auth-type: jwt" \
+-d '{
+    "external_storage": "my-storage"
+}'
+
+curl -X POST "https://video.stream-io-api.com/video/call/default/${CALL_ID}/start_transcription?api_key=${API_KEY}" \
+-H "Authorization: ${JWT_TOKEN}" -H "stream-auth-type: jwt" \
+-d '{
+    "transcription_external_storage": "my-storage"
+}'
+```
+
+</TabItem>
+</Tabs>
+
+*Note*: transcribing are only uploaded to the storage when the call is completed or when `stop_transcription` is called, whichever comes first.
+
+### Multiple storage providers and default storage
+
+You can configure multiple storage providers for your application. When starting a call transcription, you can specify which storage provider to use. If you don't specify a storage provider, the default storage provider will be used.
+
+When transcribing a call, this is the order in which the storage provider is selected:
+
+1. If specified at call-level, use the storage provider specified for this call (`call.start_transcription(storage_provider=...)`)
+2. If specified at call-type-level, use the storage provider specified for this call type
+3. Use Stream S3 storage
+
+Note: all Stream applications have Stream S3 storage enabled by default. You can refer to this configuration with the `stream-s3` name.
+
+<Tabs groupId="examples">
+<TabItem value="js" label="JavaScript">
+
+```js
+// 1. update the call type to use Stream S3 storage
+
+await serverSideClient.updateCallType('my-call-type', {
+  external_storage: "stream-s3",
+});
+
+  // 2. specify Stream S3 storage when starting call transcribing
+await call.startTranscription({
+  transcription_external_storage: "my-storage",
+});
+```
+
+</TabItem>
+<TabItem value="py" label="Python">
+
+```py
+# 1. update the call type to use Stream S3 storage
+client.video.update_call_type('my-call-type', external_storage="stream-s3")
+
+# 2. specify Stream S3 storage when starting call transcribing
+call.start_transcription(transcription_external_storage="my-storage")
+```
+
+</TabItem>
+<TabItem value="curl" label="cURL">
+
+```bash
+
+curl -X PATCH https://video.stream-io-api.com/video/call_types/${CALL_TYPE_ID}?api_key=${API_KEY} \
+-H "Authorization: ${JWT_TOKEN}" \
+-H "stream-auth-type: jwt" \
+-d '{
+  "external_storage": "my-first-storage"
+}'
+
+curl -X POST "https://video.stream-io-api.com/video/call/default/${CALL_ID}/start_transcription?api_key=${API_KEY}" \
+-H "Authorization: ${JWT_TOKEN}" -H "stream-auth-type: jwt" \
+-d '{
+    "transcription_external_storage": "my-second-storage"
+}'
+```
+
+</TabItem>
+</Tabs>
+
+## Storage configuration
+
+| Name          | Description | Required |
+|---------------|-------------|----------|
+| name          |             |          |
+| storage_type  |             |          |
+| bucket        |             |          |
+| custom_folder |             |          |
+
+## Amazon S3
+
+To use Amazon S3 as your storage provider, you have two authentication options: IAM role or API key.
+
+If you do not specify the `s3_api_key` parameter, Stream will use IAM role authentication. In that case make sure to have the correct IAM role configured for your application.
+
+| Name       | Description | Required |
+|------------|-------------|----------|
+| s3_region  |             |   yes    |
+| s3_api_key |             |          |
+| s3_secret  |             |          |
+
+There are 2 ways to configure authentication on your S3 bucket:
+- By providing a key and secret
+- Or by having Stream's AWS account assume a role on your SQS queue.
+With this option you omit the key and secret, but instead you set up a resource-based policy to grant Stream SendMessage permission on your S3 bucket.
+The following policy needs to be attached to your queue (replace the value of Resource with the fully qualified ARN of you S3 bucket):
+
+
+### Example S3 policy
+
+```json
+{
+    "Version": "2012-10-17",
+    "Id": "StreamExternalStoragePolicy",
+    "Statement": [
+        {
+            "Sid": "ExampleStatement01",
+            "Effect": "Allow",
+            "Principal": {
+                "AWS": "arn:aws:iam::185583345998:root"
+            },
+            "Action": [
+                "s3:PutObject"
+            ],
+            "Resource": [
+                "arn:aws:s3:::bucket_name/*",
+                "arn:aws:s3:::bucket_name"
+            ]
+        }
+    ]
+}
+```
+
+## Google Cloud Storage
+
+To use Google Cloud Storage as your storage provider, you need to send your (service account)[https://cloud.google.com/iam/docs/service-accounts-create ] credentials as they are stored in your JSON file.
+
+Note: you can find the JSON service account credentials in the Google Cloud Console (...)
+
+<Tabs groupId="examples">
+<TabItem value="js" label="JavaScript">
+
+```js
+// 1. create a new storage using the service account credentials
+
+await serverSideClient.createExternalStorage({
+    bucket: 'my-bucket',
+    name: 'my-gcs',
+    storage_type: 'gcs',
+    path: 'directory_name/',
+    "gcs_credentials": "content of the service account file",
+});
+```
+
+</TabItem>
+
+<TabItem value="py" label="Python">
+
+```py
+response = client.video.create_external_storage(
+    name='my-gcs',
+    storage_type='gcs',
+    bucket='my-bucket',
+    path='directory_name/',
+    gcs_credentials="content of the service account file"
+)
+```
+</TabItem>
+
+<TabItem value="curl" label="cURL">
+
+```bash
+
+curl -X POST https://video.stream-io-api.com/video/external_storage?api_key=${API_KEY} \
+-H "Authorization: ${JWT_TOKEN}" -H "stream-auth-type: jwt" \
+-d '{
+  "name": "my-storage",
+  "storage_type": "gcs",
+  "bucket": "my-bucket",
+  "custom_folder": "my-folder",
+  "gcs_credentials": "content of the service account file"
+}'
+
+```
+
+</TabItem>
+</Tabs>
+
+### Example policy
+
+```json
+{
+	"bindings": [
+		{
+			"role": "roles/storage.objectCreator",
+			"members": ["service_account_principal_identifier"]
+		}
+	]
+}
+```
+
+## Azure Blob Storage
+
+To use Azure Blob Storage as your storage provider, you need to create a container and a service principal with the following parameters:
+
+| Name             | Description | Required |
+|------------------|-------------|----------|
+| abs_account_name |             | yes      |
+| abs_account_key  |             | yes      |