diff --git a/Documentation~/README.md b/Documentation~/README.md
index f07f3dc..75937d6 100644
--- a/Documentation~/README.md
+++ b/Documentation~/README.md
@@ -48,7 +48,7 @@ The recommended installation method is though the unity package manager and [Ope
### Table of Contents
-- [Authentication](#authentication) :construction:
+- [Authentication](#authentication)
- [API Proxy](#api-proxy)
- [Editor Dashboard](#editor-dashboard)
- [Speech Synthesis Dashboard](#speech-synthesis-dashboard)
@@ -59,6 +59,7 @@ The recommended installation method is though the unity package manager and [Ope
- [Text to Speech](#text-to-speech)
- [Stream Text To Speech](#stream-text-to-speech)
- [Voices](#voices)
+ - [Get Shared Voices](#get-shared-voices) :new:
- [Get All Voices](#get-all-voices)
- [Get Default Voice Settings](#get-default-voice-settings)
- [Get Voice](#get-voice)
@@ -69,6 +70,13 @@ The recommended installation method is though the unity package manager and [Ope
- [Samples](#samples)
- [Download Voice Sample](#download-voice-sample)
- [Delete Voice Sample](#delete-voice-sample)
+- [Dubbing](#dubbing) :new:
+ - [Dub](#dub) :new:
+ - [Get Dubbing Metadata](#get-dubbing-metadata) :new:
+ - [Get Transcript for Dub](#get-transcript-for-dub) :new:
+  - [Get Dubbed File](#get-dubbed-file) :new:
+ - [Delete Dubbing Project](#delete-dubbing-project) :new:
+- [SFX Generation](#sfx-generation) :new:
- [History](#history)
- [Get History](#get-history)
- [Get History Item](#get-history-item)
@@ -176,8 +184,9 @@ In this example, we demonstrate how to set up and use `ElevenLabsProxyStartup` i
1. Create a new [ASP.NET Core minimal web API](https://learn.microsoft.com/en-us/aspnet/core/tutorials/min-web-api?view=aspnetcore-6.0) project.
2. Add the ElevenLabs-DotNet nuget package to your project.
- Powershell install: `Install-Package ElevenLabs-DotNet-Proxy`
+ - Dotnet install: `dotnet add package ElevenLabs-DotNet-Proxy`
- Manually editing .csproj: ``
-3. Create a new class that inherits from `AbstractAuthenticationFilter` and override the `ValidateAuthentication` method. This will implement the `IAuthenticationFilter` that you will use to check user session token against your internal server.
+3. Create a new class that inherits from `AbstractAuthenticationFilter` and override the `ValidateAuthenticationAsync` method. This will implement the `IAuthenticationFilter` that you will use to check user session token against your internal server.
4. In `Program.cs`, create a new proxy web application by calling `ElevenLabsProxyStartup.CreateDefaultHost` method, passing your custom `AuthenticationFilter` as a type argument.
5. Create `ElevenLabsAuthentication` and `ElevenLabsClientSettings` as you would normally with your API keys, org id, or Azure settings.
@@ -186,11 +195,13 @@ public partial class Program
{
private class AuthenticationFilter : AbstractAuthenticationFilter
{
- public override void ValidateAuthentication(IHeaderDictionary request)
+ public override async Task ValidateAuthenticationAsync(IHeaderDictionary request)
{
+ await Task.CompletedTask; // remote resource call
+
// You will need to implement your own class to properly test
// custom issued tokens you've setup for your end users.
- if (!request["xi-api-key"].ToString().Contains(userToken))
+ if (!request["xi-api-key"].ToString().Contains(TestUserToken))
{
throw new AuthenticationException("User is not authorized");
}
@@ -265,7 +276,9 @@ audioSource.PlayOneShot(voiceClip.AudioClip);
voiceClip.CopyIntoProject(editorDownloadDirectory);
```
-### Stream Text to Speech
+#### [Stream Text To Speech](https://docs.elevenlabs.io/api-reference/text-to-speech-stream)
+
+Stream text to speech.
```csharp
var api = new ElevenLabsClient();
@@ -289,6 +302,19 @@ audioSource.clip = voiceClip.AudioClip;
Access to voices created either by the user or ElevenLabs.
+#### Get Shared Voices
+
+Gets a list of shared voices in the public voice library.
+
+```csharp
+var api = new ElevenLabsClient();
+var results = await api.SharedVoicesEndpoint.GetSharedVoicesAsync();
+foreach (var voice in results.Voices)
+{
+ Debug.Log($"{voice.OwnerId} | {voice.VoiceId} | {voice.Date} | {voice.Name}");
+}
+```
+
#### Get All Voices
Gets a list of all available voices.
@@ -383,6 +409,87 @@ var success = await api.VoicesEndpoint.DeleteVoiceSampleAsync(voiceId, sampleId)
Debug.Log($"Was successful? {success}");
```
+### [Dubbing](https://elevenlabs.io/docs/api-reference/create-dub)
+
+#### Dub
+
+Dubs provided audio or video file into given language.
+
+```csharp
+var api = new ElevenLabsClient();
+// from URI
+var request = new DubbingRequest(new Uri("https://youtu.be/Zo5-rhYOlNk"), "ja", "en", 1, true);
+// from file
+var request = new DubbingRequest(filePath, "es", "en", 1);
+var metadata = await api.DubbingEndpoint.DubAsync(request, progress: new Progress<DubbingProjectMetadata>(metadata =>
+{
+ switch (metadata.Status)
+ {
+ case "dubbing":
+ Debug.Log($"Dubbing for {metadata.DubbingId} in progress... Expected Duration: {metadata.ExpectedDurationSeconds:0.00} seconds");
+ break;
+ case "dubbed":
+ Debug.Log($"Dubbing for {metadata.DubbingId} complete in {metadata.TimeCompleted.TotalSeconds:0.00} seconds!");
+ break;
+ default:
+ Debug.Log($"Status: {metadata.Status}");
+ break;
+ }
+}));
+```
+
+#### Get Dubbing Metadata
+
+Returns metadata about a dubbing project, including whether it’s still in progress or not.
+
+```csharp
+var api = new ElevenLabsClient();
+var metadata = await api.DubbingEndpoint.GetDubbingProjectMetadataAsync("dubbing-id");
+```
+
+#### Get Dubbed File
+
+Returns downloaded dubbed file path.
+
+> [!IMPORTANT]
+> Videos will be returned in MP4 format and audio only dubs will be returned in MP3.
+
+```csharp
+var dubbedClipPath = await api.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage);
+var dubbedClip = await Rest.DownloadAudioClipAsync($"file://{dubbedClipPath}", AudioType.MPEG);
+audioSource.PlayOneShot(dubbedClip);
+```
+
+#### Get Transcript for Dub
+
+Returns transcript for the dub in the desired format.
+
+```csharp
+var srcFile = new FileInfo(audioPath);
+var transcriptPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt");
+var transcriptFile = await api.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage);
+await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile);
+```
+
+#### Delete Dubbing Project
+
+Deletes a dubbing project.
+
+```csharp
+var api = new ElevenLabsClient();
+await api.DubbingEndpoint.DeleteDubbingProjectAsync("dubbing-id");
+```
+
+### SFX Generation
+
+API that converts text into sounds & uses the most advanced AI audio model ever.
+
+```csharp
+var api = new ElevenLabsClient();
+var request = new SoundGenerationRequest("Star Wars Light Saber parry");
+var clip = await api.SoundGenerationEndpoint.GenerateSoundAsync(request);
+```
+
### [History](https://docs.elevenlabs.io/api-reference/history)
Access to your previously synthesized audio clips including its metadata.
@@ -393,9 +500,9 @@ Get metadata about all your generated audio.
```csharp
var api = new ElevenLabsClient();
-var historyInfo = await api.HistoryEndpoint.GetHistoryAsync();
+var historyItems = await api.HistoryEndpoint.GetHistoryAsync();
-foreach (var item in historyInfo.HistoryItems.OrderBy(item => item.Date))
+foreach (var item in historyItems.OrderBy(historyItem => historyItem.Date))
{
Debug.Log($"{item.State} {item.Date} | {item.Id} | {item.Text.Length} | {item.Text}");
}
@@ -407,7 +514,7 @@ Get information about a specific item.
```csharp
var api = new ElevenLabsClient();
-var historyItem = api.HistoryEndpoint.GetHistoryItemAsync(voiceClip.Id);
+var historyItem = await api.HistoryEndpoint.GetHistoryItemAsync(voiceClip.Id);
```
#### Download History Audio
diff --git a/Editor/ElevenLabsDashboard.cs b/Editor/ElevenLabsDashboard.cs
index 450efde..0797c00 100644
--- a/Editor/ElevenLabsDashboard.cs
+++ b/Editor/ElevenLabsDashboard.cs
@@ -1,6 +1,5 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.
-using ElevenLabs.Extensions;
using ElevenLabs.History;
using ElevenLabs.Models;
using ElevenLabs.User;
@@ -363,6 +362,8 @@ private async void AddVoice()
private static readonly GUIContent deleteContent = new("Delete");
+ private static readonly GUIContent copyContent = new("Copy");
+
private static readonly GUIContent refreshContent = new("Refresh");
private static readonly GUIContent downloadingContent = new("Download in progress...");
@@ -1224,8 +1225,24 @@ private void RenderVoiceLab()
EditorGUILayout.Space(EndWidth);
EditorGUILayout.EndHorizontal();
EditorGUI.indentLevel++;
- EditorGUILayout.LabelField(voice.Id, EditorStyles.boldLabel);
+
+ EditorGUILayout.BeginHorizontal();
+ {
+ EditorGUILayout.LabelField(voice.Id, EditorStyles.boldLabel);
+ GUILayout.FlexibleSpace();
+
+ if (GUILayout.Button(copyContent, defaultColumnWidthOption))
+ {
+ EditorGUIUtility.systemCopyBuffer = voice.Id;
+ Debug.Log($"Voice ID {voice.Id} copied to clipboard");
+ }
+ GUI.enabled = true;
+ }
+ EditorGUILayout.Space(EndWidth);
+ EditorGUILayout.EndHorizontal();
+ EditorGUI.indentLevel++;
+
if (!voiceLabels.TryGetValue(voice.Id, out var cachedLabels))
{
cachedLabels = new Dictionary();
diff --git a/Runtime/Authentication/ElevenLabsAuthentication.cs b/Runtime/Authentication/ElevenLabsAuthentication.cs
index 49b2188..d35203c 100644
--- a/Runtime/Authentication/ElevenLabsAuthentication.cs
+++ b/Runtime/Authentication/ElevenLabsAuthentication.cs
@@ -14,6 +14,7 @@ namespace ElevenLabs
public sealed class ElevenLabsAuthentication : AbstractAuthentication
{
internal const string CONFIG_FILE = ".elevenlabs";
+ private const string ELEVENLABS_API_KEY = nameof(ELEVENLABS_API_KEY);
private const string ELEVEN_LABS_API_KEY = nameof(ELEVEN_LABS_API_KEY);
///
@@ -85,6 +86,12 @@ public override ElevenLabsAuthentication LoadFromAsset(ElevenLabsConfiguration c
public override ElevenLabsAuthentication LoadFromEnvironment()
{
var apiKey = Environment.GetEnvironmentVariable(ELEVEN_LABS_API_KEY);
+
+ if (string.IsNullOrWhiteSpace(apiKey))
+ {
+ apiKey = Environment.GetEnvironmentVariable(ELEVENLABS_API_KEY);
+ }
+
return string.IsNullOrEmpty(apiKey) ? null : new ElevenLabsAuthentication(apiKey);
}
@@ -136,6 +143,7 @@ public override ElevenLabsAuthentication LoadFromDirectory(string directory = nu
apiKey = part switch
{
+ ELEVENLABS_API_KEY => nextPart.Trim(),
ELEVEN_LABS_API_KEY => nextPart.Trim(),
_ => apiKey
};
diff --git a/Runtime/Authentication/ElevenLabsSettingsInfo.cs b/Runtime/Authentication/ElevenLabsSettingsInfo.cs
index 9d76faa..4196bbb 100644
--- a/Runtime/Authentication/ElevenLabsSettingsInfo.cs
+++ b/Runtime/Authentication/ElevenLabsSettingsInfo.cs
@@ -7,6 +7,7 @@ namespace ElevenLabs
{
public sealed class ElevenLabsSettingsInfo : ISettingsInfo
{
+ internal const string Https = "https://";
internal const string ElevenLabsDomain = "api.elevenlabs.io";
internal const string DefaultApiVersion = "v1";
@@ -18,7 +19,7 @@ public ElevenLabsSettingsInfo()
Domain = ElevenLabsDomain;
ApiVersion = DefaultApiVersion;
BaseRequest = $"/{ApiVersion}/";
- BaseRequestUrlFormat = $"https://{Domain}{BaseRequest}{{0}}";
+ BaseRequestUrlFormat = $"{Https}{Domain}{BaseRequest}{{0}}";
}
///
@@ -33,8 +34,8 @@ public ElevenLabsSettingsInfo(string domain, string apiVersion = DefaultApiVersi
domain = ElevenLabsDomain;
}
- if (!domain.Contains(".") &&
- !domain.Contains(":"))
+ if (!domain.Contains('.') &&
+ !domain.Contains(':'))
{
throw new ArgumentException($"Invalid parameter \"{nameof(domain)}\".");
}
@@ -44,10 +45,10 @@ public ElevenLabsSettingsInfo(string domain, string apiVersion = DefaultApiVersi
apiVersion = DefaultApiVersion;
}
- Domain = domain;
+            Domain = domain.StartsWith("http://", StringComparison.OrdinalIgnoreCase) || domain.StartsWith("https://", StringComparison.OrdinalIgnoreCase) ? domain : $"{Https}{domain}"; // only a leading scheme counts; a host that merely contains "http" still needs the prefix
ApiVersion = apiVersion;
BaseRequest = $"/{ApiVersion}/";
- BaseRequestUrlFormat = $"https://{Domain}{BaseRequest}{{0}}";
+ BaseRequestUrlFormat = $"{Domain}{BaseRequest}{{0}}";
}
public string Domain { get; }
diff --git a/Runtime/Common/GeneratedClip.cs b/Runtime/Common/GeneratedClip.cs
new file mode 100644
index 0000000..1965774
--- /dev/null
+++ b/Runtime/Common/GeneratedClip.cs
@@ -0,0 +1,86 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using ElevenLabs.Extensions;
+using System;
+using UnityEngine;
+using UnityEngine.Scripting;
+
+namespace ElevenLabs
+{
+    /// <summary>
+    /// A generated <see cref="UnityEngine.AudioClip"/> bundled with the id and text it was created with
+    /// and the path it was cached at.
+    /// </summary>
+    [Preserve]
+    [Serializable]
+    public class GeneratedClip : ISerializationCallbackReceiver
+    {
+        [Preserve]
+        internal GeneratedClip(string id, string text, AudioClip audioClip, string cachedPath)
+        {
+            this.id = id;
+            this.text = text;
+            TextHash = $"{id}{text}".GenerateGuid();
+            textHash = TextHash.ToString();
+            this.audioClip = audioClip;
+            this.cachedPath = cachedPath;
+        }
+
+        [SerializeField]
+        private string id;
+
+        /// <summary>
+        /// The id this clip was created with.
+        /// </summary>
+        [Preserve]
+        public string Id => id;
+
+        [SerializeField]
+        private string text;
+
+        /// <summary>
+        /// The text this clip was created with.
+        /// </summary>
+        [Preserve]
+        public string Text => text;
+
+        // String form of TextHash: refreshed in OnBeforeSerialize, parsed back in OnAfterDeserialize.
+        [SerializeField]
+        private string textHash;
+
+        /// <summary>
+        /// Guid generated from the concatenated id and text.
+        /// </summary>
+        [Preserve]
+        public Guid TextHash { get; private set; }
+
+        [SerializeField]
+        private AudioClip audioClip;
+
+        /// <summary>
+        /// The audio clip this instance was created with.
+        /// </summary>
+        [Preserve]
+        public AudioClip AudioClip => audioClip;
+
+        [SerializeField]
+        private string cachedPath;
+
+        /// <summary>
+        /// The cached path this clip was created with.
+        /// </summary>
+        [Preserve]
+        public string CachedPath => cachedPath;
+
+        /// <inheritdoc />
+        public void OnBeforeSerialize() => textHash = TextHash.ToString();
+
+        /// <inheritdoc />
+        public void OnAfterDeserialize()
+        {
+            // A missing or malformed serialized hash must not throw out of the
+            // deserialization callback; fall back to Guid.Empty instead.
+            TextHash = Guid.TryParse(textHash, out var parsed) ? parsed : Guid.Empty;
+        }
+    }
+}
diff --git a/Runtime/Common/GeneratedClip.cs.meta b/Runtime/Common/GeneratedClip.cs.meta
new file mode 100644
index 0000000..8a845d0
--- /dev/null
+++ b/Runtime/Common/GeneratedClip.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 4239acd50bc44444591f287fc7d32f6a
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Common/VoiceClip.cs b/Runtime/Common/VoiceClip.cs
index 1c51f17..24803d8 100644
--- a/Runtime/Common/VoiceClip.cs
+++ b/Runtime/Common/VoiceClip.cs
@@ -1,8 +1,7 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.
-using System;
-using ElevenLabs.Extensions;
using ElevenLabs.Voices;
+using System;
using UnityEngine;
using UnityEngine.Scripting;
@@ -10,58 +9,19 @@ namespace ElevenLabs
{
[Preserve]
[Serializable]
- public sealed class VoiceClip : ISerializationCallbackReceiver
+ public sealed class VoiceClip : GeneratedClip
{
[Preserve]
internal VoiceClip(string id, string text, Voice voice, AudioClip audioClip, string cachedPath)
+ : base(id, text, audioClip, cachedPath)
{
- this.id = id;
- this.text = text;
this.voice = voice;
- TextHash = $"{id}{text}".GenerateGuid();
- textHash = TextHash.ToString();
- this.audioClip = audioClip;
- this.cachedPath = cachedPath;
}
- [SerializeField]
- private string id;
-
- [Preserve]
- public string Id => id;
-
- [SerializeField]
- private string text;
-
- [Preserve]
- public string Text => text;
-
[SerializeField]
private Voice voice;
[Preserve]
public Voice Voice => voice;
-
- [SerializeField]
- private string textHash;
-
- [Preserve]
- public Guid TextHash { get; private set; }
-
- [SerializeField]
- private AudioClip audioClip;
-
- [Preserve]
- public AudioClip AudioClip => audioClip;
-
- [SerializeField]
- private string cachedPath;
-
- [Preserve]
- public string CachedPath => cachedPath;
-
- public void OnBeforeSerialize() => textHash = TextHash.ToString();
-
- public void OnAfterDeserialize() => TextHash = Guid.Parse(textHash);
}
}
diff --git a/Runtime/Dubbing.meta b/Runtime/Dubbing.meta
new file mode 100644
index 0000000..d75401b
--- /dev/null
+++ b/Runtime/Dubbing.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: 37e6aa53ad2e8da47b21e218b5617222
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Dubbing/DubbingEndpoint.cs b/Runtime/Dubbing/DubbingEndpoint.cs
new file mode 100644
index 0000000..ff0010b
--- /dev/null
+++ b/Runtime/Dubbing/DubbingEndpoint.cs
@@ -0,0 +1,246 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using ElevenLabs.Extensions;
+using Newtonsoft.Json;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Globalization;
+using System.IO;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using UnityEngine;
+using UnityEngine.Networking;
+using Utilities.WebRequestRest;
+using Debug = UnityEngine.Debug;
+
+namespace ElevenLabs.Dubbing
+{
+    /// <summary>
+    /// Endpoint wrapper for the "dubbing" API root.
+    /// </summary>
+    public class DubbingEndpoint : ElevenLabsBaseEndPoint
+    {
+        public DubbingEndpoint(ElevenLabsClient client) : base(client) { }
+
+        protected override string Root => "dubbing";
+
+        /// <summary>
+        /// Dubs provided audio or video file into given language.
+        /// </summary>
+        /// <remarks>
+        /// The <paramref name="request"/> is disposed as soon as its form payload has been built.
+        /// </remarks>
+        /// <param name="request">The <see cref="DubbingRequest"/> containing dubbing configuration and files.</param>
+        /// <param name="maxRetries">Optional, number of retry attempts when polling. Defaults to 60.</param>
+        /// <param name="pollingInterval">Optional, <see cref="TimeSpan"/> between making requests. When omitted, starts at the expected duration from the dub response and is shortened while polling.</param>
+        /// <param name="progress">Optional, <see cref="IProgress{T}"/> progress callback.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns><see cref="DubbingProjectMetadata"/>.</returns>
+        public async Task<DubbingProjectMetadata> DubAsync(DubbingRequest request, int? maxRetries = null, TimeSpan? pollingInterval = null, IProgress<DubbingProjectMetadata> progress = null, CancellationToken cancellationToken = default)
+        {
+            if (request == null)
+            {
+                throw new ArgumentNullException(nameof(request));
+            }
+
+            var payload = new WWWForm();
+
+            try
+            {
+                if (request.Files != null)
+                {
+                    foreach (var (fileName, mediaType, stream) in request.Files)
+                    {
+                        using var audioData = new MemoryStream();
+                        await stream.CopyToAsync(audioData, cancellationToken);
+                        payload.AddBinaryData("file", audioData.ToArray(), fileName, mediaType);
+                    }
+                }
+
+                if (!string.IsNullOrEmpty(request.ProjectName))
+                {
+                    payload.AddField("name", request.ProjectName);
+                }
+
+                if (request.SourceUrl != null)
+                {
+                    payload.AddField("source_url", request.SourceUrl.ToString());
+                }
+
+                if (!string.IsNullOrEmpty(request.SourceLanguage))
+                {
+                    payload.AddField("source_lang", request.SourceLanguage);
+                }
+
+                if (!string.IsNullOrEmpty(request.TargetLanguage))
+                {
+                    payload.AddField("target_lang", request.TargetLanguage);
+                }
+
+                if (request.NumberOfSpeakers.HasValue)
+                {
+                    payload.AddField("num_speakers", request.NumberOfSpeakers.Value.ToString(CultureInfo.InvariantCulture));
+                }
+
+                if (request.Watermark.HasValue)
+                {
+                    payload.AddField("watermark", request.Watermark.Value.ToString());
+                }
+
+                if (request.StartTime.HasValue)
+                {
+                    payload.AddField("start_time", request.StartTime.Value.ToString(CultureInfo.InvariantCulture));
+                }
+
+                if (request.EndTime.HasValue)
+                {
+                    payload.AddField("end_time", request.EndTime.Value.ToString(CultureInfo.InvariantCulture));
+                }
+
+                if (request.HighestResolution.HasValue)
+                {
+                    payload.AddField("highest_resolution", request.HighestResolution.Value.ToString());
+                }
+
+                // Forward the advanced drop-background-audio flag exposed by DubbingRequest.
+                if (request.DropBackgroundAudio.HasValue)
+                {
+                    payload.AddField("drop_background_audio", request.DropBackgroundAudio.Value.ToString());
+                }
+            }
+            finally
+            {
+                request.Dispose();
+            }
+
+            var response = await Rest.PostAsync(GetUrl(), payload, new RestParameters(client.DefaultRequestHeaders), cancellationToken);
+            response.Validate(EnableDebug);
+            var dubResponse = JsonConvert.DeserializeObject<DubbingResponse>(response.Body, ElevenLabsClient.JsonSerializationOptions);
+            return await WaitForDubbingCompletionAsync(dubResponse, maxRetries ?? 60, pollingInterval ?? TimeSpan.FromSeconds(dubResponse.ExpectedDuration), pollingInterval == null, progress, cancellationToken);
+        }
+
+        // Polls the project metadata until the status is "dubbed" (returns), a status other than "dubbing"
+        // is seen (throws), or maxRetries polls have been made (throws TimeoutException). When adjustInterval
+        // is set, the delay is recomputed as the expected duration divided by 2^attempt.
+        private async Task<DubbingProjectMetadata> WaitForDubbingCompletionAsync(DubbingResponse dubbingResponse, int maxRetries, TimeSpan pollingInterval, bool adjustInterval, IProgress<DubbingProjectMetadata> progress = null, CancellationToken cancellationToken = default)
+        {
+            var stopwatch = Stopwatch.StartNew();
+
+            for (var i = 1; i < maxRetries + 1; i++)
+            {
+                var metadata = await GetDubbingProjectMetadataAsync(dubbingResponse, cancellationToken).ConfigureAwait(false);
+                metadata.ExpectedDurationSeconds = dubbingResponse.ExpectedDuration;
+
+                if (metadata.Status.Equals("dubbed", StringComparison.Ordinal))
+                {
+                    stopwatch.Stop();
+                    metadata.TimeCompleted = stopwatch.Elapsed;
+                    progress?.Report(metadata);
+                    return metadata;
+                }
+
+                progress?.Report(metadata);
+
+                if (metadata.Status.Equals("dubbing", StringComparison.Ordinal))
+                {
+                    if (adjustInterval && pollingInterval.TotalSeconds > 0.5f)
+                    {
+                        pollingInterval = TimeSpan.FromSeconds(dubbingResponse.ExpectedDuration / Math.Pow(2, i));
+                    }
+
+                    if (EnableDebug)
+                    {
+                        Debug.Log($"Dubbing for {dubbingResponse.DubbingId} in progress... Will check status again in {pollingInterval.TotalSeconds} seconds.");
+                    }
+
+                    await Task.Delay(pollingInterval, cancellationToken).ConfigureAwait(false);
+                }
+                else
+                {
+                    throw new Exception($"Dubbing for {dubbingResponse.DubbingId} failed: {metadata.Error}");
+                }
+            }
+
+            throw new TimeoutException($"Dubbing for {dubbingResponse.DubbingId} timed out or exceeded expected duration.");
+        }
+
+        /// <summary>
+        /// Returns metadata about a dubbing project, including whether it’s still in progress or not.
+        /// </summary>
+        /// <param name="dubbingId">Dubbing project id.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns><see cref="DubbingProjectMetadata"/>.</returns>
+        public async Task<DubbingProjectMetadata> GetDubbingProjectMetadataAsync(string dubbingId, CancellationToken cancellationToken = default)
+        {
+            var response = await Rest.GetAsync(GetUrl($"/{dubbingId}"), new RestParameters(client.DefaultRequestHeaders), cancellationToken);
+            response.Validate(EnableDebug);
+            return JsonConvert.DeserializeObject<DubbingProjectMetadata>(response.Body, ElevenLabsClient.JsonSerializationOptions);
+        }
+
+        /// <summary>
+        /// Returns transcript for the dub in the specified format (SRT or WebVTT).
+        /// </summary>
+        /// <param name="dubbingId">Dubbing project id.</param>
+        /// <param name="languageCode">The language code of the transcript.</param>
+        /// <param name="formatType">Optional. The format type of the transcript file, either <see cref="DubbingFormat.Srt"/> or <see cref="DubbingFormat.WebVtt"/>.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns>
+        /// A string containing the transcript content in the specified format.
+        /// </returns>
+        public async Task<string> GetTranscriptForDubAsync(string dubbingId, string languageCode, DubbingFormat formatType = DubbingFormat.Srt, CancellationToken cancellationToken = default)
+        {
+            // Invariant lower-casing keeps the query value independent of the current culture.
+            var @params = new Dictionary<string, string> { { "format_type", formatType.ToString().ToLowerInvariant() } };
+            var response = await Rest.GetAsync(GetUrl($"/{dubbingId}/transcript/{languageCode}", @params), new RestParameters(client.DefaultRequestHeaders), cancellationToken);
+            response.Validate(EnableDebug);
+            return response.Body;
+        }
+
+        /// <summary>
+        /// Downloads the dubbed file and writes it into the download cache directory.
+        /// </summary>
+        /// <param name="dubbingId">Dubbing project id.</param>
+        /// <param name="languageCode">The language code of the dubbed audio.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns>Path to downloaded file.</returns>
+        public async Task<string> GetDubbedFileAsync(string dubbingId, string languageCode, CancellationToken cancellationToken = default)
+        {
+            var result = await Rest.GetAsync(GetUrl($"/{dubbingId}/audio/{languageCode}"), parameters: new RestParameters(client.DefaultRequestHeaders), cancellationToken: cancellationToken);
+            result.Validate(EnableDebug);
+            var cacheDir = await GetCacheDirectoryAsync();
+            var mimeType = result.Headers["Content-Type"];
+            var extension = mimeType switch
+            {
+                "video/mp4" => ".mp4",
+                "audio/mpeg" => ".mp3",
+                _ => throw new NotSupportedException($"Unsupported mime type: {mimeType}")
+            };
+            var fileName = $"{dubbingId}_{languageCode}{extension}";
+            var filePath = Path.Combine(cacheDir, fileName);
+            await File.WriteAllBytesAsync(filePath, result.Data, cancellationToken).ConfigureAwait(true);
+            return filePath;
+        }
+
+        /// <summary>
+        /// Deletes a dubbing project.
+        /// </summary>
+        /// <param name="dubbingId">Dubbing project id.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        public async Task DeleteDubbingProjectAsync(string dubbingId, CancellationToken cancellationToken = default)
+        {
+            var response = await Rest.DeleteAsync(GetUrl($"/{dubbingId}"), new RestParameters(client.DefaultRequestHeaders), cancellationToken);
+            response.Validate(EnableDebug);
+        }
+
+        // Validates the Rest download cache, then resolves the ElevenLabs/Dubbing directory under it via CreateNewDirectory.
+        private static async Task<string> GetCacheDirectoryAsync()
+        {
+            await Rest.ValidateCacheDirectoryAsync();
+            return Rest.DownloadCacheDirectory
+                .CreateNewDirectory(nameof(ElevenLabs))
+                .CreateNewDirectory(nameof(Dubbing));
+        }
+    }
+}
diff --git a/Runtime/Dubbing/DubbingEndpoint.cs.meta b/Runtime/Dubbing/DubbingEndpoint.cs.meta
new file mode 100644
index 0000000..7d9b457
--- /dev/null
+++ b/Runtime/Dubbing/DubbingEndpoint.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 3fcb78227a5727e448abfe3aca0738ae
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Dubbing/DubbingFormat.cs b/Runtime/Dubbing/DubbingFormat.cs
new file mode 100644
index 0000000..5388b28
--- /dev/null
+++ b/Runtime/Dubbing/DubbingFormat.cs
@@ -0,0 +1,24 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System.Runtime.Serialization;
+
+namespace ElevenLabs.Dubbing
+{
+    /// <summary>
+    /// Transcript format options for a dub.
+    /// </summary>
+    public enum DubbingFormat
+    {
+        /// <summary>
+        /// Enum member value "srt".
+        /// </summary>
+        [EnumMember(Value = "srt")]
+        Srt = 0,
+
+        /// <summary>
+        /// Enum member value "webvtt".
+        /// </summary>
+        [EnumMember(Value = "webvtt")]
+        WebVtt = 1
+    }
+}
diff --git a/Runtime/Dubbing/DubbingFormat.cs.meta b/Runtime/Dubbing/DubbingFormat.cs.meta
new file mode 100644
index 0000000..7f0d7b9
--- /dev/null
+++ b/Runtime/Dubbing/DubbingFormat.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: ccb3faac50374b44e9a38e973387e7ea
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Dubbing/DubbingProjectMetadata.cs b/Runtime/Dubbing/DubbingProjectMetadata.cs
new file mode 100644
index 0000000..502f368
--- /dev/null
+++ b/Runtime/Dubbing/DubbingProjectMetadata.cs
@@ -0,0 +1,79 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using Newtonsoft.Json;
+using System;
+using System.Collections.Generic;
+using UnityEngine.Scripting;
+
+namespace ElevenLabs.Dubbing
+{
+    /// <summary>
+    /// Metadata about a dubbing project, including its current status.
+    /// </summary>
+    [Preserve]
+    public sealed class DubbingProjectMetadata
+    {
+        [Preserve]
+        [JsonConstructor]
+        internal DubbingProjectMetadata(
+            [JsonProperty("dubbing_id")] string dubbingId,
+            [JsonProperty("name")] string name,
+            [JsonProperty("status")] string status,
+            [JsonProperty("target_languages")] IReadOnlyList<string> targetLanguages,
+            [JsonProperty("error")] string error)
+        {
+            DubbingId = dubbingId;
+            Name = name;
+            Status = status;
+            TargetLanguages = targetLanguages;
+            Error = error;
+        }
+
+        /// <summary>
+        /// Dubbing project id.
+        /// </summary>
+        [Preserve]
+        [JsonProperty("dubbing_id")]
+        public string DubbingId { get; }
+
+        /// <summary>
+        /// Value of the "name" field.
+        /// </summary>
+        [Preserve]
+        [JsonProperty("name")]
+        public string Name { get; }
+
+        /// <summary>
+        /// Project status string: "dubbing" while in progress, "dubbed" once complete.
+        /// </summary>
+        [Preserve]
+        [JsonProperty("status")]
+        public string Status { get; }
+
+        /// <summary>
+        /// Value of the "target_languages" field.
+        /// </summary>
+        [Preserve]
+        [JsonProperty("target_languages")]
+        public IReadOnlyList<string> TargetLanguages { get; }
+
+        /// <summary>
+        /// Value of the "error" field.
+        /// </summary>
+        [Preserve]
+        [JsonProperty("error")]
+        public string Error { get; }
+
+        /// <summary>
+        /// Expected duration in seconds. Not read from JSON; assigned from within this assembly.
+        /// </summary>
+        [JsonIgnore]
+        public float ExpectedDurationSeconds { get; internal set; }
+
+        /// <summary>
+        /// Time taken to complete. Not read from JSON; assigned from within this assembly.
+        /// </summary>
+        [JsonIgnore]
+        public TimeSpan TimeCompleted { get; internal set; }
+    }
+}
diff --git a/Runtime/Dubbing/DubbingProjectMetadata.cs.meta b/Runtime/Dubbing/DubbingProjectMetadata.cs.meta
new file mode 100644
index 0000000..9dad58b
--- /dev/null
+++ b/Runtime/Dubbing/DubbingProjectMetadata.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: e3e3a509892a32c49aed125bda39b6a4
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Dubbing/DubbingRequest.cs b/Runtime/Dubbing/DubbingRequest.cs
new file mode 100644
index 0000000..21e7f6c
--- /dev/null
+++ b/Runtime/Dubbing/DubbingRequest.cs
@@ -0,0 +1,307 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using UnityEngine;
+using Utilities.Encoding.Wav;
+
+namespace ElevenLabs.Dubbing
+{
+    /// <summary>
+    /// Describes the media to dub (local files, audio clips or a source url) and the dubbing options.
+    /// The streams in <see cref="Files"/> are closed by <see cref="Dispose()"/>.
+    /// </summary>
+    public sealed class DubbingRequest : IDisposable
+    {
+        /// <summary>
+        /// Creates a request for a single local file.
+        /// </summary>
+        public DubbingRequest(
+            string filePath,
+            string targetLanguage,
+            string sourceLanguage = null,
+            int? numberOfSpeakers = null,
+            bool? watermark = null,
+            int? startTime = null,
+            int? endTime = null,
+            bool? highestResolution = null,
+            bool? dropBackgroundAudio = null,
+            string projectName = null)
+            : this(new[] { filePath }, targetLanguage, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, projectName)
+        {
+        }
+
+        /// <summary>
+        /// Creates a request for one or more local files.
+        /// </summary>
+        public DubbingRequest(
+            IEnumerable<string> filePaths,
+            string targetLanguage,
+            string sourceLanguage = null,
+            int? numberOfSpeakers = null,
+            bool? watermark = null,
+            int? startTime = null,
+            int? endTime = null,
+            bool? highestResolution = null,
+            bool? dropBackgroundAudio = null,
+            string projectName = null)
+            : this(targetLanguage, null, filePaths, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, projectName)
+        {
+        }
+
+        /// <summary>
+        /// Creates a request for media located at a url.
+        /// </summary>
+        public DubbingRequest(
+            Uri sourceUrl,
+            string targetLanguage,
+            string sourceLanguage = null,
+            int? numberOfSpeakers = null,
+            bool? watermark = null,
+            int? startTime = null,
+            int? endTime = null,
+            bool? highestResolution = null,
+            bool? dropBackgroundAudio = null,
+            string projectName = null)
+            : this(targetLanguage, sourceUrl, null, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, projectName)
+        {
+        }
+
+        /// <summary>
+        /// Creates a request for a single <see cref="AudioClip"/>.
+        /// </summary>
+        public DubbingRequest(AudioClip audioClip, string targetLanguage, string sourceLanguage = null, int? numberOfSpeakers = null, bool? watermark = null, int? startTime = null, int? endTime = null, bool? highestResolution = null, bool? dropBackgroundAudio = null, string projectName = null)
+            : this(new[] { audioClip }, targetLanguage, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, projectName)
+        {
+        }
+
+        /// <summary>
+        /// Creates a request for one or more <see cref="AudioClip"/>s, each encoded to wav.
+        /// </summary>
+        /// <exception cref="MissingReferenceException"><paramref name="audioClips"/> is null.</exception>
+        /// <exception cref="ArgumentException">No clips were provided, or <paramref name="targetLanguage"/> is blank.</exception>
+        public DubbingRequest(IEnumerable<AudioClip> audioClips, string targetLanguage, string sourceLanguage = null, int? numberOfSpeakers = null, bool? watermark = null, int? startTime = null, int? endTime = null, bool? highestResolution = null, bool? dropBackgroundAudio = null, string projectName = null)
+        {
+            if (audioClips == null)
+            {
+                throw new MissingReferenceException(nameof(audioClips));
+            }
+
+            var clips = audioClips.ToList();
+
+            if (clips.Count == 0)
+            {
+                throw new ArgumentException("At least one audio clip must be provided.");
+            }
+
+            if (string.IsNullOrWhiteSpace(targetLanguage))
+            {
+                throw new ArgumentException("Target language must be provided.");
+            }
+
+            TargetLanguage = targetLanguage;
+            SourceLanguage = sourceLanguage;
+            NumberOfSpeakers = numberOfSpeakers;
+            Watermark = watermark;
+            StartTime = startTime;
+            EndTime = endTime;
+            HighestResolution = highestResolution;
+            DropBackgroundAudio = dropBackgroundAudio;
+            ProjectName = projectName;
+
+            var files = new List<(string, string, Stream)>(clips.Count);
+
+            foreach (var clip in clips)
+            {
+                var stream = new MemoryStream(clip.EncodeToWav());
+                files.Add((clip.name, "audio/wav", stream));
+            }
+
+            Files = files;
+        }
+
+        // Shared implementation behind the file-path and url constructors.
+        private DubbingRequest(
+            string targetLanguage,
+            Uri sourceUrl = null,
+            IEnumerable<string> filePaths = null,
+            string sourceLanguage = null,
+            int? numberOfSpeakers = null,
+            bool? watermark = null,
+            int? startTime = null,
+            int? endTime = null,
+            bool? highestResolution = null,
+            bool? dropBackgroundAudio = null,
+            string projectName = null)
+        {
+            if (string.IsNullOrWhiteSpace(targetLanguage))
+            {
+                throw new ArgumentException("Target language must be provided.");
+            }
+
+            TargetLanguage = targetLanguage;
+
+            if (filePaths == null && sourceUrl == null)
+            {
+                throw new ArgumentException("Either sourceUrl or filePaths must be provided.");
+            }
+
+            var files = new List<(string, string, Stream)>();
+
+            if (filePaths != null)
+            {
+                try
+                {
+                    foreach (var filePath in filePaths)
+                    {
+                        if (string.IsNullOrWhiteSpace(filePath))
+                        {
+                            throw new ArgumentException("File path cannot be empty.");
+                        }
+
+                        var fileInfo = new FileInfo(filePath);
+
+                        if (!fileInfo.Exists)
+                        {
+                            throw new FileNotFoundException($"File not found: {filePath}");
+                        }
+
+                        var stream = fileInfo.OpenRead();
+                        var extension = fileInfo.Extension.ToLowerInvariant();
+                        var mediaType = extension switch
+                        {
+                            ".3gp" => "video/3gpp",
+                            ".aac" => "audio/aac",
+                            ".acc" => "audio/aac", // earlier misspelling of ".aac", kept for backwards compatibility
+                            ".avi" => "video/x-msvideo",
+                            ".flac" => "audio/flac",
+                            ".ogg" => "audio/ogg",
+                            ".mov" => "video/quicktime",
+                            ".mp3" => "audio/mp3",
+                            ".mp4" => "video/mp4",
+                            ".raw" => "audio/raw",
+                            ".wav" => "audio/wav",
+                            ".webm" => "video/webm",
+                            _ => "application/octet-stream"
+                        };
+                        files.Add((fileInfo.Name, mediaType, stream));
+                    }
+                }
+                catch
+                {
+                    // A later path failed: release the handles already opened, because the
+                    // caller never receives an instance it could dispose.
+                    foreach (var (_, _, opened) in files)
+                    {
+                        opened.Dispose();
+                    }
+
+                    throw;
+                }
+            }
+
+            Files = files;
+            SourceUrl = sourceUrl;
+            SourceLanguage = sourceLanguage;
+            NumberOfSpeakers = numberOfSpeakers;
+            Watermark = watermark;
+            StartTime = startTime;
+            EndTime = endTime;
+            HighestResolution = highestResolution;
+            DropBackgroundAudio = dropBackgroundAudio;
+            ProjectName = projectName;
+        }
+
+        ~DubbingRequest() => Dispose(false);
+
+        /// <summary>
+        /// Files to dub, as (file name, media type, content stream) tuples.
+        /// </summary>
+        public IReadOnlyList<(string, string, Stream)> Files { get; }
+
+        /// <summary>
+        /// URL of the source video/audio file.
+        /// </summary>
+        public Uri SourceUrl { get; }
+
+        /// <summary>
+        /// Source language.
+        /// </summary>
+        /// <remarks>
+        /// A list of supported languages can be found at: https://elevenlabs.io/docs/api-reference/how-to-dub-a-video#list-of-supported-languages-for-dubbing
+        /// </remarks>
+        public string SourceLanguage { get; }
+
+        /// <summary>
+        /// The Target language to dub the content into. Can be none if dubbing studio editor is enabled and running manual mode
+        /// </summary>
+        /// <remarks>
+        /// A list of supported languages can be found at: https://elevenlabs.io/docs/api-reference/how-to-dub-a-video#list-of-supported-languages-for-dubbing
+        /// </remarks>
+        public string TargetLanguage { get; }
+
+        /// <summary>
+        /// Number of speakers to use for the dubbing. Set to 0 to automatically detect the number of speakers
+        /// </summary>
+        public int? NumberOfSpeakers { get; }
+
+        /// <summary>
+        /// Whether to apply watermark to the output video.
+        /// </summary>
+        public bool? Watermark { get; }
+
+        /// <summary>
+        /// Start time of the source video/audio file.
+        /// </summary>
+        public int? StartTime { get; }
+
+        /// <summary>
+        /// End time of the source video/audio file.
+        /// </summary>
+        public int? EndTime { get; }
+
+        /// <summary>
+        /// Whether to use the highest resolution available.
+        /// </summary>
+        public bool? HighestResolution { get; }
+
+        /// <summary>
+        /// An advanced setting. Whether to drop background audio from the final dub.
+        /// This can improve dub quality where it's known that audio shouldn't have a background track such as for speeches or monologues.
+        /// </summary>
+        public bool? DropBackgroundAudio { get; }
+
+        /// <summary>
+        /// Name of the dubbing project.
+        /// </summary>
+        public string ProjectName { get; }
+
+        private void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                if (Files == null) { return; }
+                foreach (var (_, _, stream) in Files)
+                {
+                    try
+                    {
+                        stream?.Close();
+                        stream?.Dispose();
+                    }
+                    catch (Exception e)
+                    {
+                        Debug.Log(e);
+                    }
+                }
+            }
+        }
+
+        public void Dispose()
+        {
+            Dispose(true);
+            GC.SuppressFinalize(this);
+        }
+    }
+}
diff --git a/Runtime/Dubbing/DubbingRequest.cs.meta b/Runtime/Dubbing/DubbingRequest.cs.meta
new file mode 100644
index 0000000..9279cb4
--- /dev/null
+++ b/Runtime/Dubbing/DubbingRequest.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 9e2db7e411dc2c546b4ef4d884126533
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Dubbing/DubbingResponse.cs b/Runtime/Dubbing/DubbingResponse.cs
new file mode 100644
index 0000000..cf7109c
--- /dev/null
+++ b/Runtime/Dubbing/DubbingResponse.cs
@@ -0,0 +1,45 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using Newtonsoft.Json;
+using UnityEngine.Scripting;
+
+namespace ElevenLabs.Dubbing
+{
+    /// <summary>
+    /// Deserialized response carrying the <c>dubbing_id</c> and <c>expected_duration_sec</c> fields.
+    /// </summary>
+    [Preserve]
+    public sealed class DubbingResponse
+    {
+        [Preserve]
+        [JsonConstructor]
+        internal DubbingResponse(
+            [JsonProperty("dubbing_id")] string dubbingId,
+            [JsonProperty("expected_duration_sec")] float expectedDuration)
+        {
+            ExpectedDuration = expectedDuration;
+            DubbingId = dubbingId;
+        }
+
+        /// <summary>
+        /// Value of the <c>dubbing_id</c> field.
+        /// </summary>
+        [Preserve]
+        [JsonProperty("dubbing_id")]
+        public string DubbingId { get; }
+
+        /// <summary>
+        /// Value of the <c>expected_duration_sec</c> field.
+        /// </summary>
+        [Preserve]
+        [JsonProperty("expected_duration_sec")]
+        public float ExpectedDuration { get; }
+
+        /// <summary>
+        /// Converts a response to its <see cref="DubbingId"/>; a null response converts to null.
+        /// </summary>
+        [Preserve]
+        public static implicit operator string(DubbingResponse response)
+            => response?.DubbingId;
+    }
+}
diff --git a/Runtime/Dubbing/DubbingResponse.cs.meta b/Runtime/Dubbing/DubbingResponse.cs.meta
new file mode 100644
index 0000000..d66306a
--- /dev/null
+++ b/Runtime/Dubbing/DubbingResponse.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 8b4738bd8a977f1499eecdd9bdae3182
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/ElevenLabs.asmdef b/Runtime/ElevenLabs.asmdef
index 0c9de0e..ffab41f 100644
--- a/Runtime/ElevenLabs.asmdef
+++ b/Runtime/ElevenLabs.asmdef
@@ -6,7 +6,8 @@
"GUID:a6609af893242c7438d701ddd4cce46a",
"GUID:7958db66189566541a6363568aee1575",
"GUID:d25c28436b1dcc9408d86f49a0f5210b",
- "GUID:fe98ce187c2363b409d00954d687ec68"
+ "GUID:fe98ce187c2363b409d00954d687ec68",
+ "GUID:f7a0d77b5e1d79742a738fb859ee2f28"
],
"includePlatforms": [],
"excludePlatforms": [],
diff --git a/Runtime/ElevenLabsClient.cs b/Runtime/ElevenLabsClient.cs
index c574ebf..eb7d7ad 100644
--- a/Runtime/ElevenLabsClient.cs
+++ b/Runtime/ElevenLabsClient.cs
@@ -1,7 +1,9 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.
+using ElevenLabs.Dubbing;
using ElevenLabs.History;
using ElevenLabs.Models;
+using ElevenLabs.SoundGeneration;
using ElevenLabs.TextToSpeech;
using ElevenLabs.User;
using ElevenLabs.VoiceGeneration;
@@ -40,6 +42,9 @@ public ElevenLabsClient(ElevenLabsAuthentication authentication = null, ElevenLa
HistoryEndpoint = new HistoryEndpoint(this);
TextToSpeechEndpoint = new TextToSpeechEndpoint(this);
VoiceGenerationEndpoint = new VoiceGenerationEndpoint(this);
+ SharedVoicesEndpoint = new SharedVoicesEndpoint(this);
+ DubbingEndpoint = new DubbingEndpoint(this);
+ SoundGenerationEndpoint = new SoundGenerationEndpoint(this);
}
protected override void SetupDefaultRequestHeaders()
@@ -88,6 +93,12 @@ protected override void ValidateAuthentication()
public TextToSpeechEndpoint TextToSpeechEndpoint { get; }
+ public SharedVoicesEndpoint SharedVoicesEndpoint { get; }
+
public VoiceGenerationEndpoint VoiceGenerationEndpoint { get; }
+
+ public DubbingEndpoint DubbingEndpoint { get; }
+
+ public SoundGenerationEndpoint SoundGenerationEndpoint { get; }
}
}
diff --git a/Runtime/Models/Model.cs b/Runtime/Models/Model.cs
index 6b4423e..b45a6f5 100644
--- a/Runtime/Models/Model.cs
+++ b/Runtime/Models/Model.cs
@@ -71,7 +71,7 @@ public Model(
public IReadOnlyList Languages { get; }
[Preserve]
- public static implicit operator string(Model model) => model.ToString();
+ public static implicit operator string(Model model) => model?.ToString();
[Preserve]
public override string ToString() => Id;
@@ -84,40 +84,56 @@ public Model(
public static Model MonoLingualV1 => EnglishV1;
///
- /// Use our standard English language model to generate speech in a variety of voices, styles and moods.
+ /// Our first ever text to speech model. Now outclassed by Multilingual v2 (for content creation) and Turbo v2.5 (for low latency use cases).
///
[Preserve]
[JsonIgnore]
public static Model EnglishV1 { get; } = new("eleven_monolingual_v1");
///
- /// Speech to speech model suitable for scenarios where you need maximum control over the content and prosody of your generations.
+ /// Our English-only, low latency model. Best for developer use cases where speed matters and you only need English. Performance is on par with Turbo v2.5.
///
[Preserve]
[JsonIgnore]
- public static Model EnglishV2 { get; } = new("eleven_english_sts_v2");
+ public static Model EnglishTurboV2 { get; } = new("eleven_turbo_v2");
///
- /// Cutting-edge turbo model is ideally suited for tasks demanding extremely low latency.
+ /// Our high quality, low latency model in 32 languages. Best for developer use cases where speed matters and you need non-English languages.
///
[Preserve]
[JsonIgnore]
- public static Model EnglishTurboV2 { get; } = new("eleven_turbo_v2");
+ public static Model TurboV2_5 { get; } = new("eleven_turbo_v2_5");
///
- /// Generate lifelike speech in multiple languages and create content that resonates with a broader audience.
+ /// Our first Multilingual model, capable of generating speech in 10 languages.
+ /// Now outclassed by Multilingual v2 (for content creation) and Turbo v2.5 (for low latency use cases).
///
[Preserve]
[JsonIgnore]
public static Model MultiLingualV1 { get; } = new("eleven_multilingual_v1");
///
- /// State of the art multilingual speech synthesis model, able to generate life-like speech in 29 languages.
+ /// Our most life-like, emotionally rich model in 29 languages. Best for voice overs, audiobooks, post-production, or any other content creation needs.
///
[Preserve]
[JsonIgnore]
public static Model MultiLingualV2 { get; } = new("eleven_multilingual_v2");
+ ///
+ /// Our state-of-the-art speech to speech model suitable for scenarios where you need maximum control over the content and prosody of your generations.
+ ///
+ [Preserve]
+ [JsonIgnore]
+ public static Model EnglishSpeechToSpeechV2 { get; } = new("eleven_english_sts_v2");
+
+ ///
+ /// Our cutting-edge, multilingual speech-to-speech model is designed for situations that demand unparalleled control over both
+ /// the content and the prosody of the generated speech across various languages.
+ ///
+ [Preserve]
+ [JsonIgnore]
+ public static Model MultiLingualSpeechToSpeechV2 { get; } = new("eleven_multilingual_sts_v2");
+
#endregion Predefined Models
}
}
diff --git a/Runtime/SoundGeneration.meta b/Runtime/SoundGeneration.meta
new file mode 100644
index 0000000..2a7ef9a
--- /dev/null
+++ b/Runtime/SoundGeneration.meta
@@ -0,0 +1,8 @@
+fileFormatVersion: 2
+guid: ee54f9378f264ac4b8b65a576da166ba
+folderAsset: yes
+DefaultImporter:
+ externalObjects: {}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/SoundGeneration/SoundGenerationEndpoint.cs b/Runtime/SoundGeneration/SoundGenerationEndpoint.cs
new file mode 100644
index 0000000..ce244e0
--- /dev/null
+++ b/Runtime/SoundGeneration/SoundGenerationEndpoint.cs
@@ -0,0 +1,45 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using Newtonsoft.Json;
+using System;
+using System.Threading;
+using System.Threading.Tasks;
+using UnityEngine;
+using UnityEngine.Networking;
+using Utilities.WebRequestRest;
+
+namespace ElevenLabs.SoundGeneration
+{
+    /// <summary>
+    /// Endpoint rooted at <c>sound-generation</c> that turns a text prompt into an audio clip.
+    /// </summary>
+    public class SoundGenerationEndpoint : ElevenLabsBaseEndPoint
+    {
+        public SoundGenerationEndpoint(ElevenLabsClient client) : base(client) { }
+
+        protected override string Root => "sound-generation";
+
+        /// <summary>
+        /// Converts a text prompt into a sound effect.
+        /// Create sound effects for your videos, voice-overs or video games.
+        /// </summary>
+        /// <param name="request"><see cref="SoundGenerationRequest"/>.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns><see cref="GeneratedClip"/>.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
+        public async Task<GeneratedClip> GenerateSoundAsync(SoundGenerationRequest request, CancellationToken cancellationToken = default)
+        {
+            // Fail before any network traffic; request.Text is only read after the download completes.
+            if (request == null)
+            {
+                throw new ArgumentNullException(nameof(request));
+            }
+
+            var payload = JsonConvert.SerializeObject(request, ElevenLabsClient.JsonSerializationOptions);
+            var clipId = Guid.NewGuid().ToString();
+            var audioClip = await Rest.DownloadAudioClipAsync(GetUrl(), AudioType.MPEG, UnityWebRequest.kHttpVerbPOST, clipId, payload, parameters: new RestParameters(client.DefaultRequestHeaders), cancellationToken: cancellationToken);
+            Rest.TryGetDownloadCacheItem(clipId, out var cachedPath);
+            return new GeneratedClip(clipId, request.Text, audioClip, cachedPath);
+        }
+    }
+}
diff --git a/Runtime/SoundGeneration/SoundGenerationEndpoint.cs.meta b/Runtime/SoundGeneration/SoundGenerationEndpoint.cs.meta
new file mode 100644
index 0000000..df10a22
--- /dev/null
+++ b/Runtime/SoundGeneration/SoundGenerationEndpoint.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: cebdd8018f416e0409bceefa4a68ff61
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/SoundGeneration/SoundGenerationRequest.cs b/Runtime/SoundGeneration/SoundGenerationRequest.cs
new file mode 100644
index 0000000..79a58d1
--- /dev/null
+++ b/Runtime/SoundGeneration/SoundGenerationRequest.cs
@@ -0,0 +1,82 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using Newtonsoft.Json;
+using System;
+using UnityEngine.Scripting;
+
+namespace ElevenLabs.SoundGeneration
+{
+    /// <summary>
+    /// Request payload for sound generation: a text prompt plus optional duration and prompt influence.
+    /// </summary>
+    [Preserve]
+    public sealed class SoundGenerationRequest
+    {
+        /// <summary>
+        /// Constructor.
+        /// </summary>
+        /// <param name="text">
+        /// The text that will get converted into a sound effect.
+        /// </param>
+        /// <param name="duration">
+        /// The duration of the sound which will be generated in seconds.
+        /// Must be at least 0.5 and at most 22.
+        /// If set to null we will guess the optimal duration using the prompt.
+        /// Defaults to null.
+        /// </param>
+        /// <param name="promptInfluence">
+        /// A higher prompt influence makes your generation follow the prompt more closely while also making generations less variable.
+        /// Must be a value between 0 and 1.
+        /// Defaults to 0.3.
+        /// </param>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="duration"/> or <paramref name="promptInfluence"/> has a value outside its allowed range, or is NaN.
+        /// </exception>
+        [Preserve]
+        public SoundGenerationRequest(string text, float? duration = null, float? promptInfluence = null)
+        {
+            Text = text;
+
+            // Negated in-range tests: NaN fails every comparison, so it is rejected instead of slipping through.
+            if (duration.HasValue && !(duration.Value >= 0.5f && duration.Value <= 22f))
+            {
+                throw new ArgumentOutOfRangeException(nameof(duration), "Duration must be a value between 0.5 and 22.");
+            }
+
+            Duration = duration;
+
+            if (promptInfluence.HasValue && !(promptInfluence.Value >= 0f && promptInfluence.Value <= 1f))
+            {
+                throw new ArgumentOutOfRangeException(nameof(promptInfluence), "Prompt influence must be a value between 0 and 1.");
+            }
+
+            PromptInfluence = promptInfluence;
+        }
+
+        /// <summary>
+        /// The text that will get converted into a sound effect.
+        /// </summary>
+        [Preserve]
+        [JsonProperty("text")]
+        public string Text { get; }
+
+        /// <summary>
+        /// The duration of the sound which will be generated in seconds.
+        /// Must be at least 0.5 and at most 22.
+        /// If null we will guess the optimal duration using the prompt.
+        /// Defaults to null.
+        /// </summary>
+        [Preserve]
+        [JsonProperty("duration_seconds")]
+        public float? Duration { get; }
+
+        /// <summary>
+        /// A higher prompt influence makes your generation follow the prompt more closely while also making generations less variable.
+        /// Must be a value between 0 and 1.
+        /// Defaults to 0.3.
+        /// </summary>
+        [Preserve]
+        [JsonProperty("prompt_influence")]
+        public float? PromptInfluence { get; }
+    }
+}
diff --git a/Runtime/SoundGeneration/SoundGenerationRequest.cs.meta b/Runtime/SoundGeneration/SoundGenerationRequest.cs.meta
new file mode 100644
index 0000000..3d87b77
--- /dev/null
+++ b/Runtime/SoundGeneration/SoundGenerationRequest.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: df74482b216f40043b9f122500150455
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/TextToSpeech/TextToSpeechEndpoint.cs b/Runtime/TextToSpeech/TextToSpeechEndpoint.cs
index 1d2075f..5032a63 100644
--- a/Runtime/TextToSpeech/TextToSpeechEndpoint.cs
+++ b/Runtime/TextToSpeech/TextToSpeechEndpoint.cs
@@ -244,7 +244,6 @@ void StreamCallback(Response partialResponse)
if (!audioClip.SetData(chunk, 0))
{
Debug.LogError("Failed to set pcm data to partial clip.");
-
return;
}
diff --git a/Runtime/Voices/SharedVoiceInfo.cs b/Runtime/Voices/SharedVoiceInfo.cs
new file mode 100644
index 0000000..b87ae01
--- /dev/null
+++ b/Runtime/Voices/SharedVoiceInfo.cs
@@ -0,0 +1,191 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using Newtonsoft.Json;
+using System;
+using UnityEngine.Scripting;
+
+namespace ElevenLabs.Voices
+{
+    /// <summary>
+    /// A single shared voice entry. Each property mirrors the JSON field named in its <see cref="JsonPropertyAttribute"/>.
+    /// </summary>
+    [Preserve]
+    public sealed class SharedVoiceInfo
+    {
+        [Preserve]
+        [JsonConstructor]
+        internal SharedVoiceInfo(
+            [JsonProperty("public_owner_id")] string ownerId,
+            [JsonProperty("voice_id")] string voiceId,
+            [JsonProperty("date_unix")] int dateUnix,
+            [JsonProperty("name")] string name,
+            [JsonProperty("accent")] string accent,
+            [JsonProperty("gender")] string gender,
+            [JsonProperty("age")] string age,
+            [JsonProperty("descriptive")] string descriptive,
+            [JsonProperty("use_case")] string useCase,
+            [JsonProperty("category")] string category,
+            [JsonProperty("language")] string language,
+            [JsonProperty("description")] string description,
+            [JsonProperty("preview_url")] string previewUrl,
+            [JsonProperty("usage_character_count_1y")] int usageCharacterCount1Y,
+            [JsonProperty("usage_character_count_7d")] int usageCharacterCount7D,
+            [JsonProperty("play_api_usage_character_count_1y")] int playApiUsageCharacterCount1Y,
+            [JsonProperty("cloned_by_count")] int clonedByCount,
+            [JsonProperty("rate")] float rate,
+            [JsonProperty("free_users_allowed")] bool freeUsersAllowed,
+            [JsonProperty("live_moderation_enabled")] bool liveModerationEnabled,
+            [JsonProperty("featured")] bool featured,
+            [JsonProperty("notice_period")] int? noticePeriod,
+            [JsonProperty("instagram_username")] string instagramUsername,
+            [JsonProperty("twitter_username")] string twitterUsername,
+            [JsonProperty("youtube_username")] string youtubeUsername,
+            [JsonProperty("tiktok_username")] string tikTokUsername,
+            [JsonProperty("image_url")] string imageUrl)
+        {
+            OwnerId = ownerId;
+            VoiceId = voiceId;
+            DateUnix = dateUnix;
+            Name = name;
+            Accent = accent;
+            Gender = gender;
+            Age = age;
+            Descriptive = descriptive;
+            UseCase = useCase;
+            Category = category;
+            Language = language;
+            Description = description;
+            PreviewUrl = previewUrl;
+            UsageCharacterCount1Y = usageCharacterCount1Y;
+            UsageCharacterCount7D = usageCharacterCount7D;
+            PlayApiUsageCharacterCount1Y = playApiUsageCharacterCount1Y;
+            ClonedByCount = clonedByCount;
+            Rate = rate;
+            FreeUsersAllowed = freeUsersAllowed;
+            LiveModerationEnabled = liveModerationEnabled;
+            Featured = featured;
+            NoticePeriod = noticePeriod;
+            InstagramUsername = instagramUsername;
+            TwitterUsername = twitterUsername;
+            YoutubeUsername = youtubeUsername;
+            TikTokUsername = tikTokUsername;
+            ImageUrl = imageUrl;
+        }
+
+        [Preserve]
+        [JsonProperty("public_owner_id")]
+        public string OwnerId { get; }
+
+        [Preserve]
+        [JsonProperty("voice_id")]
+        public string VoiceId { get; }
+
+        [Preserve]
+        [JsonProperty("date_unix")]
+        public int DateUnix { get; }
+
+        /// <summary>
+        /// <see cref="DateUnix"/> (seconds since the Unix epoch) converted to a <see cref="DateTime"/>.
+        /// The value represents UTC time, but its <see cref="DateTime.Kind"/> is <see cref="DateTimeKind.Unspecified"/>.
+        /// </summary>
+        [Preserve]
+        [JsonIgnore]
+        public DateTime Date => DateTimeOffset.FromUnixTimeSeconds(DateUnix).DateTime;
+
+        [Preserve]
+        [JsonProperty("name")]
+        public string Name { get; }
+
+        [Preserve]
+        [JsonProperty("accent")]
+        public string Accent { get; }
+
+        [Preserve]
+        [JsonProperty("gender")]
+        public string Gender { get; }
+
+        [Preserve]
+        [JsonProperty("age")]
+        public string Age { get; }
+
+        [Preserve]
+        [JsonProperty("descriptive")]
+        public string Descriptive { get; }
+
+        [Preserve]
+        [JsonProperty("use_case")]
+        public string UseCase { get; }
+
+        [Preserve]
+        [JsonProperty("category")]
+        public string Category { get; }
+
+        [Preserve]
+        [JsonProperty("language")]
+        public string Language { get; }
+
+        [Preserve]
+        [JsonProperty("description")]
+        public string Description { get; }
+
+        [Preserve]
+        [JsonProperty("preview_url")]
+        public string PreviewUrl { get; }
+
+        [Preserve]
+        [JsonProperty("usage_character_count_1y")]
+        public int UsageCharacterCount1Y { get; }
+
+        [Preserve]
+        [JsonProperty("usage_character_count_7d")]
+        public int UsageCharacterCount7D { get; }
+
+        [Preserve]
+        [JsonProperty("play_api_usage_character_count_1y")]
+        public int PlayApiUsageCharacterCount1Y { get; }
+
+        [Preserve]
+        [JsonProperty("cloned_by_count")]
+        public int ClonedByCount { get; }
+
+        [Preserve]
+        [JsonProperty("rate")]
+        public float Rate { get; }
+
+        [Preserve]
+        [JsonProperty("free_users_allowed")]
+        public bool FreeUsersAllowed { get; }
+
+        [Preserve]
+        [JsonProperty("live_moderation_enabled")]
+        public bool LiveModerationEnabled { get; }
+
+        [Preserve]
+        [JsonProperty("featured")]
+        public bool Featured { get; }
+
+        [Preserve]
+        [JsonProperty("notice_period")]
+        public int? NoticePeriod { get; }
+
+        [Preserve]
+        [JsonProperty("instagram_username")]
+        public string InstagramUsername { get; }
+
+        [Preserve]
+        [JsonProperty("twitter_username")]
+        public string TwitterUsername { get; }
+
+        [Preserve]
+        [JsonProperty("youtube_username")]
+        public string YoutubeUsername { get; }
+
+        [Preserve]
+        [JsonProperty("tiktok_username")]
+        public string TikTokUsername { get; }
+
+        [Preserve]
+        [JsonProperty("image_url")]
+        public string ImageUrl { get; }
+    }
+}
diff --git a/Runtime/Voices/SharedVoiceInfo.cs.meta b/Runtime/Voices/SharedVoiceInfo.cs.meta
new file mode 100644
index 0000000..3676818
--- /dev/null
+++ b/Runtime/Voices/SharedVoiceInfo.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 2e3f6b28361ecf14ca892084b81967fe
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Voices/SharedVoiceList.cs b/Runtime/Voices/SharedVoiceList.cs
new file mode 100644
index 0000000..0418ed7
--- /dev/null
+++ b/Runtime/Voices/SharedVoiceList.cs
@@ -0,0 +1,48 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using Newtonsoft.Json;
+using System.Collections.Generic;
+using UnityEngine.Scripting;
+
+namespace ElevenLabs.Voices
+{
+    /// <summary>
+    /// One page of shared voices, deserialized from the <c>voices</c>, <c>has_more</c> and <c>last_sort_id</c> fields.
+    /// </summary>
+    [Preserve]
+    public sealed class SharedVoiceList
+    {
+        [Preserve]
+        [JsonConstructor]
+        internal SharedVoiceList(
+            [JsonProperty("voices")] IReadOnlyList<SharedVoiceInfo> voices,
+            [JsonProperty("has_more")] bool hasMore,
+            [JsonProperty("last_sort_id")] string lastId)
+        {
+            Voices = voices;
+            HasMore = hasMore;
+            LastId = lastId;
+        }
+
+        /// <summary>
+        /// The voices of this page (the <c>voices</c> field).
+        /// </summary>
+        [Preserve]
+        [JsonProperty("voices")]
+        public IReadOnlyList<SharedVoiceInfo> Voices { get; }
+
+        /// <summary>
+        /// Value of the <c>has_more</c> field.
+        /// </summary>
+        [Preserve]
+        [JsonProperty("has_more")]
+        public bool HasMore { get; }
+
+        /// <summary>
+        /// Value of the <c>last_sort_id</c> field.
+        /// </summary>
+        [Preserve]
+        [JsonProperty("last_sort_id")]
+        public string LastId { get; }
+    }
+}
diff --git a/Runtime/Voices/SharedVoiceList.cs.meta b/Runtime/Voices/SharedVoiceList.cs.meta
new file mode 100644
index 0000000..f0834bb
--- /dev/null
+++ b/Runtime/Voices/SharedVoiceList.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 6af582794cd0d5d4b99d747991cca6ea
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Voices/SharedVoiceQuery.cs b/Runtime/Voices/SharedVoiceQuery.cs
new file mode 100644
index 0000000..cf6be4f
--- /dev/null
+++ b/Runtime/Voices/SharedVoiceQuery.cs
@@ -0,0 +1,103 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System.Collections.Generic;
+using System.Globalization;
+
+namespace ElevenLabs.Voices
+{
+    /// <summary>
+    /// Optional filters for a shared voices request. Filters that are null, blank or empty are left out of the query.
+    /// </summary>
+    public sealed class SharedVoiceQuery
+    {
+        public int? PageSize { get; set; } = null;
+
+        public string Category { get; set; } = null;
+
+        public string Gender { get; set; } = null;
+
+        public string Age { get; set; } = null;
+
+        public string Accent { get; set; } = null;
+
+        public string Language { get; set; } = null;
+
+        public string SearchTerms { get; set; } = null;
+
+        public List<string> UseCases { get; set; } = null;
+
+        public List<string> Descriptives { get; set; } = null;
+
+        public bool? Featured { get; set; } = null;
+
+        public bool? ReaderAppEnabled { get; set; } = null;
+
+        public string OwnerId { get; set; } = null;
+
+        public string Sort { get; set; } = null;
+
+        public int? Page { get; set; } = null;
+
+        /// <summary>
+        /// Builds the query parameters for every filter that has a value.
+        /// </summary>
+        /// <returns>
+        /// Parameter names mapped to their values. Lists are comma separated, numbers use the invariant culture
+        /// and booleans are written in lower case (<c>true</c>/<c>false</c>).
+        /// </returns>
+        public Dictionary<string, string> ToQueryParams()
+        {
+            var parameters = new Dictionary<string, string>();
+
+            AddNumber("page_size", PageSize);
+            AddText("category", Category);
+            AddText("gender", Gender);
+            AddText("age", Age);
+            AddText("accent", Accent);
+            AddText("language", Language);
+            AddText("search", SearchTerms);
+            AddList("use_cases", UseCases);
+            AddList("descriptives", Descriptives);
+            AddFlag("featured", Featured);
+            AddFlag("reader_app_enabled", ReaderAppEnabled);
+            AddText("owner_id", OwnerId);
+            AddText("sort", Sort);
+            AddNumber("page", Page);
+
+            return parameters;
+
+            void AddText(string key, string value)
+            {
+                if (!string.IsNullOrWhiteSpace(value))
+                {
+                    parameters.Add(key, value);
+                }
+            }
+
+            void AddNumber(string key, int? value)
+            {
+                if (value.HasValue)
+                {
+                    parameters.Add(key, value.Value.ToString(CultureInfo.InvariantCulture));
+                }
+            }
+
+            // bool.ToString() yields "True"/"False"; write the lower-case literals instead.
+            void AddFlag(string key, bool? value)
+            {
+                if (value.HasValue)
+                {
+                    parameters.Add(key, value.Value ? "true" : "false");
+                }
+            }
+
+            void AddList(string key, List<string> values)
+            {
+                if (values is { Count: > 0 })
+                {
+                    parameters.Add(key, string.Join(',', values));
+                }
+            }
+        }
+    }
+}
diff --git a/Runtime/Voices/SharedVoiceQuery.cs.meta b/Runtime/Voices/SharedVoiceQuery.cs.meta
new file mode 100644
index 0000000..2f69228
--- /dev/null
+++ b/Runtime/Voices/SharedVoiceQuery.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: e28a0ccf7ce12674eaba7eb16f7dfa98
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Voices/SharedVoicesEndpoint.cs b/Runtime/Voices/SharedVoicesEndpoint.cs
new file mode 100644
index 0000000..c522ae6
--- /dev/null
+++ b/Runtime/Voices/SharedVoicesEndpoint.cs
@@ -0,0 +1,32 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using Newtonsoft.Json;
+using System.Threading;
+using System.Threading.Tasks;
+using Utilities.WebRequestRest;
+
+namespace ElevenLabs.Voices
+{
+    /// <summary>
+    /// Endpoint rooted at <c>shared-voices</c>.
+    /// </summary>
+    public sealed class SharedVoicesEndpoint : ElevenLabsBaseEndPoint
+    {
+        public SharedVoicesEndpoint(ElevenLabsClient client) : base(client) { }
+
+        protected override string Root => "shared-voices";
+
+        /// <summary>
+        /// Gets a list of shared voices.
+        /// </summary>
+        /// <param name="query">Optional, <see cref="SharedVoiceQuery"/>. When null, no query parameters are sent.</param>
+        /// <param name="cancellationToken">Optional, <see cref="CancellationToken"/>.</param>
+        /// <returns><see cref="SharedVoiceList"/>.</returns>
+        public async Task<SharedVoiceList> GetSharedVoicesAsync(SharedVoiceQuery query = null, CancellationToken cancellationToken = default)
+        {
+            var response = await Rest.GetAsync(GetUrl(queryParameters: query?.ToQueryParams()), new RestParameters(client.DefaultRequestHeaders), cancellationToken);
+            response.Validate(EnableDebug);
+            return JsonConvert.DeserializeObject<SharedVoiceList>(response.Body, ElevenLabsClient.JsonSerializationOptions);
+        }
+    }
+}
diff --git a/Runtime/Voices/SharedVoicesEndpoint.cs.meta b/Runtime/Voices/SharedVoicesEndpoint.cs.meta
new file mode 100644
index 0000000..a50edb5
--- /dev/null
+++ b/Runtime/Voices/SharedVoicesEndpoint.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 66684bd4b4bc26c49b47ad0f8151da6a
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Runtime/Voices/VoicesEndpoint.cs b/Runtime/Voices/VoicesEndpoint.cs
index 0705b14..ad67c0a 100644
--- a/Runtime/Voices/VoicesEndpoint.cs
+++ b/Runtime/Voices/VoicesEndpoint.cs
@@ -62,9 +62,7 @@ public VoicesEndpoint(ElevenLabsClient client) : base(client) { }
///
/// of s.
public Task> GetAllVoicesAsync(CancellationToken cancellationToken = default)
- {
- return GetAllVoicesAsync(true, cancellationToken);
- }
+ => GetAllVoicesAsync(true, cancellationToken);
///
/// Gets a list of all available voices for a user.
@@ -85,7 +83,7 @@ public async Task> GetAllVoicesAsync(bool downloadSettings,
foreach (var voice in voices)
{
voiceSettingsTasks.Add(LocalGetVoiceSettingsAsync());
-
+
async Task LocalGetVoiceSettingsAsync()
{
await Awaiters.UnityMainThread;
@@ -143,7 +141,7 @@ public async Task GetVoiceAsync(string voiceId, bool withSettings = false
throw new ArgumentNullException(nameof(voiceId));
}
- var response = await Rest.GetAsync(GetUrl($"/{voiceId}?with_settings={withSettings}"), new RestParameters(client.DefaultRequestHeaders), cancellationToken);
+ var response = await Rest.GetAsync(GetUrl($"/{voiceId}?with_settings={withSettings.ToString().ToLower()}"), new RestParameters(client.DefaultRequestHeaders), cancellationToken);
response.Validate(EnableDebug);
return JsonConvert.DeserializeObject(response.Body, ElevenLabsClient.JsonSerializationOptions);
}
@@ -203,7 +201,11 @@ public async Task AddVoiceAsync(string name, IEnumerable samplePa
try
{
var fileBytes = await File.ReadAllBytesAsync(sample, cancellationToken);
- form.AddBinaryData("files", fileBytes, Path.GetFileName(sample));
+
+ if (fileBytes.Length > 0)
+ {
+ form.AddBinaryData("files", fileBytes, Path.GetFileName(sample));
+ }
}
catch (Exception e)
{
diff --git a/Samples~/TextToSpeech/TextToSpeechDemo.cs b/Samples~/TextToSpeech/TextToSpeechDemo.cs
index 955f3e0..0e13968 100644
--- a/Samples~/TextToSpeech/TextToSpeechDemo.cs
+++ b/Samples~/TextToSpeech/TextToSpeechDemo.cs
@@ -6,7 +6,6 @@
using System.Collections.Generic;
using System.Linq;
using System.Threading;
-using System.Threading.Tasks;
using UnityEngine;
using Utilities.Async;
@@ -33,6 +32,11 @@ public class TextToSpeechDemo : MonoBehaviour
private readonly Queue streamClipQueue = new();
+#if !UNITY_2022_3_OR_NEWER
+ private readonly CancellationTokenSource lifetimeCts = new();
+ private CancellationToken destroyCancellationToken => lifetimeCts.Token;
+#endif
+
private void OnValidate()
{
if (audioSource == null)
@@ -79,6 +83,14 @@ private async void Start()
}
}
+#if !UNITY_2022_3_OR_NEWER
+ private void OnDestroy()
+ {
+ lifetimeCts.Cancel();
+ lifetimeCts.Dispose();
+ }
+#endif
+
private async void PlayStreamQueue(CancellationToken cancellationToken)
{
try
diff --git a/Tests/AbstractTestFixture.cs b/Tests/AbstractTestFixture.cs
index 0a2b71d..4b41dc0 100644
--- a/Tests/AbstractTestFixture.cs
+++ b/Tests/AbstractTestFixture.cs
@@ -11,7 +11,7 @@ protected AbstractTestFixture()
var auth = new ElevenLabsAuthentication().LoadDefaultsReversed();
var settings = new ElevenLabsSettings();
ElevenLabsClient = new ElevenLabsClient(auth, settings);
- //ElevenLabsClient.EnableDebug = true;
+ ElevenLabsClient.EnableDebug = true;
}
}
}
diff --git a/Tests/Test_Fixture_00_Authentication.cs b/Tests/Test_Fixture_00_Authentication.cs
index 0716a84..69d5843 100644
--- a/Tests/Test_Fixture_00_Authentication.cs
+++ b/Tests/Test_Fixture_00_Authentication.cs
@@ -146,7 +146,7 @@ public void Test_09_CustomDomainConfigurationSettings()
var auth = new ElevenLabsAuthentication("customIssuedToken");
var settings = new ElevenLabsSettings(domain: "api.your-custom-domain.com");
var api = new ElevenLabsClient(auth, settings);
- Console.WriteLine(api.Settings.BaseRequestUrlFormat);
+ Debug.Log(api.Settings.BaseRequestUrlFormat);
}
[TearDown]
diff --git a/Tests/Test_Fixture_06_Models.cs b/Tests/Test_Fixture_02_Models.cs
similarity index 92%
rename from Tests/Test_Fixture_06_Models.cs
rename to Tests/Test_Fixture_02_Models.cs
index 701b87a..b9d53f1 100644
--- a/Tests/Test_Fixture_06_Models.cs
+++ b/Tests/Test_Fixture_02_Models.cs
@@ -6,7 +6,7 @@
namespace ElevenLabs.Tests
{
- internal class Test_Fixture_06_Models : AbstractTestFixture
+ internal class Test_Fixture_02_Models : AbstractTestFixture
{
[Test]
public async Task Test_01_GetModels()
diff --git a/Tests/Test_Fixture_06_Models.cs.meta b/Tests/Test_Fixture_02_Models.cs.meta
similarity index 100%
rename from Tests/Test_Fixture_06_Models.cs.meta
rename to Tests/Test_Fixture_02_Models.cs.meta
diff --git a/Tests/Test_Fixture_05_VoiceGeneration.cs b/Tests/Test_Fixture_03_VoiceGeneration.cs
similarity index 96%
rename from Tests/Test_Fixture_05_VoiceGeneration.cs
rename to Tests/Test_Fixture_03_VoiceGeneration.cs
index 2629b27..7d315ee 100644
--- a/Tests/Test_Fixture_05_VoiceGeneration.cs
+++ b/Tests/Test_Fixture_03_VoiceGeneration.cs
@@ -10,7 +10,7 @@
namespace ElevenLabs.Tests
{
- internal class Test_Fixture_05_VoiceGeneration : AbstractTestFixture
+ internal class Test_Fixture_03_VoiceGeneration : AbstractTestFixture
{
[Test]
public async Task Test_01_GetVoiceGenerationOptions()
diff --git a/Tests/Test_Fixture_05_VoiceGeneration.cs.meta b/Tests/Test_Fixture_03_VoiceGeneration.cs.meta
similarity index 100%
rename from Tests/Test_Fixture_05_VoiceGeneration.cs.meta
rename to Tests/Test_Fixture_03_VoiceGeneration.cs.meta
diff --git a/Tests/Test_Fixture_03_TextToSpeechEndpoint.cs b/Tests/Test_Fixture_04_TextToSpeechEndpoint.cs
similarity index 96%
rename from Tests/Test_Fixture_03_TextToSpeechEndpoint.cs
rename to Tests/Test_Fixture_04_TextToSpeechEndpoint.cs
index 0dc30a7..a772e66 100644
--- a/Tests/Test_Fixture_03_TextToSpeechEndpoint.cs
+++ b/Tests/Test_Fixture_04_TextToSpeechEndpoint.cs
@@ -8,7 +8,7 @@
namespace ElevenLabs.Tests
{
- internal class Test_Fixture_03_TextToSpeechEndpoint : AbstractTestFixture
+ internal class Test_Fixture_04_TextToSpeechEndpoint : AbstractTestFixture
{
[Test]
public async Task Test_01_TextToSpeech()
diff --git a/Tests/Test_Fixture_03_TextToSpeechEndpoint.cs.meta b/Tests/Test_Fixture_04_TextToSpeechEndpoint.cs.meta
similarity index 100%
rename from Tests/Test_Fixture_03_TextToSpeechEndpoint.cs.meta
rename to Tests/Test_Fixture_04_TextToSpeechEndpoint.cs.meta
diff --git a/Tests/Test_Fixture_02_VoicesEndpoint.cs b/Tests/Test_Fixture_05_VoicesEndpoint.cs
similarity index 91%
rename from Tests/Test_Fixture_02_VoicesEndpoint.cs
rename to Tests/Test_Fixture_05_VoicesEndpoint.cs
index 89f6557..c0c0b9a 100644
--- a/Tests/Test_Fixture_02_VoicesEndpoint.cs
+++ b/Tests/Test_Fixture_05_VoicesEndpoint.cs
@@ -2,6 +2,7 @@
using ElevenLabs.Voices;
using NUnit.Framework;
+using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
@@ -10,10 +11,10 @@
namespace ElevenLabs.Tests
{
- internal class Test_Fixture_02_VoicesEndpoint : AbstractTestFixture
+ internal class Test_Fixture_05_VoicesEndpoint : AbstractTestFixture
{
[Test]
- public async Task Test_01_GetVoices()
+ public async Task Test_01_01_GetVoices()
{
Assert.NotNull(ElevenLabsClient.VoicesEndpoint);
var results = await ElevenLabsClient.VoicesEndpoint.GetAllVoicesAsync();
@@ -26,6 +27,20 @@ public async Task Test_01_GetVoices()
}
}
+ [Test]
+ public async Task Test_01_02_GetSharedVoices()
+ {
+     Assert.NotNull(ElevenLabsClient.SharedVoicesEndpoint);
+     var results = await ElevenLabsClient.SharedVoicesEndpoint.GetSharedVoicesAsync();
+     Assert.NotNull(results);
+     Assert.IsNotEmpty(results.Voices);
+     // Log through Unity's logger, as the other fixtures in this change do, rather than the process console.
+     foreach (var voice in results.Voices)
+     {
+         UnityEngine.Debug.Log($"{voice.OwnerId} | {voice.VoiceId} | {voice.Date} | {voice.Name}");
+     }
+ }
+
[Test]
public async Task Test_02_GetDefaultVoiceSettings()
{
diff --git a/Tests/Test_Fixture_02_VoicesEndpoint.cs.meta b/Tests/Test_Fixture_05_VoicesEndpoint.cs.meta
similarity index 100%
rename from Tests/Test_Fixture_02_VoicesEndpoint.cs.meta
rename to Tests/Test_Fixture_05_VoicesEndpoint.cs.meta
diff --git a/Tests/Test_Fixture_06_SoundGenerationEndpoint.cs b/Tests/Test_Fixture_06_SoundGenerationEndpoint.cs
new file mode 100644
index 0000000..64ffd5c
--- /dev/null
+++ b/Tests/Test_Fixture_06_SoundGenerationEndpoint.cs
@@ -0,0 +1,32 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using ElevenLabs.SoundGeneration;
+using NUnit.Framework;
+using System.Threading.Tasks;
+
+namespace ElevenLabs.Tests
+{
+    /// <summary>
+    /// Test fixture that exercises the sound generation endpoint of the fixture's ElevenLabsClient.
+    /// </summary>
+    internal class Test_Fixture_06_SoundGenerationEndpoint : AbstractTestFixture
+    {
+        /// <summary>
+        /// Requests a generated sound for a text prompt and checks that the result has audio and text.
+        /// </summary>
+        [Test]
+        public async Task Test_01_GenerateSound()
+        {
+            const string prompt = "Star Wars Light Saber parry";
+
+            Assert.NotNull(ElevenLabsClient.SoundGenerationEndpoint);
+            var soundRequest = new SoundGenerationRequest(prompt);
+            var generated = await ElevenLabsClient.SoundGenerationEndpoint.GenerateSoundAsync(soundRequest);
+
+            Assert.NotNull(generated);
+            Assert.IsTrue(generated.AudioClip != null);
+            Assert.IsTrue(generated.AudioClip.length > 0);
+            Assert.IsFalse(string.IsNullOrWhiteSpace(generated.Text));
+        }
+    }
+}
diff --git a/Tests/Test_Fixture_06_SoundGenerationEndpoint.cs.meta b/Tests/Test_Fixture_06_SoundGenerationEndpoint.cs.meta
new file mode 100644
index 0000000..e0a4f38
--- /dev/null
+++ b/Tests/Test_Fixture_06_SoundGenerationEndpoint.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 37e39ec49270a01498a168590d7ff154
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/Tests/Test_Fixture_04_HistoryEndpoint.cs b/Tests/Test_Fixture_07_HistoryEndpoint.cs
similarity index 98%
rename from Tests/Test_Fixture_04_HistoryEndpoint.cs
rename to Tests/Test_Fixture_07_HistoryEndpoint.cs
index feb616c..bf4f460 100644
--- a/Tests/Test_Fixture_04_HistoryEndpoint.cs
+++ b/Tests/Test_Fixture_07_HistoryEndpoint.cs
@@ -8,7 +8,7 @@
namespace ElevenLabs.Tests
{
- internal class Test_Fixture_04_HistoryEndpoint : AbstractTestFixture
+ internal class Test_Fixture_07_HistoryEndpoint : AbstractTestFixture
{
[Test]
public async Task Test_01_GetHistory()
diff --git a/Tests/Test_Fixture_04_HistoryEndpoint.cs.meta b/Tests/Test_Fixture_07_HistoryEndpoint.cs.meta
similarity index 100%
rename from Tests/Test_Fixture_04_HistoryEndpoint.cs.meta
rename to Tests/Test_Fixture_07_HistoryEndpoint.cs.meta
diff --git a/Tests/Test_Fixture_08_Dubbing.cs b/Tests/Test_Fixture_08_Dubbing.cs
new file mode 100644
index 0000000..31bbab9
--- /dev/null
+++ b/Tests/Test_Fixture_08_Dubbing.cs
@@ -0,0 +1,168 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using ElevenLabs.Dubbing;
+using NUnit.Framework;
+using System;
+using System.IO;
+using System.Threading.Tasks;
+using UnityEditor;
+using UnityEngine;
+using Utilities.WebRequestRest;
+
+namespace ElevenLabs.Tests
+{
+    /// <summary>
+    /// Editor integration tests for the dubbing endpoint. Each test submits a dubbing request
+    /// (local file path, URL, or AudioClip), fetches the dubbed file and its transcript, and then
+    /// deletes the dubbing project.
+    /// </summary>
+    /// <remarks>
+    /// Every test body is wrapped in a try/catch that reports any exception through Debug.LogError
+    /// instead of rethrowing. NOTE(review): presumably the Unity test runner fails a test on an
+    /// unexpected error log, which is what surfaces these failures — confirm.
+    /// </remarks>
+    internal class Test_Fixture_08_Dubbing : AbstractTestFixture
+    {
+        // Asset GUID resolved through AssetDatabase.GUIDToAssetPath to find the source audio.
+        private const string SourceAudioGuid = "96e9fdf73bc7a944f93886694973b90e";
+
+        /// <summary>
+        /// Builds the progress reporter handed to DubAsync; it logs one line per reported status.
+        /// </summary>
+        /// <remarks>
+        /// NOTE(review): the type argument is assumed to be DubbingProjectMetadata (the members read
+        /// here are Status, DubbingId, ExpectedDurationSeconds and TimeCompleted) — confirm against
+        /// the DubAsync signature.
+        /// </remarks>
+        private static Progress<DubbingProjectMetadata> CreateProgressLogger()
+        {
+            return new Progress<DubbingProjectMetadata>(update =>
+            {
+                switch (update.Status)
+                {
+                    case "dubbing":
+                        Debug.Log($"Dubbing for {update.DubbingId} in progress... Expected Duration: {update.ExpectedDurationSeconds:0.00} seconds");
+                        break;
+                    case "dubbed":
+                        Debug.Log($"Dubbing for {update.DubbingId} complete in {update.TimeCompleted.TotalSeconds:0.00} seconds!");
+                        break;
+                    default:
+                        Debug.Log($"Status: {update.Status}");
+                        break;
+                }
+            });
+        }
+
+        /// <summary>
+        /// Dubs the source audio from its file path, then verifies the dubbed file and the transcript.
+        /// </summary>
+        [Test]
+        public async Task Test_01_Dubbing_File()
+        {
+            try
+            {
+                Assert.NotNull(ElevenLabsClient.DubbingEndpoint);
+                var audioPath = Path.GetFullPath(AssetDatabase.GUIDToAssetPath(SourceAudioGuid));
+                var request = new DubbingRequest(audioPath, "es", "en", 1);
+                var metadata = await ElevenLabsClient.DubbingEndpoint.DubAsync(request, progress: CreateProgressLogger());
+                Assert.IsFalse(string.IsNullOrEmpty(metadata.DubbingId));
+                Assert.IsTrue(metadata.ExpectedDurationSeconds > 0);
+
+                var dubbedClipPath = await ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage);
+                Assert.NotNull(dubbedClipPath);
+                Assert.IsTrue(File.Exists(dubbedClipPath));
+                // Load the returned file back as MPEG audio to check that it is a playable clip.
+                var dubbedClip = await Rest.DownloadAudioClipAsync($"file://{dubbedClipPath}", AudioType.MPEG);
+                Assert.IsNotNull(dubbedClip);
+                Assert.IsTrue(dubbedClip.length > 0);
+
+                // The transcript is written next to the source audio file.
+                var srcFile = new FileInfo(audioPath);
+                var transcriptPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt");
+                var transcriptFile = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage);
+                await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile);
+                Assert.IsTrue(transcriptPath.Exists);
+                Assert.IsTrue(transcriptPath.Length > 0);
+
+                await ElevenLabsClient.DubbingEndpoint.DeleteDubbingProjectAsync(metadata.DubbingId);
+            }
+            catch (Exception e)
+            {
+                Debug.LogError(e);
+            }
+        }
+
+        /// <summary>
+        /// Dubs audio referenced by a URL, then verifies the dubbed file and the transcript.
+        /// </summary>
+        [Test]
+        public async Task Test_02_Dubbing_Url()
+        {
+            try
+            {
+                Assert.NotNull(ElevenLabsClient.DubbingEndpoint);
+
+                var request = new DubbingRequest(new Uri("https://youtu.be/Zo5-rhYOlNk"), "ja", "en", 1, true);
+                var metadata = await ElevenLabsClient.DubbingEndpoint.DubAsync(request, progress: CreateProgressLogger());
+                Assert.IsFalse(string.IsNullOrEmpty(metadata.DubbingId));
+                Assert.IsTrue(metadata.ExpectedDurationSeconds > 0);
+
+                var assetsDir = Path.GetFullPath(Application.dataPath);
+                var dubbedClip = await ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage);
+                Assert.IsNotNull(dubbedClip);
+                Assert.IsTrue(File.Exists(dubbedClip));
+
+                // There is no local source file here, so the transcript goes under Application.dataPath.
+                var transcriptPath = new FileInfo(Path.Combine(assetsDir, $"online.dubbed.{request.TargetLanguage}.srt"));
+                var transcriptFile = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage);
+                await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile);
+                Assert.IsTrue(transcriptPath.Exists);
+                Assert.IsTrue(transcriptPath.Length > 0);
+
+                await ElevenLabsClient.DubbingEndpoint.DeleteDubbingProjectAsync(metadata.DubbingId);
+            }
+            catch (Exception e)
+            {
+                Debug.LogError(e);
+            }
+        }
+
+        /// <summary>
+        /// Dubs the source audio loaded as an AudioClip asset, then verifies the dubbed file and the transcript.
+        /// </summary>
+        [Test]
+        public async Task Test_03_Dubbing_AudioClip()
+        {
+            try
+            {
+                Assert.NotNull(ElevenLabsClient.DubbingEndpoint);
+                var clipPath = AssetDatabase.GUIDToAssetPath(SourceAudioGuid);
+                var audioClip = AssetDatabase.LoadAssetAtPath<AudioClip>(clipPath);
+                var request = new DubbingRequest(audioClip, "es", "en", 1);
+                var metadata = await ElevenLabsClient.DubbingEndpoint.DubAsync(request, progress: CreateProgressLogger());
+                Assert.IsFalse(string.IsNullOrEmpty(metadata.DubbingId));
+                Assert.IsTrue(metadata.ExpectedDurationSeconds > 0);
+
+                var srcFile = new FileInfo(Path.GetFullPath(clipPath));
+                var dubbedClipPath = await ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage);
+                Assert.IsNotNull(dubbedClipPath);
+                Assert.IsTrue(File.Exists(dubbedClipPath));
+                var dubbedClip = await Rest.DownloadAudioClipAsync($"file://{dubbedClipPath}", AudioType.MPEG);
+                Assert.IsNotNull(dubbedClip);
+                Assert.IsTrue(dubbedClip.length > 0);
+
+                var transcriptPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt");
+                var transcriptFile = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage);
+                await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile);
+                Assert.IsTrue(transcriptPath.Exists);
+                Assert.IsTrue(transcriptPath.Length > 0);
+
+                await ElevenLabsClient.DubbingEndpoint.DeleteDubbingProjectAsync(metadata.DubbingId);
+            }
+            catch (Exception e)
+            {
+                Debug.LogError(e);
+            }
+        }
+    }
+}
diff --git a/Tests/Test_Fixture_08_Dubbing.cs.meta b/Tests/Test_Fixture_08_Dubbing.cs.meta
new file mode 100644
index 0000000..55992e5
--- /dev/null
+++ b/Tests/Test_Fixture_08_Dubbing.cs.meta
@@ -0,0 +1,11 @@
+fileFormatVersion: 2
+guid: 670686861541fd14ca17ef2305b90b6f
+MonoImporter:
+ externalObjects: {}
+ serializedVersion: 2
+ defaultReferences: []
+ executionOrder: 0
+ icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3}
+ userData:
+ assetBundleName:
+ assetBundleVariant:
diff --git a/package.json b/package.json
index ab9bc10..8abbc64 100644
--- a/package.json
+++ b/package.json
@@ -3,7 +3,7 @@
"displayName": "ElevenLabs",
"description": "A non-official Eleven Labs voice synthesis RESTful client.",
"keywords": [],
- "version": "3.2.9",
+ "version": "3.3.0",
"unity": "2021.3",
"documentationUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs#documentation",
"changelogUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs/releases",
@@ -17,8 +17,9 @@
"url": "https://github.com/StephenHodgson"
},
"dependencies": {
- "com.utilities.rest": "2.5.3",
- "com.utilities.encoder.ogg": "3.1.4"
+ "com.utilities.rest": "2.5.7",
+ "com.utilities.encoder.ogg": "3.1.4",
+ "com.utilities.encoder.wav": "1.2.2"
},
"samples": [
{