From 5060287adc4345033ee73f1dd257809c2c4aa7bd Mon Sep 17 00:00:00 2001 From: Stephen Hodgson Date: Mon, 2 Sep 2024 21:06:46 -0400 Subject: [PATCH] com.rest.elevenlabs 3.3.0 (#86) - Added ability to specify fully customizable domain proxies - Added env var parsing for `ELEVENLABS_API_KEY` - Added Copy Button for the Voice ID to Voice Lab Dashboard - Added Shared Voices API - Added SoundEffects API endpoints - Added Dubbing API endpoints - Updated models - Updated com.utilities.rest -> 2.5.7 --------- Co-authored-by: AmarTrivedi1 <149634562+AmarTrivedi1@users.noreply.github.com> --- Documentation~/README.md | 123 +++++++- Editor/ElevenLabsDashboard.cs | 21 +- .../ElevenLabsAuthentication.cs | 8 + .../Authentication/ElevenLabsSettingsInfo.cs | 11 +- Runtime/Common/GeneratedClip.cs | 59 ++++ Runtime/Common/GeneratedClip.cs.meta | 11 + Runtime/Common/VoiceClip.cs | 46 +-- Runtime/Dubbing.meta | 8 + Runtime/Dubbing/DubbingEndpoint.cs | 229 +++++++++++++++ Runtime/Dubbing/DubbingEndpoint.cs.meta | 11 + Runtime/Dubbing/DubbingFormat.cs | 14 + Runtime/Dubbing/DubbingFormat.cs.meta | 11 + Runtime/Dubbing/DubbingProjectMetadata.cs | 55 ++++ .../Dubbing/DubbingProjectMetadata.cs.meta | 11 + Runtime/Dubbing/DubbingRequest.cs | 263 ++++++++++++++++++ Runtime/Dubbing/DubbingRequest.cs.meta | 11 + Runtime/Dubbing/DubbingResponse.cs | 32 +++ Runtime/Dubbing/DubbingResponse.cs.meta | 11 + Runtime/ElevenLabs.asmdef | 3 +- Runtime/ElevenLabsClient.cs | 11 + Runtime/Models/Model.cs | 32 ++- Runtime/SoundGeneration.meta | 8 + .../SoundGenerationEndpoint.cs | 35 +++ .../SoundGenerationEndpoint.cs.meta | 11 + .../SoundGeneration/SoundGenerationRequest.cs | 75 +++++ .../SoundGenerationRequest.cs.meta | 11 + Runtime/TextToSpeech/TextToSpeechEndpoint.cs | 1 - Runtime/Voices/SharedVoiceInfo.cs | 182 ++++++++++++ Runtime/Voices/SharedVoiceInfo.cs.meta | 11 + Runtime/Voices/SharedVoiceList.cs | 36 +++ Runtime/Voices/SharedVoiceList.cs.meta | 11 + Runtime/Voices/SharedVoiceQuery.cs | 114 
++++++++ Runtime/Voices/SharedVoiceQuery.cs.meta | 11 + Runtime/Voices/SharedVoicesEndpoint.cs | 29 ++ Runtime/Voices/SharedVoicesEndpoint.cs.meta | 11 + Runtime/Voices/VoicesEndpoint.cs | 14 +- Samples~/TextToSpeech/TextToSpeechDemo.cs | 14 +- Tests/AbstractTestFixture.cs | 2 +- Tests/Test_Fixture_00_Authentication.cs | 2 +- ...06_Models.cs => Test_Fixture_02_Models.cs} | 2 +- ...cs.meta => Test_Fixture_02_Models.cs.meta} | 0 ....cs => Test_Fixture_03_VoiceGeneration.cs} | 2 +- ...> Test_Fixture_03_VoiceGeneration.cs.meta} | 0 ...> Test_Fixture_04_TextToSpeechEndpoint.cs} | 2 +- ...t_Fixture_04_TextToSpeechEndpoint.cs.meta} | 0 ...t.cs => Test_Fixture_05_VoicesEndpoint.cs} | 19 +- ...=> Test_Fixture_05_VoicesEndpoint.cs.meta} | 0 ...Test_Fixture_06_SoundGenerationEndpoint.cs | 23 ++ ...Fixture_06_SoundGenerationEndpoint.cs.meta | 11 + ....cs => Test_Fixture_07_HistoryEndpoint.cs} | 2 +- ...> Test_Fixture_07_HistoryEndpoint.cs.meta} | 0 Tests/Test_Fixture_08_Dubbing.cs | 158 +++++++++++ Tests/Test_Fixture_08_Dubbing.cs.meta | 11 + package.json | 7 +- 54 files changed, 1710 insertions(+), 86 deletions(-) create mode 100644 Runtime/Common/GeneratedClip.cs create mode 100644 Runtime/Common/GeneratedClip.cs.meta create mode 100644 Runtime/Dubbing.meta create mode 100644 Runtime/Dubbing/DubbingEndpoint.cs create mode 100644 Runtime/Dubbing/DubbingEndpoint.cs.meta create mode 100644 Runtime/Dubbing/DubbingFormat.cs create mode 100644 Runtime/Dubbing/DubbingFormat.cs.meta create mode 100644 Runtime/Dubbing/DubbingProjectMetadata.cs create mode 100644 Runtime/Dubbing/DubbingProjectMetadata.cs.meta create mode 100644 Runtime/Dubbing/DubbingRequest.cs create mode 100644 Runtime/Dubbing/DubbingRequest.cs.meta create mode 100644 Runtime/Dubbing/DubbingResponse.cs create mode 100644 Runtime/Dubbing/DubbingResponse.cs.meta create mode 100644 Runtime/SoundGeneration.meta create mode 100644 Runtime/SoundGeneration/SoundGenerationEndpoint.cs create mode 100644 
Runtime/SoundGeneration/SoundGenerationEndpoint.cs.meta create mode 100644 Runtime/SoundGeneration/SoundGenerationRequest.cs create mode 100644 Runtime/SoundGeneration/SoundGenerationRequest.cs.meta create mode 100644 Runtime/Voices/SharedVoiceInfo.cs create mode 100644 Runtime/Voices/SharedVoiceInfo.cs.meta create mode 100644 Runtime/Voices/SharedVoiceList.cs create mode 100644 Runtime/Voices/SharedVoiceList.cs.meta create mode 100644 Runtime/Voices/SharedVoiceQuery.cs create mode 100644 Runtime/Voices/SharedVoiceQuery.cs.meta create mode 100644 Runtime/Voices/SharedVoicesEndpoint.cs create mode 100644 Runtime/Voices/SharedVoicesEndpoint.cs.meta rename Tests/{Test_Fixture_06_Models.cs => Test_Fixture_02_Models.cs} (92%) rename Tests/{Test_Fixture_06_Models.cs.meta => Test_Fixture_02_Models.cs.meta} (100%) rename Tests/{Test_Fixture_05_VoiceGeneration.cs => Test_Fixture_03_VoiceGeneration.cs} (96%) rename Tests/{Test_Fixture_05_VoiceGeneration.cs.meta => Test_Fixture_03_VoiceGeneration.cs.meta} (100%) rename Tests/{Test_Fixture_03_TextToSpeechEndpoint.cs => Test_Fixture_04_TextToSpeechEndpoint.cs} (96%) rename Tests/{Test_Fixture_03_TextToSpeechEndpoint.cs.meta => Test_Fixture_04_TextToSpeechEndpoint.cs.meta} (100%) rename Tests/{Test_Fixture_02_VoicesEndpoint.cs => Test_Fixture_05_VoicesEndpoint.cs} (91%) rename Tests/{Test_Fixture_02_VoicesEndpoint.cs.meta => Test_Fixture_05_VoicesEndpoint.cs.meta} (100%) create mode 100644 Tests/Test_Fixture_06_SoundGenerationEndpoint.cs create mode 100644 Tests/Test_Fixture_06_SoundGenerationEndpoint.cs.meta rename Tests/{Test_Fixture_04_HistoryEndpoint.cs => Test_Fixture_07_HistoryEndpoint.cs} (98%) rename Tests/{Test_Fixture_04_HistoryEndpoint.cs.meta => Test_Fixture_07_HistoryEndpoint.cs.meta} (100%) create mode 100644 Tests/Test_Fixture_08_Dubbing.cs create mode 100644 Tests/Test_Fixture_08_Dubbing.cs.meta diff --git a/Documentation~/README.md b/Documentation~/README.md index f07f3dc..75937d6 100644 --- 
a/Documentation~/README.md +++ b/Documentation~/README.md @@ -48,7 +48,7 @@ The recommended installation method is though the unity package manager and [Ope ### Table of Contents -- [Authentication](#authentication) :construction: +- [Authentication](#authentication) - [API Proxy](#api-proxy) - [Editor Dashboard](#editor-dashboard) - [Speech Synthesis Dashboard](#speech-synthesis-dashboard) @@ -59,6 +59,7 @@ The recommended installation method is though the unity package manager and [Ope - [Text to Speech](#text-to-speech) - [Stream Text To Speech](#stream-text-to-speech) - [Voices](#voices) + - [Get Shared Voices](#get-shared-voices) :new: - [Get All Voices](#get-all-voices) - [Get Default Voice Settings](#get-default-voice-settings) - [Get Voice](#get-voice) @@ -69,6 +70,13 @@ The recommended installation method is though the unity package manager and [Ope - [Samples](#samples) - [Download Voice Sample](#download-voice-sample) - [Delete Voice Sample](#delete-voice-sample) +- [Dubbing](#dubbing) :new: + - [Dub](#dub) :new: + - [Get Dubbing Metadata](#get-dubbing-metadata) :new: + - [Get Transcript for Dub](#get-transcript-for-dub) :new: + - [Get dubbed file](#get-dubbed-file) :new: + - [Delete Dubbing Project](#delete-dubbing-project) :new: +- [SFX Generation](#sfx-generation) :new: - [History](#history) - [Get History](#get-history) - [Get History Item](#get-history-item) @@ -176,8 +184,9 @@ In this example, we demonstrate how to set up and use `ElevenLabsProxyStartup` i 1. Create a new [ASP.NET Core minimal web API](https://learn.microsoft.com/en-us/aspnet/core/tutorials/min-web-api?view=aspnetcore-6.0) project. 2. Add the ElevenLabs-DotNet nuget package to your project. - Powershell install: `Install-Package ElevenLabs-DotNet-Proxy` + - Dotnet install: `dotnet add package ElevenLabs-DotNet-Proxy` - Manually editing .csproj: `` -3. Create a new class that inherits from `AbstractAuthenticationFilter` and override the `ValidateAuthentication` method. 
This will implement the `IAuthenticationFilter` that you will use to check user session token against your internal server. +3. Create a new class that inherits from `AbstractAuthenticationFilter` and override the `ValidateAuthenticationAsync` method. This will implement the `IAuthenticationFilter` that you will use to check user session token against your internal server. 4. In `Program.cs`, create a new proxy web application by calling `ElevenLabsProxyStartup.CreateDefaultHost` method, passing your custom `AuthenticationFilter` as a type argument. 5. Create `ElevenLabsAuthentication` and `ElevenLabsClientSettings` as you would normally with your API keys, org id, or Azure settings. @@ -186,11 +195,13 @@ public partial class Program { private class AuthenticationFilter : AbstractAuthenticationFilter { - public override void ValidateAuthentication(IHeaderDictionary request) + public override async Task ValidateAuthenticationAsync(IHeaderDictionary request) { + await Task.CompletedTask; // remote resource call + // You will need to implement your own class to properly test // custom issued tokens you've setup for your end users. - if (!request["xi-api-key"].ToString().Contains(userToken)) + if (!request["xi-api-key"].ToString().Contains(TestUserToken)) { throw new AuthenticationException("User is not authorized"); } @@ -265,7 +276,9 @@ audioSource.PlayOneShot(voiceClip.AudioClip); voiceClip.CopyIntoProject(editorDownloadDirectory); ``` -### Stream Text to Speech +#### [Stream Text To Speech](https://docs.elevenlabs.io/api-reference/text-to-speech-stream) + +Stream text to speech. ```csharp var api = new ElevenLabsClient(); @@ -289,6 +302,19 @@ audioSource.clip = voiceClip.AudioClip; Access to voices created either by the user or ElevenLabs. +#### Get Shared Voices + +Gets a list of shared voices in the public voice library. 
+ +```csharp +var api = new ElevenLabsClient(); +var results = await api.SharedVoicesEndpoint.GetSharedVoicesAsync(); +foreach (var voice in results.Voices) +{ + Debug.Log($"{voice.OwnerId} | {voice.VoiceId} | {voice.Date} | {voice.Name}"); +} +``` + #### Get All Voices Gets a list of all available voices. @@ -383,6 +409,87 @@ var success = await api.VoicesEndpoint.DeleteVoiceSampleAsync(voiceId, sampleId) Debug.Log($"Was successful? {success}"); ``` +### [Dubbing](https://elevenlabs.io/docs/api-reference/create-dub) + +#### Dub + +Dubs provided audio or video file into given language. + +```csharp +var api = new ElevenLabsClient(); +// from URI +var request = new DubbingRequest(new Uri("https://youtu.be/Zo5-rhYOlNk"), "ja", "en", 1, true); +// from file +var request = new DubbingRequest(filePath, "es", "en", 1); +var metadata = await api.DubbingEndpoint.DubAsync(request, progress: new Progress<DubbingProjectMetadata>(metadata => +{ + switch (metadata.Status) + { + case "dubbing": + Debug.Log($"Dubbing for {metadata.DubbingId} in progress... Expected Duration: {metadata.ExpectedDurationSeconds:0.00} seconds"); + break; + case "dubbed": + Debug.Log($"Dubbing for {metadata.DubbingId} complete in {metadata.TimeCompleted.TotalSeconds:0.00} seconds!"); + break; + default: + Debug.Log($"Status: {metadata.Status}"); + break; + } +})); +``` + +#### Get Dubbing Metadata + +Returns metadata about a dubbing project, including whether it’s still in progress or not. + +```csharp +var api = new ElevenLabsClient(); +var metadata = await api.DubbingEndpoint.GetDubbingProjectMetadataAsync("dubbing-id"); +``` + +#### Get Dubbed File + +Returns downloaded dubbed file path. + +> [!IMPORTANT] +> Videos will be returned in MP4 format and audio only dubs will be returned in MP3. 
+ +```csharp +var dubbedClipPath = await api.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage); +var dubbedClip = await Rest.DownloadAudioClipAsync($"file://{dubbedClipPath}", AudioType.MPEG); +audioSource.PlayOneShot(dubbedClip); +``` + +#### Get Transcript for Dub + +Returns transcript for the dub in the desired format. + +```csharp +var srcFile = new FileInfo(audioPath); +var transcriptPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt"); +var transcriptFile = await api.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage); +await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile); +``` + +#### Delete Dubbing Project + +Deletes a dubbing project. + +```csharp +var api = new ElevenLabsClient(); +await api.DubbingEndpoint.DeleteDubbingProjectAsync("dubbing-id"); +``` + +### SFX Generation + +API that converts text into sounds & uses the most advanced AI audio model ever. + +```csharp +var api = new ElevenLabsClient(); +var request = new SoundGenerationRequest("Star Wars Light Saber parry"); +var clip = await api.SoundGenerationEndpoint.GenerateSoundAsync(request); +``` + ### [History](https://docs.elevenlabs.io/api-reference/history) Access to your previously synthesized audio clips including its metadata. @@ -393,9 +500,9 @@ Get metadata about all your generated audio. ```csharp var api = new ElevenLabsClient(); -var historyInfo = await api.HistoryEndpoint.GetHistoryAsync(); +var historyItems = await api.HistoryEndpoint.GetHistoryAsync(); -foreach (var item in historyInfo.HistoryItems.OrderBy(item => item.Date)) +foreach (var item in historyItems.OrderBy(historyItem => historyItem.Date)) { Debug.Log($"{item.State} {item.Date} | {item.Id} | {item.Text.Length} | {item.Text}"); } @@ -407,7 +514,7 @@ Get information about a specific item. 
```csharp var api = new ElevenLabsClient(); -var historyItem = api.HistoryEndpoint.GetHistoryItemAsync(voiceClip.Id); +var historyItem = await api.HistoryEndpoint.GetHistoryItemAsync(voiceClip.Id); ``` #### Download History Audio diff --git a/Editor/ElevenLabsDashboard.cs b/Editor/ElevenLabsDashboard.cs index 450efde..0797c00 100644 --- a/Editor/ElevenLabsDashboard.cs +++ b/Editor/ElevenLabsDashboard.cs @@ -1,6 +1,5 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. -using ElevenLabs.Extensions; using ElevenLabs.History; using ElevenLabs.Models; using ElevenLabs.User; @@ -363,6 +362,8 @@ private async void AddVoice() private static readonly GUIContent deleteContent = new("Delete"); + private static readonly GUIContent copyContent = new("Copy"); + private static readonly GUIContent refreshContent = new("Refresh"); private static readonly GUIContent downloadingContent = new("Download in progress..."); @@ -1224,8 +1225,24 @@ private void RenderVoiceLab() EditorGUILayout.Space(EndWidth); EditorGUILayout.EndHorizontal(); EditorGUI.indentLevel++; - EditorGUILayout.LabelField(voice.Id, EditorStyles.boldLabel); + + EditorGUILayout.BeginHorizontal(); + { + EditorGUILayout.LabelField(voice.Id, EditorStyles.boldLabel); + GUILayout.FlexibleSpace(); + + if (GUILayout.Button(copyContent, defaultColumnWidthOption)) + { + EditorGUIUtility.systemCopyBuffer = voice.Id; + Debug.Log($"Voice ID {voice.Id} copied to clipboard"); + } + GUI.enabled = true; + } + EditorGUILayout.Space(EndWidth); + EditorGUILayout.EndHorizontal(); + EditorGUI.indentLevel++; + if (!voiceLabels.TryGetValue(voice.Id, out var cachedLabels)) { cachedLabels = new Dictionary(); diff --git a/Runtime/Authentication/ElevenLabsAuthentication.cs b/Runtime/Authentication/ElevenLabsAuthentication.cs index 49b2188..d35203c 100644 --- a/Runtime/Authentication/ElevenLabsAuthentication.cs +++ b/Runtime/Authentication/ElevenLabsAuthentication.cs @@ -14,6 +14,7 @@ namespace 
ElevenLabs public sealed class ElevenLabsAuthentication : AbstractAuthentication { internal const string CONFIG_FILE = ".elevenlabs"; + private const string ELEVENLABS_API_KEY = nameof(ELEVENLABS_API_KEY); private const string ELEVEN_LABS_API_KEY = nameof(ELEVEN_LABS_API_KEY); /// @@ -85,6 +86,12 @@ public override ElevenLabsAuthentication LoadFromAsset(ElevenLabsConfiguration c public override ElevenLabsAuthentication LoadFromEnvironment() { var apiKey = Environment.GetEnvironmentVariable(ELEVEN_LABS_API_KEY); + + if (string.IsNullOrWhiteSpace(apiKey)) + { + apiKey = Environment.GetEnvironmentVariable(ELEVENLABS_API_KEY); + } + return string.IsNullOrEmpty(apiKey) ? null : new ElevenLabsAuthentication(apiKey); } @@ -136,6 +143,7 @@ public override ElevenLabsAuthentication LoadFromDirectory(string directory = nu apiKey = part switch { + ELEVENLABS_API_KEY => nextPart.Trim(), ELEVEN_LABS_API_KEY => nextPart.Trim(), _ => apiKey }; diff --git a/Runtime/Authentication/ElevenLabsSettingsInfo.cs b/Runtime/Authentication/ElevenLabsSettingsInfo.cs index 9d76faa..4196bbb 100644 --- a/Runtime/Authentication/ElevenLabsSettingsInfo.cs +++ b/Runtime/Authentication/ElevenLabsSettingsInfo.cs @@ -7,6 +7,7 @@ namespace ElevenLabs { public sealed class ElevenLabsSettingsInfo : ISettingsInfo { + internal const string Https = "https://"; internal const string ElevenLabsDomain = "api.elevenlabs.io"; internal const string DefaultApiVersion = "v1"; @@ -18,7 +19,7 @@ public ElevenLabsSettingsInfo() Domain = ElevenLabsDomain; ApiVersion = DefaultApiVersion; BaseRequest = $"/{ApiVersion}/"; - BaseRequestUrlFormat = $"https://{Domain}{BaseRequest}{{0}}"; + BaseRequestUrlFormat = $"{Https}{Domain}{BaseRequest}{{0}}"; } /// @@ -33,8 +34,8 @@ public ElevenLabsSettingsInfo(string domain, string apiVersion = DefaultApiVersi domain = ElevenLabsDomain; } - if (!domain.Contains(".") && - !domain.Contains(":")) + if (!domain.Contains('.') && + !domain.Contains(':')) { throw new 
ArgumentException($"Invalid parameter \"{nameof(domain)}\"."); } @@ -44,10 +45,10 @@ public ElevenLabsSettingsInfo(string domain, string apiVersion = DefaultApiVersi apiVersion = DefaultApiVersion; } - Domain = domain; + Domain = domain.Contains("http") ? domain : $"{Https}{domain}"; ApiVersion = apiVersion; BaseRequest = $"/{ApiVersion}/"; - BaseRequestUrlFormat = $"https://{Domain}{BaseRequest}{{0}}"; + BaseRequestUrlFormat = $"{Domain}{BaseRequest}{{0}}"; } public string Domain { get; } diff --git a/Runtime/Common/GeneratedClip.cs b/Runtime/Common/GeneratedClip.cs new file mode 100644 index 0000000..1965774 --- /dev/null +++ b/Runtime/Common/GeneratedClip.cs @@ -0,0 +1,59 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using ElevenLabs.Extensions; +using System; +using UnityEngine; +using UnityEngine.Scripting; + +namespace ElevenLabs +{ + [Preserve] + [Serializable] + public class GeneratedClip : ISerializationCallbackReceiver + { + [Preserve] + internal GeneratedClip(string id, string text, AudioClip audioClip, string cachedPath) + { + this.id = id; + this.text = text; + TextHash = $"{id}{text}".GenerateGuid(); + textHash = TextHash.ToString(); + this.audioClip = audioClip; + this.cachedPath = cachedPath; + } + + [SerializeField] + private string id; + + [Preserve] + public string Id => id; + + [SerializeField] + private string text; + + [Preserve] + public string Text => text; + + [SerializeField] + private string textHash; + + [Preserve] + public Guid TextHash { get; private set; } + + [SerializeField] + private AudioClip audioClip; + + [Preserve] + public AudioClip AudioClip => audioClip; + + [SerializeField] + private string cachedPath; + + [Preserve] + public string CachedPath => cachedPath; + + public void OnBeforeSerialize() => textHash = TextHash.ToString(); + + public void OnAfterDeserialize() => TextHash = Guid.Parse(textHash); + } +} diff --git a/Runtime/Common/GeneratedClip.cs.meta 
b/Runtime/Common/GeneratedClip.cs.meta new file mode 100644 index 0000000..8a845d0 --- /dev/null +++ b/Runtime/Common/GeneratedClip.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 4239acd50bc44444591f287fc7d32f6a +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Common/VoiceClip.cs b/Runtime/Common/VoiceClip.cs index 1c51f17..24803d8 100644 --- a/Runtime/Common/VoiceClip.cs +++ b/Runtime/Common/VoiceClip.cs @@ -1,8 +1,7 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. -using System; -using ElevenLabs.Extensions; using ElevenLabs.Voices; +using System; using UnityEngine; using UnityEngine.Scripting; @@ -10,58 +9,19 @@ namespace ElevenLabs { [Preserve] [Serializable] - public sealed class VoiceClip : ISerializationCallbackReceiver + public sealed class VoiceClip : GeneratedClip { [Preserve] internal VoiceClip(string id, string text, Voice voice, AudioClip audioClip, string cachedPath) + : base(id, text, audioClip, cachedPath) { - this.id = id; - this.text = text; this.voice = voice; - TextHash = $"{id}{text}".GenerateGuid(); - textHash = TextHash.ToString(); - this.audioClip = audioClip; - this.cachedPath = cachedPath; } - [SerializeField] - private string id; - - [Preserve] - public string Id => id; - - [SerializeField] - private string text; - - [Preserve] - public string Text => text; - [SerializeField] private Voice voice; [Preserve] public Voice Voice => voice; - - [SerializeField] - private string textHash; - - [Preserve] - public Guid TextHash { get; private set; } - - [SerializeField] - private AudioClip audioClip; - - [Preserve] - public AudioClip AudioClip => audioClip; - - [SerializeField] - private string cachedPath; - - [Preserve] - public string CachedPath => cachedPath; - - public void 
OnBeforeSerialize() => textHash = TextHash.ToString(); - - public void OnAfterDeserialize() => TextHash = Guid.Parse(textHash); } } diff --git a/Runtime/Dubbing.meta b/Runtime/Dubbing.meta new file mode 100644 index 0000000..d75401b --- /dev/null +++ b/Runtime/Dubbing.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: 37e6aa53ad2e8da47b21e218b5617222 +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Dubbing/DubbingEndpoint.cs b/Runtime/Dubbing/DubbingEndpoint.cs new file mode 100644 index 0000000..ff0010b --- /dev/null +++ b/Runtime/Dubbing/DubbingEndpoint.cs @@ -0,0 +1,229 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using ElevenLabs.Extensions; +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Globalization; +using System.IO; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using UnityEngine; +using UnityEngine.Networking; +using Utilities.WebRequestRest; +using Debug = UnityEngine.Debug; + +namespace ElevenLabs.Dubbing +{ + public class DubbingEndpoint : ElevenLabsBaseEndPoint + { + public DubbingEndpoint(ElevenLabsClient client) : base(client) { } + + protected override string Root => "dubbing"; + + /// + /// Dubs provided audio or video file into given language. + /// + /// The containing dubbing configuration and files. + /// progress callback. + /// Optional, . + /// Optional, number of retry attempts when polling. + /// Optional, between making requests. + /// . + public async Task DubAsync(DubbingRequest request, int? maxRetries = null, TimeSpan? 
pollingInterval = null, IProgress progress = null, CancellationToken cancellationToken = default) + { + if (request == null) + { + throw new ArgumentNullException(nameof(request)); + } + + var payload = new WWWForm(); + + try + { + if (request.Files != null) + { + foreach (var (fileName, mediaType, stream) in request.Files) + { + using var audioData = new MemoryStream(); + await stream.CopyToAsync(audioData, cancellationToken); + payload.AddBinaryData("file", audioData.ToArray(), fileName, mediaType); + } + } + + if (!string.IsNullOrEmpty(request.ProjectName)) + { + payload.AddField("name", request.ProjectName); + } + + if (request.SourceUrl != null) + { + payload.AddField("source_url", request.SourceUrl.ToString()); + } + + if (!string.IsNullOrEmpty(request.SourceLanguage)) + { + payload.AddField("source_lang", request.SourceLanguage); + } + + if (!string.IsNullOrEmpty(request.TargetLanguage)) + { + payload.AddField("target_lang", request.TargetLanguage); + } + + if (request.NumberOfSpeakers.HasValue) + { + payload.AddField("num_speakers", request.NumberOfSpeakers.Value.ToString(CultureInfo.InvariantCulture)); + } + + if (request.Watermark.HasValue) + { + payload.AddField("watermark", request.Watermark.Value.ToString()); + } + + if (request.StartTime.HasValue) + { + payload.AddField("start_time", request.StartTime.Value.ToString(CultureInfo.InvariantCulture)); + } + + if (request.EndTime.HasValue) + { + payload.AddField("end_time", request.EndTime.Value.ToString(CultureInfo.InvariantCulture)); + } + + if (request.HighestResolution.HasValue) + { + payload.AddField("highest_resolution", request.HighestResolution.Value.ToString()); + } + } + finally + { + request.Dispose(); + } + + var response = await Rest.PostAsync(GetUrl(), payload, new RestParameters(client.DefaultRequestHeaders), cancellationToken); + response.Validate(EnableDebug); + var dubResponse = JsonConvert.DeserializeObject(response.Body, ElevenLabsClient.JsonSerializationOptions); + return await 
WaitForDubbingCompletionAsync(dubResponse, maxRetries ?? 60, pollingInterval ?? TimeSpan.FromSeconds(dubResponse.ExpectedDuration), pollingInterval == null, progress, cancellationToken); + } + + private async Task WaitForDubbingCompletionAsync(DubbingResponse dubbingResponse, int maxRetries, TimeSpan pollingInterval, bool adjustInterval, IProgress progress = null, CancellationToken cancellationToken = default) + { + var stopwatch = Stopwatch.StartNew(); + + for (var i = 1; i < maxRetries + 1; i++) + { + var metadata = await GetDubbingProjectMetadataAsync(dubbingResponse, cancellationToken).ConfigureAwait(false); + metadata.ExpectedDurationSeconds = dubbingResponse.ExpectedDuration; + + if (metadata.Status.Equals("dubbed", StringComparison.Ordinal)) + { + stopwatch.Stop(); + metadata.TimeCompleted = stopwatch.Elapsed; + progress?.Report(metadata); + return metadata; + } + + progress?.Report(metadata); + + if (metadata.Status.Equals("dubbing", StringComparison.Ordinal)) + { + if (adjustInterval && pollingInterval.TotalSeconds > 0.5f) + { + pollingInterval = TimeSpan.FromSeconds(dubbingResponse.ExpectedDuration / Math.Pow(2, i)); + } + + if (EnableDebug) + { + Debug.Log($"Dubbing for {dubbingResponse.DubbingId} in progress... Will check status again in {pollingInterval.TotalSeconds} seconds."); + } + + await Task.Delay(pollingInterval, cancellationToken).ConfigureAwait(false); + } + else + { + throw new Exception($"Dubbing for {dubbingResponse.DubbingId} failed: {metadata.Error}"); + } + } + + throw new TimeoutException($"Dubbing for {dubbingResponse.DubbingId} timed out or exceeded expected duration."); + } + + /// + /// Returns metadata about a dubbing project, including whether it’s still in progress or not. + /// + /// Dubbing project id. + /// Optional, . + /// . 
+ public async Task GetDubbingProjectMetadataAsync(string dubbingId, CancellationToken cancellationToken = default) + { + var response = await Rest.GetAsync(GetUrl($"/{dubbingId}"), new RestParameters(client.DefaultRequestHeaders), cancellationToken); + response.Validate(EnableDebug); + return JsonConvert.DeserializeObject(response.Body, ElevenLabsClient.JsonSerializationOptions); + } + + /// + /// Returns transcript for the dub in the specified format (SRT or WebVTT). + /// + /// Dubbing project id. + /// The language code of the transcript. + /// Optional. The format type of the transcript file, either or . + /// Optional, . + /// + /// A string containing the transcript content in the specified format. + /// + public async Task GetTranscriptForDubAsync(string dubbingId, string languageCode, DubbingFormat formatType = DubbingFormat.Srt, CancellationToken cancellationToken = default) + { + var @params = new Dictionary { { "format_type", formatType.ToString().ToLower() } }; + var response = await Rest.GetAsync(GetUrl($"/{dubbingId}/transcript/{languageCode}", @params), new RestParameters(client.DefaultRequestHeaders), cancellationToken); + response.Validate(EnableDebug); + return response.Body; + } + + /// + /// Returns dubbed file as an . + /// + /// Dubbing project id. + /// The language code of the transcript. + /// Optional, . + /// Path to downloaded file. 
+ public async Task GetDubbedFileAsync(string dubbingId, string languageCode, CancellationToken cancellationToken = default) + { + var result = await Rest.GetAsync(GetUrl($"/{dubbingId}/audio/{languageCode}"), parameters: new RestParameters(client.DefaultRequestHeaders), cancellationToken: cancellationToken); + result.Validate(EnableDebug); + var cacheDir = await GetCacheDirectoryAsync(); + var mimeType = result.Headers["Content-Type"]; + var extension = mimeType switch + { + "video/mp4" => ".mp4", + "audio/mpeg" => ".mp3", + _ => throw new NotSupportedException($"Unsupported mime type: {mimeType}") + }; + var fileName = $"{dubbingId}_{languageCode}{extension}"; + var filePath = Path.Combine(cacheDir, fileName); + await File.WriteAllBytesAsync(filePath, result.Data, cancellationToken).ConfigureAwait(true); + return filePath; + } + + /// + /// Deletes a dubbing project. + /// + /// Dubbing project id. + /// Optional, . + public async Task DeleteDubbingProjectAsync(string dubbingId, CancellationToken cancellationToken = default) + { + var response = await Rest.DeleteAsync(GetUrl($"/{dubbingId}"), new RestParameters(client.DefaultRequestHeaders), cancellationToken); + response.Validate(EnableDebug); + } + + private static async Task GetCacheDirectoryAsync() + { + await Rest.ValidateCacheDirectoryAsync(); + return Rest.DownloadCacheDirectory + .CreateNewDirectory(nameof(ElevenLabs)) + .CreateNewDirectory(nameof(Dubbing)); + } + } +} diff --git a/Runtime/Dubbing/DubbingEndpoint.cs.meta b/Runtime/Dubbing/DubbingEndpoint.cs.meta new file mode 100644 index 0000000..7d9b457 --- /dev/null +++ b/Runtime/Dubbing/DubbingEndpoint.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 3fcb78227a5727e448abfe3aca0738ae +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git 
a/Runtime/Dubbing/DubbingFormat.cs b/Runtime/Dubbing/DubbingFormat.cs new file mode 100644 index 0000000..5388b28 --- /dev/null +++ b/Runtime/Dubbing/DubbingFormat.cs @@ -0,0 +1,14 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Runtime.Serialization; + +namespace ElevenLabs.Dubbing +{ + public enum DubbingFormat + { + [EnumMember(Value = "srt")] + Srt, + [EnumMember(Value = "webvtt")] + WebVtt + } +} diff --git a/Runtime/Dubbing/DubbingFormat.cs.meta b/Runtime/Dubbing/DubbingFormat.cs.meta new file mode 100644 index 0000000..7f0d7b9 --- /dev/null +++ b/Runtime/Dubbing/DubbingFormat.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: ccb3faac50374b44e9a38e973387e7ea +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Dubbing/DubbingProjectMetadata.cs b/Runtime/Dubbing/DubbingProjectMetadata.cs new file mode 100644 index 0000000..502f368 --- /dev/null +++ b/Runtime/Dubbing/DubbingProjectMetadata.cs @@ -0,0 +1,55 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. 
+ +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using UnityEngine.Scripting; + +namespace ElevenLabs.Dubbing +{ + [Preserve] + public sealed class DubbingProjectMetadata + { + [Preserve] + [JsonConstructor] + internal DubbingProjectMetadata( + [JsonProperty("dubbing_id")] string dubbingId, + [JsonProperty("name")] string name, + [JsonProperty("status")] string status, + [JsonProperty("target_languages")] IReadOnlyList targetLanguages, + [JsonProperty("error")] string error) + { + DubbingId = dubbingId; + Name = name; + Status = status; + TargetLanguages = targetLanguages; + Error = error; + } + + [Preserve] + [JsonProperty("dubbing_id")] + public string DubbingId { get; } + + [Preserve] + [JsonProperty("name")] + public string Name { get; } + + [Preserve] + [JsonProperty("status")] + public string Status { get; } + + [Preserve] + [JsonProperty("target_languages")] + public IReadOnlyList TargetLanguages { get; } + + [Preserve] + [JsonProperty("error")] + public string Error { get; } + + [JsonIgnore] + public float ExpectedDurationSeconds { get; internal set; } + + [JsonIgnore] + public TimeSpan TimeCompleted { get; internal set; } + } +} diff --git a/Runtime/Dubbing/DubbingProjectMetadata.cs.meta b/Runtime/Dubbing/DubbingProjectMetadata.cs.meta new file mode 100644 index 0000000..9dad58b --- /dev/null +++ b/Runtime/Dubbing/DubbingProjectMetadata.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: e3e3a509892a32c49aed125bda39b6a4 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Dubbing/DubbingRequest.cs b/Runtime/Dubbing/DubbingRequest.cs new file mode 100644 index 0000000..21e7f6c --- /dev/null +++ b/Runtime/Dubbing/DubbingRequest.cs @@ -0,0 +1,263 @@ +// Licensed under the MIT License. 
See LICENSE in the project root for license information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using UnityEngine; +using Utilities.Encoding.Wav; + +namespace ElevenLabs.Dubbing +{ + public sealed class DubbingRequest : IDisposable + { + public DubbingRequest( + string filePath, + string targetLanguage, + string sourceLanguage = null, + int? numberOfSpeakers = null, + bool? watermark = null, + int? startTime = null, + int? endTime = null, + bool? highestResolution = null, + bool? dropBackgroundAudio = null, + string projectName = null) + : this(new[] { filePath }, targetLanguage, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, projectName) + { + } + + public DubbingRequest( + IEnumerable filePaths, + string targetLanguage, + string sourceLanguage = null, + int? numberOfSpeakers = null, + bool? watermark = null, + int? startTime = null, + int? endTime = null, + bool? highestResolution = null, + bool? dropBackgroundAudio = null, + string projectName = null) + : this(targetLanguage, null, filePaths, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, projectName) + { + } + + public DubbingRequest( + Uri sourceUrl, + string targetLanguage, + string sourceLanguage = null, + int? numberOfSpeakers = null, + bool? watermark = null, + int? startTime = null, + int? endTime = null, + bool? highestResolution = null, + bool? dropBackgroundAudio = null, + string projectName = null) + : this(targetLanguage, sourceUrl, null, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, projectName) + { + } + + public DubbingRequest(AudioClip audioClip, string targetLanguage, string sourceLanguage = null, int? numberOfSpeakers = null, bool? watermark = null, int? startTime = null, int? endTime = null, bool? highestResolution = null, bool? 
dropBackgroundAudio = null, string projectName = null) + : this(new[] { audioClip }, targetLanguage, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, projectName) + { + } + + public DubbingRequest(IEnumerable audioClips, string targetLanguage, string sourceLanguage = null, int? numberOfSpeakers = null, bool? watermark = null, int? startTime = null, int? endTime = null, bool? highestResolution = null, bool? dropBackgroundAudio = null, string projectName = null) + { + if (audioClips == null) + { + throw new MissingReferenceException(nameof(audioClips)); + } + + var clips = audioClips.ToList(); + + if (clips.Count == 0) + { + throw new ArgumentException("At least one audio clip must be provided."); + } + + if (string.IsNullOrWhiteSpace(targetLanguage)) + { + throw new ArgumentException("Target language must be provided."); + } + + TargetLanguage = targetLanguage; + SourceLanguage = sourceLanguage; + NumberOfSpeakers = numberOfSpeakers; + Watermark = watermark; + StartTime = startTime; + EndTime = endTime; + HighestResolution = highestResolution; + DropBackgroundAudio = dropBackgroundAudio; + ProjectName = projectName; + var files = new List<(string, string, Stream)>(clips.Count); + files.AddRange((from audioClip in clips let stream = new MemoryStream(audioClip.EncodeToWav()) select (audioClip.name, "audio/wav", stream)).Select(value => ((string, string, Stream))value)); + Files = files; + } + + private DubbingRequest( + string targetLanguage, + Uri sourceUrl = null, + IEnumerable filePaths = null, + string sourceLanguage = null, + int? numberOfSpeakers = null, + bool? watermark = null, + int? startTime = null, + int? endTime = null, + bool? highestResolution = null, + bool? 
dropBackgroundAudio = null, + string projectName = null) + { + if (string.IsNullOrWhiteSpace(targetLanguage)) + { + throw new ArgumentException("Target language must be provided."); + } + + TargetLanguage = targetLanguage; + + if (filePaths == null && sourceUrl == null) + { + throw new ArgumentException("Either sourceUrl or filePaths must be provided."); + } + + var files = new List<(string, string, Stream)>(); + + if (filePaths != null) + { + foreach (var filePath in filePaths) + { + if (string.IsNullOrWhiteSpace(filePath)) + { + throw new ArgumentException("File path cannot be empty."); + } + + var fileInfo = new FileInfo(filePath); + + if (!fileInfo.Exists) + { + throw new FileNotFoundException($"File not found: {filePath}"); + } + + var stream = fileInfo.OpenRead(); + var extension = fileInfo.Extension.ToLowerInvariant(); + var mediaType = extension switch + { + ".3gp" => "video/3gpp", + ".acc" => "audio/aac", + ".avi" => "video/x-msvideo", + ".flac" => "audio/flac", + ".ogg" => "audio/ogg", + ".mov" => "video/quicktime", + ".mp3" => "audio/mp3", + ".mp4" => "video/mp4", + ".raw" => "audio/raw", + ".wav" => "audio/wav", + ".webm" => "video/webm", + _ => "application/octet-stream" + }; + files.Add((fileInfo.Name, mediaType, stream)); + } + } + + Files = files; + SourceUrl = sourceUrl; + SourceLanguage = sourceLanguage; + NumberOfSpeakers = numberOfSpeakers; + Watermark = watermark; + StartTime = startTime; + EndTime = endTime; + HighestResolution = highestResolution; + DropBackgroundAudio = dropBackgroundAudio; + ProjectName = projectName; + } + + ~DubbingRequest() => Dispose(false); + + /// + /// Files to dub. + /// + public IReadOnlyList<(string, string, Stream)> Files { get; } + + /// + /// URL of the source video/audio file. + /// + public Uri SourceUrl { get; } + + /// + /// Source language. 
+ /// + /// + /// A list of supported languages can be found at: https://elevenlabs.io/docs/api-reference/how-to-dub-a-video#list-of-supported-languages-for-dubbing + /// + public string SourceLanguage { get; } + + /// + /// The Target language to dub the content into. Can be none if dubbing studio editor is enabled and running manual mode + /// + /// + /// A list of supported languages can be found at: https://elevenlabs.io/docs/api-reference/how-to-dub-a-video#list-of-supported-languages-for-dubbing + /// + public string TargetLanguage { get; } + + /// + /// Number of speakers to use for the dubbing. Set to 0 to automatically detect the number of speakers + /// + public int? NumberOfSpeakers { get; } + + /// + /// Whether to apply watermark to the output video. + /// + public bool? Watermark { get; } + + /// + /// Start time of the source video/audio file. + /// + public int? StartTime { get; } + + /// + /// End time of the source video/audio file. + /// + public int? EndTime { get; } + + /// + /// Whether to use the highest resolution available. + /// + public bool? HighestResolution { get; } + + /// + /// An advanced setting. Whether to drop background audio from the final dub. + /// This can improve dub quality where it's known that audio shouldn't have a background track such as for speeches or monologues. + /// + public bool? DropBackgroundAudio { get; } + + /// + /// Name of the dubbing project. 
+ /// + public string ProjectName { get; } + + private void Dispose(bool disposing) + { + if (disposing) + { + if (Files == null) { return; } + foreach (var (_, _, stream) in Files) + { + try + { + stream?.Close(); + stream?.Dispose(); + } + catch (Exception e) + { + Debug.Log(e); + } + } + } + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + } +} diff --git a/Runtime/Dubbing/DubbingRequest.cs.meta b/Runtime/Dubbing/DubbingRequest.cs.meta new file mode 100644 index 0000000..9279cb4 --- /dev/null +++ b/Runtime/Dubbing/DubbingRequest.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 9e2db7e411dc2c546b4ef4d884126533 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Dubbing/DubbingResponse.cs b/Runtime/Dubbing/DubbingResponse.cs new file mode 100644 index 0000000..cf7109c --- /dev/null +++ b/Runtime/Dubbing/DubbingResponse.cs @@ -0,0 +1,32 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. 
+ +using Newtonsoft.Json; +using UnityEngine.Scripting; + +namespace ElevenLabs.Dubbing +{ + [Preserve] + public sealed class DubbingResponse + { + [Preserve] + [JsonConstructor] + internal DubbingResponse( + [JsonProperty("dubbing_id")] string dubbingId, + [JsonProperty("expected_duration_sec")] float expectedDuration) + { + DubbingId = dubbingId; + ExpectedDuration = expectedDuration; + } + + [Preserve] + [JsonProperty("dubbing_id")] + public string DubbingId { get; } + + [Preserve] + [JsonProperty("expected_duration_sec")] + public float ExpectedDuration { get; } + + [Preserve] + public static implicit operator string(DubbingResponse response) => response?.DubbingId; + } +} diff --git a/Runtime/Dubbing/DubbingResponse.cs.meta b/Runtime/Dubbing/DubbingResponse.cs.meta new file mode 100644 index 0000000..d66306a --- /dev/null +++ b/Runtime/Dubbing/DubbingResponse.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 8b4738bd8a977f1499eecdd9bdae3182 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/ElevenLabs.asmdef b/Runtime/ElevenLabs.asmdef index 0c9de0e..ffab41f 100644 --- a/Runtime/ElevenLabs.asmdef +++ b/Runtime/ElevenLabs.asmdef @@ -6,7 +6,8 @@ "GUID:a6609af893242c7438d701ddd4cce46a", "GUID:7958db66189566541a6363568aee1575", "GUID:d25c28436b1dcc9408d86f49a0f5210b", - "GUID:fe98ce187c2363b409d00954d687ec68" + "GUID:fe98ce187c2363b409d00954d687ec68", + "GUID:f7a0d77b5e1d79742a738fb859ee2f28" ], "includePlatforms": [], "excludePlatforms": [], diff --git a/Runtime/ElevenLabsClient.cs b/Runtime/ElevenLabsClient.cs index c574ebf..eb7d7ad 100644 --- a/Runtime/ElevenLabsClient.cs +++ b/Runtime/ElevenLabsClient.cs @@ -1,7 +1,9 @@ // Licensed under the MIT License. See LICENSE in the project root for license information. 
+using ElevenLabs.Dubbing; using ElevenLabs.History; using ElevenLabs.Models; +using ElevenLabs.SoundGeneration; using ElevenLabs.TextToSpeech; using ElevenLabs.User; using ElevenLabs.VoiceGeneration; @@ -40,6 +42,9 @@ public ElevenLabsClient(ElevenLabsAuthentication authentication = null, ElevenLa HistoryEndpoint = new HistoryEndpoint(this); TextToSpeechEndpoint = new TextToSpeechEndpoint(this); VoiceGenerationEndpoint = new VoiceGenerationEndpoint(this); + SharedVoicesEndpoint = new SharedVoicesEndpoint(this); + DubbingEndpoint = new DubbingEndpoint(this); + SoundGenerationEndpoint = new SoundGenerationEndpoint(this); } protected override void SetupDefaultRequestHeaders() @@ -88,6 +93,12 @@ protected override void ValidateAuthentication() public TextToSpeechEndpoint TextToSpeechEndpoint { get; } + public SharedVoicesEndpoint SharedVoicesEndpoint { get; } + public VoiceGenerationEndpoint VoiceGenerationEndpoint { get; } + + public DubbingEndpoint DubbingEndpoint { get; } + + public SoundGenerationEndpoint SoundGenerationEndpoint { get; } } } diff --git a/Runtime/Models/Model.cs b/Runtime/Models/Model.cs index 6b4423e..b45a6f5 100644 --- a/Runtime/Models/Model.cs +++ b/Runtime/Models/Model.cs @@ -71,7 +71,7 @@ public Model( public IReadOnlyList Languages { get; } [Preserve] - public static implicit operator string(Model model) => model.ToString(); + public static implicit operator string(Model model) => model?.ToString(); [Preserve] public override string ToString() => Id; @@ -84,40 +84,56 @@ public Model( public static Model MonoLingualV1 => EnglishV1; /// - /// Use our standard English language model to generate speech in a variety of voices, styles and moods. + /// Our first ever text to speech model. Now outclassed by Multilingual v2 (for content creation) and Turbo v2.5 (for low latency use cases). 
/// [Preserve] [JsonIgnore] public static Model EnglishV1 { get; } = new("eleven_monolingual_v1"); /// - /// Speech to speech model suitable for scenarios where you need maximum control over the content and prosody of your generations. + /// Our English-only, low latency model. Best for developer use cases where speed matters and you only need English. Performance is on par with Turbo v2.5. /// [Preserve] [JsonIgnore] - public static Model EnglishV2 { get; } = new("eleven_english_sts_v2"); + public static Model EnglishTurboV2 { get; } = new("eleven_turbo_v2"); /// - /// Cutting-edge turbo model is ideally suited for tasks demanding extremely low latency. + /// Our high quality, low latency model in 32 languages. Best for developer use cases where speed matters and you need non-English languages. /// [Preserve] [JsonIgnore] - public static Model EnglishTurboV2 { get; } = new("eleven_turbo_v2"); + public static Model TurboV2_5 { get; } = new("eleven_turbo_v2_5"); /// - /// Generate lifelike speech in multiple languages and create content that resonates with a broader audience. + /// Our first Multilingual model, capability of generating speech in 10 languages. + /// Now outclassed by Multilingual v2 (for content creation) and Turbo v2.5 (for low latency use cases). /// [Preserve] [JsonIgnore] public static Model MultiLingualV1 { get; } = new("eleven_multilingual_v1"); /// - /// State of the art multilingual speech synthesis model, able to generate life-like speech in 29 languages. + /// Our most life-like, emotionally rich mode in 29 languages. Best for voice overs, audiobooks, post-production, or any other content creation needs. /// [Preserve] [JsonIgnore] public static Model MultiLingualV2 { get; } = new("eleven_multilingual_v2"); + /// + /// Our state-of-the-art speech to speech model suitable for scenarios where you need maximum control over the content and prosody of your generations. 
+ /// + [Preserve] + [JsonIgnore] + public static Model EnglishSpeechToSpeechV2 { get; } = new("eleven_english_sts_v2"); + + /// + /// Our cutting-edge, multilingual speech-to-speech model is designed for situations that demand unparalleled control over both + /// the content and the prosody of the generated speech across various languages. + /// + [Preserve] + [JsonIgnore] + public static Model MultiLingualSpeechToSpeechV2 { get; } = new("eleven_multilingual_sts_v2"); + #endregion Predefined Models } } diff --git a/Runtime/SoundGeneration.meta b/Runtime/SoundGeneration.meta new file mode 100644 index 0000000..2a7ef9a --- /dev/null +++ b/Runtime/SoundGeneration.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: ee54f9378f264ac4b8b65a576da166ba +folderAsset: yes +DefaultImporter: + externalObjects: {} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/SoundGeneration/SoundGenerationEndpoint.cs b/Runtime/SoundGeneration/SoundGenerationEndpoint.cs new file mode 100644 index 0000000..ce244e0 --- /dev/null +++ b/Runtime/SoundGeneration/SoundGenerationEndpoint.cs @@ -0,0 +1,35 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using Newtonsoft.Json; +using System; +using System.Threading; +using System.Threading.Tasks; +using UnityEngine; +using UnityEngine.Networking; +using Utilities.WebRequestRest; + +namespace ElevenLabs.SoundGeneration +{ + public class SoundGenerationEndpoint : ElevenLabsBaseEndPoint + { + public SoundGenerationEndpoint(ElevenLabsClient client) : base(client) { } + + protected override string Root => "sound-generation"; + + /// + /// converts text into sounds & uses the most advanced AI audio model ever. + /// Create sound effects for your videos, voice-overs or video games. + /// + /// . + /// Optional, . + /// . 
+ public async Task GenerateSoundAsync(SoundGenerationRequest request, CancellationToken cancellationToken = default) + { + var payload = JsonConvert.SerializeObject(request, ElevenLabsClient.JsonSerializationOptions); + var clipId = Guid.NewGuid().ToString(); + var audioClip = await Rest.DownloadAudioClipAsync(GetUrl(), AudioType.MPEG, UnityWebRequest.kHttpVerbPOST, clipId, payload, parameters: new RestParameters(client.DefaultRequestHeaders), cancellationToken: cancellationToken); + Rest.TryGetDownloadCacheItem(clipId, out var cachedPath); + return new GeneratedClip(clipId, request.Text, audioClip, cachedPath); + } + } +} diff --git a/Runtime/SoundGeneration/SoundGenerationEndpoint.cs.meta b/Runtime/SoundGeneration/SoundGenerationEndpoint.cs.meta new file mode 100644 index 0000000..df10a22 --- /dev/null +++ b/Runtime/SoundGeneration/SoundGenerationEndpoint.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: cebdd8018f416e0409bceefa4a68ff61 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/SoundGeneration/SoundGenerationRequest.cs b/Runtime/SoundGeneration/SoundGenerationRequest.cs new file mode 100644 index 0000000..79a58d1 --- /dev/null +++ b/Runtime/SoundGeneration/SoundGenerationRequest.cs @@ -0,0 +1,75 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using Newtonsoft.Json; +using System; +using UnityEngine.Scripting; + +namespace ElevenLabs.SoundGeneration +{ + [Preserve] + public sealed class SoundGenerationRequest + { + /// + /// Constructor. + /// + /// + /// The text that will get converted into a sound effect. + /// + /// + /// The duration of the sound which will be generated in seconds. + /// Must be at least 0.5 and at most 22. + /// If set to None we will guess the optimal duration using the prompt. 
+ /// Defaults to None. + /// + /// + /// A higher prompt influence makes your generation follow the prompt more closely while also making generations less variable. + /// Must be a value between 0 and 1. + /// Defaults to 0.3. + /// + [Preserve] + public SoundGenerationRequest(string text, float? duration = null, float? promptInfluence = null) + { + Text = text; + + if (duration is > 22f or < 0.5f) + { + throw new ArgumentOutOfRangeException(nameof(duration), "Duration must be a value between 0.5 and 22."); + } + + Duration = duration; + + if (promptInfluence is > 1f or < 0f) + { + throw new ArgumentOutOfRangeException(nameof(promptInfluence), "Prompt influence must be a value between 0 and 1."); + } + + PromptInfluence = promptInfluence; + } + + /// + /// The text that will get converted into a sound effect. + /// + [Preserve] + [JsonProperty("text")] + public string Text { get; } + + /// + /// The duration of the sound which will be generated in seconds. + /// Must be at least 0.5 and at most 22. + /// If set to None we will guess the optimal duration using the prompt. + /// Defaults to None. + /// + [Preserve] + [JsonProperty("duration_seconds")] + public float? Duration { get; } + + /// + /// A higher prompt influence makes your generation follow the prompt more closely while also making generations less variable. + /// Must be a value between 0 and 1. + /// Defaults to 0.3. + /// + [Preserve] + [JsonProperty("prompt_influence")] + public float? 
PromptInfluence { get; } + } +} diff --git a/Runtime/SoundGeneration/SoundGenerationRequest.cs.meta b/Runtime/SoundGeneration/SoundGenerationRequest.cs.meta new file mode 100644 index 0000000..3d87b77 --- /dev/null +++ b/Runtime/SoundGeneration/SoundGenerationRequest.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: df74482b216f40043b9f122500150455 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/TextToSpeech/TextToSpeechEndpoint.cs b/Runtime/TextToSpeech/TextToSpeechEndpoint.cs index 1d2075f..5032a63 100644 --- a/Runtime/TextToSpeech/TextToSpeechEndpoint.cs +++ b/Runtime/TextToSpeech/TextToSpeechEndpoint.cs @@ -244,7 +244,6 @@ void StreamCallback(Response partialResponse) if (!audioClip.SetData(chunk, 0)) { Debug.LogError("Failed to set pcm data to partial clip."); - return; } diff --git a/Runtime/Voices/SharedVoiceInfo.cs b/Runtime/Voices/SharedVoiceInfo.cs new file mode 100644 index 0000000..b87ae01 --- /dev/null +++ b/Runtime/Voices/SharedVoiceInfo.cs @@ -0,0 +1,182 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. 
+ +using Newtonsoft.Json; +using System; +using UnityEngine.Scripting; + +namespace ElevenLabs.Voices +{ + public sealed class SharedVoiceInfo + { + [Preserve] + [JsonConstructor] + internal SharedVoiceInfo( + [JsonProperty("public_owner_id")] string ownerId, + [JsonProperty("voice_id")] string voiceId, + [JsonProperty("date_unix")] int dateUnix, + [JsonProperty("name")] string name, + [JsonProperty("accent")] string accent, + [JsonProperty("gender")] string gender, + [JsonProperty("age")] string age, + [JsonProperty("descriptive")] string descriptive, + [JsonProperty("use_case")] string useCase, + [JsonProperty("category")] string category, + [JsonProperty("language")] string language, + [JsonProperty("description")] string description, + [JsonProperty("preview_url")] string previewUrl, + [JsonProperty("usage_character_count_1y")] int usageCharacterCount1Y, + [JsonProperty("usage_character_count_7d")] int usageCharacterCount7D, + [JsonProperty("play_api_usage_character_count_1y")] int playApiUsageCharacterCount1Y, + [JsonProperty("cloned_by_count")] int clonedByCount, + [JsonProperty("rate")] float rate, + [JsonProperty("free_users_allowed")] bool freeUsersAllowed, + [JsonProperty("live_moderation_enabled")] bool liveModerationEnabled, + [JsonProperty("featured")] bool featured, + [JsonProperty("notice_period")] int? 
noticePeriod, + [JsonProperty("instagram_username")] string instagramUsername, + [JsonProperty("twitter_username")] string twitterUsername, + [JsonProperty("youtube_username")] string youtubeUsername, + [JsonProperty("tiktok_username")] string tikTokUsername, + [JsonProperty("image_url")] string imageUrl) + { + OwnerId = ownerId; + VoiceId = voiceId; + DateUnix = dateUnix; + Name = name; + Accent = accent; + Gender = gender; + Age = age; + Descriptive = descriptive; + UseCase = useCase; + Category = category; + Language = language; + Description = description; + PreviewUrl = previewUrl; + UsageCharacterCount1Y = usageCharacterCount1Y; + UsageCharacterCount7D = usageCharacterCount7D; + PlayApiUsageCharacterCount1Y = playApiUsageCharacterCount1Y; + ClonedByCount = clonedByCount; + Rate = rate; + FreeUsersAllowed = freeUsersAllowed; + LiveModerationEnabled = liveModerationEnabled; + Featured = featured; + NoticePeriod = noticePeriod; + InstagramUsername = instagramUsername; + TwitterUsername = twitterUsername; + YoutubeUsername = youtubeUsername; + TikTokUsername = tikTokUsername; + ImageUrl = imageUrl; + } + + [Preserve] + [JsonProperty("public_owner_id")] + public string OwnerId { get; } + + [Preserve] + [JsonProperty("voice_id")] + public string VoiceId { get; } + + [Preserve] + [JsonProperty("date_unix")] + public int DateUnix { get; } + + [JsonIgnore] + public DateTime Date => DateTimeOffset.FromUnixTimeSeconds(DateUnix).DateTime; + + [Preserve] + [JsonProperty("name")] + public string Name { get; } + + [Preserve] + [JsonProperty("accent")] + public string Accent { get; } + + [Preserve] + [JsonProperty("gender")] + public string Gender { get; } + + [Preserve] + [JsonProperty("age")] + public string Age { get; } + + [Preserve] + [JsonProperty("descriptive")] + public string Descriptive { get; } + + [Preserve] + [JsonProperty("use_case")] + public string UseCase { get; } + + [Preserve] + [JsonProperty("category")] + public string Category { get; } + + [Preserve] + 
[JsonProperty("language")] + public string Language { get; } + + [Preserve] + [JsonProperty("description")] + public string Description { get; } + + [Preserve] + [JsonProperty("preview_url")] + public string PreviewUrl { get; } + + [Preserve] + [JsonProperty("usage_character_count_1y")] + public int UsageCharacterCount1Y { get; } + + [Preserve] + [JsonProperty("usage_character_count_7d")] + public int UsageCharacterCount7D { get; } + + [Preserve] + [JsonProperty("play_api_usage_character_count_1y")] + public int PlayApiUsageCharacterCount1Y { get; } + + [Preserve] + [JsonProperty("cloned_by_count")] + public int ClonedByCount { get; } + + [Preserve] + [JsonProperty("rate")] + public float Rate { get; } + + [Preserve] + [JsonProperty("free_users_allowed")] + public bool FreeUsersAllowed { get; } + + [Preserve] + [JsonProperty("live_moderation_enabled")] + public bool LiveModerationEnabled { get; } + + [Preserve] + [JsonProperty("featured")] + public bool Featured { get; } + + [Preserve] + [JsonProperty("notice_period")] + public int? 
NoticePeriod { get; } + + [Preserve] + [JsonProperty("instagram_username")] + public string InstagramUsername { get; } + + [Preserve] + [JsonProperty("twitter_username")] + public string TwitterUsername { get; } + + [Preserve] + [JsonProperty("youtube_username")] + public string YoutubeUsername { get; } + + [Preserve] + [JsonProperty("tiktok_username")] + public string TikTokUsername { get; } + + [Preserve] + [JsonProperty("image_url")] + public string ImageUrl { get; } + } +} diff --git a/Runtime/Voices/SharedVoiceInfo.cs.meta b/Runtime/Voices/SharedVoiceInfo.cs.meta new file mode 100644 index 0000000..3676818 --- /dev/null +++ b/Runtime/Voices/SharedVoiceInfo.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 2e3f6b28361ecf14ca892084b81967fe +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Voices/SharedVoiceList.cs b/Runtime/Voices/SharedVoiceList.cs new file mode 100644 index 0000000..0418ed7 --- /dev/null +++ b/Runtime/Voices/SharedVoiceList.cs @@ -0,0 +1,36 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. 
+ +using Newtonsoft.Json; +using System.Collections.Generic; +using UnityEngine.Scripting; + +namespace ElevenLabs.Voices +{ + [Preserve] + public sealed class SharedVoiceList + { + [Preserve] + [JsonConstructor] + internal SharedVoiceList( + [JsonProperty("voices")] IReadOnlyList voices, + [JsonProperty("has_more")] bool hasMore, + [JsonProperty("last_sort_id")] string lastId) + { + Voices = voices; + HasMore = hasMore; + LastId = lastId; + } + + [Preserve] + [JsonProperty("voices")] + public IReadOnlyList Voices { get; } + + [Preserve] + [JsonProperty("has_more")] + public bool HasMore { get; } + + [Preserve] + [JsonProperty("last_sort_id")] + public string LastId { get; } + } +} diff --git a/Runtime/Voices/SharedVoiceList.cs.meta b/Runtime/Voices/SharedVoiceList.cs.meta new file mode 100644 index 0000000..f0834bb --- /dev/null +++ b/Runtime/Voices/SharedVoiceList.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 6af582794cd0d5d4b99d747991cca6ea +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Voices/SharedVoiceQuery.cs b/Runtime/Voices/SharedVoiceQuery.cs new file mode 100644 index 0000000..cf6be4f --- /dev/null +++ b/Runtime/Voices/SharedVoiceQuery.cs @@ -0,0 +1,114 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using System.Collections.Generic; + +namespace ElevenLabs.Voices +{ + public sealed class SharedVoiceQuery + { + public int? 
PageSize { get; set; } = null; + + public string Category { get; set; } = null; + + public string Gender { get; set; } = null; + + public string Age { get; set; } = null; + + public string Accent { get; set; } = null; + + public string Language { get; set; } = null; + + public string SearchTerms { get; set; } = null; + + public List UseCases { get; set; } = null; + + public List Descriptives { get; set; } = null; + + public bool? Featured { get; set; } = null; + + public bool? ReaderAppEnabled { get; set; } = null; + + public string OwnerId { get; set; } = null; + + public string Sort { get; set; } = null; + + public int? Page { get; set; } = null; + + public Dictionary ToQueryParams() + { + var parameters = new Dictionary(); + + if (PageSize.HasValue) + { + parameters.Add("page_size", PageSize.Value.ToString()); + } + + if (!string.IsNullOrWhiteSpace(Category)) + { + parameters.Add("category", Category); + } + + if (!string.IsNullOrWhiteSpace(Gender)) + { + parameters.Add("gender", Gender); + } + + if (!string.IsNullOrWhiteSpace(Age)) + { + parameters.Add("age", Age); + } + + if (!string.IsNullOrWhiteSpace(Accent)) + { + parameters.Add("accent", Accent); + } + + if (!string.IsNullOrWhiteSpace(Language)) + { + parameters.Add("language", Language); + } + + if (!string.IsNullOrWhiteSpace(SearchTerms)) + { + parameters.Add("search", SearchTerms); + } + + if (UseCases is { Count: > 0 }) + { + parameters.Add("use_cases", string.Join(',', UseCases)); + } + + if (Descriptives is { Count: > 0 }) + { + parameters.Add("descriptives", string.Join(',', Descriptives)); + } + + if (Featured.HasValue) + { + parameters.Add("featured", Featured.Value.ToString()); + } + + if (ReaderAppEnabled.HasValue) + { + parameters.Add("reader_app_enabled", ReaderAppEnabled.Value.ToString()); + } + + if (!string.IsNullOrWhiteSpace(OwnerId)) + { + parameters.Add("owner_id", OwnerId); + } + + if (!string.IsNullOrWhiteSpace(Sort)) + { + parameters.Add("sort", Sort); + } + + if (Page.HasValue) + { + 
parameters.Add("page", Page.Value.ToString()); + } + + return parameters; + } + } +} diff --git a/Runtime/Voices/SharedVoiceQuery.cs.meta b/Runtime/Voices/SharedVoiceQuery.cs.meta new file mode 100644 index 0000000..2f69228 --- /dev/null +++ b/Runtime/Voices/SharedVoiceQuery.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: e28a0ccf7ce12674eaba7eb16f7dfa98 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Voices/SharedVoicesEndpoint.cs b/Runtime/Voices/SharedVoicesEndpoint.cs new file mode 100644 index 0000000..c522ae6 --- /dev/null +++ b/Runtime/Voices/SharedVoicesEndpoint.cs @@ -0,0 +1,29 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using Newtonsoft.Json; +using System.Threading; +using System.Threading.Tasks; +using Utilities.WebRequestRest; + +namespace ElevenLabs.Voices +{ + public sealed class SharedVoicesEndpoint : ElevenLabsBaseEndPoint + { + public SharedVoicesEndpoint(ElevenLabsClient client) : base(client) { } + + protected override string Root => "shared-voices"; + + /// + /// Gets a list of shared voices. + /// + /// Optional, . + /// Optional, . + /// . 
+ public async Task GetSharedVoicesAsync(SharedVoiceQuery query = null, CancellationToken cancellationToken = default) + { + var response = await Rest.GetAsync(GetUrl(queryParameters: query?.ToQueryParams()), new RestParameters(client.DefaultRequestHeaders), cancellationToken); + response.Validate(EnableDebug); + return JsonConvert.DeserializeObject(response.Body, ElevenLabsClient.JsonSerializationOptions); + } + } +} diff --git a/Runtime/Voices/SharedVoicesEndpoint.cs.meta b/Runtime/Voices/SharedVoicesEndpoint.cs.meta new file mode 100644 index 0000000..a50edb5 --- /dev/null +++ b/Runtime/Voices/SharedVoicesEndpoint.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 66684bd4b4bc26c49b47ad0f8151da6a +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Runtime/Voices/VoicesEndpoint.cs b/Runtime/Voices/VoicesEndpoint.cs index 0705b14..ad67c0a 100644 --- a/Runtime/Voices/VoicesEndpoint.cs +++ b/Runtime/Voices/VoicesEndpoint.cs @@ -62,9 +62,7 @@ public VoicesEndpoint(ElevenLabsClient client) : base(client) { } /// /// of s. public Task> GetAllVoicesAsync(CancellationToken cancellationToken = default) - { - return GetAllVoicesAsync(true, cancellationToken); - } + => GetAllVoicesAsync(true, cancellationToken); /// /// Gets a list of all available voices for a user. 
@@ -85,7 +83,7 @@ public async Task> GetAllVoicesAsync(bool downloadSettings, foreach (var voice in voices) { voiceSettingsTasks.Add(LocalGetVoiceSettingsAsync()); - + async Task LocalGetVoiceSettingsAsync() { await Awaiters.UnityMainThread; @@ -143,7 +141,7 @@ public async Task GetVoiceAsync(string voiceId, bool withSettings = false throw new ArgumentNullException(nameof(voiceId)); } - var response = await Rest.GetAsync(GetUrl($"/{voiceId}?with_settings={withSettings}"), new RestParameters(client.DefaultRequestHeaders), cancellationToken); + var response = await Rest.GetAsync(GetUrl($"/{voiceId}?with_settings={withSettings.ToString().ToLower()}"), new RestParameters(client.DefaultRequestHeaders), cancellationToken); response.Validate(EnableDebug); return JsonConvert.DeserializeObject(response.Body, ElevenLabsClient.JsonSerializationOptions); } @@ -203,7 +201,11 @@ public async Task AddVoiceAsync(string name, IEnumerable samplePa try { var fileBytes = await File.ReadAllBytesAsync(sample, cancellationToken); - form.AddBinaryData("files", fileBytes, Path.GetFileName(sample)); + + if (fileBytes.Length > 0) + { + form.AddBinaryData("files", fileBytes, Path.GetFileName(sample)); + } } catch (Exception e) { diff --git a/Samples~/TextToSpeech/TextToSpeechDemo.cs b/Samples~/TextToSpeech/TextToSpeechDemo.cs index 955f3e0..0e13968 100644 --- a/Samples~/TextToSpeech/TextToSpeechDemo.cs +++ b/Samples~/TextToSpeech/TextToSpeechDemo.cs @@ -6,7 +6,6 @@ using System.Collections.Generic; using System.Linq; using System.Threading; -using System.Threading.Tasks; using UnityEngine; using Utilities.Async; @@ -33,6 +32,11 @@ public class TextToSpeechDemo : MonoBehaviour private readonly Queue streamClipQueue = new(); +#if !UNITY_2022_3_OR_NEWER + private readonly CancellationTokenSource lifetimeCts = new(); + private CancellationToken destroyCancellationToken => lifetimeCts.Token; +#endif + private void OnValidate() { if (audioSource == null) @@ -79,6 +83,14 @@ private async void 
Start() } } +#if !UNITY_2022_3_OR_NEWER + private void OnDestroy() + { + lifetimeCts.Cancel(); + lifetimeCts.Dispose(); + } +#endif + private async void PlayStreamQueue(CancellationToken cancellationToken) { try diff --git a/Tests/AbstractTestFixture.cs b/Tests/AbstractTestFixture.cs index 0a2b71d..4b41dc0 100644 --- a/Tests/AbstractTestFixture.cs +++ b/Tests/AbstractTestFixture.cs @@ -11,7 +11,7 @@ protected AbstractTestFixture() var auth = new ElevenLabsAuthentication().LoadDefaultsReversed(); var settings = new ElevenLabsSettings(); ElevenLabsClient = new ElevenLabsClient(auth, settings); - //ElevenLabsClient.EnableDebug = true; + ElevenLabsClient.EnableDebug = true; } } } diff --git a/Tests/Test_Fixture_00_Authentication.cs b/Tests/Test_Fixture_00_Authentication.cs index 0716a84..69d5843 100644 --- a/Tests/Test_Fixture_00_Authentication.cs +++ b/Tests/Test_Fixture_00_Authentication.cs @@ -146,7 +146,7 @@ public void Test_09_CustomDomainConfigurationSettings() var auth = new ElevenLabsAuthentication("customIssuedToken"); var settings = new ElevenLabsSettings(domain: "api.your-custom-domain.com"); var api = new ElevenLabsClient(auth, settings); - Console.WriteLine(api.Settings.BaseRequestUrlFormat); + Debug.Log(api.Settings.BaseRequestUrlFormat); } [TearDown] diff --git a/Tests/Test_Fixture_06_Models.cs b/Tests/Test_Fixture_02_Models.cs similarity index 92% rename from Tests/Test_Fixture_06_Models.cs rename to Tests/Test_Fixture_02_Models.cs index 701b87a..b9d53f1 100644 --- a/Tests/Test_Fixture_06_Models.cs +++ b/Tests/Test_Fixture_02_Models.cs @@ -6,7 +6,7 @@ namespace ElevenLabs.Tests { - internal class Test_Fixture_06_Models : AbstractTestFixture + internal class Test_Fixture_02_Models : AbstractTestFixture { [Test] public async Task Test_01_GetModels() diff --git a/Tests/Test_Fixture_06_Models.cs.meta b/Tests/Test_Fixture_02_Models.cs.meta similarity index 100% rename from Tests/Test_Fixture_06_Models.cs.meta rename to Tests/Test_Fixture_02_Models.cs.meta 
diff --git a/Tests/Test_Fixture_05_VoiceGeneration.cs b/Tests/Test_Fixture_03_VoiceGeneration.cs similarity index 96% rename from Tests/Test_Fixture_05_VoiceGeneration.cs rename to Tests/Test_Fixture_03_VoiceGeneration.cs index 2629b27..7d315ee 100644 --- a/Tests/Test_Fixture_05_VoiceGeneration.cs +++ b/Tests/Test_Fixture_03_VoiceGeneration.cs @@ -10,7 +10,7 @@ namespace ElevenLabs.Tests { - internal class Test_Fixture_05_VoiceGeneration : AbstractTestFixture + internal class Test_Fixture_03_VoiceGeneration : AbstractTestFixture { [Test] public async Task Test_01_GetVoiceGenerationOptions() diff --git a/Tests/Test_Fixture_05_VoiceGeneration.cs.meta b/Tests/Test_Fixture_03_VoiceGeneration.cs.meta similarity index 100% rename from Tests/Test_Fixture_05_VoiceGeneration.cs.meta rename to Tests/Test_Fixture_03_VoiceGeneration.cs.meta diff --git a/Tests/Test_Fixture_03_TextToSpeechEndpoint.cs b/Tests/Test_Fixture_04_TextToSpeechEndpoint.cs similarity index 96% rename from Tests/Test_Fixture_03_TextToSpeechEndpoint.cs rename to Tests/Test_Fixture_04_TextToSpeechEndpoint.cs index 0dc30a7..a772e66 100644 --- a/Tests/Test_Fixture_03_TextToSpeechEndpoint.cs +++ b/Tests/Test_Fixture_04_TextToSpeechEndpoint.cs @@ -8,7 +8,7 @@ namespace ElevenLabs.Tests { - internal class Test_Fixture_03_TextToSpeechEndpoint : AbstractTestFixture + internal class Test_Fixture_04_TextToSpeechEndpoint : AbstractTestFixture { [Test] public async Task Test_01_TextToSpeech() diff --git a/Tests/Test_Fixture_03_TextToSpeechEndpoint.cs.meta b/Tests/Test_Fixture_04_TextToSpeechEndpoint.cs.meta similarity index 100% rename from Tests/Test_Fixture_03_TextToSpeechEndpoint.cs.meta rename to Tests/Test_Fixture_04_TextToSpeechEndpoint.cs.meta diff --git a/Tests/Test_Fixture_02_VoicesEndpoint.cs b/Tests/Test_Fixture_05_VoicesEndpoint.cs similarity index 91% rename from Tests/Test_Fixture_02_VoicesEndpoint.cs rename to Tests/Test_Fixture_05_VoicesEndpoint.cs index 89f6557..c0c0b9a 100644 --- 
a/Tests/Test_Fixture_02_VoicesEndpoint.cs +++ b/Tests/Test_Fixture_05_VoicesEndpoint.cs @@ -2,6 +2,7 @@ using ElevenLabs.Voices; using NUnit.Framework; +using System; using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; @@ -10,10 +11,10 @@ namespace ElevenLabs.Tests { - internal class Test_Fixture_02_VoicesEndpoint : AbstractTestFixture + internal class Test_Fixture_05_VoicesEndpoint : AbstractTestFixture { [Test] - public async Task Test_01_GetVoices() + public async Task Test_01_01_GetVoices() { Assert.NotNull(ElevenLabsClient.VoicesEndpoint); var results = await ElevenLabsClient.VoicesEndpoint.GetAllVoicesAsync(); @@ -26,6 +27,20 @@ public async Task Test_01_GetVoices() } } + [Test] + public async Task Test_01_02_GetSharedVoices() + { + Assert.NotNull(ElevenLabsClient.SharedVoicesEndpoint); + var results = await ElevenLabsClient.SharedVoicesEndpoint.GetSharedVoicesAsync(); + Assert.NotNull(results); + Assert.IsNotEmpty(results.Voices); + + foreach (var voice in results.Voices) + { + Console.WriteLine($"{voice.OwnerId} | {voice.VoiceId} | {voice.Date} | {voice.Name}"); + } + } + [Test] public async Task Test_02_GetDefaultVoiceSettings() { diff --git a/Tests/Test_Fixture_02_VoicesEndpoint.cs.meta b/Tests/Test_Fixture_05_VoicesEndpoint.cs.meta similarity index 100% rename from Tests/Test_Fixture_02_VoicesEndpoint.cs.meta rename to Tests/Test_Fixture_05_VoicesEndpoint.cs.meta diff --git a/Tests/Test_Fixture_06_SoundGenerationEndpoint.cs b/Tests/Test_Fixture_06_SoundGenerationEndpoint.cs new file mode 100644 index 0000000..64ffd5c --- /dev/null +++ b/Tests/Test_Fixture_06_SoundGenerationEndpoint.cs @@ -0,0 +1,23 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. 
+ +using ElevenLabs.SoundGeneration; +using NUnit.Framework; +using System.Threading.Tasks; + +namespace ElevenLabs.Tests +{ + internal class Test_Fixture_06_SoundGenerationEndpoint : AbstractTestFixture + { + [Test] + public async Task Test_01_GenerateSound() + { + Assert.NotNull(ElevenLabsClient.SoundGenerationEndpoint); + var request = new SoundGenerationRequest("Star Wars Light Saber parry"); + var clip = await ElevenLabsClient.SoundGenerationEndpoint.GenerateSoundAsync(request); + Assert.NotNull(clip); + Assert.IsTrue(clip.AudioClip != null); + Assert.IsTrue(clip.AudioClip.length > 0); + Assert.IsFalse(string.IsNullOrWhiteSpace(clip.Text)); + } + } +} diff --git a/Tests/Test_Fixture_06_SoundGenerationEndpoint.cs.meta b/Tests/Test_Fixture_06_SoundGenerationEndpoint.cs.meta new file mode 100644 index 0000000..e0a4f38 --- /dev/null +++ b/Tests/Test_Fixture_06_SoundGenerationEndpoint.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 37e39ec49270a01498a168590d7ff154 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Tests/Test_Fixture_04_HistoryEndpoint.cs b/Tests/Test_Fixture_07_HistoryEndpoint.cs similarity index 98% rename from Tests/Test_Fixture_04_HistoryEndpoint.cs rename to Tests/Test_Fixture_07_HistoryEndpoint.cs index feb616c..bf4f460 100644 --- a/Tests/Test_Fixture_04_HistoryEndpoint.cs +++ b/Tests/Test_Fixture_07_HistoryEndpoint.cs @@ -8,7 +8,7 @@ namespace ElevenLabs.Tests { - internal class Test_Fixture_04_HistoryEndpoint : AbstractTestFixture + internal class Test_Fixture_07_HistoryEndpoint : AbstractTestFixture { [Test] public async Task Test_01_GetHistory() diff --git a/Tests/Test_Fixture_04_HistoryEndpoint.cs.meta b/Tests/Test_Fixture_07_HistoryEndpoint.cs.meta similarity index 100% rename from Tests/Test_Fixture_04_HistoryEndpoint.cs.meta rename to 
Tests/Test_Fixture_07_HistoryEndpoint.cs.meta diff --git a/Tests/Test_Fixture_08_Dubbing.cs b/Tests/Test_Fixture_08_Dubbing.cs new file mode 100644 index 0000000..31bbab9 --- /dev/null +++ b/Tests/Test_Fixture_08_Dubbing.cs @@ -0,0 +1,158 @@ +// Licensed under the MIT License. See LICENSE in the project root for license information. + +using ElevenLabs.Dubbing; +using NUnit.Framework; +using System; +using System.IO; +using System.Threading.Tasks; +using UnityEditor; +using UnityEngine; +using Utilities.WebRequestRest; + +namespace ElevenLabs.Tests +{ + internal class Test_Fixture_08_Dubbing : AbstractTestFixture + { + [Test] + public async Task Test_01_Dubbing_File() + { + try + { + Assert.NotNull(ElevenLabsClient.DubbingEndpoint); + var audioPath = Path.GetFullPath(AssetDatabase.GUIDToAssetPath("96e9fdf73bc7a944f93886694973b90e")); + var request = new DubbingRequest(audioPath, "es", "en", 1); + var metadata = await ElevenLabsClient.DubbingEndpoint.DubAsync(request, progress: new Progress(metadata => + { + switch (metadata.Status) + { + case "dubbing": + Debug.Log($"Dubbing for {metadata.DubbingId} in progress... 
Expected Duration: {metadata.ExpectedDurationSeconds:0.00} seconds"); + break; + case "dubbed": + Debug.Log($"Dubbing for {metadata.DubbingId} complete in {metadata.TimeCompleted.TotalSeconds:0.00} seconds!"); + break; + default: + Debug.Log($"Status: {metadata.Status}"); + break; + } + })); + Assert.IsFalse(string.IsNullOrEmpty(metadata.DubbingId)); + Assert.IsTrue(metadata.ExpectedDurationSeconds > 0); + + var dubbedClipPath = await ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage); + Assert.NotNull(dubbedClipPath); + Assert.IsTrue(File.Exists(dubbedClipPath)); + var dubbedClip = await Rest.DownloadAudioClipAsync($"file://{dubbedClipPath}", AudioType.MPEG); + Assert.IsNotNull(dubbedClip); + Assert.IsTrue(dubbedClip.length > 0); + + var srcFile = new FileInfo(audioPath); + var transcriptPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt"); + var transcriptFile = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage); + await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile); + Assert.IsTrue(transcriptPath.Exists); + Assert.IsTrue(transcriptPath.Length > 0); + + await ElevenLabsClient.DubbingEndpoint.DeleteDubbingProjectAsync(metadata.DubbingId); + } + catch (Exception e) + { + Debug.LogError(e); + } + } + + [Test] + public async Task Test_02_Dubbing_Url() + { + try + { + Assert.NotNull(ElevenLabsClient.DubbingEndpoint); + + var request = new DubbingRequest(new Uri("https://youtu.be/Zo5-rhYOlNk"), "ja", "en", 1, true); + var metadata = await ElevenLabsClient.DubbingEndpoint.DubAsync(request, progress: new Progress(metadata => + { + switch (metadata.Status) + { + case "dubbing": + Debug.Log($"Dubbing for {metadata.DubbingId} in progress... 
Expected Duration: {metadata.ExpectedDurationSeconds:0.00} seconds"); + break; + case "dubbed": + Debug.Log($"Dubbing for {metadata.DubbingId} complete in {metadata.TimeCompleted.TotalSeconds:0.00} seconds!"); + break; + default: + Debug.Log($"Status: {metadata.Status}"); + break; + } + })); + Assert.IsFalse(string.IsNullOrEmpty(metadata.DubbingId)); + Assert.IsTrue(metadata.ExpectedDurationSeconds > 0); + + var assetsDir = Path.GetFullPath(Application.dataPath); + var dubbedClip = await ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage); + Assert.IsNotNull(dubbedClip); + Assert.IsTrue(File.Exists(dubbedClip)); + + var transcriptPath = new FileInfo(Path.Combine(assetsDir, $"online.dubbed.{request.TargetLanguage}.srt")); + var transcriptFile = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage); + await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile); + Assert.IsTrue(transcriptPath.Exists); + Assert.IsTrue(transcriptPath.Length > 0); + + await ElevenLabsClient.DubbingEndpoint.DeleteDubbingProjectAsync(metadata.DubbingId); + } + catch (Exception e) + { + Debug.LogError(e); + } + } + + [Test] + public async Task Test_03_Dubbing_AudioClip() + { + try + { + Assert.NotNull(ElevenLabsClient.DubbingEndpoint); + var clipPath = AssetDatabase.GUIDToAssetPath("96e9fdf73bc7a944f93886694973b90e"); + var audioClip = AssetDatabase.LoadAssetAtPath(clipPath); + var request = new DubbingRequest(audioClip, "es", "en", 1); + var metadata = await ElevenLabsClient.DubbingEndpoint.DubAsync(request, progress: new Progress(metadata => + { + switch (metadata.Status) + { + case "dubbing": + Debug.Log($"Dubbing for {metadata.DubbingId} in progress... 
Expected Duration: {metadata.ExpectedDurationSeconds:0.00} seconds"); + break; + case "dubbed": + Debug.Log($"Dubbing for {metadata.DubbingId} complete in {metadata.TimeCompleted.TotalSeconds:0.00} seconds!"); + break; + default: + Debug.Log($"Status: {metadata.Status}"); + break; + } + })); + Assert.IsFalse(string.IsNullOrEmpty(metadata.DubbingId)); + Assert.IsTrue(metadata.ExpectedDurationSeconds > 0); + + var srcFile = new FileInfo(Path.GetFullPath(clipPath)); + var dubbedClipPath = await ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage); + Assert.IsNotNull(dubbedClipPath); + Assert.IsTrue(File.Exists(dubbedClipPath)); + var dubbedClip = await Rest.DownloadAudioClipAsync($"file://{dubbedClipPath}", AudioType.MPEG); + Assert.IsNotNull(dubbedClip); + Assert.IsTrue(dubbedClip.length > 0); + + var transcriptPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt"); + var transcriptFile = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage); + await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile); + Assert.IsTrue(transcriptPath.Exists); + Assert.IsTrue(transcriptPath.Length > 0); + + await ElevenLabsClient.DubbingEndpoint.DeleteDubbingProjectAsync(metadata.DubbingId); + } + catch (Exception e) + { + Debug.LogError(e); + } + } + } +} diff --git a/Tests/Test_Fixture_08_Dubbing.cs.meta b/Tests/Test_Fixture_08_Dubbing.cs.meta new file mode 100644 index 0000000..55992e5 --- /dev/null +++ b/Tests/Test_Fixture_08_Dubbing.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: 670686861541fd14ca17ef2305b90b6f +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {fileID: 2800000, guid: 5b71cbcaf078a8e44a5c96dcd24376d5, type: 3} + userData: + assetBundleName: + assetBundleVariant: diff --git a/package.json b/package.json index ab9bc10..8abbc64 100644 --- a/package.json +++ 
b/package.json @@ -3,7 +3,7 @@ "displayName": "ElevenLabs", "description": "A non-official Eleven Labs voice synthesis RESTful client.", "keywords": [], - "version": "3.2.9", + "version": "3.3.0", "unity": "2021.3", "documentationUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs#documentation", "changelogUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs/releases", @@ -17,8 +17,9 @@ "url": "https://github.com/StephenHodgson" }, "dependencies": { - "com.utilities.rest": "2.5.3", - "com.utilities.encoder.ogg": "3.1.4" + "com.utilities.rest": "2.5.7", + "com.utilities.encoder.ogg": "3.1.4", + "com.utilities.encoder.wav": "1.2.2" }, "samples": [ {