com.rest.elevenlabs 3.4.3 (#114)

- Added flash models
- Added stream input support to dubbing endpoint
- Fixed http/https protocol in client settings

---------

Co-authored-by: evya5 <76632693+evya5@users.noreply.github.com>
RageAgainstThePixel · Jan 13, 2025 · 3dd77e1 · 3dd77e1
1 parent d41517c
commit 3dd77e1
Show file tree

Hide file tree

Showing 9 changed files with 142 additions and 20 deletions.
diff --git a/Documentation~/README.md b/Documentation~/README.md
@@ -305,7 +305,7 @@ Gets a list of shared voices in the public voice library.
 
 ```csharp
 var api = new ElevenLabsClient();
-var results = await ElevenLabsClient.SharedVoicesEndpoint.GetSharedVoicesAsync();
+var results = await api.SharedVoicesEndpoint.GetSharedVoicesAsync();
 foreach (var voice in results.Voices)
 {
     Debug.Log($"{voice.OwnerId} | {voice.VoiceId} | {voice.Date} | {voice.Name}");
@@ -452,7 +452,7 @@ Returns downloaded dubbed file path.
 > Videos will be returned in MP4 format and audio only dubs will be returned in MP3.
 
 ```csharp
-var dubbedClipPath = await ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage);
+var dubbedClipPath = await api.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage);
 var dubbedClip = await Rest.DownloadAudioClipAsync($"file://{dubbedClipPath}", AudioType.MPEG);
 audioSource.PlayOneShot(dubbedClip);
 ```
@@ -464,7 +464,7 @@ Returns transcript for the dub in the desired format.
 ```csharp
 var srcFile = new FileInfo(audioPath);
 var transcriptPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt");
-var transcriptFile = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage);
+var transcriptFile = await api.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage);
 await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile);
 ```
 

diff --git a/Runtime/Authentication/ElevenLabsSettingsInfo.cs b/Runtime/Authentication/ElevenLabsSettingsInfo.cs
@@ -7,6 +7,7 @@ namespace ElevenLabs
 {
     public sealed class ElevenLabsSettingsInfo : ISettingsInfo
     {
+        internal const string Http = "http://";
         internal const string Https = "https://";
         internal const string ElevenLabsDomain = "api.elevenlabs.io";
         internal const string DefaultApiVersion = "v1";
@@ -45,7 +46,20 @@ public ElevenLabsSettingsInfo(string domain, string apiVersion = DefaultApiVersi
                 apiVersion = DefaultApiVersion;
             }
 
-            Domain = domain.Contains("http") ? domain : $"{Https}{domain}";
+            var protocol = Https;
+
+            if (domain.StartsWith(Http))
+            {
+                protocol = Http;
+                domain = domain.Replace(Http, string.Empty);
+            }
+            else if (domain.StartsWith(Https))
+            {
+                protocol = Https;
+                domain = domain.Replace(Https, string.Empty);
+            }
+
+            Domain = $"{protocol}{domain}";
             ApiVersion = apiVersion;
             BaseRequest = $"/{ApiVersion}/";
             BaseRequestUrlFormat = $"{Domain}{BaseRequest}{{0}}";

diff --git a/Runtime/Dubbing/DubbingEndpoint.cs b/Runtime/Dubbing/DubbingEndpoint.cs
@@ -43,11 +43,11 @@ public async Task<DubbingProjectMetadata> DubAsync(DubbingRequest request, int?
             {
                 if (request.Files != null)
                 {
-                    foreach (var (fileName, mediaType, stream) in request.Files)
+                    foreach (var dub in request.Files)
                     {
                         using var audioData = new MemoryStream();
-                        await stream.CopyToAsync(audioData, cancellationToken);
-                        payload.AddBinaryData("file", audioData.ToArray(), fileName, mediaType);
+                        await dub.Stream.CopyToAsync(audioData, cancellationToken);
+                        payload.AddBinaryData("file", audioData.ToArray(), dub.Name, dub.MediaType);
                     }
                 }
 

diff --git a/Runtime/Dubbing/DubbingRequest.cs b/Runtime/Dubbing/DubbingRequest.cs
@@ -39,7 +39,7 @@ public DubbingRequest(
             bool? dropBackgroundAudio = null,
             bool? useProfanityFilter = null,
             string projectName = null)
-            : this(targetLanguage, null, filePaths, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
+            : this(targetLanguage, null, null, filePaths, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
         {
         }
 
@@ -55,7 +55,7 @@ public DubbingRequest(
             bool? dropBackgroundAudio = null,
             bool? useProfanityFilter = null,
             string projectName = null)
-            : this(targetLanguage, sourceUrl, null, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
+            : this(targetLanguage, sourceUrl, null, null, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
         {
         }
 
@@ -104,14 +104,34 @@ public DubbingRequest(
             DropBackgroundAudio = dropBackgroundAudio;
             UseProfanityFilter = useProfanityFilter;
             ProjectName = projectName;
-            var files = new List<(string, string, Stream)>(clips.Count);
-            files.AddRange((from audioClip in clips let stream = new MemoryStream(audioClip.EncodeToWav()) select (audioClip.name, "audio/wav", stream)).Select(value => ((string, string, Stream))value));
+            var files = new List<DubbingStream>(clips.Count);
+            var streams = from audioClip in clips
+                          let stream = new MemoryStream(audioClip.EncodeToWav())
+                          select (stream, audioClip.name, mediaType: "audio/wav");
+            files.AddRange(streams.Select(dub => new DubbingStream(dub.stream, dub.name, dub.mediaType)));
             Files = files;
         }
 
+        public DubbingRequest(
+            List<DubbingStream> files,
+            string targetLanguage,
+            string sourceLanguage = null,
+            int? numberOfSpeakers = null,
+            bool? watermark = null,
+            int? startTime = null,
+            int? endTime = null,
+            bool? highestResolution = null,
+            bool? dropBackgroundAudio = null,
+            bool? useProfanityFilter = null,
+            string projectName = null)
+            : this(targetLanguage, null, files, null, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
+        {
+        }
+
         private DubbingRequest(
             string targetLanguage,
             Uri sourceUrl = null,
+            List<DubbingStream> files = null,
             IEnumerable<string> filePaths = null,
             string sourceLanguage = null,
             int? numberOfSpeakers = null,
@@ -135,7 +155,7 @@ private DubbingRequest(
                 throw new ArgumentException("Either sourceUrl or filePaths must be provided.");
             }
 
-            var files = new List<(string, string, Stream)>();
+            files ??= new List<DubbingStream>();
 
             if (filePaths != null)
             {
@@ -170,7 +190,7 @@ private DubbingRequest(
                         ".webm" => "video/webm",
                         _ => "application/octet-stream"
                     };
-                    files.Add((fileInfo.Name, mediaType, stream));
+                    files.Add(new(stream, fileInfo.Name, mediaType));
                 }
             }
 
@@ -192,7 +212,7 @@ private DubbingRequest(
         /// <summary>
         /// Files to dub.
         /// </summary>
-        public IReadOnlyList<(string, string, Stream)> Files { get; }
+        public IReadOnlyList<DubbingStream> Files { get; }
 
         /// <summary>
         /// URL of the source video/audio file.
@@ -261,12 +281,11 @@ private void Dispose(bool disposing)
             if (disposing)
             {
                 if (Files == null) { return; }
-                foreach (var (_, _, stream) in Files)
+                foreach (var dub in Files)
                 {
                     try
                     {
-                        stream?.Close();
-                        stream?.Dispose();
+                        dub.Dispose();
                     }
                     catch (Exception e)
                     {

diff --git a/Runtime/Dubbing/DubbingStream.cs b/Runtime/Dubbing/DubbingStream.cs
@@ -0,0 +1,64 @@
+// Licensed under the MIT License. See LICENSE in the project root for license information.
+
+using System;
+using System.IO;
+
+namespace ElevenLabs.Dubbing
+{
+    /// <summary>
+    /// A named, media-typed <see cref="System.IO.Stream"/> to upload for dubbing; disposing it disposes the stream.
+    /// </summary>
+    public sealed class DubbingStream : IDisposable
+    {
+        /// <summary>
+        /// Validates the arguments and wraps <paramref name="stream"/> together with its file name and media type.
+        /// </summary>
+        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
+        /// <exception cref="ArgumentException">Stream is unreadable or empty, name is blank, or media type is not "type/subtype".</exception>
+        public DubbingStream(Stream stream, string name, string mediaType)
+        {
+            Stream = stream ?? throw new ArgumentNullException(nameof(stream));
+
+            if (!Stream.CanRead)
+            {
+                throw new ArgumentException("Stream must be readable.");
+            }
+
+            // Length throws NotSupportedException on non-seekable streams, so only probe it when seeking is supported.
+            if (Stream.CanSeek && Stream.Length == 0)
+            {
+                throw new ArgumentException("Stream cannot be empty.");
+            }
+
+            Name = name ?? throw new ArgumentNullException(nameof(name));
+
+            if (string.IsNullOrWhiteSpace(Name))
+            {
+                throw new ArgumentException("Name cannot be empty.");
+            }
+
+            MediaType = mediaType ?? throw new ArgumentNullException(nameof(mediaType));
+
+            if (string.IsNullOrWhiteSpace(MediaType))
+            {
+                throw new ArgumentException("Media type cannot be empty.");
+            }
+
+            // Expect exactly "type/subtype"; a value without '/' splits into a single part and is rejected as well.
+            var parts = MediaType.Split('/');
+            if (parts.Length != 2 || string.IsNullOrWhiteSpace(parts[0]) || string.IsNullOrWhiteSpace(parts[1]))
+            {
+                throw new ArgumentException("Invalid media type.");
+            }
+        }
+
+        public Stream Stream { get; }
+
+        public string Name { get; }
+
+        public string MediaType { get; }
+
+        /// <summary>Disposes the wrapped stream.</summary>
+        public void Dispose() => Stream?.Dispose();
+    }
+}
diff --git a/Runtime/Dubbing/DubbingStream.cs.meta b/Runtime/Dubbing/DubbingStream.cs.meta
diff --git a/Runtime/Models/Model.cs b/Runtime/Models/Model.cs
@@ -78,6 +78,20 @@ public Model(
 
         #region Predefined Models
 
+        /// <summary>
+        /// Our latest, ultra-low-latency English only model, generating speech in under 75ms.  Best for developer use cases requiring speed.
+        /// </summary>
+        [Preserve]
+        [JsonIgnore]
+        public static Model FlashV2 { get; } = new("eleven_flash_v2");
+
+        /// <summary>
+        /// Our latest, ultra-low-latency model, generating speech in under 75ms.  Best for developer use cases requiring speed and multiple languages.
+        /// </summary>
+        [Preserve]
+        [JsonIgnore]
+        public static Model FlashV2_5 { get; } = new("eleven_flash_v2_5");
+
         [Preserve]
         [JsonIgnore]
         [Obsolete("Use EnglishV1")]

diff --git a/Runtime/TextToSpeech/TextToSpeechRequest.cs b/Runtime/TextToSpeech/TextToSpeechRequest.cs
@@ -26,7 +26,7 @@ public sealed class TextToSpeechRequest
         /// Optional, <see cref="VoiceSettings"/> that will override the default settings in <see cref="Voice.Settings"/>.
         /// </param>
         /// <param name="model">
-        /// Optional, <see cref="Model"/> to use. Defaults to <see cref="Model.TurboV2_5"/>.
+        /// Optional, <see cref="Model"/> to use. Defaults to <see cref="Model.FlashV2"/>.
         /// </param>
         /// <param name="outputFormat">
         /// Output format of the generated audio.<br/>
@@ -87,7 +87,7 @@ public TextToSpeechRequest(
             }
 
             Text = text;
-            Model = model ?? Models.Model.TurboV2_5;
+            Model = model ?? Models.Model.FlashV2;
             Voice = string.IsNullOrWhiteSpace(voice) ? Voice.Adam : voice;
             VoiceSettings = voiceSettings ?? voice.Settings;
             OutputFormat = outputFormat;

diff --git a/package.json b/package.json
@@ -3,7 +3,7 @@
   "displayName": "ElevenLabs",
   "description": "A non-official Eleven Labs voice synthesis RESTful client.",
   "keywords": [],
-  "version": "3.4.2",
+  "version": "3.4.3",
   "unity": "2021.3",
   "documentationUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs#documentation",
   "changelogUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs/releases",