Skip to content

Commit

Permalink
com.rest.elevenlabs 3.4.3 (#114)
Browse files Browse the repository at this point in the history
- Added flash models
- Added stream input support to dubbing endpoint
- Fixed http/https protocol in client settings

---------

Co-authored-by: evya5 <76632693+evya5@users.noreply.github.com>
  • Loading branch information
StephenHodgson and evya5 authored Jan 13, 2025
1 parent d41517c commit 3dd77e1
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 20 deletions.
6 changes: 3 additions & 3 deletions Documentation~/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ Gets a list of shared voices in the public voice library.

```csharp
var api = new ElevenLabsClient();
var results = await ElevenLabsClient.SharedVoicesEndpoint.GetSharedVoicesAsync();
var results = await api.SharedVoicesEndpoint.GetSharedVoicesAsync();
foreach (var voice in results.Voices)
{
Debug.Log($"{voice.OwnerId} | {voice.VoiceId} | {voice.Date} | {voice.Name}");
Expand Down Expand Up @@ -452,7 +452,7 @@ Returns downloaded dubbed file path.
> Videos will be returned in MP4 format and audio only dubs will be returned in MP3.
```csharp
var dubbedClipPath = await ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage);
var dubbedClipPath = await api.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage);
var dubbedClip = await Rest.DownloadAudioClipAsync($"file://{dubbedClipPath}", AudioType.MPEG);
audioSource.PlayOneShot(dubbedClip);
```
Expand All @@ -464,7 +464,7 @@ Returns transcript for the dub in the desired format.
```csharp
var srcFile = new FileInfo(audioPath);
var transcriptPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt");
var transcriptFile = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage);
var transcriptFile = await api.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage);
await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile);
```

Expand Down
16 changes: 15 additions & 1 deletion Runtime/Authentication/ElevenLabsSettingsInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ namespace ElevenLabs
{
public sealed class ElevenLabsSettingsInfo : ISettingsInfo
{
internal const string Http = "http://";
internal const string Https = "https://";
internal const string ElevenLabsDomain = "api.elevenlabs.io";
internal const string DefaultApiVersion = "v1";
Expand Down Expand Up @@ -45,7 +46,20 @@ public ElevenLabsSettingsInfo(string domain, string apiVersion = DefaultApiVersi
apiVersion = DefaultApiVersion;
}

Domain = domain.Contains("http") ? domain : $"{Https}{domain}";
var protocol = Https;

if (domain.StartsWith(Http))
{
protocol = Http;
domain = domain.Replace(Http, string.Empty);
}
else if (domain.StartsWith(Https))
{
protocol = Https;
domain = domain.Replace(Https, string.Empty);
}

Domain = $"{protocol}{domain}";
ApiVersion = apiVersion;
BaseRequest = $"/{ApiVersion}/";
BaseRequestUrlFormat = $"{Domain}{BaseRequest}{{0}}";
Expand Down
6 changes: 3 additions & 3 deletions Runtime/Dubbing/DubbingEndpoint.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ public async Task<DubbingProjectMetadata> DubAsync(DubbingRequest request, int?
{
if (request.Files != null)
{
foreach (var (fileName, mediaType, stream) in request.Files)
foreach (var dub in request.Files)
{
using var audioData = new MemoryStream();
await stream.CopyToAsync(audioData, cancellationToken);
payload.AddBinaryData("file", audioData.ToArray(), fileName, mediaType);
await dub.Stream.CopyToAsync(audioData, cancellationToken);
payload.AddBinaryData("file", audioData.ToArray(), dub.Name, dub.MediaType);
}
}

Expand Down
39 changes: 29 additions & 10 deletions Runtime/Dubbing/DubbingRequest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public DubbingRequest(
bool? dropBackgroundAudio = null,
bool? useProfanityFilter = null,
string projectName = null)
: this(targetLanguage, null, filePaths, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
: this(targetLanguage, null, null, filePaths, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
{
}

Expand All @@ -55,7 +55,7 @@ public DubbingRequest(
bool? dropBackgroundAudio = null,
bool? useProfanityFilter = null,
string projectName = null)
: this(targetLanguage, sourceUrl, null, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
: this(targetLanguage, sourceUrl, null, null, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
{
}

Expand Down Expand Up @@ -104,14 +104,34 @@ public DubbingRequest(
DropBackgroundAudio = dropBackgroundAudio;
UseProfanityFilter = useProfanityFilter;
ProjectName = projectName;
var files = new List<(string, string, Stream)>(clips.Count);
files.AddRange((from audioClip in clips let stream = new MemoryStream(audioClip.EncodeToWav()) select (audioClip.name, "audio/wav", stream)).Select(value => ((string, string, Stream))value));
var files = new List<DubbingStream>(clips.Count);
var streams = from audioClip in clips
let stream = new MemoryStream(audioClip.EncodeToWav())
select (stream, audioClip.name, mediaType: "audio/wav");
files.AddRange(streams.Select(dub => new DubbingStream(dub.stream, dub.name, dub.mediaType)));
Files = files;
}

/// <summary>
/// Creates a dubbing request from caller-supplied <see cref="DubbingStream"/> instances.
/// Delegates to the shared private constructor, passing <c>null</c> for both the source url and the file paths.
/// </summary>
/// <param name="files">Streams to dub.</param>
/// <param name="targetLanguage">Target language of the dub.</param>
/// <param name="sourceLanguage">Optional, forwarded unchanged to the private constructor.</param>
/// <param name="numberOfSpeakers">Optional, forwarded unchanged to the private constructor.</param>
/// <param name="watermark">Optional, forwarded unchanged to the private constructor.</param>
/// <param name="startTime">Optional, forwarded unchanged to the private constructor.</param>
/// <param name="endTime">Optional, forwarded unchanged to the private constructor.</param>
/// <param name="highestResolution">Optional, forwarded unchanged to the private constructor.</param>
/// <param name="dropBackgroundAudio">Optional, forwarded unchanged to the private constructor.</param>
/// <param name="useProfanityFilter">Optional, forwarded unchanged to the private constructor.</param>
/// <param name="projectName">Optional, forwarded unchanged to the private constructor.</param>
/// <remarks>
/// NOTE(review): the request's Dispose disposes every entry of <see cref="Files"/>, so the supplied streams
/// appear to be owned by the request once it is constructed; confirm against the private constructor.
/// </remarks>
public DubbingRequest(
List<DubbingStream> files,
string targetLanguage,
string sourceLanguage = null,
int? numberOfSpeakers = null,
bool? watermark = null,
int? startTime = null,
int? endTime = null,
bool? highestResolution = null,
bool? dropBackgroundAudio = null,
bool? useProfanityFilter = null,
string projectName = null)
: this(targetLanguage, null, files, null, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
{
}

private DubbingRequest(
string targetLanguage,
Uri sourceUrl = null,
List<DubbingStream> files = null,
IEnumerable<string> filePaths = null,
string sourceLanguage = null,
int? numberOfSpeakers = null,
Expand All @@ -135,7 +155,7 @@ private DubbingRequest(
throw new ArgumentException("Either sourceUrl or filePaths must be provided.");
}

var files = new List<(string, string, Stream)>();
files ??= new List<DubbingStream>();

if (filePaths != null)
{
Expand Down Expand Up @@ -170,7 +190,7 @@ private DubbingRequest(
".webm" => "video/webm",
_ => "application/octet-stream"
};
files.Add((fileInfo.Name, mediaType, stream));
files.Add(new(stream, fileInfo.Name, mediaType));
}
}

Expand All @@ -192,7 +212,7 @@ private DubbingRequest(
/// <summary>
/// Files to dub.
/// </summary>
public IReadOnlyList<(string, string, Stream)> Files { get; }
public IReadOnlyList<DubbingStream> Files { get; }

/// <summary>
/// URL of the source video/audio file.
Expand Down Expand Up @@ -261,12 +281,11 @@ private void Dispose(bool disposing)
if (disposing)
{
if (Files == null) { return; }
foreach (var (_, _, stream) in Files)
foreach (var dub in Files)
{
try
{
stream?.Close();
stream?.Dispose();
dub.Dispose();
}
catch (Exception e)
{
Expand Down
64 changes: 64 additions & 0 deletions Runtime/Dubbing/DubbingStream.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System;
using System.IO;

namespace ElevenLabs.Dubbing
{
    /// <summary>
    /// A readable media stream, together with the file name and media type it is uploaded under when dubbing.
    /// Disposing this instance disposes the wrapped <see cref="Stream"/>.
    /// </summary>
    public sealed class DubbingStream : IDisposable
    {
        /// <summary>
        /// Creates a new <see cref="DubbingStream"/>.
        /// </summary>
        /// <param name="stream">Readable stream containing the media. Seekable streams must not be empty.</param>
        /// <param name="name">File name reported for the media.</param>
        /// <param name="mediaType">Media type in <c>type/subtype</c> form, e.g. <c>audio/wav</c>.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="stream"/>, <paramref name="name"/> or <paramref name="mediaType"/> is null.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// The stream is not readable, the stream is seekable and empty,
        /// the name is blank, or the media type is blank or not in <c>type/subtype</c> form.
        /// </exception>
        public DubbingStream(Stream stream, string name, string mediaType)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            // Readability is checked first so closed or write-only streams get a clear
            // argument error instead of whatever Length would throw for them.
            if (!stream.CanRead)
            {
                throw new ArgumentException("Stream must be readable.", nameof(stream));
            }

            // Stream.Length throws NotSupportedException on non-seekable streams (network, compression, pipes),
            // so the emptiness check can only be applied when the stream supports seeking.
            if (stream.CanSeek && stream.Length == 0)
            {
                throw new ArgumentException("Stream cannot be empty.", nameof(stream));
            }

            if (name == null)
            {
                throw new ArgumentNullException(nameof(name));
            }

            if (string.IsNullOrWhiteSpace(name))
            {
                throw new ArgumentException("Name cannot be empty.", nameof(name));
            }

            if (mediaType == null)
            {
                throw new ArgumentNullException(nameof(mediaType));
            }

            if (string.IsNullOrWhiteSpace(mediaType))
            {
                throw new ArgumentException("Media type cannot be empty.", nameof(mediaType));
            }

            // Exactly one '/' separating two non-blank parts; a value without '/' yields a single part and is rejected.
            var parts = mediaType.Split('/');

            if (parts.Length != 2 || string.IsNullOrWhiteSpace(parts[0]) || string.IsNullOrWhiteSpace(parts[1]))
            {
                throw new ArgumentException("Invalid media type.", nameof(mediaType));
            }

            Stream = stream;
            Name = name;
            MediaType = mediaType;
        }

        /// <summary>
        /// The media stream to read from.
        /// </summary>
        public Stream Stream { get; }

        /// <summary>
        /// The file name reported for the media.
        /// </summary>
        public string Name { get; }

        /// <summary>
        /// The media type in <c>type/subtype</c> form.
        /// </summary>
        public string MediaType { get; }

        /// <summary>
        /// Disposes the wrapped <see cref="Stream"/>.
        /// </summary>
        public void Dispose()
        {
            Stream?.Dispose();
        }
    }
}
11 changes: 11 additions & 0 deletions Runtime/Dubbing/DubbingStream.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions Runtime/Models/Model.cs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,20 @@ public Model(

#region Predefined Models

/// <summary>
/// Our ultra-low-latency English only model, generating speech in under 75ms. Best for developer use cases requiring speed.
/// </summary>
[Preserve]
[JsonIgnore]
public static Model FlashV2 { get; } = new("eleven_flash_v2");

/// <summary>
/// Our latest, ultra-low-latency model, generating speech in under 75ms. Best for developer use cases requiring speed and multiple languages.
/// </summary>
[Preserve]
[JsonIgnore]
public static Model FlashV2_5 { get; } = new("eleven_flash_v2_5");

[Preserve]
[JsonIgnore]
[Obsolete("Use EnglishV1")]
Expand Down
4 changes: 2 additions & 2 deletions Runtime/TextToSpeech/TextToSpeechRequest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public sealed class TextToSpeechRequest
/// Optional, <see cref="VoiceSettings"/> that will override the default settings in <see cref="Voice.Settings"/>.
/// </param>
/// <param name="model">
/// Optional, <see cref="Model"/> to use. Defaults to <see cref="Model.TurboV2_5"/>.
/// Optional, <see cref="Model"/> to use. Defaults to <see cref="Model.FlashV2"/>.
/// </param>
/// <param name="outputFormat">
/// Output format of the generated audio.<br/>
Expand Down Expand Up @@ -87,7 +87,7 @@ public TextToSpeechRequest(
}

Text = text;
Model = model ?? Models.Model.TurboV2_5;
Model = model ?? Models.Model.FlashV2;
Voice = string.IsNullOrWhiteSpace(voice) ? Voice.Adam : voice;
VoiceSettings = voiceSettings ?? voice.Settings;
OutputFormat = outputFormat;
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"displayName": "ElevenLabs",
"description": "A non-official Eleven Labs voice synthesis RESTful client.",
"keywords": [],
"version": "3.4.2",
"version": "3.4.3",
"unity": "2021.3",
"documentationUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs#documentation",
"changelogUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs/releases",
Expand Down

0 comments on commit 3dd77e1

Please sign in to comment.