Skip to content

Commit

Permalink
com.rest.elevenlabs 3.4.3 (#114)
Browse files Browse the repository at this point in the history
- Added flash models
- Added stream input support to dubbing endpoint
- Fixed http/https protocol in client settings

---------

Co-authored-by: evya5 <76632693+evya5@users.noreply.github.com>
  • Loading branch information
StephenHodgson and evya5 authored Jan 13, 2025
1 parent 3f51d46 commit 689ad11
Show file tree
Hide file tree
Showing 10 changed files with 150 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ Gets a list of shared voices in the public voice library.

```csharp
var api = new ElevenLabsClient();
var results = await ElevenLabsClient.SharedVoicesEndpoint.GetSharedVoicesAsync();
var results = await api.SharedVoicesEndpoint.GetSharedVoicesAsync();
foreach (var voice in results.Voices)
{
Debug.Log($"{voice.OwnerId} | {voice.VoiceId} | {voice.Date} | {voice.Name}");
Expand Down Expand Up @@ -452,7 +452,7 @@ Returns downloaded dubbed file path.
> Videos will be returned in MP4 format and audio only dubs will be returned in MP3.
```csharp
var dubbedClipPath = await ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage);
var dubbedClipPath = await api.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage);
var dubbedClip = await Rest.DownloadAudioClipAsync($"file://{dubbedClipPath}", AudioType.MPEG);
audioSource.PlayOneShot(dubbedClip);
```
Expand All @@ -464,7 +464,7 @@ Returns transcript for the dub in the desired format.
```csharp
var srcFile = new FileInfo(audioPath);
var transcriptPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt");
var transcriptFile = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage);
var transcriptFile = await api.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage);
await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile);
```

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ namespace ElevenLabs
{
public sealed class ElevenLabsSettingsInfo : ISettingsInfo
{
internal const string Http = "http://";
internal const string Https = "https://";
internal const string ElevenLabsDomain = "api.elevenlabs.io";
internal const string DefaultApiVersion = "v1";
Expand Down Expand Up @@ -45,7 +46,20 @@ public ElevenLabsSettingsInfo(string domain, string apiVersion = DefaultApiVersi
apiVersion = DefaultApiVersion;
}

Domain = domain.Contains("http") ? domain : $"{Https}{domain}";
var protocol = Https;

if (domain.StartsWith(Http))
{
protocol = Http;
domain = domain.Replace(Http, string.Empty);
}
else if (domain.StartsWith(Https))
{
protocol = Https;
domain = domain.Replace(Https, string.Empty);
}

Domain = $"{protocol}{domain}";
ApiVersion = apiVersion;
BaseRequest = $"/{ApiVersion}/";
BaseRequestUrlFormat = $"{Domain}{BaseRequest}{{0}}";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ public async Task<DubbingProjectMetadata> DubAsync(DubbingRequest request, int?
{
if (request.Files != null)
{
foreach (var (fileName, mediaType, stream) in request.Files)
foreach (var dub in request.Files)
{
using var audioData = new MemoryStream();
await stream.CopyToAsync(audioData, cancellationToken);
payload.AddBinaryData("file", audioData.ToArray(), fileName, mediaType);
await dub.Stream.CopyToAsync(audioData, cancellationToken);
payload.AddBinaryData("file", audioData.ToArray(), dub.Name, dub.MediaType);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public DubbingRequest(
bool? dropBackgroundAudio = null,
bool? useProfanityFilter = null,
string projectName = null)
: this(targetLanguage, null, filePaths, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
: this(targetLanguage, null, null, filePaths, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
{
}

Expand All @@ -55,7 +55,7 @@ public DubbingRequest(
bool? dropBackgroundAudio = null,
bool? useProfanityFilter = null,
string projectName = null)
: this(targetLanguage, sourceUrl, null, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
: this(targetLanguage, sourceUrl, null, null, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
{
}

Expand Down Expand Up @@ -104,14 +104,34 @@ public DubbingRequest(
DropBackgroundAudio = dropBackgroundAudio;
UseProfanityFilter = useProfanityFilter;
ProjectName = projectName;
var files = new List<(string, string, Stream)>(clips.Count);
files.AddRange((from audioClip in clips let stream = new MemoryStream(audioClip.EncodeToWav()) select (audioClip.name, "audio/wav", stream)).Select(value => ((string, string, Stream))value));
var files = new List<DubbingStream>(clips.Count);
var streams = from audioClip in clips
let stream = new MemoryStream(audioClip.EncodeToWav())
select (stream, audioClip.name, mediaType: "audio/wav");
files.AddRange(streams.Select(dub => new DubbingStream(dub.stream, dub.name, dub.mediaType)));
Files = files;
}

public DubbingRequest(
List<DubbingStream> files,
string targetLanguage,
string sourceLanguage = null,
int? numberOfSpeakers = null,
bool? watermark = null,
int? startTime = null,
int? endTime = null,
bool? highestResolution = null,
bool? dropBackgroundAudio = null,
bool? useProfanityFilter = null,
string projectName = null)
: this(targetLanguage, null, files, null, sourceLanguage, numberOfSpeakers, watermark, startTime, endTime, highestResolution, dropBackgroundAudio, useProfanityFilter, projectName)
{
}

private DubbingRequest(
string targetLanguage,
Uri sourceUrl = null,
List<DubbingStream> files = null,
IEnumerable<string> filePaths = null,
string sourceLanguage = null,
int? numberOfSpeakers = null,
Expand All @@ -135,7 +155,7 @@ private DubbingRequest(
throw new ArgumentException("Either sourceUrl or filePaths must be provided.");
}

var files = new List<(string, string, Stream)>();
files ??= new List<DubbingStream>();

if (filePaths != null)
{
Expand Down Expand Up @@ -170,7 +190,7 @@ private DubbingRequest(
".webm" => "video/webm",
_ => "application/octet-stream"
};
files.Add((fileInfo.Name, mediaType, stream));
files.Add(new(stream, fileInfo.Name, mediaType));
}
}

Expand All @@ -192,7 +212,7 @@ private DubbingRequest(
/// <summary>
/// Files to dub.
/// </summary>
public IReadOnlyList<(string, string, Stream)> Files { get; }
public IReadOnlyList<DubbingStream> Files { get; }

/// <summary>
/// URL of the source video/audio file.
Expand Down Expand Up @@ -261,12 +281,11 @@ private void Dispose(bool disposing)
if (disposing)
{
if (Files == null) { return; }
foreach (var (_, _, stream) in Files)
foreach (var dub in Files)
{
try
{
stream?.Close();
stream?.Dispose();
dub.Dispose();
}
catch (Exception e)
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Licensed under the MIT License. See LICENSE in the project root for license information.

using System;
using System.IO;

namespace ElevenLabs.Dubbing
{
/// <summary>
/// A named media stream, together with its MIME type, to be uploaded for dubbing.
/// </summary>
/// <remarks>
/// Disposing this instance disposes the wrapped <see cref="Stream"/>.
/// </remarks>
public sealed class DubbingStream : IDisposable
{
    /// <summary>
    /// Creates a new <see cref="DubbingStream"/>.
    /// </summary>
    /// <param name="stream">Readable stream containing the media data.</param>
    /// <param name="name">File name to associate with the stream.</param>
    /// <param name="mediaType">MIME type in <c>type/subtype</c> form, e.g. <c>audio/wav</c>.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="stream"/>, <paramref name="name"/> or <paramref name="mediaType"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The stream is not readable, the stream is seekable and empty,
    /// the name is blank, or the media type is not in <c>type/subtype</c> form.
    /// </exception>
    public DubbingStream(Stream stream, string name, string mediaType)
    {
        Stream = stream ?? throw new ArgumentNullException(nameof(stream));

        // Readability is checked first so that a closed or write-only stream is
        // reported as an argument error instead of failing inside the Length getter.
        if (!Stream.CanRead)
        {
            throw new ArgumentException("Stream must be readable.", nameof(stream));
        }

        // Length is only supported on seekable streams; non-seekable streams
        // (network, compression, pipes) throw NotSupportedException when queried,
        // so the emptiness check is skipped for them.
        if (Stream.CanSeek && Stream.Length == 0)
        {
            throw new ArgumentException("Stream cannot be empty.", nameof(stream));
        }

        Name = name ?? throw new ArgumentNullException(nameof(name));

        if (string.IsNullOrWhiteSpace(Name))
        {
            throw new ArgumentException("Name cannot be empty.", nameof(name));
        }

        MediaType = mediaType ?? throw new ArgumentNullException(nameof(mediaType));

        if (string.IsNullOrWhiteSpace(MediaType))
        {
            throw new ArgumentException("Media type cannot be empty.", nameof(mediaType));
        }

        // A valid media type has exactly one '/' separating two non-blank parts.
        // A value without any '/' splits into a single part and is rejected here too.
        var parts = MediaType.Split('/');

        if (parts.Length != 2 ||
            string.IsNullOrWhiteSpace(parts[0]) ||
            string.IsNullOrWhiteSpace(parts[1]))
        {
            throw new ArgumentException("Invalid media type.", nameof(mediaType));
        }
    }

    /// <summary>
    /// The media data to upload.
    /// </summary>
    public Stream Stream { get; }

    /// <summary>
    /// The file name associated with the stream.
    /// </summary>
    public string Name { get; }

    /// <summary>
    /// The MIME type of the stream contents.
    /// </summary>
    public string MediaType { get; }

    /// <summary>
    /// Disposes the wrapped <see cref="Stream"/>.
    /// </summary>
    public void Dispose()
    {
        Stream?.Dispose();
    }
}
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions ElevenLabs/Packages/com.rest.elevenlabs/Runtime/Models/Model.cs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,20 @@ public Model(

#region Predefined Models

/// <summary>
/// Our latest, ultra-low-latency English-only model, generating speech in under 75ms. Best for developer use cases requiring speed.
/// </summary>
[Preserve]
[JsonIgnore]
public static Model FlashV2 { get; } = new("eleven_flash_v2");

/// <summary>
/// Our latest, ultra-low-latency model, generating speech in under 75ms. Best for developer use cases requiring speed and multiple languages.
/// </summary>
[Preserve]
[JsonIgnore]
public static Model FlashV2_5 { get; } = new("eleven_flash_v2_5");

[Preserve]
[JsonIgnore]
[Obsolete("Use EnglishV1")]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public sealed class TextToSpeechRequest
/// Optional, <see cref="VoiceSettings"/> that will override the default settings in <see cref="Voice.Settings"/>.
/// </param>
/// <param name="model">
/// Optional, <see cref="Model"/> to use. Defaults to <see cref="Model.TurboV2_5"/>.
/// Optional, <see cref="Model"/> to use. Defaults to <see cref="Model.FlashV2"/>.
/// </param>
/// <param name="outputFormat">
/// Output format of the generated audio.<br/>
Expand Down Expand Up @@ -87,7 +87,7 @@ public TextToSpeechRequest(
}

Text = text;
Model = model ?? Models.Model.TurboV2_5;
Model = model ?? Models.Model.FlashV2;
Voice = string.IsNullOrWhiteSpace(voice) ? Voice.Adam : voice;
VoiceSettings = voiceSettings ?? voice.Settings;
OutputFormat = outputFormat;
Expand Down
2 changes: 1 addition & 1 deletion ElevenLabs/Packages/com.rest.elevenlabs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"displayName": "ElevenLabs",
"description": "A non-official Eleven Labs voice synthesis RESTful client.",
"keywords": [],
"version": "3.4.2",
"version": "3.4.3",
"unity": "2021.3",
"documentationUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs#documentation",
"changelogUrl": "https://github.com/RageAgainstThePixel/com.rest.elevenlabs/releases",
Expand Down
12 changes: 8 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,11 @@ The recommended installation method is through the unity package manager and [Ope

---

## Documentation
## [Documentation](https://rageagainstthepixel.github.io/ElevenLabs-DotNet)

> Check out our new API docs!
<https://rageagainstthepixel.github.io/ElevenLabs-DotNet>

### Table of Contents

Expand Down Expand Up @@ -305,7 +309,7 @@ Gets a list of shared voices in the public voice library.

```csharp
var api = new ElevenLabsClient();
var results = await ElevenLabsClient.SharedVoicesEndpoint.GetSharedVoicesAsync();
var results = await api.SharedVoicesEndpoint.GetSharedVoicesAsync();
foreach (var voice in results.Voices)
{
Debug.Log($"{voice.OwnerId} | {voice.VoiceId} | {voice.Date} | {voice.Name}");
Expand Down Expand Up @@ -452,7 +456,7 @@ Returns downloaded dubbed file path.
> Videos will be returned in MP4 format and audio only dubs will be returned in MP3.
```csharp
var dubbedClipPath = await ElevenLabsClient.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage);
var dubbedClipPath = await api.DubbingEndpoint.GetDubbedFileAsync(metadata.DubbingId, request.TargetLanguage);
var dubbedClip = await Rest.DownloadAudioClipAsync($"file://{dubbedClipPath}", AudioType.MPEG);
audioSource.PlayOneShot(dubbedClip);
```
Expand All @@ -464,7 +468,7 @@ Returns transcript for the dub in the desired format.
```csharp
var srcFile = new FileInfo(audioPath);
var transcriptPath = new FileInfo($"{srcFile.FullName}.dubbed.{request.TargetLanguage}.srt");
var transcriptFile = await ElevenLabsClient.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage);
var transcriptFile = await api.DubbingEndpoint.GetTranscriptForDubAsync(metadata.DubbingId, request.TargetLanguage);
await File.WriteAllTextAsync(transcriptPath.FullName, transcriptFile);
```

Expand Down

0 comments on commit 689ad11

Please sign in to comment.