Skip to main content

Speech-to-Text (STT)

Transcribe audio into text.

Converts audio input to text using a speech-to-text (STT) model. This enables transcription services, voice commands, and other voice-driven interactions.

Example

JavaScript

const axios = require('axios').default;

// Pre-configured HTTP client: every request targets the UmamiAI v1 API and
// carries the bearer token. `axios.create` is a factory function, so it is
// called without `new`.
const api = axios.create({
  baseURL: 'https://api.umamiai.xyz/v1',
  headers: { Authorization: 'Bearer <YOUR_UMAMIAI_API_KEY>' },
});

/**
 * Sends a remote audio file URL to the `/stt` endpoint and logs the transcript.
 *
 * @returns {Promise<void>} Resolves once the transcript has been printed.
 */
const main = async () => {
  const response = await api.post('/stt', {
    model: '#g1_whisper-large',
    url: 'https://audio-samples.github.io/samples/mp3/blizzard_unconditional/sample-0.mp3',
  });

  // The transcript is read from the first alternative of the first channel.
  console.log('[transcription]', response.data.results.channels[0].alternatives[0].transcript);
};

// With axios' default settings a non-2xx response rejects the promise; handle
// it here so a failed request is reported instead of being an unhandled rejection.
main().catch((error) => {
  console.error('[error]', error.message);
  process.exitCode = 1;
});

Python

import requests


headers = {"Authorization": "Bearer <YOUR_UMAMIAI_API_KEY>"}
base_url = "https://api.umamiai.xyz/v1"


def main():
    """Send a sample audio URL to the /stt endpoint and print the transcript.

    When the API answers with an HTTP status of 400 or above, the status code
    and the raw response body are printed instead.
    """
    url = f"{base_url}/stt"
    data = {
        "model": "#g1_whisper-large",
        "url": "https://audio-samples.github.io/samples/mp3/blizzard_unconditional/sample-0.mp3",
    }

    response = requests.post(url, json=data, headers=headers)

    if response.status_code >= 400:
        print(f"Error: {response.status_code} - {response.text}")
    else:
        response_data = response.json()
        # The transcript is read from the first alternative of the first channel.
        transcript = response_data["results"]["channels"][0]["alternatives"][0][
            "transcript"
        ]
        print("[transcription]", transcript)


main()

Endpoint: POST /stt

Request

JavaScript

// Submit a transcription request for a remote audio file.
const response = await fetch('https://api.umamiai.xyz/v1/stt', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    Authorization: 'Bearer YOUR_UMAMIAI_API_KEY',
  },
  body: JSON.stringify({
    model: '#g1_whisper-large',
    url: 'https://audio-samples.github.io/samples/mp3/blizzard_unconditional/sample-0.mp3',
  }),
});

// fetch() only rejects on network failures, so HTTP error statuses have to be
// checked explicitly before the body is treated as a result.
if (!response.ok) {
  throw new Error(`STT request failed: ${response.status} ${await response.text()}`);
}

const data = await response.json();

Python

import requests

# Submit a transcription request for a remote audio file.
response = requests.post(
    "https://api.umamiai.xyz/v1/stt",
    headers={
        "Content-Type": "application/json",
        "Authorization": "Bearer YOUR_UMAMIAI_API_KEY",
    },
    json={
        "model": "#g1_whisper-large",
        "url": "https://audio-samples.github.io/samples/mp3/blizzard_unconditional/sample-0.mp3",
    },
)
# Raise requests.HTTPError on a 4xx/5xx answer instead of treating an error
# body as a transcription result.
response.raise_for_status()
data = response.json()

Curl

# Submit a transcription request for a remote audio file.
curl -L \
  -X POST \
  -H 'Authorization: Bearer YOUR_UMAMIAI_API_KEY' \
  -H 'Content-Type: application/json' \
  'https://api.umamiai.xyz/v1/stt' \
  -d '{"model":"#g1_whisper-large","url":"https://audio-samples.github.io/samples/mp3/blizzard_unconditional/sample-0.mp3"}'