Lightning-Fast Speech Recognition with OpenAI Quality
Experience the powerful speech recognition capabilities of Whisper Turbo directly in your browser
8x faster than Large V3 with only 4 decoder layers, optimized for real-time transcription while maintaining exceptional accuracy (see the configuration check below the feature list).
Transcribe speech in over 99 languages with robust performance across diverse accents and dialects.
MIT licensed with support for popular frameworks like Hugging Face Transformers and MLX for Apple Silicon.
Achieves accuracy comparable to Large V2, with a word error rate roughly 1% lower than Distil-Whisper's.
Compatible with existing Whisper Large V3 inference code, requiring no modifications to your pipeline.
Approximately 50% smaller than Large V3 at just 1.6GB, perfect for edge deployment and faster loading.
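To check the architectural numbers above for yourself, here is a minimal sketch, assuming the Hugging Face transformers package and the openai/whisper-large-v3-turbo checkpoint, that loads only the model configuration (no weights) and prints the layer counts.

from transformers import AutoConfig

# Load just the configuration of the turbo checkpoint (no weights downloaded)
config = AutoConfig.from_pretrained("openai/whisper-large-v3-turbo")

# Turbo keeps the full encoder but trims the decoder to 4 layers
print(f"Encoder layers: {config.encoder_layers}")
print(f"Decoder layers: {config.decoder_layers}")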
import whisper

# Load the turbo model
model = whisper.load_model("turbo")

# Load and process audio
audio = whisper.load_audio("speech.mp3")
audio = whisper.pad_or_trim(audio)

# Generate mel spectrogram (turbo expects 128 mel bins, so use the model's value)
mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(model.device)

# Detect language
_, probs = model.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")

# Transcribe
options = whisper.DecodingOptions()
result = whisper.decode(model, mel, options)
print(result.text)
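For everyday use, the same openai-whisper package also offers a one-call helper that handles long audio and language detection for you; a minimal sketch (the audio path is just an example):

import whisper

# The high-level helper chunks long audio and detects the language automatically
model = whisper.load_model("turbo")
result = model.transcribe("speech.mp3")
print(result["text"])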
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Load model and processor
model_id = "openai/whisper-large-v3-turbo"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True
)
model.to(device)
processor = AutoProcessor.from_pretrained(model_id)

# Build the speech-recognition pipeline
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)

# Transcribe
result = pipe("audio.mp3")
print(result["text"])
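For longer recordings or segment timestamps, the same pipeline accepts a few extra arguments. A sketch assuming the pipe object built above; the chunk length, batch size, and language values here are illustrative, not required settings:

# Chunked long-form transcription with segment timestamps
result = pipe(
    "audio.mp3",
    chunk_length_s=30,                        # process long audio in 30 s windows
    batch_size=8,                             # decode several chunks in parallel
    return_timestamps=True,                   # include segment-level timestamps
    generate_kwargs={"language": "english"},  # skip automatic language detection
)

for chunk in result["chunks"]:
    start, end = chunk["timestamp"]
    print(f"[{start} -> {end}] {chunk['text']}")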
from faster_whisper import WhisperModel, BatchedInferencePipeline

# Initialize model with turbo
model = WhisperModel("turbo", device="cuda", compute_type="float16")

# Enable batched inference for better performance
batched_model = BatchedInferencePipeline(model=model)

# Transcribe with batch processing
segments, info = batched_model.transcribe(
    "audio.mp3", batch_size=16, language="en"
)

# Print results with timestamps
for segment in segments:
    print(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
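faster-whisper can also skip silence and emit word-level timestamps without the batched pipeline; a minimal sketch reusing the model object from above (the audio path is illustrative):

# Single-stream transcription with voice-activity filtering and word timestamps
segments, info = model.transcribe(
    "audio.mp3",
    vad_filter=True,        # drop long silent stretches before decoding
    word_timestamps=True,   # attach start/end times to each word
)

print(f"Detected language: {info.language} (p={info.language_probability:.2f})")
for segment in segments:
    for word in segment.words:
        print(f"[{word.start:.2f}s -> {word.end:.2f}s] {word.word}")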