import AVFoundation
import Speech

/// A class that manages speech transcription using Apple's SpeechTranscriber API.
/// Automatically detects when the user has finished speaking.
@available(iOS 18.0, *)
@MainActor
final class SpeechTranscriber: ObservableObject {
    // MARK: - Published Properties

    /// The current transcription text
    @Published private(set) var transcription: String = ""

    /// Whether the transcriber is currently listening
    @Published private(set) var isListening: Bool = false

    /// The current authorization status
    @Published private(set) var authorizationStatus: SFSpeechRecognizerAuthorizationStatus = .notDetermined

    /// Any error that occurred during transcription
    @Published private(set) var error: Error?

    // MARK: - Private Properties

    private var speechTranscriber: Speech.SpeechTranscriber?
    private var transcriptionTask: Task<Void, Never>?
    private let audioEngine = AVAudioEngine()

    // MARK: - Initialization

    init() {
        self.authorizationStatus = SFSpeechRecognizer.authorizationStatus()
    }
    // MARK: - Public Methods

    /// Requests authorization to use speech recognition
    func requestAuthorization() async {
        // SFSpeechRecognizer.requestAuthorization is callback-based, so bridge
        // it into async/await with a continuation.
        let status = await withCheckedContinuation { continuation in
            SFSpeechRecognizer.requestAuthorization { status in
                continuation.resume(returning: status)
            }
        }
        authorizationStatus = status
    }
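
    // NOTE: Speech recognition and microphone capture both require usage
    // descriptions in the app's Info.plist, or authorization is denied at
    // runtime. The key names below are the real system keys; the strings
    // are illustrative placeholders:
    //
    //     <key>NSSpeechRecognitionUsageDescription</key>
    //     <string>Transcribes your voice commands.</string>
    //     <key>NSMicrophoneUsageDescription</key>
    //     <string>Captures audio for transcription.</string>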
    /// Starts listening and transcribing speech
    /// - Parameter locale: The locale for speech recognition (defaults to device locale)
    func startListening(locale: Locale = .current) async throws {
        // Check authorization
        guard authorizationStatus == .authorized else {
            throw TranscriberError.notAuthorized
        }

        // Stop any existing transcription
        if isListening {
            await stopListening()
        }

        // Reset state
        transcription = ""
        error = nil
        isListening = true

        // Configure and activate the audio session for recording
        let audioSession = AVAudioSession.sharedInstance()
        try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
        try audioSession.setActive(true, options: .notifyOthersOnDeactivation)

        // Create the speech transcriber
        let transcriber = Speech.SpeechTranscriber(locale: locale)
        speechTranscriber = transcriber

        // Start transcription task
        transcriptionTask = Task {
            do {
                // Get the audio input
                let inputNode = audioEngine.inputNode
                let recordingFormat = inputNode.outputFormat(forBus: 0)
                // Configure the transcription session: enable punctuation and
                // subscribe to raw audio packets (a hook for visualization).
                // The returned session is unused here, so discard it explicitly.
                _ = transcriber.addsPunctuation().onAudioPacketAvailable { _ in
                    // Audio packet callback if needed for visualization
                }
                // Install tap on audio engine
                inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { [weak self] buffer, _ in
                    guard let self else { return }

                    // Send audio to transcriber on the main actor
                    Task { @MainActor in
                        try? await self.speechTranscriber?.transcribe(audioBuffer: buffer)
                    }
                }

                // Start the audio engine
                audioEngine.prepare()
                try audioEngine.start()

                // Process transcription results; the handler is synchronous
                // and main-actor isolated, so no await is needed
                for try await result in transcriber.transcribedResults() {
                    handleTranscriptionResult(result)
                }
            } catch {
                handleError(error)
            }
        }
    }
    /// Stops listening and transcribing
    func stopListening() async {
        isListening = false

        // Stop audio engine
        if audioEngine.isRunning {
            audioEngine.stop()
            audioEngine.inputNode.removeTap(onBus: 0)
        }

        // Finish transcription
        try? await speechTranscriber?.finishTranscription()

        // Cancel task
        transcriptionTask?.cancel()
        transcriptionTask = nil
        speechTranscriber = nil

        // Deactivate audio session
        try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
    }
    // MARK: - Private Methods

    private func handleTranscriptionResult(_ result: SFSpeechRecognitionResult) {
        // Update transcription
        transcription = result.bestTranscription.formattedString

        // Check if speech has finished
        if result.isFinal {
            Task {
                await stopListening()
            }
        } else if (result.bestTranscription.segments.last?.confidence ?? 0) > 0.5 {
            // If we have high confidence and a pause, consider stopping.
            // This helps auto-detect when the user is done speaking.
            let duration = result.bestTranscription.segments.last?.duration ?? 0

            // You can adjust this threshold for pause detection
            if duration > 2.0 { // 2 second pause
                Task {
                    try? await Task.sleep(for: .seconds(1))
                    if self.isListening {
                        await stopListening()
                    }
                }
            }
        }
    }
    private func handleError(_ error: Error) {
        self.error = error
        Task {
            await stopListening()
        }
    }
    // MARK: - Error Types

    enum TranscriberError: LocalizedError {
        case notAuthorized
        case audioEngineFailure
        case transcriptionFailed

        var errorDescription: String? {
            switch self {
            case .notAuthorized:
                return "Speech recognition is not authorized. Please enable it in Settings."
            case .audioEngineFailure:
                return "Failed to start audio engine."
            case .transcriptionFailed:
                return "Speech transcription failed."
            }
        }
    }
}
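
// Example usage: a minimal SwiftUI sketch of the intended call pattern.
// `TranscriptionDemoView` is illustrative only and not part of the app;
// it assumes the view is only shown on iOS 18 or later.
#if DEBUG && canImport(SwiftUI)
import SwiftUI

@available(iOS 18.0, *)
struct TranscriptionDemoView: View {
    @StateObject private var transcriber = SpeechTranscriber()

    var body: some View {
        VStack(spacing: 16) {
            // Live transcription text, updated as results stream in
            Text(transcriber.transcription)

            // Toggle listening; authorization is requested before starting
            Button(transcriber.isListening ? "Stop" : "Listen") {
                Task {
                    if transcriber.isListening {
                        await transcriber.stopListening()
                    } else {
                        await transcriber.requestAuthorization()
                        try? await transcriber.startListening()
                    }
                }
            }
        }
        .padding()
    }
}
#endif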