Compare commits

...

3 Commits

Author SHA1 Message Date
Adam Gastineau
e0bf9756fc Make sure LLM client preserves Content-Type 2025-12-03 15:44:38 -08:00
Adam Gastineau
3759fd7cd5 Separate out empty transcription STT error 2025-11-27 09:21:36 -08:00
Adam Gastineau
a8719a2140 Immediately start listening when touchpad tapped, removing start delay 2025-11-27 07:09:56 -08:00
7 changed files with 117 additions and 19 deletions

View File

@ -65,15 +65,28 @@ open class PlatformInputHandler(
object : ITouchpadGestureDelegate {
override fun onGesture(gesture: TouchpadGesture) {
// TODO: Build proper API for Input Handler to perform standardized triggers
if (gesture.kind != TouchpadGestureKind.HOLD_END) {
// Any gesture that isn't a release should halt talking
interactionFlowManager.finishListening()
if (gesture.kind != TouchpadGestureKind.HOLD_END &&
gesture.kind != TouchpadGestureKind.FINGER_DOWN &&
gesture.kind != TouchpadGestureKind.GESTURE_CANCEL) {
// Any gesture that isn't a release (or intermediate finger down/cancel) should halt talking
interactionFlowManager.cancelTalking()
}
when (gesture.kind) {
TouchpadGestureKind.FINGER_DOWN -> {
// Immediately start listening, even if we abort later
interactionFlowManager.startListening()
}
TouchpadGestureKind.GESTURE_CANCEL -> {
interactionFlowManager.finishListening(abort = true)
}
TouchpadGestureKind.DOUBLE_TAP -> {
// TODO: Fix double tap with two fingers
// if (gesture.fingerCount == 2) {
// Cancel listening if it is ongoing
interactionFlowManager.finishListening(abort = true)
interactionFlowManager.takePicture()
// }
}

View File

@ -7,6 +7,8 @@ interface ITouchpadGestureDelegate {
/**
 * A single touchpad gesture event handed to [ITouchpadGestureDelegate.onGesture].
 *
 * @property kind the type of gesture event being reported
 * @property duration time span attached to the event; producers compute it as a difference of
 *   `MotionEvent.eventTime` values (presumably milliseconds — TODO confirm) or pass 0
 * @property fingerCount number of fingers the producer associates with the event
 */
data class TouchpadGesture(val kind: TouchpadGestureKind, val duration: Long, val fingerCount: Int)
enum class TouchpadGestureKind {
FINGER_DOWN,
GESTURE_CANCEL,
SINGLE_TAP,
DOUBLE_TAP,
HOLD_START,

View File

@ -73,6 +73,14 @@ class TouchpadGestureManager(
MotionEvent.ACTION_DOWN -> {
activePointers.add(event.getPointerId(0))
sendEventIfAllowed(event, updateLastEventTime = false) {
TouchpadGesture(
TouchpadGestureKind.FINGER_DOWN,
0,
activePointers.size
)
}
if (activePointers.size == 1) {
holdStartTime = event.eventTime
singleFingerHoldHandler = Handler(Looper.getMainLooper())
@ -91,14 +99,26 @@ class TouchpadGestureManager(
activePointers.remove(event.getPointerId(0))
// Cancel any pending single finger hold
val wasPendingHold = singleFingerHoldHandler != null
singleFingerHoldHandler?.removeCallbacksAndMessages(null)
singleFingerHoldHandler = null
val duration = event.eventTime - holdStartTime
// Handle hold end
if (isHolding) {
val duration = event.eventTime - holdStartTime
delegate.onGesture(TouchpadGesture(TouchpadGestureKind.HOLD_END, duration, 1))
isHolding = false
} else if (wasPendingHold && activePointers.isEmpty()) {
// Finger was lifted before any gesture started
// Only send if we didn't just send a recognized gesture
sendEventIfAllowed(event, updateLastEventTime = false) {
TouchpadGesture(
TouchpadGestureKind.GESTURE_CANCEL,
duration,
1
)
}
}
}
// A non-primary touch has changed
@ -149,12 +169,18 @@ class TouchpadGestureManager(
/**
 * Delivers the [TouchpadGesture] built by [lambda] to the delegate, but only when enough time has
 * passed since the last recorded event. This prevents gesture start events from being sent too
 * close together.
 *
 * The gesture is dropped, and [lambda] is never invoked, when `event.eventTime` is earlier than
 * `lastEventTime + MIN_GESTURE_SEPARATION_MS`.
 *
 * @param event the motion event whose `eventTime` is compared against `lastEventTime`
 * @param updateLastEventTime when true, `event.eventTime` is stored into `lastEventTime` before
 *   the gesture is delivered
 * @param lambda factory for the gesture; evaluated only if the event passes the separation check
 */
private fun sendEventIfAllowed(event: MotionEvent, lambda: () -> TouchpadGesture) {
private fun sendEventIfAllowed(
event: MotionEvent,
updateLastEventTime: Boolean = true,
lambda: () -> TouchpadGesture,
) {
if (event.eventTime < lastEventTime + MIN_GESTURE_SEPARATION_MS) {
return
}
lastEventTime = event.eventTime
if (updateLastEventTime) {
lastEventTime = event.eventTime
}
delegate.onGesture(lambda())
}
@ -163,8 +189,15 @@ class TouchpadGestureManager(
if (isHolding) {
delegate.onGesture(TouchpadGesture(TouchpadGestureKind.HOLD_END, duration, 2))
isHolding = false
} else if (duration < 200) {
delegate.onGesture(TouchpadGesture(TouchpadGestureKind.SINGLE_TAP, duration, 2))
} else {
// Finger was lifted before any gesture completed
// Only send if we didn't just send a recognized gesture
sendEventIfAllowed(event, updateLastEventTime = false) {
TouchpadGesture(TouchpadGestureKind.GESTURE_CANCEL, duration, 2)
}
}
}
}

View File

@ -6,7 +6,8 @@ import com.penumbraos.mabl.types.Error
interface IInteractionFlowManager {
fun startListening(requestImage: Boolean = false)
fun startConversationFromInput(userInput: String)
fun finishListening()
fun finishListening(abort: Boolean = false)
fun cancelTalking()
fun isFlowActive(): Boolean
fun getCurrentFlowState(): InteractionFlowState

View File

@ -44,6 +44,8 @@ class InteractionFlowManager
private var stateCallback: InteractionStateCallback? = null
private var contentCallback: InteractionContentCallback? = null
private var didAbort: Boolean = false
private var cameraService: CameraService? = null
private var isCameraServiceBound = false
@ -64,17 +66,30 @@ class InteractionFlowManager
private val sttCallback = object : ISttCallback.Stub() {
override fun onPartialTranscription(partialText: String) {
    // Partial results are only logged and forwarded while the current listen has not been aborted
    if (!didAbort) {
        Log.d(TAG, "STT partial transcription: $partialText")
        contentCallback?.onPartialTranscription(partialText)
    }
}
override fun onFinalTranscription(finalText: String) {
Log.d(TAG, "STT final transcription: $finalText")
setState(InteractionFlowState.PROCESSING)
contentCallback?.onFinalTranscription(finalText)
if (didAbort) {
return
}
// Start conversation with the transcribed text
startConversationFromInput(finalText)
if (finalText.trim().isEmpty()) {
Log.d(TAG, "STT transcription was empty, skipping")
setState(InteractionFlowState.IDLE)
stateCallback?.onError(Error.SttError("Empty transcription"))
} else {
Log.d(TAG, "STT final transcription: $finalText")
setState(InteractionFlowState.PROCESSING)
contentCallback?.onFinalTranscription(finalText)
// Start conversation with the transcribed text
startConversationFromInput(finalText)
}
}
override fun onError(errorMessage: String) {
@ -95,16 +110,22 @@ class InteractionFlowManager
}
override fun startListening(requestImage: Boolean) {
if (currentState != InteractionFlowState.IDLE) {
currentModality =
if (requestImage) InteractionFlowModality.Vision else InteractionFlowModality.Speech
if (currentState == InteractionFlowState.LISTENING) {
Log.d(TAG, "Already listening. Continuing")
return
} else if (currentState != InteractionFlowState.IDLE) {
Log.w(TAG, "Cannot start listening, current state: $currentState")
return
}
didAbort = false
try {
allControllers.stt.startListening()
setState(InteractionFlowState.LISTENING)
currentModality =
if (requestImage) InteractionFlowModality.Vision else InteractionFlowModality.Speech
} catch (e: Exception) {
Log.e(TAG, "Failed to start listening: ${e.message}")
stateCallback?.onError(Error.SttError("Failed to start listening: ${e.message}"))
@ -171,10 +192,11 @@ class InteractionFlowManager
}
}
override fun finishListening() {
override fun finishListening(abort: Boolean) {
Log.d(TAG, "Stopping listening, state: $currentState")
setState(InteractionFlowState.CANCELLING)
didAbort = abort
allControllers.stt.cancelListening()
allControllers.tts.service?.stopSpeaking()
@ -182,6 +204,10 @@ class InteractionFlowManager
stateCallback?.onUserFinished()
}
override fun cancelTalking() {
    // Nothing to stop when the TTS service reference is null
    val ttsService = allControllers.tts.service ?: return
    ttsService.stopSpeaking()
}
// The flow counts as active in every state except IDLE
override fun isFlowActive(): Boolean = currentState != InteractionFlowState.IDLE

View File

@ -18,6 +18,8 @@ import io.ktor.client.request.setBody
import io.ktor.client.request.url
import io.ktor.client.statement.HttpResponse
import io.ktor.client.statement.bodyAsChannel
import io.ktor.content.TextContent
import io.ktor.http.ContentType
import io.ktor.http.isSuccess
import io.ktor.util.toMap
import io.ktor.utils.io.jvm.javaio.toInputStream
@ -32,6 +34,8 @@ class KtorHttpClient : HttpClient {
constructor(coroutineScope: CoroutineScope, penumbraClient: PenumbraClient) {
this.coroutineScope = coroutineScope
this.ktorClient = io.ktor.client.HttpClient {
// Otherwise ktor strips ContentType
useDefaultTransformers = false
install(HttpClientPlugin) {
this.penumbraClient = penumbraClient
}
@ -94,7 +98,20 @@ class KtorHttpClient : HttpClient {
for ((key, values) in langChainRequest.headers()) {
builder.headers.appendAll(key, values)
}
builder.setBody(langChainRequest.body())
// Resolve the body's content type from the request headers.
// The HTTP header is named "Content-Type" and header names are case-insensitive, so match it
// without regard to case rather than looking up an exact (and differently spelled) map key.
val contentTypeString = langChainRequest.headers().entries
    .firstOrNull { (name, _) -> name.equals("Content-Type", ignoreCase = true) }
    ?.value
    ?.firstOrNull()
// Fall back to JSON when the header is missing, blank, or cannot be parsed.
val contentType = if (contentTypeString.isNullOrBlank()) {
    ContentType.Application.Json
} else {
    try {
        ContentType.parse(contentTypeString)
    } catch (_: Exception) {
        ContentType.Application.Json
    }
}
// Wrap the body in TextContent so the resolved content type travels with it
builder.setBody(TextContent(langChainRequest.body(), contentType))
}
private suspend fun buildResponse(

View File

@ -39,8 +39,14 @@ class DemoSttService : MablService("DemoSttService") {
client.stt.initialize(object : SttRecognitionListener() {
override fun onError(error: Int) {
try {
currentCallback?.onError("Recognition error: $error")
} catch (e: RemoteException) {
// RecognitionError.ERROR_NO_MATCH
if (error == 7) {
Log.d("DemoSttService", "No speech recognized")
currentCallback?.onFinalTranscription("")
} else {
currentCallback?.onError("Recognition error: $error")
}
} catch (e: Exception) {
Log.e("DemoSttService", "Callback error", e)
}
}