Async answering machine detection using LLM

Overview

This module provides asynchronous answering machine detection (AMD) using a Large Language Model (LLM). It classifies the callee's first reply into one of four categories:

  1. Human – Spoken by a real person
  2. IVR – Automated menu systems
  3. AMD – Generic automated announcements
  4. Voicemail – Messages asking to leave a voicemail

The implementation uses an async block pattern to process incoming speech without blocking the main conversation flow.

How It Works

  1. The library initializes an async child block to handle the classification
  2. Incoming speech is continuously analyzed, as transcribed text, for potential IVR/AMD patterns
  3. When a potential pattern is detected, the relevant slice of the transcription is sent to the LLM for classification
  4. Results are processed asynchronously, so the conversation continues uninterrupted (a stripped-down sketch of this exchange follows the list)
  5. When IVR/AMD/Voicemail is detected, the system can automatically exit or take custom actions
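
Before the full listing, it may help to see the skeleton of this exchange on its own. The sketch below is a stripped-down variant of the library shown under Usage: the parent starts a child block with blockcall, forwards transcribed text to it with #sendMessageToAsyncBlock, and picks up the verdict in a when blockMessage handler, while the child idles in wait * until a message arrives. The names classifier, Classify, on_verdict, and machineDetected, and the reduced message payload, are illustrative simplifications, not part of the actual module.

library

context {
    output machineDetected: boolean = false;   // illustrative output
    childId: string = "";
}

// Start the child block once and remember its route id.
do {
    set $childId = (blockcall classifier()).id;
}

// Push a transcribed reply to the child (call this from a digression,
// as the full listing below does).
function Classify(text: string): boolean {
    #sendMessageToAsyncBlock($this.childId, "Content", { history: text });
    return true;
}

// React to the child's verdict without blocking the dialogue.
when blockMessage on_verdict do {
    if (!#isBlockMessage("Content")) { return; }
    var msg = #getAsyncBlockMessage() as { content: { isIvr: boolean; }; };
    if (msg is null) { return; }
    if (msg.content.isIvr) {
        set $machineDetected = true;
        exit;   // or take a custom action instead
    }
    return;
}

// Child side: sit idle, classify on demand, answer the parent.
async block classifier() {
    start node root { do { wait *; } }
    digression on_content {
        conditions { on #isBlockMessage("Content") tags: onblock; }
        do {
            var msg = #getAsyncBlockMessage()
                as { sourceRouteId: string; content: { history: string; }; };
            if (msg is null) { return; }
            // ... call #askGPT on msg.content.history here, as in the full listing ...
            #sendMessageToAsyncBlock(msg.sourceRouteId, "Content", { isIvr: true });
            return;
        }
    }
}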

Usage

Create a file asyncamd.dsl with the following content and import it into your main DSL file (a minimal import example follows the listing):

library

context {
    // Output variables to track detection results
    output ivrDetected: boolean = false;
    output ivrDetectedHistory: string = "";
    output ivrDetectedReply: string = "";

    // Configuration variables
    ivrChildBlock: string = "";
    lastIvrHistory: string = "";
    input exitOnIVR: boolean = true;

    // LLM prompt for classification
    amdPrompt: string = `Given the conversation history as {{history}}, focus ONLY on the callee's very first response after the caller's initial statement or question. Categorize this single reply into ONE (and only one) of the following four types:
1. Human - Spoken by a person, showing interactive intent (e.g., questions, answers, small talk, or personalized remarks relevant to the request).
2. IVR - Automated menu options/instructions for input (e.g., "Press 1 for sales. Press 2 for support," or "Say the department you want").
3. AMD - A generic automated or pre-recorded announcement about status or availability (e.g., "office is closed," "no one is available," or "please try again later") that does NOT mention leaving a message.
4. Voicemail - Any message (personal or automated) that mentions, asks, or instructs you to leave a voicemail (e.g., "leave a message after the tone" or "record your message").
Review ONLY the first callee reply, disregarding any conversation that follows. Return EXACTLY one of these four words as your answer: Human, IVR, AMD, or Voicemail.`;
}

// Initialize the async child block when starting
do {
    set $ivrChildBlock = (blockcall ivr_capturer($amdPrompt)).id;
}

// Analyze potential IVR/AMD speech patterns
digression potential_ivr {
    conditions {
        on #getVisitCount("ivr_full_sentence") < 3 tags: onpotential;
    }
    do {
        var text = #getMessageTextV2().text;
        $.RequestIvrUpdate(text);
        return;
    }
}

// Process complete sentences for IVR/AMD detection
preprocessor digression ivr_full_sentence {
    conditions {
        on #getVisitCount("ivr_full_sentence") < 3;
    }
    do {
        var text = #getMessageTextV2().text;
        $.RequestIvrUpdate(text, true);
        return;
    }
}

// Helper function to request IVR/AMD classification
function RequestIvrUpdate(history: string, force: boolean = false): boolean {
    // Skip duplicate requests
    if (history == $this.lastIvrHistory) {
        return false;
    }
    // Optimization: only process substantial utterances unless forced
    if (!force) {
        if (history.length() < 30) {
            // Skip short sentences
            return false;
        }
        if (history.startsWith($this.lastIvrHistory) && history.length() - $this.lastIvrHistory.length() < 10) {
            // Skip minor updates
            return false;
        }
    }
    set $this.lastIvrHistory = history;

    var childId = $this.ivrChildBlock;
    var fullHistory = #getFormattedTranscription({history_length: 4});
    if (!fullHistory.includes(history)) {
        set fullHistory = fullHistory + "\nhuman: " + history;
    }
    var request = { history: fullHistory };
    #sendMessageToAsyncBlock(childId, "Content", request);
    return true;
}

// Define response message type
type AsyncIvrResponse = {
    messageType: "Content";
    sourceRouteId: string;
    targetRouteId: string;
    content: {
        isIvr: boolean;
        history: string;
        reply: string;
    };
};

// Handle classification results from the async block
when blockMessage handle_ivr_update do {
    if (#isBlockMessage("Content") &&
        (#getAsyncBlockMessage() as { sourceRouteId: string; })?.sourceRouteId == $ivrChildBlock) {
        var message = #getAsyncBlockMessage() as AsyncIvrResponse;
        if (message is null) {
            return;
        }
        var content = message.content;
        set $ivrDetected = $ivrDetected || content.isIvr;
        if ($ivrDetected) {
            set $ivrDetectedHistory = content.history;
            set $ivrDetectedReply = content.reply;
            #log("Detected IVR, exiting");
            if ($exitOnIVR) {
                exit;
            }
        }
        return;
    }
}

// Async block for LLM-based classification
async block ivr_capturer(amdPrompt: string) {
    type AsyncIvrRequest = {
        messageType: "Content";
        sourceRouteId: string;
        targetRouteId: string;
        content: {
            history: string;
        };
    };

    digression @exit_dig {
        conditions {
            on true tags: onclosed;
        }
        do {
            exit;
        }
    }

    start node root {
        do {
            wait *;
        }
    }

    digression exit_when_parent_block_exit {
        conditions {
            on #isBlockMessage("Terminated") tags: onblock;
        }
        do {
            exit;
        }
    }

    digression handle_block_content_message {
        conditions {
            on #isBlockMessage("Content") tags: onblock;
        }
        do {
            var message = #getAsyncBlockMessage() as AsyncIvrRequest;
            if (message is null) {
                return;
            }
            var content = message.content;
            var history = content.history;

            // Define JSON Schema for structured output
            var schema = {
                name: "result",
                strict: true,
                description: "The result of the AMD detection",
                schema: {
                    @type: "object",
                    properties: {
                        result: {
                            @type: "string",
                            description: "The result of the AMD detection",
                            enum: ["Human", "IVR", "AMD", "Voicemail"]
                        }
                    },
                    additionalProperties: false,
                    required: ["result"]
                }
            }.toString();

            // Call LLM with structured output format
            var ask = #askGPT($amdPrompt, {
                model: "openai/gpt-4.1-nano",
                function_call: "none",
                history_length: 0,
                save_response_in_history: false,
                response_format: schema
            }, promptName: "ivr_capturer");

            // Parse and process the result
            var response = (ask.responseText.parseJSON() as { result: string; })?.result ?? "Human";
            if (response == "IVR" || response == "AMD" || response == "Voicemail") {
                #sendMessageToAsyncBlock(message.sourceRouteId, "Content", {
                    isIvr: true,
                    history: history,
                    reply: ask.responseText
                });
            }
            return;
        }
    }
}

Configuration Options

You can customize the behavior by setting these parameters:

  • exitOnIVR - Set to false if you want to continue the conversation after detection and handle it in your own scenario (see the sketch after this list). Because it is declared as an input variable, its value is supplied when the conversation is started.
  • amdPrompt - Modify the prompt to customize the classification logic
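
With exitOnIVR set to false, the library only records the detection in its output variables and lets the conversation run, so the main scenario decides how to react. One possible reaction, sketched below, is a digression in your main file that triggers once $ivrDetected is true and leaves a short callback message; the digression name and wording are illustrative, and since the condition is evaluated when the next message or event is processed, the reaction may lag the detection slightly.

digression leave_callback_message {
    conditions { on $ivrDetected == true; }
    do {
        #sayText("Hi, this is a callback from our team. Please call us back when convenient.");
        exit;
    }
}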

Troubleshooting

If detection is not working as expected:

  1. Check the logs for classification results
  2. Review the ivrDetectedReply variable to see exact LLM responses
  3. Adjust the prompt to better match your use case
  4. Try different LLM models for improved accuracy (see the snippet after this list)
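
For example, swapping the model only means changing the options object passed to #askGPT inside ivr_capturer's handle_block_content_message digression, and a #log call right after it shows the raw structured reply. The snippet below assumes a hypothetical alternative model identifier; use whichever models your environment actually exposes.

// Inside ivr_capturer's handle_block_content_message digression:
var ask = #askGPT($amdPrompt, {
    model: "openai/gpt-4.1-mini",        // illustrative: a larger model than gpt-4.1-nano
    function_call: "none",
    history_length: 0,
    save_response_in_history: false,
    response_format: schema
}, promptName: "ivr_capturer");

// Log the raw structured reply to compare against ivrDetectedReply later.
#log("AMD classification: " + ask.responseText);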