{
  "patent_number": "US 6424946",
  "country": "US",
  "title": "How Computers Automatically Label Different Speakers in Audio Recordings",
  "original_title": "Methods and apparatus for unknown speaker labeling using concurrent speech recognition, segmentation, classification and clustering",
  "summary": "A method for identifying and labeling speakers in audio recordings, even if the system has never heard the person speak before, by grouping similar voices and asking a user to name them.",
  "what_it_does": "This system processes audio to identify who is speaking by breaking the audio into segments and comparing them against a database. It uses 'background models' for people not yet in the system, such as a generic 'unenrolled male' or 'unenrolled female' profile. When the system detects a recurring voice that it doesn't recognize, it groups those segments together into a cluster. A user can then provide a name for that cluster, and the system automatically updates its database to recognize that person in future recordings.",
  "what_it_does_not_cover": [
    "Does not cover real-time voice synthesis or voice modification.",
    "Does not cover hardware-based microphones or physical audio capture devices.",
    "Does not cover methods that require every speaker to be pre-enrolled before identification can begin.",
    "Does not cover the specific linguistic analysis of the words being spoken, only the identification of the speaker."
  ],
  "filed": "1999-11-05",
  "granted": "2002-07-23",
  "expires": "2019-11-05",
  "status": "expired",
  "holder": "International Business Machines Corp",
  "holder_url": "https://patentbrief.org/company/international-business-machines-corp",
  "inventors": [
    {
      "name": "Alain Charles Louis Tritschler",
      "url": "https://patentbrief.org/inventor/alain-charles-louis-tritschler"
    },
    {
      "name": "Mahesh Viswanathan",
      "url": "https://patentbrief.org/inventor/mahesh-viswanathan"
    }
  ],
  "times_cited": 75,
  "tags": [
    "software",
    "ai_ml",
    "telecommunications"
  ],
  "abstract": "A method and apparatus are disclosed for identifying speakers participating in an audio-video source, whether or not such speakers have been previously registered or enrolled. The speaker identification system uses an enrolled speaker database that includes background models for unenrolled speakers, such as “unenrolled male” or “unenrolled female,” to assign a speaker label to each identified segment. Speaker labels are identified for each speech segment by comparing the segment utterances to the enrolled speaker database and finding the “closest” speaker, if any. A speech segment having an unknown speaker is initially assigned a general speaker label from the set of background models. The “unenrolled” segment is assigned a segment number and receives a cluster identifier assigned by the clustering system. If a given segment is assigned a temporary speaker label associated with an unenrolled speaker, the user can be prompted by the present invention to identify the speaker. Once the user assigns a speaker label to an audio segment having an unknown speaker, the same speaker name can be automatically assigned to any segments that are assigned to the same cluster and the enrolled speaker database can be automatically updated to enroll the previously unknown speaker.",
  "url": "https://patentbrief.org/patent/us/6424946/amazon-personalized-recommendations",
  "markdown_url": "https://patentbrief.org/patent/us/6424946/amazon-personalized-recommendations/md",
  "google_patents_url": "https://patents.google.com/patent/US6424946",
  "relatedPatents": [
    {
      "patentNumber": "8168877",
      "countryCode": "US",
      "title": "How to Automatically Generate Musical Harmonies from Audio",
      "url": "https://patentbrief.org/patent/us/8168877/musical-harmony-generation-from-polyphonic-audio-signals"
    },
    {
      "patentNumber": "10452978",
      "countryCode": "US",
      "title": "How AI Models Understand Language Using 'Attention'",
      "url": "https://patentbrief.org/patent/us/10452978/transformer-attention-mechanism"
    },
    {
      "patentNumber": "6732183",
      "countryCode": "US",
      "title": "How to Seamlessly Switch Video Streams for Many Viewers",
      "url": "https://patentbrief.org/patent/us/6732183/video-and-audio-streaming-for-multiple-users"
    },
    {
      "patentNumber": "9965705",
      "countryCode": "US",
      "title": "How AI Uses Question-Guided Attention to Answer Questions About Images",
      "url": "https://patentbrief.org/patent/us/9965705/systems-and-methods-for-attention-based-configurable-convolutional-neural-networks-abc-cnn-for-visual-question-answering"
    },
    {
      "patentNumber": "9361579",
      "countryCode": "US",
      "title": "How Computers Calculate Probabilities in Large Knowledge Bases",
      "url": "https://patentbrief.org/patent/us/9361579/large-scale-probabilistic-ontology-reasoning"
    }
  ]
}