{
  "patent_number": "US 10289962",
  "country": "US",
  "title": "How to Shrink Large AI Models Using Knowledge Distillation",
  "original_title": "Training distilled machine learning models",
  "summary": "A method for teaching small, efficient AI models to mimic the complex decision-making patterns of much larger, more powerful neural networks.",
  "what_it_does": "This patent describes a process called knowledge distillation. First, a large, heavy 'cumbersome' model is trained on a dataset to learn complex patterns. Then, a smaller 'distilled' model is trained, not just to predict the correct answer, but to mimic the probability distribution (the 'soft outputs') of the large model. Using a 'temperature constant' higher than 1 during training softens these probability distributions, so the distilled model is forced to pay attention to how the large model rates the incorrect answers relative to one another, which provides more information than a simple right-or-wrong label. This allows the smaller model to achieve performance levels close to those of the large model while being much faster and lighter, making it practical to run on mobile devices.",
  "what_it_does_not_cover": [
    "Does not cover training models from scratch without a pre-existing cumbersome model.",
    "Does not cover other model-compression techniques such as quantization or pruning.",
    "Does not cover methods where the distilled model is trained using only hard labels (e.g., just the correct class) instead of soft outputs.",
    "Does not cover architectures where the distilled model has more parameters than the cumbersome model."
  ],
  "filed": "2015-06-04",
  "granted": "2019-05-14",
  "expires": null,
  "status": "active",
  "holder": "Google LLC",
  "holder_url": "https://patentbrief.org/company/google-llc",
  "inventors": [
    {
      "name": "Oriol Vinyals",
      "url": "https://patentbrief.org/inventor/oriol-vinyals"
    },
    {
      "name": "Geoffrey E. Hinton",
      "url": "https://patentbrief.org/inventor/geoffrey-e-hinton"
    },
    {
      "name": "Jeffrey A. Dean",
      "url": "https://patentbrief.org/inventor/jeffrey-a-dean"
    }
  ],
  "times_cited": 4,
  "tags": [
    "ai_ml",
    "software",
    "consumer_electronics"
  ],
  "abstract": "Methods, systems, and apparatus, including computer programs encoded on computer storage media, for training a distilled machine learning model. One of the methods includes training a cumbersome machine learning model, wherein the cumbersome machine learning model is configured to receive an input and generate a respective score for each of a plurality of classes; and training a distilled machine learning model on a plurality of training inputs, wherein the distilled machine learning model is also configured to receive inputs and generate scores for the plurality of classes, comprising: processing each training input using the cumbersome machine learning model to generate a cumbersome target soft output for the training input; and training the distilled machine learning model to, for each of the training inputs, generate a soft output that matches the cumbersome target soft output for the training input.",
  "url": "https://patentbrief.org/patent/us/10289962/deep-q-networks-dqn",
  "markdown_url": "https://patentbrief.org/patent/us/10289962/deep-q-networks-dqn/md",
  "google_patents_url": "https://patents.google.com/patent/US10289962",
  "relatedPatents": []
}