{
  "patent_number": "US 10373050",
  "country": "US",
  "title": "How to Make AI Run Faster on Smaller Computer Chips",
  "original_title": "Fixed point neural network based on floating point neural network quantization",
  "summary": "A method to shrink complex AI models by converting their high-precision math into simpler, faster formats that run efficiently on mobile devices.",
  "what_it_does": "This patent describes a way to compress artificial intelligence models, specifically neural networks, so they can run on hardware with limited power, like smartphones. It works by converting 'floating point' numbers—which are very precise but computationally expensive—into 'fixed point' numbers, which are much simpler for a processor to handle. The core mechanism involves analyzing the statistical 'moments' (like the average or spread) of the data flowing through the network. By shifting these values to create a 'zero-mean distribution' and adjusting the math functions accordingly, the system ensures the model stays accurate even after it has been simplified.",
  "what_it_does_not_cover": [
    "Does not cover general neural network training methods that do not involve quantization.",
    "Does not cover hardware-specific circuit designs for performing multiplication or addition.",
    "Does not cover techniques that use retraining or fine-tuning to recover accuracy after quantization.",
    "Does not cover non-neural network machine learning models like support vector machines."
  ],
  "filed": "2015-10-22",
  "granted": "2019-08-06",
  "expires": null,
  "status": "active",
  "holder": "Qualcomm Inc",
  "holder_url": "https://patentbrief.org/company/qualcomm-inc",
  "inventors": [
    {
      "name": "Somdeb Majumdar",
      "url": "https://patentbrief.org/inventor/somdeb-majumdar"
    },
    {
      "name": "David Edward HOWARD",
      "url": "https://patentbrief.org/inventor/david-edward-howard"
    },
    {
      "name": "David Jonathan Julian",
      "url": "https://patentbrief.org/inventor/david-jonathan-julian"
    },
    {
      "name": "Venkata Sreekanta Reddy ANNAPUREDDY",
      "url": "https://patentbrief.org/inventor/venkata-sreekanta-reddy-annapureddy"
    },
    {
      "name": "Dexu Lin",
      "url": "https://patentbrief.org/inventor/dexu-lin"
    },
    {
      "name": "II William Richard BELL",
      "url": "https://patentbrief.org/inventor/ii-william-richard-bell"
    }
  ],
  "times_cited": 17,
  "tags": [
    "semiconductors",
    "ai_ml",
    "consumer_electronics",
    "telecommunications"
  ],
  "abstract": "A method of quantizing a floating point machine learning network to obtain a fixed point machine learning network using a quantizer may include selecting at least one moment of an input distribution of the floating point machine learning network. The method may also include determining quantizer parameters for quantizing values of the floating point machine learning network based at least in part on the at least one selected moment of the input distribution of the floating point machine learning network to obtain corresponding values of the fixed point machine learning network.",
  "url": "https://patentbrief.org/patent/us/10373050/tensor-processing-unit-tpu",
  "markdown_url": "https://patentbrief.org/patent/us/10373050/tensor-processing-unit-tpu/md",
  "google_patents_url": "https://patents.google.com/patent/US10373050",
  "relatedPatents": []
}