{
  "patent_number": "US 9607103",
  "country": "US",
  "title": "How Computers Match and Join Messy Data from Different Sources",
  "original_title": "Fuzzy data operations",
  "summary": "A method for merging datasets by identifying related but non-identical items using flexible matching rules rather than strict equality.",
  "what_it_does": "This patent describes a way for computer systems to combine data from two different sources even when the information doesn't match perfectly. Instead of looking for identical values, the system uses a 'variant relation' to determine if two objects are close enough to be considered a match, such as checking if the mathematical distance between two values falls below a specific threshold. Once these matches are identified, the system evaluates the surrounding data and joins the records together to create a new, combined dataset. For example, it could link 'John Smith' in one database with 'J. Smith' in another by recognizing they are variants of the same person based on defined similarity rules.",
  "what_it_does_not_cover": [
    "Does not cover simple database joins that rely on exact matches (e.g., matching primary keys that are identical).",
    "Does not cover human-manual data entry or manual reconciliation processes.",
    "Does not cover matching methods that are strictly limited to equivalence relations (where A must equal B).",
    "Does not cover the storage hardware itself, only the logical method of processing and joining the data."
  ],
  "filed": "2013-01-23",
  "granted": "2017-03-28",
  "expires": null,
  "status": "active",
  "holder": "Ab Initio Technology LLC",
  "holder_url": "https://patentbrief.org/company/ab-initio-technology-llc",
  "inventors": [
    {
      "name": "Arlen Anderson",
      "url": "https://patentbrief.org/inventor/arlen-anderson"
    }
  ],
  "times_cited": 15,
  "tags": [
    "software",
    "ai_ml",
    "finance",
    "ecommerce"
  ],
  "abstract": "A method for clustering data elements stored in a data storage system includes reading data elements from the data storage system. Clusters of data elements are formed with each data element being a member of at least one cluster. At least one data element is associated with two or more clusters. Membership of the data element belonging to respective ones of the two or more clusters is represented by a measure of ambiguity. Information is stored in the data storage system to represent the formed clusters.",
  "url": "https://patentbrief.org/patent/us/9607103/amazon-athena",
  "markdown_url": "https://patentbrief.org/patent/us/9607103/amazon-athena/md",
  "google_patents_url": "https://patents.google.com/patent/US9607103",
  "relatedPatents": []
}