I am trying to classify objects I receive from clients.
On the server side, I have defined my "blueprints" like so:
{ // "type1"
type: 1,
name: String,
password: String
}
{ // "type2"
type: 2,
user_id: Number,
action: String
}
{ // "type3", and yes, this says type: 2....
type: 2,
object_id: Number,
action: String
}
Based on what the client sends I want to classify them like this:
{ type: 1, name: 'user', password: 'pass' } // -> type1
{ type: 1, name: 'user', password: 'pass', remember_me: true } // -> type1
{ type: 2, name: 'user', password: 'pass' } // -> N/A
{ type: 2, user_id: 5, action: 'hello' } // -> type2
{ type: 2, object_id: 5, action: 'hello' } // -> type3
The identification would need to be based off of the key names, the data-types of values, and the actual value of the values. There will be thousands of objects sent per second, and there may be thousands of blueprints. Therefore, it would be nice if it could be done in < O(n) where n is the number of blueprints.
I am writing this from scratch so the blue prints and metadata can be stored in any datastructures needed.
Thanks for the help. I look forward to hearing ideas on this.
Random thought on an approach that might reduce complexity:
The real limiting factor here is going to be how well you can reduce the set of types. One of the most obvious approaches would be to do something based on only the keys of the object. The problem with having extra keys in the data is that we can't rely on just Object.keys( data ).sort().join(","), we must also try every combination of keys we DO have.
// Assuming the "types" list is called "types":
// using underscore.js api
var _ = require('underscore');
var keyMap = _.chain( types ).map(function( typeDef, typeIndex ) {
// get an index with the definition, in case its
return { index: typeIndex, def: typeDef };
}).groupBy(function( data ) {
return _.keys( data.def ).sort().join(",");
}).value();
// empty map needed
keyMap[""] = [];
// assumes sorted key list
function getPossibleMaps( keys ) {
// if we have a map for this, use it
if ( keyMap[ keys.join(",") ] ) {
return keyMap[ keys.join(",") ];
} else {
// create a map of possible types by removing every key from the list of keys
// and then looking for maps that match, cache our result
return keyMap[ keys.join(",") ] = recursiveMapTest( keys );
}
}
function recursiveMapTest( keys ) {
return _.chain( keys )
.map(function( key ) {
return getPossibleMaps( _.without( keys, key ) );
}).flatten().value();
}
// we must also include "lesser" definitions for each of the key lists we found:
_.each( keyMap, function( results, index ) {
var keys = index.split(",");
keyMap[index] = results.concat( recursiveMapTest( keys ) );
});
function getType( data ) {
function checkType( typeData ) {
var def = typeData.def;
return _.every(typeData.def, function( value, key ) {
// these checks are probably not quite right
if ( value === null ) {
return true;
} else if ( value === Number ) {
return typeof data[key] === "number" || data instanceof Number;
} else if ( value === String ) {
return typeof data[key] === "string" || data instanceof String;
} else {
return data[ key ] === value;
}
});
}
var match = _.find( getPossibleMaps( _.keys( data ).sort() ), checkType );
return match && match.index;
}
// Retrieve
var clientTypes = [
{ type: 1, name: 'user', password: 'pass' },
{ type: 2, name: 'user', password: 'pass' },
{ type: 2, user_id: 5, action: 'hello' },
{ type: 2, object_id: 5, action: 'hello' },
{ type: 1, name: 'user', password: 'pass', remember_me: true }
];
console.log('Client types:');
for (var i = 0; i < clientTypes.length; i++) {
var type = clientTypes[i];
// The type object from the map
console.log("getType", type, getType(type));
}
Granted, this just means that the more possible incoming key lists, the more memory you consume storing the "quick" lookup tables.
Also, If everything has an numeric type you can obviously use that to speedup a huge chunk of the possible "object types" within that subtype.
I think your best bet would be to avoid needing to do any of this in the first place. Pass better type hints with your objects.
The blueprints will either be sent in an object or an array. If you can send them as an object, use the type IDs as keys and the values as the type object. When determining the type, access the key of that type in O(1) time.
Even if you receive the types as an array, an O(n) pass will allow you to store them in an object internally and use it as a hash table to retrieve the required type information at runtime.
If you can't rely on the type itself as a key, generate a unique key for each type and use this same function for retrieval.
var types = [{ // Will refer to this JSON object as type1
type: 1,
name: String,
password: String
},
{ // type2
type: 2,
user_id: Number,
action: String
},
{ // type3
type: 2,
object_id: Number,
action: String
}];
console.log(types);
// Prepare map
var typeMap = {};
for (var i = 0; i < types.length; i++) {
var type = types[i];
typeMap[typeKey(type)] = type;
}
console.log(typeMap);
function typeKey(type) {
var key = '';
for (var i in type) {
if (i == 'type') {
key += type[i].toString() + ':';
}
key += ':' + i;
}
return key;
}
function getType(type) {
return typeMap[typeKey(type)];
}
// Retrieve
var clientTypes = [
{ type: 1, name: 'user', password: 'pass' },
{ type: 2, name: 'user', password: 'pass' },
{ type: 2, user_id: 5, action: 'hello' },
{ type: 2, object_id: 5, action: 'hello' }
];
console.log('Client types:');
for (var i = 0; i < clientTypes.length; i++) {
var type = clientTypes[i];
// The type object from the map
console.log(getType(type));
}
If a type isn't found for the "client" type, then undefined is retured from getType.
Output:
Client types:
Object {type: 1, name: function, password: function}
undefined
Object {type: 2, user_id: function, action: function}
Object {type: 2, object_id: function, action: function}
You can do it like this
var obj1={ type: 1, name: 'user', password: 'pass' };
var obj2={ type: 2, name: 'user', password: 'pass' };
//match JSON keys
var keys1 = Object.keys(obj1);
var keys2 = Object.keys(obj2);
if (JSON.stringify(keys1) === JSON.stringify(keys2))
console.log("matched all keys");
//match JSON value datatypes
for (var key in obj1) {
if (typeof(obj1[key]) == typeof(obj2[key]))
console.log(key +' data type matched');
}
//match 'type' field
if (obj1.type == obj2.type)
console.log("woooo total match");
Here is the time complexity :
So total is O(n) iff JSON is ordered other wise sorting will take additional time.