The three-sigma rule of thumb is a conventional heuristic stating that "nearly all" values lie within three standard deviations of the mean. It gives a simple yet effective way to decide whether a value in a sequence is an outlier, and it can be used in a variety of data-processing tasks in machine learning. Read more...
/**
 * Outlier detector based on the three-sigma rule: a value is treated as
 * normal when it lies within three standard deviations of the mean of the
 * sequence it was trained on.
 *
 * Usage: call `train()` with an object mapping ids to numeric sequences, then
 * call `test()` with an id and a candidate value.
 *
 * @constructor
 * @param {number} [accuracy] Accepted for backward compatibility only and
 *     ignored. It used to divide both the numerator and the denominator of
 *     the mean, so it cancelled out and could only add rounding noise.
 */
function AnomalyDetector( accuracy )
{
    // Trained parameters per id: { e: <mean>, sigma: <standard deviation> }.
    var _distributions = {};

    /**
     * Own-property check that still works when the object shadows
     * `hasOwnProperty` or the key collides with an inherited name.
     */
    function hasOwn( object, key )
    {
        return Object.prototype.hasOwnProperty.call( object, key );
    }

    /**
     * Calculates the expected value (arithmetic mean) of a sequence.
     *
     * @param {number[]} sequence Observed values.
     * @returns {number} The mean, or 0 for an empty sequence.
     */
    function computeExpectedValue( sequence )
    {
        var n = sequence.length;
        var sum = 0;

        // An empty sequence has no mean; 0 is the documented fallback.
        if( n === 0 )
        {
            return 0.0;
        }

        for( var i = 0; i < n; i++ )
        {
            sum += sequence[ i ];
        }

        return sum / n;
    }

    /**
     * Calculates the population standard deviation (sigma) of a sequence.
     *
     * Uses the two-pass form sqrt( mean( ( x - E )^2 ) ). The shortcut
     * E[x^2] - E[x]^2 suffers from cancellation and can round to a small
     * negative number, which turns sigma into NaN and makes every later
     * test() report an outlier.
     *
     * @param {number[]} sequence Observed values.
     * @param {number} [expected] Precomputed mean; computed when omitted.
     * @returns {number} The standard deviation, or 0 for an empty sequence.
     */
    function computeStandardDeviation( sequence, expected )
    {
        var n = sequence.length;
        var squaredDeviations = 0;
        // Compare against undefined: a mean of 0 is a valid precomputed value.
        var mean = ( expected === undefined ) ? computeExpectedValue( sequence ) : expected;

        if( n === 0 )
        {
            return 0.0;
        }

        for( var i = 0; i < n; i++ )
        {
            var deviation = sequence[ i ] - mean;
            squaredDeviations += deviation * deviation;
        }

        return Math.sqrt( squaredDeviations / n );
    }

    /**
     * Calculates the distribution parameters for each sequence and stores
     * them under the sequence's key. Existing ids not present in `sequences`
     * are kept; ids present in both are overwritten.
     *
     * @param {Object.<string, number[]>} sequences Map of id to observed values.
     * @param {function(Object)} [cb] Optional callback that receives the
     *     detector's full distribution table after the update.
     */
    this.train = function( sequences, cb )
    {
        // Stage results first so that a failure half-way through leaves the
        // previously trained state untouched.
        var distributions = {};

        for( var key in sequences )
        {
            // Ignore enumerable keys inherited from the prototype chain.
            if( !hasOwn( sequences, key ) )
            {
                continue;
            }

            var sequence = sequences[ key ];
            var expected = computeExpectedValue( sequence );
            var sigma = computeStandardDeviation( sequence, expected );

            distributions[ key ] = { e: expected, sigma: sigma };
        }

        // Commit the staged parameters.
        for( var id in distributions )
        {
            if( hasOwn( distributions, id ) )
            {
                _distributions[ id ] = distributions[ id ];
            }
        }

        if( cb )
        {
            cb( _distributions );
        }
    };

    /**
     * Checks a value against the trained distribution of `id`.
     *
     * @param {string} id Key used when the sequence was trained.
     * @param {number} v Value to check.
     * @param {function(string, number, boolean, number, number)} [cb]
     *     Optional callback invoked as cb( id, v, isNormal, expected, sigma ).
     * @returns {*} The callback's return value when `cb` is a function;
     *     otherwise true if the value lies within three sigma of the mean
     *     and false if it is an outlier.
     * @throws {Error} If no distribution has been trained for `id`.
     */
    this.test = function( id, v, cb )
    {
        if( !hasOwn( _distributions, id ) )
        {
            throw new Error( 'No distribution has been trained for id "' + id + '"' );
        }

        var d = _distributions[ id ];
        var expected = d.e;
        var sigma = d.sigma;
        var isNormal = Math.abs( expected - v ) <= ( 3 * sigma );

        if( typeof cb === 'function' )
        {
            return cb( id, v, isNormal, expected, sigma );
        }

        return isNormal;
    };
}