Anomaly Detection

The three-sigma rule of thumb is a conventional heuristic stating that "nearly all" values lie within three standard deviations of the mean (about 99.7% for normally distributed data). It is a simple yet effective way to decide whether a value in a sequence is an outlier, and it can be used in a variety of data-processing and machine-learning approaches. Read more...


/**
 * Outlier detector based on the three-sigma rule.
 *
 * For every named sequence handed to `train` the detector stores the mean
 * (`e`) and the population standard deviation (`sigma`). `test` accepts a
 * value when it lies within three standard deviations of the stored mean.
 *
 * @constructor
 * @param {number} [accuracy=0.1] Scaling factor applied to every term and to
 *     the divisor while averaging. It cancels out mathematically and is kept
 *     only so results stay numerically identical to earlier versions. Values
 *     that are zero, non-finite or not numeric fall back to 0.1.
 */
var AnomalyDetector = function( accuracy )
{
    var DEFAULT_ACCURACY = 0.1;
    var SIGMA_FACTOR = 3;
    var hasOwn = Object.prototype.hasOwnProperty;

    // A zero, NaN or infinite scale would turn every mean into NaN/Infinity.
    var scale = Number( accuracy );
    var _accuracy = ( isFinite( scale ) && scale !== 0 ) ? scale : DEFAULT_ACCURACY;
    var _distributions = {};

    /**
     * Calculating expected value (E) of a random variable raised to `pow`,
     * i.e. the arithmetic mean of `sequence[ i ] ^ pow`.
     *
     * @param {number[]} sequence Observed values.
     * @param {number} [pow=1] Exponent applied to each value before averaging.
     * @returns {number} The mean, or 0 for an empty sequence.
     */
    function computeExpectedValue( sequence, pow )
    {
        var n = sequence.length;
        var exponent = ( pow === undefined ) ? 1 : pow;
        var sum = 0;

        // if random variable is empty, return 0
        if( n === 0 )
        {
            return 0;
        }

        for( var i = 0; i < n; i++ )
        {
            sum += Math.pow( sequence[ i ], exponent ) / _accuracy;
        }

        return sum / ( n / _accuracy );
    }

    /**
     * Calculating standard deviation (sigma) of a random variable as
     * sqrt( E[x^2] - E[x]^2 ).
     *
     * @param {number[]} sequence Observed values.
     * @param {number} [expected] Precomputed mean of `sequence`; calculated
     *     here when omitted.
     * @returns {number} Non-negative standard deviation (NaN only if the
     *     input itself contains non-numeric values).
     */
    function computeStandardDeviation( sequence, expected )
    {
        // calculate expected value for the sequence, if not set
        // (explicit undefined check: a mean of 0 is a valid precomputed value)
        var mean = ( expected === undefined ) ? computeExpectedValue( sequence ) : expected;
        var variance = computeExpectedValue( sequence, 2 ) - Math.pow( mean, 2 );

        // Floating-point rounding can push the variance of a (near-)constant
        // sequence slightly below zero; clamp so the square root is not NaN.
        return Math.sqrt( Math.max( 0, variance ) );
    }

    /**
     * Calculating probability distribution parameters for each random variable
     * and merging them into the detector's state. Keys trained earlier are
     * kept unless they are present in `sequences`, in which case they are
     * overwritten.
     *
     * @param {Object.<string, number[]>} sequences Map of id -> observed
     *     values. Only own enumerable keys are used; a null/undefined
     *     argument trains nothing.
     * @param {function(Object)} [cb] Called with the detector's complete
     *     distribution map ({ id: { e, sigma } }) once training is done.
     */
    this.train = function( sequences, cb )
    {
        var distributions = {}; // probability distribution parameters array
        var keys = Object.keys( sequences || {} );
        var i;

        // Compute everything first so a failure on one sequence does not
        // leave the stored distributions partially updated.
        for( i = 0; i < keys.length; i++ )
        {
            var sequence = sequences[ keys[ i ] ];
            var expected = computeExpectedValue( sequence );
            var sigma = computeStandardDeviation( sequence, expected );

            distributions[ keys[ i ] ] = { e: expected, sigma: sigma };
        }

        // overwrite distribution data
        for( i = 0; i < keys.length; i++ )
        {
            _distributions[ keys[ i ] ] = distributions[ keys[ i ] ];
        }

        if( cb )
        {
            cb( _distributions );
        }
    };

    /**
     * Checks a value against the trained distribution of `id`: the value is
     * considered correct when |e - v| <= 3 * sigma, otherwise it is an outlier.
     *
     * @param {string} id Key of a sequence previously passed to `train`.
     * @param {number} v Value to check.
     * @param {function(string, number, boolean, number, number)} [cb]
     *     Called as cb( id, v, isValid, e, sigma ).
     * @returns {*} The callback's return value when `cb` is given, otherwise
     *     true if the value seems to be correct and false if it is an outlier.
     * @throws {TypeError} If no distribution has been trained for `id`.
     */
    this.test = function( id, v, cb )
    {
        // Own-property check so ids like "toString" do not resolve to
        // members inherited from Object.prototype.
        if( !hasOwn.call( _distributions, id ) )
        {
            throw new TypeError( 'AnomalyDetector: no distribution trained for "' + String( id ) + '"' );
        }

        var d = _distributions[ id ];
        var expected = d.e;
        var sigma = d.sigma;
        var isValid = Math.abs( expected - v ) <= ( SIGMA_FACTOR * sigma );

        if( !cb )
        {
            return isValid;
        }

        return cb( id, v, isValid, expected, sigma );
    };
};