Saturday, March 28, 2020
Neural Networks on the Arduino Part 7: The complete source code.
// MatrixANNAndTrain, Created by Owen F. Ransen 28th March 2020
#include <MatrixMath.h>
// To choose the type of test, set one of these to 1 and the other to 0
#define XOR_TEST 0
#define POSITION_TEST 1
#if XOR_TEST+POSITION_TEST != 1
#error Set one of XOR_TEST and POSITION_TEST to 1 and the other to 0 please.
#endif
#if XOR_TEST
#define NUM_INPUTS 2
#define NUM_HIDDEN_NODES 6
#define NUM_OUTPUTS 2
#define NUM_TRAINING_SAMPLES 35000
#elif POSITION_TEST
#define NUM_INPUTS 5
#define NUM_HIDDEN_NODES 4
#define NUM_OUTPUTS 2
#define NUM_TRAINING_SAMPLES 8000
#endif
// These matrices must always exist, hence they are global
mtx_type gInputToHiddenMatrix[NUM_INPUTS][NUM_HIDDEN_NODES];
mtx_type gHiddenToOutputMatrix[NUM_HIDDEN_NODES][NUM_OUTPUTS];
// Useful to have this as a global so that after QueryTheNeuralNetwork we know
// what the hidden outputs were
mtx_type gHiddenOutputs[NUM_HIDDEN_NODES];
// This is the learning rate
const double Alpha = 0.1 ;
// Implement y = Sigmoid(x)
double Sigmoid (const double x)
{
double y = 1.0/(1.0 + exp(-x)) ;
return y ;
}
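// A quick sanity check of the function above: Sigmoid(0.0) = 0.5,
// Sigmoid(2.0) ≈ 0.881, Sigmoid(-2.0) ≈ 0.119. The output is always
// strictly between 0 and 1, which is why it can be re-used later to
// compute its own derivative.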
// Outer product: form a matrix from two vectors
// C = A*B
// A is a column vector with mRows elements (vertical)
// B is a row vector with nColumns elements (horizontal)
// C must have space for mRows*nColumns elements for this to work.
void OuterProduct(mtx_type* A, mtx_type* B, int mRows, int nColumns, mtx_type* C)
{
int ra, cb;
for (ra = 0; ra < mRows; ra++) {
for(cb = 0; cb < nColumns; cb++)
{
// Overwrite (do not accumulate) each element of the flattened matrix
C[(nColumns * ra) + cb] = A[ra] * B[cb];
}
}
}
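// Worked example: with A = {1, 2, 3} (a column vector, mRows = 3)
// and B = {4, 5} (a row vector, nColumns = 2), OuterProduct fills C with:
//     [ 1*4  1*5 ]   [  4   5 ]
// C = [ 2*4  2*5 ] = [  8  10 ]
//     [ 3*4  3*5 ]   [ 12  15 ]
// stored row by row in the flat array as {4, 5, 8, 10, 12, 15}.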
// Here we randomly initialise the weights in the neural network matrix
// This happens before training.
void RandomInitWeights (mtx_type* WeightsMatrix,
int iNumRows,
int iNumColumns)
{
for (int row = 0; row < iNumRows; row++) {
int iRowPart = (row*iNumColumns) ;
for (int col = 0; col < iNumColumns; col++) {
int iIdx = iRowPart + col ;
// It has been suggested that a good starting point
// is to have weights from -0.5 to +0.5
WeightsMatrix[iIdx] = (random (0,1001)/1000.0) - 0.5 ;
}
}
}
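// A quick check of the arithmetic above: random(0,1001) returns a long
// from 0 to 1000 inclusive, so random(0,1001)/1000.0 spans 0.000 to 1.000
// in steps of 0.001, and subtracting 0.5 gives weights from -0.500 to +0.500.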
// Given the inputs and the weights in the matrices calculate the outputs
void QueryTheNeuralNetwork (mtx_type* InVector, mtx_type* OutVector)
{
// Use inputs and first matrix to get hidden node values...
Matrix.Multiply((mtx_type*)InVector,
(mtx_type*)gInputToHiddenMatrix,
1, // rows in InputVector (a vector, so 1)
NUM_INPUTS, // columns in InputVector and rows in gInputToHiddenMatrix
NUM_HIDDEN_NODES, // columns in gInputToHiddenMatrix
(mtx_type*)gHiddenOutputs); // This is the output of Multiply
// Now we have values in the gHiddenOutputs
// i.e. we have the summed weights*inputs in the hidden nodes
// Transform hidden node values using sigmoid...
// Go from h'1 to h1 in the diagram
for (int hn = 0 ; hn < NUM_HIDDEN_NODES ; hn++) {
double OldHiddenNodeValue = gHiddenOutputs[hn] ;
double NewHiddenNodeValue = Sigmoid (OldHiddenNodeValue) ;
gHiddenOutputs[hn] = NewHiddenNodeValue ;
}
// Do (sigmoided hidden node values) x (second matrix) to get outputs
Matrix.Multiply((mtx_type*)gHiddenOutputs,
(mtx_type*)gHiddenToOutputMatrix,
1, // rows in HiddenVector (a row vector, so 1)
NUM_HIDDEN_NODES, // columns in gHiddenOutputs and rows in gHiddenToOutputMatrix
NUM_OUTPUTS, // columns in gHiddenToOutputMatrix
(mtx_type*)OutVector); // This is the output of this function
// Transform output node values using sigmoid...
for (int o = 0 ; o < NUM_OUTPUTS ; o++) {
const double OldOutputValue = OutVector[o] ;
const double NewOutputValue = Sigmoid (OldOutputValue) ;
OutVector[o] = NewOutputValue ;
}
// "answer" is now inside OutputVector!
// and the current hidden node values are in gHiddenNodes
}
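// In matrix notation the whole forward pass above is just:
//   Hidden = Sigmoid(Input  x gInputToHiddenMatrix)   (1 x NUM_HIDDEN_NODES)
//   Output = Sigmoid(Hidden x gHiddenToOutputMatrix)  (1 x NUM_OUTPUTS)
// with Sigmoid applied element by element after each multiplication.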
// Change the weights matrix so that it produces smaller errors
void UpdateWeights (int iNumInputNodes, mtx_type* InputValues, // a column vector
int iNumOutputNodes, mtx_type* OutputValues, // a row vector
mtx_type* ErrorValues, // same size as output values
mtx_type* WeightMatrix) // This is an input and output
{
// This is just to keep sizes of matrices in mind
int iNumRows = iNumInputNodes ;
int iNumCols = iNumOutputNodes ;
// The "horizontal" row vector f is formed from
// Alpha*error*output*(1-output) in each column.
// It is initialised from the errors and outputs of this layer, and so has
// the same size as the error vector and the output vector
mtx_type f[iNumOutputNodes] ;
for (int col = 0 ; col < iNumOutputNodes ; col++) {
// The outputs have been created using the sigmoid function.
// The derivative of the sigmoid is used to modify the weights.
// Fortunately, because we have the output values, the derivative
// is easy to calculate: for y = Sigmoid(x), dy/dx = y*(1-y).
const double SigmoidDerivative = OutputValues[col]*(1.0-OutputValues[col]) ;
f[col] = Alpha*ErrorValues[col]*SigmoidDerivative ;
}
// The "vertical" column vector is the inputs to the current layer
// Now we can do the outer product to form a matrix from a
// a column vector multiplied by a row vector...
// to get a matrix of delta weights
mtx_type ErrorDeltasMatrix [iNumRows*iNumCols] ;
OuterProduct((mtx_type*)InputValues, f, iNumRows, iNumCols, (mtx_type*)ErrorDeltasMatrix) ;
// Now we have the deltas to add to the current matrix
// We are simply doing OldWeight = OldWeight+DeltaWeight here
for (int row = 0 ; row < iNumRows ; row++) {
for (int col = 0 ; col < iNumCols ; col++) {
int iIndex = (row*iNumCols)+col ;
WeightMatrix[iIndex] = WeightMatrix[iIndex] + (ErrorDeltasMatrix[iIndex]) ;
}
}
}
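// In matrix form the update above is one gradient descent step:
//   W = W + OuterProduct(Inputs, Alpha * Errors . Outputs . (1 - Outputs))
// where "." means element-by-element multiplication. The same function
// serves both layers because only the sizes change.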
// Given an input and a target find the error and change the weights to reduce the error
// We call UpdateWeights twice.
// You can find the equivalent in the Python code by searching for "def train"
void TrainOnOneSample (mtx_type* Ins, // The input
mtx_type* Targets) // The ideal output
{
// Ask what the answer would be with the current weights...
mtx_type TestOutputs[NUM_OUTPUTS] ;
QueryTheNeuralNetwork (Ins, TestOutputs) ; // Sets gHiddenOutputs too
//# error is target - current_output
// output_errors = targetscol - final_outputs
mtx_type OutputErrors [NUM_OUTPUTS] ;
for (int e = 0 ; e < NUM_OUTPUTS ; e++) {
OutputErrors[e] = Targets[e] - TestOutputs[e] ;
}
// Update the weights of the connections from output to hidden, gHiddenToOutputMatrix
// The hidden outputs are the inputs into the final layer
UpdateWeights (NUM_HIDDEN_NODES,gHiddenOutputs, // here are the inputs into final layer
NUM_OUTPUTS,TestOutputs, // outputs from final layer
(mtx_type*)OutputErrors, // final layer outputs errors, calculated above
(mtx_type*)gHiddenToOutputMatrix) ; // connections of final layer to be updated
// update the weights for the links between the input and hidden layers
// We need the hidden layer errors for that
// The hidden layer errors are the output errors split in proportion to the connection weights and recombined at the hidden nodes
// This is done by multiplying the transpose of the output matrix by the output errors
// In Python: hidden_errors = numpy.dot(self.who.T, output_errors)
mtx_type HiddenToOutputTranspose [NUM_OUTPUTS][NUM_HIDDEN_NODES] ; // notice reverse sizes
Matrix.Transpose((mtx_type*)gHiddenToOutputMatrix, // Original matrix
NUM_HIDDEN_NODES, // Rows in original matrix
NUM_OUTPUTS, // Cols in original matrix
(mtx_type*)HiddenToOutputTranspose) ; // Transposed matrix
// We've just made the transpose, now use it to calculate the hidden errors
mtx_type HiddenErrors[NUM_HIDDEN_NODES] ;
Matrix.Multiply ((mtx_type*)OutputErrors,
(mtx_type*)HiddenToOutputTranspose,
1, // rows in the input
NUM_OUTPUTS, // columns in the input, rows in the matrix
NUM_HIDDEN_NODES,
(mtx_type*)HiddenErrors) ; // Output 1 row and NUM_HIDDEN_NODES columns
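// For example in the POSITION_TEST configuration this multiplication is
// (1 x 2) x (2 x 4) = (1 x 4): each hidden node receives the sum of the
// output errors weighted by the connections it feeds into.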
// The hidden outputs are the outputs of the hidden layer
UpdateWeights (NUM_INPUTS,Ins, // inputs into hidden layer
NUM_HIDDEN_NODES,gHiddenOutputs, // outputs from hidden layer
HiddenErrors,
(mtx_type*)gInputToHiddenMatrix) ; // connections of first layer
#define QUERY_AS_YOU_UPDATE 1
#if QUERY_AS_YOU_UPDATE
static int iDbgFlag = 0 ;
if (iDbgFlag == 300) {
mtx_type TestResultVector[NUM_OUTPUTS] ;
QueryTheNeuralNetwork ((mtx_type*)Ins, (mtx_type*)TestResultVector) ;
// Show the (hopefully minuscule) errors
double Error = abs(Targets[0] - TestResultVector[0]) ;
char sErr [10] ;
dtostrf(Error, 6, 3, sErr ); // string will be 6 characters wide with 3 decimal places
Serial.print ("Target = ") ; Serial.print (Targets[0]) ;
Serial.print (", Output = ") ; Serial.print (TestResultVector[0]) ;
Serial.print (", Error = ") ; Serial.println (sErr) ;
iDbgFlag=0 ;
}
iDbgFlag++;
#endif // QUERY_AS_YOU_UPDATE
}
void setup()
{
Serial.begin(9600);
// Seed the random number generator from an unconnected analog pin so
// that the initial weights differ from run to run...
randomSeed(analogRead(A0));
// Set up the weights randomly for later training...
RandomInitWeights ((mtx_type*)gInputToHiddenMatrix,
NUM_INPUTS,
NUM_HIDDEN_NODES) ;
RandomInitWeights ((mtx_type*)gHiddenToOutputMatrix,
NUM_HIDDEN_NODES,
NUM_OUTPUTS) ;
}
#if POSITION_TEST
// This creates a test input vector and the ideal output for that input
void CreatePositionTrainingSample (mtx_type* TrainInputVector, mtx_type* TrainTargetVector)
{
// A check to stop silly errors as you meddle with the code...
if (NUM_OUTPUTS != 2) {
Serial.print ("Training sample error, I want 2 outputs!") ;
}
// Choose a position from 0 to NUM_INPUTS-1...
int iPos = random (0,NUM_INPUTS) ;
// Make a vector with a spike at the randomly chosen iPos
for (int i = 0 ; i < NUM_INPUTS ; i++) {
if (i == iPos) {
TrainInputVector[i] = 1.0 ;
} else {
TrainInputVector[i] = 0.0 ;
}
}
// Now we have an input vector with a single non-zero value.
// What is the expected output?
// We want one output to be at the "value of the position" so to speak
double OutputValue1 = (double)iPos/double(NUM_INPUTS-1) ;
// Just to have more than one output...
// ...make the other output the "opposite of the value of the position"
double OutputValue2 = 1.0 - OutputValue1 ;
// This is the ideal correct answer...
TrainTargetVector[0] = OutputValue1 ;
TrainTargetVector[1] = OutputValue2 ;
}
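// Worked example with NUM_INPUTS = 5: if iPos = 2 the input vector is
// {0, 0, 1, 0, 0} and the targets are {2/4, 1 - 2/4} = {0.50, 0.50};
// if iPos = 4 the input is {0, 0, 0, 0, 1} and the targets are {1.00, 0.00}.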
#endif // POSITION_TEST
#if XOR_TEST
// Creates training samples from this logic table:
// in1 in2   XOR  XNOR
//  0   0     0    1
//  0   1     1    0
//  1   0     1    0
//  1   1     0    1
void CreateXORTrainingSample (mtx_type* TrainInputVector, mtx_type* TrainTargetVector)
{
// A check to stop silly errors as you meddle with the code...
if (NUM_OUTPUTS != 2) {
Serial.print ("Training sample error, I want 2 outputs!") ;
}
if (NUM_INPUTS != 2) {
Serial.print ("Training sample error, I want 2 inputs!") ;
}
// Choose a row in the truth table...
int iWhichRow = random (0,4) ; // will give me a number from 0 to 3 inclusive
if (iWhichRow == 0) {
TrainInputVector[0] = 0 ;
TrainInputVector[1] = 0 ;
TrainTargetVector[0] = 0 ;
TrainTargetVector[1] = 1 ;
} else if (iWhichRow == 1) {
TrainInputVector[0] = 1 ;
TrainInputVector[1] = 0 ;
TrainTargetVector[0] = 1 ;
TrainTargetVector[1] = 0 ;
} else if (iWhichRow == 2) {
TrainInputVector[0] = 0 ;
TrainInputVector[1] = 1 ;
TrainTargetVector[0] = 1 ;
TrainTargetVector[1] = 0 ;
} else {
TrainInputVector[0] = 1 ;
TrainInputVector[1] = 1 ;
TrainTargetVector[0] = 0 ;
TrainTargetVector[1] = 1 ;
}
}
#endif // XOR_TEST
// Train the neural network on many test cases
void RunMultipleTrain ()
{
for (long t = 0 ; t < NUM_TRAINING_SAMPLES; t++) {
// Create a perfect test case, input and required output...
mtx_type TrainInputVector[NUM_INPUTS] ;
mtx_type TrainTargetVector[NUM_OUTPUTS] ;
#if XOR_TEST
CreateXORTrainingSample (TrainInputVector,TrainTargetVector) ;
#elif POSITION_TEST
CreatePositionTrainingSample (TrainInputVector,TrainTargetVector) ;
#endif
// This will modify the NN weights to get the outputs closer to the target
TrainOnOneSample (TrainInputVector,TrainTargetVector) ;
}
}
void loop()
{
// Train the neural network...
RunMultipleTrain () ;
// Test the trained neural network...
const int ikNumTests = 10 ;
for (int t = 0 ; t < ikNumTests ; t++) {
Serial.println ("") ;
// Create a perfect test case, input and required output...
mtx_type TestInputVector[NUM_INPUTS] ;
mtx_type TestTargetVector[NUM_OUTPUTS] ;
#if XOR_TEST
CreateXORTrainingSample (TestInputVector,TestTargetVector) ;
#else
CreatePositionTrainingSample (TestInputVector,TestTargetVector) ;
#endif
// Ask the NN what it thinks the outputs are...
mtx_type TestResultVector[NUM_OUTPUTS] ;
QueryTheNeuralNetwork ((mtx_type*)TestInputVector, (mtx_type*)TestResultVector) ;
// Show the (hopefully minuscule) errors
for (int op = 0 ; op < NUM_OUTPUTS ; op++) {
double Error = TestTargetVector[op] - TestResultVector[op] ;
Serial.print ("Target = ") ; Serial.print (TestTargetVector[op]) ;
Serial.print (", Output = ") ; Serial.print (TestResultVector[op]) ;
Serial.print (", Error = ") ; Serial.println (Error) ;
}
}
while(1); // Halt here: train and test just once per reset
}