Saturday, March 28, 2020
Neural Networks on the Arduino Part 7: The complete source code.
// MatrixANNAndTrain, Created by Owen F. Ransen 28th March 2020
#include <MatrixMath.h>
// To choose the type of test, set one of these to 1 and the other to 0
#define XOR_TEST 0
#define POSITION_TEST 1
#if XOR_TEST+POSITION_TEST != 1
#error Set one of XOR_TEST and POSITION_TEST to 1 and the other to 0 please.
#endif
#if XOR_TEST
#define NUM_INPUTS 2
#define NUM_HIDDEN_NODES 6
#define NUM_OUTPUTS 2
#define NUM_TRAINING_SAMPLES 35000
#elif POSITION_TEST
#define NUM_INPUTS 5
#define NUM_HIDDEN_NODES 4
#define NUM_OUTPUTS 2
#define NUM_TRAINING_SAMPLES 8000
#endif
// These matrices must always exist, hence they are global
mtx_type gInputToHiddenMatrix[NUM_INPUTS][NUM_HIDDEN_NODES];
mtx_type gHiddenToOutputMatrix[NUM_HIDDEN_NODES][NUM_OUTPUTS];
// Useful to have this as a global so that after QueryTheNeuralNetwork we know
// what the hidden outputs were
mtx_type gHiddenOutputs[NUM_HIDDEN_NODES];
// This is the learning rate
const double Alpha = 0.1 ;
// Implement y = Sigmoid(x)
double Sigmoid (const double x)
{
double y = 1.0/(1.0 + exp(-x)) ;
return y ;
}
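// A quick sanity check of the function above: Sigmoid(0.0) = 0.5,
// Sigmoid(2.0) ≈ 0.881, Sigmoid(-2.0) ≈ 0.119. The output is always
// strictly between 0 and 1, which is why it can be re-used later to
// compute its own derivative.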
// Outer product: form a matrix from two vectors
// C = A*B
// A is a column vector with mRows elements (vertical)
// B is a row vector with nColumns elements (horizontal)
// C must have space for mRows*nColumns elements for this to work.
void OuterProduct(mtx_type* A, mtx_type* B, int mRows, int nColumns, mtx_type* C)
{
int ra, cb;
for (ra = 0; ra < mRows; ra++) {
for(cb = 0; cb < nColumns; cb++)
{
// Overwrite (do not accumulate) each element of the flattened matrix
C[(nColumns * ra) + cb] = A[ra] * B[cb];
}
}
}
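// Worked example: with A = {1, 2, 3} (a column vector, mRows = 3)
// and B = {4, 5} (a row vector, nColumns = 2), OuterProduct fills C with:
//     [ 1*4  1*5 ]   [  4   5 ]
// C = [ 2*4  2*5 ] = [  8  10 ]
//     [ 3*4  3*5 ]   [ 12  15 ]
// stored row by row in the flat array as {4, 5, 8, 10, 12, 15}.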
// Here we randomly initialise the weights in the neural network matrix
// This happens before training.
void RandomInitWeights (mtx_type* WeightsMatrix,
int iNumRows,
int iNumColumns)
{
for (int row = 0; row < iNumRows; row++) {
int iRowPart = (row*iNumColumns) ;
for (int col = 0; col < iNumColumns; col++) {
int iIdx = iRowPart + col ;
// It has been suggested that a good starting point
// is to have weights from -0.5 to +0.5
WeightsMatrix[iIdx] = (random (0,1001)/1000.0) - 0.5 ;
}
}
}
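// A quick check of the arithmetic above: random(0,1001) returns a long
// from 0 to 1000 inclusive, so random(0,1001)/1000.0 spans 0.000 to 1.000
// in steps of 0.001, and subtracting 0.5 gives weights from -0.500 to +0.500.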
// Given the inputs and the weights in the matrices calculate the outputs
void QueryTheNeuralNetwork (mtx_type* InVector, mtx_type* OutVector)
{
// Use inputs and first matrix to get hidden node values...
Matrix.Multiply((mtx_type*)InVector,
(mtx_type*)gInputToHiddenMatrix,
1, // rows in InputVector (a vector, so 1)
NUM_INPUTS, // columns in InputVector and rows in gInputToHiddenMatrix
NUM_HIDDEN_NODES, // columns in gInputToHiddenMatrix
(mtx_type*)gHiddenOutputs); // This is the output of Multiply
// Now we have values in the gHiddenOutputs
// i.e. we have the summed weights*inputs in the hidden nodes
// Transform hidden node values using sigmoid...
// Go from h'1 to h1 in the diagram
for (int hn = 0 ; hn < NUM_HIDDEN_NODES ; hn++) {
double OldHiddenNodeValue = gHiddenOutputs[hn] ;
double NewHiddenNodeValue = Sigmoid (OldHiddenNodeValue) ;
gHiddenOutputs[hn] = NewHiddenNodeValue ;
}
// Do (sigmoided hidden node values) x (second matrix) to get outputs
Matrix.Multiply((mtx_type*)gHiddenOutputs,
(mtx_type*)gHiddenToOutputMatrix,
1, // rows in HiddenVector (a row vector, so 1)
NUM_HIDDEN_NODES, // columns in gHiddenOutputs and rows in gHiddenToOutputMatrix
NUM_OUTPUTS, // columns in gHiddenToOutputMatrix
(mtx_type*)OutVector); // This is the output of this function
// Transform output node values using sigmoid...
for (int o = 0 ; o < NUM_OUTPUTS ; o++) {
const double OldOutputValue = OutVector[o] ;
const double NewOutputValue = Sigmoid (OldOutputValue) ;
OutVector[o] = NewOutputValue ;
}
// "answer" is now inside OutputVector!
// and the current hidden node values are in gHiddenNodes
}
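// In matrix notation the whole forward pass above is just:
//   Hidden = Sigmoid(Input  x gInputToHiddenMatrix)   (1 x NUM_HIDDEN_NODES)
//   Output = Sigmoid(Hidden x gHiddenToOutputMatrix)  (1 x NUM_OUTPUTS)
// with Sigmoid applied element by element after each multiplication.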
// Change the weights matrix so that it produces smaller errors
void UpdateWeights (int iNumInputNodes, mtx_type* InputValues, // a column vector
int iNumOutputNodes, mtx_type* OutputValues, // a row vector
mtx_type* ErrorValues, // same size as output values
mtx_type* WeightMatrix) // This is an input and output
{
// This is just to keep sizes of matrices in mind
int iNumRows = iNumInputNodes ;
int iNumCols = iNumOutputNodes ;
// The "horizontal" row vector f is formed from
// Alpha*error*output*(1-output) in each column.
// It is initialised from the errors and outputs of this layer, and so has
// the same size as the error vector and the output vector
mtx_type f[iNumOutputNodes] ;
for (int col = 0 ; col < iNumOutputNodes ; col++) {
// The outputs have been created using the sigmoid function.
// The derivative of the sigmoid is used to modify the weights.
// Fortunately, because we have the output values, the derivative
// is easy to calculate: for y = Sigmoid(x), dy/dx = y*(1-y).
const double SigmoidDerivative = OutputValues[col]*(1.0-OutputValues[col]) ;
f[col] = Alpha*ErrorValues[col]*SigmoidDerivative ;
}
// The "vertical" column vector is the inputs to the current layer
// Now we can do the outer product to form a matrix from a
// a column vector multiplied by a row vector...
// to get a matrix of delta weights
mtx_type ErrorDeltasMatrix [iNumRows*iNumCols] ;
OuterProduct((mtx_type*)InputValues, f, iNumRows, iNumCols, (mtx_type*)ErrorDeltasMatrix) ;
// Now we have the deltas to add to the current matrix
// We are simply doing OldWeight = OldWeight+DeltaWeight here
for (int row = 0 ; row < iNumRows ; row++) {
for (int col = 0 ; col < iNumCols ; col++) {
int iIndex = (row*iNumCols)+col ;
WeightMatrix[iIndex] = WeightMatrix[iIndex] + (ErrorDeltasMatrix[iIndex]) ;
}
}
}
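// In matrix form the update above is one gradient descent step:
//   W = W + OuterProduct(Inputs, Alpha * Errors . Outputs . (1 - Outputs))
// where "." means element-by-element multiplication. The same function
// serves both layers because only the sizes change.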
// Given an input and a target find the error and change the weights to reduce the error
// We call UpdateWeights twice.
// You can find the equivalent in the Python code by searching for "def train"
void TrainOnOneSample (mtx_type* Ins, // The input
mtx_type* Targets) // The ideal output
{
// Ask what the answer would be with the current weights...
mtx_type TestOutputs[NUM_OUTPUTS] ;
QueryTheNeuralNetwork (Ins, TestOutputs) ; // Sets gHiddenOutputs too
//# error is target - current_output
// output_errors = targetscol - final_outputs
mtx_type OutputErrors [NUM_OUTPUTS] ;
for (int e = 0 ; e < NUM_OUTPUTS ; e++) {
OutputErrors[e] = Targets[e] - TestOutputs[e] ;
}
// Update the weights of the connections from output to hidden, gHiddenToOutputMatrix
// The hidden outputs are the inputs into the final layer
UpdateWeights (NUM_HIDDEN_NODES,gHiddenOutputs, // here are the inputs into final layer
NUM_OUTPUTS,TestOutputs, // outputs from final layer
(mtx_type*)OutputErrors, // final layer outputs errors, calculated above
(mtx_type*)gHiddenToOutputMatrix) ; // connections of final layer to be updated
// update the weights for the links between the input and hidden layers
// We need the hidden layer errors for that
// The hidden layer errors are the output errors split in proportion to the connection weights and recombined at the hidden nodes
// This is done by multiplying the transpose of the output matrix by the output errors
// In Python: hidden_errors = numpy.dot(self.who.T, output_errors)
mtx_type HiddenToOutputTranspose [NUM_OUTPUTS][NUM_HIDDEN_NODES] ; // notice reverse sizes
Matrix.Transpose((mtx_type*)gHiddenToOutputMatrix, // Original matrix
NUM_HIDDEN_NODES, // Rows in original matrix
NUM_OUTPUTS, // Cols in original matrix
(mtx_type*)HiddenToOutputTranspose) ; // Transposed matrix
// We've just made the transpose, now use it to calculate the hidden errors
mtx_type HiddenErrors[NUM_HIDDEN_NODES] ;
Matrix.Multiply ((mtx_type*)OutputErrors,
(mtx_type*)HiddenToOutputTranspose,
1, // rows in the input
NUM_OUTPUTS, // columns in the input, rows in the matrix
NUM_HIDDEN_NODES,
(mtx_type*)HiddenErrors) ; // Output 1 row and NUM_HIDDEN_NODES columns
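// For example in the POSITION_TEST configuration this multiplication is
// (1 x 2) x (2 x 4) = (1 x 4): each hidden node receives the sum of the
// output errors weighted by the connections it feeds into.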
// The hidden outputs are the outputs of the hidden layer
UpdateWeights (NUM_INPUTS,Ins, // inputs into hidden layer
NUM_HIDDEN_NODES,gHiddenOutputs, // outputs from hidden layer
HiddenErrors,
(mtx_type*)gInputToHiddenMatrix) ; // connections of first layer
#define QUERY_AS_YOU_UPDATE 1
#if QUERY_AS_YOU_UPDATE
static int iDbgFlag = 0 ;
if (iDbgFlag == 300) {
mtx_type TestResultVector[NUM_OUTPUTS] ;
QueryTheNeuralNetwork ((mtx_type*)Ins, (mtx_type*)TestResultVector) ;
// Show the (hopefully minuscule) errors
double Error = abs(Targets[0] - TestResultVector[0]) ;
char sErr [10] ;
dtostrf(Error, 6, 3, sErr ); // string will be 6 characters wide with 3 decimal places
Serial.print ("Target = ") ; Serial.print (Targets[0]) ;
Serial.print (", Output = ") ; Serial.print (TestResultVector[0]) ;
Serial.print (", Error = ") ; Serial.println (sErr) ;
iDbgFlag=0 ;
}
iDbgFlag++;
#endif // QUERY_AS_YOU_UPDATE
}
void setup()
{
Serial.begin(9600);
// Seed the random number generator from an unconnected analog pin so
// that the initial weights differ from run to run...
randomSeed(analogRead(A0));
// Set up the weights randomly for later training...
RandomInitWeights ((mtx_type*)gInputToHiddenMatrix,
NUM_INPUTS,
NUM_HIDDEN_NODES) ;
RandomInitWeights ((mtx_type*)gHiddenToOutputMatrix,
NUM_HIDDEN_NODES,
NUM_OUTPUTS) ;
}
#if POSITION_TEST
// This creates a test input vector and the ideal output for that input
void CreatePositionTrainingSample (mtx_type* TrainInputVector, mtx_type* TrainTargetVector)
{
// A check to stop silly errors as you meddle with the code...
if (NUM_OUTPUTS != 2) {
Serial.print ("Training sample error, I want 2 outputs!") ;
}
// Choose a position from 0 to NUM_INPUTS-1...
int iPos = random (0,NUM_INPUTS) ;
// Make a vector with a spike at the randomly chosen iPos
for (int i = 0 ; i < NUM_INPUTS ; i++) {
if (i == iPos) {
TrainInputVector[i] = 1.0 ;
} else {
TrainInputVector[i] = 0.0 ;
}
}
// Now we have an input vector with a single non-zero value.
// What is the expected output?
// We want one output to be at the "value of the position" so to speak
double OutputValue1 = (double)iPos/double(NUM_INPUTS-1) ;
// Just to have more than one output...
// ...make the other output the "opposite of the value of the position"
double OutputValue2 = 1.0 - OutputValue1 ;
// This is the ideal correct answer...
TrainTargetVector[0] = OutputValue1 ;
TrainTargetVector[1] = OutputValue2 ;
}
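// Worked example with NUM_INPUTS = 5: if iPos = 2 the input vector is
// {0, 0, 1, 0, 0} and the targets are {2/4, 1 - 2/4} = {0.50, 0.50};
// if iPos = 4 the input is {0, 0, 0, 0, 1} and the targets are {1.00, 0.00}.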
#endif // POSITION_TEST
#if XOR_TEST
// Creates training samples from this logic table:
// in1 in2   XOR  XNOR
//  0   0     0    1
//  0   1     1    0
//  1   0     1    0
//  1   1     0    1
void CreateXORTrainingSample (mtx_type* TrainInputVector, mtx_type* TrainTargetVector)
{
// A check to stop silly errors as you meddle with the code...
if (NUM_OUTPUTS != 2) {
Serial.print ("Training sample error, I want 2 outputs!") ;
}
if (NUM_INPUTS != 2) {
Serial.print ("Training sample error, I want 2 inputs!") ;
}
// Choose a row in the truth table...
int iWhichRow = random (0,4) ; // will give me a number from 0 to 3 inclusive
if (iWhichRow == 0) {
TrainInputVector[0] = 0 ;
TrainInputVector[1] = 0 ;
TrainTargetVector[0] = 0 ;
TrainTargetVector[1] = 1 ;
} else if (iWhichRow == 1) {
TrainInputVector[0] = 1 ;
TrainInputVector[1] = 0 ;
TrainTargetVector[0] = 1 ;
TrainTargetVector[1] = 0 ;
} else if (iWhichRow == 2) {
TrainInputVector[0] = 0 ;
TrainInputVector[1] = 1 ;
TrainTargetVector[0] = 1 ;
TrainTargetVector[1] = 0 ;
} else {
TrainInputVector[0] = 1 ;
TrainInputVector[1] = 1 ;
TrainTargetVector[0] = 0 ;
TrainTargetVector[1] = 1 ;
}
}
#endif // XOR_TEST
// Train the neural network on many test cases
void RunMultipleTrain ()
{
for (long t = 0 ; t < NUM_TRAINING_SAMPLES; t++) {
// Create a perfect test case, input and required output...
mtx_type TrainInputVector[NUM_INPUTS] ;
mtx_type TrainTargetVector[NUM_OUTPUTS] ;
#if XOR_TEST
CreateXORTrainingSample (TrainInputVector,TrainTargetVector) ;
#elif POSITION_TEST
CreatePositionTrainingSample (TrainInputVector,TrainTargetVector) ;
#endif
// This will modify the NN weights to get the outputs closer to the target
TrainOnOneSample (TrainInputVector,TrainTargetVector) ;
}
}
void loop()
{
// Train the neural network...
RunMultipleTrain () ;
// Test the trained neural network...
const int ikNumTests = 10 ;
for (int t = 0 ; t < ikNumTests ; t++) {
Serial.println ("") ;
// Create a perfect test case, input and required output...
mtx_type TestInputVector[NUM_INPUTS] ;
mtx_type TestTargetVector[NUM_OUTPUTS] ;
#if XOR_TEST
CreateXORTrainingSample (TestInputVector,TestTargetVector) ;
#else
CreatePositionTrainingSample (TestInputVector,TestTargetVector) ;
#endif
// Ask the NN what it thinks the outputs are...
mtx_type TestResultVector[NUM_OUTPUTS] ;
QueryTheNeuralNetwork ((mtx_type*)TestInputVector, (mtx_type*)TestResultVector) ;
// Show the (hopefully minuscule) errors
for (int op = 0 ; op < NUM_OUTPUTS ; op++) {
double Error = TestTargetVector[op] - TestResultVector[op] ;
Serial.print ("Target = ") ; Serial.print (TestTargetVector[op]) ;
Serial.print (", Output = ") ; Serial.print (TestResultVector[op]) ;
Serial.print (", Error = ") ; Serial.println (Error) ;
}
}
while(1); // Halt here: train and test just once per reset
}