Rewriting a convolutional network's forward computation in C++, reproducing Theano's test results
My requirement:
Train a CNN with Theano and save the converged network weights; then implement the CNN's forward computation in C++, load Theano's weights, and reproduce Theano's test results.
What I ended up with:
1. the forward computation of a convolutional neural network;
2. the forward and backward computation of an MLP, which can therefore be used to train on samples.
Note:
if the goal is to reproduce Theano's test results, the hidden layer's activation function must be tanh;
otherwise, for the MLP training process, the activation function has to be sigmoid.
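For reference, these are the two activations exactly as they appear later in util.cpp, pulled out as a small self-contained extract. The reason for the rule above: Theano's LeNet tutorial applies T.tanh in its forward pass, while the backpropagation code in hiddenLayer.cpp below uses the sigmoid derivative y*(1-y), so sigmoid is the consistent choice when actually training.

#include <cmath>

// Hidden-layer activations used throughout this post (same bodies as util.cpp).
// tanh matches Theano's forward pass; the MLP training code assumes sigmoid.
double sigmoid(double dx)
{
    return 1.0 / (1.0 + exp(-dx));
}

double mytanh(double dx)
{
    double e2x = exp(2 * dx);
    return (e2x - 1) / (e2x + 1);
}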
Briefly, there were two difficulties:
1. getting the weights and test samples that Theano saved into C++;
2. replicating Theano's convolution and pooling computation exactly.
To get past these two points I went down a lot of dead ends:
To reproduce Theano's test results in C++, the C++ side must be able to read the weights and the test samples that Theano saved. My analysis went like this:
1. Theano's weights are numpy data, and having C++ consume that directly is hard: the numpy binary format is awkward to parse and there is very little material about it online.
2. Use Python as an intermediate converter to satisfy 1). Reading the Theano code later, I noticed the training samples loaded into Python do not have to be converted to numpy arrays; plain Python objects work. But a file produced by cPickle's dump carries a lot of extra formatting, so it is also unsuitable for exchanging with C++.
3. Settle on JSON as a plain-text exchange format; the catch is that Python's json module cannot serialize numpy arrays directly.
4. To solve 3, after a day of searching I finally found the numpy array's tolist interface, which converts a numpy array into a Python list.
5. Now both Python and C++ could speak JSON. I studied the jsoncpp library for reading the JSON files written by Python; testing showed the library is not suited to large files, it easily runs out of memory and is extremely slow, so it was ruled out.
6. Write a C++ function to parse the JSON file by hand. And when generating the training and test samples from the pot files, generate them directly in C++ too, without going through the numpy array format.
With the analysis above, difficulty 1 was solved. For difficulty 2, look at a typical CNN network diagram.
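To make the agreed format concrete: the weight file is one nested JSON list holding a [w, b] pair per layer, where w is a list of rows and b is a flat list. Below is a minimal, hand-written miniature of that layout round-tripped through LoadWeighFromJson, the hand-rolled parser listed later in util.cpp. The file name and the tiny layer sizes are made up for the example:

#include <fstream>
#include <vector>
#include "util.h"
using namespace std;

int main()
{
    // two "layers": the first has two weight rows of width 3, the second one row of width 2
    ofstream ofs("sample.json");
    ofs << "[[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], [0.1, 0.2]], "
        << "[[[7.0, 8.0]], [0.3]]]";
    ofs.close();

    vector< vector<double*> > vvAllw;
    vector< vector<double> > vvAllb;
    vector<int> vecSecondDim;    // row width of each layer's w
    vecSecondDim.push_back(3);
    vecSecondDim.push_back(2);
    LoadWeighFromJson(vvAllw, vvAllb, "sample.json", vecSecondDim);

    // vvAllw[0] now holds two double[3] rows and vvAllb[0] holds {0.1, 0.2};
    // the loader allocated the rows, so free them here
    for (size_t i = 0; i < vvAllw.size(); ++i)
        for (size_t j = 0; j < vvAllw[i].size(); ++j)
            delete []vvAllw[i][j];
    return 0;
}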
Difficulty 2 in detail: Theano's conv.conv2d performs a true convolution, i.e. it flips the kernel, while the C++ code below computes a plain cross-correlation. That is why each kernel is rotated 180 degrees (numpy.rot90(w, 2)) when the weights are exported, and rotated back when Theano reloads them. The Theano code that saves the weights:
def getDataJson(layers):
    data = []
    i = 0
    for layer in layers:
        w, b = layer.params
        # print '..layer is', i
        w, b = w.get_value(), b.get_value()
        wshape = w.shape
        # print '...the shape of w is', wshape
        if len(wshape) == 2:
            w = w.transpose()
        else:
            for k in xrange(wshape[0]):
                for j in xrange(wshape[1]):
                    w[k][j] = numpy.rot90(w[k][j], 2)
            w = w.reshape((wshape[0], numpy.prod(wshape[1:])))
        w = w.tolist()
        b = b.tolist()
        data.append([w, b])
        i += 1
    return data

def writefile(data, name='../../tmp/src/data/theanocnn.json'):
    print ('writefile is ' + name)
    f = open(name, "wb")
    json.dump(data, f)
    f.close()
The Theano code that reads the weights back:
def readfile(layers, nkerns, name='../../tmp/src/data/theanocnn.json'):
    # Load the dataset
    print ('readfile is ' + name)
    f = open(name, 'rb')
    data = json.load(f)
    f.close()
    readwb(data, layers, nkerns)

def readwb(data, layers, nkerns):
    i = 0
    kernSize = len(nkerns)
    inputnum = 1
    for layer in layers:
        w, b = data[i]
        w = numpy.array(w, dtype='float32')
        b = numpy.array(b, dtype='float32')
        # print '..layer is', i
        # print w.shape
        if i >= kernSize:
            w = w.transpose()
        else:
            w = w.reshape((nkerns[i], inputnum, 5, 5))
            for k in xrange(nkerns[i]):
                for j in xrange(inputnum):
                    c = w[k][j]
                    w[k][j] = numpy.rot90(c, 2)
            inputnum = nkerns[i]
        # print '..readwb, transpose and rot180'
        # print w.shape
        layer.W.set_value(w, borrow=True)
        layer.b.set_value(b, borrow=True)
        i += 1
The test samples are generated from mnist.pkl; the core code is as follows:
def mnist2json_small(cnnName='mnist_small.json', validNumber=10):
    dataset = '../../data/mnist.pkl'
    print '... loading data', dataset
    # Load the dataset
    f = open(dataset, 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    #print test_set
    f.close()

    def np2listSmall(train_set, number):
        trains, labels = train_set
        trainfile = []
        # comment out the next line to emit only `number` validation samples
        number = len(labels)
        for one in trains[:number]:
            one = one.tolist()
            trainfile.append(one)
        labelfile = labels[:number].tolist()
        datafile = [trainfile, labelfile]
        return datafile

    smallData = valid_set
    print len(smallData)
    valid, validlabel = np2listSmall(smallData, validNumber)
    datafile = [valid, validlabel]
    basedir = '../../tmp/src/data/'
    # basedir = './'
    json.dump(datafile, open(basedir + cnnName, 'wb'))
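On the C++ side these samples are loaded back with LoadTestSampleFromJson from util.cpp (listed later in the post). A hypothetical standalone call, assuming the file written by mnist2json_small above:

#include <vector>
#include "util.h"
using namespace std;

int main()
{
    vector<double*> vecValid;    // one double[28*28] array per image
    vector<WORD> vecValidLabel;  // one label per image
    // path is basedir + cnnName as written by mnist2json_small
    LoadTestSampleFromJson(vecValid, vecValidLabel,
                           "../../tmp/src/data/mnist_small.json", 28 * 28);
    // ... use the samples, then free the arrays the loader allocated
    for (size_t i = 0; i < vecValid.size(); ++i)
        delete []vecValid[i];
    return 0;
}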
My requirement and the difficulties I hit have now been described. Whatever small questions remain, I am sure you can solve them in far less time than I spent. The code follows.
If you would rather not set up the project yourself, a VS2008 C++ project is available here; just generate the weights with Theano as described and it will read them and run.
C++ code
main.cpp
#include <iostream>
#include "mlp.h"
#include "util.h"
#include "testinherit.h"
#include "neuralNetwork.h"
using namespace std;
/************************************************************************/
/* This program implements:
   1. the forward computation of a convolutional neural network
   2. the forward and backward computation of an MLP, i.e. it can also
      train on samples
   Note:
   to reproduce Theano's test results, the hidden-layer activation must
   be tanh; otherwise, for the MLP training process, choose sigmoid.
*/
/************************************************************************/
int main()
{
    cout << "****cnn****" << endl;
    TestCnnTheano(28 * 28, 10);
    // TestMlpMnist runs MLP training on the MNIST samples
    //TestMlpMnist(28 * 28, 500, 10);
    return 0;
}
neuralNetwork.h
#ifndef NEURALNETWORK_H
#define NEURALNETWORK_H

#include "mlp.h"
#include "cnn.h"
#include <vector>
using std::vector;

/************************************************************************/
/* A convolutional neural network                                       */
/************************************************************************/
class NeuralNetWork
{
public:
    NeuralNetWork(int iInput, int iOut);
    ~NeuralNetWork();

    void Predict(double** in_data, int n);
    double CalErrorRate(const vector<double *> &vecvalid, const vector<WORD> &vecValidlabel);
    void Setwb(vector< vector<double*> > &vvAllw, vector< vector<double> > &vvAllb);
    void SetTrainNum(int iNum);
    int Predict(double *pInputData);
    // void Forward_propagation(double** ppdata, int n);
    double* Forward_propagation(double *);

private:
    int m_iSampleNum; // number of samples
    int m_iInput;     // input dimension
    int m_iOut;       // output dimension

    vector<CnnLayer *> vecCnns;
    Mlp *m_pMlp;
};
void TestCnnTheano(const int iInput, const int iOut);
#endif
neuralNetwork.cpp
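The original neuralNetwork.cpp listing did not survive in the post (the header was duplicated in its place), so what follows is only a minimal sketch of the implementation the header implies, limited to the members TestCnnTheano needs. The topology (nkerns = [20, 50], 5x5 kernels, 2x2 pooling, one 500-unit hidden layer) is taken from the Theano script at the end of this post; the JSON file paths are assumptions and must match wherever your Theano run wrote them:

#include "neuralNetwork.h"
#include "util.h"
#include <iostream>
using namespace std;

// One 500-unit hidden layer, matching the Theano script (static so the
// pointer handed to Mlp stays valid).
static int s_arrHidden[] = {500};

NeuralNetWork::NeuralNetWork(int iInput, int iOut):
    m_iSampleNum(0), m_iInput(iInput), m_iOut(iOut), m_pMlp(NULL)
{
    // Two conv+pool stages: 28x28 -> conv 5x5 -> 24x24 -> pool 2x2 -> 12x12,
    // then 12x12 -> 8x8 -> 4x4; nkerns = [20, 50] as in the Theano script.
    int arriKerns[2] = {20, 50};
    int iImageWidth = 28, iImageNum = 1;
    for (int i = 0; i < 2; ++i)
    {
        vecCnns.push_back(new CnnLayer(m_iSampleNum, iImageNum, iImageWidth,
                                       arriKerns[i], 5, 2));
        iImageWidth = (iImageWidth - 5 + 1) / 2;
        iImageNum = arriKerns[i];
    }
    // 50 maps of 4x4 pixels = 800 inputs to the MLP
    m_pMlp = new Mlp(m_iSampleNum, iImageNum * iImageWidth * iImageWidth,
                     m_iOut, 1, s_arrHidden);
}

NeuralNetWork::~NeuralNetWork()
{
    for (size_t i = 0; i < vecCnns.size(); ++i)
        delete vecCnns[i];
    delete m_pMlp;
}

void NeuralNetWork::SetTrainNum(int iNum)
{
    m_iSampleNum = iNum;
    for (size_t i = 0; i < vecCnns.size(); ++i)
        vecCnns[i]->SetTrainNum(iNum);
    m_pMlp->SetTrainNum(iNum);
}

int NeuralNetWork::Predict(double *pInputData)
{
    // chain the conv+pool layers, then let the MLP classify the flat features
    double *pdValue = pInputData;
    for (size_t i = 0; i < vecCnns.size(); ++i)
    {
        vecCnns[i]->Forward_propagation(pdValue);
        pdValue = vecCnns[i]->GetOutputData();
    }
    return m_pMlp->Predict(pdValue);
}

double NeuralNetWork::CalErrorRate(const vector<double *> &vecvalid,
                                   const vector<WORD> &vecValidlabel)
{
    int iErrorNumber = 0, iValidNumber = vecValidlabel.size();
    for (int i = 0; i < iValidNumber; ++i)
    {
        if (Predict(vecvalid[i]) != vecValidlabel[i])
            ++iErrorNumber;
    }
    double dErrorRate = (double)iErrorNumber / iValidNumber;
    cout << "the error rate of the cnn is " << dErrorRate * 100 << "%" << endl;
    return dErrorRate;
}

void NeuralNetWork::Setwb(vector< vector<double*> > &vvAllw,
                          vector< vector<double> > &vvAllb)
{
    // the first entries belong to the conv layers, the rest to the MLP
    size_t iCnnNum = vecCnns.size();
    for (size_t i = 0; i < iCnnNum; ++i)
        vecCnns[i]->Setwb(vvAllw[i], vvAllb[i]);
    vector< vector<double*> > vvMlpw(vvAllw.begin() + iCnnNum, vvAllw.end());
    vector< vector<double> > vvMlpb(vvAllb.begin() + iCnnNum, vvAllb.end());
    m_pMlp->Setwb(vvMlpw, vvMlpb);
}

void TestCnnTheano(const int iInput, const int iOut)
{
    NeuralNetWork neural(iInput, iOut);
    // row widths of the four saved weight matrices:
    // 1*5*5, 20*5*5, 50*4*4 and 500 (see getDataJson above)
    vector<int> vecSecondDim;
    vecSecondDim.push_back(25);
    vecSecondDim.push_back(500);
    vecSecondDim.push_back(800);
    vecSecondDim.push_back(500);
    vector< vector<double*> > vvAllw;
    vector< vector<double> > vvAllb;
    // both paths are assumptions; adjust to where your files live
    LoadWeighFromJson(vvAllw, vvAllb, "../../data/theanocnn.json", vecSecondDim);
    vector<double*> vecValid;
    vector<WORD> vecValidlabel;
    LoadTestSampleFromJson(vecValid, vecValidlabel,
                           "../../data/mnist_validall.json", iInput);
    neural.SetTrainNum((int)vecValid.size());
    neural.Setwb(vvAllw, vvAllb);
    neural.CalErrorRate(vecValid, vecValidlabel);
    for (size_t i = 0; i < vecValid.size(); ++i)
        delete []vecValid[i];
}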
cnn.h
#ifndef CNN_H
#define CNN_H

#include "featuremap.h"
#include "poollayer.h"
#include <vector>

using std::vector;

typedef unsigned short WORD;

/**
 * This convolution layer mimics Theano's test procedure.
 * When the input layer holds num feature maps, this conv layer is assumed
 * to produce featureNum feature maps. Each output pixel combines all num
 * input feature maps, and there is no bias at this stage.
 * The output then goes to the pooling layer: pooling takes the maximum
 * inside each poolsize window and then adds the bias, so there are
 * featureNum bias values in total.
 */
class CnnLayer
{
public:
    CnnLayer(int iSampleNum, int iInputImageNumber, int iInputImageWidth,
             int iFeatureMapNumber, int iKernelWidth, int iPoolWidth);
    ~CnnLayer();
    void Forward_propagation(double *pdInputData);
    void Back_propagation(double* , double* , double );
    void Train(double *x, WORD y, double dLr);
    int Predict(double *);
    void Setwb(vector<double*> &vpdw, vector<double> &vdb);
    void SetInputAllData(double **ppInputAllData, int iInputNum);
    void SetTrainNum(int iSampleNumber);
    void PrintOutputData();
    double* GetOutputData();

private:
    int m_iSampleNum;
    FeatureMap *m_pFeatureMap;
    PoolLayer *m_pPoolLayer;
    // values needed during backpropagation
    double **m_ppdDelta;
    double *m_pdInputData;
    double *m_pdOutputData;
};

void TestCnn();

#endif // CNN_H
cnn.cpp
#include "cnn.h"
#include "util.h"
#include <cassert>

CnnLayer::CnnLayer(int iSampleNum, int iInputImageNumber, int iInputImageWidth,
                   int iFeatureMapNumber, int iKernelWidth, int iPoolWidth):
    m_iSampleNum(iSampleNum), m_pdInputData(NULL), m_pdOutputData(NULL)
{
    m_pFeatureMap = new FeatureMap(iInputImageNumber, iInputImageWidth,
                                   iFeatureMapNumber, iKernelWidth);
    int iFeatureMapWidth = iInputImageWidth - iKernelWidth + 1;
    m_pPoolLayer = new PoolLayer(iFeatureMapNumber, iPoolWidth, iFeatureMapWidth);
}

CnnLayer::~CnnLayer()
{
    delete m_pFeatureMap;
    delete m_pPoolLayer;
}

void CnnLayer::Forward_propagation(double *pdInputData)
{
    m_pFeatureMap->Convolute(pdInputData);
    m_pPoolLayer->Convolute(m_pFeatureMap->GetFeatureMapValue());
    m_pdOutputData = m_pPoolLayer->GetOutputData();
    /************************************************************************/
    /* debug the intermediate results of each convolution stage;
       remove once everything works                                         */
    /************************************************************************/
    /*m_pFeatureMap->PrintOutputData();
    m_pPoolLayer->PrintOutputData();*/
}

void CnnLayer::SetInputAllData(double **ppInputAllData, int iInputNum)
{
}

double* CnnLayer::GetOutputData()
{
    assert(NULL != m_pdOutputData);
    return m_pdOutputData;
}

void CnnLayer::Setwb(vector<double*> &vpdw, vector<double> &vdb)
{
    m_pFeatureMap->SetWeigh(vpdw);
    m_pPoolLayer->SetBias(vdb);
}

void CnnLayer::SetTrainNum(int iSampleNumber)
{
    m_iSampleNum = iSampleNumber;
}

void CnnLayer::PrintOutputData()
{
    m_pFeatureMap->PrintOutputData();
    m_pPoolLayer->PrintOutputData();
}

void TestCnn()
{
    const int iFeatureMapNumber = 2, iPoolWidth = 2, iInputImageWidth = 8,
              iKernelWidth = 3, iInputImageNumber = 2;

    double *pdImage = new double[iInputImageWidth * iInputImageWidth * iInputImageNumber];
    double arrInput[iInputImageNumber][iInputImageWidth * iInputImageWidth];
    MakeCnnSample(arrInput, pdImage, iInputImageWidth, iInputImageNumber);

    double *pdKernel = new double[3 * 3 * iInputImageNumber];
    MakeCnnWeigh(pdKernel, iInputImageNumber);

    CnnLayer cnn(3, iInputImageNumber, iInputImageWidth, iFeatureMapNumber,
                 iKernelWidth, iPoolWidth);

    vector<double*> vecWeigh;
    vector<double> vecBias;
    for (int i = 0; i < iFeatureMapNumber; ++i)
    {
        vecBias.push_back(1.0);
    }
    vecWeigh.push_back(pdKernel);
    // the second kernel must live on the heap as well, because Setwb hands
    // ownership of the buffers to FeatureMap, whose destructor frees them
    double *pdKernel2 = new double[3 * 3 * iInputImageNumber];
    for (int i = 0; i < 3 * 3 * iInputImageNumber; ++i)
    {
        pdKernel2[i] = i;
    }
    vecWeigh.push_back(pdKernel2);
    cnn.Setwb(vecWeigh, vecBias);

    cnn.Forward_propagation(pdImage);
    cnn.PrintOutputData();

    // pdKernel and pdKernel2 are now owned (and freed) by the CnnLayer
    delete []pdImage;
}
featuremap.h
#ifndef FEATUREMAP_H
#define FEATUREMAP_H

#include <cassert>
#include <vector>

using std::vector;

class FeatureMap
{
public:
    FeatureMap(int iInputImageNumber, int iInputImageWidth,
               int iFeatureMapNumber, int iKernelWidth);
    ~FeatureMap();
    void Forward_propagation(double* );
    void Back_propagation(double* , double* , double );
    void Convolute(double *pdInputData);

    int GetFeatureMapSize()
    {
        return m_iFeatureMapSize;
    }

    int GetFeatureMapWidth()
    {
        return m_iFeatureMapWidth;
    }

    double* GetFeatureMapValue()
    {
        assert(m_pdOutputValue != NULL);
        return m_pdOutputValue;
    }

    void SetWeigh(const vector<double *> &vecWeigh);
    void PrintOutputData();

    double **m_ppdWeigh;
    double *m_pdBias;

private:
    int m_iInputImageNumber;
    int m_iInputImageWidth;
    int m_iInputImageSize;
    int m_iFeatureMapNumber;
    int m_iFeatureMapWidth;
    int m_iFeatureMapSize;
    int m_iKernelWidth;

    // double m_dBias;
    double *m_pdOutputValue;
};

#endif // FEATUREMAP_H
featuremap.cpp
#include "featuremap.h"
#include "util.h"
#include <cassert>

FeatureMap::FeatureMap(int iInputImageNumber, int iInputImageWidth,
                       int iFeatureMapNumber, int iKernelWidth):
    m_iInputImageNumber(iInputImageNumber),
    m_iInputImageWidth(iInputImageWidth),
    m_iFeatureMapNumber(iFeatureMapNumber),
    m_iKernelWidth(iKernelWidth)
{
    m_iFeatureMapWidth = m_iInputImageWidth - m_iKernelWidth + 1;
    m_iInputImageSize = m_iInputImageWidth * m_iInputImageWidth;
    m_iFeatureMapSize = m_iFeatureMapWidth * m_iFeatureMapWidth;

    int iKernelSize;
    iKernelSize = m_iKernelWidth * m_iKernelWidth;
    double dbase = 1.0 / m_iInputImageSize;
    srand((unsigned)time(NULL));

    m_ppdWeigh = new double*[m_iFeatureMapNumber];
    m_pdBias = new double[m_iFeatureMapNumber];
    for (int i = 0; i < m_iFeatureMapNumber; ++i)
    {
        m_ppdWeigh[i] = new double[m_iInputImageNumber * iKernelSize];
        for (int j = 0; j < m_iInputImageNumber * iKernelSize; ++j)
        {
            m_ppdWeigh[i][j] = uniform(-dbase, dbase);
        }
        //m_pdBias[i] = uniform(-dbase, dbase);
        // Theano's conv layer apparently does not use a bias;
        // the bias is applied in the pooling layer instead
        m_pdBias[i] = 0;
    }

    m_pdOutputValue = new double[m_iFeatureMapNumber * m_iFeatureMapSize];
    // m_dBias = uniform(-dbase, dbase);
}

FeatureMap::~FeatureMap()
{
    delete []m_pdOutputValue;
    delete []m_pdBias;
    for (int i = 0; i < m_iFeatureMapNumber; ++i)
    {
        delete []m_ppdWeigh[i];
    }
    delete []m_ppdWeigh;
}

void FeatureMap::SetWeigh(const vector<double *> &vecWeigh)
{
    assert(vecWeigh.size() == (DWORD)m_iFeatureMapNumber);
    for (int i = 0; i < m_iFeatureMapNumber; ++i)
    {
        delete []m_ppdWeigh[i];
        m_ppdWeigh[i] = vecWeigh[i];
    }
}

/*
  Convolution.
  pdInputData: a flat vector holding all input images.
*/
void FeatureMap::Convolute(double *pdInputData)
{
    for (int iMapIndex = 0; iMapIndex < m_iFeatureMapNumber; ++iMapIndex)
    {
        double dBias = m_pdBias[iMapIndex];
        // for each feature map
        for (int i = 0; i < m_iFeatureMapWidth; ++i)
        {
            for (int j = 0; j < m_iFeatureMapWidth; ++j)
            {
                double dSum = 0.0;
                int iInputIndex, iKernelIndex, iInputIndexStart, iKernelStart, iOutIndex;
                // index into the output vector
                iOutIndex = iMapIndex * m_iFeatureMapSize + i * m_iFeatureMapWidth + j;
                // accumulate over every input image
                for (int k = 0; k < m_iInputImageNumber; ++k)
                {
                    // start position in the input image corresponding to the kernel
                    //iInputIndexStart = k * m_iInputImageSize + j * m_iInputImageWidth + i;
                    iInputIndexStart = k * m_iInputImageSize + i * m_iInputImageWidth + j;
                    // start position of this input image's kernel
                    iKernelStart = k * m_iKernelWidth * m_iKernelWidth;
                    for (int m = 0; m < m_iKernelWidth; ++m)
                    {
                        for (int n = 0; n < m_iKernelWidth; ++n)
                        {
                            //iKernelIndex = iKernelStart + n * m_iKernelWidth + m;
                            iKernelIndex = iKernelStart + m * m_iKernelWidth + n;
                            // author's note: not sure the index expression below is correct
                            iInputIndex = iInputIndexStart + m * m_iInputImageWidth + n;
                            dSum += pdInputData[iInputIndex] * m_ppdWeigh[iMapIndex][iKernelIndex];
                        } // end n
                    }     // end m
                }         // end k
                // add the bias
                //dSum += dBias;
                m_pdOutputValue[iOutIndex] = dSum;
            } // end j
        }     // end i
    }         // end iMapIndex
}

void FeatureMap::PrintOutputData()
{
    for (int i = 0; i < m_iFeatureMapNumber; ++i)
    {
        cout << "featuremap " << i << endl;
        for (int m = 0; m < m_iFeatureMapWidth; ++m)
        {
            for (int n = 0; n < m_iFeatureMapWidth; ++n)
            {
                cout << m_pdOutputValue[i * m_iFeatureMapSize + m * m_iFeatureMapWidth + n] << ' ';
            }
            cout << endl;
        }
        cout << endl;
    }
}
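In formula form, FeatureMap::Convolute computes for feature map f at output pixel (i, j) a plain cross-correlation summed over all C input maps (there is no kernel flip here; the 180-degree rotation was already applied when the weights were exported from Theano):

$$\mathrm{out}_f(i,j) = \sum_{k=0}^{C-1}\sum_{m=0}^{K-1}\sum_{n=0}^{K-1} x_k(i+m,\, j+n)\; w_{f,k}(m,n)$$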
poollayer.h
#ifndef POOLLAYER_H
#define POOLLAYER_H

#include <vector>
using std::vector;

class PoolLayer
{
public:
    PoolLayer(int iOutImageNumber, int iPoolWidth, int iFeatureMapWidth);
    ~PoolLayer();
    void Convolute(double *pdInputData);
    void SetBias(const vector<double> &vecBias);
    double* GetOutputData();
    void PrintOutputData();

private:
    int m_iOutImageNumber;
    int m_iPoolWidth;
    int m_iFeatureMapWidth;
    int m_iPoolSize;
    int m_iOutImageEdge;
    int m_iOutImageSize;

    double *m_pdOutData;
    double *m_pdBias;
};

#endif // POOLLAYER_H
poollayer.cpp
#include "poollayer.h"
#include "util.h"
#include <cassert>
#include <climits>

PoolLayer::PoolLayer(int iOutImageNumber, int iPoolWidth, int iFeatureMapWidth):
    m_iOutImageNumber(iOutImageNumber),
    m_iPoolWidth(iPoolWidth),
    m_iFeatureMapWidth(iFeatureMapWidth)
{
    m_iPoolSize = m_iPoolWidth * m_iPoolWidth;
    m_iOutImageEdge = m_iFeatureMapWidth / m_iPoolWidth;
    m_iOutImageSize = m_iOutImageEdge * m_iOutImageEdge;
    m_pdOutData = new double[m_iOutImageNumber * m_iOutImageSize];
    m_pdBias = new double[m_iOutImageNumber];
    /*for (int i = 0; i < m_iOutImageNumber; ++i)
    {
        m_pdBias[i] = 1;
    }*/
}

PoolLayer::~PoolLayer()
{
    delete []m_pdOutData;
    delete []m_pdBias;
}

void PoolLayer::Convolute(double *pdInputData)
{
    int iFeatureMapSize = m_iFeatureMapWidth * m_iFeatureMapWidth;
    for (int iOutImageIndex = 0; iOutImageIndex < m_iOutImageNumber; ++iOutImageIndex)
    {
        double dBias = m_pdBias[iOutImageIndex];
        for (int i = 0; i < m_iOutImageEdge; ++i)
        {
            for (int j = 0; j < m_iOutImageEdge; ++j)
            {
                double dValue = 0.0;
                int iInputIndex, iInputIndexStart, iOutIndex;
                /************************************************************************/
                /* The biggest bug was here: dMaxPixel used to be initialized to 0
                   before searching for the maximum. But pixel values can be negative,
                   which silently corrupted everything downstream; it was extremely
                   hard to track down.                                                  */
                /************************************************************************/
                double dMaxPixel = INT_MIN;
                iOutIndex = iOutImageIndex * m_iOutImageSize + i * m_iOutImageEdge + j;
                iInputIndexStart = iOutImageIndex * iFeatureMapSize + (i * m_iFeatureMapWidth + j) * m_iPoolWidth;
                for (int m = 0; m < m_iPoolWidth; ++m)
                {
                    for (int n = 0; n < m_iPoolWidth; ++n)
                    {
                        // int iPoolIndex = m * m_iPoolWidth + n;
                        // author's note: not sure the index expression below is correct
                        iInputIndex = iInputIndexStart + m * m_iFeatureMapWidth + n;
                        if (pdInputData[iInputIndex] > dMaxPixel)
                        {
                            dMaxPixel = pdInputData[iInputIndex];
                        }
                    } // end n
                }     // end m
                dValue = dMaxPixel + dBias;
                assert(iOutIndex < m_iOutImageNumber * m_iOutImageSize);
                //m_pdOutData[iOutIndex] = (dMaxPixel);
                m_pdOutData[iOutIndex] = mytanh(dValue);
            } // end j
        }     // end i
    }         // end iOutImageIndex
}

void PoolLayer::SetBias(const vector<double> &vecBias)
{
    assert(vecBias.size() == (DWORD)m_iOutImageNumber);
    for (int i = 0; i < m_iOutImageNumber; ++i)
    {
        m_pdBias[i] = vecBias[i];
    }
}

double* PoolLayer::GetOutputData()
{
    assert(NULL != m_pdOutData);
    return m_pdOutData;
}

void PoolLayer::PrintOutputData()
{
    for (int i = 0; i < m_iOutImageNumber; ++i)
    {
        cout << "pool image " << i << endl;
        for (int m = 0; m < m_iOutImageEdge; ++m)
        {
            for (int n = 0; n < m_iOutImageEdge; ++n)
            {
                cout << m_pdOutData[i * m_iOutImageSize + m * m_iOutImageEdge + n] << ' ';
            }
            cout << endl;
        }
        cout << endl;
    }
}
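PoolLayer::Convolute implements, for every non-overlapping p x p window, exactly what Theano's max_pool_2d(..., ignore_border=True) followed by T.tanh(pooled_out + b) computes:

$$\mathrm{pool}_f(i,j) = \tanh\Big(\max_{0 \le m,n < p} \mathrm{in}_f(p\,i+m,\; p\,j+n) + b_f\Big)$$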
mlp.h
#ifndef MLP_H
#define MLP_H

#include "hiddenLayer.h"
#include "logisticRegression.h"

class Mlp
{
public:
    Mlp(int n, int n_i, int n_o, int nhl, int *hls);
    ~Mlp();

    // void Train(double** in_data, double** in_label, double dLr, int epochs);
    void Predict(double** in_data, int n);
    void Train(double *x, WORD y, double dLr);
    void TrainAllSample(const vector<double*> &vecTrain, const vector<WORD> &vectrainlabel, double dLr);
    double CalErrorRate(const vector<double *> &vecvalid, const vector<WORD> &vecValidlabel);
    void Writewb(const char *szName);
    void Readwb(const char *szName);
    void Setwb(vector< vector<double*> > &vvAllw, vector< vector<double> > &vvAllb);
    void SetTrainNum(int iNum);
    int Predict(double *pInputData);
    // void Forward_propagation(double** ppdata, int n);
    double* Forward_propagation(double *);
    int* GetHiddenSize();
    int GetHiddenNumber();
    double *GetHiddenOutputData();

    HiddenLayer **m_ppHiddenLayer;
    LogisticRegression *m_pLogisticLayer;

private:
    int m_iSampleNum;           // number of samples
    int m_iInput;               // input dimension
    int m_iOut;                 // output dimension
    int m_iHiddenLayerNum;      // number of hidden layers
    int* m_piHiddenLayerSize;   // sizes of the hidden layers, e.g. {3, 4} means
                                // two hidden layers, the first with 3 units and
                                // the second with 4
};

void mlp();
void TestMlpTheano(const int m_iInput, const int ihidden, const int m_iOut);
void TestMlpMnist(const int m_iInput, const int ihidden, const int m_iOut);

#endif
mlp.cpp
#include <iostream>
#include "mlp.h"
#include "util.h"
#include <cassert>
#include <iomanip>
using namespace std;

const int m_iSamplenum = 8, innode = 3, outnode = 8;

Mlp::Mlp(int n, int n_i, int n_o, int nhl, int *hls)
{
    m_iSampleNum = n;
    m_iInput = n_i;
    m_iOut = n_o;
    m_iHiddenLayerNum = nhl;
    m_piHiddenLayerSize = hls;

    // build the network structure
    m_ppHiddenLayer = new HiddenLayer* [m_iHiddenLayerNum];
    for (int i = 0; i < m_iHiddenLayerNum; ++i)
    {
        if (i == 0)
        {
            // the first hidden layer
            m_ppHiddenLayer[i] = new HiddenLayer(m_iInput, m_piHiddenLayerSize[i]);
        }
        else
        {
            // the other hidden layers
            m_ppHiddenLayer[i] = new HiddenLayer(m_piHiddenLayerSize[i-1], m_piHiddenLayerSize[i]);
        }
    }
    if (m_iHiddenLayerNum > 0)
    {
        // the final softmax layer
        m_pLogisticLayer = new LogisticRegression(m_piHiddenLayerSize[m_iHiddenLayerNum - 1], m_iOut, m_iSampleNum);
    }
    else
    {
        m_pLogisticLayer = new LogisticRegression(m_iInput, m_iOut, m_iSampleNum);
    }
}

Mlp::~Mlp()
{
    // what a double pointer allocates is not necessarily a 2D array
    for (int i = 0; i < m_iHiddenLayerNum; ++i)
        delete m_ppHiddenLayer[i];   // no [] when deleting the elements
    delete[] m_ppHiddenLayer;

    // log_layer is a plain object pointer; it must not be deleted as an array
    delete m_pLogisticLayer;
}

void Mlp::TrainAllSample(const vector<double *> &vecTrain, const vector<WORD> &vectrainlabel, double dLr)
{
    cout << "Mlp::TrainAllSample" << endl;
    for (int j = 0; j < m_iSampleNum; ++j)
    {
        Train(vecTrain[j], vectrainlabel[j], dLr);
    }
}

void Mlp::Train(double *pdTrain, WORD usLabel, double dLr)
{
    // cout << "******pdLabel****" << endl;
    // printArrDouble(ppdinLabel, m_iSampleNum, m_iOut);
    double *pdLabel = new double[m_iOut];
    MakeOneLabel(usLabel, pdLabel, m_iOut);

    // forward pass
    for (int n = 0; n < m_iHiddenLayerNum; ++n)
    {
        if (n == 0)  // the first hidden layer consumes the raw input
        {
            m_ppHiddenLayer[n]->Forward_propagation(pdTrain);
        }
        else         // the other hidden layers consume the previous layer's output
        {
            m_ppHiddenLayer[n]->Forward_propagation(m_ppHiddenLayer[n-1]->m_pdOutdata);
        }
    }
    // the softmax layer consumes the last hidden layer's output
    m_pLogisticLayer->Forward_propagation(m_ppHiddenLayer[m_iHiddenLayerNum-1]->m_pdOutdata);

    // backward pass
    m_pLogisticLayer->Back_propagation(m_ppHiddenLayer[m_iHiddenLayerNum-1]->m_pdOutdata, pdLabel, dLr);

    for (int n = m_iHiddenLayerNum-1; n >= 1; --n)
    {
        if (n == m_iHiddenLayerNum-1)
        {
            m_ppHiddenLayer[n]->Back_propagation(m_ppHiddenLayer[n-1]->m_pdOutdata,
                m_pLogisticLayer->m_pdDelta, m_pLogisticLayer->m_ppdW,
                m_pLogisticLayer->m_iOut, dLr);
        }
        else
        {
            double *pdInputData;
            pdInputData = m_ppHiddenLayer[n-1]->m_pdOutdata;
            m_ppHiddenLayer[n]->Back_propagation(pdInputData,
                m_ppHiddenLayer[n+1]->m_pdDelta, m_ppHiddenLayer[n+1]->m_ppdW,
                m_ppHiddenLayer[n+1]->m_iOut, dLr);
        }
    }
    // the first hidden layer is a special case: its input is the raw sample,
    // and when there is only one hidden layer its successor is the softmax layer
    if (m_iHiddenLayerNum > 1)
        m_ppHiddenLayer[0]->Back_propagation(pdTrain,
            m_ppHiddenLayer[1]->m_pdDelta, m_ppHiddenLayer[1]->m_ppdW,
            m_ppHiddenLayer[1]->m_iOut, dLr);
    else
        m_ppHiddenLayer[0]->Back_propagation(pdTrain,
            m_pLogisticLayer->m_pdDelta, m_pLogisticLayer->m_ppdW,
            m_pLogisticLayer->m_iOut, dLr);

    delete []pdLabel;
}

void Mlp::SetTrainNum(int iNum)
{
    m_iSampleNum = iNum;
}

double* Mlp::Forward_propagation(double* pData)
{
    double *pdForwardValue = pData;
    for (int n = 0; n < m_iHiddenLayerNum; ++n)
    {
        if (n == 0)  // the first hidden layer consumes the raw input
        {
            pdForwardValue = m_ppHiddenLayer[n]->Forward_propagation(pData);
        }
        else         // the other hidden layers consume the previous layer's output
        {
            pdForwardValue = m_ppHiddenLayer[n]->Forward_propagation(pdForwardValue);
        }
    }
    return pdForwardValue;
    // the softmax layer would consume the last hidden layer's output:
    // m_pLogisticLayer->Forward_propagation(m_ppHiddenLayer[m_iHiddenLayerNum-1]->m_pdOutdata);
    // m_pLogisticLayer->Predict(m_ppHiddenLayer[m_iHiddenLayerNum-1]->m_pdOutdata);
}

int Mlp::Predict(double *pInputData)
{
    Forward_propagation(pInputData);
    int iResult = m_pLogisticLayer->Predict(m_ppHiddenLayer[m_iHiddenLayerNum-1]->m_pdOutdata);
    return iResult;
}

void Mlp::Setwb(vector< vector<double*> > &vvAllw, vector< vector<double> > &vvAllb)
{
    for (int i = 0; i < m_iHiddenLayerNum; ++i)
    {
        m_ppHiddenLayer[i]->Setwb(vvAllw[i], vvAllb[i]);
    }
    m_pLogisticLayer->Setwb(vvAllw[m_iHiddenLayerNum], vvAllb[m_iHiddenLayerNum]);
}

void Mlp::Writewb(const char *szName)
{
    for (int i = 0; i < m_iHiddenLayerNum; ++i)
    {
        m_ppHiddenLayer[i]->Writewb(szName);
    }
    m_pLogisticLayer->Writewb(szName);
}

double Mlp::CalErrorRate(const vector<double *> &vecvalid, const vector<WORD> &vecValidlabel)
{
    int iErrorNumber = 0, iValidNumber = vecValidlabel.size();
    for (int i = 0; i < iValidNumber; ++i)
    {
        int iResult = Predict(vecvalid[i]);
        if (iResult != vecValidlabel[i])
        {
            ++iErrorNumber;
        }
    }
    cout << "the num of error is " << iErrorNumber << endl;
    double dErrorRate = (double)iErrorNumber / iValidNumber;
    cout << "the error rate of Train sample by softmax is " << setprecision(10) << dErrorRate * 100 << "%" << endl;
    return dErrorRate;
}

void Mlp::Readwb(const char *szName)
{
    long dcurpos = 0, dreadsize = 0;
    for (int i = 0; i < m_iHiddenLayerNum; ++i)
    {
        dreadsize = m_ppHiddenLayer[i]->Readwb(szName, dcurpos);
        cout << "hiddenlayer " << i + 1 << " read bytes: " << dreadsize << endl;
        if (-1 != dreadsize)
            dcurpos += dreadsize;
        else
        {
            cout << "read wb error from HiddenLayer" << endl;
            return;
        }
    }
    dreadsize = m_pLogisticLayer->Readwb(szName, dcurpos);
    if (-1 != dreadsize)
        dcurpos += dreadsize;
    else
    {
        cout << "read wb error from softmaxLayer" << endl;
        return;
    }
}

int* Mlp::GetHiddenSize()
{
    return m_piHiddenLayerSize;
}

double* Mlp::GetHiddenOutputData()
{
    assert(m_iHiddenLayerNum > 0);
    return m_ppHiddenLayer[m_iHiddenLayerNum-1]->m_pdOutdata;
}

int Mlp::GetHiddenNumber()
{
    return m_iHiddenLayerNum;
}

//double **makeLabelSample(double **label_x)
double **makeLabelSample(double label_x[][outnode])
{
    double **pplabelSample;
    pplabelSample = new double*[m_iSamplenum];
    for (int i = 0; i < m_iSamplenum; ++i)
    {
        pplabelSample[i] = new double[outnode];
    }
    for (int i = 0; i < m_iSamplenum; ++i)
    {
        for (int j = 0; j < outnode; ++j)
            pplabelSample[i][j] = label_x[i][j];
    }
    return pplabelSample;
}

double **maken_train(double train_x[][innode])
{
    double **ppn_train;
    ppn_train = new double*[m_iSamplenum];
    for (int i = 0; i < m_iSamplenum; ++i)
    {
        ppn_train[i] = new double[innode];
    }
    for (int i = 0; i < m_iSamplenum; ++i)
    {
        for (int j = 0; j < innode; ++j)
            ppn_train[i][j] = train_x[i][j];
    }
    return ppn_train;
}

void TestMlpMnist(const int m_iInput, const int ihidden, const int m_iOut)
{
    const int ihiddenSize = 1;
    int phidden[ihiddenSize] = {ihidden};
    // construct the MLP
    Mlp neural(m_iSamplenum, m_iInput, m_iOut, ihiddenSize, phidden);
    vector<double*> vecTrain, vecvalid;
    vector<WORD> vecValidlabel, vectrainlabel;
    LoadTestSampleFromJson(vecvalid, vecValidlabel, "../../data/mnist.json", m_iInput);
    LoadTestSampleFromJson(vecTrain, vectrainlabel, "../../data/mnisttrain.json", m_iInput);
    // test
    int itrainnum = vecTrain.size();
    neural.SetTrainNum(itrainnum);

    const int iepochs = 1;
    const double dLr = 0.1;
    neural.CalErrorRate(vecvalid, vecValidlabel);
    for (int i = 0; i < iepochs; ++i)
    {
        neural.TrainAllSample(vecTrain, vectrainlabel, dLr);
        neural.CalErrorRate(vecvalid, vecValidlabel);
    }

    for (vector<double*>::iterator cit = vecTrain.begin(); cit != vecTrain.end(); ++cit)
    {
        delete [](*cit);
    }
    for (vector<double*>::iterator cit = vecvalid.begin(); cit != vecvalid.end(); ++cit)
    {
        delete [](*cit);
    }
}

void TestMlpTheano(const int m_iInput, const int ihidden, const int m_iOut)
{
    const int ihiddenSize = 1;
    int phidden[ihiddenSize] = {ihidden};
    // construct the MLP
    Mlp neural(m_iSamplenum, m_iInput, m_iOut, ihiddenSize, phidden);
    vector<double*> vecTrain, vecw;
    vector<double> vecb;
    vector<WORD> vecLabel;
    vector< vector<double*> > vvAllw;
    vector< vector<double> > vvAllb;
    const char *pcfilename = "../../data/theanomlp.json";
    vector<int> vecSecondDimOfWeigh;
    vecSecondDimOfWeigh.push_back(m_iInput);
    vecSecondDimOfWeigh.push_back(ihidden);
    LoadWeighFromJson(vvAllw, vvAllb, pcfilename, vecSecondDimOfWeigh);
    LoadTestSampleFromJson(vecTrain, vecLabel, "../../data/mnist_validall.json", m_iInput);
    cout << "loadwb ---------" << endl;
    int itrainnum = vecTrain.size();
    neural.SetTrainNum(itrainnum);
    neural.Setwb(vvAllw, vvAllb);
    cout << "Predict------------" << endl;
    neural.CalErrorRate(vecTrain, vecLabel);

    for (vector<double*>::iterator cit = vecTrain.begin(); cit != vecTrain.end(); ++cit)
    {
        delete [](*cit);
    }
}

void mlp()
{
    // input samples
    double X[m_iSamplenum][innode] = {
        {0,0,0}, {0,0,1}, {0,1,0}, {0,1,1}, {1,0,0}, {1,0,1}, {1,1,0}, {1,1,1}
    };
    double Y[m_iSamplenum][outnode] = {
        {1, 0, 0, 0, 0, 0, 0, 0},
        {0, 1, 0, 0, 0, 0, 0, 0},
        {0, 0, 1, 0, 0, 0, 0, 0},
        {0, 0, 0, 1, 0, 0, 0, 0},
        {0, 0, 0, 0, 1, 0, 0, 0},
        {0, 0, 0, 0, 0, 1, 0, 0},
        {0, 0, 0, 0, 0, 0, 1, 0},
        {0, 0, 0, 0, 0, 0, 0, 1},
    };
    WORD pdLabel[outnode] = {0, 1, 2, 3, 4, 5, 6, 7};
    const int ihiddenSize = 2;
    int phidden[ihiddenSize] = {5, 5};
    //printArr(phidden, 1);
    Mlp neural(m_iSamplenum, innode, outnode, ihiddenSize, phidden);
    double **train_x, **ppdLabel;
    train_x = maken_train(X);
    //printArrDouble(train_x, m_iSamplenum, innode);
    ppdLabel = makeLabelSample(Y);
    for (int i = 0; i < 3500; ++i)
    {
        for (int j = 0; j < m_iSamplenum; ++j)
        {
            neural.Train(train_x[j], pdLabel[j], 0.1);
        }
    }
    cout << "training complete..." << endl;
    for (int i = 0; i < m_iSamplenum; ++i)
        neural.Predict(train_x[i]);
    // szName stores the weights
    // const char *szName = "mlp55new.wb";
    // neural.Writewb(szName);
    // Mlp neural2(m_iSamplenum, innode, outnode, ihiddenSize, phidden);
    // cout << "Readwb start..." << endl;
    // neural2.Readwb(szName);
    // cout << "Readwb end..." << endl;
    // cout << "----------after readwb________" << endl;
    // for (int i = 0; i < m_iSamplenum; ++i)
    //     neural2.Forward_propagation(train_x[i]);
    for (int i = 0; i != m_iSamplenum; ++i)
    {
        delete []train_x[i];
        delete []ppdLabel[i];
    }
    delete []train_x;
    delete []ppdLabel;
    cout << endl;
}
hiddenLayer.h
#ifndef HIDDENLAYER_H
#define HIDDENLAYER_H

#include "neuralbase.h"

class HiddenLayer: public NeuralBase
{
public:
    HiddenLayer(int n_i, int n_o);
    ~HiddenLayer();
    double* Forward_propagation(double* input_data);
    void Back_propagation(double *pdInputData, double *pdNextLayerDelta,
                          double** ppdnextLayerW, int iNextLayerOutNum, double dLr);
};

#endif
hiddenLayer.cpp
#include <cmath>
#include <cassert>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include "hiddenLayer.h"
#include "util.h"
using namespace std;

HiddenLayer::HiddenLayer(int n_i, int n_o): NeuralBase(n_i, n_o, 0)
{
}

HiddenLayer::~HiddenLayer()
{
}

/************************************************************************/
/* Note:
   to reproduce Theano's test results, the hidden-layer activation must
   be tanh; otherwise, for the MLP training process, choose sigmoid.
*/
/************************************************************************/
double* HiddenLayer::Forward_propagation(double* pdInputData)
{
    NeuralBase::Forward_propagation(pdInputData);
    for (int i = 0; i < m_iOut; ++i)
    {
        // m_pdOutdata[i] = sigmoid(m_pdOutdata[i]);
        m_pdOutdata[i] = mytanh(m_pdOutdata[i]);
    }
    return m_pdOutdata;
}

void HiddenLayer::Back_propagation(double *pdInputData, double *pdNextLayerDelta,
                                   double** ppdnextLayerW, int iNextLayerOutNum,
                                   double dLr)
{
    /*
      pdInputData       input data
      pdNextLayerDelta  the next layer's residual delta, an array of size iNextLayerOutNum
      ppdnextLayerW     the weights from this layer to the next layer
      iNextLayerOutNum  simply the next layer's n_out
      dLr               the learning rate
      m_iSampleNum      the total number of training samples
    */

    // sigma must have as many elements as this layer has units;
    // the code found online got this wrong (clearly never tested)
    //double* sigma = new double[iNextLayerOutNum];
    double* sigma = new double[m_iOut];
    //double sigma[10];
    for (int i = 0; i < m_iOut; ++i)
        sigma[i] = 0.0;
    for (int i = 0; i < iNextLayerOutNum; ++i)
    {
        for (int j = 0; j < m_iOut; ++j)
        {
            sigma[j] += ppdnextLayerW[i][j] * pdNextLayerDelta[i];
        }
    }
    // compute this layer's residual delta
    // (the y * (1 - y) factor is the sigmoid derivative)
    for (int i = 0; i < m_iOut; ++i)
    {
        m_pdDelta[i] = sigma[i] * m_pdOutdata[i] * (1 - m_pdOutdata[i]);
    }
    // adjust this layer's weights w
    for (int i = 0; i < m_iOut; ++i)
    {
        for (int j = 0; j < m_iInput; ++j)
        {
            m_ppdW[i][j] += dLr * m_pdDelta[i] * pdInputData[j];
        }
        m_pdBias[i] += dLr * m_pdDelta[i];
    }
    delete[] sigma;
}
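In equations, the backpropagation above computes for hidden unit j with output y_j

$$\delta_j = \Big(\sum_i W^{\mathrm{next}}_{ij}\,\delta^{\mathrm{next}}_i\Big)\, y_j (1 - y_j), \qquad W_{jk} \mathrel{+}= \eta\, \delta_j\, x_k, \qquad b_j \mathrel{+}= \eta\, \delta_j.$$

The factor y_j(1 - y_j) is the sigmoid derivative; for tanh it would have to be 1 - y_j^2, which is why this training code pairs with the sigmoid forward pass.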
logisticRegression.h
#ifndef LOGISTICREGRESSIONLAYER
#define LOGISTICREGRESSIONLAYER

#include "neuralbase.h"

typedef unsigned short WORD;

class LogisticRegression: public NeuralBase
{
public:
    LogisticRegression(int n_i, int n_o, int);
    ~LogisticRegression();

    double* Forward_propagation(double* input_data);
    void Softmax(double* x);
    void Train(double *pdTrain, WORD usLabel, double dLr);
    void SetOldWb(double ppdWeigh[][3], double arriBias[8]);
    int Predict(double *);
    void MakeLabels(int* piMax, double (*pplabels)[8]);
};

void Test_lr();
void Testwb();
void Test_theano(const int m_iInput, const int m_iOut);

#endif
logisticRegression.cpp
#include <cmath>
#include <cassert>
#include <iomanip>
#include <ctime>
#include <iostream>
#include "logisticRegression.h"
#include "util.h"
using namespace std;

LogisticRegression::LogisticRegression(int n_i, int n_o, int n_t): NeuralBase(n_i, n_o, n_t)
{
}

LogisticRegression::~LogisticRegression()
{
}

void LogisticRegression::Softmax(double* x)
{
    double _max = 0.0;
    double _sum = 0.0;
    for (int i = 0; i < m_iOut; ++i)
    {
        if (_max < x[i])
            _max = x[i];
    }
    // subtracting the maximum keeps exp from overflowing
    for (int i = 0; i < m_iOut; ++i)
    {
        x[i] = exp(x[i] - _max);
        _sum += x[i];
    }
    for (int i = 0; i < m_iOut; ++i)
    {
        x[i] /= _sum;
    }
}

double* LogisticRegression::Forward_propagation(double* pdinputdata)
{
    NeuralBase::Forward_propagation(pdinputdata);
    /************************************************************************/
    /* debugging                                                            */
    /************************************************************************/
    //cout << "Forward_propagation from LogisticRegression" << endl;
    //PrintOutputData();
    //cout << "over\n";
    Softmax(m_pdOutdata);
    return m_pdOutdata;
}

int LogisticRegression::Predict(double *pdtest)
{
    Forward_propagation(pdtest);
    /************************************************************************/
    /* for debugging                                                        */
    /************************************************************************/
    //PrintOutputData();
    int iResult = getMaxIndex(m_pdOutdata, m_iOut);
    return iResult;
}

void LogisticRegression::Train(double *pdTrain, WORD usLabel, double dLr)
{
    Forward_propagation(pdTrain);
    double *pdLabel = new double[m_iOut];
    MakeOneLabel(usLabel, pdLabel);
    Back_propagation(pdTrain, pdLabel, dLr);
    delete []pdLabel;
}

//double LogisticRegression::CalErrorRate(const vector<double*> &vecvalid, const vector<WORD> &vecValidlabel)
//{
//    int iErrorNumber = 0, iValidNumber = vecValidlabel.size();
//    for (int i = 0; i < iValidNumber; ++i)
//    {
//        int iResult = Predict(vecvalid[i]);
//        if (iResult != vecValidlabel[i])
//        {
//            ++iErrorNumber;
//        }
//    }
//    cout << "the num of error is " << iErrorNumber << endl;
//    double dErrorRate = (double)iErrorNumber / iValidNumber;
//    cout << "the error rate of Train sample by softmax is " << setprecision(10) << dErrorRate * 100 << "%" << endl;
//    return dErrorRate;
//}

void LogisticRegression::SetOldWb(double ppdWeigh[][3], double arriBias[8])
{
    for (int i = 0; i < m_iOut; ++i)
    {
        for (int j = 0; j < m_iInput; ++j)
            m_ppdW[i][j] = ppdWeigh[i][j];
        m_pdBias[i] = arriBias[i];
    }
    cout << "Setwb----------" << endl;
    printArrDouble(m_ppdW, m_iOut, m_iInput);
    printArr(m_pdBias, m_iOut);
}

//void LogisticRegression::TrainAllSample(const vector<double*> &vecTrain, const vector<WORD> &vectrainlabel, double dLr)
//{
//    for (int j = 0; j < m_iSamplenum; ++j)
//    {
//        Train(vecTrain[j], vectrainlabel[j], dLr);
//    }
//}

void LogisticRegression::MakeLabels(int* piMax, double (*pplabels)[8])
{
    for (int i = 0; i < m_iSamplenum; ++i)
    {
        for (int j = 0; j < m_iOut; ++j)
            pplabels[i][j] = 0;
        int k = piMax[i];
        pplabels[i][k] = 1.0;
    }
}

void Test_theano(const int m_iInput, const int m_iOut)
{
    // construct LogisticRegression
    LogisticRegression classifier(m_iInput, m_iOut, 0);

    vector<double*> vecTrain, vecvalid, vecw;
    vector<double> vecb;
    vector<WORD> vecValidlabel, vectrainlabel;
    LoadTestSampleFromJson(vecvalid, vecValidlabel, "../../data/mnist.json", m_iInput);
    LoadTestSampleFromJson(vecTrain, vectrainlabel, "../../data/mnisttrain.json", m_iInput);
    // test
    int itrainnum = vecTrain.size();
    classifier.m_iSamplenum = itrainnum;

    const int iepochs = 5;
    const double dLr = 0.1;
    for (int i = 0; i < iepochs; ++i)
    {
        classifier.TrainAllSample(vecTrain, vectrainlabel, dLr);
        if (i % 2 == 0)
        {
            cout << "Predict------------" << i + 1 << endl;
            classifier.CalErrorRate(vecvalid, vecValidlabel);
        }
    }

    for (vector<double*>::iterator cit = vecTrain.begin(); cit != vecTrain.end(); ++cit)
    {
        delete [](*cit);
    }
    for (vector<double*>::iterator cit = vecvalid.begin(); cit != vecvalid.end(); ++cit)
    {
        delete [](*cit);
    }
}

void Test_lr()
{
    srand(0);

    double learning_rate = 0.1;
    int n_epochs = 200;

    int test_N = 2;
    const int trainNum = 8, m_iInput = 3, m_iOut = 8;
    //int m_iOut = 2;
    double train_X[trainNum][m_iInput] = {
        {1, 1, 1},
        {1, 1, 0},
        {1, 0, 1},
        {1, 0, 0},
        {0, 1, 1},
        {0, 1, 0},
        {0, 0, 1},
        {0, 0, 0}
    };
    // sziMax stores the index of the maximum for each sample
    int sziMax[trainNum];
    for (int i = 0; i < trainNum; ++i)
        sziMax[i] = trainNum - i - 1;

    // construct LogisticRegression
    LogisticRegression classifier(m_iInput, m_iOut, trainNum);

    // train online
    for (int epoch = 0; epoch < n_epochs; epoch++)
    {
        for (int i = 0; i < trainNum; i++)
        {
            //classifier.trainEfficient(train_X[i], train_Y[i], learning_rate);
            classifier.Train(train_X[i], sziMax[i], learning_rate);
        }
    }

    const char *pcfile = "test.wb";
    classifier.Writewb(pcfile);

    LogisticRegression logistic(m_iInput, m_iOut, trainNum);
    logistic.Readwb(pcfile, 0);

    // test data
    double test_X[2][m_iInput] = {
        {1, 0, 1},
        {0, 0, 1}
    };
    // test
    cout << "before Readwb ---------" << endl;
    for (int i = 0; i < test_N; i++)
    {
        classifier.Predict(test_X[i]);
        cout << endl;
    }
    cout << "after Readwb ---------" << endl;
    for (int i = 0; i < trainNum; i++)
    {
        logistic.Predict(train_X[i]);
        cout << endl;
    }
    cout << "*********\n";
}

void Testwb()
{
    // int test_N = 2;
    const int trainNum = 8, m_iInput = 3, m_iOut = 8;
    //int m_iOut = 2;
    double train_X[trainNum][m_iInput] = {
        {1, 1, 1},
        {1, 1, 0},
        {1, 0, 1},
        {1, 0, 0},
        {0, 1, 1},
        {0, 1, 0},
        {0, 0, 1},
        {0, 0, 0}
    };
    double arriBias[m_iOut] = {1, 2, 3, 3, 3, 3, 2, 1};

    // construct LogisticRegression
    LogisticRegression classifier(m_iInput, m_iOut, trainNum);
    classifier.SetOldWb(train_X, arriBias);

    const char *pcfile = "test.wb";
    classifier.Writewb(pcfile);

    LogisticRegression logistic(m_iInput, m_iOut, trainNum);
    logistic.Readwb(pcfile, 0);
}
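The Softmax above uses the standard stabilization: for any constant c,

$$\mathrm{softmax}(x)_i = \frac{e^{x_i - c}}{\sum_k e^{x_k - c}},$$

and the code picks c = max(0, max_j x_j), which leaves the result unchanged while keeping exp from overflowing.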
neuralbase.h
#ifndef NEURALBASE_H
#define NEURALBASE_H

#include <vector>
using std::vector;

typedef unsigned short WORD;

class NeuralBase
{
public:
    NeuralBase(int , int , int);
    virtual ~NeuralBase();
    virtual double* Forward_propagation(double* );
    virtual void Back_propagation(double* , double* , double );
    virtual void Train(double *x, WORD y, double dLr);
    virtual int Predict(double *);

    void Callbackwb();
    void MakeOneLabel(int iMax, double *pdLabel);
    void TrainAllSample(const vector<double*> &vecTrain, const vector<WORD> &vectrainlabel, double dLr);
    double CalErrorRate(const vector<double*> &vecvalid, const vector<WORD> &vecValidlabel);
    void Printwb();
    void Writewb(const char *szName);
    long Readwb(const char *szName, long);
    void Setwb(vector<double*> &vpdw, vector<double> &vdb);
    void PrintOutputData();

    int m_iInput;
    int m_iOut;
    int m_iSamplenum;
    double** m_ppdW;
    double* m_pdBias;
    // this layer's forward-pass output, which is also the final prediction
    double* m_pdOutdata;
    // values needed during backpropagation
    double* m_pdDelta;

private:
    void _callbackwb();
};

#endif // NEURALBASE_H
neuralbase.cpp
#include "neuralbase.h"
#include <cmath>
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include "util.h"
using namespace std;

NeuralBase::NeuralBase(int n_i, int n_o, int n_t):
    m_iInput(n_i), m_iOut(n_o), m_iSamplenum(n_t)
{
    m_ppdW = new double* [m_iOut];
    for (int i = 0; i < m_iOut; ++i)
    {
        m_ppdW[i] = new double [m_iInput];
    }
    m_pdBias = new double [m_iOut];

    double a = 1.0 / m_iInput;
    srand((unsigned)time(NULL));
    for (int i = 0; i < m_iOut; ++i)
    {
        for (int j = 0; j < m_iInput; ++j)
            m_ppdW[i][j] = uniform(-a, a);
        m_pdBias[i] = uniform(-a, a);
    }

    m_pdDelta = new double [m_iOut];
    m_pdOutdata = new double [m_iOut];
}

NeuralBase::~NeuralBase()
{
    Callbackwb();
    delete[] m_pdOutdata;
    delete[] m_pdDelta;
}

void NeuralBase::Callbackwb()
{
    _callbackwb();
}

double NeuralBase::CalErrorRate(const vector<double *> &vecvalid, const vector<WORD> &vecValidlabel)
{
    int iErrorNumber = 0, iValidNumber = vecValidlabel.size();
    for (int i = 0; i < iValidNumber; ++i)
    {
        int iResult = Predict(vecvalid[i]);
        if (iResult != vecValidlabel[i])
        {
            ++iErrorNumber;
        }
    }
    cout << "the num of error is " << iErrorNumber << endl;
    double dErrorRate = (double)iErrorNumber / iValidNumber;
    cout << "the error rate of Train sample by softmax is " << setprecision(10) << dErrorRate * 100 << "%" << endl;
    return dErrorRate;
}

int NeuralBase::Predict(double *)
{
    cout << "NeuralBase::Predict(double *)" << endl;
    return 0;
}

void NeuralBase::_callbackwb()
{
    for (int i = 0; i < m_iOut; i++)
        delete []m_ppdW[i];
    delete[] m_ppdW;
    delete[] m_pdBias;
}

void NeuralBase::Printwb()
{
    cout << "****m_ppdW****\n";
    for (int i = 0; i < m_iOut; ++i)
    {
        for (int j = 0; j < m_iInput; ++j)
            cout << m_ppdW[i][j] << ' ';
        cout << endl;
    }
    cout << "****m_pdBias****\n";
    for (int i = 0; i < m_iOut; ++i)
    {
        cout << m_pdBias[i] << ' ';
    }
    cout << endl;
    cout << "****output****\n";
    for (int i = 0; i < m_iOut; ++i)
    {
        cout << m_pdOutdata[i] << ' ';
    }
    cout << endl;
}

double* NeuralBase::Forward_propagation(double* input_data)
{
    for (int i = 0; i < m_iOut; ++i)
    {
        m_pdOutdata[i] = 0.0;
        for (int j = 0; j < m_iInput; ++j)
        {
            m_pdOutdata[i] += m_ppdW[i][j] * input_data[j];
        }
        m_pdOutdata[i] += m_pdBias[i];
    }
    return m_pdOutdata;
}

void NeuralBase::Back_propagation(double* input_data, double* pdlabel, double dLr)
{
    for (int i = 0; i < m_iOut; ++i)
    {
        m_pdDelta[i] = pdlabel[i] - m_pdOutdata[i];
        for (int j = 0; j < m_iInput; ++j)
        {
            m_ppdW[i][j] += dLr * m_pdDelta[i] * input_data[j] / m_iSamplenum;
        }
        m_pdBias[i] += dLr * m_pdDelta[i] / m_iSamplenum;
    }
}

void NeuralBase::MakeOneLabel(int imax, double *pdlabel)
{
    for (int j = 0; j < m_iOut; ++j)
        pdlabel[j] = 0;
    pdlabel[imax] = 1.0;
}

void NeuralBase::Writewb(const char *szName)
{
    savewb(szName, m_ppdW, m_pdBias, m_iOut, m_iInput);
}

long NeuralBase::Readwb(const char *szName, long dstartpos)
{
    return loadwb(szName, m_ppdW, m_pdBias, m_iOut, m_iInput, dstartpos);
}

void NeuralBase::Setwb(vector<double*> &vpdw, vector<double> &vdb)
{
    assert(vpdw.size() == (DWORD)m_iOut);
    for (int i = 0; i < m_iOut; ++i)
    {
        delete []m_ppdW[i];
        m_ppdW[i] = vpdw[i];
        m_pdBias[i] = vdb[i];
    }
}

void NeuralBase::TrainAllSample(const vector<double *> &vecTrain, const vector<WORD> &vectrainlabel, double dLr)
{
    for (int j = 0; j < m_iSamplenum; ++j)
    {
        Train(vecTrain[j], vectrainlabel[j], dLr);
    }
}

void NeuralBase::Train(double *x, WORD y, double dLr)
{
    (void)x;
    (void)y;
    (void)dLr;
    cout << "NeuralBase::Train(double *x, WORD y, double dLr)" << endl;
}

void NeuralBase::PrintOutputData()
{
    for (int i = 0; i < m_iOut; ++i)
    {
        cout << m_pdOutdata[i] << ' ';
    }
    cout << endl;
}
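When the layer is the softmax output trained with cross-entropy, the update in NeuralBase::Back_propagation is exact gradient descent: with one-hot target t and softmax output y, the stored delta is t_i - y_i and

$$W_{ij} \mathrel{+}= \frac{\eta}{N}(t_i - y_i)\,x_j, \qquad b_i \mathrel{+}= \frac{\eta}{N}(t_i - y_i),$$

where N is m_iSamplenum.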
util.h
#ifndef UTIL_H
#define UTIL_H

#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <vector>
using namespace std;

typedef unsigned char  BYTE;
typedef unsigned short WORD;
typedef unsigned int   DWORD;

double sigmoid(double x);
double mytanh(double dx);

typedef struct stShapeWb
{
    stShapeWb(int w, int h): width(w), height(h) {}
    int width;
    int height;
} ShapeWb_S;

void MakeOneLabel(int iMax, double *pdLabel, int m_iOut);
double uniform(double _min, double _max);
//void printArr(T *parr, int num);
//void printArrDouble(double **pparr, int row, int col);
void initArr(double *parr, int num);
int getMaxIndex(double *pdarr, int num);
void Printivec(const vector<int> &ivec);
void savewb(const char *szName, double **m_ppdW, double *m_pdBias, int irow, int icol);
long loadwb(const char *szName, double **m_ppdW, double *m_pdBias, int irow, int icol, long dstartpos);
void TestLoadJson(const char *pcfilename);
bool LoadvtFromJson(vector<double*> &vecTrain, vector<WORD> &vecLabel, const char *filename, const int m_iInput);
bool LoadwbFromJson(vector<double*> &vecTrain, vector<double> &vecLabel, const char *filename, const int m_iInput);
bool LoadTestSampleFromJson(vector<double*> &vecTrain, vector<WORD> &vecLabel, const char *filename, const int m_iInput);
bool LoadwbByByte(vector<double*> &vecTrain, vector<double> &vecLabel, const char *filename, const int m_iInput);
bool LoadallwbByByte(vector< vector<double*> > &vvAllw, vector< vector<double> > &vvAllb, const char *filename, const int m_iInput, const int ihidden, const int m_iOut);
bool LoadWeighFromJson(vector< vector<double*> > &vvAllw, vector< vector<double> > &vvAllb, const char *filename, const vector<int> &vecSecondDimOfWeigh);
void MakeCnnSample(double arr[2][64], double *pdImage, int iImageWidth, int iNumOfImage);
void MakeCnnWeigh(double *, int iNumOfKernel);

template <typename T>
void printArr(T *parr, int num)
{
    cout << "****printArr****" << endl;
    for (int i = 0; i < num; ++i)
        cout << parr[i] << ' ';
    cout << endl;
}

template <typename T>
void printArrDouble(T **pparr, int row, int col)
{
    cout << "****printArrDouble****" << endl;
    for (int i = 0; i < row; ++i)
    {
        for (int j = 0; j < col; ++j)
        {
            cout << pparr[i][j] << ' ';
        }
        cout << endl;
    }
}

#endif
util.cpp
#include "util.h" #include <iostream> #include <ctime> #include <cmath> #include <cassert> #include <fstream> #include <cstring> #include <stack> #include <iomanip> using namespace std; int getMaxIndex(double *pdarr, int num) { double dmax = -1; int iMax = -1; for(int i = 0; i < num; ++i) { if (pdarr[i] > dmax) { dmax = pdarr[i]; iMax = i; } } return iMax; } double sigmoid(double dx) { return 1.0/(1.0+exp(-dx)); } double mytanh(double dx) { double e2x = exp(2 * dx); return (e2x - 1) / (e2x + 1); } double uniform(double _min, double _max) { return rand()/(RAND_MAX + 1.0) * (_max - _min) + _min; } void initArr(double *parr, int num) { for (int i = 0; i < num; ++i) parr[i] = 0.0; } void savewb(const char *szName, double **m_ppdW, double *m_pdBias, int irow, int icol) { FILE *pf; if( (pf = fopen(szName, "ab" )) == NULL ) { printf( "File coulkd not be opened " ); return; } int isizeofelem = sizeof(double); for (int i = 0; i < irow; ++i) { if (fwrite((const void*)m_ppdW[i], isizeofelem, icol, pf) != icol) { fputs ("Writing m_ppdW error",stderr); return; } } if (fwrite((const void*)m_pdBias, isizeofelem, irow, pf) != irow) { fputs ("Writing m_ppdW error",stderr); return; } fclose(pf); } long loadwb(const char *szName, double **m_ppdW, double *m_pdBias, int irow, int icol, long dstartpos) { FILE *pf; long dtotalbyte = 0, dreadsize; if( (pf = fopen(szName, "rb" )) == NULL ) { printf( "File coulkd not be opened " ); return -1; } //让文件指针偏移到正确位置 fseek(pf, dstartpos , SEEK_SET); int isizeofelem = sizeof(double); for (int i = 0; i < irow; ++i) { dreadsize = fread((void*)m_ppdW[i], isizeofelem, icol, pf); if (dreadsize != icol) { fputs ("Reading m_ppdW error",stderr); return -1; } //每次成功读取,都要加到dtotalbyte中,最后返回 dtotalbyte += dreadsize; } dreadsize = fread(m_pdBias, isizeofelem, irow, pf); if (dreadsize != irow) { fputs ("Reading m_pdBias error",stderr); return -1; } dtotalbyte += dreadsize; dtotalbyte *= isizeofelem; fclose(pf); return dtotalbyte; } void Printivec(const vector<int> &ivec) { for (vector<int>::const_iterator it = ivec.begin(); it != ivec.end(); ++it) { cout << *it << ' '; } cout << endl; } void TestLoadJson(const char *pcfilename) { vector<double *> vpdw; vector<double> vdb; vector< vector<double*> > vvAllw; vector< vector<double> > vvAllb; int m_iInput = 28 * 28, ihidden = 500, m_iOut = 10; LoadallwbByByte(vvAllw, vvAllb, pcfilename, m_iInput, ihidden, m_iOut ); } //read vt from mnist, format is [[[], [],..., []],[1, 3, 5,..., 7]] bool LoadvtFromJson(vector<double*> &vecTrain, vector<WORD> &vecLabel, const char *filename, const int m_iInput) { cout << "loadvtFromJson" << endl; const int ciStackSize = 10; const int ciFeaturesize = m_iInput; int arriStack[ciStackSize], iTop = -1; ifstream ifs; ifs.open(filename, ios::in); assert(ifs.is_open()); BYTE ucRead, ucLeftbrace, ucRightbrace, ucComma, ucSpace; ucLeftbrace = '['; ucRightbrace = ']'; ucComma = ','; ucSpace = '0'; ifs >> ucRead; assert(ucRead == ucLeftbrace); //栈中全部存放左括号,用1代表,0说明清除 arriStack[++iTop] = 1; //样本train开始 ifs >> ucRead; assert(ucRead == ucLeftbrace); arriStack[++iTop] = 1;//iTop is 1 int iIndex; bool isdigit = false; double dread, *pdvt; //load vt sample while (iTop > 0) { if (isdigit == false) { ifs >> ucRead; isdigit = true; if (ucRead == ucComma) { //next char is space or leftbrace // ifs >> ucRead; isdigit = false; continue; } if (ucRead == ucSpace) { //if pdvt is null, next char is leftbrace; //else next char is double value if (pdvt == NULL) isdigit = false; continue; } if (ucRead == ucLeftbrace) { pdvt = new 
double[ciFeaturesize]; memset(pdvt, 0, ciFeaturesize * sizeof(double)); //iIndex数组下标 iIndex = 0; arriStack[++iTop] = 1; continue; } if (ucRead == ucRightbrace) { if (pdvt != NULL) { assert(iIndex == ciFeaturesize); vecTrain.push_back(pdvt); pdvt = NULL; } isdigit = false; arriStack[iTop--] = 0; continue; } } else { ifs >> dread; pdvt[iIndex++] = dread; isdigit = false; } }; //next char is dot ifs >> ucRead; assert(ucRead == ucComma); cout << vecTrain.size() << endl; //读取label WORD usread; isdigit = false; while (iTop > -1 && ifs.eof() == false) { if (isdigit == false) { ifs >> ucRead; isdigit = true; if (ucRead == ucComma) { //next char is space or leftbrace // ifs >> ucRead; // isdigit = false; continue; } if (ucRead == ucSpace) { //if pdvt is null, next char is leftbrace; //else next char is double value if (pdvt == NULL) isdigit = false; continue; } if (ucRead == ucLeftbrace) { arriStack[++iTop] = 1; continue; } //右括号的下一个字符是右括号(最后一个字符) if (ucRead == ucRightbrace) { isdigit = false; arriStack[iTop--] = 0; continue; } } else { ifs >> usread; vecLabel.push_back(usread); isdigit = false; } }; assert(vecLabel.size() == vecTrain.size()); assert(iTop == -1); ifs.close(); return true; } bool testjsonfloat(const char *filename) { vector<double> vecTrain; cout << "testjsondouble" << endl; const int ciStackSize = 10; int arriStack[ciStackSize], iTop = -1; ifstream ifs; ifs.open(filename, ios::in); assert(ifs.is_open()); BYTE ucRead, ucLeftbrace, ucRightbrace, ucComma; ucLeftbrace = '['; ucRightbrace = ']'; ucComma = ','; ifs >> ucRead; assert(ucRead == ucLeftbrace); //栈中全部存放左括号,用1代表,0说明清除 arriStack[++iTop] = 1; //样本train开始 ifs >> ucRead; assert(ucRead == ucLeftbrace); arriStack[++iTop] = 1;//iTop is 1 double fread; bool isdigit = false; while (iTop > -1) { if (isdigit == false) { ifs >> ucRead; isdigit = true; if (ucRead == ucComma) { //next char is space or leftbrace // ifs >> ucRead; isdigit = false; continue; } if (ucRead == ' ') continue; if (ucRead == ucLeftbrace) { arriStack[++iTop] = 1; continue; } if (ucRead == ucRightbrace) { isdigit = false; //右括号的下一个字符是右括号(最后一个字符) arriStack[iTop--] = 0; continue; } } else { ifs >> fread; vecTrain.push_back(fread); isdigit = false; } } ifs.close(); return true; } bool LoadwbFromJson(vector<double*> &vecTrain, vector<double> &vecLabel, const char *filename, const int m_iInput) { cout << "loadvtFromJson" << endl; const int ciStackSize = 10; const int ciFeaturesize = m_iInput; int arriStack[ciStackSize], iTop = -1; ifstream ifs; ifs.open(filename, ios::in); assert(ifs.is_open()); BYTE ucRead, ucLeftbrace, ucRightbrace, ucComma, ucSpace; ucLeftbrace = '['; ucRightbrace = ']'; ucComma = ','; ucSpace = '0'; ifs >> ucRead; assert(ucRead == ucLeftbrace); //栈中全部存放左括号,用1代表,0说明清除 arriStack[++iTop] = 1; //样本train开始 ifs >> ucRead; assert(ucRead == ucLeftbrace); arriStack[++iTop] = 1;//iTop is 1 int iIndex; bool isdigit = false; double dread, *pdvt; //load vt sample while (iTop > 0) { if (isdigit == false) { ifs >> ucRead; isdigit = true; if (ucRead == ucComma) { //next char is space or leftbrace // ifs >> ucRead; isdigit = false; continue; } if (ucRead == ucSpace) { //if pdvt is null, next char is leftbrace; //else next char is double value if (pdvt == NULL) isdigit = false; continue; } if (ucRead == ucLeftbrace) { pdvt = new double[ciFeaturesize]; memset(pdvt, 0, ciFeaturesize * sizeof(double)); //iIndex数组下标 iIndex = 0; arriStack[++iTop] = 1; continue; } if (ucRead == ucRightbrace) { if (pdvt != NULL) { assert(iIndex == ciFeaturesize); vecTrain.push_back(pdvt); pdvt 
= NULL; } isdigit = false; arriStack[iTop--] = 0; continue; } } else { ifs >> dread; pdvt[iIndex++] = dread; isdigit = false; } }; //next char is dot ifs >> ucRead; assert(ucRead == ucComma); cout << vecTrain.size() << endl; //读取label double usread; isdigit = false; while (iTop > -1 && ifs.eof() == false) { if (isdigit == false) { ifs >> ucRead; isdigit = true; if (ucRead == ucComma) { //next char is space or leftbrace // ifs >> ucRead; // isdigit = false; continue; } if (ucRead == ucSpace) { //if pdvt is null, next char is leftbrace; //else next char is double value if (pdvt == NULL) isdigit = false; continue; } if (ucRead == ucLeftbrace) { arriStack[++iTop] = 1; continue; } //右括号的下一个字符是右括号(最后一个字符) if (ucRead == ucRightbrace) { isdigit = false; arriStack[iTop--] = 0; continue; } } else { ifs >> usread; vecLabel.push_back(usread); isdigit = false; } }; assert(vecLabel.size() == vecTrain.size()); assert(iTop == -1); ifs.close(); return true; } bool vec2double(vector<BYTE> &vecDigit, double &dvalue) { if (vecDigit.empty()) return false; int ivecsize = vecDigit.size(); const int iMaxlen = 50; char szdigit[iMaxlen]; assert(iMaxlen > ivecsize); memset(szdigit, 0, iMaxlen); int i; for (i = 0; i < ivecsize; ++i) szdigit[i] = vecDigit[i]; szdigit[i++] = '\0'; vecDigit.clear(); dvalue = atof(szdigit); return true; } bool vec2short(vector<BYTE> &vecDigit, WORD &usvalue) { if (vecDigit.empty()) return false; int ivecsize = vecDigit.size(); const int iMaxlen = 50; char szdigit[iMaxlen]; assert(iMaxlen > ivecsize); memset(szdigit, 0, iMaxlen); int i; for (i = 0; i < ivecsize; ++i) szdigit[i] = vecDigit[i]; szdigit[i++] = '\0'; vecDigit.clear(); usvalue = atoi(szdigit); return true; } void readDigitFromJson(ifstream &ifs, vector<double*> &vecTrain, vector<WORD> &vecLabel, vector<BYTE> &vecDigit, double *&pdvt, int &iIndex, const int ciFeaturesize, int *arrStack, int &iTop, bool bFirstlist) { BYTE ucRead; WORD usvalue; double dvalue; const BYTE ucLeftbrace = '[', ucRightbrace = ']', ucComma = ',', ucSpace = ' '; ifs.read((char*)(&ucRead), 1); switch (ucRead) { case ucLeftbrace: { if (bFirstlist) { pdvt = new double[ciFeaturesize]; memset(pdvt, 0, ciFeaturesize * sizeof(double)); iIndex = 0; } arrStack[++iTop] = 1; break; } case ucComma: { //next char is space or leftbrace if (bFirstlist) { if (vecDigit.empty() == false) { vec2double(vecDigit, dvalue); pdvt[iIndex++] = dvalue; } } else { if(vec2short(vecDigit, usvalue)) vecLabel.push_back(usvalue); } break; } case ucSpace: break; case ucRightbrace: { if (bFirstlist) { if (pdvt != NULL) { vec2double(vecDigit, dvalue); pdvt[iIndex++] = dvalue; vecTrain.push_back(pdvt); pdvt = NULL; } assert(iIndex == ciFeaturesize); } else { if(vec2short(vecDigit, usvalue)) vecLabel.push_back(usvalue); } arrStack[iTop--] = 0; break; } default: { vecDigit.push_back(ucRead); break; } } } void readDoubleFromJson(ifstream &ifs, vector<double*> &vecTrain, vector<double> &vecLabel, vector<BYTE> &vecDigit, double *&pdvt, int &iIndex, const int ciFeaturesize, int *arrStack, int &iTop, bool bFirstlist) { BYTE ucRead; double dvalue; const BYTE ucLeftbrace = '[', ucRightbrace = ']', ucComma = ',', ucSpace = ' '; ifs.read((char*)(&ucRead), 1); switch (ucRead) { case ucLeftbrace: { if (bFirstlist) { pdvt = new double[ciFeaturesize]; memset(pdvt, 0, ciFeaturesize * sizeof(double)); iIndex = 0; } arrStack[++iTop] = 1; break; } case ucComma: { //next char is space or leftbrace if (bFirstlist) { if (vecDigit.empty() == false) { vec2double(vecDigit, dvalue); pdvt[iIndex++] = dvalue; } } else 
{ if(vec2double(vecDigit, dvalue)) vecLabel.push_back(dvalue); } break; } case ucSpace: break; case ucRightbrace: { if (bFirstlist) { if (pdvt != NULL) { vec2double(vecDigit, dvalue); pdvt[iIndex++] = dvalue; vecTrain.push_back(pdvt); pdvt = NULL; } assert(iIndex == ciFeaturesize); } else { if(vec2double(vecDigit, dvalue)) vecLabel.push_back(dvalue); } arrStack[iTop--] = 0; break; } default: { vecDigit.push_back(ucRead); break; } } } bool LoadallwbByByte(vector< vector<double*> > &vvAllw, vector< vector<double> > &vvAllb, const char *filename, const int m_iInput, const int ihidden, const int m_iOut) { cout << "LoadallwbByByte" << endl; const int szistsize = 10; int ciFeaturesize = m_iInput; const BYTE ucLeftbrace = '[', ucRightbrace = ']', ucComma = ',', ucSpace = ' '; int arrStack[szistsize], iTop = -1, iIndex = 0; ifstream ifs; ifs.open(filename, ios::in | ios::binary); assert(ifs.is_open()); double *pdvt; BYTE ucRead; ifs.read((char*)(&ucRead), 1); assert(ucRead == ucLeftbrace); //栈中全部存放左括号,用1代表,0说明清除 arrStack[++iTop] = 1; ifs.read((char*)(&ucRead), 1); assert(ucRead == ucLeftbrace); arrStack[++iTop] = 1;//iTop is 1 ifs.read((char*)(&ucRead), 1); assert(ucRead == ucLeftbrace); arrStack[++iTop] = 1;//iTop is 2 vector<BYTE> vecDigit; vector<double *> vpdw; vector<double> vdb; while (iTop > 1 && ifs.eof() == false) { readDoubleFromJson(ifs, vpdw, vdb, vecDigit, pdvt, iIndex, m_iInput, arrStack, iTop, true); }; //next char is dot ifs.read((char*)(&ucRead), 1);; assert(ucRead == ucComma); cout << vpdw.size() << endl; //next char is space while (iTop > 0 && ifs.eof() == false) { readDoubleFromJson(ifs, vpdw, vdb, vecDigit, pdvt, iIndex, m_iInput, arrStack, iTop, false); }; assert(vpdw.size() == vdb.size()); assert(iTop == 0); vvAllw.push_back(vpdw); vvAllb.push_back(vdb); //clear vpdw and pdb 's contents vpdw.clear(); vdb.clear(); //next char is comma ifs.read((char*)(&ucRead), 1);; assert(ucRead == ucComma); //next char is space ifs.read((char*)(&ucRead), 1);; assert(ucRead == ucSpace); ifs.read((char*)(&ucRead), 1); assert(ucRead == ucLeftbrace); arrStack[++iTop] = 1;//iTop is 1 ifs.read((char*)(&ucRead), 1); assert(ucRead == ucLeftbrace); arrStack[++iTop] = 1;//iTop is 2 while (iTop > 1 && ifs.eof() == false) { readDoubleFromJson(ifs, vpdw, vdb, vecDigit, pdvt, iIndex, ihidden, arrStack, iTop, true); }; //next char is dot ifs.read((char*)(&ucRead), 1);; assert(ucRead == ucComma); cout << vpdw.size() << endl; //next char is space while (iTop > -1 && ifs.eof() == false) { readDoubleFromJson(ifs, vpdw, vdb, vecDigit, pdvt, iIndex, ihidden, arrStack, iTop, false); }; assert(vpdw.size() == vdb.size()); assert(iTop == -1); vvAllw.push_back(vpdw); vvAllb.push_back(vdb); //clear vpdw and pdb 's contents vpdw.clear(); vdb.clear(); //close file ifs.close(); return true; } bool LoadWeighFromJson(vector< vector<double*> > &vvAllw, vector< vector<double> > &vvAllb, const char *filename, const vector<int> &vecSecondDimOfWeigh) { cout << "LoadWeighFromJson" << endl; const int szistsize = 10; const BYTE ucLeftbrace = '[', ucRightbrace = ']', ucComma = ',', ucSpace = ' '; int arrStack[szistsize], iTop = -1, iIndex = 0; ifstream ifs; ifs.open(filename, ios::in | ios::binary); assert(ifs.is_open()); double *pdvt; BYTE ucRead; ifs.read((char*)(&ucRead), 1); assert(ucRead == ucLeftbrace); //栈中全部存放左括号,用1代表,0说明清除 arrStack[++iTop] = 1; ifs.read((char*)(&ucRead), 1); assert(ucRead == ucLeftbrace); arrStack[++iTop] = 1;//iTop is 1 ifs.read((char*)(&ucRead), 1); assert(ucRead == ucLeftbrace); arrStack[++iTop] = 
1; // iTop is 2
    int iVecWeighSize = vecSecondDimOfWeigh.size();
    vector<BYTE> vecDigit;
    vector<double *> vpdw;
    vector<double> vdb;
    // read iVecWeighSize pairs of [w, b]
    for (int i = 0; i < iVecWeighSize; ++i)
    {
        int iDimesionOfWeigh = vecSecondDimOfWeigh[i];
        // read w until its closing bracket pops the stack back to 1
        while (iTop > 1 && ifs.eof() == false)
        {
            readDoubleFromJson(ifs, vpdw, vdb, vecDigit, pdvt, iIndex, iDimesionOfWeigh, arrStack, iTop, true);
        }
        // next char is the comma between w and b
        ifs.read((char*)(&ucRead), 1);
        assert(ucRead == ucComma);
        cout << vpdw.size() << endl;
        // read b until its closing bracket pops the stack back to 0
        while (iTop > 0 && ifs.eof() == false)
        {
            readDoubleFromJson(ifs, vpdw, vdb, vecDigit, pdvt, iIndex, iDimesionOfWeigh, arrStack, iTop, false);
        }
        assert(vpdw.size() == vdb.size());
        assert(iTop == 0);
        vvAllw.push_back(vpdw);
        vvAllb.push_back(vdb);
        // clear the contents of vpdw and vdb
        vpdw.clear();
        vdb.clear();
        // after the last [w, b] pair, exit; the next char is the closing bracket ']'
        if (i >= iVecWeighSize - 1)
        {
            break;
        }
        // next char is a comma
        ifs.read((char*)(&ucRead), 1);
        assert(ucRead == ucComma);
        // next char is a space
        ifs.read((char*)(&ucRead), 1);
        assert(ucRead == ucSpace);
        ifs.read((char*)(&ucRead), 1);
        assert(ucRead == ucLeftbrace);
        arrStack[++iTop] = 1; // iTop is 1
        ifs.read((char*)(&ucRead), 1);
        assert(ucRead == ucLeftbrace);
        arrStack[++iTop] = 1; // iTop is 2
    }
    ifs.read((char*)(&ucRead), 1);
    assert(ucRead == ucRightbrace);
    --iTop;
    assert(iTop == -1);
    // close file
    ifs.close();
    return true;
}

// read the test samples from the mnist json file; format is [[[], [], ..., []], [1, 3, 5, ..., 7]]
bool LoadTestSampleFromJson(vector<double*> &vecTrain, vector<WORD> &vecLabel, const char *filename, const int m_iInput)
{
    cout << "LoadTestSampleFromJson" << endl;
    const int ciStackSize = 10;
    const int ciFeaturesize = m_iInput;
    const BYTE ucLeftbrace = '[', ucRightbrace = ']', ucComma = ',', ucSpace = ' ';
    int arrStack[ciStackSize], iTop = -1, iIndex = 0;
    ifstream ifs;
    ifs.open(filename, ios::in | ios::binary);
    assert(ifs.is_open());
    double *pdvt;
    BYTE ucRead;
    ifs.read((char*)(&ucRead), 1);
    assert(ucRead == ucLeftbrace);
    // the stack holds only left brackets, marked by 1; 0 means popped
    arrStack[++iTop] = 1;
    ifs.read((char*)(&ucRead), 1);
    assert(ucRead == ucLeftbrace);
    arrStack[++iTop] = 1; // iTop is 1
    vector<BYTE> vecDigit;
    // read the sample features
    while (iTop > 0 && ifs.eof() == false)
    {
        readDigitFromJson(ifs, vecTrain, vecLabel, vecDigit, pdvt, iIndex, ciFeaturesize, arrStack, iTop, true);
    }
    // next char is the comma between the samples and the labels
    ifs >> ucRead;
    assert(ucRead == ucComma);
    cout << vecTrain.size() << endl;
    // read the labels
    while (iTop > -1 && ifs.eof() == false)
    {
        readDigitFromJson(ifs, vecTrain, vecLabel, vecDigit, pdvt, iIndex, ciFeaturesize, arrStack, iTop, false);
    }
    assert(vecLabel.size() == vecTrain.size());
    assert(iTop == -1);
    ifs.close();
    return true;
}

void MakeOneLabel(int iMax, double *pdLabel, int m_iOut)
{
    // one-hot encoding: position iMax gets 1, everything else 0
    for (int j = 0; j < m_iOut; ++j)
        pdLabel[j] = 0;
    pdLabel[iMax] = 1.0;
}

void MakeCnnSample(double arrInput[2][64], double *pdImage, int iImageWidth, int iNumOfImage)
{
    // fill iNumOfImage toy images with a simple gradient pattern
    int iImageSize = iImageWidth * iImageWidth;
    for (int k = 0; k < iNumOfImage; ++k)
    {
        int iStart = k * iImageSize;
        for (int i = 0; i < iImageWidth; ++i)
        {
            for (int j = 0; j < iImageWidth; ++j)
            {
                int iIndex = iStart + i * iImageWidth + j;
                pdImage[iIndex] = 1;
                pdImage[iIndex] += i + j;
                if (k > 0)
                    pdImage[iIndex] -= 1;
                arrInput[k][i * iImageWidth + j] = pdImage[iIndex];
                //pdImage[iIndex] /= 15.0;
            }
        }
    }
    cout << "input image is\n";
    for (int k = 0; k < iNumOfImage; ++k)
    {
        cout << "k is " << k << endl;
        for (int i = 0; i < iImageWidth; ++i)
        {
            for (int j = 0; j < iImageWidth; ++j)
            {
                int iIndex = i * iImageWidth + j;
                double dValue = arrInput[k][iIndex];
                cout << dValue << ' ';
            }
            cout << endl;
        }
        cout << endl;
    }
    cout << endl;
}

void MakeCnnWeigh(double *pdKernel, int iNumOfKernel)
{
    // fill iNumOfKernel toy 3x3 kernels: the first is i + j + 2, the rest copy arrKernel
    const int iKernelWidth = 3;
    double dSum = 0;
    double arrKernel[iKernelWidth][iKernelWidth] = {{4, 7, 1},
                                                    {3, 8, 5},
                                                    {3, 2, 3}};
    double arr2[iKernelWidth][iKernelWidth] = {{6, 5, 4},
                                               {5, 4, 3},
                                               {4, 3, 2}};
    for (int k = 0; k < iNumOfKernel; ++k)
    {
        int iStart = k * iKernelWidth * iKernelWidth;
        for (int i = 0; i < iKernelWidth; ++i)
        {
            for (int j = 0; j < iKernelWidth; ++j)
            {
                int iIndex = i * iKernelWidth + j + iStart;
                pdKernel[iIndex] = i + j + 2;
                if (k > 0)
                    pdKernel[iIndex] = arrKernel[i][j];
                dSum += pdKernel[iIndex];
            }
        }
    }
    cout << "sum is " << dSum << endl;
    for (int k = 0; k < iNumOfKernel; ++k)
    {
        cout << "kernel :" << k << endl;
        int iStart = k * iKernelWidth * iKernelWidth;
        for (int i = 0; i < iKernelWidth; ++i)
        {
            for (int j = 0; j < iKernelWidth; ++j)
            {
                int iIndex = i * iKernelWidth + j + iStart;
                //pdKernel[iIndex] /= dSum;
                cout << pdKernel[iIndex] << ' ';
            }
            cout << endl;
        }
        cout << endl;
    }
    cout << endl;
}
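By the way, here is a minimal sketch of how these loaders might be driven. It is illustrative only: RunTestSamples is a hypothetical helper written for this post (the real entry point is TestCnnTheano in main.cpp), the json path assumes the default file written by mnist2json_small, and the cleanup loop assumes every pointer in vecTrain was heap-allocated inside readDigitFromJson.

// Illustrative sketch only -- RunTestSamples is not part of the project.
// Assumes WORD and the loader declarations come from the project's util.h,
// and that each entry of vecTrain was allocated with new double[] by the loader.
#include <iostream>
#include <vector>
#include "util.h"
using namespace std;

void RunTestSamples()
{
    const int iInput = 28 * 28;   // MNIST feature size
    vector<double*> vecTrain;     // one buffer per sample
    vector<WORD> vecLabel;        // one label per sample

    // path assumed to match the default of mnist2json_small
    LoadTestSampleFromJson(vecTrain, vecLabel,
                           "../../tmp/src/data/mnist_small.json", iInput);
    cout << "loaded " << vecTrain.size() << " samples" << endl;

    // ... run each vecTrain[i] through the network's forward pass here ...

    // release the buffers allocated by the loader
    for (size_t i = 0; i < vecTrain.size(); ++i)
        delete[] vecTrain[i];
}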
The following script trains the network for two epochs and writes out the theano weights.
cnn_mlp_theano.py
#coding=utf-8
import cPickle
import gzip
import os
import sys
import time
import json

import numpy

import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer


class LeNetConvPoolLayer(object):
    """Pool Layer of a convolutional network """

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX), borrow=True)

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # convolve input feature maps with filters
        conv_out = conv.conv2d(input=input, filters=self.W,
                               filter_shape=filter_shape, image_shape=image_shape)

        # downsample each feature map individually, using maxpooling
        pooled_out = downsample.max_pool_2d(input=conv_out,
                                            ds=poolsize, ignore_border=True)

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # store parameters of this layer
        self.params = [self.W, self.b]


def getDataNumpy(layers):
    data = []
    for layer in layers:
        wb = layer.params
        w, b = wb[0].get_value(), wb[1].get_value()
        data.append([w, b])
    return data


def getDataJson(layers):
    data = []
    i = 0
    for layer in layers:
        w, b = layer.params
        # print '..layer is', i
        w, b = w.get_value(), b.get_value()
        wshape = w.shape
        # print '...the shape of w is', wshape
        if len(wshape) == 2:
            w = w.transpose()
        else:
            for k in xrange(wshape[0]):
                for j in xrange(wshape[1]):
                    w[k][j] = numpy.rot90(w[k][j], 2)
            w = w.reshape((wshape[0], numpy.prod(wshape[1:])))

        w = w.tolist()
        b = b.tolist()
        data.append([w, b])
        i += 1
    return data


def writefile(data, name='../../tmp/src/data/theanocnn.json'):
    print ('writefile is ' + name)
    f = open(name, "wb")
    json.dump(data, f)
    f.close()


def readfile(layers, nkerns, name='../../tmp/src/data/theanocnn.json'):
    # Load the dataset
    print ('readfile is ' + name)
    f = open(name, 'rb')
    data = json.load(f)
    f.close()
    readwb(data, layers, nkerns)


def readwb(data, layers, nkerns):
    i = 0
    kernSize = len(nkerns)
    inputnum = 1
    for layer in layers:
        w, b = data[i]
        w = numpy.array(w, dtype='float32')
        b = numpy.array(b, dtype='float32')

        # print '..layer is', i
        # print w.shape
        if i >= kernSize:
            w = w.transpose()
        else:
            w = w.reshape((nkerns[i], inputnum, 5, 5))
            for k in xrange(nkerns[i]):
                for j in xrange(inputnum):
                    c = w[k][j]
                    w[k][j] = numpy.rot90(c, 2)
            inputnum = nkerns[i]
            # print '..readwb, transpose and rot180'
            # print w.shape
        layer.W.set_value(w, borrow=True)
        layer.b.set_value(b, borrow=True)
        i += 1


def loadwb(classifier, name='theanocnn.json'):
    data = json.load(open(name, 'rb'))
    w, b = data
    print type(w)
    w = numpy.array(w, dtype='float32').transpose()
    classifier.W.set_value(w, borrow=True)
    classifier.b.set_value(b, borrow=True)


def savewb(classifier, name='theanocnn.json'):
    w, b = classifier.params
    w = w.get_value().transpose().tolist()
    b = b.get_value().tolist()
    data = [w, b]
    json.dump(data, open(name, 'wb'))


def evaluate_lenet5(learning_rate=0.1, n_epochs=2,
                    dataset='../../data/mnist.pkl',
                    nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training / testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    ishape = (28, 28)  # this is the size of MNIST images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 28*28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1, 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, 28, 28),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 12, 12),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the hidden layer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4) = (500, 800)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected tanh layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 4 * 4,
                         n_out=500, activation=T.tanh)

    # classify the values of the fully-connected tanh layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function([index], layer3.errors(y),
            givens={
                x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    validate_model = theano.function([index], layer3.errors(y),
            givens={
                x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    layers = [layer0, layer1, layer2, layer3]

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = []
    for param_i, grad_i in zip(params, grads):
        updates.append((param_i, param_i - learning_rate * grad_i))

    train_model = theano.function([index], cost, updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many minibatches before
                                  # checking the network on the validation
                                  # set; in this case we check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        print '...epoch is', epoch, 'writefile'
        writefile(getDataJson(layers))
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of best '
                           'model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    # reload the saved weights and check that they reproduce the validation error
    readfile(layers, nkerns)
    validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
    this_validation_loss = numpy.mean(validation_losses)
    print('validation error %f %%' % (this_validation_loss * 100.))


if __name__ == '__main__':
    evaluate_lenet5()
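One last note on difficulty 2. theano's conv.conv2d computes a true convolution, i.e. it flips each kernel by 180 degrees; that is exactly why getDataJson applies numpy.rot90(w[k][j], 2) before exporting and readwb rotates back when reloading. If the C++ forward pass is written as a plain correlation (no flipping), the exported weights drop in unchanged. The function below is only a sketch of that idea, not the actual convolution routine from the project:

// Valid-mode 2D correlation over one feature map. Because the kernels were
// rotated 180 degrees on export, this plain correlation reproduces the
// output of theano's conv2d.
void Correlate2dValid(const double *pdIn, int iInW,     // input map, width (square)
                      const double *pdKernel, int iKW,  // kernel, width (square)
                      double *pdOut)                    // output, (iInW - iKW + 1)^2
{
    int iOutW = iInW - iKW + 1;
    for (int r = 0; r < iOutW; ++r)
    {
        for (int c = 0; c < iOutW; ++c)
        {
            double dSum = 0.0;
            for (int i = 0; i < iKW; ++i)
                for (int j = 0; j < iKW; ++j)
                    dSum += pdIn[(r + i) * iInW + (c + j)] * pdKernel[i * iKW + j];
            pdOut[r * iOutW + c] = dSum;
        }
    }
}

With the pre-rotated 5x5 kernels and a 28x28 input map, this produces the same 24x24 valid-mode output as theano, up to floating-point rounding.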