AutoEncoderで遊ぶ

はじめに

次元圧縮がマイブーム化しているので、最近はやりのAutoEncoderで遊んでみる。
べ、別に深い何かのためにやろうとしてるわけじゃn

AutoEncoderとは

入力と出力が近くなるように学習するニューラルネットワーク
- (枠組みをさすだけでニューラルネットワークに限らないのかも？)
- 基本は、入力層、隠れ層、出力層の3層で構成し、教師信号は入力信号と同じにして学習させる
特徴や内部表現の構成を学習することができる
- 入力&出力の次元より隠れ層の次元を小さくして構成する
- 入力セットの圧縮された表現を学習する意味で、(非線形な)次元圧縮器とみなせる

AutoEncoderの種類

いくつか種類があるぽい。名前だけメモしておく。

Basic AutoEncoder
Regularized AutoEncoder
Sparse AutoEncoder
Denoising AutoEncoder
Contractive AutoEncoder
Saturating AutoEncoder
Nonparametrically Guided AutoEncoder
Unfolding Recursive AutoEncoder
Deepな世界
- Stacked AutoEncoder
- Stacked Denoising AutoEncoder

Denoising AutoEncoder

入力に、ノイズをのせたものを与え、出力ではノイズのないもので学習させるAutoEncoder
- ノイズを取り除くようにも学習される
- 良い結果が得られる

実装

以下を参考に書いてみた。
- http://blog.yusugomori.com/post/42116682299/denoising-autoencoders
0/1のバイナリ値バージョン

いつもながら適当なコード。

#include <iostream>
#include <vector>
#include <cmath>

//行列計算用クラス
class Matrix {
  int m, n;
  std::vector<double> val;

  void splice(int i, int j){
    int N = val.size();
    if(i+(j-1)<N){
      std::vector<double> vl(N-j, 0.0);
      int vl_idx = 0, val_idx = 0;
      while(val_idx<N){
        if(val_idx<i||i+j-1<val_idx){
          vl[vl_idx] = val[val_idx];
          vl_idx++;
        }
        val_idx++;
      }
      val = vl;
    }
  }
  Matrix clone(){
    Matrix ret(m, n);
    for(int i=0; i<m; i++){
      for(int j=0; j<n; j++){
        ret.setVal(i, j, val[n*i+j]);
      }
    }
    return ret;
  }
  
public:
  Matrix():m(0),n(0){}
  Matrix(int m, int n):m(m),n(n){
    if(m>0 && n>0){
      for(int i=0; i<m*n; i++){
        val.push_back(0.0);
      }
    }
  }

  int getM() const { return m; }
  int getN() const { return n; }
  double getVal(int i, int j) const { return val[n*i+j]; }
  void setVal(int i, int j, double x){ val[n*i+j] = x; }
  bool isSquare(){ return n==m; }

  Matrix add(const Matrix& mat){
    if(m == mat.m && n == mat.n){
      Matrix ret(m, n);
      for(int i=0; i<m; i++){
        for(int j=0; j<n; j++){
          ret.setVal(i, j, val[n*i+j] + mat.getVal(i,j));
        }
      }
      return ret;
    }
    return Matrix(-1, -1);
  }
  Matrix operator+(const Matrix& mat){ return add(mat); }

  Matrix sub(const Matrix& mat){
    if(m == mat.m && n == mat.n){
      Matrix ret(m, n);
      for(int i=0; i<m; i++){
        for(int j=0; j<n; j++){
          ret.setVal(i, j, val[n*i+j] - mat.getVal(i,j));
        }
      }
      return ret;
    }
    return Matrix(-1, -1);
  }
  Matrix operator-(const Matrix& mat){ return sub(mat); }

  Matrix prod(const Matrix& mat){
    //追記(2013/10/11): ベクトル同士の要素積 
    if(n == 1 && mat.n == 1 && m == mat.m){
      Matrix ret(m, 1);
      for(int i=0; i<m; i++){
        ret.setVal(i, 0, getVal(i, 0) * mat.getVal(i, 0));
      }
      return ret;
    }
    else if(n == mat.m){
      Matrix ret(m, mat.n);
      for(int i=0; i<m; i++){
        for(int j=0; j<mat.n; j++){
          double d = 0.0;
          for(int k=0; k<n; k++){
            d += val[n*i+k] * mat.getVal(k,j);
          }
          ret.setVal(i, j, d);
        }
      }
      return ret;
    }
    return Matrix(-1, -1);
  }
  Matrix operator*(const Matrix& mat){ return prod(mat); }

  void time(double x){
    for(int i=0; i<m; i++){
      for(int j=0; j<n; j++){
        val[n*i+j] *= x;
      }
    }
  }

  Matrix transpose(){
    Matrix ret(n, m);
    for(int i=0; i<m; i++){
      for(int j=0; j<n; j++){
        ret.setVal(j, i, val[n*i+j]);
      }
    }
    return ret;
  }
  
  double cofactor(int i, int j){
    Matrix mat = clone();
    mat.splice(i*mat.n, mat.m);
    mat.m -= 1;
    for(int k=mat.m-1; k>=0; k--){
      mat.splice(k*mat.n+j, 1);
    }
    mat.n -= 1;
    return mat.det() * ( ((i+j+2)%2==0) ? 1 : -1);
  }
  double det(){
    if(isSquare()){
      if(m == 2){
        return val[0]*val[3]-val[1]*val[2];
      }else{
        double d = 0;
        for(int k=0; k<n; k++){
          d += val[k] * cofactor(0, k);
        }
        return d;
      }
    }else{
      return 0.0;
    }
  }
  Matrix cofactorMatrix(){
    Matrix mat(m, n);
    for(int i=0; i<m; i++){
      for(int j=0; j<n; j++){
        mat.setVal(j, i, cofactor(i, j));
      }
    }
    return mat;
  }
  Matrix inverse(){
    if(isSquare()){
      double d = det();
      if(d != 0){
        Matrix mat;
        if(m>2){
          mat = cofactorMatrix();
        } else {
          mat = Matrix(2, 2);
          mat.setVal(0, 0, val[3]);
          mat.setVal(0, 1, -val[1]);
          mat.setVal(1, 0, -val[2]);
          mat.setVal(1, 1, val[0]);
        }
        mat.time(1 / d);
        return mat;
      }
    }else{
      return Matrix(-1, -1);
    }
    return Matrix(-1, -1);
  }
};
std::ostream& operator<<(std::ostream& os, const Matrix& mat){
  for(int i=0; i<mat.getM(); i++){
    for(int j=0; j<mat.getN(); j++){
      os << mat.getVal(i,j) << " ";
    }
    os << std::endl;
  }
  return os;
}



//乱数計算用関数
//xorshift
// 注意: longではなくint(32bit)にすべき
unsigned long xor128(){
  static unsigned long x=123456789, y=362436069, z=521288629, w=88675123;
  unsigned long t;
  t=(x^(x<<11));
  x=y; y=z; z=w;
  return w=(w^(w>>19))^(t^(t>>8));
}
//[0,1)の一様乱数
// 注意: int_maxぐらいで割った方がよい
double frand(){
  return xor128()%10000000000/static_cast<double>(10000000000); 
}





// binary-inputs denoising autoencoder (tied W, W' = W^T)
//  y = sigmoid(W x' + b)
//  z = sigmoid(W' y + b') ~ x
class DenoisingAutoEncoder {
  int num_visible, num_hidden;
  Matrix W, bvis, bhid;
  double p, eps;

  double sigmoid(double x){
    return 1.0 / (1.0 + exp(-x));
  }

public:
  DenoisingAutoEncoder(int num_visible, int num_hidden, double p, double eps):
    num_visible(num_visible),
    num_hidden(num_hidden),
    W(num_hidden, num_visible),
    bvis(num_visible, 1),
    bhid(num_hidden, 1),
    p(p),
    eps(eps)
  {
    //initial W
    //20140115追記：num_visibleとnum_hiddenが逆
    for(int i=0; i<num_hidden; i++){
      for(int j=0; j<num_visible; j++){
        W.setVal(i, j, 1.0 - 2.0 * frand());
      }
    }
  }

  void train(const std::vector<int>& in){
    //make input x
    Matrix x(num_visible, 1);
    for(int i=0; i<in.size(); i++){
      x.setVal(i, 0, in[i]);
    }
    //noise addition
    Matrix noisex(num_visible, 1);
    for(int i=0; i<in.size(); i++){
      double val = in[i];
      if(frand() < p){
        if(val < 0.5) val = 1.0;
        else val = 0.0;
      }
      noisex.setVal(i, 0, val);
    }
    //vector one
    Matrix one(num_hidden, 1);
    for(int i=0; i<num_hidden; i++){
      one.setVal(i, 0, 1.0);
    }
    //make y
    Matrix y = W * noisex + bhid;
    for(int i=0; i<num_hidden; i++){
      y.setVal(i, 0, sigmoid(y.getVal(i, 0)));
    }
    //make z
    Matrix z = W.transpose() * y + bvis;
    for(int i=0; i<num_visible; i++){
      z.setVal(i, 0, sigmoid(z.getVal(i, 0)));
    }

    //make grad bhid
    Matrix gradbhid = W * (x-z) * y * (one-y);
    //make grad bvis
    Matrix gradbvis = x - z;
    //make grad W
    Matrix gradW = (gradbhid * noisex.transpose()) + ((gradbvis * y.transpose()).transpose());

    //update W
    for(int i=0; i<num_hidden; i++){
      for(int j=0; j<num_visible; j++){
        W.setVal(i, j, W.getVal(i, j) + eps * gradW.getVal(i, j));
      }
    }

    //update b
    for(int i=0; i<num_hidden; i++){
      bhid.setVal(i, 0, bhid.getVal(i, 0) + eps * gradbhid.getVal(i, 0));
    }

    //update b'
    for(int i=0; i<num_visible; i++){
      bvis.setVal(i, 0, bvis.getVal(i, 0) + eps * gradbvis.getVal(i, 0));
    }
  }
  
  //隠れ層の値を取得
  std::vector<double> getHiddenValues(const std::vector<int>& in){
    std::vector<double> ret;
    
    //make input x
    Matrix x(num_visible, 1);
    for(int i=0; i<in.size(); i++){
      x.setVal(i, 0, in[i]);
    }
    //make y
    Matrix y = W * x + bhid;
    for(int i=0; i<num_hidden; i++){
      ret.push_back( sigmoid(y.getVal(i, 0)) );
    }
    
    return ret;
  }

  //出力層の値を取得
  std::vector<double> getOutputValues(const std::vector<int>& in){
    std::vector<double> ret;

    //make input x
    Matrix x(num_visible, 1);
    for(int i=0; i<in.size(); i++){
      x.setVal(i, 0, in[i]);
    }
    //make y
    Matrix y = W * x + bhid;
    for(int i=0; i<num_hidden; i++){
      y.setVal(i, 0, sigmoid(y.getVal(i, 0)));
    }
    //make z
    Matrix z = W.transpose() * y + bvis;
    for(int i=0; i<num_visible; i++){
      ret.push_back( sigmoid(z.getVal(i, 0)) );
    }

    return ret;
  }

  //重み行列Wを出力
  void dumpW(){
    std::cout << "Weight W" << std::endl;
    std::cout << W << std::endl;
  }
  //バイアスbとb'を出力
  void dumpb(){
    std::cout << "bhid" << std::endl;
    std::cout << bhid << std::endl;

    std::cout << "bvis" << std::endl;
    std::cout << bvis << std::endl;
  }
};


int main(){

  int InputDim, HiddenDim, Epoch;
  double noiseP, epsilon;
  int dataNum;
  
  //入力は以下の形式
  //  入力次元 隠れ層次元 反復回数 ノイズ付与の割合 学習率
  //  入力データ数
  //  入力データ...
  std::cin >> InputDim >> HiddenDim >> Epoch;
  std::cin >> noiseP >> epsilon;
  std::cin >> dataNum;


  DenoisingAutoEncoder da(InputDim, HiddenDim, noiseP, epsilon);

  //inputs
  std::vector< std::vector<int> > in(dataNum, std::vector<int>(InputDim));
  for(int i=0; i<dataNum; i++){
    for(int j=0; j<InputDim; j++){
      std::cin >> in[i][j];
    }
  }
  

  //train
  for(int t=0; t<Epoch; t++){
    for(int i=0; i<dataNum; i++){
      da.train(in[i]);
    }

    int errSum = 0;
    for(int i=0; i<dataNum; i++){
      std::vector<double> res = da.getOutputValues(in[i]);
      for(int j=0; j<InputDim; j++){
        if( in[i][j] != (res[j]>0.5?1:0) ) errSum++;
      }
    }    

    std::cout << "epoch " << t << " : Err = " << (100.0 * errSum / (dataNum * InputDim)) << "%" << std::endl;
  }


  //results
  da.dumpW();
  da.dumpb();

  for(int i=0; i<dataNum; i++){
    std::vector<double> resH = da.getHiddenValues(in[i]);
    std::vector<double> resO = da.getOutputValues(in[i]);

    //Inputs
    std::cout << "Input:  ";
    for(int j=0; j<InputDim; j++){
      std::cout << in[i][j] << " ";
    }
    std::cout << std::endl;
    
    //Outputs
    std::cout << "Output: ";
    for(int j=0; j<InputDim; j++){
      std::cout << ((resO[j]>0.5)?1:0) << " ";
    }
    std::cout << std::endl;

    //Hidden Layers
    std::cout << "Hidden: ";
    for(int j=0; j<HiddenDim; j++){
      std::cout << ((resH[j]>0.5)?1:0) << " ";
    }
    std::cout << std::endl;
    std::cout << std::endl;
  }

  return 0;
}

実験

入力データ1

とりあえず、適当に自分で入力を作ってやってみた。
入力10次元、隠れ層5次元、ノイズ付与30%ぐらいで、10個の入力を50000回学習させてみる。

10 5 50000 0.3 0.001
10
1 1 1 1 1 0 0 0 0 0
1 0 1 1 0 0 0 1 0 0
1 1 0 1 1 0 0 0 1 0
0 1 1 1 1 1 0 0 0 0
1 1 0 1 0 1 0 0 0 1
1 0 0 1 0 1 0 0 0 0
1 1 1 1 0 0 0 0 0 0
0 1 1 0 1 1 0 0 0 0
1 0 1 0 1 0 0 0 1 0
1 1 1 1 1 0 1 0 0 0

(なんか入力がすでにノイズ含みのようも見えてしまう・・・)

結果1

$ ./a.out < test.in
epoch 0 : Err = 37%
epoch 1 : Err = 37%
epoch 2 : Err = 37%
epoch 3 : Err = 37%
...
epoch 49997 : Err = 8%
epoch 49998 : Err = 8%
epoch 49999 : Err = 8%
Weight W
2.21431 -0.929678 -0.787646 1.86138 -1.71007 -1.41933 -0.638311 0.813204 0.183754 -0.377198 
-0.866526 2.71911 -1.1304 1.52018 0.567724 1.42645 0.34102 -2.09582 -1.06503 0.879303 
-1.43966 -0.96438 -0.122118 -1.38995 0.114601 1.82442 -2.18082 -0.427016 0.319436 -0.494829 
0.983997 0.00294969 -1.12257 -1.06816 2.04599 -1.35387 0.0266504 -1.60374 2.8456 -0.445375 
-0.996447 0.958298 2.89736 -0.641902 2.03816 -1.72298 0.858461 0.0691355 -0.0577102 -2.22283 

bhid
1.10898 
-0.184213 
2.38526 
-1.38289 
0.45578 

bvis
1.97711 
-0.390265 
0.765453 
0.935184 
-0.866645 
0.118507 
-1.73758 
-0.90747 
-2.11826 
-0.905058 

Input:  1 1 1 1 1 0 0 0 0 0 
Output: 1 1 1 1 1 0 0 0 0 0 
Hidden: 1 1 0 0 1 

Input:  1 0 1 1 0 0 0 1 0 0 
Output: 1 0 1 1 0 0 0 0 0 0 
Hidden: 1 0 0 0 1 

Input:  1 1 0 1 1 0 0 0 1 0 
Output: 1 1 1 1 1 0 0 0 0 0 
Hidden: 1 1 0 1 1 

Input:  0 1 1 1 1 1 0 0 0 0 
Output: 0 1 1 1 1 1 0 0 0 0 
Hidden: 0 1 1 0 1 

Input:  1 1 0 1 0 1 0 0 0 1 
Output: 1 1 0 1 0 1 0 0 0 0 
Hidden: 1 1 0 0 0 

Input:  1 0 0 1 0 1 0 0 0 0 
Output: 1 1 0 1 0 1 0 0 0 0 
Hidden: 1 1 1 0 0 

Input:  1 1 1 1 0 0 0 0 0 0 
Output: 1 1 1 1 1 0 0 0 0 0 
Hidden: 1 1 0 0 1 

Input:  0 1 1 0 1 1 0 0 0 0 
Output: 0 1 1 1 1 1 0 0 0 0 
Hidden: 0 1 1 0 1 

Input:  1 0 1 0 1 0 0 0 1 0 
Output: 1 0 1 0 1 0 0 0 1 0 
Hidden: 1 0 1 1 1 

Input:  1 1 1 1 1 0 1 0 0 0 
Output: 1 1 1 1 1 0 0 0 0 0 
Hidden: 1 1 0 0 1

できてるっぽい。
このデータは10次元あるけど、5次元でもだいたい表現できることがわかった。

入力データ2

もうちょっとわかりやすい例を追記。

10 2 10000 0.3 0.001
6
0 0 0 0 0 1 1 1 1 1
0 0 0 0 0 1 1 1 1 1
0 0 0 0 0 1 1 1 1 1
1 1 1 1 1 0 0 0 0 0
1 1 1 1 1 0 0 0 0 0
1 1 1 1 1 0 0 0 0 0

結果2

...
epoch 9997 : Err = 0%
epoch 9998 : Err = 0%
epoch 9999 : Err = 0%
Weight W
2.5364 2.39055 2.58962 2.56752 2.59881 -2.50951 -2.44933 -2.45826 -2.52386 -2.4876 
-2.68533 -2.7743 -2.81884 -2.80364 -2.83224 2.76912 2.68906 2.70468 2.77856 2.74376 

bhid
-0.0476432 
0.0985837 

bvis
0.0749993 
0.190013 
0.114568 
0.116447 
0.112886 
-0.122783 
-0.117929 
-0.119592 
-0.123139 
-0.123641 

Input:  0 0 0 0 0 1 1 1 1 1 
Output: 0 0 0 0 0 1 1 1 1 1 
Hidden: 0 1 

Input:  0 0 0 0 0 1 1 1 1 1 
Output: 0 0 0 0 0 1 1 1 1 1 
Hidden: 0 1 

Input:  0 0 0 0 0 1 1 1 1 1 
Output: 0 0 0 0 0 1 1 1 1 1 
Hidden: 0 1 

Input:  1 1 1 1 1 0 0 0 0 0 
Output: 1 1 1 1 1 0 0 0 0 0 
Hidden: 1 0 

Input:  1 1 1 1 1 0 0 0 0 0 
Output: 1 1 1 1 1 0 0 0 0 0 
Hidden: 1 0 

Input:  1 1 1 1 1 0 0 0 0 0 
Output: 1 1 1 1 1 0 0 0 0 0 
Hidden: 1 0

Negative/Positive Thinking