@@ -0,0 +1,162 @@
#include <iostream>
#include <sstream>
#include <string.h>
#include <string>
#include <fstream>
#include <vector>
#include <stdlib.h>
#include <iomanip>

using namespace std;

// Advance p just past the next ',' separator in a CSV field scan.
// - Stops on (without consuming) '\n', so a row terminator is never skipped.
// - BUG FIX: the original unconditionally executed ++p after the loop, so
//   when the scan ended on the terminating '\0' (no comma left in the line),
//   p was left pointing one past the end of the string and later atof(p)
//   calls read out of bounds. Now p only steps over an actual ','.
void move(char* &p) {
while (*p != ',' && *p != '\0') {
if (*p == '\n') return;
++p;
}
// Only consume a real separator — never walk past the end of the buffer.
if (*p == ',') ++p;
}

// Linear regression on train.csv, trained with per-row stochastic gradient
// descent after min/max feature scaling. Writes per-row predictions to
// submission.csv and the learned parameters to reuslt_0.05.csv.
// NOTE(review): "testData" below re-opens train.csv, so the reported loss is
// a training-set loss, not a held-out test loss — confirm this is intended.
int main() {
ifstream trainningData("../../data-of-machine-learning/train.csv");
stringstream dataReader;
char header[4000], line[2500];
// paras: model weights, one per feature column; fixed SGD step size.
double paras[400], learningRate = 0.003;
// lineData: parsed features of the current row. max/min: per-feature range
// gathered by the first scan and used for min/max normalization.
double lineData[400], max[400] = {0}, min[400] = {0};
int numOfData = 0;

// Count the header's comma-separated columns and zero-init the weights.
trainningData.getline(header, 4000);
char *p;
const char *split = ",";
int num = 0;
p = strtok(header, split);
while (p != NULL) {
paras[num] = 0;
++num;
p = strtok(NULL, split);
}
// Presumably the columns are: id, features..., label — so the feature count
// is the column count minus 2. TODO confirm against the CSV layout.
num = num - 2;

cout << "num: " << num << endl;

bool init = true;
int ct = 0;

// Pass 1: scan every data row to record per-feature max/min and the row
// count. "init" forces the first row's values into max/min unconditionally.
while (!trainningData.eof()) {
//if (ct++ >= 10) break;
trainningData.getline(line, 2500);
if (strlen(line) == 0) break;
++numOfData;
p = line;
move(p); // skip the leading id column
double preditVal = 0; // unused in this pass
for (int j = 0; j < num; ++j) {
double value = atof(p);
if (value > max[j] || init) max[j] = value;
if (value < min[j] || init) min[j] = value;
move(p);
}
init = false;
}

cout << numOfData << endl;

// Training: 1000 epochs of per-row SGD. The stream is rewound and the
// header line re-consumed at the start of every epoch.
for (int i = 0; i < 1000; ++i) {
cout << i << endl;
trainningData.clear();
trainningData.seekg(0);

trainningData.getline(header, 4000);

ct = 0;

while (!trainningData.eof()) {
//if (ct++ > 30) break;
trainningData.getline(line, 2500);
if (strlen(line) == 0) break;
p = line;
move(p); // skip the id column
double preditVal = 0;
for (int j = 0; j < num; ++j) {
lineData[j] = atof(p);
move(p);
// Min/max-scale every feature except column 0 (presumably a bias
// column — TODO confirm); the range check guards divide-by-zero.
if (j > 0 && max[j] - min[j] != 0) lineData[j] = (lineData[j] - min[j]) / (max[j] - min[j]);
preditVal += paras[j] * lineData[j];
}
// After the loop p points at the final column, the label. The gradient
// of the squared error is (prediction - label) * feature.
double weight = preditVal - atof(p);
for (int j = 0; j < num; ++j) {
paras[j] -= weight * lineData[j] * learningRate;
}
}
}


// Evaluation: this re-opens train.csv (see NOTE at the top). The results
// file name "reuslt_0.05.csv" is spelled as in the original output path.
ifstream testData("../../data-of-machine-learning/train.csv");
ofstream preditData("../../data-of-machine-learning/submission.csv");
ofstream resultData("../../data-of-machine-learning/reuslt_0.05.csv");

testData.getline(header, 4000);
init = true;
int count = 0;

// Re-derive max/min from the evaluation file. Because it is the same file,
// this reproduces the training statistics; with a genuinely different test
// file it would silently overwrite them.
while (!testData.eof()) {
//if (count++ >= 10) break;
testData.getline(line, 2500);
if (strlen(line) == 0) break;
p = line;
move(p);
double preditVal = 0;
double value;
for (int j = 0; j < num; ++j) {
value = atof(p);
move(p);
if (value > max[j] || init) max[j] = value;
if (value < min[j] || init) min[j] = value;
}
init = false;
}

// Rewind and predict each row, accumulating squared error.
testData.clear();
testData.seekg(0);

testData.getline(header, 4000);
int n = 0;

preditData << "Id,reference" << endl;

double loss = 0;
count = 0;

while (!testData.eof()) {
//if (count++ >= 10) break;
testData.getline(line, 2500);
if (strlen(line) == 0) break;
p = line;
move(p);
double preditVal = 0;
for (int j = 0; j < num; ++j) {
lineData[j] = atof(p);
if (j > 0 && max[j] - min[j] != 0) lineData[j] = (lineData[j] - min[j]) / (max[j] - min[j]);
move(p);
preditVal += paras[j] * lineData[j];
}
double result = atof(p); // p now points at the label column
loss += (preditVal - result) * (preditVal - result);
preditData << n << ',' << preditVal << endl;
++n;
}

// Mean squared error with the conventional 1/2 factor.
loss /= (2 * numOfData);


// Dump the learned weights (console at 4 decimals, file at 2 significant
// digits) followed by the final loss.
for (int i = 0; i < num; ++i) {
printf("%.4f,", paras[i]);
resultData << setprecision(2) << paras[i] << ',';
}

printf("\n%.4f\n", loss);

return 0;
}


BIN -19 KB Exp1/test.o
Binary file not shown.
BIN +0 Bytes (100%) Exp1/test8.o
Binary file not shown.
@@ -6,6 +6,7 @@
#include <vector>
#include <stdlib.h>
#include <iomanip>
#include <math.h>

using namespace std;

@@ -17,111 +18,185 @@ void move(char* &p) {
++p;
}

double data[25002 * 400];

int main() {
ifstream trainningData("../../data-of-machine-learning/train.csv");
stringstream dataReader;
char header[4000], line[2500];
double paras[400], learningRate = 0.03;
double lineData[400], max[400] = {0}, min[400] = {0};
double paras[400], learningRate = 0.5, randomRate = 0.004;
double max[400] = {0}, min[400] = {0};
int numOfData = 0;
double *lineData;
memset(data, 0, sizeof(data[0]) * 25002 * 400);

trainningData.getline(header, 4000);
char *p;
const char *split = ",";
int num = 0;
p = strtok(header, split);
while (p != NULL) {
paras[num] = 1;
paras[num] = 0;
++num;
p = strtok(NULL, split);
}
num = num - 2;

cout << "num: " << num << endl;

bool init = true;
int ct = 0;

while (!trainningData.eof()) {
trainningData.getline(line, 2500);
if (strlen(line) == 0) break;
++numOfData;
p = line;
move(p);
double preditVal = 0;
for (int j = 0; j < num; ++j) {
for (int j = 0; j < num + 1; ++j) {
double value = atof(p);
if (value > max[j] || j == 0) max[j] = value;
if (value < min[j] || j == 0) min[j] = value;
if (value > max[j] || init) max[j] = value;
if (value < min[j] || init) min[j] = value;
move(p);
data[numOfData * (num + 1) + j] = value;
}
init = false;
++numOfData;
}

cout << numOfData << endl;

for (int i = 0; i < numOfData; ++i) {
lineData = data + i * (num + 1);
for (int j = 0; j < num; ++j) {
if (max[j] - min[j] != 0 && false) {
//lineData[j] = (lineData[j] - min[j]) / (max[j] - min[j]);
}
//cout << lineData[j] << endl;
}
}


for (int i = 0; i < 30000; ++i) {
ofstream lossData("../../data-of-machine-learning/loss_0.04.csv");

cout << i << endl;
if (i % 5000 == 0) learningRate *= 0.1;
for (int k = 0; k < 30; ++k) {

trainningData.clear();
trainningData.seekg(0);
for (int i = 0; i < numOfData; ++i) {
double preditVal = 0;
lineData = data + i * (num + 1);
for (int j = 0; j < num; ++j) {
preditVal += paras[j] * lineData[j] * lineData[j];
}
double weight = preditVal - lineData[num];
//cout << weight << endl;
for (int j = 0; j < num; ++j) {
paras[j] -= weight * lineData[j] * randomRate;
}
}
}

double lastLoss = 1000;

for (int k = 0; k < 300000; ++k) {

trainningData.getline(header, 4000);
cout << k << endl;
if (k % 20000 == 0) learningRate *= 0.7;

double regresses[400] = {0};

while (!trainningData.eof()) {
trainningData.getline(line, 2500);
if (strlen(line) == 0) break;
p = line;
move(p);
int count = 0;

double loss = 0;

for (int i = 0; i < numOfData; ++i) {
lineData = data + i * (num + 1);
double preditVal = 0;
for (int j = 0; j < num; ++j) {
lineData[j] = atof(p);
move(p);
preditVal += paras[j] * lineData[j];
if (j > 0) lineData[j] = (lineData[j] - min[j]) / (max[j] - min[j]);
preditVal += paras[j] * lineData[j] * lineData[j];
}
double result = atof(p);
double weight = preditVal - result;
double weight = preditVal - lineData[num];
//cout << weight << endl;
loss += weight * weight;
for (int j = 0; j < num; ++j) {
regresses[j] += weight * lineData[j];
}
}

loss /= (1 * numOfData);
loss = sqrt(loss);
lossData << loss << endl;
cout << loss << endl;
lastLoss = loss;

for (int j = 0; j < num; ++j) {
//if (j != 0) paras[j] *= (1 - 0.03 * 1000 / numOfData);
paras[j] -= regresses[j] * learningRate / numOfData;
if (j != 0) paras[j] *= (1 - learningRate * 1 / numOfData);
if (min[j] != 0 || max[j] != 0) paras[j] -= regresses[j] * learningRate / numOfData;
//lossData << regresses[j] * learningRate / numOfData;
//if (j != num - 1) lossData << ",";
//else lossData << endl;
}

}


ifstream testData("../../data-of-machine-learning/test2.csv");
ofstream preditData("../../data-of-machine-learning/submission8.csv");
ofstream resultData("../../data-of-machine-learning/reuslt8.csv");
//free(data);

preditData << "Id,reference" << endl;
ifstream testData("../../data-of-machine-learning/test2.csv");
ofstream preditData("../../data-of-machine-learning/submission.csv");
ofstream resultData("../../data-of-machine-learning/reuslt_0.04.csv");

testData.getline(header, 4000);
init = true;
int count = 0;

int n = 0;
preditData << "Id,reference" << endl;

while (!testData.eof()) {
testData.getline(line, 2500);
if (strlen(line) == 0) break;
p = line;
move(p);
double preditVal = 0;
double value;
for (int j = 0; j < num; ++j) {
lineData[j] = atof(p);
value = atof(p);
move(p);
preditVal += paras[j] * lineData[j];
preditVal += paras[j] * value * value;
}
preditData << n << ',' << preditVal << endl;
preditData << count << ',' << preditVal << endl;
init = false;
++count;
}

int n = 0;


double loss = 0;
count = 0;

for (int i = 0; i < numOfData; ++i) {
lineData = data + i * (num + 1);
double preditVal = 0;
for (int j = 0; j < num; ++j) {
preditVal += paras[j] * lineData[j] * lineData[j];
}
double result = lineData[num];
loss += (preditVal - result) * (preditVal - result);
//preditData << n << ',' << preditVal << endl;
++n;
}

loss /= numOfData;
loss = sqrt(loss);


for (int i = 0; i < num; ++i) {
printf("%.4f,", paras[i]);
resultData << setprecision(2) << paras[i] << ',';
}

printf("\n%.4f\n", loss);

return 0;
}

40 test.m
@@ -0,0 +1,40 @@
% BFGS quasi-Newton method for minimizing f = x1*x1+2*x2*x2-2*x1*x2-4*x1,
% starting at x0 = [1 1] with H0 the 2x2 identity matrix.
% Algorithm follows "Optimization Methods" (Tianjin University Press), p.122.
% v1.0 author: liuxi BIT

%format long
syms x1 x2 alpha;
f = x1*x1+2*x2*x2-2*x1*x2-4*x1;% objective function to minimize
df=jacobian(f,[x1 x2]);% gradient (row vector of partials) of f
epsilon=1e-6;%0.000001
k=1;
x0=[1 1];% starting point
xk=x0;
gk=subs(df,[x1 x2],xk);% gradient at the starting point
%gk=double(gk);
H0=[1 0;0 1];% initial inverse-Hessian approximation: 2x2 identity
while(norm(gk)>epsilon)% stop when ||gk|| <= epsilon
if k==1
pk=-H0*gk';% search direction: -H * gradient
Hk0=H0;% Hk0 holds H(k-1)
else
pk=-Hk*gk';
Hk0=Hk;% Hk0 holds H(k-1)
end
f_alpha=subs(f,[x1 x2],xk+alpha*pk');% line-search objective in alpha
[left right] = jintuifa(f_alpha,alpha);% advance-retreat bracketing of a unimodal interval
[best_alpha best_f_alpha]=golddiv(f_alpha,alpha,left,right);% golden-section search for the step length
xk=xk+best_alpha*pk';
gk0=gk;% gk0 holds g(k-1)
gk=subs(df,[x1 x2],xk);
%gk=double(gk);
yk=gk-gk0;
sk=best_alpha*pk';%sk=x(k+1)-xk
%====begin============= difference from the DFP update ==============
% BUG FIX: the original built wk from Hk, which is undefined on the first
% iteration (when k==1 only Hk0 is set); the BFGS correction term uses the
% previous inverse-Hessian approximation H(k-1) = Hk0.
wk=(yk*Hk0*yk')^0.5*(sk'/(yk*sk')-Hk0*yk'/(yk*Hk0*yk'));
Hk=Hk0-Hk0*yk'*yk*Hk0/(yk*Hk0*yk')+sk'*sk/(yk*sk')+wk*wk';% BFGS update formula
%====end=============== difference from the DFP update ==============
k=k+1;
end
best_x=xk% optimal point
best_fx=subs(f,[x1 x2],best_x)% optimal value