Permalink
Browse files

sources

  • Loading branch information...
Xiang Liang Xiang Liang
Xiang Liang authored and Xiang Liang committed Aug 30, 2009
1 parent 1451e36 commit 829091e882960995f1fe2ae5fd47954a320d2ba9
View
@@ -34,11 +34,15 @@ struct User{
User(){
begin_tm = INT_MAX;
end_tm = 0;
+ max_line = 0;
+ min_line = INT_MAX;
}
map<string,float> language;
map<string,float> repos;
int begin_tm;
int end_tm;
+ int max_line;
+ int min_line;
};
struct Item{
@@ -49,6 +53,7 @@ struct Item{
map<string,float> repos;
int tm, line;
string all_lang;
+ string reponame;
map<string,float> name;
map<string,float> lang_line;
};
@@ -174,6 +179,7 @@ void loadUserItemData(UserList & users, ItemList & items, vector<Rating> & data)
int id,tm;
string buf,name;
iss >> id >> buf >> tm >> name;
+ items[id].reponame = buf + "/" + name;
items[id].repos[buf] = 1;
items[id].tm = tm;
splitName(name, items[id].name);
@@ -190,6 +196,8 @@ void loadUserItemData(UserList & users, ItemList & items, vector<Rating> & data)
int ii = data[i].item;
users[uu].begin_tm = min<int>(users[uu].begin_tm, items[ii].tm);
users[uu].end_tm = max<int>(users[uu].end_tm, items[ii].tm);
+ users[uu].max_line = max<int>(users[uu].max_line, items[ii].line);
+ users[uu].min_line = min<int>(users[uu].min_line, items[ii].line);
}
}
View
@@ -0,0 +1 @@
+g++ -o knn main.cpp -I../include/  # compile main.cpp into the executable "knn"; -I adds ../include/ to the header search path
View
Binary file not shown.
View
@@ -0,0 +1,104 @@
+#include "github.h"
+
+vector< Rating > data; // rating records filled by loadData(); predictAll reads .user and .item from each entry
+vector< vector<int> > ui(USER_NUM); // ui[user] = item ids collected for that user in predictAll
+vector< float > nu(USER_NUM), ni(ITEM_NUM); // per-user / per-item record counters incremented in predictAll (not read elsewhere in this file)
+vector< vector< pair<int, float> > > w(ITEM_NUM); // w[a] = (neighbour item b, weighted similarity) pairs built and sorted by loadSim
+UserList users; // filled by loadUserItemData()
+ItemList items; // filled by loadUserItemData(); loadSim reads .repos, .language, .tm and .name
+
+// Loads item-item similarities into the global neighbour table `w`.
+//
+// Reads whitespace-separated "<item a> <item b> <score>" triples from
+// ../data2/knni-iuf.txt (with ".0" appended when `train` is true). Each score
+// is re-weighted before (b, score) is appended to w[a]:
+//   * by (1 + sim(repos)) and (1 + sim(language)) of the two items,
+//   * by exp(-0.04 * dt), where dt is the absolute difference of the items'
+//     `tm` fields divided by 86400 (i.e. days, assuming tm is a timestamp in
+//     seconds -- TODO confirm),
+//   * by (0.8 + sim(name)).
+// Finally every w[i] is sorted with the GreaterSecond comparator from
+// github.h (presumably descending by score -- verify there).
+//
+// Triples whose ids fall outside [0, w.size()) are skipped: `a` indexes `w`
+// here and `b` is later used by predict2 as an index into a vector of the
+// same size, so such ids would otherwise cause out-of-range accesses.
+void loadSim(bool train){
+    string file = "../data2/knni-iuf.txt";
+    if(train) file += ".0";
+    ifstream in(file.c_str());
+    if(!in){
+        // A missing file used to yield an empty table without any hint.
+        cerr << "loadSim: cannot open " << file << endl;
+    }
+    const int item_count = static_cast<int>(w.size());
+    int a, b;
+    float s;
+    while(in >> a >> b >> s){
+        if(a < 0 || a >= item_count || b < 0 || b >= item_count) continue;
+        s *= 1 + sim(items[a].repos, items[b].repos);
+        s *= 1 + sim(items[a].language, items[b].language);
+        double dt = abs(items[a].tm - items[b].tm);
+        dt /= 86400;
+        s *= exp(-0.04 * dt);
+        s *= 0.8 + sim(items[a].name, items[b].name);
+        // Construct the pair directly: make_pair<int,float>(b, s) does not
+        // compile from C++11 on, because its parameters become rvalue
+        // references and b, s are lvalues.
+        w[a].push_back(pair<int,float>(b, s));
+    }
+    for(size_t i = 0; i < w.size(); ++i){
+        sort(w[i].begin(), w[i].end(), GreaterSecond<int,float>);
+    }
+    cout << "load sim finished!" << endl;
+}
+/*
+void predict(int u, vector< pair<int,float> > & ret){
+ set<int> rated(ui[u].begin(), ui[u].end());
+ vector<float> cand(ITEM_NUM, 0);
+ for(int i = 0; i < ui[u].size(); ++i){
+ int ii = ui[u][i];
+ for(int j = 0; j < w[ii].size() && j < 1000; ++j){
+ int jj = w[ii][j].first;
+ if(rated.find(jj) != rated.end()) continue;
+ cand[jj] += w[ii][j].second
+ * (1 + sim(user_repos[u], item_repos[jj]))
+ * (1 + sim(user_lang[u], item_lang[jj]));
+ }
+ }
+ for(int i = 0; i < cand.size(); ++i)
+ if(cand[i] > 0)
+ ret.push_back(make_pair<int,float>(i, cand[i]));
+ sort(ret.begin(), ret.end(), GreaterSecond<int,float>);
+}
+*/
+// Builds the recommendation list for user `u`.
+//
+// For every item in ui[u], the first (at most 1000) entries of that item's
+// neighbour list in `w` are visited; each neighbour not already present in
+// ui[u] accumulates the neighbour's weight. Items with a positive accumulated
+// score are appended to `ret` as (item id, score), and `ret` is then sorted
+// with the GreaterSecond comparator from github.h.
+//
+// `ret` is appended to, not cleared. An out-of-range `u` leaves `ret`
+// untouched; out-of-range item ids are ignored instead of indexing past the
+// end of `w` or of the score vector.
+void predict2(int u, vector< pair<int,float> > & ret){
+    if(u < 0 || u >= static_cast<int>(ui.size())) return;
+
+    const size_t kMaxNeighbours = 1000; // neighbours consulted per owned item
+    const vector<int> & owned = ui[u];
+    set<int> rated(owned.begin(), owned.end());
+    vector<float> cand(ITEM_NUM, 0);
+    const int cand_count = static_cast<int>(cand.size());
+
+    for(size_t i = 0; i < owned.size(); ++i){
+        const int ii = owned[i];
+        if(ii < 0 || ii >= static_cast<int>(w.size())) continue;
+        const vector< pair<int,float> > & nbrs = w[ii];
+        const size_t limit = min<size_t>(nbrs.size(), kMaxNeighbours);
+        for(size_t j = 0; j < limit; ++j){
+            const int jj = nbrs[j].first;
+            if(jj < 0 || jj >= cand_count) continue;
+            if(rated.find(jj) != rated.end()) continue;
+            cand[jj] += nbrs[j].second;
+        }
+    }
+
+    for(int i = 0; i < cand_count; ++i)
+        if(cand[i] > 0)
+            // pair<int,float>(...) instead of make_pair<int,float>(...): the
+            // latter rejects lvalue arguments from C++11 on.
+            ret.push_back(pair<int,float>(i, cand[i]));
+    sort(ret.begin(), ret.end(), GreaterSecond<int,float>);
+}
+
+// Runs the whole pipeline and writes one result row per test user.
+//
+// Steps, in order:
+//   1. loadData / loadUserItemData fill `data`, `users` and `items`.
+//   2. Every record in `data` is added to ui[user] and counted in nu / ni.
+//   3. loadSim(train) builds the neighbour table.
+//   4. The test users come from getTestSet2 when `train` is true, otherwise
+//      from getTestSet; only the map keys are used here.
+//   5. For each test user, predict2 is called and up to 500 entries are
+//      written to ../ret2/results-knni-all.txt (".0" appended when `train`
+//      is true) as "user \t item \t score \t ...", each score divided by
+//      the score of the first entry of the sorted list.
+//
+// Records whose user or item id is out of range for ui / nu / ni are skipped
+// rather than written past the end of those vectors.
+void predictAll(bool train){
+    loadData(data,train);
+    loadUserItemData(users, items, data);
+    const int user_count = static_cast<int>(ui.size());
+    const int item_count = static_cast<int>(ni.size());
+    for(size_t i = 0; i < data.size(); ++i){
+        const int user = data[i].user;
+        const int item = data[i].item;
+        if(user < 0 || user >= user_count || item < 0 || item >= item_count) continue;
+        ui[user].push_back(item);
+        nu[user]++;
+        ni[item]++;
+    }
+
+    loadSim(train);
+    map<int,int> test;
+    if(train) getTestSet2(test);
+    else getTestSet(test);
+    string file = "../ret2/results-knni-all.txt";
+    if(train) file += ".0";
+    ofstream out(file.c_str());
+    if(!out){
+        // Without this check all predictions would be computed and then
+        // silently discarded.
+        cerr << "predictAll: cannot open " << file << " for writing" << endl;
+        return;
+    }
+    for(map<int,int>::iterator k = test.begin(); k != test.end(); ++k){
+        const int u = k->first;
+        vector< pair<int,float> > ret;
+        predict2(u, ret);
+
+        out << u << "\t";
+        // The loop body only runs when ret is non-empty, so ret[0] is valid.
+        for(size_t i = 0; i < ret.size() && i < 500; ++i)
+            out << ret[i].first << "\t" << ret[i].second / ret[0].second << "\t";
+        out << '\n'; // close() below flushes; no need to flush every row
+    }
+    out.close();
+}
+
+// Entry point. The first command-line argument is parsed with atoi():
+// 0 calls predictAll(false), any other value calls predictAll(true).
+// Returns 1 with a usage message when the argument is missing, instead of
+// passing a null argv[1] to atoi().
+int main(int argc, char ** argv){
+    if(argc < 2){
+        cerr << "usage: " << (argc > 0 ? argv[0] : "knn")
+             << " <flag>   (0: predictAll(false), non-zero: predictAll(true))" << endl;
+        return 1;
+    }
+    predictAll(atoi(argv[1]) != 0);
+    return 0;
+}
+
Oops, something went wrong.

0 comments on commit 829091e

Please sign in to comment.