-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathboolRS.cpp~
More file actions
159 lines (157 loc) · 3.24 KB
/
boolRS.cpp~
File metadata and controls
159 lines (157 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#include"irs.h"
/*
 * Load every entry of ./input/ (except "." and "..") into the global `docs`
 * vector, one `doc` per entry: `fname` is the directory entry name and
 * `fcontent` is the text read from the file.
 *
 * Failures are reported on stdout ("open dir failed!" / "open file error")
 * and the affected directory or file is skipped; nothing is returned.
 *
 * NOTE(review): `buf` is a file-level buffer declared outside this block
 * (presumably in irs.h). Its capacity is not visible here; it is assumed to
 * hold at least `len` bytes -- TODO confirm.
 */
void readallfile()
{
    const string inputDir="./input/";
    const int len=50000000;              // upper bound on bytes read per file

    DIR *pDir=opendir(inputDir.c_str());
    if(pDir==NULL)
    {
        cout<<"open dir failed!"<<endl;
    }
    else
    {
        struct dirent *pDirEntry=NULL;
        while((pDirEntry=readdir(pDir))!=NULL)
        {
            // Skip the self/parent pseudo-entries.
            if(!strcmp(pDirEntry->d_name,"..")||!strcmp(pDirEntry->d_name,"."))
                continue;

            const string path=inputDir+pDirEntry->d_name;
            FILE *fp=fopen(path.c_str(),"r");
            if(fp==NULL)
            {
                cout<<"open file error"<<endl;
            }
            else
            {
                // Use the byte count fread reports instead of trusting that
                // the buffer still ends in a NUL: a file that fills the
                // buffer would otherwise be read past its end.
                const size_t got=fread(buf,1,len,fp);
                fclose(fp);

                // Keep the historical "content stops at the first NUL byte"
                // behaviour, but never look beyond the bytes just read.
                const char *start=(const char *)buf;
                const void *nul=memchr(start,0,got);
                const size_t used=(nul!=NULL)?(size_t)((const char *)nul-start):got;

                doc pdoc;
                pdoc.fname=pDirEntry->d_name;
                pdoc.fcontent.assign(start,used);
                docs.push_back(pdoc);
            }
            // Leave the shared buffer cleared, as the original code did.
            memset(buf,0,sizeof(buf));
        }
        closedir(pDir);
    }

    // Debug output for two sample documents; only touch them when they exist
    // (indexing past the end of `docs` is undefined behaviour).
    if(docs.size()>30)
    {
        cout<<docs[30].fname<<endl;
        cout<<docs[30].fcontent.length()<<endl;
    }
    if(docs.size()>42)
    {
        cout<<docs[42].fname<<endl;
        cout<<docs[42].fcontent.length()<<endl;
    }
}
/*
 * Build the inverted index `myindex` from the global `docs` vector.
 *
 * Each document's content is split on the delimiter set below, every token
 * is passed through strtolow(), and the document's position in `docs` is
 * appended to that token's postings list (at most once per document).
 * After all documents are processed, the length of each postings list is
 * pushed to the FRONT of the list, so the first element of every list is a
 * count and the remaining elements are document positions.
 */
void genIndex()
{
    const char *spilt="[]\t\r\n ,()-+.|\"':?~!;";
    for(size_t d=0;d<docs.size();d++)
    {
        const int docId=static_cast<int>(d);

        // strtok writes NUL bytes into the text it scans, so tokenize a
        // private copy. Owning it through a string releases the memory on
        // every exit path, including exceptions thrown while indexing.
        string text=docs[d].fcontent;

        for(char *p=strtok(&text[0],spilt);p!=NULL;p=strtok(NULL,spilt))
        {
            string temp=p;
            temp=strtolow(temp);

            // One lookup: operator[] creates an empty postings list the
            // first time a term is seen.
            list<int> &postings=myindex[temp];

            // Documents are visited in increasing order, so a repeated term
            // within the same document always has this id at the back.
            if(postings.empty()||postings.back()!=docId)
                postings.push_back(docId);
        }
    }
    //add the length of postings lists
    for(map<string,list<int> >::iterator it=myindex.begin();it!=myindex.end();++it)
    {
        it->second.push_front(static_cast<int>(it->second.size()));
    }
}
/*
 * Dump the whole index to stdout: one line per term, the term first and
 * then every integer stored in its list, each preceded by a single space.
 */
void printIndex()
{
    for(const auto &entry : myindex)
    {
        cout<<entry.first;
        for(const int value : entry.second)
        {
            cout<<" "<<value;
        }
        cout<<endl;
    }
}
/*
 * Look up a single term in `myindex`.
 *
 * Returns a copy of the list stored for `str`, or an empty list when the
 * term is not present. When the term is found, the last element of its list
 * is also written to stdout on its own line.
 */
list<int> queryOne(string str)
{
    map<string,list<int> >::iterator hit=myindex.find(str);
    if(hit==myindex.end())
    {
        return list<int>();
    }

    list<int> postings=hit->second;
    cout<<postings.back()<<endl;
    return postings;
}
/*
 * Print a postings list as produced by genIndex()/queryOne().
 *
 * The first element of `l` is the number of postings (genIndex pushes the
 * list length to the front); the remaining elements are positions in the
 * global `docs` vector. Output is "nums: <count>", then one
 * "<position> <file name>" line per posting, then a blank line.
 *
 * An empty list is reported as "nums: 0" instead of dereferencing begin().
 */
void printList(list<int> l)
{
    if(l.empty())
    {
        cout<<"nums: 0"<<endl;
        cout<<endl;
        return;
    }

    list<int>::iterator it=l.begin();
    cout<<"nums: "<<*it<<endl;

    // Step past the count: it is not a document position and must not be
    // used to index `docs`.
    ++it;
    for(;it!=l.end();++it)
    {
        cout<<*it<<" "<<docs[*it].fname<<endl;
    }
    cout<<endl;
}
/*
 * Demo driver: prints a small sample map, loads the documents, exercises an
 * in-place character edit on a local buffer, builds and prints the index,
 * then runs three fixed single-term queries.
 *
 * "NOT FIND!" is printed when the first query ("caesar") has no result, in
 * which case the other results are not printed (original behaviour). When
 * it does have a result, each remaining query is printed only if it found
 * something, otherwise "NOT FIND!" is printed for it.
 */
int main()
{
    map<string,int> index;
    index["hello"]=9;
    index["world"]=7;
    for(map<string,int>::iterator it=index.begin();it!=index.end();++it)
    {
        cout<<it->first<<" "<<it->second<<endl;
    }

    readallfile();

    // Point directly at the local array; the previous malloc(4) result was
    // overwritten straight away and never freed.
    char ch[]="zfx hello, zhang fei (xue) h - + gh";
    char *c=ch;
    cout<<c<<endl;
    c[0]='F';
    cout<<c<<endl;

    cout<<"------------------------"<<endl;
    genIndex();
    printIndex();

    list<int> myq=queryOne("caesar");
    list<int> m2=queryOne("brutus");
    list<int> m3=queryOne("calpurnia");
    if(myq.empty())
    {
        cout<<"NOT FIND!"<<endl;
    }
    else
    {
        printList(myq);

        // A term missing from the index yields an empty list, which must
        // not be handed to printList as if it carried a count element.
        if(m2.empty())
            cout<<"NOT FIND!"<<endl;
        else
            printList(m2);

        if(m3.empty())
            cout<<"NOT FIND!"<<endl;
        else
            printList(m3);
    }
    return 0;
}