PageRank
Time Limit: 10 Sec Memory Limit: 256 MB
Submissions: 168 Solved: 8
Description
Google is famous over the world, much of the reason for their success lies with the effective search result for their user. In the final, support this effective is the technique called PageRank which largely used the huge url link network. A simple example is a hyperlink in Page A linked to Page B is viewed as A give a vote of support to B.
For us, PageRank is a complex technique, to simplify the problem, let’s analysis a simple model first. For every hyperlink, we just give 2 kind of attribute, the tag on the hyperlink, and the target url hyperlink point. So a hyperlink is viewed as a tag vote to an url.
As a user of search engine, each time we search, we will offer an keywork. Now the problem is for each given keywork, check all given hyperlink, if the tag on hyperlink contain this keywork, we count it’s a effective vote, then list the top 10 most voted url. (if 2 urls get the same votes, we sort it with lexicographic order)
Input
First line is N, indicate N hyperlinks. (1<=N<=20000)
Next N lines:
Each line contain two string, the first is the tag of hyperlink and the second is the url.
We guarantee that tags just contain lowcase and the length is less than 7, and urls just not contain blank characters(blank spaces, tabs and line break characters), the length of urls is less than 30.
Next line is Q, indicate Q times search. (1<=Q<=50000)
Next Q lines:
Each line just contain a string, indicate the keyword offered by user.
Output
For each search, list the top10 most voted urls, and a blankspace and the vote number.(if less then 10 urls be voted, just list all). If no url get votes, just output “Sorry, no result satisfied." In a single line.
For the answer of difference search, output a blank line between them.(Never leave a redundant blank line before the end of file)
Important hint, huge output, printf is recommended.
Sample Input
10
hustacm http://acm.hust.edu.cn
acmicpc http://acm.hust.edu.cn
acm http://acm.hust.edu.cn
hoj http://acm.hust.edu.cn/thx
vjudge http://acm.hust.edu.cn/vt
forum http://acm.hust.edu.cn/forum
hhoj http://acm.hust.edu.cn/vt
isun http://acm.hust.edu.cn/vt
sempr http://acm.hust.edu.cn/thx
gaewah http://acm.hust.edu.cn/forum
6
acm
m
hoj
zy
j
h
Sample Output
http://acm.hust.edu.cn 3
http://acm.hust.edu.cn 3
http://acm.hust.edu.cn/forum 1
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn/vt 1
Sorry, no result satisfied.
http://acm.hust.edu.cn/vt 2
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn 1
http://acm.hust.edu.cn/forum 1
http://acm.hust.edu.cn/thx 1
http://acm.hust.edu.cn/vt 1
HINT
Source
HONG Zehua / HUST Monthly 2011.04.09
繁瑣的字符串插入查找,Trie 靈活應(yīng)用,因?yàn)榭臻g問題,用了一級指針,二級指針,鏈表。
預(yù)先開一個字符串buffer,用于存儲 tag 和 url,其它保存 url 的地方只保存相應(yīng)指針。
先根據(jù) url 字典序?qū)⑤斎氲?< tag,url > 排序,利用 Trie 統(tǒng)計(jì)對于同一個 url 的所有tag的所有子串對此 url 的 vote,同一tag的相同子串不要重復(fù)計(jì)數(shù),通過在 Trie 中加入mask實(shí)現(xiàn)。
Trie 中節(jié)點(diǎn)只保存本關(guān)鍵字投票的 Vote 鏈表的指針,若此節(jié)點(diǎn)對某 url 有投票,則 Vote 鏈表非空,且只保存得票最高的前 10 名。
程序用時 1932 MS。
1
#include <stdio.h>
2
#include <string.h>
3
#include <stdlib.h>
4
5
6
#define N 20009
7
#define INF 0x3f3f3f3f
8
9
#define STR_LEN 35
10
#define BUF_TM (N+N)
11
12
char buf[ BUF_TM ][ STR_LEN ];
13
int buf_end;
14
15
#define buf_init() buf_end = 0
16
#define buf_new() buf[ buf_end++ ]
17
18
19
struct __Data
20

{
21
char *tag, *url;
22
};
23
typedef struct __Data Data;
24
25
int cmpData( const void *a, const void *b )
{
26
return strcmp( ((Data*)a)->url, ((Data*)b)->url );
27
}
28
Data data[ N ];
29
30
31
#define VOTE_TM 3200000
32
#define VOTE_LIM 10
33
34
struct __Vote
35

{
36
char *pSz;
37
int vote;
38
struct __Vote *next;
39
};
40
typedef struct __Vote Vote;
41
Vote vote_mem[ VOTE_TM ];
42
int vote_end;
43
44
#define vote_init() vote_end = 0
45
#define vote_bef(a,b) ( ((a)->vote > (b)->vote) || (((a)->vote == (b)->vote)&&(strcmp((a)->pSz,(b)->pSz)<=0)) )
46
47
Vote* vote_new()
{
48
Vote *p = vote_mem + vote_end++;
49
memset( p, 0, sizeof(Vote) );
50
return p;
51
}
52
53
54
#define TRIE_TC 26
55
#define TRIE_TM 2000000
56
57
struct __Trie
58

{
59
int flag, mask, cnt;
60
Vote *pVote;
61
struct __Trie *ch[ TRIE_TC ];
62
};
63
typedef struct __Trie Trie;
64
Trie trie_mem[ TRIE_TM ];
65
int trie_end;
66
67
#define trie_init() trie_end = 0
68
69
Trie* trie_new()
{
70
Trie *p = trie_mem + trie_end++;
71
memset( p, 0, sizeof(Trie) );
72
return p;
73
}
74
75
void trie_addcnt( Trie **pRoot, char *p, int mask )
{
76
for ( ; ; )
{
77
if ( (*pRoot) == NULL )
{
78
(*pRoot) = trie_new();
79
}
80
if ( *p )
{
81
pRoot = &( (*pRoot)->ch[ (*p) - 'a' ] );
82
++p;
83
}
84
else
{
85
if ( (*pRoot)->mask != mask )
{
86
(*pRoot)->mask = mask;
87
++((*pRoot)->cnt);
88
}
89
return;
90
}
91
}
92
}
93
94
int trie_getcnt( Trie *root, char *p )
{
95
for ( ; ; )
{
96
if ( root == NULL )
{
97
return 0;
98
}
99
if ( *p )
{
100
root = root->ch[ (*p) - 'a' ];
101
++p;
102
}
103
else
{
104
return root->cnt;
105
}
106
}
107
return 0;
108
}
109
110
void trie_insert( Trie **pRoot, char *p, char *url, int vote, int mask )
{
111
for ( ; ; )
{
112
if ( (*pRoot) == NULL )
{
113
(*pRoot) = trie_new();
114
}
115
if ( *p )
{
116
pRoot = &( (*pRoot)->ch[ (*p) - 'a' ] );
117
++p;
118
}
119
else
{
120
(*pRoot)->cnt = 0;
121
if ( (*pRoot)->mask != mask )
{
122
Vote *pred, **ppVote, *nv;
123
int cnt;
124
125
(*pRoot)->flag = 1;
126
(*pRoot)->mask = mask;
127
128
ppVote = &((*pRoot)->pVote);
129
if ( (*ppVote) == NULL )
{
130
(*ppVote) = vote_new();
131
(*ppVote)->vote = INF;
132
}
133
134
nv = vote_new();
135
nv->pSz = url;
136
nv->vote = vote;
137
138
pred = (*ppVote);
139
ppVote = &(pred->next);
140
while ( (*ppVote) != NULL )
{
141
if ( vote_bef(pred,nv) && vote_bef(nv,(*ppVote)) )
{
142
if ( strcmp( nv->pSz, (*ppVote)->pSz ) != 0 )
{
143
nv->next = pred->next;
144
pred->next = nv;
145
}
146
break;
147
}
148
pred = (*ppVote);
149
ppVote = &(pred->next);
150
}
151
152
if ( (*ppVote) == NULL )
{
153
(*ppVote) = nv;
154
}
155
156
cnt = 1;
157
for ( nv = (*pRoot)->pVote->next; (nv!=NULL)&&(cnt<VOTE_LIM); nv=nv->next )
{
158
++cnt;
159
}
160
if ( nv != NULL )
{
161
nv->next = NULL;
162
}
163
}
164
return;
165
}
166
}
167
}
168
169
Trie* trie_find( Trie *root, char *p )
{
170
for ( ; ; )
{
171
if ( root == NULL )
{
172
return NULL;
173
}
174
if ( *p )
{
175
root = root->ch[ (*p) - 'a' ];
176
++p;
177
}
178
else
{
179
return root;
180
}
181
}
182
}
183
184
void solve( Trie *root, char *pSz )
{
185
Trie *p = trie_find( root, pSz );
186
Vote *i;
187
if ( (p==NULL) || (!p->flag) )
{
188
printf( "Sorry, no result satisfied.\n" );
189
return;
190
}
191
for ( i = p->pVote->next; i!=NULL; i=i->next )
{
192
printf( "%s %d\n", i->pSz, i->vote );
193
}
194
}
195
196
int main()
{
197
char tmp[ STR_LEN ];
198
int n, mask = 10, i, j, k, x, y, vote;
199
Trie *root = NULL;
200
201
vote_init();
202
trie_init();
203
buf_init();
204
scanf( "%d", &n );
205
for ( i = 0; i < n; ++i )
{
206
data[ i ].tag = buf_new();
207
data[ i ].url = buf_new();
208
scanf( "%s%s", data[ i ].tag, data[ i ].url );
209
}
210
qsort( data, n, sizeof(data[0]), cmpData );
211
212
i = j = 0;
213
do
{
214
while ( (j<n) && (strcmp(data[j].url,data[i].url)==0) )
{
215
++j;
216
}
217
218
for ( k = i; k < j; ++k )
{
219
++mask;
220
for ( x = 0; data[k].tag[x]; ++x )
{
221
for ( y = x; data[k].tag[y]; ++y )
{
222
tmp[ y-x ] = data[k].tag[y];
223
tmp[ y-x+1 ] = 0;
224
trie_addcnt( &root, tmp, mask );
225
}
226
}
227
}
228
229
for ( k = i; k < j; ++k )
{
230
++mask;
231
for ( x = 0; data[k].tag[x]; ++x )
{
232
for ( y = x; data[k].tag[y]; ++y )
{
233
tmp[ y-x ] = data[k].tag[y];
234
tmp[ y-x+1 ] = 0;
235
vote = trie_getcnt( root, tmp );
236
if ( vote > 0 )
{
237
trie_insert( &root, tmp, data[i].url, vote, mask );
238
}
239
}
240
}
241
}
242
243
i = j;
244
} while ( j < n );
245
246
scanf( "%d", &n );
247
while ( n-- > 0 )
{
248
scanf( "%s", tmp );
249
solve( root, tmp );
250
if ( n > 0 )
{
251
printf( "\n" );
252
}
253
}
254
return 0;
255
}
256