|
1 #! /usr/bin/env python |
|
2 ####################################################################### |
|
3 # Newslist $Revision: 66429 $ |
|
4 # |
|
5 # Syntax: |
|
6 # newslist [ -a ] |
|
7 # |
|
8 # This is a program to create a directory full of HTML pages |
|
9 # which between them contain links to all the newsgroups available |
|
10 # on your server. |
|
11 # |
|
12 # The -a option causes a complete list of all groups to be read from |
|
13 # the server rather than just the ones which have appeared since last |
|
14 # execution. This recreates the local list from scratch. Use this on |
|
15 # the first invocation of the program, and from time to time thereafter. |
|
16 # When new groups are first created they may appear on your server as |
|
17 # empty groups. By default, empty groups are ignored by the -a option. |
|
18 # However, these new groups will not be created again, and so will not |
|
19 # appear in the server's list of 'new groups' at a later date. Hence they |
|
20 # won't appear until you do a '-a' after some articles have appeared. |
|
21 # |
|
22 # I should really keep a list of ignored empty groups and re-check them |
|
23 # for articles on every run, but I haven't got around to it yet. |
|
24 # |
|
25 # This assumes an NNTP news feed. |
|
26 # |
|
27 # Feel free to copy, distribute and modify this code for |
|
28 # non-commercial use. If you make any useful modifications, let me |
|
29 # know! |
|
30 # |
|
31 # (c) Quentin Stafford-Fraser 1994 |
|
32 # fraser@europarc.xerox.com qs101@cl.cam.ac.uk |
|
33 # # |
|
34 ####################################################################### |
|
35 import sys,nntplib, string, marshal, time, os, posix, string |
|
36 |
|
37 ####################################################################### |
|
38 # Check these variables before running! # |
|
39 |
|
# Root directory for everything newslist reads and writes.
# Any filename below that does not start with '/' is taken relative to it.
topdir = '/anfs/qsbigdisc/web/html/newspage'

# Hostname of your NNTP server, e.g.
#     newshost = 'nntp-serv.cl.cam.ac.uk'
# To take the name from the NNTPSERVER environment variable instead, use:
#     newshost = posix.environ['NNTPSERVER']
newshost = 'nntp-serv.cl.cam.ac.uk'

# File holding the local cache of the newsgroup list.
treefile = 'grouptree'

# File holding newsgroup descriptions; set to '' if you don't want one.
# (A suitable file was found at ftp.uu.net in /uunet-info/newgroups.gz.)
descfile = 'newsgroups'

# Directory in which the HTML pages are created, e.g.
#     pagedir = '/usr/local/lib/html/newspage'
#     pagedir = 'pages'
pagedir = topdir

# HTML prefix that refers to the page directory, e.g.
#     httppref = '/newspage/'
# or leave it empty for relative links between pages (recommended):
#     httppref = ''
httppref = ''

# Name of the 'root' news page in the page directory.
# A .html suffix is added to it.
rootpage = 'root'

# Set skipempty to 0 if you want links to empty groups as well.
# Only affects the -a option.
skipempty = 1

# HTML appended to links that lead to further pages, to make those links
# stand out.  Set to '' if not wanted; a plain '...' also works well.
pagelinkicon = '... <img src="http://pelican.cl.cam.ac.uk/icons/page.xbm"> '

# ---------------------------------------------------------------------
# Less important personal preferences:

# sublistsize is the maximum number of items that will appear as an
# indented sub-list before the whole thing is moved onto a page of its
# own.  The smaller it is, the more pages you get, but the shorter each
# one will be.
sublistsize = 4
|
94 |
|
95 # That should be all. # |
|
96 ####################################################################### |
|
97 |
|
# Look for a personal configuration file, first in the current directory
# and then in the user's home directory.  The first one found is executed
# in this module's namespace, so it can override the settings above.
#
# os.path.expanduser('~') is used instead of os.environ['HOME'] so that an
# unset HOME variable no longer aborts the program with a KeyError.
#
# NOTE(review): the rc file is run as arbitrary Python code and the current
# directory is searched first -- only run newslist from a trusted directory.
for rcdir in os.curdir, os.path.expanduser('~'):
    rcfile = os.path.join(rcdir, '.newslistrc.py')
    if os.path.exists(rcfile):
        print(rcfile)
        execfile(rcfile)
        break
|
104 |
|
105 from nntplib import NNTP |
|
106 from stat import * |
|
107 |
|
# Reduce the RCS keyword string to the bare revision number by dropping
# every whitespace-separated word that contains a '$'.  str methods are
# used in place of the deprecated string.join()/string.split() functions.
rcsrev = '$Revision: 66429 $'
rcsrev = ' '.join(word for word in rcsrev.split() if '$' not in word)

# Maps group name -> description; filled in by readdesc().
desc = {}
|
111 |
|
# Make (possibly) relative filenames into absolute ones.  os.path.join
# leaves a name that is already absolute untouched.
treefile = os.path.join(topdir, treefile)
# An empty descfile means "no description file"; joining it would turn it
# into topdir + '/' and defeat the == '' test in readdesc().
if descfile:
    descfile = os.path.join(topdir, descfile)
# The result has to be stored back into pagedir: checkopdir() is called with
# pagedir and createpage() builds its filenames from it, so a relative
# pagedir would otherwise be resolved against the current directory.
pagedir = os.path.join(topdir, pagedir)
page = pagedir  # former name of this value, kept for backward compatibility
|
116 |
|
117 # First the bits for creating trees --------------------------- |
|
118 |
|
119 # Addtotree creates/augments a tree from a list of group names |
|
def addtotree(tree, groups):
    """Add every newsgroup name in *groups* to the nested dict *tree*.

    Each name is split at its dots and handed to makeleaf(), which creates
    the leaf and any branches leading to it.  *tree* is modified in place.
    """
    print('Updating tree...')
    for name in groups:
        # str.split() replaces the deprecated string.splitfields().
        makeleaf(tree, name.split('.'))
|
125 |
|
126 # Makeleaf makes a leaf and the branch leading to it if necessary |
|
def makeleaf(tree, path):
    """Create the leaf named by *path* in *tree*, plus any missing branches.

    tree -- nested dict of groups, modified in place
    path -- non-empty list of name components,
            e.g. ['comp', 'lang', 'python']

    Every component becomes a nested dict; the dict reached through the last
    component gets a '.' entry, which marks it as an actual newsgroup.

    Raises IndexError if *path* is empty.
    """
    head = path[0]

    # The 'in' operator replaces the deprecated dict.has_key().
    if head not in tree:
        tree[head] = {}
    branch = tree[head]

    if len(path) == 1:
        branch['.'] = '.'
    else:
        makeleaf(branch, path[1:])
|
137 |
|
138 # Then the bits for outputting trees as pages ---------------- |
|
139 |
|
140 # Createpage creates an HTML file named <root>.html containing links |
|
141 # to those groups beginning with <root>. |
|
142 |
|
def createpage(root, tree, p):
    """Write the page <root>.html in pagedir, listing the groups in *tree*.

    root -- page name without the .html suffix; unless it equals rootpage
            it is also shown in the title as the hierarchy being listed
    tree -- nested dict of the groups to put on this page
    p    -- dotted name of *tree* with a leading '.', handed on to
            printtree() ('' for the top-level page)
    """
    filename = os.path.join(pagedir, root + '.html')
    if root == rootpage:
        detail = ''
    else:
        detail = ' under ' + root

    # NOTE(review): root is derived from group names and is used unescaped
    # in both the filename and the HTML -- confirm the server is trusted.
    f = open(filename, 'w')
    try:
        # f.write('Content-Type: text/html\n')
        f.write('<TITLE>Newsgroups available' + detail + '</TITLE>\n')
        f.write('<H1>Newsgroups available' + detail + '</H1>\n')
        f.write('<A HREF="' + httppref + rootpage +
                '.html">Back to top level</A><P>\n')
        printtree(f, tree, 0, p)
        # The trailing space keeps the timestamp from running straight
        # into the version number.
        f.write('<I>This page automatically created by \'newslist\' v. ' +
                rcsrev + '. ')
        f.write(time.ctime(time.time()) + '</I><P>')
    finally:
        # Close the page even if printtree() fails part-way through.
        f.close()
|
158 |
|
159 # Printtree prints the groups as a bulleted list. Groups with |
|
160 # more than <sublistsize> subgroups will be put on a separate page. |
|
161 # Other sets of subgroups are just indented. |
|
162 |
|
def printtree(f, tree, indent, p):
    """Write *tree* to the open file *f* as a (nested) bulleted HTML list.

    f      -- file object the HTML is written to
    tree   -- nested dict of groups; a '.' key marks an actual newsgroup
    indent -- nesting depth, 0 for the top level of a page
    p      -- dotted name of *tree* with a leading '.' ('' at the root)

    A sub-hierarchy with more than sublistsize entries is not listed
    inline: it gets a link and a page of its own, written by createpage().
    Smaller sub-hierarchies are just indented.
    """
    name = p[1:]        # group/hierarchy name without the leading '.'
    size = len(tree)

    if size > sublistsize and indent > 0:
        # Create a new page and a link to it
        f.write('<LI><B><A HREF="' + httppref + name + '.html">')
        f.write(name + '.*')
        f.write('</A></B>' + pagelinkicon + '\n')
        createpage(name, tree, p)
        return

    # sorted() replaces keys() followed by an in-place sort(); with a single
    # entry there is nothing to order, so sorting unconditionally is safe.
    keys = sorted(tree)

    if size > 1:
        if indent > 0:
            # Create a sub-list
            f.write('<LI>' + name + '\n<UL>')
        else:
            # Create a main list
            f.write('<UL>')
        indent = indent + 1

    for key in keys:
        if key == '.':
            # Output a newsgroup
            f.write('<LI><A HREF="news:' + name + '">' + name + '</A> ')
            if name in desc:
                # Descriptions are free text from an external file, so the
                # HTML metacharacters are escaped before being written.
                text = desc[name].replace('&', '&amp;')
                text = text.replace('<', '&lt;').replace('>', '&gt;')
                f.write(' <I>' + text + '</I>\n')
            else:
                f.write('\n')
        else:
            # Output a hierarchy
            printtree(f, tree[key], indent, p + '.' + key)

    if size > 1:
        f.write('\n</UL>')
|
201 |
|
202 # Reading descriptions file --------------------------------------- |
|
203 |
|
204 # This returns an array mapping group name to its description |
|
205 |
|
def readdesc(descfile):
    """Load newsgroup descriptions from *descfile* into the global desc.

    Each line is split on whitespace: the first word is the group name and
    the remaining words, joined by single spaces, are its description.
    Blank lines and descriptions of one character or less are skipped.

    The global desc is reset to an empty dict first.  If *descfile* is ''
    or cannot be opened (a message is printed in that case), desc stays
    empty.

    Returns the resulting dict (the same object as the global desc).
    """
    global desc

    desc = {}

    if descfile == '':
        return desc

    try:
        d = open(descfile, 'r')
    except IOError:
        print('Failed to open description file ' + descfile)
        return desc

    print('Reading descriptions...')
    try:
        for line in d:
            bits = line.split()
            if not bits:
                continue
            dsc = ' '.join(bits[1:])
            if len(dsc) > 1:
                desc[bits[0]] = dsc
    finally:
        # The file used to be left open; always close it.
        d.close()
    return desc
|
231 |
|
232 # Check that output directory exists, ------------------------------ |
|
233 # and offer to create it if not |
|
234 |
|
def checkopdir(pagedir):
    """Make sure the output directory *pagedir* exists.

    If it does not, the user is asked on stdin whether it should be
    created.  The program exits with status 1 if the answer does not start
    with 'y' or if the directory cannot be created.
    """
    if os.path.isdir(pagedir):
        return

    print('Directory ' + pagedir + ' does not exist.')
    print('Shall I create it for you? (y/n)')
    # Slice rather than index: at end of input readline() returns '' and
    # [0] would raise IndexError.  An empty answer now counts as "no".
    if sys.stdin.readline()[:1] == 'y':
        try:
            # No mode argument: os.mkdir() defaults to 0777, which is the
            # value that used to be passed explicitly.
            os.mkdir(pagedir)
        except OSError:
            print('Sorry - failed!')
            sys.exit(1)
    else:
        print('OK. Exiting.')
        sys.exit(1)
|
248 |
|
249 # Read and write current local tree ---------------------------------- |
|
250 |
|
def readlocallist(treefile):
    """Load the cached group tree from *treefile*.

    Returns a tuple (tree, treedate):
      tree     -- the unmarshalled contents of the file, or {} if the file
                  could not be opened for reading (a message is printed)
      treedate -- the file's modification date in local time, formatted as
                  a 'YYMMDD' string

    Exits with status 1 if the file's modification time cannot be read,
    which is what happens when the cache does not exist yet.
    """
    print('Reading current local group list...')
    tree = {}
    try:
        treetime = time.localtime(os.path.getmtime(treefile))
    except OSError:
        print('\n*** Failed to open local group cache ' + treefile)
        print('If this is the first time you have run newslist, then')
        print('use the -a option to create it.')
        sys.exit(1)
    treedate = '%02d%02d%02d' % (treetime[0] % 100, treetime[1], treetime[2])
    try:
        # marshal data is binary, so the file must be opened in binary mode.
        dump = open(treefile, 'rb')
        try:
            tree = marshal.load(dump)
        finally:
            dump.close()
    except IOError:
        print('Cannot open local group list ' + treefile)
    return (tree, treedate)
|
269 |
|
def writelocallist(treefile, tree):
    """Save *tree* to *treefile* in marshal format.

    Prints a confirmation on success.  If the file cannot be written, an
    explanation is printed and the program exits with status 1.
    """
    try:
        # marshal data is binary, so the file must be opened in binary mode.
        dump = open(treefile, 'wb')
        try:
            marshal.dump(tree, dump)
        finally:
            dump.close()
        print('Saved list to ' + treefile + '\n')
    except (IOError, OSError, ValueError):
        # ValueError is what marshal raises for a value it cannot store.
        print('Sorry - failed to write to local group cache ' + treefile)
        print('Does it (or its directory) have the correct permissions?')
        sys.exit(1)
|
280 |
|
281 # Return list of all groups on server ----------------------------- |
|
282 |
|
def getallgroups(server):
    """Return a list with the names of the groups available on *server*.

    server -- object whose list() method returns (response, entries); the
              entries are indexed as entry[0] = group name and entry[1],
              entry[2] = article numbers (nntplib documents these as the
              last and first article of the group)

    While skipempty is set, a group whose entry[1] is smaller than its
    entry[2] is considered empty: its name is printed and it is left out
    of the result.
    """
    print('Getting list of all groups...')
    info = server.list()[1]
    groups = []
    print('Processing...')
    if skipempty:
        print('\nIgnoring following empty groups:')
    for entry in info:
        grpname = entry[0].split()[0]
        # int() replaces the deprecated string.atoi().
        if skipempty and int(entry[1]) < int(entry[2]):
            # Names are printed on one line, separated by two spaces.
            sys.stdout.write(grpname + '  ')
        else:
            groups.append(grpname)
    print('\n')
    if skipempty:
        print('(End of empty groups)')
    return groups
|
301 |
|
302 # Return list of new groups on server ----------------------------- |
|
303 |
|
def getnewgroups(server, treedate):
    """Return the names of the groups created on *server* since *treedate*.

    server   -- object whose newgroups(date, time) method returns
                (response, lines), each line starting with a group name
    treedate -- date string passed to the server unchanged (readlocallist()
                produces it as 'YYMMDD'); the time sent with it is '000001',
                i.e. the start of that day
    """
    sys.stdout.write('Getting list of new groups since start of ' +
                     treedate + '... ')
    # Flush so the message is visible while waiting for the server.
    sys.stdout.flush()
    info = server.newgroups(treedate, '000001')[1]
    print('got %d.' % len(info))
    sys.stdout.write('Processing... ')
    # The group name is the first whitespace-separated word of each line.
    groups = [line.split()[0] for line in info]
    print('Done')
    return groups
|
315 |
|
316 # Now the main program -------------------------------------------- |
|
317 |
|
def main():
    """Update the local group list from the news server and rebuild the pages.

    With '-a' as the first command-line argument (and a working
    connection) the complete group list is fetched and the tree is built
    from scratch.  Otherwise the cached tree is read and, if the server
    could be reached, the groups created since the cache was last modified
    are added to it.  If the server cannot be reached, the pages are
    rebuilt from the cached tree alone.
    """
    # Local import: only needed to recognize connection failures.
    import socket

    tree = {}

    # Check that the output directory exists
    checkopdir(pagedir)

    print('Connecting to ' + newshost + '...')
    try:
        s = NNTP(newshost)
        connected = 1
    except (nntplib.error_temp, nntplib.error_perm, socket.error):
        # socket.error covers an unreachable or unknown host, which used
        # to abort the program instead of falling back to the local list.
        print('Error connecting to host: %s' % (sys.exc_info()[1],))
        print('I\'ll try to use just the local list.')
        connected = 0

    # If -a is specified, read the full list of groups from server
    fetch_all = connected and len(sys.argv) > 1 and sys.argv[1] == '-a'

    # Otherwise just read the local file and then add
    # groups created since local file last modified.
    if not fetch_all:
        (tree, treedate) = readlocallist(treefile)

    if connected:
        try:
            if fetch_all:
                groups = getallgroups(s)
            else:
                groups = getnewgroups(s, treedate)
        finally:
            # Close the connection once the server is no longer needed.
            # A failure while saying goodbye is deliberately ignored.
            try:
                s.quit()
            except (nntplib.NNTPError, socket.error, EOFError):
                pass
        addtotree(tree, groups)
        writelocallist(treefile, tree)

    # Read group descriptions
    readdesc(descfile)

    print('Creating pages...')
    createpage(rootpage, tree, '')
    print('Done')
|
361 |
|
# Run the program only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
364 |
|
365 # That's all folks |
|
366 ###################################################################### |