PythonCdTools

Legacy Wiki Page

This page was migrated from the old MoinMoin-based wiki. Information may be outdated or no longer applicable. For current documentation, see python.org.

Python Wiki

You said you wanted to mirror the Python wiki on the CD, here is a little script to suck the pages from the wiki to a folder:

   1 import socket, os, sys, urllib2
   2 socket.setdefaulttimeout(15)
   3 from time import sleep
   4 
   5 def suckwiki(pagelist, #url to plain text list of wiki pages
   6              rawpage, #url to raw wiki text of a page
   7              foldername="wikifiles", #name of folder to save files to
   8              sleeptime=1 #seconds to sleep between page accesses
   9              ):
  10     foldername = os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])), foldername)
  11     if not os.path.exists(foldername): os.mkdir(foldername)
  12     opener = urllib2.build_opener()
  13     listrequest = urllib2.Request(pagelist)
  14     listresponse = opener.open(listrequest)
  15     sleep(sleeptime)
  16     for pagename in listresponse:
  17         pagename = pagename.strip()
  18         pagename = pagename.replace('_','_5f')
  19         pagename = pagename.replace(' ','_20')
  20         print pagename
  21         fullpagename = rawpage % {'pagename':pagename}
  22         pagerequest = urllib2.Request(fullpagename)
  23         page = opener.open(pagerequest)
  24         f = open(os.path.join(foldername,pagename),"wb")
  25         f.write(page.read())
  26         f.close()
  27         page.close()
  28         sleep(sleeptime)
  29 
  30 if __name__ == '__main__':
  31     pagelist = "http://www.python.org/cgi-bin/moinmoin/TitleIndex?action=titleindex"
  32     rawpage = r"http://www.python.org/cgi-bin/moinmoin/%(pagename)s?action=raw"
  33     foldername = "pythonwiki" #name of folder to save pages to
  34     suckwiki(pagelist,rawpage,foldername)

Thanks! – ThomasWaldmann 2004-06-22 05:23:14