# plugins/sitemap.py

# Copyright (C) 1998,1999  marduk <marduk@python.net>
# Copyright (C) 2002 Mike Meyer <mwm@mired.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

"""Your site at-a-glance"""

__version__ = '1.0'
__author__ = 'mwm@mired.org'

import webcheck
from rptlib import *

# Human-readable name of this report.  Not referenced inside this module;
# presumably read by the webcheck plugin machinery (TODO confirm).
title = 'Site Map'
# Current recursion depth of explore().  explore() increments it on entry,
# decrements it before returning, and compares it against
# webcheck.config.REPORT_SITEMAP_LEVEL to bound how deep the map goes.
level = 0

def explore(link, explored):
    """Build the nested-list HTML for the part of the site below *link*.

    All children of *link* that have not been seen yet are first claimed
    (recorded in *explored*) and only then visited recursively, in order.
    Claiming every child before descending means a page is listed under
    the first parent that reaches it rather than somewhere deeper inside
    a sibling's subtree.

    Arguments:
      link     -- object with a ``URL`` attribute and a ``children``
                  sequence of URLs (objects without ``URL`` are ignored).
      explored -- dictionary keyed by URL of everything already listed;
                  it is updated in place.

    Returns a list of HTML fragment strings (``<ul>``, ``<li>...`` and
    ``</ul>`` entries) that can be printed one after another.  An empty
    list is returned when the module-level ``level`` counter already
    exceeds ``webcheck.config.REPORT_SITEMAP_LEVEL``, when *link* has no
    ``URL`` attribute, or when *link* contributes no new entries.
    """
    global level
    if level > webcheck.config.REPORT_SITEMAP_LEVEL:
        return []
    # XXX I assume an object without a .URL is something
    # uninteresting? --amk
    if not hasattr(link, 'URL'):
        return []

    level += 1
    try:
        explored[link.URL] = 1
        fragments = ['<ul>']
        pending = []

        # Pass 1: claim every child that has not been listed yet.  Bad
        # links are emitted immediately as plain text (unless the
        # configuration asks for them to be anchored); everything else is
        # queued for the recursive pass below.
        for url in link.children:
            if url in explored:
                continue
            is_bad = url in webcheck.Link.badLinks
            if is_bad and not webcheck.config.ANCHOR_BAD_LINKS:
                fragments.append('<li>%s' % url)
            else:
                pending.append(url)
            explored[url] = 1

        # Pass 2: emit each queued child followed by its own subtree.
        # The recursion skips anything that was claimed in pass 1.
        for url in pending:
            child = webcheck.Link.linkList[url]
            fragments.append('<li>%s' % make_link(url, get_title(url)))
            # extend() avoids recopying the accumulated list per child.
            fragments.extend(explore(child, explored))

        fragments.append('</ul>')
    finally:
        # Always restore the shared depth counter, even if a lookup or a
        # helper raised, so later calls see a consistent depth.
        level -= 1

    # Only the opening and closing tags are present: nothing was listed,
    # so return an empty list to avoid a redundant <ul></ul> pair.
    if len(fragments) == 2:
        return []

    return fragments
    
# site map
def generate():
    """Print the site map report body to standard output.

    Writes a 'Starting Page' link for webcheck.Link.base, followed by the
    HTML fragments returned by explore() for that same link, one fragment
    per line.
    """
    # A parenthesised single argument prints exactly like the Python 2
    # print statement and is also valid syntax on Python 3.
    print(make_link(webcheck.Link.base, 'Starting Page'))
    for fragment in explore(webcheck.Link.base, {}):
        print(fragment)