Arthur de Jong

Open Source / Free Software developer

summaryrefslogtreecommitdiffstats
path: root/extras/csrf_migration_helper.py
blob: 2a8853494c4b5b6328673cd43615241a429890d0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
#!/usr/bin/env python

# This script aims to help developers locate forms and view code that needs to
# use the new CSRF protection in Django 1.2.  It tries to find all the code that
# may need the steps described in the CSRF documentation.  It does not modify
# any code directly, it merely attempts to locate it.  Developers should be
# aware of its limitations, described below.
#
# For each template that contains at least one POST form, the following info is printed:
#
# <Absolute path to template>
#   AKA <Alias (name relative to a template directory that contains it); one line per alias>
#   POST forms: <Number of POST forms>
#   With token: <Number of POST forms with the CSRF token already added>
#   Without token:
#     <File name and line number of form without token>
#
#   Searching for:
#     <Template names that need to be searched for in view code
#      (includes templates that 'include' current template)>
#
#   Found:
#     <File name and line number of any view code found>
#
# The format used allows this script to be used in Emacs grep mode:
#   M-x grep
#   Run grep (like this): /path/to/my/virtualenv/python /path/to/django/src/extras/csrf_migration_helper.py --settings=mysettings /path/to/my/srcs


# Limitations
# ===========
#
# - All templates must be stored on disk in '.html' or '.htm' files.
#   (extensions configurable below)
#
# - All Python code must be stored on disk in '.py' files.  (extensions
#   configurable below)
#
# - All templates must be accessible from TEMPLATE_DIRS or from the 'templates/'
#   directory in apps specified in INSTALLED_APPS.  Non-file based template
#   loaders are out of the picture, because there is no way to ask them to
#   return all templates.
#
# - It's impossible to programmatically determine which forms should and should
#   not have the token added.  The developer must decide when to do this,
#   ensuring that the token is only added to internally targeted forms.
#
# - It's impossible to programmatically work out when a template is used.  The
#   attempts to trace back to view functions are guesses, and could easily fail
#   in the following ways:
#
#   * If the 'include' template tag is used with a variable
#     i.e. {% include tname %} where tname is a variable containing the actual
#     template name, rather than {% include "my_template.html" %}.
#
#   * If the template name has been built up by view code instead of as a simple
#     string.  For example, generic views and the admin both do this.  (These
#     apps are both contrib and both use RequestContext already, as it happens).
#
#   * If the 'ssi' tag (or any template tag other than 'include') is used to
#     include the template in another template.
#
# - All templates belonging to apps referenced in INSTALLED_APPS will be
#   searched, which may include third party apps or Django contrib.  In some
#   cases, this will be a good thing, because even if the templates of these
#   apps have been fixed by someone else, your own view code may reference the
#   same template and may need to be updated.
#
#   You may, however, wish to comment out some entries in INSTALLED_APPS or
#   TEMPLATE_DIRS before running this script.

# Improvements to this script are welcome!

# Configuration
# =============

# File name suffixes that mark a file as a template (see get_templates()).
TEMPLATE_EXTENSIONS = [".html", ".htm"]

# File name suffixes that mark a file as Python source (see get_python_code()).
PYTHON_SOURCE_EXTENSIONS = [".py"]

# Encoding used to decode template files when they are read.
TEMPLATE_ENCODING = "UTF-8"

# Encoding used to decode Python source files when they are read.
PYTHON_ENCODING = "UTF-8"

# Method
# ======

# Find templates:
#  - template dirs
#  - installed apps
#
# Search for POST forms
#  - Work out what the name of the template is, as it would appear in an
#    'include' or get_template() call. This can be done by comparing template
#    filename to all template dirs.  Some templates can have more than one
#    'name' e.g.  if a directory and one of its child directories are both in
#    TEMPLATE_DIRS.  This is actually a common hack used for
#    overriding-and-extending admin templates.
#
# For each POST form,
# - see if it already contains '{% csrf_token %}' between <form> and </form>
# - work back to the view function(s):
#   - First, see if the form is included in any other templates, then
#     recursively compile a list of affected templates.
#   - Find any code function that references that template.  This is just a
#     brute force text search that can easily return false positives
#     and fail to find real instances.


import io
import os
import re
import sys
from optparse import OptionParser

# Help text handed to OptionParser as its usage string; it is what
# parser.print_help() shows when the script is run without arguments.
USAGE = """
This tool helps to locate forms that need CSRF tokens added and the
corresponding view code.  This processing is NOT fool proof, and you should read
the help contained in the script itself.  Also, this script may need configuring
(by editing the script) before use.

Usage:

python csrf_migration_helper.py [--settings=path.to.your.settings] /path/to/python/code [more paths...]

  Paths can be specified as relative paths.

  With no arguments, this help is printed.
"""

# Opening <form> tag whose 'method' attribute is POST (quoted or unquoted),
# matched case-insensitively.  The whole tag must sit on one line because the
# templates are scanned line by line.
_POST_FORM_RE = \
    re.compile(r'(<form\W[^>]*\bmethod\s*=\s*(\'|"|)POST(\'|"|)\b[^>]*>)', re.IGNORECASE)
# Closing </form> tag.  HTML tag names are case-insensitive, so this ignores
# case just like the opening-tag pattern; otherwise '</FORM>' would leave the
# scanner believing it is still inside the form.
_FORM_CLOSE_RE = re.compile(r'</form\s*>', re.IGNORECASE)
# Start of a {% csrf_token %} tag, with any amount of whitespace after '{%'.
# A raw string is used so that '\{' is a regex escape rather than an invalid
# string escape.
_TOKEN_RE = re.compile(r'\{%\s*csrf_token\b')

def get_template_dirs():
    """
    Returns a set of all directories that contain project templates.

    Directories from settings.TEMPLATE_DIRS are included when the filesystem
    template loader is listed in settings.TEMPLATE_LOADERS, and the
    per-application template directories are included when the
    app_directories loader is listed.  Both the old function-style and the
    class-style loader names are recognised.
    """
    from django.conf import settings

    # 'unicode' only exists on Python 2; on Python 3 the text type is 'str'.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    loaders = settings.TEMPLATE_LOADERS
    dirs = set()
    if ('django.template.loaders.filesystem.load_template_source' in loaders
            or 'django.template.loaders.filesystem.Loader' in loaders):
        dirs.update(text_type(d) for d in settings.TEMPLATE_DIRS)

    if ('django.template.loaders.app_directories.load_template_source' in loaders
            or 'django.template.loaders.app_directories.Loader' in loaders):
        from django.template.loaders.app_directories import app_template_dirs
        dirs.update(app_template_dirs)
    return dirs

def make_template_info(filename, root_dirs):
    """
    Creates a Template object for a filename, calculating the possible
    relative_filenames from the supplied filename and root template directories

    A root directory only contributes a relative name when the file really
    lies inside it: the root must be followed by a path separator, so
    '/x/templates' does not claim '/x/templates_old/a.html'.  Roots written
    with or without a trailing separator give the same relative name.
    """
    separators = tuple(s for s in (os.sep, os.altsep) if s)
    relative_filenames = []
    for d in root_dirs:
        # Make the prefix end with exactly the separator that precedes the
        # relative part, so slicing by its length never eats a character of
        # the relative name.
        if d.endswith(separators):
            prefix = d
        else:
            prefix = d + os.sep
        if filename.startswith(prefix):
            relative_filenames.append(filename[len(prefix):])
    return Template(filename, relative_filenames)


class Template(object):
    """
    A template file on disk together with the names it can be referred to by.

    absolute_filename is the path the content is read from and is what
    equality and hashing are based on.  relative_filenames are the names
    searched for in 'include' tags and in Python code.

    related_templates() additionally expects an 'all_templates' attribute (an
    iterable of Template objects to search) to have been set on the instance;
    get_templates() does this.
    """

    def __init__(self, absolute_filename, relative_filenames):
        self.absolute_filename = absolute_filename
        self.relative_filenames = relative_filenames

    @property
    def content(self):
        """
        The decoded text of the template file, read on first access and then
        cached on the instance.

        Raises UnicodeDecodeError, with the file name appended to the reason,
        if the file cannot be decoded using TEMPLATE_ENCODING.
        """
        try:
            return self._content
        except AttributeError:
            pass

        try:
            # io.open decodes while reading on both Python 2 and Python 3;
            # the builtin open() on Python 3 already returns text, which has
            # no .decode() method.
            with io.open(self.absolute_filename, encoding=TEMPLATE_ENCODING) as fd:
                text = fd.read()
        except UnicodeDecodeError as e:
            filename = self.absolute_filename
            if not isinstance(filename, (str, bytes)):
                # Python 2 unicode path: the exception reason has to be a
                # byte string there.
                filename = filename.encode('UTF-8', 'ignore')
            message = '%s in %s' % (e.reason, filename)
            raise UnicodeDecodeError(*(e.args[:4] + (message,)))
        self._content = text
        return text

    def post_form_info(self):
        """
        Get information about any POST forms in the template.
        Returns [(linenumber, csrf_token added)], sorted by line number.
        """
        forms = {}
        # Line number of the POST form currently being scanned, or 0 when the
        # scanner is not inside a form that still needs a token.
        form_line = 0
        for ln, line in enumerate(self.content.split("\n"), 1):
            if not form_line and _POST_FORM_RE.search(line):
                # record the form with no CSRF token yet
                form_line = ln
                forms[form_line] = False
            if form_line and _TOKEN_RE.search(line):
                # found the CSRF token
                forms[form_line] = True
                form_line = 0
            if form_line and _FORM_CLOSE_RE.search(line):
                # no token found by form closing tag
                form_line = 0

        # Sorted so that callers report forms in file order on every Python
        # version, and get a real list as documented.
        return sorted(forms.items())

    def includes_template(self, t):
        """
        Returns true if this template includes template 't' (via {% include %})

        Only includes that name the template with a quoted string literal are
        detected; any of t.relative_filenames counts as a match.
        """
        for r in t.relative_filenames:
            if re.search(r'\{%\s*include\s+(\'|")' + re.escape(r) + r'(\1)\s*%\}', self.content):
                return True
        return False

    def related_templates(self):
        """
        Returns all templates that include this one, recursively.  (starting
        with this one)

        The result is a set, computed once and cached on the instance.
        Templates that include each other, directly or indirectly, are
        handled: each template is visited at most once.
        """
        try:
            return self._related_templates
        except AttributeError:
            pass

        related = set([self])
        pending = [self]
        # Worklist traversal instead of recursion, so an include cycle cannot
        # recurse without end.
        while pending:
            current = pending.pop()
            for t in current.all_templates:
                if t not in related and t.includes_template(current):
                    related.add(t)
                    pending.append(t)

        self._related_templates = related
        return related

    def __repr__(self):
        return repr(self.absolute_filename)

    def __eq__(self, other):
        if not isinstance(other, Template):
            # Let Python try the other operand rather than failing on a
            # missing attribute.
            return NotImplemented
        return self.absolute_filename == other.absolute_filename

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        return hash(self.absolute_filename)

def get_templates(dirs):
    """
    Walks every directory in dirs (recursively) and returns the set of
    Template objects for the files whose names end with one of
    TEMPLATE_EXTENSIONS.

    Each Template gets an 'all_templates' attribute that refers to the
    returned set itself.
    """
    found = set()
    for template_root in dirs:
        for current_dir, _subdirs, names in os.walk(template_root):
            for name in names:
                if not any(name.endswith(ext) for ext in TEMPLATE_EXTENSIONS):
                    continue
                info = make_template_info(os.path.join(current_dir, name), dirs)
                # Every template shares this one set so that it can search
                # the others; the set is still being filled at this point.
                info.all_templates = found
                found.add(info)
    return found

def get_python_code(paths, extensions=None, encoding=None):
    """
    Returns all Python code, as a list of tuples, each one being:
     (filename, list of lines)

    paths are directories that are searched recursively.  Files are selected
    by file name suffix ('extensions', defaulting to PYTHON_SOURCE_EXTENSIONS)
    and decoded using 'encoding' (defaulting to PYTHON_ENCODING).

    Raises Exception if one of the paths is not a directory, and
    UnicodeDecodeError if a file cannot be decoded.
    """
    if extensions is None:
        extensions = PYTHON_SOURCE_EXTENSIONS
    if encoding is None:
        encoding = PYTHON_ENCODING
    suffixes = tuple(extensions)

    retval = []
    for p in paths:
        if not os.path.isdir(p):
            raise Exception("'%s' is not a directory." % p)
        for (dirpath, dirnames, filenames) in os.walk(p):
            for f in filenames:
                if f.endswith(suffixes):
                    fn = os.path.join(dirpath, f)
                    # io.open decodes while reading on both Python 2 and
                    # Python 3; lines read with the builtin open() on
                    # Python 3 are already text and have no .decode().
                    with io.open(fn, encoding=encoding) as fd:
                        retval.append((fn, fd.readlines()))
    return retval

def search_python_list(python_code, template_names):
    """
    Looks up every name in template_names in the given python code.
    Returns the distinct matches as a sorted list of tuples, each one being:
     (filename, line number)
    """
    matches = set()
    for name in template_names:
        for hit in search_python(python_code, name):
            matches.add(hit)
    return sorted(matches)

def search_python(python_code, template_name):
    """
    Finds the lines of Python code that mention a template name as a quoted
    string (single or double quotes).
    Returns a list of tuples, each one being:
     (filename, line number)
    """
    double_quoted = u'"%s"' % template_name
    single_quoted = u"'%s'" % template_name
    return [
        (path, lineno)
        for path, lines in python_code
        for lineno, text in enumerate(lines, 1)
        if double_quoted in text or single_quoted in text
    ]

def main(pythonpaths):
    """
    Prints a report for every template that contains at least one POST form.

    pythonpaths are the directories whose Python code is searched for
    references to those templates and to the templates that include them.
    """
    all_templates = get_templates(get_template_dirs())
    sources = get_python_code(pythonpaths)
    for template in all_templates:
        # Logic
        form_info = template.post_form_info()
        total_forms = len(form_info)
        if not total_forms:
            # Templates without POST forms are not reported at all.
            continue

        missing_token_lines = []
        for lineno, has_token in form_info:
            if not has_token:
                missing_token_lines.append(lineno)

        names_to_find = []
        for related in template.related_templates():
            names_to_find.extend(related.relative_filenames)
        hits = search_python_list(sources, names_to_find)

        # Display:
        print(template.absolute_filename)
        for alias in template.relative_filenames:
            print("  AKA %s" % alias)
        print("  POST forms: %s" % total_forms)
        print("  With token: %s" % (total_forms - len(missing_token_lines)))
        if missing_token_lines:
            print("  Without token:")
            for lineno in missing_token_lines:
                # 'file:line:' with no indent, so grep-style tools can jump to it
                print("%s:%d:" % (template.absolute_filename, lineno))
        print('')
        print("  Searching for:")
        for name in names_to_find:
            print("    " + name)
        print('')
        print("  Found:")
        if hits:
            for path, lineno in hits:
                print("%s:%d:" % (path, lineno))
        else:
            print("    Nothing")

        print('')
        print("----")


# The option parser is created at import time; arguments are only parsed when
# the file is run as a script.
parser = OptionParser(usage=USAGE)
parser.add_option(
    "", "--settings",
    action="store",
    dest="settings",
    help="Dotted path to settings file",
)

if __name__ == '__main__':
    options, args = parser.parse_args()
    if not args:
        # No paths to search: show the help text and exit with an error status.
        parser.print_help()
        sys.exit(1)

    settings = getattr(options, 'settings', None)
    if settings is not None:
        # An explicit --settings value replaces whatever the environment holds.
        os.environ["DJANGO_SETTINGS_MODULE"] = settings
    elif os.environ.get("DJANGO_SETTINGS_MODULE", None) is None:
        print("You need to set DJANGO_SETTINGS_MODULE or use the '--settings' parameter")
        sys.exit(1)

    main(args)