# schemes/file.py


# file.py - handle URLs with a file scheme
#
# Copyright (C) 1998, 1999 Albert Hopkins (marduk) <marduk@python.net>
# Copyright (C) 2002 Mike Meyer <mwm@mired.org>
# Copyright (C) 2005 Arthur de Jong <arthur@tiefighter.et.tudelft.nl>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA

"""This module defines the functions needed for creating Link objects for urls
using the file scheme."""

import urlparse
import urllib
import os
import time
import mimetypes
import re

# FIXME: store this extension somewhere else
# register the .shtml extension as text/html so that mimetypes.guess_type()
# reports such files as html documents
mimetypes.add_type('text/html', '.shtml', strict=True)

def get_info(link):
    """Retrieve some basic information about the file.

    The path part of link.url is converted to a local file name which is
    passed to os.stat(). On success the results are stored in the link
    object as link.size, link.mtime and link.type. If os.stat() fails the
    error text is registered with link.add_problem() and none of these
    attributes are set."""
    # element 2 of the urlsplit() result is the path component
    path = urllib.url2pathname(urlparse.urlsplit(link.url)[2])
    try:
        stats = os.stat(path)
    except os.error as e:
        link.add_problem(str(e))
        return
    # elements 6 and 8 of the stat tuple are st_size and st_mtime; the tuple
    # interface is kept on purpose because it yields integer values
    link.size = stats[6]
    link.mtime = stats[8]
    # guess mimetype, falling back to application/octet-stream
    link.type = mimetypes.guess_type(link.url)[0]
    if link.type is None:
        link.type = 'application/octet-stream'

def get_document(link):
    """Return the contents of the document pointed to by the link.

    The path part of link.url is converted to a local file name and the
    file is opened in text mode and read in one go. Errors raised while
    opening or reading the file are not caught here."""
    # element 2 of the urlsplit() result is the path component
    path = urllib.url2pathname(urlparse.urlsplit(link.url)[2])
    fp = open(path, 'r')
    try:
        return fp.read()
    finally:
        # close the file explicitly instead of relying on garbage collection
        fp.close()

def fetch(link):
    """Gather the file information for the link and return the contents
    of the document it points to.

    This first calls get_info() to fill in the link object and then
    returns the result of get_document()."""
    get_info(link)
    document = get_document(link)
    return document