Changeset - 94c56bb468cb
[Not reviewed]
0 2 1
Ben Sturmfels (bsturmfels) - 1 month ago 2024-03-13 02:16:29
ben@sturm.com.au
Rewrite the `index` view to avoid risk of path traversal

I've simplified this view by removing the custom HTTP error handlers, Python 3.5
exception handling and adding documentation.
3 files changed with 70 insertions and 46 deletions:
0 comments (0 inline, 0 general)
conservancy/tests.py
Show inline comments
 
new file 100644
 
import datetime
 

	
 
from django.http import Http404
 
from django.test import RequestFactory, TestCase
 

	
 
from . import views
 
from conservancy.fundgoal.models import FundraisingGoal
 

	
 

	
 
class ContentTest(TestCase):
 
    def setUp(self):
 
        self.factory = RequestFactory()
 
        FundraisingGoal.objects.create(
 
            fundraiser_code_name='cy2023-end-year-match',
 
            fundraiser_goal_amount=0,
 
            fundraiser_so_far_amount=0,
 
            fundraiser_donation_count=0,
 
            fundraiser_donation_count_disclose_threshold=0,
 
            fundraiser_endtime=datetime.datetime(2000, 1, 1)
 
        )
 

	
 
    def test_about_page_served(self):
 
        request = self.factory.get('/about/')
 
        with self.assertTemplateUsed('about/index.html'):
 
            response = views.index(request).render()
 
        self.assertContains(response, 'Conservancy is a nonprofit organization')
 

	
 
    def test_annual_report_file_served(self):
 
        request = self.factory.get('/docs/conservancy_annual-report_fy-2011.pdf')
 
        response = views.index(request)
 
        self.assertEqual(response.headers['Content-Type'], 'application/pdf')
 

	
 
    def test_path_traversal_404s(self):
 
        request = self.factory.get('/about/../../settings.py')
 
        with self.assertRaises(Http404):
 
            views.index(request)
conservancy/urls.py
Show inline comments
...
 
@@ -40,8 +40,6 @@ urlpatterns = [
 
    url(r'^news/', include('conservancy.news.urls')),
 
    url(r'^blog/', include('conservancy.blog.urls')),
 
    # formerly static templated things... (dirs with templates)
 
    url(r'^error/(40[134]|500)(?:/index\.html|/|)$', static_views.handler),
 
    url(r'^error', static_views.index),
 
    url(r'^about', static_views.index),
 
    url(r'^activities', static_views.index),
 
    url(r'^donate', static_views.index),
conservancy/views.py
Show inline comments
 
import mimetypes
 
import os.path
 

	
 
from django.http import HttpResponse
 
from django.conf import settings
 
from django.http import Http404
 
from django.http import FileResponse
 
from django.template.response import TemplateResponse
 

	
 
from .local_context_processors import fundgoal_lookup
 

	
 
# This component doesn't actually have anything to do with Django's
 
# "staticfiles" app. It's only that its templates happen to be located in the
 
# STATIC_ROOT directory. They probably shouldn't be there.
 
STATIC_ROOT = os.path.abspath(os.path.dirname(__file__))
 
FILESYSTEM_ENCODING = 'utf-8'
 

	
 
def handler(request, errorcode):
 
    path = os.path.join('error', str(errorcode), 'index.html')
 
    fullpath = os.path.join(STATIC_ROOT, path)
 
    if not os.path.exists(fullpath):
 
        return HttpResponse("Internal error: " + path, status=int(errorcode))
 
    else:
 
        return TemplateResponse(request, path, status=int(errorcode))
 

	
 
def handler401(request):
 
    return handler(request, 401)
 

	
 
def handler403(request):
 
    return handler(request, 403)
 

	
 
def handler404(request):
 
    return handler(request, 404)
 

	
 
def handler500(request):
 
    return handler(request, 500)
 

	
 
def index(request, *args, **kwargs):
 
    """Faux CMS: bulk website content stored in templates and document files.
 

	
 
    Rationale: Many websites have a CMS and store the majority of their website
 
    content in a relational database eg. WordPress or Wagtail. That's useful
 
    because various people can easily be given access to edit the website. The
 
    downside is that is application complexity - the management of who change
 
    what, when it changed and what changed becomes an application concern. At
 
    the other end of the spectrum, we have files that are checked into a Git
 
    repository - we get the precise who/what/when out of the box with Git, but
 
    require you to have some technical knowledge and appropriate access to
 
    commit. Since you're committing to a code repository, this also opens up the
 
    possibility to break things you couldn't break via a CMS.
 

	
 
    This view serves most of the textual pages and documents on
 
    sfconservancy.org. It works a little like Apache serving mixed PHP/static
 
    files - it looks at the URL and tries to find a matching file on the
 
    filesystem. If it finds a template, it renders it via Django's template
 
    infrastructure. If it finds a file but it's not a template, it will serve
 
    the file as-is.
 
    """
 
    # The name "static" has no connection to Django staticfiles.
 
    base_path = settings.BASE_DIR / 'static'
 
    path = request.path.lstrip('/')
 
    if path.endswith('/'):
 
        path += 'index.html'
 
    fullpath = os.path.join(STATIC_ROOT, 'static', path)
 
    try:
 
        # Junk URLs in production (Python 3.5) are causing UnicodeEncodeErrors
 
        # here. Can't reproduce in development in Python 3.9 - only Python 2.7.
 
        if not os.path.exists(fullpath):
 
            return handler404(request)
 
    except UnicodeEncodeError:
 
        return handler404(request)
 
    content_type, _ = mimetypes.guess_type(path)
 
    if content_type != 'text/html':
 
        return HttpResponse(open(fullpath, 'rb'), content_type)
 
    full_path = (base_path / path).resolve()
 
    safe_from_path_traversal = full_path.is_relative_to(base_path)
 
    if not full_path.exists() or not safe_from_path_traversal:
 
        raise Http404()
 
    is_template = mimetypes.guess_type(full_path)[0] == 'text/html'
 
    if not is_template:
 
        return FileResponse(open(full_path, 'rb'))
 
    else:
 
        context = kwargs.copy()
 
        try:
 
            context['fundgoal'] = fundgoal_lookup(kwargs['fundraiser_sought'])
 
        except KeyError:
 
            pass
 
        # Maybe this should open() the template file directly so that these
 
        # don't have to be included in the global template TEMPLATES.DIRS?
 
        return TemplateResponse(request, path, context)
 

	
 
def debug(request):
 
    path = request.get_full_path()
 
    path = path.lstrip('/')
 
    return HttpResponse("Hello, static world: " + path)
0 comments (0 inline, 0 general)