12851 replace bleach with nh3 (#14767)

* 12851 replace bleach with nh3

* Move tags & attributes lists to constants.py

---------

Co-authored-by: Jeremy Stretch <jstretch@netboxlabs.com>
This commit is contained in:
Arthur Hanson 2024-01-11 06:31:32 -08:00 committed by GitHub
parent f8199339f5
commit 8254e707b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 35 additions and 29 deletions

View File

@ -1,7 +1,3 @@
# HTML sanitizer
# https://github.com/mozilla/bleach/blob/main/CHANGES
bleach
# The Python web framework on which NetBox is built
# https://docs.djangoproject.com/en/stable/releases/
Django<5.1
@ -108,6 +104,10 @@ mkdocstrings[python-legacy]
# https://github.com/netaddr/netaddr/blob/master/CHANGELOG
netaddr
# Python bindings to the ammonia HTML sanitization library.
# https://github.com/messense/nh3
nh3
# Fork of PIL (Python Imaging Library) for image processing
# https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst
Pillow

View File

@ -69,3 +69,27 @@ CSV_DELIMITERS = {
'semicolon': ';',
'tab': '\t',
}
#
# HTML allowed tags & attributes
#
# Allowlist of HTML element names, grouped by role for readability.
HTML_ALLOWED_TAGS = {
    # Block-level structure
    "blockquote", "div", "hr", "p", "pre",
    # Headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    # Inline formatting, links & images
    "a", "b", "br", "code", "del", "em", "i", "img", "strong",
    # Lists
    "dd", "dl", "dt", "li", "ol", "ul",
    # Tables
    "table", "tbody", "td", "th", "thead", "tr",
}
# Per-tag allowlist of attribute names, keyed by HTML element name.
HTML_ALLOWED_ATTRIBUTES = {
    "a": {"href", "title"},
    "div": {"class"},
    # Every heading level (h1-h6) gets its own {"id"} set.
    **{f"h{level}": {"id"} for level in range(1, 7)},
    "img": {"alt", "src", "title"},
    # Both table cell types share the same single permitted attribute.
    **{cell: {"align"} for cell in ("td", "th")},
}

View File

@ -1,11 +1,11 @@
import datetime
import decimal
import json
import nh3
import re
from decimal import Decimal
from itertools import count, groupby
import bleach
from django.contrib.contenttypes.models import ContentType
from django.core import serializers
from django.db.models import Count, ManyToOneRel, OuterRef, Subquery
@ -24,6 +24,7 @@ from netbox.config import get_config
from netbox.plugins import PluginConfig
from urllib.parse import urlencode
from utilities.constants import HTTP_REQUEST_META_SAFE_COPY
from .constants import HTML_ALLOWED_ATTRIBUTES, HTML_ALLOWED_TAGS
def title(value):
@ -511,30 +512,11 @@ def clean_html(html, schemes):
Sanitizes HTML based on a whitelist of allowed tags and attributes.
Also takes a list of allowed URI schemes.
"""
ALLOWED_TAGS = {
"div", "pre", "code", "blockquote", "del",
"hr", "h1", "h2", "h3", "h4", "h5", "h6",
"ul", "ol", "li", "p", "br",
"strong", "em", "a", "b", "i", "img",
"table", "thead", "tbody", "tr", "th", "td",
"dl", "dt", "dd",
}
ALLOWED_ATTRIBUTES = {
"div": ['class'],
"h1": ["id"], "h2": ["id"], "h3": ["id"], "h4": ["id"], "h5": ["id"], "h6": ["id"],
"a": ["href", "title"],
"img": ["src", "title", "alt"],
"th": ["align"],
"td": ["align"],
}
return bleach.clean(
return nh3.clean(
html,
tags=ALLOWED_TAGS,
attributes=ALLOWED_ATTRIBUTES,
protocols=schemes
tags=HTML_ALLOWED_TAGS,
attributes=HTML_ALLOWED_ATTRIBUTES,
url_schemes=set(schemes)
)

View File

@ -1,4 +1,3 @@
bleach==6.1.0
Django==5.0.1
django-cors-headers==4.3.1
django-debug-toolbar==4.2.0
@ -24,6 +23,7 @@ Markdown==3.5.1
mkdocs-material==9.5.3
mkdocstrings[python-legacy]==0.24.0
netaddr==0.9.0
nh3==0.2.15
Pillow==10.1.0
psycopg[binary,pool]==3.1.16
PyYAML==6.0.1