Fix URLField to allow numbers in top level domain

Add a custom regex to URLField that allows numbers to be present in the
top level domain, e.g. https://towerhost.org42

This behavior is controlled by the allow_numbers_in_top_level_domain
keyword argument to URLField.__init__, which defaults to True. If it is
set to False, the field falls back to the regex built into Django's
URLValidator class. An explicitly supplied regex always takes precedence.

This solution was originally implemented in LDAPServerURIField, but is
now implemented in URLField to support this behavior more generally. The
changes in LDAPServerURIField are no longer needed and have been removed in
this commit.

Add unit tests to verify that URLField handles a custom regex, the
schemes setting, and allow_numbers_in_top_level_domain correctly.
This commit is contained in:
Seth Foster
2019-10-28 13:47:01 -04:00
parent 5ab09686c9
commit 7e83ddc968
3 changed files with 51 additions and 26 deletions

View File

@@ -1,11 +1,12 @@
# Python # Python
import os import os
import re
import logging import logging
import urllib.parse as urlparse import urllib.parse as urlparse
from collections import OrderedDict from collections import OrderedDict
# Django # Django
from django.core.validators import URLValidator from django.core.validators import URLValidator, _lazy_re_compile
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
# Django REST Framework # Django REST Framework
@@ -118,17 +119,42 @@ class StringListPathField(StringListField):
class URLField(CharField): class URLField(CharField):
# these lines set up a custom regex that allow numbers in the
# top-level domain
tld_re = (
r'\.' # dot
r'(?!-)' # can't start with a dash
r'(?:[a-z' + URLValidator.ul + r'0-9' + '-]{2,63}' # domain label, this line was changed from the original URLValidator
r'|xn--[a-z0-9]{1,59})' # or punycode label
r'(?<!-)' # can't end with a dash
r'\.?' # may have a trailing dot
)
host_re = '(' + URLValidator.hostname_re + URLValidator.domain_re + tld_re + '|localhost)'
regex = _lazy_re_compile(
r'^(?:[a-z0-9\.\-\+]*)://' # scheme is validated separately
r'(?:[^\s:@/]+(?::[^\s:@/]*)?@)?' # user:pass authentication
r'(?:' + URLValidator.ipv4_re + '|' + URLValidator.ipv6_re + '|' + host_re + ')'
r'(?::\d{2,5})?' # port
r'(?:[/?#][^\s]*)?' # resource path
r'\Z', re.IGNORECASE)
def __init__(self, **kwargs): def __init__(self, **kwargs):
schemes = kwargs.pop('schemes', None) schemes = kwargs.pop('schemes', None)
regex = kwargs.pop('regex', None) regex = kwargs.pop('regex', None)
self.allow_plain_hostname = kwargs.pop('allow_plain_hostname', False) self.allow_plain_hostname = kwargs.pop('allow_plain_hostname', False)
self.allow_numbers_in_top_level_domain = kwargs.pop('allow_numbers_in_top_level_domain', True)
super(URLField, self).__init__(**kwargs) super(URLField, self).__init__(**kwargs)
validator_kwargs = dict(message=_('Enter a valid URL')) validator_kwargs = dict(message=_('Enter a valid URL'))
if schemes is not None: if schemes is not None:
validator_kwargs['schemes'] = schemes validator_kwargs['schemes'] = schemes
if regex is not None: if regex is not None:
validator_kwargs['regex'] = regex validator_kwargs['regex'] = regex
if self.allow_numbers_in_top_level_domain and regex is None:
# default behavior is to allow numbers in the top level domain
# if a custom regex isn't provided
validator_kwargs['regex'] = URLField.regex
self.validators.append(URLValidator(**validator_kwargs)) self.validators.append(URLValidator(**validator_kwargs))
def to_representation(self, value): def to_representation(self, value):

View File

@@ -1,7 +1,7 @@
import pytest import pytest
from rest_framework.fields import ValidationError from rest_framework.fields import ValidationError
from awx.conf.fields import StringListBooleanField, StringListPathField, ListTuplesField from awx.conf.fields import StringListBooleanField, StringListPathField, ListTuplesField, URLField
class TestStringListBooleanField(): class TestStringListBooleanField():
@@ -62,7 +62,7 @@ class TestListTuplesField():
FIELD_VALUES = [ FIELD_VALUES = [
([('a', 'b'), ('abc', '123')], [("a", "b"), ("abc", "123")]), ([('a', 'b'), ('abc', '123')], [("a", "b"), ("abc", "123")]),
] ]
FIELD_VALUES_INVALID = [ FIELD_VALUES_INVALID = [
("abc", type("abc")), ("abc", type("abc")),
([('a', 'b', 'c'), ('abc', '123', '456')], type(('a',))), ([('a', 'b', 'c'), ('abc', '123', '456')], type(('a',))),
@@ -130,3 +130,25 @@ class TestStringListPathField():
field.to_internal_value([value]) field.to_internal_value([value])
assert e.value.detail[0] == "{} is not a valid path choice.".format(value) assert e.value.detail[0] == "{} is not a valid path choice.".format(value)
class TestURLField():
regex = "^https://www.example.org$"
@pytest.mark.parametrize("url,schemes,regex, allow_numbers_in_top_level_domain, expect_no_error",[
("ldap://www.example.org42", "ldap", None, True, True),
("https://www.example.org42", "https", None, False, False),
("https://www.example.org", None, regex, None, True),
("https://www.example3.org", None, regex, None, False),
("ftp://www.example.org", "https", None, None, False)
])
def test_urls(self, url, schemes, regex, allow_numbers_in_top_level_domain, expect_no_error):
kwargs = {}
kwargs.setdefault("allow_numbers_in_top_level_domain", allow_numbers_in_top_level_domain)
kwargs.setdefault("schemes", schemes)
kwargs.setdefault("regex", regex)
field = URLField(**kwargs)
if expect_no_error:
field.run_validators(url)
else:
with pytest.raises(ValidationError):
field.run_validators(url)

View File

@@ -11,7 +11,6 @@ import awx
# Django # Django
from django.utils import six from django.utils import six
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from django.core.validators import URLValidator, _lazy_re_compile
# Django Auth LDAP # Django Auth LDAP
import django_auth_ldap.config import django_auth_ldap.config
@@ -234,34 +233,12 @@ class AuthenticationBackendsField(fields.StringListField):
class LDAPServerURIField(fields.URLField): class LDAPServerURIField(fields.URLField):
tld_re = (
r'\.' # dot
r'(?!-)' # can't start with a dash
r'(?:[a-z' + URLValidator.ul + r'0-9' + '-]{2,63}' # domain label, this line was changed from the original URLValidator
r'|xn--[a-z0-9]{1,59})' # or punycode label
r'(?<!-)' # can't end with a dash
r'\.?' # may have a trailing dot
)
host_re = '(' + URLValidator.hostname_re + URLValidator.domain_re + tld_re + '|localhost)'
regex = _lazy_re_compile(
r'^(?:[a-z0-9\.\-\+]*)://' # scheme is validated separately
r'(?:[^\s:@/]+(?::[^\s:@/]*)?@)?' # user:pass authentication
r'(?:' + URLValidator.ipv4_re + '|' + URLValidator.ipv6_re + '|' + host_re + ')'
r'(?::\d{2,5})?' # port
r'(?:[/?#][^\s]*)?' # resource path
r'\Z', re.IGNORECASE)
def __init__(self, **kwargs): def __init__(self, **kwargs):
kwargs.setdefault('schemes', ('ldap', 'ldaps')) kwargs.setdefault('schemes', ('ldap', 'ldaps'))
kwargs.setdefault('allow_plain_hostname', True) kwargs.setdefault('allow_plain_hostname', True)
kwargs.setdefault('regex', LDAPServerURIField.regex)
super(LDAPServerURIField, self).__init__(**kwargs) super(LDAPServerURIField, self).__init__(**kwargs)
def run_validators(self, value): def run_validators(self, value):
for url in filter(None, re.split(r'[, ]', (value or ''))): for url in filter(None, re.split(r'[, ]', (value or ''))):
super(LDAPServerURIField, self).run_validators(url) super(LDAPServerURIField, self).run_validators(url)
return value return value