Fix URLField to allow numbers in top level domain

Add a custom regex to URLField that allows numbers to be present in the
top level domain, e.g. https://towerhost.org42

Set by variable allow_numbers_in_top_level_domain in URLField __init__,
and is set to True by default. If set to False, it will use the regex
specified in the built-in django URLValidator class.

This solution was originally implemented in LDAPServerURIField, but is
now implemented in URLField to support this behavior more generally. The
changes in LDAPServerURIField are longer needed and have been removed in
this commit.

Adds unit testing to make sure URLField changes handle regex input
and settings correctly.
This commit is contained in:
Seth Foster 2019-10-28 13:47:01 -04:00
parent 5ab09686c9
commit 7e83ddc968
No known key found for this signature in database
GPG Key ID: DECBFA46ED79BAEA
3 changed files with 51 additions and 26 deletions

View File

@ -1,11 +1,12 @@
# Python
import os
import re
import logging
import urllib.parse as urlparse
from collections import OrderedDict
# Django
from django.core.validators import URLValidator
from django.core.validators import URLValidator, _lazy_re_compile
from django.utils.translation import ugettext_lazy as _
# Django REST Framework
@ -118,17 +119,42 @@ class StringListPathField(StringListField):
class URLField(CharField):
# these lines set up a custom regex that allow numbers in the
# top-level domain
tld_re = (
r'\.' # dot
r'(?!-)' # can't start with a dash
r'(?:[a-z' + URLValidator.ul + r'0-9' + '-]{2,63}' # domain label, this line was changed from the original URLValidator
r'|xn--[a-z0-9]{1,59})' # or punycode label
r'(?<!-)' # can't end with a dash
r'\.?' # may have a trailing dot
)
host_re = '(' + URLValidator.hostname_re + URLValidator.domain_re + tld_re + '|localhost)'
regex = _lazy_re_compile(
r'^(?:[a-z0-9\.\-\+]*)://' # scheme is validated separately
r'(?:[^\s:@/]+(?::[^\s:@/]*)?@)?' # user:pass authentication
r'(?:' + URLValidator.ipv4_re + '|' + URLValidator.ipv6_re + '|' + host_re + ')'
r'(?::\d{2,5})?' # port
r'(?:[/?#][^\s]*)?' # resource path
r'\Z', re.IGNORECASE)
def __init__(self, **kwargs):
schemes = kwargs.pop('schemes', None)
regex = kwargs.pop('regex', None)
self.allow_plain_hostname = kwargs.pop('allow_plain_hostname', False)
self.allow_numbers_in_top_level_domain = kwargs.pop('allow_numbers_in_top_level_domain', True)
super(URLField, self).__init__(**kwargs)
validator_kwargs = dict(message=_('Enter a valid URL'))
if schemes is not None:
validator_kwargs['schemes'] = schemes
if regex is not None:
validator_kwargs['regex'] = regex
if self.allow_numbers_in_top_level_domain and regex is None:
# default behavior is to allow numbers in the top level domain
# if a custom regex isn't provided
validator_kwargs['regex'] = URLField.regex
self.validators.append(URLValidator(**validator_kwargs))
def to_representation(self, value):

View File

@ -1,7 +1,7 @@
import pytest
from rest_framework.fields import ValidationError
from awx.conf.fields import StringListBooleanField, StringListPathField, ListTuplesField
from awx.conf.fields import StringListBooleanField, StringListPathField, ListTuplesField, URLField
class TestStringListBooleanField():
@ -62,7 +62,7 @@ class TestListTuplesField():
FIELD_VALUES = [
([('a', 'b'), ('abc', '123')], [("a", "b"), ("abc", "123")]),
]
FIELD_VALUES_INVALID = [
("abc", type("abc")),
([('a', 'b', 'c'), ('abc', '123', '456')], type(('a',))),
@ -130,3 +130,25 @@ class TestStringListPathField():
field.to_internal_value([value])
assert e.value.detail[0] == "{} is not a valid path choice.".format(value)
class TestURLField():
regex = "^https://www.example.org$"
@pytest.mark.parametrize("url,schemes,regex, allow_numbers_in_top_level_domain, expect_no_error",[
("ldap://www.example.org42", "ldap", None, True, True),
("https://www.example.org42", "https", None, False, False),
("https://www.example.org", None, regex, None, True),
("https://www.example3.org", None, regex, None, False),
("ftp://www.example.org", "https", None, None, False)
])
def test_urls(self, url, schemes, regex, allow_numbers_in_top_level_domain, expect_no_error):
kwargs = {}
kwargs.setdefault("allow_numbers_in_top_level_domain", allow_numbers_in_top_level_domain)
kwargs.setdefault("schemes", schemes)
kwargs.setdefault("regex", regex)
field = URLField(**kwargs)
if expect_no_error:
field.run_validators(url)
else:
with pytest.raises(ValidationError):
field.run_validators(url)

View File

@ -11,7 +11,6 @@ import awx
# Django
from django.utils import six
from django.utils.translation import ugettext_lazy as _
from django.core.validators import URLValidator, _lazy_re_compile
# Django Auth LDAP
import django_auth_ldap.config
@ -234,34 +233,12 @@ class AuthenticationBackendsField(fields.StringListField):
class LDAPServerURIField(fields.URLField):
tld_re = (
r'\.' # dot
r'(?!-)' # can't start with a dash
r'(?:[a-z' + URLValidator.ul + r'0-9' + '-]{2,63}' # domain label, this line was changed from the original URLValidator
r'|xn--[a-z0-9]{1,59})' # or punycode label
r'(?<!-)' # can't end with a dash
r'\.?' # may have a trailing dot
)
host_re = '(' + URLValidator.hostname_re + URLValidator.domain_re + tld_re + '|localhost)'
regex = _lazy_re_compile(
r'^(?:[a-z0-9\.\-\+]*)://' # scheme is validated separately
r'(?:[^\s:@/]+(?::[^\s:@/]*)?@)?' # user:pass authentication
r'(?:' + URLValidator.ipv4_re + '|' + URLValidator.ipv6_re + '|' + host_re + ')'
r'(?::\d{2,5})?' # port
r'(?:[/?#][^\s]*)?' # resource path
r'\Z', re.IGNORECASE)
def __init__(self, **kwargs):
kwargs.setdefault('schemes', ('ldap', 'ldaps'))
kwargs.setdefault('allow_plain_hostname', True)
kwargs.setdefault('regex', LDAPServerURIField.regex)
super(LDAPServerURIField, self).__init__(**kwargs)
def run_validators(self, value):
for url in filter(None, re.split(r'[, ]', (value or ''))):
super(LDAPServerURIField, self).run_validators(url)
return value