Skip to content
Snippets Groups Projects
Commit eb79110b authored by Mark Haines's avatar Mark Haines
Browse files

Clean up the blacklist/whitelist handling.

Always set the config key with an empty list, even if a list isn't specified.
This means that the codepaths are the same for both the empty list and
for a missing key. Since the behaviour is the same for both cases this
makes the code somewhat easier to reason about.
parent dd95eb4c
No related branches found
No related tags found
No related merge requests found
......@@ -100,13 +100,13 @@ class ContentRepositoryConfig(Config):
"to work"
)
if "url_preview_ip_range_whitelist" in config:
self.url_preview_ip_range_whitelist = IPSet(
config["url_preview_ip_range_whitelist"]
)
self.url_preview_ip_range_whitelist = IPSet(
config.get("url_preview_ip_range_whitelist", ())
)
if "url_preview_url_blacklist" in config:
self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
self.url_preview_url_blacklist = config.get(
"url_preview_url_blacklist", ()
)
def default_config(self, **kwargs):
media_store = self.default_path("media_store")
......
......@@ -380,8 +380,7 @@ class CaptchaServerHttpClient(SimpleHttpClient):
class SpiderEndpointFactory(object):
def __init__(self, hs):
self.blacklist = hs.config.url_preview_ip_range_blacklist
if hasattr(hs.config, "url_preview_ip_range_whitelist"):
self.whitelist = hs.config.url_preview_ip_range_whitelist
self.whitelist = hs.config.url_preview_ip_range_whitelist
self.policyForHTTPS = hs.get_http_client_context_factory()
def endpointForURI(self, uri):
......
......@@ -56,8 +56,7 @@ class PreviewUrlResource(Resource):
self.client = SpiderHttpClient(hs)
self.media_repo = media_repo
if hasattr(hs.config, "url_preview_url_blacklist"):
self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
# simple memory cache mapping urls to OG metadata
self.cache = ExpiringCache(
......@@ -86,39 +85,37 @@ class PreviewUrlResource(Resource):
else:
ts = self.clock.time_msec()
# impose the URL pattern blacklist
if hasattr(self, "url_preview_url_blacklist"):
url_tuple = urlparse.urlsplit(url)
for entry in self.url_preview_url_blacklist:
match = True
for attrib in entry:
pattern = entry[attrib]
value = getattr(url_tuple, attrib)
logger.debug((
"Matching attrib '%s' with value '%s' against"
" pattern '%s'"
) % (attrib, value, pattern))
if value is None:
url_tuple = urlparse.urlsplit(url)
for entry in self.url_preview_url_blacklist:
match = True
for attrib in entry:
pattern = entry[attrib]
value = getattr(url_tuple, attrib)
logger.debug((
"Matching attrib '%s' with value '%s' against"
" pattern '%s'"
) % (attrib, value, pattern))
if value is None:
match = False
continue
if pattern.startswith('^'):
if not re.match(pattern, getattr(url_tuple, attrib)):
match = False
continue
if pattern.startswith('^'):
if not re.match(pattern, getattr(url_tuple, attrib)):
match = False
continue
else:
if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern):
match = False
continue
if match:
logger.warn(
"URL %s blocked by url_blacklist entry %s", url, entry
)
raise SynapseError(
403, "URL blocked by url pattern blacklist entry",
Codes.UNKNOWN
)
else:
if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern):
match = False
continue
if match:
logger.warn(
"URL %s blocked by url_blacklist entry %s", url, entry
)
raise SynapseError(
403, "URL blocked by url pattern blacklist entry",
Codes.UNKNOWN
)
# first check the memory cache - good to handle all the clients on this
# HS thundering away to preview the same URL at the same time.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment