Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Matrix
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Container Registry
Model registry
Operate
Environments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
TeDomum
Matrix
Commits
c741ba59
Unverified
Commit
c741ba59
authored
7 years ago
by
Richard van der Hoff
Committed by
GitHub
7 years ago
Browse files
Options
Downloads
Plain Diff
Merge pull request #2669 from matrix-org/rav/cache_urlpreview_failure
Cache failures in url_preview handler
parents
781c15a6
5d15abb1
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
synapse/rest/media/v1/preview_url_resource.py
+47
-41
47 additions, 41 deletions
synapse/rest/media/v1/preview_url_resource.py
with
47 additions
and
41 deletions
synapse/rest/media/v1/preview_url_resource.py
+
47
−
41
View file @
c741ba59
...
...
@@ -20,6 +20,7 @@ from twisted.web.resource import Resource
from
synapse.api.errors
import
(
SynapseError
,
Codes
,
)
from
synapse.util.logcontext
import
preserve_fn
,
make_deferred_yieldable
from
synapse.util.stringutils
import
random_string
from
synapse.util.caches.expiringcache
import
ExpiringCache
from
synapse.http.client
import
SpiderHttpClient
...
...
@@ -63,16 +64,15 @@ class PreviewUrlResource(Resource):
self
.
url_preview_url_blacklist
=
hs
.
config
.
url_preview_url_blacklist
# simple memory cache mapping urls to OG metadata
self
.
cache
=
ExpiringCache
(
# memory cache mapping urls to an ObservableDeferred returning
# JSON-encoded OG metadata
self
.
_cache
=
ExpiringCache
(
cache_name
=
"
url_previews
"
,
clock
=
self
.
clock
,
# don't spider URLs more often than once an hour
expiry_ms
=
60
*
60
*
1000
,
)
self
.
cache
.
start
()
self
.
downloads
=
{}
self
.
_cache
.
start
()
self
.
_cleaner_loop
=
self
.
clock
.
looping_call
(
self
.
_expire_url_cache_data
,
10
*
1000
...
...
@@ -94,6 +94,7 @@ class PreviewUrlResource(Resource):
else
:
ts
=
self
.
clock
.
time_msec
()
# XXX: we could move this into _do_preview if we wanted.
url_tuple
=
urlparse
.
urlsplit
(
url
)
for
entry
in
self
.
url_preview_url_blacklist
:
match
=
True
...
...
@@ -126,14 +127,42 @@ class PreviewUrlResource(Resource):
Codes
.
UNKNOWN
)
# first check the memory cache - good to handle all the clients on this
# HS thundering away to preview the same URL at the same time.
og
=
self
.
cache
.
get
(
url
)
if
og
:
respond_with_json_bytes
(
request
,
200
,
json
.
dumps
(
og
),
send_cors
=
True
)
return
# the in-memory cache:
# * ensures that only one request is active at a time
# * takes load off the DB for the thundering herds
# * also caches any failures (unlike the DB) so we don't keep
# requesting the same endpoint
observable
=
self
.
_cache
.
get
(
url
)
if
not
observable
:
download
=
preserve_fn
(
self
.
_do_preview
)(
url
,
requester
.
user
,
ts
,
)
observable
=
ObservableDeferred
(
download
,
consumeErrors
=
True
)
self
.
_cache
[
url
]
=
observable
else
:
logger
.
info
(
"
Returning cached response
"
)
og
=
yield
make_deferred_yieldable
(
observable
.
observe
())
respond_with_json_bytes
(
request
,
200
,
og
,
send_cors
=
True
)
# then check the URL cache in the DB (which will also provide us with
@defer.inlineCallbacks
def
_do_preview
(
self
,
url
,
user
,
ts
):
"""
Check the db, and download the URL and build a preview
Args:
url (str):
user (str):
ts (int):
Returns:
Deferred[str]: json-encoded og data
"""
# check the URL cache in the DB (which will also provide us with
# historical previews, if we have any)
cache_result
=
yield
self
.
store
.
get_url_cache
(
url
,
ts
)
if
(
...
...
@@ -141,32 +170,10 @@ class PreviewUrlResource(Resource):
cache_result
[
"
expires_ts
"
]
>
ts
and
cache_result
[
"
response_code
"
]
/
100
==
2
):
respond_with_json_bytes
(
request
,
200
,
cache_result
[
"
og
"
].
encode
(
'
utf-8
'
),
send_cors
=
True
)
defer
.
returnValue
(
cache_result
[
"
og
"
])
return
# Ensure only one download for a given URL is active at a time
download
=
self
.
downloads
.
get
(
url
)
if
download
is
None
:
download
=
self
.
_download_url
(
url
,
requester
.
user
)
download
=
ObservableDeferred
(
download
,
consumeErrors
=
True
)
self
.
downloads
[
url
]
=
download
@download.addBoth
def
callback
(
media_info
):
del
self
.
downloads
[
url
]
return
media_info
media_info
=
yield
download
.
observe
()
# FIXME: we should probably update our cache now anyway, so that
# even if the OG calculation raises, we don't keep hammering on the
# remote server. For now, leave it uncached to aid debugging OG
# calculation problems
media_info
=
yield
self
.
_download_url
(
url
,
user
)
logger
.
debug
(
"
got media_info of
'
%s
'"
%
media_info
)
...
...
@@ -212,7 +219,7 @@ class PreviewUrlResource(Resource):
# just rely on the caching on the master request to speed things up.
if
'
og:image
'
in
og
and
og
[
'
og:image
'
]:
image_info
=
yield
self
.
_download_url
(
_rebase_url
(
og
[
'
og:image
'
],
media_info
[
'
uri
'
]),
requester
.
user
_rebase_url
(
og
[
'
og:image
'
],
media_info
[
'
uri
'
]),
user
)
if
_is_media
(
image_info
[
'
media_type
'
]):
...
...
@@ -239,8 +246,7 @@ class PreviewUrlResource(Resource):
logger
.
debug
(
"
Calculated OG for %s as %s
"
%
(
url
,
og
))
# store OG in ephemeral in-memory cache
self
.
cache
[
url
]
=
og
jsonog
=
json
.
dumps
(
og
)
# store OG in history-aware DB cache
yield
self
.
store
.
store_url_cache
(
...
...
@@ -248,12 +254,12 @@ class PreviewUrlResource(Resource):
media_info
[
"
response_code
"
],
media_info
[
"
etag
"
],
media_info
[
"
expires
"
]
+
media_info
[
"
created_ts
"
],
json
.
dumps
(
og
)
,
jsonog
,
media_info
[
"
filesystem_id
"
],
media_info
[
"
created_ts
"
],
)
respond_with_json_bytes
(
request
,
200
,
json
.
dumps
(
og
),
send_cors
=
True
)
defer
.
returnValue
(
jsonog
)
@defer.inlineCallbacks
def
_download_url
(
self
,
url
,
user
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment