Skip to content
Snippets Groups Projects
Unverified Commit 2b1a4b25 authored by Erik Johnston's avatar Erik Johnston Committed by GitHub
Browse files

Merge pull request #3722 from matrix-org/erikj/bg_process_iteration

LaterGauge needs to call thread safe functions
parents 324525f4 cd6937fb
No related branches found
No related tags found
No related merge requests found
Fix error collecting prometheus metrics when run on dedicated thread due to threading concurrency issues
......@@ -15,6 +15,7 @@
# limitations under the License.
import logging
import threading
from prometheus_client.core import Counter, Histogram
......@@ -111,6 +112,9 @@ in_flight_requests_db_sched_duration = Counter(
# The set of all in flight requests, set[RequestMetrics]
_in_flight_requests = set()
# Protects the _in_flight_requests set from concurrent accesss
_in_flight_requests_lock = threading.Lock()
def _get_in_flight_counts():
"""Returns a count of all in flight requests by (method, server_name)
......@@ -120,7 +124,8 @@ def _get_in_flight_counts():
"""
# Cast to a list to prevent it changing while the Prometheus
# thread is collecting metrics
reqs = list(_in_flight_requests)
with _in_flight_requests_lock:
reqs = list(_in_flight_requests)
for rm in reqs:
rm.update_metrics()
......@@ -154,10 +159,12 @@ class RequestMetrics(object):
# to the "in flight" metrics.
self._request_stats = self.start_context.get_resource_usage()
_in_flight_requests.add(self)
with _in_flight_requests_lock:
_in_flight_requests.add(self)
def stop(self, time_sec, request):
_in_flight_requests.discard(self)
with _in_flight_requests_lock:
_in_flight_requests.discard(self)
context = LoggingContext.current_context()
......
......@@ -13,6 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import threading
import six
from prometheus_client.core import REGISTRY, Counter, GaugeMetricFamily
......@@ -78,6 +80,9 @@ _background_process_counts = dict() # type: dict[str, int]
# of process descriptions that no longer have any active processes.
_background_processes = dict() # type: dict[str, set[_BackgroundProcess]]
# A lock that covers the above dicts
_bg_metrics_lock = threading.Lock()
class _Collector(object):
"""A custom metrics collector for the background process metrics.
......@@ -92,7 +97,11 @@ class _Collector(object):
labels=["name"],
)
for desc, processes in six.iteritems(_background_processes):
# We copy the dict so that it doesn't change from underneath us
with _bg_metrics_lock:
_background_processes_copy = dict(_background_processes)
for desc, processes in six.iteritems(_background_processes_copy):
background_process_in_flight_count.add_metric(
(desc,), len(processes),
)
......@@ -167,19 +176,26 @@ def run_as_background_process(desc, func, *args, **kwargs):
"""
@defer.inlineCallbacks
def run():
count = _background_process_counts.get(desc, 0)
_background_process_counts[desc] = count + 1
with _bg_metrics_lock:
count = _background_process_counts.get(desc, 0)
_background_process_counts[desc] = count + 1
_background_process_start_count.labels(desc).inc()
with LoggingContext(desc) as context:
context.request = "%s-%i" % (desc, count)
proc = _BackgroundProcess(desc, context)
_background_processes.setdefault(desc, set()).add(proc)
with _bg_metrics_lock:
_background_processes.setdefault(desc, set()).add(proc)
try:
yield func(*args, **kwargs)
finally:
proc.update_metrics()
_background_processes[desc].remove(proc)
with _bg_metrics_lock:
_background_processes[desc].remove(proc)
with PreserveLoggingContext():
return run()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment