diff options
20 files changed, 2858 insertions, 1 deletions
diff --git a/system/xen/xen.SlackBuild b/system/xen/xen.SlackBuild index fbc396f6a4..39af829758 100644 --- a/system/xen/xen.SlackBuild +++ b/system/xen/xen.SlackBuild @@ -24,7 +24,7 @@ PRGNAM=xen VERSION=${VERSION:-4.8.0} -BUILD=${BUILD:-2} +BUILD=${BUILD:-3} TAG=${TAG:-_SBo} SEABIOS=${SEABIOS:-1.10.0} diff --git a/system/xen/xsa/xsa206-4.8-0001-xenstored-apply-a-write-transaction-rate-limit.patch b/system/xen/xsa/xsa206-4.8-0001-xenstored-apply-a-write-transaction-rate-limit.patch new file mode 100644 index 0000000000..ee39918cb7 --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0001-xenstored-apply-a-write-transaction-rate-limit.patch @@ -0,0 +1,456 @@ +From 8b2d563e6693527e0747fbb22c5f01eeb89a6c53 Mon Sep 17 00:00:00 2001 +From: Ian Jackson <ian.jackson@eu.citrix.com> +Date: Tue, 7 Mar 2017 16:09:12 +0000 +Subject: [PATCH 01/15] xenstored: apply a write transaction rate limit + +This avoids a rogue client being about to stall another client (eg the +toolstack) indefinitely. + +This is XSA-206. + +Signed-off-by: Ian Jackson <Ian.Jackson@eu.citrix.com> + +Backported to 4.8 (not entirely trivial). + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: George Dunlap <george.dunlap@citrix.com> +Acked-by: Ian Jackson <Ian.Jackson@eu.citrix.com> +--- + tools/xenstore/Makefile | 3 +- + tools/xenstore/xenstored_core.c | 9 ++ + tools/xenstore/xenstored_core.h | 6 + + tools/xenstore/xenstored_domain.c | 215 +++++++++++++++++++++++++++++++++ + tools/xenstore/xenstored_domain.h | 25 ++++ + tools/xenstore/xenstored_transaction.c | 5 + + 6 files changed, 262 insertions(+), 1 deletion(-) + +diff --git a/tools/xenstore/Makefile b/tools/xenstore/Makefile +index 36b6fd4..9cb54de 100644 +--- a/tools/xenstore/Makefile ++++ b/tools/xenstore/Makefile +@@ -32,6 +32,7 @@ XENSTORED_OBJS_$(CONFIG_FreeBSD) = xenstored_posix.o + XENSTORED_OBJS_$(CONFIG_MiniOS) = xenstored_minios.o + + XENSTORED_OBJS += $(XENSTORED_OBJS_y) ++LDLIBS_xenstored += -lrt + + ifneq ($(XENSTORE_STATIC_CLIENTS),y) + LIBXENSTORE := libxenstore.so +@@ -73,7 +74,7 @@ endif + $(XENSTORED_OBJS): CFLAGS += $(CFLAGS_libxengnttab) + + xenstored: $(XENSTORED_OBJS) +- $(CC) $^ $(LDFLAGS) $(LDLIBS_libxenevtchn) $(LDLIBS_libxengnttab) $(LDLIBS_libxenctrl) $(SOCKET_LIBS) -o $@ $(APPEND_LDFLAGS) ++ $(CC) $^ $(LDFLAGS) $(LDLIBS_libxenevtchn) $(LDLIBS_libxengnttab) $(LDLIBS_libxenctrl) $(LDLIBS_xenstored) $(SOCKET_LIBS) -o $@ $(APPEND_LDFLAGS) + + xenstored.a: $(XENSTORED_OBJS) + $(AR) cr $@ $^ +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index 3df977b..d14f096 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -358,6 +358,7 @@ static void initialize_fds(int sock, int *p_sock_pollfd_idx, + int *ptimeout) + { + struct connection *conn; ++ struct wrl_timestampt now; + + if (fds) + memset(fds, 0, sizeof(struct pollfd) * current_array_size); +@@ -377,8 +378,11 @@ static void initialize_fds(int sock, int *p_sock_pollfd_idx, + xce_pollfd_idx = set_fd(xenevtchn_fd(xce_handle), + POLLIN|POLLPRI); + ++ wrl_gettime_now(&now); ++ + list_for_each_entry(conn, &connections, list) { + if (conn->domain) { ++ wrl_check_timeout(conn->domain, now, ptimeout); + if (domain_can_read(conn) || + (domain_can_write(conn) && + !list_empty(&conn->out_list))) +@@ -833,6 +837,7 @@ static void delete_node_single(struct connection *conn, struct node *node) + corrupt(conn, "Could not delete '%s'", node->name); + return; + } ++ + domain_entry_dec(conn, node); + } + +@@ -972,6 +977,7 @@ static void do_write(struct connection *conn, struct buffered_data *in) + } + + add_change_node(conn->transaction, name, false); ++ wrl_apply_debit_direct(conn); + fire_watches(conn, in, name, false); + send_ack(conn, XS_WRITE); + } +@@ -1003,6 +1009,7 @@ static void do_mkdir(struct connection *conn, struct buffered_data *in) + return; + } + add_change_node(conn->transaction, name, false); ++ wrl_apply_debit_direct(conn); + fire_watches(conn, in, name, false); + } + send_ack(conn, XS_MKDIR); +@@ -1129,6 +1136,7 @@ static void do_rm(struct connection *conn, struct buffered_data *in) + + if (_rm(conn, node, name)) { + add_change_node(conn->transaction, name, true); ++ wrl_apply_debit_direct(conn); + fire_watches(conn, in, name, true); + send_ack(conn, XS_RM); + } +@@ -1205,6 +1213,7 @@ static void do_set_perms(struct connection *conn, struct buffered_data *in) + } + + add_change_node(conn->transaction, name, false); ++ wrl_apply_debit_direct(conn); + fire_watches(conn, in, name, false); + send_ack(conn, XS_SET_PERMS); + } +diff --git a/tools/xenstore/xenstored_core.h b/tools/xenstore/xenstored_core.h +index ecc614f..9e9d960 100644 +--- a/tools/xenstore/xenstored_core.h ++++ b/tools/xenstore/xenstored_core.h +@@ -33,6 +33,12 @@ + #include "list.h" + #include "tdb.h" + ++#define MIN(a, b) (((a) < (b))? (a) : (b)) ++ ++typedef int32_t wrl_creditt; ++#define WRL_CREDIT_MAX (1000*1000*1000) ++/* ^ satisfies non-overflow condition for wrl_xfer_credit */ ++ + struct buffered_data + { + struct list_head list; +diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c +index 5de93d4..012dfe6 100644 +--- a/tools/xenstore/xenstored_domain.c ++++ b/tools/xenstore/xenstored_domain.c +@@ -21,6 +21,7 @@ + #include <unistd.h> + #include <stdlib.h> + #include <stdarg.h> ++#include <time.h> + + #include "utils.h" + #include "talloc.h" +@@ -74,6 +75,10 @@ struct domain + + /* number of watch for this domain */ + int nbwatch; ++ ++ /* write rate limit */ ++ wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */ ++ struct wrl_timestampt wrl_timestamp; + }; + + static LIST_HEAD(domains); +@@ -206,6 +211,8 @@ static int destroy_domain(void *_domain) + + fire_watches(NULL, domain, "@releaseDomain", false); + ++ wrl_domain_destroy(domain); ++ + return 0; + } + +@@ -253,6 +260,9 @@ void handle_event(void) + bool domain_can_read(struct connection *conn) + { + struct xenstore_domain_interface *intf = conn->domain->interface; ++ ++ if (domain_is_unprivileged(conn) && conn->domain->wrl_credit < 0) ++ return false; + return (intf->req_cons != intf->req_prod); + } + +@@ -284,6 +294,8 @@ static struct domain *new_domain(void *context, unsigned int domid, + domain->domid = domid; + domain->path = talloc_domain_path(domain, domid); + ++ wrl_domain_new(domain); ++ + list_add(&domain->list, &domains); + talloc_set_destructor(domain, destroy_domain); + +@@ -751,6 +763,209 @@ int domain_watch(struct connection *conn) + : 0; + } + ++static wrl_creditt wrl_config_writecost = WRL_FACTOR; ++static wrl_creditt wrl_config_rate = WRL_RATE * WRL_FACTOR; ++static wrl_creditt wrl_config_dburst = WRL_DBURST * WRL_FACTOR; ++static wrl_creditt wrl_config_gburst = WRL_GBURST * WRL_FACTOR; ++static wrl_creditt wrl_config_newdoms_dburst = ++ WRL_DBURST * WRL_NEWDOMS * WRL_FACTOR; ++ ++long wrl_ntransactions; ++ ++static long wrl_ndomains; ++static wrl_creditt wrl_reserve; /* [-wrl_config_newdoms_dburst, +_gburst ] */ ++ ++void wrl_gettime_now(struct wrl_timestampt *now_wt) ++{ ++ struct timespec now_ts; ++ int r; ++ ++ r = clock_gettime(CLOCK_MONOTONIC, &now_ts); ++ if (r) ++ barf_perror("Could not find time (clock_gettime failed)"); ++ ++ now_wt->sec = now_ts.tv_sec; ++ now_wt->msec = now_ts.tv_nsec / 1000000; ++} ++ ++static void wrl_xfer_credit(wrl_creditt *debit, wrl_creditt debit_floor, ++ wrl_creditt *credit, wrl_creditt credit_ceil) ++ /* ++ * Transfers zero or more credit from "debit" to "credit". ++ * Transfers as much as possible while maintaining ++ * debit >= debit_floor and credit <= credit_ceil. ++ * (If that's violated already, does nothing.) ++ * ++ * Sufficient conditions to avoid overflow, either of: ++ * |every argument| <= 0x3fffffff ++ * |every argument| <= 1E9 ++ * |every argument| <= WRL_CREDIT_MAX ++ * (And this condition is preserved.) ++ */ ++{ ++ wrl_creditt xfer = MIN( *debit - debit_floor, ++ credit_ceil - *credit ); ++ if (xfer > 0) { ++ *debit -= xfer; ++ *credit += xfer; ++ } ++} ++ ++void wrl_domain_new(struct domain *domain) ++{ ++ domain->wrl_credit = 0; ++ wrl_gettime_now(&domain->wrl_timestamp); ++ wrl_ndomains++; ++ /* Steal up to DBURST from the reserve */ ++ wrl_xfer_credit(&wrl_reserve, -wrl_config_newdoms_dburst, ++ &domain->wrl_credit, wrl_config_dburst); ++} ++ ++void wrl_domain_destroy(struct domain *domain) ++{ ++ wrl_ndomains--; ++ /* ++ * Don't bother recalculating domain's credit - this just ++ * means we don't give the reserve the ending domain's credit ++ * for time elapsed since last update. ++ */ ++ wrl_xfer_credit(&domain->wrl_credit, 0, ++ &wrl_reserve, wrl_config_dburst); ++} ++ ++void wrl_credit_update(struct domain *domain, struct wrl_timestampt now) ++{ ++ /* ++ * We want to calculate ++ * credit += (now - timestamp) * RATE / ndoms; ++ * But we want it to saturate, and to avoid floating point. ++ * To avoid rounding errors from constantly adding small ++ * amounts of credit, we only add credit for whole milliseconds. ++ */ ++ long seconds = now.sec - domain->wrl_timestamp.sec; ++ long milliseconds = now.msec - domain->wrl_timestamp.msec; ++ long msec; ++ int64_t denom, num; ++ wrl_creditt surplus; ++ ++ seconds = MIN(seconds, 1000*1000); /* arbitrary, prevents overflow */ ++ msec = seconds * 1000 + milliseconds; ++ ++ if (msec < 0) ++ /* shouldn't happen with CLOCK_MONOTONIC */ ++ msec = 0; ++ ++ /* 32x32 -> 64 cannot overflow */ ++ denom = (int64_t)msec * wrl_config_rate; ++ num = (int64_t)wrl_ndomains * 1000; ++ /* denom / num <= 1E6 * wrl_config_rate, so with ++ reasonable wrl_config_rate, denom / num << 2^64 */ ++ ++ /* at last! */ ++ domain->wrl_credit = MIN( (int64_t)domain->wrl_credit + denom / num, ++ WRL_CREDIT_MAX ); ++ /* (maybe briefly violating the DBURST cap on wrl_credit) */ ++ ++ /* maybe take from the reserve to make us nonnegative */ ++ wrl_xfer_credit(&wrl_reserve, 0, ++ &domain->wrl_credit, 0); ++ ++ /* return any surplus (over DBURST) to the reserve */ ++ surplus = 0; ++ wrl_xfer_credit(&domain->wrl_credit, wrl_config_dburst, ++ &surplus, WRL_CREDIT_MAX); ++ wrl_xfer_credit(&surplus, 0, ++ &wrl_reserve, wrl_config_gburst); ++ /* surplus is now implicitly discarded */ ++ ++ domain->wrl_timestamp = now; ++ ++ trace("wrl: dom %4d %6ld msec %9ld credit %9ld reserve" ++ " %9ld discard\n", ++ domain->domid, ++ msec, ++ (long)domain->wrl_credit, (long)wrl_reserve, ++ (long)surplus); ++} ++ ++void wrl_check_timeout(struct domain *domain, ++ struct wrl_timestampt now, ++ int *ptimeout) ++{ ++ uint64_t num, denom; ++ int wakeup; ++ ++ wrl_credit_update(domain, now); ++ ++ if (domain->wrl_credit >= 0) ++ /* not blocked */ ++ return; ++ ++ if (!*ptimeout) ++ /* already decided on immediate wakeup, ++ so no need to calculate our timeout */ ++ return; ++ ++ /* calculate wakeup = now + -credit / (RATE / ndoms); */ ++ ++ /* credit cannot go more -ve than one transaction, ++ * so the first multiplication cannot overflow even 32-bit */ ++ num = (uint64_t)(-domain->wrl_credit * 1000) * wrl_ndomains; ++ denom = wrl_config_rate; ++ ++ wakeup = MIN( num / denom /* uint64_t */, INT_MAX ); ++ if (*ptimeout==-1 || wakeup < *ptimeout) ++ *ptimeout = wakeup; ++ ++ trace("wrl: domain %u credit=%ld (reserve=%ld) SLEEPING for %d\n", ++ domain->domid, ++ (long)domain->wrl_credit, (long)wrl_reserve, ++ wakeup); ++} ++ ++void wrl_apply_debit_actual(struct domain *domain) ++{ ++ struct wrl_timestampt now; ++ ++ if (!domain) ++ /* sockets escape the write rate limit */ ++ return; ++ ++ wrl_gettime_now(&now); ++ wrl_credit_update(domain, now); ++ ++ domain->wrl_credit -= wrl_config_writecost; ++ trace("wrl: domain %u credit=%ld (reserve=%ld)\n", ++ domain->domid, ++ (long)domain->wrl_credit, (long)wrl_reserve); ++} ++ ++void wrl_apply_debit_direct(struct connection *conn) ++{ ++ if (!conn) ++ /* some writes are generated internally */ ++ return; ++ ++ if (conn->transaction) ++ /* these are accounted for when the transaction ends */ ++ return; ++ ++ if (!wrl_ntransactions) ++ /* we don't conflict with anyone */ ++ return; ++ ++ wrl_apply_debit_actual(conn->domain); ++} ++ ++void wrl_apply_debit_trans_commit(struct connection *conn) ++{ ++ if (wrl_ntransactions <= 1) ++ /* our own transaction appears in the counter */ ++ return; ++ ++ wrl_apply_debit_actual(conn->domain); ++} ++ + /* + * Local variables: + * c-file-style: "linux" +diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h +index 2554423..cec341e 100644 +--- a/tools/xenstore/xenstored_domain.h ++++ b/tools/xenstore/xenstored_domain.h +@@ -65,4 +65,29 @@ void domain_watch_inc(struct connection *conn); + void domain_watch_dec(struct connection *conn); + int domain_watch(struct connection *conn); + ++/* Write rate limiting */ ++ ++#define WRL_FACTOR 1000 /* for fixed-point arithmetic */ ++#define WRL_RATE 200 ++#define WRL_DBURST 10 ++#define WRL_GBURST 1000 ++#define WRL_NEWDOMS 5 ++ ++struct wrl_timestampt { ++ time_t sec; ++ int msec; ++}; ++ ++extern long wrl_ntransactions; ++ ++void wrl_gettime_now(struct wrl_timestampt *now_ts); ++void wrl_domain_new(struct domain *domain); ++void wrl_domain_destroy(struct domain *domain); ++void wrl_credit_update(struct domain *domain, struct wrl_timestampt now); ++void wrl_check_timeout(struct domain *domain, ++ struct wrl_timestampt now, ++ int *ptimeout); ++void wrl_apply_debit_direct(struct connection *conn); ++void wrl_apply_debit_trans_commit(struct connection *conn); ++ + #endif /* _XENSTORED_DOMAIN_H */ +diff --git a/tools/xenstore/xenstored_transaction.c b/tools/xenstore/xenstored_transaction.c +index 84cb0bf..5059a11 100644 +--- a/tools/xenstore/xenstored_transaction.c ++++ b/tools/xenstore/xenstored_transaction.c +@@ -120,6 +120,7 @@ static int destroy_transaction(void *_transaction) + { + struct transaction *trans = _transaction; + ++ wrl_ntransactions--; + trace_destroy(trans, "transaction"); + if (trans->tdb) + tdb_close(trans->tdb); +@@ -183,6 +184,7 @@ void do_transaction_start(struct connection *conn, struct buffered_data *in) + talloc_steal(conn, trans); + talloc_set_destructor(trans, destroy_transaction); + conn->transaction_started++; ++ wrl_ntransactions++; + + snprintf(id_str, sizeof(id_str), "%u", trans->id); + send_reply(conn, XS_TRANSACTION_START, id_str, strlen(id_str)+1); +@@ -218,6 +220,9 @@ void do_transaction_end(struct connection *conn, struct buffered_data *in) + send_error(conn, EAGAIN); + return; + } ++ ++ wrl_apply_debit_trans_commit(conn); ++ + if (!replace_tdb(trans->tdb_name, trans->tdb)) { + send_error(conn, errno); + return; +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0002-xenstored-Log-when-the-write-transaction-rate-limit-.patch b/system/xen/xsa/xsa206-4.8-0002-xenstored-Log-when-the-write-transaction-rate-limit-.patch new file mode 100644 index 0000000000..57e1e9be9e --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0002-xenstored-Log-when-the-write-transaction-rate-limit-.patch @@ -0,0 +1,113 @@ +From 6a5b012157c3dbe4bb82f2aa3d950e20cb5bf9d6 Mon Sep 17 00:00:00 2001 +From: Ian Jackson <ian.jackson@eu.citrix.com> +Date: Tue, 7 Mar 2017 16:09:13 +0000 +Subject: [PATCH 02/15] xenstored: Log when the write transaction rate limit + bites + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> +--- + tools/xenstore/xenstored_core.c | 1 + + tools/xenstore/xenstored_domain.c | 25 +++++++++++++++++++++++++ + tools/xenstore/xenstored_domain.h | 2 ++ + 3 files changed, 28 insertions(+) + +diff --git a/tools/xenstore/xenstored_core.c b/tools/xenstore/xenstored_core.c +index d14f096..dc9a26f 100644 +--- a/tools/xenstore/xenstored_core.c ++++ b/tools/xenstore/xenstored_core.c +@@ -379,6 +379,7 @@ static void initialize_fds(int sock, int *p_sock_pollfd_idx, + POLLIN|POLLPRI); + + wrl_gettime_now(&now); ++ wrl_log_periodic(now); + + list_for_each_entry(conn, &connections, list) { + if (conn->domain) { +diff --git a/tools/xenstore/xenstored_domain.c b/tools/xenstore/xenstored_domain.c +index 012dfe6..fd9ca39 100644 +--- a/tools/xenstore/xenstored_domain.c ++++ b/tools/xenstore/xenstored_domain.c +@@ -22,6 +22,7 @@ + #include <stdlib.h> + #include <stdarg.h> + #include <time.h> ++#include <syslog.h> + + #include "utils.h" + #include "talloc.h" +@@ -79,6 +80,7 @@ struct domain + /* write rate limit */ + wrl_creditt wrl_credit; /* [ -wrl_config_writecost, +_dburst ] */ + struct wrl_timestampt wrl_timestamp; ++ bool wrl_delay_logged; + }; + + static LIST_HEAD(domains); +@@ -774,6 +776,7 @@ long wrl_ntransactions; + + static long wrl_ndomains; + static wrl_creditt wrl_reserve; /* [-wrl_config_newdoms_dburst, +_gburst ] */ ++static time_t wrl_log_last_warning; /* 0: no previous warning */ + + void wrl_gettime_now(struct wrl_timestampt *now_wt) + { +@@ -923,6 +926,9 @@ void wrl_check_timeout(struct domain *domain, + wakeup); + } + ++#define WRL_LOG(now, ...) \ ++ (syslog(LOG_WARNING, "write rate limit: " __VA_ARGS__)) ++ + void wrl_apply_debit_actual(struct domain *domain) + { + struct wrl_timestampt now; +@@ -938,6 +944,25 @@ void wrl_apply_debit_actual(struct domain *domain) + trace("wrl: domain %u credit=%ld (reserve=%ld)\n", + domain->domid, + (long)domain->wrl_credit, (long)wrl_reserve); ++ ++ if (domain->wrl_credit < 0) { ++ if (!domain->wrl_delay_logged++) { ++ WRL_LOG(now, "domain %ld is affected", ++ (long)domain->domid); ++ } else if (!wrl_log_last_warning) { ++ WRL_LOG(now, "rate limiting restarts"); ++ } ++ wrl_log_last_warning = now.sec; ++ } ++} ++ ++void wrl_log_periodic(struct wrl_timestampt now) ++{ ++ if (wrl_log_last_warning && ++ (now.sec - wrl_log_last_warning) > WRL_LOGEVERY) { ++ WRL_LOG(now, "not in force recently"); ++ wrl_log_last_warning = 0; ++ } + } + + void wrl_apply_debit_direct(struct connection *conn) +diff --git a/tools/xenstore/xenstored_domain.h b/tools/xenstore/xenstored_domain.h +index cec341e..561ab5d 100644 +--- a/tools/xenstore/xenstored_domain.h ++++ b/tools/xenstore/xenstored_domain.h +@@ -72,6 +72,7 @@ int domain_watch(struct connection *conn); + #define WRL_DBURST 10 + #define WRL_GBURST 1000 + #define WRL_NEWDOMS 5 ++#define WRL_LOGEVERY 120 /* seconds */ + + struct wrl_timestampt { + time_t sec; +@@ -87,6 +88,7 @@ void wrl_credit_update(struct domain *domain, struct wrl_timestampt now); + void wrl_check_timeout(struct domain *domain, + struct wrl_timestampt now, + int *ptimeout); ++void wrl_log_periodic(struct wrl_timestampt now); + void wrl_apply_debit_direct(struct connection *conn); + void wrl_apply_debit_trans_commit(struct connection *conn); + +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0003-oxenstored-comments-explaining-some-variables.patch b/system/xen/xsa/xsa206-4.8-0003-oxenstored-comments-explaining-some-variables.patch new file mode 100644 index 0000000000..e878a385f0 --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0003-oxenstored-comments-explaining-some-variables.patch @@ -0,0 +1,65 @@ +From d94645f1eb00e2703aef57cac19df9e86c54d275 Mon Sep 17 00:00:00 2001 +From: Thomas Sanders <thomas.sanders@citrix.com> +Date: Tue, 14 Mar 2017 12:15:52 +0000 +Subject: [PATCH 03/15] oxenstored: comments explaining some variables + +It took a while of reading and reasoning to work out what these are +for, so here are comments to make life easier for everyone reading +this code in future. + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com> +Reviewed-by: Jonathan Davies <jonathan.davies@citrix.com> +Reviewed-by: Ian Jackson <ian.jackson@eu.citrix.com> +Reviewed-by: Christian Lindig <christian.lindig@citrix.com> +--- + tools/ocaml/xenstored/store.ml | 1 + + tools/ocaml/xenstored/transaction.ml | 10 +++++++--- + 2 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml +index 223ee21..9f619b8 100644 +--- a/tools/ocaml/xenstored/store.ml ++++ b/tools/ocaml/xenstored/store.ml +@@ -211,6 +211,7 @@ let apply rnode path fct = + lookup rnode path fct + end + ++(* The Store.t type *) + type t = + { + mutable stat_transaction_coalesce: int; +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index 6b37fc2..51d5d6a 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -69,11 +69,15 @@ let can_coalesce oldroot currentroot path = + else + false + +-type ty = No | Full of (int * Store.Node.t * Store.t) ++type ty = No | Full of ( ++ int * (* Transaction id *) ++ Store.Node.t * (* Original root *) ++ Store.t (* A pointer to the canonical store: its root changes on each transaction-commit *) ++) + + type t = { + ty: ty; +- store: Store.t; ++ store: Store.t; (* This is the store that we change in write operations. *) + quota: Quota.t; + mutable paths: (Xenbus.Xb.Op.operation * Store.Path.t) list; + mutable operations: (Packet.request * Packet.response) list; +@@ -155,7 +159,7 @@ let commit ~con t = + let has_commited = + match t.ty with + | No -> true +- | Full (id, oldroot, cstore) -> ++ | Full (id, oldroot, cstore) -> (* "cstore" meaning current canonical store *) + let commit_partial oldroot cstore store = + (* get the lowest path of the query and verify that it hasn't + been modified by others transactions. *) +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0004-oxenstored-handling-of-domain-conflict-credit.patch b/system/xen/xsa/xsa206-4.8-0004-oxenstored-handling-of-domain-conflict-credit.patch new file mode 100644 index 0000000000..63cfbaee85 --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0004-oxenstored-handling-of-domain-conflict-credit.patch @@ -0,0 +1,299 @@ +From 1f12baed15dc7502365afb54161827405ff24732 Mon Sep 17 00:00:00 2001 +From: Thomas Sanders <thomas.sanders@citrix.com> +Date: Tue, 14 Mar 2017 12:15:52 +0000 +Subject: [PATCH 04/15] oxenstored: handling of domain conflict-credit + +This commit gives each domain a conflict-credit variable, which will +later be used for limiting how often a domain can cause other domain's +transaction-commits to fail. + +This commit also provides functions and data for manipulating domains +and their conflict-credit, and checking whether they have credit. + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com> +Reviewed-by: Jonathan Davies <jonathan.davies@citrix.com> +Reviewed-by: Christian Lindig <christian.lindig@citrix.com> + +--- + tools/ocaml/xenstored/connection.ml | 5 ++ + tools/ocaml/xenstored/define.ml | 3 + + tools/ocaml/xenstored/domain.ml | 11 +++- + tools/ocaml/xenstored/domains.ml | 103 ++++++++++++++++++++++++++++++- + tools/ocaml/xenstored/oxenstored.conf.in | 32 ++++++++++ + tools/ocaml/xenstored/transaction.ml | 2 + + tools/ocaml/xenstored/xenstored.ml | 2 + + 7 files changed, 154 insertions(+), 4 deletions(-) + +diff --git a/tools/ocaml/xenstored/connection.ml b/tools/ocaml/xenstored/connection.ml +index 3ffd35b..a66d2f7 100644 +--- a/tools/ocaml/xenstored/connection.ml ++++ b/tools/ocaml/xenstored/connection.ml +@@ -296,3 +296,8 @@ let debug con = + let domid = get_domstr con in + let watches = List.map (fun (path, token) -> Printf.sprintf "watch %s: %s %s\n" domid path token) (list_watches con) in + String.concat "" watches ++ ++let decr_conflict_credit doms con = ++ match con.dom with ++ | None -> () (* It's a socket connection. We don't know which domain we're in, so treat it as if it's free to conflict *) ++ | Some dom -> Domains.decr_conflict_credit doms dom +diff --git a/tools/ocaml/xenstored/define.ml b/tools/ocaml/xenstored/define.ml +index e9d957f..816b493 100644 +--- a/tools/ocaml/xenstored/define.ml ++++ b/tools/ocaml/xenstored/define.ml +@@ -29,6 +29,9 @@ let maxwatch = ref (50) + let maxtransaction = ref (20) + let maxrequests = ref (-1) (* maximum requests per transaction *) + ++let conflict_burst_limit = ref 5.0 ++let conflict_rate_limit_is_aggregate = ref true ++ + let domid_self = 0x7FF0 + + exception Not_a_directory of string +diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml +index ab34314..e677aa3 100644 +--- a/tools/ocaml/xenstored/domain.ml ++++ b/tools/ocaml/xenstored/domain.ml +@@ -31,8 +31,12 @@ type t = + mutable io_credit: int; (* the rounds of ring process left to do, default is 0, + usually set to 1 when there is work detected, could + also set to n to give "lazy" clients extra credit *) ++ mutable conflict_credit: float; (* Must be positive to perform writes; a commit ++ that later causes conflict with another ++ domain's transaction costs credit. *) + } + ++let is_dom0 d = d.id = 0 + let get_path dom = "/local/domain/" ^ (sprintf "%u" dom.id) + let get_id domain = domain.id + let get_interface d = d.interface +@@ -48,6 +52,10 @@ let set_io_credit ?(n=1) domain = domain.io_credit <- max 0 n + let incr_io_credit domain = domain.io_credit <- domain.io_credit + 1 + let decr_io_credit domain = domain.io_credit <- max 0 (domain.io_credit - 1) + ++let is_paused_for_conflict dom = dom.conflict_credit <= 0.0 ++ ++let is_free_to_conflict = is_dom0 ++ + let string_of_port = function + | None -> "None" + | Some x -> string_of_int (Xeneventchn.to_int x) +@@ -84,6 +92,5 @@ let make id mfn remote_port interface eventchn = { + port = None; + bad_client = false; + io_credit = 0; ++ conflict_credit = !Define.conflict_burst_limit; + } +- +-let is_dom0 d = d.id = 0 +diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml +index 395f3a9..3d29cc8 100644 +--- a/tools/ocaml/xenstored/domains.ml ++++ b/tools/ocaml/xenstored/domains.ml +@@ -15,20 +15,58 @@ + *) + + let debug fmt = Logging.debug "domains" fmt ++let error fmt = Logging.error "domains" fmt ++let warn fmt = Logging.warn "domains" fmt + + type domains = { + eventchn: Event.t; + table: (Xenctrl.domid, Domain.t) Hashtbl.t; ++ ++ (* N.B. the Queue module is not thread-safe but oxenstored is single-threaded. *) ++ (* Domains queue up to regain conflict-credit; we have a queue for ++ domains that are carrying some penalty and so are below the ++ maximum credit, and another queue for domains that have run out of ++ credit and so have had their access paused. *) ++ doms_conflict_paused: (Domain.t option ref) Queue.t; ++ doms_with_conflict_penalty: (Domain.t option ref) Queue.t; ++ ++ (* A callback function to be called when we go from zero to one paused domain. ++ This will be to reset the countdown until the next unit of credit is issued. *) ++ on_first_conflict_pause: unit -> unit; ++ ++ (* If config is set to use individual instead of aggregate conflict-rate-limiting, ++ we use this instead of the queues. *) ++ mutable n_paused: int; + } + +-let init eventchn = +- { eventchn = eventchn; table = Hashtbl.create 10 } ++let init eventchn = { ++ eventchn = eventchn; ++ table = Hashtbl.create 10; ++ doms_conflict_paused = Queue.create (); ++ doms_with_conflict_penalty = Queue.create (); ++ on_first_conflict_pause = (fun () -> ()); (* Dummy value for now, pending subsequent commit. *) ++ n_paused = 0; ++} + let del doms id = Hashtbl.remove doms.table id + let exist doms id = Hashtbl.mem doms.table id + let find doms id = Hashtbl.find doms.table id + let number doms = Hashtbl.length doms.table + let iter doms fct = Hashtbl.iter (fun _ b -> fct b) doms.table + ++(* Functions to handle queues of domains given that the domain might be deleted while in a queue. *) ++let push dom queue = ++ Queue.push (ref (Some dom)) queue ++ ++let rec pop queue = ++ match !(Queue.pop queue) with ++ | None -> pop queue ++ | Some x -> x ++ ++let remove_from_queue dom queue = ++ Queue.iter (fun d -> match !d with ++ | None -> () ++ | Some x -> if x=dom then d := None) queue ++ + let cleanup xc doms = + let notify = ref false in + let dead_dom = ref [] in +@@ -52,6 +90,11 @@ let cleanup xc doms = + let dom = Hashtbl.find doms.table id in + Domain.close dom; + Hashtbl.remove doms.table id; ++ if dom.Domain.conflict_credit <= !Define.conflict_burst_limit ++ then ( ++ remove_from_queue dom doms.doms_with_conflict_penalty; ++ if (dom.Domain.conflict_credit <= 0.) then remove_from_queue dom doms.doms_conflict_paused ++ ) + ) !dead_dom; + !notify, !dead_dom + +@@ -82,3 +125,59 @@ let create0 doms = + Domain.bind_interdomain dom; + Domain.notify dom; + dom ++ ++let decr_conflict_credit doms dom = ++ let before = dom.Domain.conflict_credit in ++ let after = max (-1.0) (before -. 1.0) in ++ dom.Domain.conflict_credit <- after; ++ if !Define.conflict_rate_limit_is_aggregate then ( ++ if before >= !Define.conflict_burst_limit ++ && after < !Define.conflict_burst_limit ++ && after > 0.0 ++ then ( ++ push dom doms.doms_with_conflict_penalty ++ ) else if before > 0.0 && after <= 0.0 ++ then ( ++ let first_pause = Queue.is_empty doms.doms_conflict_paused in ++ push dom doms.doms_conflict_paused; ++ if first_pause then doms.on_first_conflict_pause () ++ ) else ( ++ (* The queues are correct already: no further action needed. *) ++ ) ++ ) else if before > 0.0 && after <= 0.0 then ( ++ doms.n_paused <- doms.n_paused + 1; ++ if doms.n_paused = 1 then doms.on_first_conflict_pause () ++ ) ++ ++(* Give one point of credit to one domain, and update the queues appropriately. *) ++let incr_conflict_credit_from_queue doms = ++ let process_queue q requeue_test = ++ let d = pop q in ++ d.Domain.conflict_credit <- min (d.Domain.conflict_credit +. 1.0) !Define.conflict_burst_limit; ++ if requeue_test d.Domain.conflict_credit then ( ++ push d q (* Make it queue up again for its next point of credit. *) ++ ) ++ in ++ let paused_queue_test cred = cred <= 0.0 in ++ let penalty_queue_test cred = cred < !Define.conflict_burst_limit in ++ try process_queue doms.doms_conflict_paused paused_queue_test ++ with Queue.Empty -> ( ++ try process_queue doms.doms_with_conflict_penalty penalty_queue_test ++ with Queue.Empty -> () (* Both queues are empty: nothing to do here. *) ++ ) ++ ++let incr_conflict_credit doms = ++ if !Define.conflict_rate_limit_is_aggregate ++ then incr_conflict_credit_from_queue doms ++ else ( ++ (* Give a point of credit to every domain, subject only to the cap. *) ++ let inc dom = ++ let before = dom.Domain.conflict_credit in ++ let after = min (before +. 1.0) !Define.conflict_burst_limit in ++ dom.Domain.conflict_credit <- after; ++ if before <= 0.0 && after > 0.0 ++ then doms.n_paused <- doms.n_paused - 1 ++ in ++ (* Scope for optimisation (probably tiny): avoid iteration if all domains are at max credit *) ++ iter doms inc ++ ) +diff --git a/tools/ocaml/xenstored/oxenstored.conf.in b/tools/ocaml/xenstored/oxenstored.conf.in +index 82117a9..edd4335 100644 +--- a/tools/ocaml/xenstored/oxenstored.conf.in ++++ b/tools/ocaml/xenstored/oxenstored.conf.in +@@ -9,6 +9,38 @@ test-eagain = false + # Activate transaction merge support + merge-activate = true + ++# Limits applied to domains whose writes cause other domains' transaction ++# commits to fail. Must include decimal point. ++ ++# The burst limit is the number of conflicts a domain can cause to ++# fail in a short period; this value is used for both the initial and ++# the maximum value of each domain's conflict-credit, which falls by ++# one point for each conflict caused, and when it reaches zero the ++# domain's requests are ignored. ++conflict-burst-limit = 5.0 ++ ++# The conflict-credit is replenished over time: ++# one point is issued after each conflict-max-history-seconds, so this ++# is the minimum pause-time during which a domain will be ignored. ++# conflict-max-history-seconds = 0.05 ++ ++# If the conflict-rate-limit-is-aggregate flag is true then after each ++# tick one point of conflict-credit is given to just one domain: the ++# one at the front of the queue. If false, then after each tick each ++# domain gets a point of conflict-credit. ++# ++# In environments where it is known that every transaction will ++# involve a set of nodes that is writable by at most one other domain, ++# then it is safe to set this aggregate-limit flag to false for better ++# performance. (This can be determined by considering the layout of ++# the xenstore tree and permissions, together with the content of the ++# transactions that require protection.) ++# ++# A transaction which involves a set of nodes which can be modified by ++# multiple other domains can suffer conflicts caused by any of those ++# domains, so the flag must be set to true. ++conflict-rate-limit-is-aggregate = true ++ + # Activate node permission system + perms-activate = true + +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index 51d5d6a..6f758ff 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -14,6 +14,8 @@ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) ++let error fmt = Logging.error "transaction" fmt ++ + open Stdext + + let none = 0 +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index 2efcce6..20473d5 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -89,6 +89,8 @@ let parse_config filename = + let pidfile = ref default_pidfile in + let options = [ + ("merge-activate", Config.Set_bool Transaction.do_coalesce); ++ ("conflict-burst-limit", Config.Set_float Define.conflict_burst_limit); ++ ("conflict-rate-limit-is-aggregate", Config.Set_bool Define.conflict_rate_limit_is_aggregate); + ("perms-activate", Config.Set_bool Perms.activate); + ("quota-activate", Config.Set_bool Quota.activate); + ("quota-maxwatch", Config.Set_int Define.maxwatch); +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0005-oxenstored-ignore-domains-with-no-conflict-credit.patch b/system/xen/xsa/xsa206-4.8-0005-oxenstored-ignore-domains-with-no-conflict-credit.patch new file mode 100644 index 0000000000..b5f4250cdc --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0005-oxenstored-ignore-domains-with-no-conflict-credit.patch @@ -0,0 +1,219 @@ +From e0f02f8fb5a5130a37bd7efdc80d7f0dd46db41e Mon Sep 17 00:00:00 2001 +From: Thomas Sanders <thomas.sanders@citrix.com> +Date: Tue, 14 Mar 2017 12:15:52 +0000 +Subject: [PATCH 05/15] oxenstored: ignore domains with no conflict-credit + +When processing connections, skip those from domains with no remaining +conflict-credit. + +Also, issue a point of conflict-credit at regular intervals, the +period being set by the configuration option "conflict-max-history- +seconds". When issuing conflict-credit, we give a point either to +every domain at once (one each) or only to the single domain at the +front of the queue, depending on the configuration option +"conflict-rate-limit-is-aggregate". + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com> +Reviewed-by: Jonathan Davies <jonathan.davies@citrix.com> +Reviewed-by: Christian Lindig <christian.lindig@citrix.com> +--- + tools/ocaml/xenstored/connections.ml | 14 ++++--- + tools/ocaml/xenstored/define.ml | 1 + + tools/ocaml/xenstored/domains.ml | 4 +- + tools/ocaml/xenstored/oxenstored.conf.in | 2 +- + tools/ocaml/xenstored/xenstored.ml | 65 +++++++++++++++++++++++--------- + 5 files changed, 60 insertions(+), 26 deletions(-) + +diff --git a/tools/ocaml/xenstored/connections.ml b/tools/ocaml/xenstored/connections.ml +index f9bc225..ae76928 100644 +--- a/tools/ocaml/xenstored/connections.ml ++++ b/tools/ocaml/xenstored/connections.ml +@@ -44,12 +44,14 @@ let add_domain cons dom = + | Some p -> Hashtbl.add cons.ports p con; + | None -> () + +-let select cons = +- Hashtbl.fold +- (fun _ con (ins, outs) -> +- let fd = Connection.get_fd con in +- (fd :: ins, if Connection.has_output con then fd :: outs else outs)) +- cons.anonymous ([], []) ++let select ?(only_if = (fun _ -> true)) cons = ++ Hashtbl.fold (fun _ con (ins, outs) -> ++ if (only_if con) then ( ++ let fd = Connection.get_fd con in ++ (fd :: ins, if Connection.has_output con then fd :: outs else outs) ++ ) else (ins, outs) ++ ) ++ cons.anonymous ([], []) + + let find cons = + Hashtbl.find cons.anonymous +diff --git a/tools/ocaml/xenstored/define.ml b/tools/ocaml/xenstored/define.ml +index 816b493..5a604d1 100644 +--- a/tools/ocaml/xenstored/define.ml ++++ b/tools/ocaml/xenstored/define.ml +@@ -30,6 +30,7 @@ let maxtransaction = ref (20) + let maxrequests = ref (-1) (* maximum requests per transaction *) + + let conflict_burst_limit = ref 5.0 ++let conflict_max_history_seconds = ref 0.05 + let conflict_rate_limit_is_aggregate = ref true + + let domid_self = 0x7FF0 +diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml +index 3d29cc8..99f68c7 100644 +--- a/tools/ocaml/xenstored/domains.ml ++++ b/tools/ocaml/xenstored/domains.ml +@@ -39,12 +39,12 @@ type domains = { + mutable n_paused: int; + } + +-let init eventchn = { ++let init eventchn on_first_conflict_pause = { + eventchn = eventchn; + table = Hashtbl.create 10; + doms_conflict_paused = Queue.create (); + doms_with_conflict_penalty = Queue.create (); +- on_first_conflict_pause = (fun () -> ()); (* Dummy value for now, pending subsequent commit. *) ++ on_first_conflict_pause = on_first_conflict_pause; + n_paused = 0; + } + let del doms id = Hashtbl.remove doms.table id +diff --git a/tools/ocaml/xenstored/oxenstored.conf.in b/tools/ocaml/xenstored/oxenstored.conf.in +index edd4335..536611e 100644 +--- a/tools/ocaml/xenstored/oxenstored.conf.in ++++ b/tools/ocaml/xenstored/oxenstored.conf.in +@@ -22,7 +22,7 @@ conflict-burst-limit = 5.0 + # The conflict-credit is replenished over time: + # one point is issued after each conflict-max-history-seconds, so this + # is the minimum pause-time during which a domain will be ignored. +-# conflict-max-history-seconds = 0.05 ++conflict-max-history-seconds = 0.05 + + # If the conflict-rate-limit-is-aggregate flag is true then after each + # tick one point of conflict-credit is given to just one domain: the +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index 20473d5..f562f59 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -53,14 +53,16 @@ let process_connection_fds store cons domains rset wset = + + let process_domains store cons domains = + let do_io_domain domain = +- if not (Domain.is_bad_domain domain) then +- let io_credit = Domain.get_io_credit domain in +- if io_credit > 0 then ( +- let con = Connections.find_domain cons (Domain.get_id domain) in +- Process.do_input store cons domains con; +- Process.do_output store cons domains con; +- Domain.decr_io_credit domain; +- ) in ++ if Domain.is_bad_domain domain ++ || Domain.get_io_credit domain <= 0 ++ || Domain.is_paused_for_conflict domain ++ then () (* nothing to do *) ++ else ( ++ let con = Connections.find_domain cons (Domain.get_id domain) in ++ Process.do_input store cons domains con; ++ Process.do_output store cons domains con; ++ Domain.decr_io_credit domain ++ ) in + Domains.iter domains do_io_domain + + let sigusr1_handler store = +@@ -90,6 +92,7 @@ let parse_config filename = + let options = [ + ("merge-activate", Config.Set_bool Transaction.do_coalesce); + ("conflict-burst-limit", Config.Set_float Define.conflict_burst_limit); ++ ("conflict-max-history-seconds", Config.Set_float Define.conflict_max_history_seconds); + ("conflict-rate-limit-is-aggregate", Config.Set_bool Define.conflict_rate_limit_is_aggregate); + ("perms-activate", Config.Set_bool Perms.activate); + ("quota-activate", Config.Set_bool Quota.activate); +@@ -262,7 +265,22 @@ let _ = + + let store = Store.create () in + let eventchn = Event.init () in +- let domains = Domains.init eventchn in ++ let next_frequent_ops = ref 0. in ++ let advance_next_frequent_ops () = ++ next_frequent_ops := (Unix.gettimeofday () +. !Define.conflict_max_history_seconds) ++ in ++ let delay_next_frequent_ops_by duration = ++ next_frequent_ops := !next_frequent_ops +. duration ++ in ++ let domains = Domains.init eventchn advance_next_frequent_ops in ++ ++ (* For things that need to be done periodically but more often ++ * than the periodic_ops function *) ++ let frequent_ops () = ++ if Unix.gettimeofday () > !next_frequent_ops then ( ++ Domains.incr_conflict_credit domains; ++ advance_next_frequent_ops () ++ ) in + let cons = Connections.create () in + + let quit = ref false in +@@ -394,23 +412,34 @@ let _ = + gc.Gc.heap_words gc.Gc.heap_chunks + gc.Gc.live_words gc.Gc.live_blocks + gc.Gc.free_words gc.Gc.free_blocks +- ) +- in ++ ); ++ let elapsed = Unix.gettimeofday () -. now in ++ delay_next_frequent_ops_by elapsed ++ in + +- let period_ops_interval = 15. in +- let period_start = ref 0. in ++ let period_ops_interval = 15. in ++ let period_start = ref 0. in + + let main_loop () = +- ++ let is_peaceful c = ++ match Connection.get_domain c with ++ | None -> true (* Treat socket-connections as exempt, and free to conflict. *) ++ | Some dom -> not (Domain.is_paused_for_conflict dom) ++ in ++ frequent_ops (); + let mw = Connections.has_more_work cons in ++ let peaceful_mw = List.filter is_peaceful mw in + List.iter + (fun c -> + match Connection.get_domain c with + | None -> () | Some d -> Domain.incr_io_credit d) +- mw; ++ peaceful_mw; ++ let start_time = Unix.gettimeofday () in + let timeout = +- if List.length mw > 0 then 0. else period_ops_interval in +- let inset, outset = Connections.select cons in ++ let until_next_activity = min (max 0. (!next_frequent_ops -. start_time)) period_ops_interval in ++ if peaceful_mw <> [] then 0. else until_next_activity ++ in ++ let inset, outset = Connections.select ~only_if:is_peaceful cons in + let rset, wset, _ = + try + Select.select (spec_fds @ inset) outset [] timeout +@@ -420,6 +449,7 @@ let _ = + List.partition (fun fd -> List.mem fd spec_fds) rset in + if List.length sfds > 0 then + process_special_fds sfds; ++ + if List.length cfds > 0 || List.length wset > 0 then + process_connection_fds store cons domains cfds wset; + if timeout <> 0. then ( +@@ -427,6 +457,7 @@ let _ = + if now > !period_start +. period_ops_interval then + (period_start := now; periodic_ops now) + ); ++ + process_domains store cons domains + in + +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0006-oxenstored-add-transaction-info-relevant-to-history-.patch b/system/xen/xsa/xsa206-4.8-0006-oxenstored-add-transaction-info-relevant-to-history-.patch new file mode 100644 index 0000000000..e9f23d62c1 --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0006-oxenstored-add-transaction-info-relevant-to-history-.patch @@ -0,0 +1,88 @@ +From eedcaba31d907b889f571113e7d9739e5ce1e9e5 Mon Sep 17 00:00:00 2001 +From: Jonathan Davies <jonathan.davies@citrix.com> +Date: Tue, 14 Mar 2017 12:17:38 +0000 +Subject: [PATCH 06/15] oxenstored: add transaction info relevant to + history-tracking + +Specifically: + * retain the original store (not just the root) in full transactions + * store commit count at the time of the start of the transaction + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Jonathan Davies <jonathan.davies@citrix.com> +Reviewed-by: Thomas Sanders <thomas.sanders@citrix.com> +Reviewed-by: Ian Jackson <ian.jackson@eu.citrix.com> +Reviewed-by: Christian Lindig <christian.lindig@citrix.com> +--- + tools/ocaml/xenstored/process.ml | 2 +- + tools/ocaml/xenstored/transaction.ml | 12 ++++++++---- + 2 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 7b60376..5f92044 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -301,7 +301,7 @@ let transaction_replay c t doms cons = + | Transaction.No -> + error "attempted to replay a non-full transaction"; + false +- | Transaction.Full(id, oldroot, cstore) -> ++ | Transaction.Full(id, oldstore, cstore) -> + let tid = Connection.start_transaction c cstore in + let new_t = Transaction.make tid cstore in + let con = sprintf "r(%d):%s" id (Connection.get_domstr c) in +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index 6f758ff..b1791b3 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -73,12 +73,13 @@ let can_coalesce oldroot currentroot path = + + type ty = No | Full of ( + int * (* Transaction id *) +- Store.Node.t * (* Original root *) ++ Store.t * (* Original store *) + Store.t (* A pointer to the canonical store: its root changes on each transaction-commit *) + ) + + type t = { + ty: ty; ++ start_count: int64; + store: Store.t; (* This is the store that we change in write operations. *) + quota: Quota.t; + mutable paths: (Xenbus.Xb.Op.operation * Store.Path.t) list; +@@ -87,10 +88,13 @@ type t = { + mutable write_lowpath: Store.Path.t option; + } + ++let counter = ref 0L ++ + let make id store = +- let ty = if id = none then No else Full(id, Store.get_root store, store) in ++ let ty = if id = none then No else Full(id, Store.copy store, store) in + { + ty = ty; ++ start_count = !counter; + store = if id = none then store else Store.copy store; + quota = Quota.copy store.Store.quota; + paths = []; +@@ -161,7 +165,7 @@ let commit ~con t = + let has_commited = + match t.ty with + | No -> true +- | Full (id, oldroot, cstore) -> (* "cstore" meaning current canonical store *) ++ | Full (id, oldstore, cstore) -> (* "cstore" meaning current canonical store *) + let commit_partial oldroot cstore store = + (* get the lowest path of the query and verify that it hasn't + been modified by others transactions. *) +@@ -204,7 +208,7 @@ let commit ~con t = + if !test_eagain && Random.int 3 = 0 then + false + else +- try_commit oldroot cstore t.store ++ try_commit (Store.get_root oldstore) cstore t.store + in + if has_commited && has_write_ops then + Disk.write t.store; +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0007-oxenstored-support-commit-history-tracking.patch b/system/xen/xsa/xsa206-4.8-0007-oxenstored-support-commit-history-tracking.patch new file mode 100644 index 0000000000..d8fadaa749 --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0007-oxenstored-support-commit-history-tracking.patch @@ -0,0 +1,153 @@ +From 5df600ee06458eebf037f6bff663b0a9273e3a3f Mon Sep 17 00:00:00 2001 +From: Jonathan Davies <jonathan.davies@citrix.com> +Date: Tue, 14 Mar 2017 13:20:07 +0000 +Subject: [PATCH 07/15] oxenstored: support commit history tracking + +Add ability to track xenstore tree operations -- either non-transactional +operations or committed transactions. + +For now, the call to actually retain commits is commented out because history +can grow without bound. + +For now, we call record_commit for all non-transactional operations. A +subsequent patch will make it retain only the ones with side-effects. + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Jonathan Davies <jonathan.davies@citrix.com> +Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com> +Reviewed-by: Christian Lindig <christian.lindig@citrix.com> + +--- + tools/ocaml/xenstored/Makefile | 1 + + tools/ocaml/xenstored/history.ml | 43 ++++++++++++++++++++++++++++++++++++++ + tools/ocaml/xenstored/process.ml | 24 +++++++++++++++++++-- + tools/ocaml/xenstored/xenstored.ml | 1 + + 4 files changed, 67 insertions(+), 2 deletions(-) + create mode 100644 tools/ocaml/xenstored/history.ml + +diff --git a/tools/ocaml/xenstored/Makefile b/tools/ocaml/xenstored/Makefile +index 1769e55..d238836 100644 +--- a/tools/ocaml/xenstored/Makefile ++++ b/tools/ocaml/xenstored/Makefile +@@ -53,6 +53,7 @@ OBJS = paths \ + domains \ + connection \ + connections \ ++ history \ + parse_arg \ + process \ + xenstored +diff --git a/tools/ocaml/xenstored/history.ml b/tools/ocaml/xenstored/history.ml +new file mode 100644 +index 0000000..e4b4d70 +--- /dev/null ++++ b/tools/ocaml/xenstored/history.ml +@@ -0,0 +1,43 @@ ++(* ++ * Copyright (c) 2017 Citrix Systems Ltd. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU Lesser General Public License as published ++ * by the Free Software Foundation; version 2.1 only. with the special ++ * exception on linking described in file LICENSE. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU Lesser General Public License for more details. ++ *) ++ ++type history_record = { ++ con: Connection.t; (* connection that made a change *) ++ tid: int; (* transaction id of the change (may be Transaction.none) *) ++ before: Store.t; (* the store before the change *) ++ after: Store.t; (* the store after the change *) ++ finish_count: int64; (* the commit-count at which the transaction finished *) ++} ++ ++let history : history_record list ref = ref [] ++ ++(* Called from periodic_ops to ensure we don't discard symbols that are still needed. *) ++(* There is scope for optimisation here, since in consecutive commits one commit's `after` ++ * is the same thing as the next commit's `before`, but not all commits in history are ++ * consecutive. *) ++let mark_symbols () = ++ (* There are gaps where dom0's commits are missing. Otherwise we could assume that ++ * each element's `before` is the same thing as the next element's `after` ++ * since the next element is the previous commit *) ++ List.iter (fun hist_rec -> ++ Store.mark_symbols hist_rec.before; ++ Store.mark_symbols hist_rec.after; ++ ) ++ !history ++ ++let push (x: history_record) = ++ let dom = x.con.Connection.dom in ++ match dom with ++ | None -> () (* treat socket connections as always free to conflict *) ++ | Some d -> if not (Domain.is_free_to_conflict d) then history := x :: !history +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 5f92044..964c044 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -293,6 +293,16 @@ let write_response_log ~ty ~tid ~con ~response = + | Packet.Reply x -> write_answer_log ~ty ~tid ~con ~data:x + | Packet.Error e -> write_answer_log ~ty:(Xenbus.Xb.Op.Error) ~tid ~con ~data:e + ++let record_commit ~con ~tid ~before ~after = ++ let inc r = r := Int64.add 1L !r in ++ let finish_count = inc Transaction.counter; !Transaction.counter in ++ (* This call would leak memory if historic activity is retained forever ++ so can only be uncommented if history is guaranteed not to grow ++ unboundedly. ++ History.push {History.con=con; tid=tid; before=before; after=after; finish_count=finish_count} ++ *) ++ () ++ + (* Replay a stored transaction against a fresh store, check the responses are + all equivalent: if so, commit the transaction. Otherwise send the abort to + the client. *) +@@ -363,8 +373,14 @@ let do_transaction_end con t domains cons data = + Connection.end_transaction con (Transaction.get_id t) commit in + if not success then + raise Transaction_again; +- if commit then +- process_watch (List.rev (Transaction.get_paths t)) cons ++ if commit then begin ++ process_watch (List.rev (Transaction.get_paths t)) cons; ++ match t.Transaction.ty with ++ | Transaction.No -> ++ () (* no need to record anything *) ++ | Transaction.Full(id, oldstore, cstore) -> ++ record_commit ~con ~tid:id ~before:oldstore ~after:cstore ++ end + + let do_introduce con t domains cons data = + if not (Connection.is_dom0 con) +@@ -448,7 +464,11 @@ let process_packet ~store ~cons ~doms ~con ~req = + else + Connection.get_transaction con tid + in ++ ++ let before = Store.copy store in + let response = input_handle_error ~cons ~doms ~fct ~con ~t ~req in ++ let after = Store.copy store in ++ if tid = Transaction.none then record_commit ~con ~tid ~before ~after; + + let response = try + if tid <> Transaction.none then +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index f562f59..d5c50fd 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -385,6 +385,7 @@ let _ = + Symbol.mark_all_as_unused (); + Store.mark_symbols store; + Connections.iter cons Connection.mark_symbols; ++ History.mark_symbols (); + Symbol.garbage () + end; + +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0008-oxenstored-only-record-operations-with-side-effects-.patch b/system/xen/xsa/xsa206-4.8-0008-oxenstored-only-record-operations-with-side-effects-.patch new file mode 100644 index 0000000000..e3de404298 --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0008-oxenstored-only-record-operations-with-side-effects-.patch @@ -0,0 +1,85 @@ +From f8083d52b6314f92718316f160eea47fef47988e Mon Sep 17 00:00:00 2001 +From: Jonathan Davies <jonathan.davies@citrix.com> +Date: Thu, 23 Mar 2017 14:20:33 +0000 +Subject: [PATCH 08/15] oxenstored: only record operations with side-effects in + history + +There is no need to record "read" operations as they will never cause another +transaction to fail. + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Jonathan Davies <jonathan.davies@citrix.com> +Reviewed-by: Thomas Sanders <thomas.sanders@citrix.com> + +--- + tools/ocaml/xenstored/process.ml | 47 ++++++++++++++++++++++++++++++++++++---- + 1 file changed, 43 insertions(+), 4 deletions(-) + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 964c044..b435a4a 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -450,6 +450,37 @@ let function_of_type ty = + | _ -> function_of_type_simple_op ty + + (** ++ * Determines which individual (non-transactional) operations we want to retain. ++ * We only want to retain operations that have side-effects in the store since ++ * these can be the cause of transactions failing. ++ *) ++let retain_op_in_history ty = ++ match ty with ++ | Xenbus.Xb.Op.Write ++ | Xenbus.Xb.Op.Mkdir ++ | Xenbus.Xb.Op.Rm ++ | Xenbus.Xb.Op.Setperms -> true ++ | Xenbus.Xb.Op.Debug ++ | Xenbus.Xb.Op.Directory ++ | Xenbus.Xb.Op.Read ++ | Xenbus.Xb.Op.Getperms ++ | Xenbus.Xb.Op.Watch ++ | Xenbus.Xb.Op.Unwatch ++ | Xenbus.Xb.Op.Transaction_start ++ | Xenbus.Xb.Op.Transaction_end ++ | Xenbus.Xb.Op.Introduce ++ | Xenbus.Xb.Op.Release ++ | Xenbus.Xb.Op.Getdomainpath ++ | Xenbus.Xb.Op.Watchevent ++ | Xenbus.Xb.Op.Error ++ | Xenbus.Xb.Op.Isintroduced ++ | Xenbus.Xb.Op.Resume ++ | Xenbus.Xb.Op.Set_target ++ | Xenbus.Xb.Op.Restrict ++ | Xenbus.Xb.Op.Reset_watches ++ | Xenbus.Xb.Op.Invalid -> false ++ ++(** + * Nothrow guarantee. + *) + let process_packet ~store ~cons ~doms ~con ~req = +@@ -465,10 +496,18 @@ let process_packet ~store ~cons ~doms ~con ~req = + Connection.get_transaction con tid + in + +- let before = Store.copy store in +- let response = input_handle_error ~cons ~doms ~fct ~con ~t ~req in +- let after = Store.copy store in +- if tid = Transaction.none then record_commit ~con ~tid ~before ~after; ++ let execute () = input_handle_error ~cons ~doms ~fct ~con ~t ~req in ++ ++ let response = ++ (* Note that transactions are recorded in history separately. *) ++ if tid = Transaction.none && retain_op_in_history ty then begin ++ let before = Store.copy store in ++ let response = execute () in ++ let after = Store.copy store in ++ record_commit ~con ~tid ~before ~after; ++ response ++ end else execute () ++ in + + let response = try + if tid <> Transaction.none then +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0009-oxenstored-discard-old-commit-history-on-txn-end.patch b/system/xen/xsa/xsa206-4.8-0009-oxenstored-discard-old-commit-history-on-txn-end.patch new file mode 100644 index 0000000000..f1734c8375 --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0009-oxenstored-discard-old-commit-history-on-txn-end.patch @@ -0,0 +1,141 @@ +From 66bdbbc41a45d2bf59ddb4ff26c52c1cc546f720 Mon Sep 17 00:00:00 2001 +From: Thomas Sanders <thomas.sanders@citrix.com> +Date: Thu, 23 Mar 2017 14:25:16 +0000 +Subject: [PATCH 09/15] oxenstored: discard old commit-history on txn end + +The history of commits is to be used for working out which historical +commit(s) (including atomic writes) caused conflicts with a +currently-failing commit of a transaction. Any commit that was made +before the current transaction started cannot be relevant. Therefore +we never need to keep history from before the start of the +longest-running transaction that is open at any given time: whenever a +transaction ends (with or without a commit) then if it was the +longest-running open transaction we can delete history up until start +of the the next-longest-running open transaction. + +Some transactions might stay open for a very long time, so if any +transaction exceeds conflict_max_history_seconds then we remove it +from consideration in this context, and will not guarantee to keep +remembering about historical commits made during such a transaction. + +We implement this by keeping a list of all open transactions that have +not been open too long. When a transaction ends, we remove it from the +list, along with any that have been open longer than the maximum; then +we delete any history from before the start of the longest-running +transaction remaining in the list. + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com> +Reviewed-by: Jonathan Davies <jonathan.davies@citrix.com> +Reviewed-by: Christian Lindig <christian.lindig@citrix.com> +--- + tools/ocaml/xenstored/history.ml | 17 +++++++++++++++++ + tools/ocaml/xenstored/process.ml | 4 ++-- + tools/ocaml/xenstored/transaction.ml | 29 +++++++++++++++++++++++++---- + 3 files changed, 44 insertions(+), 6 deletions(-) + +diff --git a/tools/ocaml/xenstored/history.ml b/tools/ocaml/xenstored/history.ml +index e4b4d70..6f7a282 100644 +--- a/tools/ocaml/xenstored/history.ml ++++ b/tools/ocaml/xenstored/history.ml +@@ -36,6 +36,23 @@ let mark_symbols () = + ) + !history + ++(* Keep only enough commit-history to protect the running transactions that we are still tracking *) ++(* There is scope for optimisation here, replacing List.filter with something more efficient, ++ * probably on a different list-like structure. *) ++let trim () = ++ history := match Transaction.oldest_short_running_transaction () with ++ | None -> [] (* We have no open transaction, so no history is needed *) ++ | Some (_, txn) -> ( ++ (* keep records with finish_count recent enough to be relevant *) ++ List.filter (fun r -> r.finish_count > txn.Transaction.start_count) !history ++ ) ++ ++let end_transaction txn con tid commit = ++ let success = Connection.end_transaction con tid commit in ++ Transaction.end_transaction txn; ++ trim (); ++ success ++ + let push (x: history_record) = + let dom = x.con.Connection.dom in + match dom with +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index b435a4a..6f4d118 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -313,7 +313,7 @@ let transaction_replay c t doms cons = + false + | Transaction.Full(id, oldstore, cstore) -> + let tid = Connection.start_transaction c cstore in +- let new_t = Transaction.make tid cstore in ++ let new_t = Transaction.make ~internal:true tid cstore in + let con = sprintf "r(%d):%s" id (Connection.get_domstr c) in + let perform_exn (request, response) = + write_access_log ~ty:request.Packet.ty ~tid ~con ~data:request.Packet.data; +@@ -370,7 +370,7 @@ let do_transaction_end con t domains cons data = + in + let success = + let commit = if commit then Some (fun con trans -> transaction_replay con trans domains cons) else None in +- Connection.end_transaction con (Transaction.get_id t) commit in ++ History.end_transaction t con (Transaction.get_id t) commit in + if not success then + raise Transaction_again; + if commit then begin +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index b1791b3..edd1178 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -87,12 +87,29 @@ type t = { + mutable read_lowpath: Store.Path.t option; + mutable write_lowpath: Store.Path.t option; + } ++let get_id t = match t.ty with No -> none | Full (id, _, _) -> id + + let counter = ref 0L + +-let make id store = ++(* Scope for optimisation: different data-structure and functions to search/filter it *) ++let short_running_txns = ref [] ++ ++let oldest_short_running_transaction () = ++ let rec last = function ++ | [] -> None ++ | [x] -> Some x ++ | x :: xs -> last xs ++ in last !short_running_txns ++ ++let end_transaction txn = ++ let cutoff = Unix.gettimeofday () -. !Define.conflict_max_history_seconds in ++ short_running_txns := List.filter ++ (function (start_time, tx) -> start_time >= cutoff && tx != txn) ++ !short_running_txns ++ ++let make ?(internal=false) id store = + let ty = if id = none then No else Full(id, Store.copy store, store) in +- { ++ let txn = { + ty = ty; + start_count = !counter; + store = if id = none then store else Store.copy store; +@@ -101,9 +118,13 @@ let make id store = + operations = []; + read_lowpath = None; + write_lowpath = None; +- } ++ } in ++ if id <> none && not internal then ( ++ let now = Unix.gettimeofday () in ++ short_running_txns := (now, txn) :: !short_running_txns ++ ); ++ txn + +-let get_id t = match t.ty with No -> none | Full (id, _, _) -> id + let get_store t = t.store + let get_paths t = t.paths + +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0010-oxenstored-track-commit-history.patch b/system/xen/xsa/xsa206-4.8-0010-oxenstored-track-commit-history.patch new file mode 100644 index 0000000000..f5d85367eb --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0010-oxenstored-track-commit-history.patch @@ -0,0 +1,35 @@ +From 2e94c635a652a575cb1f25f7dc5bc0ebcdbedcc4 Mon Sep 17 00:00:00 2001 +From: Jonathan Davies <jonathan.davies@citrix.com> +Date: Mon, 27 Mar 2017 08:58:29 +0000 +Subject: [PATCH 10/15] oxenstored: track commit history + +Since the list of historic activity cannot grow without bound, it is safe to use +this to track commits. + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Jonathan Davies <jonathan.davies@citrix.com> +Reviewed-by: Thomas Sanders <thomas.sanders@citrix.com> +--- + tools/ocaml/xenstored/process.ml | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 6f4d118..1ed1a8f 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -296,12 +296,7 @@ let write_response_log ~ty ~tid ~con ~response = + let record_commit ~con ~tid ~before ~after = + let inc r = r := Int64.add 1L !r in + let finish_count = inc Transaction.counter; !Transaction.counter in +- (* This call would leak memory if historic activity is retained forever +- so can only be uncommented if history is guaranteed not to grow +- unboundedly. + History.push {History.con=con; tid=tid; before=before; after=after; finish_count=finish_count} +- *) +- () + + (* Replay a stored transaction against a fresh store, check the responses are + all equivalent: if so, commit the transaction. Otherwise send the abort to +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0011-oxenstored-blame-the-connection-that-caused-a-transa.patch b/system/xen/xsa/xsa206-4.8-0011-oxenstored-blame-the-connection-that-caused-a-transa.patch new file mode 100644 index 0000000000..b0a8e011ee --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0011-oxenstored-blame-the-connection-that-caused-a-transa.patch @@ -0,0 +1,137 @@ +From 0972f3e46001e9b3192786033663ef4ee423f8be Mon Sep 17 00:00:00 2001 +From: Jonathan Davies <jonathan.davies@citrix.com> +Date: Thu, 23 Mar 2017 14:28:16 +0000 +Subject: [PATCH 11/15] oxenstored: blame the connection that caused a + transaction conflict + +Blame each connection found to have made a commit that would cause this +transaction to fail. Each blamed connection is penalised by having its +conflict-credit decremented. + +Note the change in semantics for the replay function: we no longer stop after +finding the first operation that can't be replayed. This allows us to identify +all operations that conflicted with this transaction, not just the one that +conflicted first. + +Signed-off-by: Jonathan Davies <jonathan.davies@citrix.com> +Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com> +v1 Reviewed-by: Christian Lindig <christian.lindig@citrix.com> + +Changes since v1: + * use correct log levels for informational messages +Changes since v2: + * fix the blame algorithm and improve logging + (fix was reviewed by Jonathan Davies) + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com> +--- + tools/ocaml/xenstored/history.ml | 12 ++++++++++ + tools/ocaml/xenstored/process.ml | 50 ++++++++++++++++++++++++++++++++-------- + 2 files changed, 52 insertions(+), 10 deletions(-) + +diff --git a/tools/ocaml/xenstored/history.ml b/tools/ocaml/xenstored/history.ml +index 6f7a282..e941e2b 100644 +--- a/tools/ocaml/xenstored/history.ml ++++ b/tools/ocaml/xenstored/history.ml +@@ -58,3 +58,15 @@ let push (x: history_record) = + match dom with + | None -> () (* treat socket connections as always free to conflict *) + | Some d -> if not (Domain.is_free_to_conflict d) then history := x :: !history ++ ++(* Find the connections from records since commit-count [since] for which [f record] returns [true] *) ++let filter_connections ~since ~f = ++ (* The "mem" call is an optimisation, to avoid calling f if we have picked con already. *) ++ (* Using a hash table rather than a list is to optimise the "mem" call. *) ++ List.fold_left (fun acc hist_rec -> ++ if hist_rec.finish_count > since ++ && not (Hashtbl.mem acc hist_rec.con) ++ && f hist_rec ++ then Hashtbl.replace acc hist_rec.con (); ++ acc ++ ) (Hashtbl.create 1023) !history +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 1ed1a8f..5e5a1ab 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -16,6 +16,7 @@ + + let error fmt = Logging.error "process" fmt + let info fmt = Logging.info "process" fmt ++let debug fmt = Logging.debug "process" fmt + + open Printf + open Stdext +@@ -25,6 +26,7 @@ exception Transaction_nested + exception Domain_not_match + exception Invalid_Cmd_Args + ++(* This controls the do_debug fn in this module, not the debug logging-function. *) + let allow_debug = ref false + + let c_int_of_string s = +@@ -308,23 +310,51 @@ let transaction_replay c t doms cons = + false + | Transaction.Full(id, oldstore, cstore) -> + let tid = Connection.start_transaction c cstore in +- let new_t = Transaction.make ~internal:true tid cstore in ++ let replay_t = Transaction.make ~internal:true tid cstore in + let con = sprintf "r(%d):%s" id (Connection.get_domstr c) in +- let perform_exn (request, response) = +- write_access_log ~ty:request.Packet.ty ~tid ~con ~data:request.Packet.data; ++ ++ let perform_exn ~wlog txn (request, response) = ++ if wlog then write_access_log ~ty:request.Packet.ty ~tid ~con ~data:request.Packet.data; + let fct = function_of_type_simple_op request.Packet.ty in +- let response' = input_handle_error ~cons ~doms ~fct ~con:c ~t:new_t ~req:request in +- write_response_log ~ty:request.Packet.ty ~tid ~con ~response:response'; +- if not(Packet.response_equal response response') then raise Transaction_again in ++ let response' = input_handle_error ~cons ~doms ~fct ~con:c ~t:txn ~req:request in ++ if wlog then write_response_log ~ty:request.Packet.ty ~tid ~con ~response:response'; ++ if not(Packet.response_equal response response') then raise Transaction_again ++ in + finally + (fun () -> + try + Logging.start_transaction ~con ~tid; +- List.iter perform_exn (Transaction.get_operations t); +- Logging.end_transaction ~con ~tid; ++ List.iter (perform_exn ~wlog:true replay_t) (Transaction.get_operations t); (* May throw EAGAIN *) + +- Transaction.commit ~con new_t +- with e -> ++ Logging.end_transaction ~con ~tid; ++ Transaction.commit ~con replay_t ++ with ++ | Transaction_again -> ( ++ let victim_domstr = Connection.get_domstr c in ++ debug "Apportioning blame for EAGAIN in txn %d, domain=%s" id victim_domstr; ++ let punish guilty_con = ++ debug "Blaming domain %s for conflict with domain %s txn %d" ++ (Connection.get_domstr guilty_con) victim_domstr id; ++ Connection.decr_conflict_credit doms guilty_con ++ in ++ let judge_and_sentence hist_rec = ( ++ let can_apply_on store = ( ++ let store = Store.copy store in ++ let trial_t = Transaction.make ~internal:true Transaction.none store in ++ try List.iter (perform_exn ~wlog:false trial_t) (Transaction.get_operations t); ++ true ++ with Transaction_again -> false ++ ) in ++ if can_apply_on hist_rec.History.before ++ && not (can_apply_on hist_rec.History.after) ++ then (punish hist_rec.History.con; true) ++ else false ++ ) in ++ let guilty_cons = History.filter_connections ~since:t.Transaction.start_count ~f:judge_and_sentence in ++ if Hashtbl.length guilty_cons = 0 then debug "Found no culprit for conflict in %s: must be self or not in history." con; ++ false ++ ) ++ | e -> + info "transaction_replay %d caught: %s" tid (Printexc.to_string e); + false + ) +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0012-oxenstored-allow-self-conflicts.patch b/system/xen/xsa/xsa206-4.8-0012-oxenstored-allow-self-conflicts.patch new file mode 100644 index 0000000000..4a46db4c14 --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0012-oxenstored-allow-self-conflicts.patch @@ -0,0 +1,58 @@ +From d3a8b4ffde38f01aa7f497d07404478b6f90041c Mon Sep 17 00:00:00 2001 +From: Thomas Sanders <thomas.sanders@citrix.com> +Date: Thu, 23 Mar 2017 19:06:54 +0000 +Subject: [PATCH 12/15] oxenstored: allow self-conflicts + +We already avoid inter-domain conflicts but now allow intra-domain +conflicts. Although there are no known practical examples of a domain +that might perform operations that conflict with its own transactions, +this is conceivable, so here we avoid changing those semantics +unnecessarily. + +When a transaction commit fails with a conflict and we look through +the history of commits to see which connection(s) to blame, ignore +historical commits that were made by the same connection as the +failing commit. + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com> +Reviewed-by: Jonathan Davies <jonathan.davies@citrix.com> +--- + tools/ocaml/xenstored/history.ml | 3 ++- + tools/ocaml/xenstored/process.ml | 2 +- + 2 files changed, 3 insertions(+), 2 deletions(-) + +diff --git a/tools/ocaml/xenstored/history.ml b/tools/ocaml/xenstored/history.ml +index e941e2b..4079588 100644 +--- a/tools/ocaml/xenstored/history.ml ++++ b/tools/ocaml/xenstored/history.ml +@@ -60,11 +60,12 @@ let push (x: history_record) = + | Some d -> if not (Domain.is_free_to_conflict d) then history := x :: !history + + (* Find the connections from records since commit-count [since] for which [f record] returns [true] *) +-let filter_connections ~since ~f = ++let filter_connections ~ignore ~since ~f = + (* The "mem" call is an optimisation, to avoid calling f if we have picked con already. *) + (* Using a hash table rather than a list is to optimise the "mem" call. *) + List.fold_left (fun acc hist_rec -> + if hist_rec.finish_count > since ++ && not (hist_rec.con == ignore) + && not (Hashtbl.mem acc hist_rec.con) + && f hist_rec + then Hashtbl.replace acc hist_rec.con (); +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index 5e5a1ab..b56e3fc 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -350,7 +350,7 @@ let transaction_replay c t doms cons = + then (punish hist_rec.History.con; true) + else false + ) in +- let guilty_cons = History.filter_connections ~since:t.Transaction.start_count ~f:judge_and_sentence in ++ let guilty_cons = History.filter_connections ~ignore:c ~since:t.Transaction.start_count ~f:judge_and_sentence in + if Hashtbl.length guilty_cons = 0 then debug "Found no culprit for conflict in %s: must be self or not in history." con; + false + ) +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0013-oxenstored-do-not-commit-read-only-transactions.patch b/system/xen/xsa/xsa206-4.8-0013-oxenstored-do-not-commit-read-only-transactions.patch new file mode 100644 index 0000000000..e6b066e40a --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0013-oxenstored-do-not-commit-read-only-transactions.patch @@ -0,0 +1,59 @@ +From f45ce51771c7e96c8ac8179c44476f8fc6168636 Mon Sep 17 00:00:00 2001 +From: Thomas Sanders <thomas.sanders@citrix.com> +Date: Fri, 24 Mar 2017 16:16:10 +0000 +Subject: [PATCH 13/15] oxenstored: do not commit read-only transactions + +The packet telling us to end the transaction has always carried an +argument telling us whether to commit. + +If the transaction made no modifications to the tree, now we ignore +that argument and do not commit: it is just a waste of effort. + +This makes read-only transactions immune to conflicts, and means that +we do not need to store any of their details in the history that is +used for assigning blame for conflicts. + +We count a transaction as a read-only transaction only if it contains +no operations that modified the tree. + +This means that (for example) a transaction that creates a new node +then deletes it would NOT count as read-only, even though it makes no +change overall. A more sophisticated algorithm could judge the +transaction based on comparison of its initial and final states, but +this would add complexity and computational cost. + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com> +Reviewed-by: Jonathan Davies <jonathan.davies@citrix.com> +--- + tools/ocaml/xenstored/process.ml | 1 + + tools/ocaml/xenstored/transaction.ml | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index b56e3fc..adfc7a4 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -393,6 +393,7 @@ let do_transaction_end con t domains cons data = + | x :: _ -> raise (Invalid_argument x) + | _ -> raise Invalid_Cmd_Args + in ++ let commit = commit && not (Transaction.is_read_only t) in + let success = + let commit = if commit then Some (fun con trans -> transaction_replay con trans domains cons) else None in + History.end_transaction t con (Transaction.get_id t) commit in +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index edd1178..8f95301 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -128,6 +128,7 @@ let make ?(internal=false) id store = + let get_store t = t.store + let get_paths t = t.paths + ++let is_read_only t = t.paths = [] + let add_wop t ty path = t.paths <- (ty, path) :: t.paths + let add_operation ~perm t request response = + if !Define.maxrequests >= 0 +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0014-oxenstored-don-t-wake-to-issue-no-conflict-credit.patch b/system/xen/xsa/xsa206-4.8-0014-oxenstored-don-t-wake-to-issue-no-conflict-credit.patch new file mode 100644 index 0000000000..8aae281406 --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0014-oxenstored-don-t-wake-to-issue-no-conflict-credit.patch @@ -0,0 +1,141 @@ +From 374c6a67e3d139a04ecde127a63ce70d24ed7b45 Mon Sep 17 00:00:00 2001 +From: Thomas Sanders <thomas.sanders@citrix.com> +Date: Fri, 24 Mar 2017 19:55:03 +0000 +Subject: [PATCH 14/15] oxenstored: don't wake to issue no conflict-credit + +In the main loop, when choosing the timeout for the select function +call, we were setting it so as to wake up to issue conflict-credit to +any domains that could accept it. When xenstore is idle, this would +mean waking up every 50ms (by default) to do no work. With this +commit, we check whether any domain is below its cap, and if not then +we set the timeout for longer (the same timeout as before the +conflict-protection feature was added). + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com> +Reviewed-by: Jonathan Davies <jonathan.davies@citrix.com> +--- + tools/ocaml/xenstored/domains.ml | 51 ++++++++++++++++++++++++++++++-------- + tools/ocaml/xenstored/xenstored.ml | 5 +++- + 2 files changed, 44 insertions(+), 12 deletions(-) + +diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml +index 99f68c7..61d1e2e 100644 +--- a/tools/ocaml/xenstored/domains.ml ++++ b/tools/ocaml/xenstored/domains.ml +@@ -35,8 +35,9 @@ type domains = { + on_first_conflict_pause: unit -> unit; + + (* If config is set to use individual instead of aggregate conflict-rate-limiting, +- we use this instead of the queues. *) +- mutable n_paused: int; ++ we use these counts instead of the queues. The second one includes the first. *) ++ mutable n_paused: int; (* Number of domains with zero or negative credit *) ++ mutable n_penalised: int; (* Number of domains with less than maximum credit *) + } + + let init eventchn on_first_conflict_pause = { +@@ -46,6 +47,7 @@ let init eventchn on_first_conflict_pause = { + doms_with_conflict_penalty = Queue.create (); + on_first_conflict_pause = on_first_conflict_pause; + n_paused = 0; ++ n_penalised = 0; + } + let del doms id = Hashtbl.remove doms.table id + let exist doms id = Hashtbl.mem doms.table id +@@ -53,6 +55,23 @@ let find doms id = Hashtbl.find doms.table id + let number doms = Hashtbl.length doms.table + let iter doms fct = Hashtbl.iter (fun _ b -> fct b) doms.table + ++let rec is_empty_queue q = ++ Queue.is_empty q || ++ if !(Queue.peek q) = None ++ then ( ++ ignore (Queue.pop q); ++ is_empty_queue q ++ ) else false ++ ++let all_at_max_credit doms = ++ if !Define.conflict_rate_limit_is_aggregate ++ then ++ (* Check both becuase if burst limit is 1.0 then a domain can go straight ++ * from max-credit to paused without getting into the penalty queue. *) ++ is_empty_queue doms.doms_with_conflict_penalty ++ && is_empty_queue doms.doms_conflict_paused ++ else doms.n_penalised = 0 ++ + (* Functions to handle queues of domains given that the domain might be deleted while in a queue. *) + let push dom queue = + Queue.push (ref (Some dom)) queue +@@ -130,13 +149,16 @@ let decr_conflict_credit doms dom = + let before = dom.Domain.conflict_credit in + let after = max (-1.0) (before -. 1.0) in + dom.Domain.conflict_credit <- after; ++ let newly_penalised = ++ before >= !Define.conflict_burst_limit ++ && after < !Define.conflict_burst_limit in ++ let newly_paused = before > 0.0 && after <= 0.0 in + if !Define.conflict_rate_limit_is_aggregate then ( +- if before >= !Define.conflict_burst_limit +- && after < !Define.conflict_burst_limit ++ if newly_penalised + && after > 0.0 + then ( + push dom doms.doms_with_conflict_penalty +- ) else if before > 0.0 && after <= 0.0 ++ ) else if newly_paused + then ( + let first_pause = Queue.is_empty doms.doms_conflict_paused in + push dom doms.doms_conflict_paused; +@@ -144,9 +166,12 @@ let decr_conflict_credit doms dom = + ) else ( + (* The queues are correct already: no further action needed. *) + ) +- ) else if before > 0.0 && after <= 0.0 then ( +- doms.n_paused <- doms.n_paused + 1; +- if doms.n_paused = 1 then doms.on_first_conflict_pause () ++ ) else ( ++ if newly_penalised then doms.n_penalised <- doms.n_penalised + 1; ++ if newly_paused then ( ++ doms.n_paused <- doms.n_paused + 1; ++ if doms.n_paused = 1 then doms.on_first_conflict_pause () ++ ) + ) + + (* Give one point of credit to one domain, and update the queues appropriately. *) +@@ -175,9 +200,13 @@ let incr_conflict_credit doms = + let before = dom.Domain.conflict_credit in + let after = min (before +. 1.0) !Define.conflict_burst_limit in + dom.Domain.conflict_credit <- after; ++ + if before <= 0.0 && after > 0.0 +- then doms.n_paused <- doms.n_paused - 1 ++ then doms.n_paused <- doms.n_paused - 1; ++ ++ if before < !Define.conflict_burst_limit ++ && after >= !Define.conflict_burst_limit ++ then doms.n_penalised <- doms.n_penalised - 1 + in +- (* Scope for optimisation (probably tiny): avoid iteration if all domains are at max credit *) +- iter doms inc ++ if doms.n_penalised > 0 then iter doms inc + ) +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index d5c50fd..06387a8 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -437,7 +437,10 @@ let _ = + peaceful_mw; + let start_time = Unix.gettimeofday () in + let timeout = +- let until_next_activity = min (max 0. (!next_frequent_ops -. start_time)) period_ops_interval in ++ let until_next_activity = ++ if Domains.all_at_max_credit domains ++ then period_ops_interval ++ else min (max 0. (!next_frequent_ops -. start_time)) period_ops_interval in + if peaceful_mw <> [] then 0. else until_next_activity + in + let inset, outset = Connections.select ~only_if:is_peaceful cons in +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0015-oxenstored-transaction-conflicts-improve-logging.patch b/system/xen/xsa/xsa206-4.8-0015-oxenstored-transaction-conflicts-improve-logging.patch new file mode 100644 index 0000000000..bc46ed1ac5 --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0015-oxenstored-transaction-conflicts-improve-logging.patch @@ -0,0 +1,153 @@ +From d7d0c021115d40177035a0626ed47681b034b584 Mon Sep 17 00:00:00 2001 +From: Thomas Sanders <thomas.sanders@citrix.com> +Date: Mon, 27 Mar 2017 14:36:34 +0100 +Subject: [PATCH 15/15] oxenstored transaction conflicts: improve logging + +For information related to transaction conflicts, potentially frequent +logging at "info" priority has been changed to "debug" priority, and +once per two minutes there is an "info" priority summary. + +Additional detailed logging has been added at "debug" priority. + +Reported-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com> +--- + tools/ocaml/xenstored/domain.ml | 8 ++++++++ + tools/ocaml/xenstored/domains.ml | 5 +++++ + tools/ocaml/xenstored/process.ml | 6 +++++- + tools/ocaml/xenstored/transaction.ml | 5 +++++ + tools/ocaml/xenstored/xenstored.ml | 6 ++++++ + 5 files changed, 29 insertions(+), 1 deletion(-) + +diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml +index e677aa3..4515650 100644 +--- a/tools/ocaml/xenstored/domain.ml ++++ b/tools/ocaml/xenstored/domain.ml +@@ -34,6 +34,7 @@ type t = + mutable conflict_credit: float; (* Must be positive to perform writes; a commit + that later causes conflict with another + domain's transaction costs credit. *) ++ mutable caused_conflicts: int64; + } + + let is_dom0 d = d.id = 0 +@@ -93,4 +94,11 @@ let make id mfn remote_port interface eventchn = { + bad_client = false; + io_credit = 0; + conflict_credit = !Define.conflict_burst_limit; ++ caused_conflicts = 0L; + } ++ ++let log_and_reset_conflict_stats logfn dom = ++ if dom.caused_conflicts > 0L then ( ++ logfn dom.id dom.caused_conflicts; ++ dom.caused_conflicts <- 0L ++ ) +diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml +index 61d1e2e..fdae298 100644 +--- a/tools/ocaml/xenstored/domains.ml ++++ b/tools/ocaml/xenstored/domains.ml +@@ -146,8 +146,10 @@ let create0 doms = + dom + + let decr_conflict_credit doms dom = ++ dom.Domain.caused_conflicts <- Int64.add 1L dom.Domain.caused_conflicts; + let before = dom.Domain.conflict_credit in + let after = max (-1.0) (before -. 1.0) in ++ debug "decr_conflict_credit dom%d %F -> %F" (Domain.get_id dom) before after; + dom.Domain.conflict_credit <- after; + let newly_penalised = + before >= !Define.conflict_burst_limit +@@ -178,7 +180,9 @@ let decr_conflict_credit doms dom = + let incr_conflict_credit_from_queue doms = + let process_queue q requeue_test = + let d = pop q in ++ let before = d.Domain.conflict_credit in (* just for debug-logging *) + d.Domain.conflict_credit <- min (d.Domain.conflict_credit +. 1.0) !Define.conflict_burst_limit; ++ debug "incr_conflict_credit_from_queue: dom%d: %F -> %F" (Domain.get_id d) before d.Domain.conflict_credit; + if requeue_test d.Domain.conflict_credit then ( + push d q (* Make it queue up again for its next point of credit. *) + ) +@@ -200,6 +204,7 @@ let incr_conflict_credit doms = + let before = dom.Domain.conflict_credit in + let after = min (before +. 1.0) !Define.conflict_burst_limit in + dom.Domain.conflict_credit <- after; ++ debug "incr_conflict_credit dom%d: %F -> %F" (Domain.get_id dom) before after; + + if before <= 0.0 && after > 0.0 + then doms.n_paused <- doms.n_paused - 1; +diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml +index adfc7a4..8a688c4 100644 +--- a/tools/ocaml/xenstored/process.ml ++++ b/tools/ocaml/xenstored/process.ml +@@ -330,6 +330,7 @@ let transaction_replay c t doms cons = + Transaction.commit ~con replay_t + with + | Transaction_again -> ( ++ Transaction.failed_commits := Int64.add !Transaction.failed_commits 1L; + let victim_domstr = Connection.get_domstr c in + debug "Apportioning blame for EAGAIN in txn %d, domain=%s" id victim_domstr; + let punish guilty_con = +@@ -351,7 +352,10 @@ let transaction_replay c t doms cons = + else false + ) in + let guilty_cons = History.filter_connections ~ignore:c ~since:t.Transaction.start_count ~f:judge_and_sentence in +- if Hashtbl.length guilty_cons = 0 then debug "Found no culprit for conflict in %s: must be self or not in history." con; ++ if Hashtbl.length guilty_cons = 0 then ( ++ debug "Found no culprit for conflict in %s: must be self or not in history." con; ++ Transaction.failed_commits_no_culprit := Int64.add !Transaction.failed_commits_no_culprit 1L ++ ); + false + ) + | e -> +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index 8f95301..da4a3e3 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -90,6 +90,11 @@ type t = { + let get_id t = match t.ty with No -> none | Full (id, _, _) -> id + + let counter = ref 0L ++let failed_commits = ref 0L ++let failed_commits_no_culprit = ref 0L ++let reset_conflict_stats () = ++ failed_commits := 0L; ++ failed_commits_no_culprit := 0L + + (* Scope for optimisation: different data-structure and functions to search/filter it *) + let short_running_txns = ref [] +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index 06387a8..05ace4d 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -376,6 +376,7 @@ let _ = + let last_scan_time = ref 0. in + + let periodic_ops now = ++ debug "periodic_ops starting"; + (* we garbage collect the string->int dictionary after a sizeable amount of operations, + * there's no need to be really fast even if we got loose + * objects since names are often reuse. +@@ -395,7 +396,11 @@ let _ = + + (* make sure we don't print general stats faster than 2 min *) + if now > (!last_stat_time +. 120.) then ( ++ info "Transaction conflict statistics for last %F seconds:" (now -. !last_stat_time); + last_stat_time := now; ++ Domains.iter domains (Domain.log_and_reset_conflict_stats (info "Dom%d caused %Ld conflicts")); ++ info "%Ld failed transactions; of these no culprit was found for %Ld" !Transaction.failed_commits !Transaction.failed_commits_no_culprit; ++ Transaction.reset_conflict_stats (); + + let gc = Gc.stat () in + let (lanon, lanon_ops, lanon_watchs, +@@ -415,6 +420,7 @@ let _ = + gc.Gc.free_words gc.Gc.free_blocks + ); + let elapsed = Unix.gettimeofday () -. now in ++ debug "periodic_ops took %F seconds." elapsed; + delay_next_frequent_ops_by elapsed + in + +-- +2.1.4 + diff --git a/system/xen/xsa/xsa206-4.8-0016-oxenstored-trim-history-in-the-frequent_ops-function.patch b/system/xen/xsa/xsa206-4.8-0016-oxenstored-trim-history-in-the-frequent_ops-function.patch new file mode 100644 index 0000000000..40102efceb --- /dev/null +++ b/system/xen/xsa/xsa206-4.8-0016-oxenstored-trim-history-in-the-frequent_ops-function.patch @@ -0,0 +1,79 @@ +From 26b15d4eb7ac71fcab28a7fca664afa0549c135c Mon Sep 17 00:00:00 2001 +From: Thomas Sanders <thomas.sanders@citrix.com> +Date: Tue, 28 Mar 2017 18:57:52 +0100 +Subject: [PATCH 16/15] oxenstored: trim history in the frequent_ops function + +We were trimming the history of commits only at the end of each +transaction (regardless of how it ended). + +Therefore if non-transactional writes were being made but no +transactions were being ended, the history would grow +indefinitely. Now we trim the history at regular intervals. + +Signed-off-by: Thomas Sanders <thomas.sanders@citrix.com> +--- + tools/ocaml/xenstored/history.ml | 6 +++--- + tools/ocaml/xenstored/transaction.ml | 8 ++++++-- + tools/ocaml/xenstored/xenstored.ml | 1 + + 3 files changed, 10 insertions(+), 5 deletions(-) + +diff --git a/tools/ocaml/xenstored/history.ml b/tools/ocaml/xenstored/history.ml +index 4079588..f39565b 100644 +--- a/tools/ocaml/xenstored/history.ml ++++ b/tools/ocaml/xenstored/history.ml +@@ -39,7 +39,8 @@ let mark_symbols () = + (* Keep only enough commit-history to protect the running transactions that we are still tracking *) + (* There is scope for optimisation here, replacing List.filter with something more efficient, + * probably on a different list-like structure. *) +-let trim () = ++let trim ?txn () = ++ Transaction.trim_short_running_transactions txn; + history := match Transaction.oldest_short_running_transaction () with + | None -> [] (* We have no open transaction, so no history is needed *) + | Some (_, txn) -> ( +@@ -49,8 +50,7 @@ let trim () = + + let end_transaction txn con tid commit = + let success = Connection.end_transaction con tid commit in +- Transaction.end_transaction txn; +- trim (); ++ trim ~txn (); + success + + let push (x: history_record) = +diff --git a/tools/ocaml/xenstored/transaction.ml b/tools/ocaml/xenstored/transaction.ml +index da4a3e3..23e7ccf 100644 +--- a/tools/ocaml/xenstored/transaction.ml ++++ b/tools/ocaml/xenstored/transaction.ml +@@ -106,10 +106,14 @@ let oldest_short_running_transaction () = + | x :: xs -> last xs + in last !short_running_txns + +-let end_transaction txn = ++let trim_short_running_transactions txn = + let cutoff = Unix.gettimeofday () -. !Define.conflict_max_history_seconds in ++ let keep = match txn with ++ | None -> (function (start_time, _) -> start_time >= cutoff) ++ | Some t -> (function (start_time, tx) -> start_time >= cutoff && tx != t) ++ in + short_running_txns := List.filter +- (function (start_time, tx) -> start_time >= cutoff && tx != txn) ++ keep + !short_running_txns + + let make ?(internal=false) id store = +diff --git a/tools/ocaml/xenstored/xenstored.ml b/tools/ocaml/xenstored/xenstored.ml +index 92ea99e..c45146d 100644 +--- a/tools/ocaml/xenstored/xenstored.ml ++++ b/tools/ocaml/xenstored/xenstored.ml +@@ -280,6 +280,7 @@ let _ = + * than the periodic_ops function *) + let frequent_ops () = + if Unix.gettimeofday () > !next_frequent_ops then ( ++ History.trim (); + Domains.incr_conflict_credit domains; + advance_next_frequent_ops () + ) in +-- +1.7.9.5 + diff --git a/system/xen/xsa/xsa211-qemut.patch b/system/xen/xsa/xsa211-qemut.patch new file mode 100644 index 0000000000..59e12fb31e --- /dev/null +++ b/system/xen/xsa/xsa211-qemut.patch @@ -0,0 +1,225 @@ +From 29e67cfd46b4d06ca1bb75558e227ec34a6af35f Mon Sep 17 00:00:00 2001 +From: Ian Jackson <ian.jackson@eu.citrix.com> +Date: Thu, 9 Mar 2017 11:14:55 +0000 +Subject: [PATCH] cirrus/vnc: zap drop bitblit support from console code. + +From: Gerd Hoffmann <kraxel@redhat.com> + +There is a special code path (dpy_gfx_copy) to allow graphic emulation +notify user interface code about bitblit operations carryed out by +guests. It is supported by cirrus and vnc server. The intended purpose +is to optimize display scrolls and just send over the scroll op instead +of a full display update. + +This is rarely used these days though because modern guests simply don't +use the cirrus blitter any more. Any linux guest using the cirrus drm +driver doesn't. Any windows guest newer than winxp doesn't ship with a +cirrus driver any more and thus uses the cirrus as simple framebuffer. + +So this code tends to bitrot and bugs can go unnoticed for a long time. +See for example commit "3e10c3e vnc: fix qemu crash because of SIGSEGV" +which fixes a bug lingering in the code for almost a year, added by +commit "c7628bf vnc: only alloc server surface with clients connected". + +Also the vnc server will throttle the frame rate in case it figures the +network can't keep up (send buffers are full). This doesn't work with +dpy_gfx_copy, for any copy operation sent to the vnc client we have to +send all outstanding updates beforehand, otherwise the vnc client might +run the client side blit on outdated data and thereby corrupt the +display. So this dpy_gfx_copy "optimization" might even make things +worse on slow network links. + +Lets kill it once for all. + +Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> + +These changes (dropping dpy_copy and all its references and +implementations) reimplemented for qemu-xen-traditional. + +This is XSA-211. + +Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> +--- + console.c | 8 -------- + console.h | 16 ---------------- + hw/cirrus_vga.c | 15 +++++---------- + hw/vmware_vga.c | 1 + + vnc.c | 35 ----------------------------------- + 5 files changed, 6 insertions(+), 69 deletions(-) + +diff --git a/console.c b/console.c +index d4f1ad0..e61b53b 100644 +--- a/console.c ++++ b/console.c +@@ -1399,14 +1399,6 @@ void qemu_console_resize(DisplayState *ds, int width, int height) + } + } + +-void qemu_console_copy(DisplayState *ds, int src_x, int src_y, +- int dst_x, int dst_y, int w, int h) +-{ +- if (is_graphic_console()) { +- dpy_copy(ds, src_x, src_y, dst_x, dst_y, w, h); +- } +-} +- + PixelFormat qemu_different_endianness_pixelformat(int bpp) + { + PixelFormat pf; +diff --git a/console.h b/console.h +index 14b42f3..8306cc4 100644 +--- a/console.h ++++ b/console.h +@@ -98,8 +98,6 @@ struct DisplayChangeListener { + void (*dpy_resize)(struct DisplayState *s); + void (*dpy_setdata)(struct DisplayState *s); + void (*dpy_refresh)(struct DisplayState *s); +- void (*dpy_copy)(struct DisplayState *s, int src_x, int src_y, +- int dst_x, int dst_y, int w, int h); + void (*dpy_fill)(struct DisplayState *s, int x, int y, + int w, int h, uint32_t c); + void (*dpy_text_cursor)(struct DisplayState *s, int x, int y); +@@ -211,18 +209,6 @@ static inline void dpy_refresh(DisplayState *s) + } + } + +-static inline void dpy_copy(struct DisplayState *s, int src_x, int src_y, +- int dst_x, int dst_y, int w, int h) { +- struct DisplayChangeListener *dcl = s->listeners; +- while (dcl != NULL) { +- if (dcl->dpy_copy) +- dcl->dpy_copy(s, src_x, src_y, dst_x, dst_y, w, h); +- else /* TODO */ +- dcl->dpy_update(s, dst_x, dst_y, w, h); +- dcl = dcl->next; +- } +-} +- + static inline void dpy_fill(struct DisplayState *s, int x, int y, + int w, int h, uint32_t c) { + struct DisplayChangeListener *dcl = s->listeners; +@@ -297,8 +283,6 @@ void text_consoles_set_display(DisplayState *ds); + void console_select(unsigned int index); + void console_color_init(DisplayState *ds); + void qemu_console_resize(DisplayState *ds, int width, int height); +-void qemu_console_copy(DisplayState *ds, int src_x, int src_y, +- int dst_x, int dst_y, int w, int h); + + /* sdl.c */ + void sdl_display_init(DisplayState *ds, int full_screen, int no_frame, int opengl_enabled); +diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c +index 06b4a3b..4e85b90 100644 +--- a/hw/cirrus_vga.c ++++ b/hw/cirrus_vga.c +@@ -793,11 +793,6 @@ static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h) + } + } + +- /* we have to flush all pending changes so that the copy +- is generated at the appropriate moment in time */ +- if (notify) +- vga_hw_update(); +- + (*s->cirrus_rop) (s, s->vram_ptr + + (s->cirrus_blt_dstaddr & s->cirrus_addr_mask), + s->vram_ptr + +@@ -806,13 +801,13 @@ static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h) + s->cirrus_blt_width, s->cirrus_blt_height); + + if (notify) +- qemu_console_copy(s->ds, +- sx, sy, dx, dy, +- s->cirrus_blt_width / depth, +- s->cirrus_blt_height); ++ dpy_update(s->ds, ++ dx, dy, ++ s->cirrus_blt_width / depth, ++ s->cirrus_blt_height); + + /* we don't have to notify the display that this portion has +- changed since qemu_console_copy implies this */ ++ changed since dpy_update implies this */ + + cirrus_invalidate_region(s, s->cirrus_blt_dstaddr, + s->cirrus_blt_dstpitch, s->cirrus_blt_width, +diff --git a/hw/vmware_vga.c b/hw/vmware_vga.c +index d1cba28..c38e43c 100644 +--- a/hw/vmware_vga.c ++++ b/hw/vmware_vga.c +@@ -383,6 +383,7 @@ static inline void vmsvga_copy_rect(struct vmsvga_state_s *s, + + # ifdef DIRECT_VRAM + if (s->ds->dpy_copy) ++# error This configuration is not supported. See XSA-211. + qemu_console_copy(s->ds, x0, y0, x1, y1, w, h); + else + # endif +diff --git a/vnc.c b/vnc.c +index 61d1555..0e61197 100644 +--- a/vnc.c ++++ b/vnc.c +@@ -572,36 +572,6 @@ static void send_framebuffer_update(VncState *vs, int x, int y, int w, int h) + send_framebuffer_update_raw(vs, x, y, w, h); + } + +-static void vnc_copy(DisplayState *ds, int src_x, int src_y, int dst_x, int dst_y, int w, int h) +-{ +- VncState *vs = ds->opaque; +- int updating_client = 1; +- +- if (!vs->update_requested || +- src_x < vs->visible_x || src_y < vs->visible_y || +- dst_x < vs->visible_x || dst_y < vs->visible_y || +- (src_x + w) > (vs->visible_x + vs->visible_w) || +- (src_y + h) > (vs->visible_y + vs->visible_h) || +- (dst_x + w) > (vs->visible_x + vs->visible_w) || +- (dst_y + h) > (vs->visible_y + vs->visible_h)) +- updating_client = 0; +- +- if (updating_client) +- _vnc_update_client(vs); +- +- if (updating_client && vs->csock != -1 && !vs->has_update) { +- vnc_write_u8(vs, 0); /* msg id */ +- vnc_write_u8(vs, 0); +- vnc_write_u16(vs, 1); /* number of rects */ +- vnc_framebuffer_update(vs, dst_x, dst_y, w, h, 1); +- vnc_write_u16(vs, src_x); +- vnc_write_u16(vs, src_y); +- vnc_flush(vs); +- vs->update_requested--; +- } else +- framebuffer_set_updated(vs, dst_x, dst_y, w, h); +-} +- + static int find_update_height(VncState *vs, int y, int maxy, int last_x, int x) + { + int h; +@@ -1543,16 +1513,12 @@ static void set_encodings(VncState *vs, int32_t *encodings, size_t n_encodings) + vs->has_pointer_type_change = 0; + vs->has_WMVi = 0; + vs->absolute = -1; +- dcl->dpy_copy = NULL; + + for (i = n_encodings - 1; i >= 0; i--) { + switch (encodings[i]) { + case 0: /* Raw */ + vs->has_hextile = 0; + break; +- case 1: /* CopyRect */ +- dcl->dpy_copy = vnc_copy; +- break; + case 5: /* Hextile */ + vs->has_hextile = 1; + break; +@@ -2459,7 +2425,6 @@ static void vnc_listen_read(void *opaque) + vs->has_resize = 0; + vs->has_hextile = 0; + vs->update_requested = 0; +- dcl->dpy_copy = NULL; + vnc_timer_init(vs); + } + } +-- +2.1.4 + diff --git a/system/xen/xsa/xsa211-qemuu-4.8.patch b/system/xen/xsa/xsa211-qemuu-4.8.patch new file mode 100644 index 0000000000..aabdd11ae3 --- /dev/null +++ b/system/xen/xsa/xsa211-qemuu-4.8.patch @@ -0,0 +1,264 @@ +From 7563ddb0bd67e7e54568e017cb7d4f7f556587fc Mon Sep 17 00:00:00 2001 +From: Gerd Hoffmann <kraxel@redhat.com> +Date: Tue, 14 Feb 2017 19:09:59 +0100 +Subject: [PATCH] cirrus/vnc: zap bitblit support from console code. + +There is a special code path (dpy_gfx_copy) to allow graphic emulation +notify user interface code about bitblit operations carryed out by +guests. It is supported by cirrus and vnc server. The intended purpose +is to optimize display scrolls and just send over the scroll op instead +of a full display update. + +This is rarely used these days though because modern guests simply don't +use the cirrus blitter any more. Any linux guest using the cirrus drm +driver doesn't. Any windows guest newer than winxp doesn't ship with a +cirrus driver any more and thus uses the cirrus as simple framebuffer. + +So this code tends to bitrot and bugs can go unnoticed for a long time. +See for example commit "3e10c3e vnc: fix qemu crash because of SIGSEGV" +which fixes a bug lingering in the code for almost a year, added by +commit "c7628bf vnc: only alloc server surface with clients connected". + +Also the vnc server will throttle the frame rate in case it figures the +network can't keep up (send buffers are full). This doesn't work with +dpy_gfx_copy, for any copy operation sent to the vnc client we have to +send all outstanding updates beforehand, otherwise the vnc client might +run the client side blit on outdated data and thereby corrupt the +display. So this dpy_gfx_copy "optimization" might even make things +worse on slow network links. + +Lets kill it once for all. + +Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> +--- + hw/display/cirrus_vga.c | 12 ++----- + include/ui/console.h | 7 ---- + ui/console.c | 28 --------------- + ui/vnc.c | 96 ------------------------------------------------- + 4 files changed, 3 insertions(+), 140 deletions(-) + +diff --git a/hw/display/cirrus_vga.c b/hw/display/cirrus_vga.c +index 9a4e90a..2966125 100644 +--- a/hw/display/cirrus_vga.c ++++ b/hw/display/cirrus_vga.c +@@ -760,11 +760,6 @@ static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h) + } + } + +- /* we have to flush all pending changes so that the copy +- is generated at the appropriate moment in time */ +- if (notify) +- graphic_hw_update(s->vga.con); +- + (*s->cirrus_rop) (s, s->vga.vram_ptr + + (s->cirrus_blt_dstaddr & s->cirrus_addr_mask), + s->vga.vram_ptr + +@@ -773,10 +768,9 @@ static void cirrus_do_copy(CirrusVGAState *s, int dst, int src, int w, int h) + s->cirrus_blt_width, s->cirrus_blt_height); + + if (notify) { +- qemu_console_copy(s->vga.con, +- sx, sy, dx, dy, +- s->cirrus_blt_width / depth, +- s->cirrus_blt_height); ++ dpy_gfx_update(s->vga.con, dx, dy, ++ s->cirrus_blt_width / depth, ++ s->cirrus_blt_height); + } + + /* we don't have to notify the display that this portion has +diff --git a/include/ui/console.h b/include/ui/console.h +index 2703a3a..67927ed 100644 +--- a/include/ui/console.h ++++ b/include/ui/console.h +@@ -189,9 +189,6 @@ typedef struct DisplayChangeListenerOps { + int x, int y, int w, int h); + void (*dpy_gfx_switch)(DisplayChangeListener *dcl, + struct DisplaySurface *new_surface); +- void (*dpy_gfx_copy)(DisplayChangeListener *dcl, +- int src_x, int src_y, +- int dst_x, int dst_y, int w, int h); + bool (*dpy_gfx_check_format)(DisplayChangeListener *dcl, + pixman_format_code_t format); + +@@ -273,8 +270,6 @@ int dpy_set_ui_info(QemuConsole *con, QemuUIInfo *info); + void dpy_gfx_update(QemuConsole *con, int x, int y, int w, int h); + void dpy_gfx_replace_surface(QemuConsole *con, + DisplaySurface *surface); +-void dpy_gfx_copy(QemuConsole *con, int src_x, int src_y, +- int dst_x, int dst_y, int w, int h); + void dpy_text_cursor(QemuConsole *con, int x, int y); + void dpy_text_update(QemuConsole *con, int x, int y, int w, int h); + void dpy_text_resize(QemuConsole *con, int w, int h); +@@ -398,8 +393,6 @@ void text_consoles_set_display(DisplayState *ds); + void console_select(unsigned int index); + void console_color_init(DisplayState *ds); + void qemu_console_resize(QemuConsole *con, int width, int height); +-void qemu_console_copy(QemuConsole *con, int src_x, int src_y, +- int dst_x, int dst_y, int w, int h); + DisplaySurface *qemu_console_surface(QemuConsole *con); + + /* console-gl.c */ +diff --git a/ui/console.c b/ui/console.c +index c24bfe4..ece0c04 100644 +--- a/ui/console.c ++++ b/ui/console.c +@@ -1558,27 +1558,6 @@ static void dpy_refresh(DisplayState *s) + } + } + +-void dpy_gfx_copy(QemuConsole *con, int src_x, int src_y, +- int dst_x, int dst_y, int w, int h) +-{ +- DisplayState *s = con->ds; +- DisplayChangeListener *dcl; +- +- if (!qemu_console_is_visible(con)) { +- return; +- } +- QLIST_FOREACH(dcl, &s->listeners, next) { +- if (con != (dcl->con ? dcl->con : active_console)) { +- continue; +- } +- if (dcl->ops->dpy_gfx_copy) { +- dcl->ops->dpy_gfx_copy(dcl, src_x, src_y, dst_x, dst_y, w, h); +- } else { /* TODO */ +- dcl->ops->dpy_gfx_update(dcl, dst_x, dst_y, w, h); +- } +- } +-} +- + void dpy_text_cursor(QemuConsole *con, int x, int y) + { + DisplayState *s = con->ds; +@@ -2104,13 +2083,6 @@ void qemu_console_resize(QemuConsole *s, int width, int height) + dpy_gfx_replace_surface(s, surface); + } + +-void qemu_console_copy(QemuConsole *con, int src_x, int src_y, +- int dst_x, int dst_y, int w, int h) +-{ +- assert(con->console_type == GRAPHIC_CONSOLE); +- dpy_gfx_copy(con, src_x, src_y, dst_x, dst_y, w, h); +-} +- + DisplaySurface *qemu_console_surface(QemuConsole *console) + { + return console->surface; +diff --git a/ui/vnc.c b/ui/vnc.c +index d1087c9..b45bb2c 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -872,101 +872,6 @@ int vnc_send_framebuffer_update(VncState *vs, int x, int y, int w, int h) + return n; + } + +-static void vnc_copy(VncState *vs, int src_x, int src_y, int dst_x, int dst_y, int w, int h) +-{ +- /* send bitblit op to the vnc client */ +- vnc_lock_output(vs); +- vnc_write_u8(vs, VNC_MSG_SERVER_FRAMEBUFFER_UPDATE); +- vnc_write_u8(vs, 0); +- vnc_write_u16(vs, 1); /* number of rects */ +- vnc_framebuffer_update(vs, dst_x, dst_y, w, h, VNC_ENCODING_COPYRECT); +- vnc_write_u16(vs, src_x); +- vnc_write_u16(vs, src_y); +- vnc_unlock_output(vs); +- vnc_flush(vs); +-} +- +-static void vnc_dpy_copy(DisplayChangeListener *dcl, +- int src_x, int src_y, +- int dst_x, int dst_y, int w, int h) +-{ +- VncDisplay *vd = container_of(dcl, VncDisplay, dcl); +- VncState *vs, *vn; +- uint8_t *src_row; +- uint8_t *dst_row; +- int i, x, y, pitch, inc, w_lim, s; +- int cmp_bytes; +- +- if (!vd->server) { +- /* no client connected */ +- return; +- } +- +- vnc_refresh_server_surface(vd); +- QTAILQ_FOREACH_SAFE(vs, &vd->clients, next, vn) { +- if (vnc_has_feature(vs, VNC_FEATURE_COPYRECT)) { +- vs->force_update = 1; +- vnc_update_client(vs, 1, true); +- /* vs might be free()ed here */ +- } +- } +- +- /* do bitblit op on the local surface too */ +- pitch = vnc_server_fb_stride(vd); +- src_row = vnc_server_fb_ptr(vd, src_x, src_y); +- dst_row = vnc_server_fb_ptr(vd, dst_x, dst_y); +- y = dst_y; +- inc = 1; +- if (dst_y > src_y) { +- /* copy backwards */ +- src_row += pitch * (h-1); +- dst_row += pitch * (h-1); +- pitch = -pitch; +- y = dst_y + h - 1; +- inc = -1; +- } +- w_lim = w - (VNC_DIRTY_PIXELS_PER_BIT - (dst_x % VNC_DIRTY_PIXELS_PER_BIT)); +- if (w_lim < 0) { +- w_lim = w; +- } else { +- w_lim = w - (w_lim % VNC_DIRTY_PIXELS_PER_BIT); +- } +- for (i = 0; i < h; i++) { +- for (x = 0; x <= w_lim; +- x += s, src_row += cmp_bytes, dst_row += cmp_bytes) { +- if (x == w_lim) { +- if ((s = w - w_lim) == 0) +- break; +- } else if (!x) { +- s = (VNC_DIRTY_PIXELS_PER_BIT - +- (dst_x % VNC_DIRTY_PIXELS_PER_BIT)); +- s = MIN(s, w_lim); +- } else { +- s = VNC_DIRTY_PIXELS_PER_BIT; +- } +- cmp_bytes = s * VNC_SERVER_FB_BYTES; +- if (memcmp(src_row, dst_row, cmp_bytes) == 0) +- continue; +- memmove(dst_row, src_row, cmp_bytes); +- QTAILQ_FOREACH(vs, &vd->clients, next) { +- if (!vnc_has_feature(vs, VNC_FEATURE_COPYRECT)) { +- set_bit(((x + dst_x) / VNC_DIRTY_PIXELS_PER_BIT), +- vs->dirty[y]); +- } +- } +- } +- src_row += pitch - w * VNC_SERVER_FB_BYTES; +- dst_row += pitch - w * VNC_SERVER_FB_BYTES; +- y += inc; +- } +- +- QTAILQ_FOREACH(vs, &vd->clients, next) { +- if (vnc_has_feature(vs, VNC_FEATURE_COPYRECT)) { +- vnc_copy(vs, src_x, src_y, dst_x, dst_y, w, h); +- } +- } +-} +- + static void vnc_mouse_set(DisplayChangeListener *dcl, + int x, int y, int visible) + { +@@ -3119,7 +3024,6 @@ static gboolean vnc_listen_io(QIOChannel *ioc, + static const DisplayChangeListenerOps dcl_ops = { + .dpy_name = "vnc", + .dpy_refresh = vnc_refresh, +- .dpy_gfx_copy = vnc_dpy_copy, + .dpy_gfx_update = vnc_dpy_update, + .dpy_gfx_switch = vnc_dpy_switch, + .dpy_gfx_check_format = qemu_pixman_check_format, +-- +2.1.4 + diff --git a/system/xen/xsa/xsa212.patch b/system/xen/xsa/xsa212.patch new file mode 100644 index 0000000000..2c435c4136 --- /dev/null +++ b/system/xen/xsa/xsa212.patch @@ -0,0 +1,87 @@ +memory: properly check guest memory ranges in XENMEM_exchange handling + +The use of guest_handle_okay() here (as introduced by the XSA-29 fix) +is insufficient here, guest_handle_subrange_okay() needs to be used +instead. + +Note that the uses are okay in +- XENMEM_add_to_physmap_batch handling due to the size field being only + 16 bits wide, +- livepatch_list() due to the limit of 1024 enforced on the + number-of-entries input (leaving aside the fact that this can be + called by a privileged domain only anyway), +- compat mode handling due to counts there being limited to 32 bits, +- everywhere else due to guest arrays being accessed sequentially from + index zero. + +This is XSA-212. + +Reported-by: Jann Horn <jannh@google.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> + +--- a/xen/common/memory.c ++++ b/xen/common/memory.c +@@ -436,8 +436,8 @@ static long memory_exchange(XEN_GUEST_HA + goto fail_early; + } + +- if ( !guest_handle_okay(exch.in.extent_start, exch.in.nr_extents) || +- !guest_handle_okay(exch.out.extent_start, exch.out.nr_extents) ) ++ if ( !guest_handle_subrange_okay(exch.in.extent_start, exch.nr_exchanged, ++ exch.in.nr_extents - 1) ) + { + rc = -EFAULT; + goto fail_early; +@@ -447,11 +447,27 @@ static long memory_exchange(XEN_GUEST_HA + { + in_chunk_order = exch.out.extent_order - exch.in.extent_order; + out_chunk_order = 0; ++ ++ if ( !guest_handle_subrange_okay(exch.out.extent_start, ++ exch.nr_exchanged >> in_chunk_order, ++ exch.out.nr_extents - 1) ) ++ { ++ rc = -EFAULT; ++ goto fail_early; ++ } + } + else + { + in_chunk_order = 0; + out_chunk_order = exch.in.extent_order - exch.out.extent_order; ++ ++ if ( !guest_handle_subrange_okay(exch.out.extent_start, ++ exch.nr_exchanged << out_chunk_order, ++ exch.out.nr_extents - 1) ) ++ { ++ rc = -EFAULT; ++ goto fail_early; ++ } + } + + d = rcu_lock_domain_by_any_id(exch.in.domid); +--- a/xen/include/asm-x86/x86_64/uaccess.h ++++ b/xen/include/asm-x86/x86_64/uaccess.h +@@ -29,8 +29,9 @@ extern void *xlat_malloc(unsigned long * + /* + * Valid if in +ve half of 48-bit address space, or above Xen-reserved area. + * This is also valid for range checks (addr, addr+size). As long as the +- * start address is outside the Xen-reserved area then we will access a +- * non-canonical address (and thus fault) before ever reaching VIRT_START. ++ * start address is outside the Xen-reserved area, sequential accesses ++ * (starting at addr) will hit a non-canonical address (and thus fault) ++ * before ever reaching VIRT_START. + */ + #define __addr_ok(addr) \ + (((unsigned long)(addr) < (1UL<<47)) || \ +@@ -40,7 +41,8 @@ extern void *xlat_malloc(unsigned long * + (__addr_ok(addr) || is_compat_arg_xlat_range(addr, size)) + + #define array_access_ok(addr, count, size) \ +- (access_ok(addr, (count)*(size))) ++ (likely(((count) ?: 0UL) < (~0UL / (size))) && \ ++ access_ok(addr, (count) * (size))) + + #define __compat_addr_ok(d, addr) \ + ((unsigned long)(addr) < HYPERVISOR_COMPAT_VIRT_START(d)) |