Packages/packages/net/openvswitch/patches/0008-backport-d94cd0d-ovsdb-idl-Support-write-only-change.patch
2025-02-05 00:21:49 +08:00

319 lines
12 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From d94cd0d3eec33e4290d7ca81918f5ac61444886e Mon Sep 17 00:00:00 2001
From: Dumitru Ceara <dceara@redhat.com>
Date: Tue, 26 Apr 2022 12:37:08 +0200
Subject: [PATCH] ovsdb-idl: Support write-only-changed IDL monitor mode.
At a first glance, change tracking should never be allowed for
write-only columns. However, some clients (e.g., ovn-northd) that are
mostly exclusive writers of a database, use change tracking to avoid
duplicating the IDL row records into a local cache when implementing
incremental processing.
The default behavior of the IDL is to automatically turn a write-only
column into a read-write column whenever the client enables change
tracking for that column.
For the afore mentioned clients, this becomes a performance issue.
Commit 1cc618c32524 ("ovsdb-idl: Fix atomicity of writes that don't
change a column's value.") explains why writes that don't change a
column's value cannot be optimized out early if the column is
read/write.
Furthermore, if there is at least one record in any table that
changed during a transaction, then *all* records that have been
written are added to the transaction, even if their values didn't
change. If there are many such rows (e.g., like in ovn-northd's
case) this incurs a significant overhead because:
a. the client has to build this large transaction
b. the transaction has to be sent over the network
c. the server needs to parse this (mostly) no-op update
We now introduce new IDL APIs allowing users to set a new monitoring
mode flag, OVSDB_IDL_WRITE_CHANGED_ONLY, to indicate to the IDL that the
atomicity constraints may be relaxed and written columns that don't
change value can be skipped from the current transaction.
We benchmarked ovn-northd performance when using this new mode
against NB and SB databases taken from ovn-kubernetes scale tests.
We noticed that when a minor change is performed to the Northbound
database (e.g., NB_Global.nb_cfg is incremented) the time it takes to
build the Southbound transaction becomes negligible (vs ~1.5 seconds
before this change).
End-to-end ovn-kubernetes scale tests on 120-node clusters also show
significant reduction of latency to bring up pods; both average and P99
latency decreased by ~30%.
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
---
lib/ovsdb-idl.c | 60 +++++++++++++++++++++++++++++++++++++++++-----
lib/ovsdb-idl.h | 16 +++++++++++--
tests/ovsdb-idl.at | 31 +++++++++++++++++++++++-
tests/test-ovsdb.c | 18 ++++++++++----
5 files changed, 115 insertions(+), 14 deletions(-)
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -1398,6 +1398,45 @@ ovsdb_idl_track_clear(struct ovsdb_idl *
{
ovsdb_idl_track_clear__(idl, false);
}
+
+/* Sets or clears (depending on 'enable') OVSDB_IDL_WRITE_CHANGED_ONLY
+ * for 'column' in 'idl', ensuring that the column will be included in a
+ * transaction only if its value has actually changed locally. Normally
+ * read/write columns that are written to are always included in the
+ * transaction but, in specific cases, when the application doesn't
+ * require atomicity of writes across different columns, the ones that
+ * don't change value may be skipped.
+ *
+ * This function should be called between ovsdb_idl_create() and
+ * the first call to ovsdb_idl_run().
+ */
+void
+ovsdb_idl_set_write_changed_only(struct ovsdb_idl *idl,
+ const struct ovsdb_idl_column *column,
+ bool enable)
+{
+ if (enable) {
+ *ovsdb_idl_get_mode(idl, column) |= OVSDB_IDL_WRITE_CHANGED_ONLY;
+ } else {
+ *ovsdb_idl_get_mode(idl, column) &= ~OVSDB_IDL_WRITE_CHANGED_ONLY;
+ }
+}
+
+/* Helper function to wrap calling ovsdb_idl_set_write_changed_only() for
+ * all columns that are part of 'idl'.
+ */
+void
+ovsdb_idl_set_write_changed_only_all(struct ovsdb_idl *idl, bool enable)
+{
+ for (size_t i = 0; i < idl->class_->n_tables; i++) {
+ const struct ovsdb_idl_table_class *tc = &idl->class_->tables[i];
+
+ for (size_t j = 0; j < tc->n_columns; j++) {
+ const struct ovsdb_idl_column *column = &tc->columns[j];
+ ovsdb_idl_set_write_changed_only(idl, column, enable);
+ }
+ }
+}
static void
log_parse_update_error(struct ovsdb_error *error)
@@ -3502,6 +3541,8 @@ ovsdb_idl_txn_write__(const struct ovsdb
{
struct ovsdb_idl_row *row = CONST_CAST(struct ovsdb_idl_row *, row_);
const struct ovsdb_idl_table_class *class;
+ unsigned char column_mode;
+ bool optimize_rewritten;
size_t column_idx;
bool write_only;
@@ -3512,12 +3553,15 @@ ovsdb_idl_txn_write__(const struct ovsdb
class = row->table->class_;
column_idx = column - class->columns;
- write_only = row->table->modes[column_idx] == OVSDB_IDL_MONITOR;
+ column_mode = row->table->modes[column_idx];
+ write_only = column_mode == OVSDB_IDL_MONITOR;
+ optimize_rewritten =
+ write_only || (column_mode & OVSDB_IDL_WRITE_CHANGED_ONLY);
+
ovs_assert(row->new_datum != NULL);
ovs_assert(column_idx < class->n_columns);
- ovs_assert(row->old_datum == NULL ||
- row->table->modes[column_idx] & OVSDB_IDL_MONITOR);
+ ovs_assert(row->old_datum == NULL || column_mode & OVSDB_IDL_MONITOR);
if (row->table->idl->verify_write_only && !write_only) {
VLOG_ERR("Bug: Attempt to write to a read/write column (%s:%s) when"
@@ -3535,9 +3579,13 @@ ovsdb_idl_txn_write__(const struct ovsdb
* different value in that column since we read it. (But if a whole
* transaction only does writes of existing values, without making any real
* changes, we will drop the whole transaction later in
- * ovsdb_idl_txn_commit().) */
- if (write_only && ovsdb_datum_equals(ovsdb_idl_read(row, column),
- datum, &column->type)) {
+ * ovsdb_idl_txn_commit().)
+ *
+ * The application may choose to bypass this restriction and always
+ * optimize by setting OVSDB_IDL_WRITE_CHANGED_ONLY.
+ */
+ if (optimize_rewritten && ovsdb_datum_equals(ovsdb_idl_read(row, column),
+ datum, &column->type)) {
goto discard_datum;
}
--- a/lib/ovsdb-idl.h
+++ b/lib/ovsdb-idl.h
@@ -158,7 +158,7 @@ bool ovsdb_idl_server_has_column(const s
* IDL will change the value returned by ovsdb_idl_get_seqno() when the
* column's value changes in any row.
*
- * The possible mode combinations are:
+ * Typical mode combinations are:
*
* - 0, for a column that a client doesn't care about. This is the default
* for every column in every table, if the client passes false for
@@ -181,10 +181,17 @@ bool ovsdb_idl_server_has_column(const s
* - (OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT | OVSDB_IDL_TRACK), for a column
* that a client wants to track using the change tracking
* ovsdb_idl_track_get_*() functions.
+ *
+ * - (OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT | OVSDB_IDL_WRITE_CHANGED_ONLY)
+ * is similar to (OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT) except that it
+ * only adds a written column to a transaction if the column's value
+ * has actually changed.
*/
#define OVSDB_IDL_MONITOR (1 << 0) /* Replicate this column? */
#define OVSDB_IDL_ALERT (1 << 1) /* Alert client when column changes? */
-#define OVSDB_IDL_TRACK (1 << 2)
+#define OVSDB_IDL_TRACK (1 << 2) /* Track column changes? */
+#define OVSDB_IDL_WRITE_CHANGED_ONLY \
+ (1 << 3) /* Write back only changed columns? */
void ovsdb_idl_add_column(struct ovsdb_idl *, const struct ovsdb_idl_column *);
void ovsdb_idl_add_table(struct ovsdb_idl *,
@@ -233,6 +240,11 @@ bool ovsdb_idl_track_is_updated(const st
const struct ovsdb_idl_column *column);
void ovsdb_idl_track_clear(struct ovsdb_idl *);
+void ovsdb_idl_set_write_changed_only(struct ovsdb_idl *idl,
+ const struct ovsdb_idl_column *column,
+ bool enable);
+void ovsdb_idl_set_write_changed_only_all(struct ovsdb_idl *idl, bool enable);
+
/* Reading the database replica. */
--- a/tests/ovsdb-idl.at
+++ b/tests/ovsdb-idl.at
@@ -94,6 +94,20 @@ m4_define([OVSDB_CHECK_IDL_C],
OVSDB_SERVER_SHUTDOWN
AT_CLEANUP])
+# same as OVSDB_CHECK_IDL but uses OVSDB_IDL_WRITE_CHANGED_ONLY.
+m4_define([OVSDB_CHECK_IDL_WRITE_CHANGED_ONLY_C],
+ [AT_SETUP([$1 - write-changed-only - C])
+ AT_KEYWORDS([ovsdb server idl positive $5])
+ AT_CHECK([ovsdb_start_idltest])
+ m4_if([$2], [], [],
+ [AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore], [ignore])])
+ AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 idl unix:socket $3],
+ [0], [stdout], [ignore])
+ AT_CHECK([sort stdout | uuidfilt]m4_if([$6],,, [[| $6]]),
+ [0], [$4])
+ OVSDB_SERVER_SHUTDOWN
+ AT_CLEANUP])
+
# same as OVSDB_CHECK_IDL but uses tcp.
m4_define([OVSDB_CHECK_IDL_TCP_C],
[AT_SETUP([$1 - C - tcp])
@@ -264,6 +278,7 @@ m4_define([OVSDB_CHECK_IDL_SSL_PY],
m4_define([OVSDB_CHECK_IDL],
[OVSDB_CHECK_IDL_C($@)
+ OVSDB_CHECK_IDL_WRITE_CHANGED_ONLY_C($@)
OVSDB_CHECK_IDL_TCP_C($@)
OVSDB_CHECK_IDL_TCP6_C($@)
OVSDB_CHECK_IDL_PY($@)
@@ -1202,8 +1217,22 @@ m4_define([OVSDB_CHECK_IDL_TRACK_C],
OVSDB_SERVER_SHUTDOWN
AT_CLEANUP])
+m4_define([OVSDB_CHECK_IDL_TRACK_WRITE_CHANGED_ONLY_C],
+ [AT_SETUP([$1 - write-changed-only - C])
+ AT_KEYWORDS([ovsdb server idl tracking positive $5])
+ AT_CHECK([ovsdb_start_idltest])
+ m4_if([$2], [], [],
+ [AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore], [ignore])])
+ AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 -c -w idl unix:socket $3],
+ [0], [stdout], [ignore])
+ AT_CHECK([sort stdout | uuidfilt]m4_if([$6],,, [[| $6]]),
+ [0], [$4])
+ OVSDB_SERVER_SHUTDOWN
+ AT_CLEANUP])
+
m4_define([OVSDB_CHECK_IDL_TRACK],
- [OVSDB_CHECK_IDL_TRACK_C($@)])
+ [OVSDB_CHECK_IDL_TRACK_C($@)
+ OVSDB_CHECK_IDL_TRACK_WRITE_CHANGED_ONLY_C($@)])
OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated],
[['["idltest",
--- a/tests/test-ovsdb.c
+++ b/tests/test-ovsdb.c
@@ -56,6 +56,7 @@
VLOG_DEFINE_THIS_MODULE(test_ovsdb);
struct test_ovsdb_pvt_context {
+ bool write_changed_only;
bool track;
};
@@ -91,6 +92,7 @@ parse_options(int argc, char *argv[], st
{"timeout", required_argument, NULL, 't'},
{"verbose", optional_argument, NULL, 'v'},
{"change-track", optional_argument, NULL, 'c'},
+ {"write-changed-only", optional_argument, NULL, 'w'},
{"magic", required_argument, NULL, OPT_MAGIC},
{"no-rename-open-files", no_argument, NULL, OPT_NO_RENAME_OPEN_FILES},
{"help", no_argument, NULL, 'h'},
@@ -125,6 +127,10 @@ parse_options(int argc, char *argv[], st
pvt->track = true;
break;
+ case 'w':
+ pvt->write_changed_only = true;
+ break;
+
case OPT_MAGIC:
magic = optarg;
break;
@@ -2681,6 +2687,7 @@ update_conditions(struct ovsdb_idl *idl,
static void
do_idl(struct ovs_cmdl_context *ctx)
{
+ struct test_ovsdb_pvt_context *pvt = ctx->pvt;
struct jsonrpc *rpc;
struct ovsdb_idl *idl;
unsigned int next_cond_seqno = 0;
@@ -2690,9 +2697,6 @@ do_idl(struct ovs_cmdl_context *ctx)
int step = 0;
int error;
int i;
- bool track;
-
- track = ((struct test_ovsdb_pvt_context *)(ctx->pvt))->track;
idl = ovsdb_idl_create(ctx->argv[1], &idltest_idl_class, true, true);
ovsdb_idl_set_leader_only(idl, false);
@@ -2709,10 +2713,14 @@ do_idl(struct ovs_cmdl_context *ctx)
rpc = NULL;
}
- if (track) {
+ if (pvt->track) {
ovsdb_idl_track_add_all(idl);
}
+ if (pvt->write_changed_only) {
+ ovsdb_idl_set_write_changed_only_all(idl, true);
+ }
+
setvbuf(stdout, NULL, _IONBF, 0);
symtab = ovsdb_symbol_table_create();
@@ -2770,7 +2778,7 @@ do_idl(struct ovs_cmdl_context *ctx)
}
/* Print update. */
- if (track) {
+ if (pvt->track) {
print_idl_track(idl, step++, terse);
ovsdb_idl_track_clear(idl);
} else {