krb5 commit: Improve ulog block resize efficiency

ghudson at mit.edu ghudson at mit.edu
Thu Feb 6 18:43:34 EST 2025


https://github.com/krb5/krb5/commit/197ddd4196a3049cd17d29d1c8f0af1424472956
commit 197ddd4196a3049cd17d29d1c8f0af1424472956
Author: Zoltan Borbely <Zoltan.Borbely at morganstanley.com>
Date:   Fri Jan 31 18:00:17 2025 +0100

    Improve ulog block resize efficiency
    
    When it is necessary to increase the ulog block size, copy the
    existing entries instead of reinitializing the log.
    
    [ghudson at mit.edu: added test case; renamed and split INDEX() to avoid
    duplication; added sync_ulog() helper; modified copying loop for
    clarity; edited commit message]
    
    ticket: 9161 (new)

 src/include/kdb_log.h | 20 ++++++++++-----
 src/kprop/kproplog.c  |  2 +-
 src/lib/kdb/kdb_log.c | 68 +++++++++++++++++++++++++++++++++++----------------
 src/tests/t_iprop.py  | 30 +++++++++++++++++++++--
 4 files changed, 90 insertions(+), 30 deletions(-)

diff --git a/src/include/kdb_log.h b/src/include/kdb_log.h
index 423957565..2856859f8 100644
--- a/src/include/kdb_log.h
+++ b/src/include/kdb_log.h
@@ -18,12 +18,6 @@
 extern "C" {
 #endif
 
-/*
- * DB macros
- */
-#define INDEX(ulog, i) (kdb_ent_header_t *)(void *)                     \
-    ((char *)(ulog) + sizeof(kdb_hlog_t) + (i) * ulog->kdb_block)
-
 /*
  * Current DB version #
  */
@@ -106,6 +100,20 @@ typedef struct _kdb_log_context {
     int             ulogfd;
 } kdb_log_context;
 
+/* Return the address of the i'th record in ulog for the given block size. */
+static inline uint8_t *
+ulog_record_ptr(kdb_hlog_t *ulog, size_t i, size_t bsize)
+{
+    return (uint8_t *)ulog + sizeof(*ulog) + i * bsize;
+}
+
+/* Return the i'th update entry header for ulog. */
+static inline kdb_ent_header_t *
+ulog_index(kdb_hlog_t *ulog, size_t i)
+{
+    return (void *)ulog_record_ptr(ulog, i, ulog->kdb_block);
+}
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/src/kprop/kproplog.c b/src/kprop/kproplog.c
index 1f10aa6dc..45ded429e 100644
--- a/src/kprop/kproplog.c
+++ b/src/kprop/kproplog.c
@@ -330,7 +330,7 @@ print_update(kdb_hlog_t *ulog, uint32_t entry, uint32_t ulogentries,
     for (i = start_sno; i < ulog->kdb_last_sno; i++) {
         indx = i % ulogentries;
 
-        indx_log = INDEX(ulog, indx);
+        indx_log = ulog_index(ulog, indx);
 
         /*
          * Check for corrupt update entry
diff --git a/src/lib/kdb/kdb_log.c b/src/lib/kdb/kdb_log.c
index 68fae919a..b840eec9a 100644
--- a/src/lib/kdb/kdb_log.c
+++ b/src/lib/kdb/kdb_log.c
@@ -103,13 +103,31 @@ sync_header(kdb_hlog_t *ulog)
     }
 }
 
+/* Sync memory to disk for the entire ulog. */
+static void
+sync_ulog(kdb_hlog_t *ulog, uint32_t ulogentries)
+{
+    size_t len;
+
+    if (!pagesize)
+        pagesize = getpagesize();
+
+    len = (sizeof(kdb_hlog_t) + ulogentries * ulog->kdb_block +
+           (pagesize - 1)) & ~(pagesize - 1);
+    if (msync(ulog, len, MS_SYNC)) {
+        /* Couldn't sync to disk, let's panic. */
+        syslog(LOG_ERR, _("could not sync the whole ulog to disk"));
+        abort();
+    }
+}
+
 /* Return true if the ulog entry for sno matches sno and timestamp. */
 static krb5_boolean
 check_sno(kdb_log_context *log_ctx, kdb_sno_t sno,
           const kdbe_time_t *timestamp)
 {
     unsigned int indx = (sno - 1) % log_ctx->ulogentries;
-    kdb_ent_header_t *ent = INDEX(log_ctx->ulog, indx);
+    kdb_ent_header_t *ent = ulog_index(log_ctx->ulog, indx);
 
     return ent->kdb_entry_sno == sno && time_equal(&ent->kdb_time, timestamp);
 }
@@ -175,17 +193,16 @@ extend_file_to(int fd, unsigned int new_size)
     return 0;
 }
 
-/*
- * Resize the array elements.  We reinitialize the update log rather than
- * unrolling the the log and copying it over to a temporary log for obvious
- * performance reasons.  Replicas will subsequently do a full resync, but the
- * need for resizing should be very small.
- */
+/* Resize the array elements of ulog to be at least as large as recsize.  Move
+ * the existing elements into the proper offsets for the new block size. */
 static krb5_error_code
 resize(kdb_hlog_t *ulog, uint32_t ulogentries, int ulogfd,
        unsigned int recsize, const kdb_incr_update_t *upd)
 {
-    unsigned int new_block, new_size;
+    size_t old_block = ulog->kdb_block, new_block, new_size;
+    krb5_error_code retval;
+    uint8_t *old_ent, *new_ent;
+    uint32_t i;
 
     if (ulog == NULL)
         return KRB5_LOG_ERROR;
@@ -204,16 +221,25 @@ resize(kdb_hlog_t *ulog, uint32_t ulogentries, int ulogfd,
     if (new_size > MAXLOGLEN)
         return KRB5_LOG_ERROR;
 
-    /* Reinit log with new block size. */
-    memset(ulog, 0, sizeof(*ulog));
-    ulog->kdb_hmagic = KDB_ULOG_HDR_MAGIC;
-    ulog->db_version_num = KDB_VERSION;
-    ulog->kdb_state = KDB_STABLE;
-    ulog->kdb_block = new_block;
-    sync_header(ulog);
-
     /* Expand log considering new block size. */
-    return extend_file_to(ulogfd, new_size);
+    retval = extend_file_to(ulogfd, new_size);
+    if (retval)
+        return retval;
+
+    /* Copy each record into its new location and zero out the unused areas.
+     * The area is overlapping, so we have to iterate backwards. */
+    for (i = ulogentries; i > 0; i--) {
+        old_ent = ulog_record_ptr(ulog, i - 1, old_block);
+        new_ent = ulog_record_ptr(ulog, i - 1, new_block);
+        memmove(new_ent, old_ent, old_block);
+        memset(new_ent + old_block, 0, new_block - old_block);
+    }
+
+    syslog(LOG_INFO, _("ulog block size has been resized from %lu to %lu"),
+           (unsigned long)old_block, (unsigned long)new_block);
+    ulog->kdb_block = new_block;
+    sync_ulog(ulog, ulogentries);
+    return 0;
 }
 
 /* Set the ulog to contain only a dummy entry with the given serial number and
@@ -222,7 +248,7 @@ static void
 set_dummy(kdb_log_context *log_ctx, kdb_sno_t sno, const kdbe_time_t *kdb_time)
 {
     kdb_hlog_t *ulog = log_ctx->ulog;
-    kdb_ent_header_t *ent = INDEX(ulog, (sno - 1) % log_ctx->ulogentries);
+    kdb_ent_header_t *ent = ulog_index(ulog, (sno - 1) % log_ctx->ulogentries);
 
     memset(ent, 0, sizeof(*ent));
     ent->kdb_umagic = KDB_ULOG_MAGIC;
@@ -305,7 +331,7 @@ store_update(kdb_log_context *log_ctx, kdb_incr_update_t *upd)
     ulog->kdb_state = KDB_UNSTABLE;
 
     i = (upd->kdb_entry_sno - 1) % ulogentries;
-    indx_log = INDEX(ulog, i);
+    indx_log = ulog_index(ulog, i);
 
     memset(indx_log, 0, ulog->kdb_block);
     indx_log->kdb_umagic = KDB_ULOG_MAGIC;
@@ -335,7 +361,7 @@ store_update(kdb_log_context *log_ctx, kdb_incr_update_t *upd)
     } else {
         /* We are circling; set kdb_first_sno and time to the next update. */
         i = upd->kdb_entry_sno % ulogentries;
-        indx_log = INDEX(ulog, i);
+        indx_log = ulog_index(ulog, i);
         ulog->kdb_first_sno = indx_log->kdb_entry_sno;
         ulog->kdb_first_time = indx_log->kdb_time;
     }
@@ -593,7 +619,7 @@ ulog_get_entries(krb5_context context, const kdb_last_t *last,
 
     for (; sno < ulog->kdb_last_sno; sno++) {
         indx = sno % ulogentries;
-        indx_log = INDEX(ulog, indx);
+        indx_log = ulog_index(ulog, indx);
 
         memset(upd, 0, sizeof(kdb_incr_update_t));
         xdrmem_create(&xdrs, (char *)indx_log->entry_data,
diff --git a/src/tests/t_iprop.py b/src/tests/t_iprop.py
index b356971db..1f1634f31 100755
--- a/src/tests/t_iprop.py
+++ b/src/tests/t_iprop.py
@@ -86,8 +86,10 @@ def wait_for_prop(kpropd, full_expected, expected_old, expected_new):
 # Verify the output of kproplog against the expected number of
 # entries, first and last serial number, and a list of principal names
 # for the update entrires.
-def check_ulog(num, first, last, entries, env=None):
+def check_ulog(num, first, last, entries, env=None, bsize=2048):
     out = realm.run([kproplog], env=env)
+    if 'Entry block size : ' + str(bsize) + '\n' not in out:
+        fail('Expected block size %d' % bsize)
     if 'Number of entries : ' + str(num) + '\n' not in out:
         fail('Expected %d entries' % num)
     if last:
@@ -458,8 +460,32 @@ for realm in multidb_realms(kdc_conf=conf, create_user=False,
     wait_for_prop(kpropd2, True, 6, 1)
     check_ulog(1, 1, 1, [None], replica2)
 
-    # Stop the kprop daemons so we can test kpropd -t.
+    # Create an update large enough to cause a block resize, and make
+    # sure that it propagates incrementally.
+    mark('block resize')
+    cmd = [kadminl, 'cpw',
+           '-e', 'aes128-sha1,aes256-sha1,aes128-sha2,aes256-sha2',
+           '-randkey', '-keepold', pr2]
+    n = 6
+    for i in range(n):
+        realm.run(cmd)
+    check_ulog(n + 1, 1, n + 1, [None] + n * [pr2], bsize=4096)
+    kpropd1.send_signal(signal.SIGUSR1)
+    wait_for_prop(kpropd1, False, 1, n + 1)
+    check_ulog(n + 1, 1, n + 1, [None] + n * [pr2], replica1, bsize=4096)
+    kpropd2.send_signal(signal.SIGUSR1)
+    wait_for_prop(kpropd2, False, 1, n + 1)
+    check_ulog(n + 1, 1, n + 1, [None] + n * [pr2], replica2, bsize=4096)
+
+    # Reset the ulog again.
+    realm.run([kproplog, '-R'])
+    kpropd1.send_signal(signal.SIGUSR1)
+    wait_for_prop(kpropd1, True, 7, 1)
+    kpropd2.send_signal(signal.SIGUSR1)
+    wait_for_prop(kpropd2, True, 7, 1)
     realm.stop_kpropd(kpropd1)
+
+    # Stop the kprop daemons so we can test kpropd -t.
     stop_daemon(kpropd2)
     stop_daemon(kadmind_proponly)
     mark('kpropd -t')


More information about the cvs-krb5 mailing list