[Concept,v2,12/29] video: truetype: Add a scratch buffer to use malloc() less

Message ID 20260103200510.3605009-13-sjg@u-boot.org
State New
Headers
Series Malloc debugging and test/py improvements |

Commit Message

Simon Glass Jan. 3, 2026, 8:04 p.m. UTC
  From: Simon Glass <simon.glass@canonical.com>

The stb_truetype library performs around 5 allocations per character
rendered, totalling approximately 26KB of temporary memory. This creates
significant malloc/free overhead and heap fragmentation.

Add a scratch buffer mechanism that pre-allocates memory once during
probe and reuses it for each character. The buffer is reset at the start
of each putc_xy() call, and allocations come from this buffer using a
simple bump allocator with 8-byte alignment.

If the scratch buffer is exhausted (e.g. for very complex glyphs), the
allocator falls back to malloc transparently.

The scratch buffer is controlled by two new Kconfig options:
- CONSOLE_TRUETYPE_SCRATCH: Enable/disable the feature (default y)
- CONSOLE_TRUETYPE_SCRATCH_SIZE: Buffer size in bytes (default 32KB)

Co-developed-by: Claude <noreply@anthropic.com>
Signed-off-by: Simon Glass <simon.glass@canonical.com>
---

(no changes since v1)

 doc/usage/cmd/font.rst           |  9 +++++
 drivers/video/Kconfig            | 23 ++++++++++++
 drivers/video/console_truetype.c | 62 ++++++++++++++++++++++++++++++--
 drivers/video/stb_truetype.h     | 46 ++++++++++++++++++++++--
 4 files changed, 136 insertions(+), 4 deletions(-)
  

Patch

diff --git a/doc/usage/cmd/font.rst b/doc/usage/cmd/font.rst
index f7a4897667b..a4b9495b977 100644
--- a/doc/usage/cmd/font.rst
+++ b/doc/usage/cmd/font.rst
@@ -85,6 +85,15 @@  CONFIG_CONSOLE_TRUETYPE_GLYPH_BUF enables a pre-allocated buffer for glyph
 rendering, avoiding malloc/free per character. The buffer starts at 4KB and
 grows as needed via realloc().
 
+CONFIG_CONSOLE_TRUETYPE_SCRATCH enables a scratch buffer for internal stbtt
+allocations. Without this, the TrueType library performs around 5 allocations
+per character (totalling ~26KB), creating malloc/free overhead and heap
+fragmentation. With the scratch buffer, memory is allocated once at probe time
+and reused for each character. CONFIG_CONSOLE_TRUETYPE_SCRATCH_SIZE sets the
+buffer size (default 32KB), which is sufficient for most Latin characters.
+Complex glyphs (CJK, emoji) or very large font sizes may need 64KB or more.
+Allocations that do not fit in the remaining space fall back to malloc().
+
 CONFIG_VIDEO_GLYPH_STATS enables tracking of glyph-rendering statistics.
 
 Return value
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 0f99ba1845b..4a8090e622d 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -259,6 +259,29 @@  config CONSOLE_TRUETYPE_GLYPH_BUF
 	  The buffer starts at 4KB and grows via realloc() as needed to
 	  accommodate larger glyphs.
 
+config CONSOLE_TRUETYPE_SCRATCH
+	bool "TrueType scratch buffer to reduce malloc traffic"
+	depends on CONSOLE_TRUETYPE
+	default y
+	help
+	  Enable a pre-allocated scratch buffer for internal TrueType
+	  rendering allocations. This avoids most malloc()/free() calls
+	  during character rendering, improving performance and reducing
+	  heap fragmentation.
+
+	  With this disabled, stbtt allocates and frees around 26KB of
+	  temporary memory for each character rendered.
+
+config CONSOLE_TRUETYPE_SCRATCH_SIZE
+	int "TrueType scratch buffer size"
+	depends on CONSOLE_TRUETYPE_SCRATCH
+	default 32768
+	help
+	  Size of the scratch buffer in bytes for TrueType rendering.
+	  32KB is sufficient for most Latin characters. Complex glyphs
+	  (CJK, emoji) may need 64KB or more. Allocations that do not fit
+	  in the remaining space fall back to malloc().
+
 config VIDEO_GLYPH_STATS
 	bool "Track glyph rendering statistics"
 	depends on CONSOLE_TRUETYPE
diff --git a/drivers/video/console_truetype.c b/drivers/video/console_truetype.c
index c33686aff9b..cbc4473207c 100644
--- a/drivers/video/console_truetype.c
+++ b/drivers/video/console_truetype.c
@@ -105,8 +105,86 @@  static double tt_acos(double val)
 #define STBTT_fmod		tt_fmod
 #define STBTT_cos		tt_cos
 #define STBTT_acos		tt_acos
-#define STBTT_malloc(size, u)	((void)(u), malloc(size))
-#define STBTT_free(size, u)	((void)(u), free(size))
+
+/* Scratch buffer for zero-malloc rendering - must match stb_truetype.h */
+#define STBTT_SCRATCH_DEFINED
+
+/**
+ * struct stbtt_scratch - state of the bump allocator used by stbtt
+ *
+ * @buf: Start of the scratch memory
+ * @size: Size of @buf in bytes
+ * @used: Number of bytes of @buf handed out since the last reset
+ */
+struct stbtt_scratch {
+	char *buf;
+	size_t size;
+	size_t used;
+};
+
+/**
+ * stbtt_scratch_reset() - mark the whole scratch buffer as free again
+ *
+ * @s: Scratch state to reset, or NULL to do nothing
+ */
+static inline void stbtt_scratch_reset(struct stbtt_scratch *s)
+{
+	if (s)
+		s->used = 0;
+}
+
+/**
+ * stbtt__scratch_alloc() - allocate memory for stbtt
+ *
+ * Carves @size bytes, rounded up to a multiple of 8, from the scratch buffer.
+ * Falls back to malloc() when there is no scratch state or the request does
+ * not fit in the space that is left.
+ *
+ * @size: Number of bytes required
+ * @userdata: Pointer to a struct stbtt_scratch, or NULL
+ * Return: pointer to the memory; on fallback, the result of malloc()
+ */
+static inline void *stbtt__scratch_alloc(size_t size, void *userdata)
+{
+	struct stbtt_scratch *s = userdata;
+	size_t aligned = (size + 7) & ~(size_t)7;
+
+	/*
+	 * aligned < size means the round-up wrapped around. Compare against
+	 * the space left rather than adding to @used, so the check itself
+	 * cannot overflow.
+	 */
+	if (s && aligned >= size && s->used <= s->size &&
+	    aligned <= s->size - s->used) {
+		void *p = s->buf + s->used;
+
+		s->used += aligned;
+
+		return p;
+	}
+
+	return malloc(size);
+}
+
+/**
+ * stbtt__scratch_free() - release memory from stbtt__scratch_alloc()
+ *
+ * Pointers inside the scratch buffer are left alone, since that space is only
+ * reclaimed by stbtt_scratch_reset(). Anything else is passed to free().
+ *
+ * @ptr: Memory to release
+ * @userdata: Pointer to a struct stbtt_scratch, or NULL
+ */
+static inline void stbtt__scratch_free(void *ptr, void *userdata)
+{
+	struct stbtt_scratch *s = userdata;
+
+	if (!s || ptr < (void *)s->buf || ptr >= (void *)(s->buf + s->size))
+		free(ptr);
+}
+
+#define STBTT_malloc(size, u)	stbtt__scratch_alloc(size, u)
+#define STBTT_free(ptr, u)	stbtt__scratch_free(ptr, u)
 #define STBTT_assert(x)
 #define STBTT_strlen(x)		strlen(x)
 #define STBTT_memcpy		memcpy
@@ -184,6 +223,8 @@  struct console_tt_metrics {
  *	this avoids malloc/free per character. Allocated lazily after
  *	relocation to avoid using early malloc space.
  * @glyph_buf_size: Current size of glyph_buf in bytes
+ * @scratch: Scratch buffer state for stbtt internal allocations
+ * @scratch_buf: Memory for scratch buffer
  */
 struct console_tt_priv {
 	struct console_tt_metrics *cur_met;
@@ -196,6 +237,8 @@  struct console_tt_priv {
 	int pos_count;
 	u8 *glyph_buf;
 	int glyph_buf_size;
+	struct stbtt_scratch scratch;
+	char *scratch_buf;
 };
 
 /**
@@ -377,6 +420,9 @@  static int console_truetype_putc_xy(struct udevice *dev, uint x, uint y,
 	if (priv->cur_fontdata)
 		return console_fixed_putc_xy(dev, x, y, cp, priv->cur_fontdata);
 
+	/* Reset scratch buffer for this character */
+	stbtt_scratch_reset(&priv->scratch);
+
 	/* First get some basic metrics about this character */
 	font = &met->font;
 	stbtt_GetCodepointHMetrics(font, cp, &advance, &lsb);
@@ -813,6 +859,7 @@  static int truetype_add_metrics(struct udevice *dev, const char *font_name,
 		debug("%s: Font init failed\n", __func__);
 		return -EPERM;
 	}
+	font->userdata = &priv->scratch;
 
 	/* Pre-calculate some things we will need regularly */
 	met->scale = stbtt_ScaleForPixelHeight(font, font_size);
@@ -1217,6 +1264,21 @@  static int console_truetype_probe(struct udevice *dev)
 	int ret;
 
 	debug("%s: start\n", __func__);
+
+	/*
+	 * Allocate scratch buffer for stbtt internal allocations. Use #if
+	 * rather than if(): CONFIG_CONSOLE_TRUETYPE_SCRATCH_SIZE depends on
+	 * the feature and is not defined at all when it is disabled.
+	 */
+#if CONFIG_IS_ENABLED(CONSOLE_TRUETYPE_SCRATCH)
+	priv->scratch_buf = malloc(CONFIG_CONSOLE_TRUETYPE_SCRATCH_SIZE);
+	if (priv->scratch_buf) {
+		priv->scratch.buf = priv->scratch_buf;
+		priv->scratch.size = CONFIG_CONSOLE_TRUETYPE_SCRATCH_SIZE;
+		priv->scratch.used = 0;
+	}
+#endif
+
 	if (vid_priv->font_size)
 		font_size = vid_priv->font_size;
 	else
diff --git a/drivers/video/stb_truetype.h b/drivers/video/stb_truetype.h
index 90a5c2e2b3f..23a88898287 100644
--- a/drivers/video/stb_truetype.h
+++ b/drivers/video/stb_truetype.h
@@ -465,11 +465,71 @@  int main(int arg, char **argv)
    #define STBTT_fabs(x)      fabs(x)
    #endif
 
+   /* Scratch buffer for zero-malloc rendering */
+   #ifndef STBTT_SCRATCH_DEFINED
+   #define STBTT_SCRATCH_DEFINED
+   /* Bump-allocator state: 'used' bytes of 'buf' (of 'size' bytes) are taken */
+   struct stbtt_scratch {
+       char *buf;
+       size_t size;
+       size_t used;
+   };
+
+   /* Make the whole scratch buffer available again; a NULL 's' is ignored */
+   static inline void stbtt_scratch_reset(struct stbtt_scratch *s)
+   {
+       if (s)
+           s->used = 0;
+   }
+   #endif
+
    // #define your own functions "STBTT_malloc" / "STBTT_free" to avoid malloc.h
    #ifndef STBTT_malloc
    #include <stdlib.h>
-   #define STBTT_malloc(x,u)  ((void)(u),malloc(x))
-   #define STBTT_free(x,u)    ((void)(u),free(x))
+
+   /*
+    * Allocate 'size' bytes, rounded up to a multiple of 8, from the scratch
+    * buffer that 'userdata' points to. Falls back to malloc() if 'userdata'
+    * is NULL or the request does not fit in the space that is left.
+    */
+   static inline void *stbtt__scratch_alloc(size_t size, void *userdata)
+   {
+       struct stbtt_scratch *s = userdata;
+       size_t aligned = (size + 7) & ~(size_t)7;  /* 8-byte alignment */
+
+       /*
+        * aligned < size means the round-up wrapped around. Compare against
+        * the space left rather than adding to 'used', so the check itself
+        * cannot overflow.
+        */
+       if (s && aligned >= size && s->used <= s->size &&
+           aligned <= s->size - s->used) {
+           void *p = s->buf + s->used;
+
+           s->used += aligned;
+
+           return p;
+       }
+
+       return malloc(size);  /* fallback */
+   }
+
+   /*
+    * Release memory from stbtt__scratch_alloc(). Pointers inside the scratch
+    * buffer are left alone, since that space is only reclaimed by
+    * stbtt_scratch_reset(); anything else goes to free().
+    */
+   static inline void stbtt__scratch_free(void *ptr, void *userdata)
+   {
+       struct stbtt_scratch *s = userdata;
+
+       /* Only free if not from scratch buffer */
+       if (!s || ptr < (void *)s->buf || ptr >= (void *)(s->buf + s->size))
+           free(ptr);
+   }
+
+   #define STBTT_malloc(x,u)  stbtt__scratch_alloc(x, u)
+   #define STBTT_free(x,u)    stbtt__scratch_free(x, u)
    #endif
 
    #ifndef STBTT_assert