diff --git a/autodoc.pl b/autodoc.pl
index e19d12a522e9..bac73619a167 100644
--- a/autodoc.pl
+++ b/autodoc.pl
@@ -1899,7 +1899,7 @@ ($fh, $section_name, $element_name, $docref)
                     # Here, has a long name and we didn't create one just
                     # above.  Check that there really is a long name entry.
                     my $real_proto = delete $protos{"Perl_$name"};
-                    if ($real_proto) {
+                    if ($real_proto || $flags =~ /m/) {
 
                         # Set up to redo the loop at the end.  This iteration
                         # adds the short form; the redo causes its long form
diff --git a/embed.fnc b/embed.fnc
index f490397f7de6..ae17ffcda520 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -14,7 +14,8 @@
 : real (full) name, with any appropriate thread context paramaters, thus hiding
 : that detail from the typical code.
 :
-: Most macros (as opposed to functions) listed here are the complete full name.
+: Many macros (as opposed to functions) listed here are the complete full name,
+: though we may want to start converting those to have full names.
 :
 : All non-static functions defined by perl need to be listed in this file.
 : embed.pl uses the entries here to construct:
@@ -157,8 +158,9 @@
 : and you know at a glance that the macro actually has documentation.  It
 : doesn't by itself create any documentation; instead the other apidoc lines
 : pull in information specified by these lines.  Many of the lines in this
-: file for macros could be pulled out of here and replaced by these lines
-: throughout the source.  It is a goal to do that as convenience dictates.
+: file for macros that don't also have the 'p' flag (described below) could be
+: pulled out of here and replaced by these lines throughout the source.  It is
+: a goal to do that as convenience dictates.
 :
 : The other apidoc lines either have the usage data as part of the line, or
 : pull in the data from this file or apidoc_defn lines.
@@ -210,10 +212,10 @@
 : line that begins with an '='.  In particular, an '=cut' line ends that
 : documentation without introducing something new.
 :
-: Various macros and other elements aren't listed here in embed.fnc.  They are
-: documented in the same manner, but since they don't have this file to get
-: information from, the defining lines have the syntax and meaning they do in
-: this file, so it can be specified:
+: Various macros and other elements aren't listed here in embed.fnc (though
+: they could be).  They are documented in the same manner, but since they don't
+: have this file to get information from, the defining lines have the syntax
+: and meaning they do in this file, so it can be specified:
 :
 :   =for apidoc flags|return_type|name|arg1|arg2|...|argN
 :   =for apidoc_item flags|return_type|name|arg1|arg2|...|argN
@@ -301,22 +303,21 @@
 :        functions flagged with this, the installation can run Configure with
 :        the -Accflags='-DNO_MATHOMS' parameter to not even compile them.
 :
-:        Sometimes the function has been subsumed by a more general one (say,
-:        by adding a flags parameter), and a macro exists with the original
-:        short name API, and it calls the new function, bypassing this one, and
-:        the original 'Perl_' form is being deprecated. In this case also
-:        specify the 'M' flag.
+:        If the function can be implemented as a macro (that evaluates its
+:        arguments exactly once), use the 'm' and 'p' flags together to implement
+:        this.  (See the discussion under 'm'.)  Another option for this is to
+:        use the 'M' flag.
 :
-:        Without the M flag, these functions should be deprecated, and it is an
-:        error to not also specify the 'D' flag.
+:        Without the m or M flags, these functions should be deprecated, and it
+:        is an error to not also specify the 'D' flag.
 :
 :        The 'b' functions are normally moved to mathoms.c, but if
 :        circumstances dictate otherwise, they can be anywhere, provided the
 :        whole function is wrapped with
 :
-:	    #ifndef NO_MATHOMS
-:	    ...
-:	    #endif
+:           #ifndef NO_MATHOMS
+:           ...
+:           #endif
 :
 :        Note that this flag no longer automatically adds a 'Perl_' prefix to
 :        the name. Additionally specify 'p' to do that.
@@ -370,10 +371,10 @@
 :        then it is assumed to take a strftime-style format string as the 1st
 :        arg; otherwise it's assumed to take a printf style format string, not
 :        necessarily the 1st arg.  All the arguments following the second form
-:	 (including possibly '...') are assumed to be for the format.
+:        (including possibly '...') are assumed to be for the format.
 :
 :         embed.h: any entry in here for the second form is suppressed because
-:	           of varargs
+:                  of varargs
 :         proto.h: add __attribute__format__ (or ...null_ok__)
 :
 :   'F'  Function has a '...' parameter, but don't assume it is a format. This
@@ -396,7 +397,7 @@
 :        one NN argument.
 :
 :          proto.h: PERL_ARGS_ASSERT macro is not defined unless the function
-:		   has NN arguments
+:                   has NN arguments
 :
 :   'h'  Hide any documentation that would normally go into perlapi or
 :        perlintern. This is typically used when the documentation is actually
@@ -427,7 +428,7 @@
 :        particular C file(s) or in the perl core.) Therefore, all non-guarded
 :        functions should also have the 'p' flag specified to avoid polluting
 :        the XS code name space.  Otherwise, this flag also turns on the 'S'
-:	 flag.
+:        flag.
 :
 :          proto.h: function is declared as PERL_STATIC_INLINE
 :
@@ -439,23 +440,46 @@
 :                  __attribute__always_inline__ is added
 :
 :   'm'  Implemented as a macro; there is no function associated with this
-:        name, and hence no long Perl_ or S_ name. However, if the macro name
-:        itself begins with 'Perl_', autodoc.pl will show a thread context
-:        parameter unless the 'T' flag is specified.
+:        name.  There is no long S_ name.
+:
+:        However, you may #define the macro with a long name like 'Perl_foo',
+:        and specify the 'p' flag.  This will cause an embed.h entry to be
+:        created that #defines 'foo' as 'Perl_foo'.  This can be used to make
+:        any macro have a long name, perhaps to avoid name collisions.  It is
+:        particularly useful tp preserve backward compatibility when a function
+:        is converted to be a macro.  Most of mathoms.c could be converted to
+:        use this facility.  When there is no thread context involved, you just
+:        do something like
+:
+:           #define Perl_foo(a, b, c)  Perl_bar(a, b, 0, c)
+:
+:        Otherwise consider this general case where there is a series of macros
+:        that build on the previous ones by calling something with a different
+:        name or with an extra parameter beyond what the previous one did:
+:
+:           #define Perl_foo(mTHX, a)         Perl_bar1(aTHX, a)
+:           #define Perl_bar1(mTHX, a)        Perl_bar2(aTHX, a, 0)
+:           #define Perl_bar2(mTHX, a, b)     Perl_bar3(aTHX, a, b, 0)
+:           #define Perl_bar3(mTHX, a, b, c)  Perl_func(aTHX_ a, b, c, 0)
+:
+:        Use the formal parameter name 'mTHX,' (which stands for "macro thread
+:        context") as the first in each macro definition, and call the next
+:        macro in the sequence with 'aTHX,' (Note the commas).  Eventually, the
+:        sequence will end with a function call (or else there would be no need
+:        for thread context).  For that instead call it with 'aTHX_' (with an
+:        underscore instead of a comma).
 :
 :         suppress proto.h entry (actually, not suppressed, but commented out)
-:         suppress entry in the list of exported symbols available on all platforms
-:         suppress embed.h entry, as the implementation should furnish the macro
+:         suppress entry in the list of exported symbols available on all
+:             platforms
+:         suppress embed.h entry (when no 'p' flag), as the implementation
+:             should furnish the macro
 :
 :   'M'  The implementation is furnishing its own macro instead of relying on
 :        the automatically generated short name macro (which simply expands to
 :        call the real name function). One reason to do this is if the
-:        parameters need to be cast from what the caller has, or if there is a
-:        macro that bypasses this function (whose long name is being retained
-:        for backward compatibility for those who call it with that name). An
-:        example is when a new function is created with an extra parameter and
-:        a wrapper macro is added that has the old API, but calls the new one
-:        with the exta parameter set to a default.
+:        parameters need to be cast from what the caller has.  There is less
+:        need to do this now that 'm' and 'p' together is supported.
 :
 :        This flag requires the 'p' flag to be specified, as there would be no
 :        need to do this if the function weren't publicly accessible before.
@@ -489,10 +513,10 @@
 :
 :   'o'  Has no Perl_foo or S_foo compatibility macro:
 :
-:	 This is used for whatever reason to force the function to be called
-:	 with the long name.  Perhaps there is a varargs issue.  Use the 'M'
-:	 flag instead for wrapper macros, and legacy-only functions should
-:	 also use 'b'.
+:        This is used for whatever reason to force the function to be called
+:        with the long name.  Perhaps there is a varargs issue.  Use the 'M'
+:        or 'm' flags instead for wrapper macros, and legacy-only functions
+:        should also use 'b'.
 :
 :          embed.h: suppress "#define foo Perl_foo"
 :
@@ -517,9 +541,10 @@
 :
 :          proto.h: add __attribute__pure__
 :
-:   'p'  Function in source code has a Perl_ prefix:
+:   'p'  Function or macro in source code has a Perl_ prefix:
 :
-:          proto.h: function is declared as Perl_foo rather than foo
+:          proto.h: function or macro is declared as Perl_foo rather than foo
+:                   (though the entries for macros will be commented out)
 :          embed.h: "#define foo Perl_foo" entries added
 :
 :   'R'  Return value must not be ignored (also implied by 'a' and 'P' flags):
@@ -543,8 +568,8 @@
 :
 :   's'  Static function, but function in source code has a Perl_ prefix:
 :
-:	 This is used for functions that have always had a Perl_ prefix, but
-:	 have been moved to a header file and declared static.
+:        This is used for functions that have always had a Perl_ prefix, but
+:        have been moved to a header file and declared static.
 :
 :          proto.h: function is declared as Perl_foo rather than foo
 :                STATIC is added to declaration;
@@ -579,11 +604,11 @@
 :        compatibility issues.
 :
 :   'W'  Add a comma_pDEPTH argument to function prototypes, and a comma_aDEPTH
-:	 argument to the function calls. This means that under DEBUGGING a
-:	 depth argument is added to the functions, which is used for example by
-:	 the regex engine for debugging and trace output. A non DEBUGGING build
-:	 will not pass the unused argument. Currently restricted to functions
-:	 with at least one argument.
+:        argument to the function calls. This means that under DEBUGGING a
+:        depth argument is added to the functions, which is used for example by
+:        the regex engine for debugging and trace output. A non DEBUGGING build
+:        will not pass the unused argument. Currently restricted to functions
+:        with at least one argument.
 :
 :   'X'  Explicitly exported:
 :
@@ -762,14 +787,9 @@ Adp	|int	|bytes_cmp_utf8 |NN const U8 *b 			\
 				|STRLEN blen				\
 				|NN const U8 *u 			\
 				|STRLEN ulen
-AMdp	|U8 *	|bytes_from_utf8|NN const U8 *s 			\
+Adp	|U8 *	|bytes_from_utf8|NN const U8 *s 			\
 				|NN STRLEN *lenp			\
 				|NN bool *is_utf8p
-CTdp	|U8 *	|bytes_from_utf8_loc					\
-				|NN const U8 *s 			\
-				|NN STRLEN *lenp			\
-				|NN bool *is_utf8p			\
-				|NULLOK const U8 **first_unconverted
 Adp	|U8 *	|bytes_to_utf8	|NN const U8 *s 			\
 				|NN STRLEN *lenp
 AOdp	|SSize_t|call_argv	|NN const char *sub_name		\
@@ -3657,6 +3677,19 @@ CDbdp	|UV	|utf8n_to_uvuni |NN const U8 *s 			\
 				|U32 flags
 Adpx	|U8 *	|utf8_to_bytes	|NN U8 *s				\
 				|NN STRLEN *lenp
+Cp	|PL_utf8_to_bytes_ret|utf8_to_bytes_				\
+				|NN U8 **s_ptr				\
+				|NN STRLEN *lenp			\
+				|Perl_utf8_to_bytes_arg result_as
+Admp	|PL_utf8_to_bytes_ret|utf8_to_bytes_new_pv			\
+				|NN U8 const **s_ptr			\
+				|NN STRLEN *lenp
+Admp	|PL_utf8_to_bytes_ret|utf8_to_bytes_overwrite			\
+				|NN U8 **s_ptr				\
+				|NN STRLEN *lenp
+Admp	|PL_utf8_to_bytes_ret|utf8_to_bytes_temp_pv			\
+				|NN U8 const **s_ptr			\
+				|NN STRLEN *lenp
 EMXp	|U8 *	|utf16_to_utf8	|NN U8 *p				\
 				|NN U8 *d				\
 				|Size_t bytelen 			\
diff --git a/embed.h b/embed.h
index 74aaea51bcd5..0999ce468a30 100644
--- a/embed.h
+++ b/embed.h
@@ -156,7 +156,7 @@
 # define block_gimme()                          Perl_block_gimme(aTHX)
 # define block_start(a)                         Perl_block_start(aTHX_ a)
 # define bytes_cmp_utf8(a,b,c,d)                Perl_bytes_cmp_utf8(aTHX_ a,b,c,d)
-# define bytes_from_utf8_loc                    Perl_bytes_from_utf8_loc
+# define bytes_from_utf8(a,b,c)                 Perl_bytes_from_utf8(aTHX_ a,b,c)
 # define bytes_to_utf8(a,b)                     Perl_bytes_to_utf8(aTHX_ a,b)
 # define call_argv(a,b,c)                       Perl_call_argv(aTHX_ a,b,c)
 # define call_atexit(a,b)                       Perl_call_atexit(aTHX_ a,b)
@@ -791,6 +791,10 @@
 # define utf8_hop_safe                          Perl_utf8_hop_safe
 # define utf8_length(a,b)                       Perl_utf8_length(aTHX_ a,b)
 # define utf8_to_bytes(a,b)                     Perl_utf8_to_bytes(aTHX_ a,b)
+# define utf8_to_bytes_(a,b,c)                  Perl_utf8_to_bytes_(aTHX_ a,b,c)
+# define utf8_to_bytes_new_pv(a,b)              Perl_utf8_to_bytes_new_pv(aTHX,a,b)
+# define utf8_to_bytes_overwrite(a,b)           Perl_utf8_to_bytes_overwrite(aTHX,a,b)
+# define utf8_to_bytes_temp_pv(a,b)             Perl_utf8_to_bytes_temp_pv(aTHX,a,b)
 # define utf8_to_uvchr_buf_helper(a,b,c)        Perl_utf8_to_uvchr_buf_helper(aTHX_ a,b,c)
 # define utf8n_to_uvchr_msgs                    Perl_utf8n_to_uvchr_msgs
 # define uvoffuni_to_utf8_flags_msgs(a,b,c,d)   Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d)
diff --git a/mathoms.c b/mathoms.c
index 27fa2969d1b1..b7c4760964f1 100644
--- a/mathoms.c
+++ b/mathoms.c
@@ -22,15 +22,15 @@
 /*
  * This file contains mathoms, various binary artifacts from previous
  * versions of Perl which we cannot completely remove from the core
- * code. There are two reasons functions should be here:
+ * code. There is only one reason these days for functions should be here:
  *
  * 1) A function has been replaced by a macro within a minor release,
  *    so XS modules compiled against an older release will expect to
  *    still be able to link against the function
- * 2) A function Perl_foo(...) with #define foo Perl_foo(aTHX_ ...)
- *    has been replaced by a macro, e.g. #define foo(...) foo_flags(...,0)
- *    but XS code may still explicitly use the long form, i.e.
- *    Perl_foo(aTHX_ ...)
+ *
+ * It used to be that this was the way to handle the case were a function
+ * Perl_foo(...) had been replaced by a macro.  But see the 'm' flag discussion
+ * in embed.fnc for a better way to handle this.
  *
  * This file can't just be cleaned out periodically, because that would break
  * builds with -DPERL_NO_SHORT_NAMES
diff --git a/proto.h b/proto.h
index cb2fa83c0dfd..ee045eab1fe5 100644
--- a/proto.h
+++ b/proto.h
@@ -408,11 +408,6 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *lenp, bool *is_utf8p);
 #define PERL_ARGS_ASSERT_BYTES_FROM_UTF8        \
         assert(s); assert(lenp); assert(is_utf8p)
 
-PERL_CALLCONV U8 *
-Perl_bytes_from_utf8_loc(const U8 *s, STRLEN *lenp, bool *is_utf8p, const U8 **first_unconverted);
-#define PERL_ARGS_ASSERT_BYTES_FROM_UTF8_LOC    \
-        assert(s); assert(lenp); assert(is_utf8p)
-
 PERL_CALLCONV U8 *
 Perl_bytes_to_utf8(pTHX_ const U8 *s, STRLEN *lenp);
 #define PERL_ARGS_ASSERT_BYTES_TO_UTF8          \
@@ -5137,6 +5132,20 @@ Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp);
 #define PERL_ARGS_ASSERT_UTF8_TO_BYTES          \
         assert(s); assert(lenp)
 
+PERL_CALLCONV PL_utf8_to_bytes_ret
+Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp, Perl_utf8_to_bytes_arg result_as);
+#define PERL_ARGS_ASSERT_UTF8_TO_BYTES_         \
+        assert(s_ptr); assert(lenp)
+
+/* PERL_CALLCONV PL_utf8_to_bytes_ret
+Perl_utf8_to_bytes_new_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp); */
+
+/* PERL_CALLCONV PL_utf8_to_bytes_ret
+Perl_utf8_to_bytes_overwrite(pTHX_ U8 **s_ptr, STRLEN *lenp); */
+
+/* PERL_CALLCONV PL_utf8_to_bytes_ret
+Perl_utf8_to_bytes_temp_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp); */
+
 PERL_CALLCONV U8 *
 Perl_utf8_to_utf16_base(pTHX_ U8 *s, U8 *d, Size_t bytelen, Size_t *newlen, const bool high, const bool low);
 #define PERL_ARGS_ASSERT_UTF8_TO_UTF16_BASE     \
diff --git a/regen/embed.pl b/regen/embed.pl
index 5fce877c6c16..6c4e5ee4e8c1 100755
--- a/regen/embed.pl
+++ b/regen/embed.pl
@@ -137,11 +137,14 @@ sub generate_proto_h {
 
         die_at_end "$plain_func: S and p flags are mutually exclusive"
                                             if $flags =~ /S/ && $flags =~ /p/;
-        die_at_end "$plain_func: m and $1 flags are mutually exclusive"
-                                        if $has_mflag && $flags =~ /([pS])/;
-
-        die_at_end "$plain_func: u flag only usable with m" if $flags =~ /u/
-                                                            && ! $has_mflag;
+	if ($has_mflag) {
+	    if ($flags =~ /S/) {
+		die_at_end "$plain_func: m and S flags are mutually exclusive";
+	    }
+	}
+	else {
+	    die_at_end "$plain_func: u flag only usable with m" if $flags =~ /u/;
+	}
 
         my ($static_flag, @extra_static_flags)= $flags =~/([SsIi])/g;
 
@@ -500,9 +503,9 @@ sub embed_h {
         my $ind= $level ? " " : "";
         $ind .= "  " x ($level-1) if $level>1;
         my $inner_ind= $ind ? "  " : " ";
-        unless ($flags =~ /[omM]/) {
+        if ($flags !~ /[omM]/ or ($flags =~ /m/ && $flags =~ /p/)) {
             my $argc = scalar @$args;
-            if ($flags =~ /T/) {
+            if ($flags =~ /[T]/) {
                 my $full_name = full_name($func, $flags);
                 next if $full_name eq $func;    # Don't output a no-op.
                 $ret = indent_define($func, $full_name, $ind);
@@ -525,8 +528,11 @@ sub embed_h {
                     $use_va_list ? ("__VA_ARGS__") : ());
                 $ret = "#${ind}define $func($paramlist) ";
                 add_indent($ret,full_name($func, $flags) . "(aTHX");
-                $ret .= "_ " if $replacelist;
-                $ret .= $replacelist;
+		if ($replacelist) {
+                    $ret .= ($flags =~ /m/) ? "," : "_ ";
+                    $ret .= $replacelist;
+                }
+
                 if ($flags =~ /W/) {
                     if ($replacelist) {
                         $ret .= " comma_aDEPTH";
diff --git a/utf8.c b/utf8.c
index 88f8299792fe..3d115b5730c8 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2352,52 +2352,188 @@ Perl_bytes_cmp_utf8(pTHX_ const U8 *b, STRLEN blen, const U8 *u, STRLEN ulen)
 }
 
 /*
-=for apidoc utf8_to_bytes
+=for apidoc      utf8_to_bytes_overwrite
+=for apidoc_item utf8_to_bytes_new_pv
+=for apidoc_item utf8_to_bytes_temp_pv
+=for apidoc_item utf8_to_bytes
+=for apidoc_item bytes_from_utf8
 
-Converts a string C<"s"> of length C<*lenp> from UTF-8 into native byte encoding.
-Unlike L</bytes_to_utf8>, this over-writes the original string, and
-updates C<*lenp> to contain the new length.
-Returns zero on failure (leaving C<"s"> unchanged) setting C<*lenp> to -1.
+These each convert a string encoded as UTF-8 into the equivalent native byte
+representation, if possible.  The first three forms are preferred; their API is
+more convenient to use, and each return non-zero if the result is in bytes;
+zero if the conversion failed.
 
-Upon successful return, the number of variants in the string can be computed by
-having saved the value of C<*lenp> before the call, and subtracting the
-after-call value of C<*lenp> from it.
+=over 4
+
+=item C<utf8_to_bytes_overwrite>
+
+=item C<utf8_to_bytes_new_pv>
+
+=item C<utf8_to_bytes_temp_pv>
+
+These differ only in the form of the returned string and the allowed constness
+of the input string.  In each, if the input string was already in native bytes
+or was not convertible, the input isn't changed.
+
+C<utf8_to_bytes_overwrite> overwrites the input string with the bytes
+conversion.  Hence, the input string should not be C<const>.  (Converting the
+multi-byte UTF-8 encoding to single bytes never expands the result, so
+overwriting is always feasible.)
+
+Both C<utf8_to_bytes_new_pv> and C<utf8_to_bytes_temp_pv> allocate new memory
+to hold the converted string, never changing the input.  Hence the input string
+may be C<const>.  They differ in that C<utf8_to_bytes_temp_pv> arranges for the
+new memory to automatically be freed.  With C<utf8_to_bytes_new_pv>, the caller
+is responsible for freeing the memory.
+
+In each of these three functions, the input C<s_ptr> is a pointer to the string
+to be converted (so that the first byte will be at C<*sptr[0]>), and C<*lenp>
+is its length.
+
+The return of each can be one of three values:
+
+=over 4
+
+=item C<PL_cant_convert> (or its equivalent, 0 or C<false>)
+
+This happens when the input is not well-formed UTF-8 or contains at least one
+UTF-8 sequence that represents a code point that can't be expressed as a byte.
+The contents of C<*s_ptr> and C<*lenp> are not changed.
+
+=item C<PL_was_noop>
+
+The input turned out to already be in bytes form.  The contents of C<*s_ptr>
+and C<*lenp> are not changed.
+
+=item C<PL_converted>
+
+The input was successfully translated to native bytes.  The result will be
+NUL-terminated even if the original wasn't.
+
+=over 4
+
+=item For C<utf8_to_bytes_overwrite>,
+
+The input string C<*s_ptr> was overwritten with the native bytes, including a
+NUL terminator.  C<*lenp> has been updated with the new length.
+
+=item For C<utf8_to_bytes_new_pv> and C<utf8_to_bytes_temp_pv>
+
+The input string was not changed.  Instead, new memory has been allocated
+containing the translation of the input into native bytes with a NUL terminator
+byte.  C<*s_ptr> now points to that new memory, and  C<*lenp> contains its
+length.
+
+For C<utf8_to_bytes_temp_pv>, the new memory has been arranged to be
+automatically freed, via a call to C<L</SAVEFREEPV>>.
+
+For C<utf8_to_bytes_new_pv>, it is the caller's responsibility to free the new
+memory when done using it.
+
+=back
+
+=back
+
+Except when calling C<utf8_to_bytes_new_pv>, most likely you can treat the
+return as a boolean.  With that function you will need to know whether or not
+there is memory that has to be freed.
+
+Note that in all cases, C<*s_ptr> and C<*lenp> will have correct and consistent
+values, updated as was necessary.
+
+Also note that when the return isn't C<PL_cant_convert>, the number of variants
+in the string can be computed by having saved the value of C<*lenp> before the
+call, and subtracting the after-call value of C<*lenp> from it.  This is also
+true for the other two functions described below.
+
+=item C<utf8_to_bytes>
+
+Plain C<utf8_to_bytes> also converts a UTF-8 encoded string to bytes, but there
+are more glitches that the caller has to be prepared to handle.
+
+The input string is passed with one less indirection level, C<s>.
+
+=over
+
+=item If the conversion was successful or a noop
+
+The function returns C<s> (unchanged), but its contents were changed if this
+was not a noop; C<*lenp> will be updated as necessary to be the correct length.
+
+=item If the conversion failed
 
-If you need a copy of the string, see L</bytes_from_utf8>.
+The function returns NULL and sets C<*lenp> to -1, cast to C<STRLEN>.
+This means that you will have to use a temporary containing the string length
+to pass to the function if you will need the value afterwards.
+
+The contents of C<s> were not changed.
+
+=back
+
+=item C<bytes_from_utf8>
+
+C<bytes_from_utf8> also converts a potentially UTF-8 encoded string C<s> to
+bytes.  It preserves C<s>, allocating new memory for the converted string.
+
+In contrast to the other functions, the input string to this one need not
+be UTF-8.  If not, the caller has set C<*is_utf8p> to be C<false>, and the
+function does nothing, returning the original C<s>.
+
+Also do nothing if there are code points in the string not expressible in
+native byte encoding, returning the original C<s>.
+
+Otherwise, C<*is_utf8p> is set to 0, and the return value is a pointer to a
+newly created string containing the native byte equivalent of C<s>, and whose
+length is returned in C<*lenp>, updated.  The new string is C<NUL>-terminated.
+The caller is responsible for arranging for the memory used by this string to
+get freed.
+
+The major problem with this function is that memory is allocated and filled
+even when the input string was already in bytes form.
+
+=back
+
+New code should use the first three functions listed above.
 
 =cut
 */
 
-U8 *
-Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp)
+PL_utf8_to_bytes_ret
+Perl_utf8_to_bytes_(pTHX_ U8 **s_ptr, STRLEN *lenp,
+                          Perl_utf8_to_bytes_arg result_as)
 {
-    U8 * first_variant;
+    PERL_ARGS_ASSERT_UTF8_TO_BYTES_;
 
-    PERL_ARGS_ASSERT_UTF8_TO_BYTES;
-    PERL_UNUSED_CONTEXT;
+    U8 * first_variant;
 
     /* This is a no-op if no variants at all in the input */
-    if (is_utf8_invariant_string_loc(s, *lenp, (const U8 **) &first_variant)) {
-        return s;
+    if (is_utf8_invariant_string_loc(*s_ptr, *lenp,
+                                    (const U8 **) &first_variant))
+    {
+        return PL_was_noop;
     }
 
     /* Nothing before 'first_variant' needs to be changed, so start the real
      * work there */
 
-    U8 * const save = s;
-    U8 * const send = s + *lenp;
-    s = first_variant;
+    U8 * const s0 = *s_ptr;
+    const U8 * const send = s0 + *lenp;
+    U8 * s = first_variant;
+    Size_t invariant_length = first_variant - s0;
+    Size_t variant_count = 0;
 
 #ifndef EBCDIC      /* The below relies on the bit patterns of UTF-8 */
 
-    /* There is some start-up/tear-down overhead with this, so no real gain
+    /* Do a first pass through the string to see if it actually is translatable
+     * into bytes, and if so, how big the result is.  On long strings this is
+     * done a word at a time, so is relatively quick. (There is some
+     * start-up/tear-down overhead with the per-word algorithm, so no real gain
      * unless the remaining portion of the string is long enough.  The current
-     * value is just a guess. */
+     * value is just a guess.)  On EBCDIC, it's always per-byte. */
     if ((send - s) > (ptrdiff_t) (5 * PERL_WORDSIZE)) {
 
-        /* First, go through the string a word at-a-time to verify that it is
-         * downgradable.  If it contains any start byte besides C2 and C3, then
-         * it isn't. */
+        /* If the string contains any start byte besides C2 and C3, then it
+         * isn't translatable into bytes */
 
         const PERL_UINTMAX_T C0_mask = PERL_COUNT_MULTIPLIER * 0xC0;
         const PERL_UINTMAX_T C2_mask = PERL_COUNT_MULTIPLIER * 0xC2;
@@ -2414,11 +2550,13 @@ Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp)
         while (s < partial_word_end) {
             if (! UTF8_IS_INVARIANT(*s)) {
                 if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, send)) {
-                    *lenp = ((STRLEN) -1);
-                    return NULL;
+                    return PL_cant_convert;
                 }
+
                 s++;
+                variant_count++;
             }
+
             s++;
         }
 
@@ -2465,10 +2603,15 @@ Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp)
              * If they're not equal, there are start bytes that aren't C2
              * nor C3, hence this is not downgradable */
             if (start_bytes != C2_C3_start_bytes) {
-                *lenp = ((STRLEN) -1);
-                return NULL;
+                return PL_cant_convert;
             }
 
+            /* Commit 03c1e4ab1d6ee9062fb3f94b0ba31db6698724b1 contains an
+               explanation of how this works */
+            variant_count +=
+                (Size_t) (((((start_bytes)) >> 7) * PERL_COUNT_MULTIPLIER)
+                                      >> ((PERL_WORDSIZE - 1) * CHARBITS));
+
             s += PERL_WORDSIZE;
         } while (s + PERL_WORDSIZE <= send);
 
@@ -2477,262 +2620,174 @@ Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp)
          * first byte of the character  */
         if (s > first_variant && UTF8_IS_START(*(s-1))) {
             s--;
+            variant_count--;
         }
     }
 
 #endif
-
-    /* Do the straggler bytes beyond the final word boundary (or all bytes
-     * in the case of EBCDIC) */
+    /* Do the straggler bytes beyond what the loop above did */
     while (s < send) {
         if (! UTF8_IS_INVARIANT(*s)) {
             if (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, send)) {
-                *lenp = ((STRLEN) -1);
-                return NULL;
+                return PL_cant_convert;
             }
             s++;
+            variant_count++;
         }
         s++;
     }
 
-    /* Here, we passed the tests above.  For the EBCDIC case, everything
-     * was well-formed and can be downgraded to non-UTF8.  For non-EBCDIC,
-     * it means only that all start bytes were C2 or C3, hence any
-     * well-formed sequences are downgradable.  But we didn't test, for
-     * example, that there weren't two C2's in a row.  That means that in
-     * the loop below, we have to be sure things are well-formed.  Because
-     * this is very very likely, and we don't care about having speedy
-     * handling of malformed input, the loop proceeds as if well formed,
-     * and should a malformed one come along, it undoes what it already has
-     * done */
-
-    U8 * d = s = first_variant;
+    /* Here, we passed the tests above and know how many UTF-8 variant
+     * characters there are, which allows us to calculate the size to malloc
+     * for the non-destructive case */
+    U8 *d0;
+    if (result_as != PL_utf8_to_bytes_new_memory) {
+        d0 = s0;
+    }
+    else {
+        Newx(d0, (*lenp) + 1 - variant_count, U8);
+        Copy(s0, d0, invariant_length, U8);
+    }
 
+    U8 * d = d0 + invariant_length;
+
+    /* For the cases where the per-word algorithm wasn't used, everything is
+     * well-formed and can definitely be translated.  When the per word
+     * algorithm was used, it found that all start bytes in the string were C2
+     * or C3, hence any well-formed sequences are convertible to bytes.  But we
+     * didn't test, for example, that there weren't two C2's in a row.  That
+     * means that in the loop below, we have to be sure things are well-formed.
+     * Because it is very very unlikely that we got this far for something
+     * malformed, and because we prioritize speed in the normal case over the
+     * malformed one, we go ahead and do the translation, and undo it if found
+     * to be necessary. */
+    s = first_variant;
     while (s < send) {
-        U8 * s1;
-
-        if (UVCHR_IS_INVARIANT(*s)) {
-            *d++ = *s++;
-            continue;
-        }
-
-        /* Here it is two-byte encoded. */
-        if (   LIKELY(UTF8_IS_DOWNGRADEABLE_START(*s))
-            && LIKELY(UTF8_IS_CONTINUATION((s[1]))))
-        {
-            U8 first_byte = *s++;
-            *d++ = EIGHT_BIT_UTF8_TO_NATIVE(first_byte, *s);
-            s++;
-            continue;
-        }
-
-        /* Here, it is malformed.  This shouldn't happen on EBCDIC, and on
-         * ASCII platforms, we know that the only start bytes in the text
-         * are C2 and C3, and the code above has made sure that it doesn't
-         * end with a start byte.  That means the only malformations that
-         * are possible are a start byte without a continuation (either
-         * followed by another start byte or an invariant) or an unexpected
-         * continuation.
-         *
-         * We have to undo all we've done before, back down to the first
-         * UTF-8 variant.  Note that each 2-byte variant we've done so far
-         * (converted to single byte) slides things to the left one byte,
-         * and so we have bytes that haven't been written over.
-         *
-         * Here, 'd' points to the next position to overwrite, and 's'
-         * points to the first invalid byte.  That means 'd's contents
-         * haven't been changed yet, nor has anything else beyond it in the
-         * string.  In restoring to the original contents, we don't need to
-         * do anything past (d-1).
-         *
-         * In particular, the bytes from 'd' to 's' have not been changed.
-         * This loop uses a new variable 's1' (to avoid confusing 'source'
-         * and 'destination') set to 'd',  and moves 's' and 's1' in lock
-         * step back so that afterwards, 's1' points to the first changed
-         * byte that will be the source for the first byte (or bytes) at
-         * 's' that need to be changed back.  Note that s1 can expand to
-         * two bytes */
-        s1 = d;
-        while (s >= d) {
-            s--;
-            if (! UVCHR_IS_INVARIANT(*s1)) {
-                s--;
+        U8 c = *s++;
+        if (! UVCHR_IS_INVARIANT(c)) {
+
+            /* Then it is a multi-byte character.  The first pass above
+             * determined that the string contains only invariants, the C2 and
+             * C3 start bytes, and continuation bytes.  The condition above
+             * excluded this from being an invariant.  To be well formed, it
+             * needs to be a start byte followed by a continuation byte. */
+            if (   UNLIKELY(  UTF8_IS_CONTINUATION(c))
+                || UNLIKELY(  s >= send)
+                || UNLIKELY(! UTF8_IS_CONTINUATION(*s)))
+            {
+                goto cant_convert;
             }
-            s1--;
-        }
 
-        /* Do the changing back */
-        while (s1 >= first_variant) {
-            if (UVCHR_IS_INVARIANT(*s1)) {
-                *s-- = *s1--;
-            }
-            else {
-                *s-- = UTF8_EIGHT_BIT_LO(*s1);
-                *s-- = UTF8_EIGHT_BIT_HI(*s1);
-                s1--;
-            }
+            c = EIGHT_BIT_UTF8_TO_NATIVE(c, *s);
+            s++;
         }
 
-        *lenp = ((STRLEN) -1);
-        return NULL;
+        *d++ = c;
     }
 
     /* Success! */
     *d = '\0';
-    *lenp = d - save;
-
-    return save;
-}
-
-/*
-=for apidoc bytes_from_utf8
-
-Converts a potentially UTF-8 encoded string C<s> of length C<*lenp> into native
-byte encoding.  On input, the boolean C<*is_utf8p> gives whether or not C<s> is
-actually encoded in UTF-8.
+    *lenp = d - d0;
 
-Unlike L</utf8_to_bytes> but like L</bytes_to_utf8>, this is non-destructive of
-the input string.
+    if (result_as != PL_utf8_to_bytes_overwrite) {
+        *s_ptr = d0;
+        if (result_as == PL_utf8_to_bytes_use_temporary) {
+            SAVEFREEPV(*s_ptr);
+        }
+    }
 
-Do nothing if C<*is_utf8p> is 0, or if there are code points in the string
-not expressible in native byte encoding.  In these cases, C<*is_utf8p> and
-C<*lenp> are unchanged, and the return value is the original C<s>.
+    return PL_converted;
 
-Otherwise, C<*is_utf8p> is set to 0, and the return value is a pointer to a
-newly created string containing a downgraded copy of C<s>, and whose length is
-returned in C<*lenp>, updated.  The new string is C<NUL>-terminated.  The
-caller is responsible for arranging for the memory used by this string to get
-freed.
-
-Upon successful return, the number of variants in the string can be computed by
-having saved the value of C<*lenp> before the call, and subtracting the
-after-call value of C<*lenp> from it.
+  cant_convert: ;
 
-=cut
+    /* Here, we found a malformation in the input.  This won't happen except
+     * when the per-word algorithm was used in the first pass, because that may
+     * miss some malformations.  It determined that the only start bytes in the
+     * text are C2 and C3, but didn't examine it to make sure each of those was
+     * followed by precisely one continuation, for example.
+     *
+     * If the result is in newly allocated memory, just free it */
+    if (result_as != PL_utf8_to_bytes_overwrite) {
+        Safefree(d0);
+        return PL_cant_convert;
+    }
 
-There is a macro that avoids this function call, but this is retained for
-anyone who calls it with the Perl_ prefix */
+    /* Otherwise, we have to undo all we've done before, back down to the first
+     * UTF-8 variant.  Note that each 2-byte variant we've done so far
+     * (converted to single byte) slides things to the left one byte, and so we
+     * have bytes that haven't been written over.
+     *
+     * Here, 'd' points to the next position to overwrite, and 's' points to
+     * the first invalid byte.  That means 'd's contents haven't been changed
+     * yet, nor has anything else beyond it in the string.  In restoring to the
+     * original contents, we don't need to do anything past (d-1).
+     *
+     * In particular, the bytes from 'd' to 's' have not been changed.  This
+     * loop uses a new variable 's1' (to avoid confusing 'source' and
+     * 'destination') set to 'd',  and moves 's' and 's1' in lock step back so
+     * that afterwards, 's1' points to the first changed byte that will be the
+     * source for the first byte (or bytes) at 's' that need to be changed
+     * back.  Note that s1 can expand to two bytes */
+    U8 * s1 = d;
+    while (s >= d) {
+        s--;
+        if (! UVCHR_IS_INVARIANT(*s1)) {
+            s--;
+        }
+        s1--;
+    }
 
-U8 *
-Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *lenp, bool *is_utf8p)
-{
-    PERL_ARGS_ASSERT_BYTES_FROM_UTF8;
-    PERL_UNUSED_CONTEXT;
+    /* Do the changing back */
+    while (s1 >= first_variant) {
+        if (UVCHR_IS_INVARIANT(*s1)) {
+            *s-- = *s1--;
+        }
+        else {
+            *s-- = UTF8_EIGHT_BIT_LO(*s1);
+            *s-- = UTF8_EIGHT_BIT_HI(*s1);
+            s1--;
+        }
+    }
 
-    return bytes_from_utf8_loc(s, lenp, is_utf8p, NULL);
+    return PL_cant_convert;
 }
 
-/*
-=for apidoc bytes_from_utf8_loc
-
-Like C<L<perlapi/bytes_from_utf8>()>, but takes an extra parameter, a pointer
-to where to store the location of the first character in C<"s"> that cannot be
-converted to non-UTF8.
-
-If that parameter is C<NULL>, this function behaves identically to
-C<bytes_from_utf8>.
-
-Otherwise if C<*is_utf8p> is 0 on input, the function behaves identically to
-C<bytes_from_utf8>, except it also sets C<*first_non_downgradable> to C<NULL>.
-
-Otherwise, the function returns a newly created C<NUL>-terminated string
-containing the non-UTF8 equivalent of the convertible first portion of
-C<"s">.  C<*lenp> is set to its length, not including the terminating C<NUL>.
-If the entire input string was converted, C<*is_utf8p> is set to a FALSE value,
-and C<*first_non_downgradable> is set to C<NULL>.
-
-Otherwise, C<*first_non_downgradable> is set to point to the first byte of the
-first character in the original string that wasn't converted.  C<*is_utf8p> is
-unchanged.  Note that the new string may have length 0.
-
-Another way to look at it is, if C<*first_non_downgradable> is non-C<NULL> and
-C<*is_utf8p> is TRUE, this function starts at the beginning of C<"s"> and
-converts as many characters in it as possible stopping at the first one it
-finds that can't be converted to non-UTF-8.  C<*first_non_downgradable> is
-set to point to that.  The function returns the portion that could be converted
-in a newly created C<NUL>-terminated string, and C<*lenp> is set to its length,
-not including the terminating C<NUL>.  If the very first character in the
-original could not be converted, C<*lenp> will be 0, and the new string will
-contain just a single C<NUL>.  If the entire input string was converted,
-C<*is_utf8p> is set to FALSE and C<*first_non_downgradable> is set to C<NULL>.
-
-Upon successful return, the number of variants in the converted portion of the
-string can be computed by having saved the value of C<*lenp> before the call,
-and subtracting the after-call value of C<*lenp> from it.
-
-=cut
-
-
-*/
-
 U8 *
-Perl_bytes_from_utf8_loc(const U8 *s, STRLEN *lenp, bool *is_utf8p, const U8** first_unconverted)
+Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp)
 {
-    PERL_ARGS_ASSERT_BYTES_FROM_UTF8_LOC;
-
-    if (! *is_utf8p) {
-        if (first_unconverted) {
-            *first_unconverted = NULL;
-        }
-
-        return (U8 *) s;
-    }
-
-    const U8 * const s0 = s;
-    const U8 * const send = s + *lenp;
-    const U8 * first_variant;
+    PERL_ARGS_ASSERT_UTF8_TO_BYTES;
 
-    /* The initial portion of 's' that consists of invariants can be Copied
-     * as-is.  If it is entirely invariant, the whole thing can be Copied. */
-    if (is_utf8_invariant_string_loc(s, *lenp, &first_variant)) {
-        first_variant = send;
+    if (utf8_to_bytes_overwrite(&s, lenp)) {
+        return s;
     }
 
-    U8 *d;
-    Newx(d, (*lenp) + 1, U8);
-    Copy(s, d, first_variant - s, U8);
+    *lenp = (STRLEN) -1;
+    return NULL;
+}
 
-    U8 *converted_start = d;
-    d += first_variant - s;
-    s = first_variant;
+U8 *
+Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *lenp, bool *is_utf8p)
+{
+    PERL_ARGS_ASSERT_BYTES_FROM_UTF8;
 
-    while (s < send) {
-        U8 c = *s++;
-        if (! UTF8_IS_INVARIANT(c)) {
-
-            /* Then it is multi-byte encoded.  If the code point is above 0xFF,
-             * have to stop now */
-            if (UNLIKELY (! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s - 1, send))) {
-                if (first_unconverted) {
-                    *first_unconverted = s - 1;
-                    goto finish_and_return;
-                }
-                else {
-                    Safefree(converted_start);
-                    return (U8 *) s0;
-                }
+    if (*is_utf8p) {
+        PL_utf8_to_bytes_ret ret = utf8_to_bytes_new_pv(&s, lenp);
+        if (ret) {
+            *is_utf8p = false;
+
+            /* Our callers are always expecting new memory upon success.  Give
+             * it to them, adding a trailing NUL if not already there */
+            if (ret == PL_was_noop) {
+                U8 * new_s;
+                Newx(new_s, *lenp + 1, U8);
+                Copy(s, new_s, *lenp, U8);
+                new_s[*lenp] = '\0';
+                s = new_s;
             }
-
-            c = EIGHT_BIT_UTF8_TO_NATIVE(c, *s);
-            s++;
         }
-        *d++ = c;
     }
 
-    /* Here, converted the whole of the input */
-    *is_utf8p = FALSE;
-    if (first_unconverted) {
-        *first_unconverted = NULL;
-    }
-
-  finish_and_return:
-    *d = '\0';
-    *lenp = d - converted_start;
-
-    /* Trim unused space */
-    Renew(converted_start, *lenp + 1, U8);
-
-    return converted_start;
+    return (U8 *) s;
 }
 
 /*
diff --git a/utf8.h b/utf8.h
index e4f98270b2bb..365dbb021b27 100644
--- a/utf8.h
+++ b/utf8.h
@@ -1296,8 +1296,25 @@ point's representation.
 #define SHARP_S_SKIP 2
 
 #define is_utf8_char_buf(buf, buf_end) isUTF8_CHAR(buf, buf_end)
-#define bytes_from_utf8(s, lenp, is_utf8p)                                  \
-                            bytes_from_utf8_loc(s, lenp, is_utf8p, 0)
+
+typedef enum {
+    PL_cant_convert = 0,
+    PL_was_noop,
+    PL_converted,
+} PL_utf8_to_bytes_ret;
+
+typedef enum {
+    PL_utf8_to_bytes_overwrite = 0,
+    PL_utf8_to_bytes_new_memory,
+    PL_utf8_to_bytes_use_temporary,
+} Perl_utf8_to_bytes_arg;
+
+#define Perl_utf8_to_bytes_overwrite(mTHX, s, l)                            \
+        Perl_utf8_to_bytes_(aTHX_ s, l, PL_utf8_to_bytes_overwrite)
+#define Perl_utf8_to_bytes_new_pv(mTHX, s, l)                               \
+        Perl_utf8_to_bytes_(aTHX_ (U8 **) s, l, PL_utf8_to_bytes_new_memory)
+#define Perl_utf8_to_bytes_temp_pv(mTHX, s, l)                              \
+        Perl_utf8_to_bytes_(aTHX_ (U8 **) s, l, PL_utf8_to_bytes_use_temporary)
 
 /* Do not use; should be deprecated.  Use isUTF8_CHAR() instead; this is
  * retained solely for backwards compatibility */