diff -rc perl-5.8.3/patchlevel.h perl-5.8.3.patched/patchlevel.h
*** perl-5.8.3/patchlevel.h	Wed Jan 14 15:18:14 2004
--- perl-5.8.3.patched/patchlevel.h	Thu Nov 15 17:13:24 2007
***************
*** 123 ****
! 	,NULL
--- 123,124 ----
! 	,"REGEXP0 - fix for UTF-8 recoding in regexps - CVE-2007-5116"
! 	,NULL
diff -rc perl-5.8.3/regcomp.c perl-5.8.3.patched/regcomp.c
*** perl-5.8.3/regcomp.c	Sat Nov  1 16:00:37 2003
--- perl-5.8.3.patched/regcomp.c	Thu Nov 15 17:13:24 2007
***************
*** 126,132 ****
      I32		extralen;
      I32		seen_zerolen;
      I32		seen_evals;
!     I32		utf8;
  #if ADD_TO_REGEXEC
      char 	*starttry;		/* -Dr: where regtry was called. */
  #define RExC_starttry	(pRExC_state->starttry)
--- 126,135 ----
      I32		extralen;
      I32		seen_zerolen;
      I32		seen_evals;
!     I32		utf8;		/* whether the pattern is utf8 or not */
!     I32		orig_utf8;	/* whether the pattern was originally in utf8 */
! 				/* XXX use this for future optimisation of case
! 				 * where pattern must be upgraded to utf8. */
  #if ADD_TO_REGEXEC
      char 	*starttry;		/* -Dr: where regtry was called. */
  #define RExC_starttry	(pRExC_state->starttry)
***************
*** 152,157 ****
--- 155,161 ----
  #define RExC_seen_zerolen	(pRExC_state->seen_zerolen)
  #define RExC_seen_evals	(pRExC_state->seen_evals)
  #define RExC_utf8	(pRExC_state->utf8)
+ #define RExC_orig_utf8	(pRExC_state->orig_utf8)
  
  #define	ISMULT1(c)	((c) == '*' || (c) == '+' || (c) == '?')
  #define	ISMULT2(s)	((*s) == '*' || (*s) == '+' || (*s) == '?' || \
***************
*** 1746,1760 ****
      if (exp == NULL)
  	FAIL("NULL regexp argument");
  
!     RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
  
-     RExC_precomp = exp;
      DEBUG_r({
  	 if (!PL_colorset) reginitcolors();
  	 PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
  		       PL_colors[4],PL_colors[5],PL_colors[0],
! 		       (int)(xend - exp), RExC_precomp, PL_colors[1]);
      });
      RExC_flags = pm->op_pmflags;
      RExC_sawback = 0;
  
--- 1750,1766 ----
      if (exp == NULL)
  	FAIL("NULL regexp argument");
  
!     RExC_utf8 = RExC_orig_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8;
  
      DEBUG_r({
  	 if (!PL_colorset) reginitcolors();
  	 PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n",
  		       PL_colors[4],PL_colors[5],PL_colors[0],
! 		       (int)(xend - exp), exp, PL_colors[1]);
      });
+ 
+ redo_first_pass:
+     RExC_precomp = exp;
      RExC_flags = pm->op_pmflags;
      RExC_sawback = 0;
  
***************
*** 1779,1784 ****
--- 1785,1809 ----
      if (reg(pRExC_state, 0, &flags) == NULL) {
  	RExC_precomp = Nullch;
  	return(NULL);
+     }
+     if (RExC_utf8 && !RExC_orig_utf8) {
+         /* It's possible to write a regexp in ascii that represents unicode
+         codepoints outside of the byte range, such as via \x{100}. If we
+         detect such a sequence we have to convert the entire pattern to utf8
+         and then recompile, as our sizing calculation will have been based
+         on 1 byte == 1 character, but we will need to use utf8 to encode
+         at least some part of the pattern, and therefore must convert the whole
+         thing.
+         XXX: somehow figure out how to make this less expensive...
+         -- dmq */
+         STRLEN len = xend-exp;
+         DEBUG_r(PerlIO_printf(Perl_debug_log,
+ 	    "UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
+         exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
+         xend = exp + len;
+         RExC_orig_utf8 = RExC_utf8;
+         SAVEFREEPV(exp);
+         goto redo_first_pass;
      }
      DEBUG_r(PerlIO_printf(Perl_debug_log, "size %"IVdf" ", (IV)RExC_size));
  
diff -rc perl-5.8.3/t/op/pat.t perl-5.8.3.patched/t/op/pat.t
*** perl-5.8.3/t/op/pat.t	Fri Jan  2 00:19:02 2004
--- perl-5.8.3.patched/t/op/pat.t	Thu Nov 15 17:14:32 2007
***************
*** 6,12 ****
  
  $| = 1;
  
! print "1..1055\n";
  
  BEGIN {
      chdir 't' if -d 't';
--- 6,12 ----
  
  $| = 1;
  
! print "1..1057\n";
  
  BEGIN {
      chdir 't' if -d 't';
***************
*** 3254,3259 ****
--- 3254,3268 ----
  	);
      }
  }
+  
+ {
+     use warnings;
+     my @w;
+     local $SIG{__WARN__}=sub{push @w,"@_"};
+     my $c=qq(\x{DF}); 
+     ok($c=~/${c}|\x{100}/, "ASCII pattern that really is utf8");
+     ok(@w==0, "No warnings");
+ }    
  
  # last test 1055