This patch makes it possible to hook into processing of constants in Perl.

This is a very improved version of my previous patch.  Note the following
design decisions: we differentiate integers/octals+


--- ./op.c~	Tue Dec  9 00:57:24 1997
+++ ./op.c	Mon Dec 15 19:36:24 1997
@@ -1496,11 +1496,39 @@ scope(OP *o)
     return o;
 }
 
+static void
+restore_hints(void *p)
+{
+    GV *gv = (GV*)p;
+
+    if (!GvHV(gv))
+	return;
+    SvREFCNT_dec((SV*)GvHV(gv));
+    GvHV(gv) = NULL;
+}
+
+void
+save_hints(void)
+{
+    GV* gv = gv_fetchpv("\010", TRUE, SVt_PV); /* *^H */
+
+    SAVEI32(hints);
+    if (hints & HINT_LOCALIZE_HH) {
+	SAVESPTR(GvHV(gv));
+	GvHV(gv) = newHVhv(GvHV(gv));
+	SAVEFREESV(GvHV(gv));
+    } else {
+	/* Common case, do minimal job possible */
+	SAVEDESTRUCTOR(restore_hints,gv);
+    }
+}
+
 int
 block_start(int full)
 {
     dTHR;
     int retval = savestack_ix;
+
     SAVEI32(comppad_name_floor);
     if (full) {
 	if ((comppad_name_fill = AvFILL(comppad_name)) > 0)
@@ -1515,7 +1543,7 @@ block_start(int full)
     SAVEI32(padix_floor);
     padix_floor = padix;
     pad_reset_pending = FALSE;
-    SAVEI32(hints);
+    save_hints();
     hints &= ~HINT_BLOCK_SCOPE;
     return retval;
 }
--- ./hv.c~	Thu Dec  4 01:00:18 1997
+++ ./hv.c	Mon Dec 15 19:55:42 1997
@@ -734,6 +734,55 @@ newHV(void)
     return hv;
 }
 
+HV *
+newHVhv(HV *ohv)
+{
+    register HV *hv;
+    register XPVHV* xhv;
+    STRLEN hv_max = ohv ? HvMAX(ohv) : 0;
+    STRLEN hv_fill = ohv ? HvFILL(ohv) : 0;
+
+    hv = (HV*)NEWSV(502,0);
+    sv_upgrade((SV *)hv, SVt_PVHV);
+    xhv = (XPVHV*)SvANY(hv);
+    SvPOK_off(hv);
+    SvNOK_off(hv);
+#ifndef NODEFAULT_SHAREKEYS    
+    HvSHAREKEYS_on(hv);         /* key-sharing on by default */
+#endif    
+    while (hv_max && hv_max + 1 >= hv_fill * 2)
+	hv_max = hv_max / 2;	/* Is always 2^n-1 */
+    xhv->xhv_max = hv_max;
+    xhv->xhv_fill = 0;
+    xhv->xhv_pmroot = 0;
+    (void)hv_iterinit(hv);	/* so each() will start off right */
+    if (!hv_fill)
+	return hv;
+
+#if 0
+    if (!SvRMAGICAL(ohv) || !mg_find((SV*)ohv,'P')) {
+	/* Quick way ???*/
+    } 
+    else 
+#endif
+    {
+	HE *entry;
+	I32 hv_riter = HvRITER(ohv);	/* current root of iterator */
+	HE *hv_eiter = HvEITER(ohv);	/* current entry of iterator */
+	
+	/* Slow way */
+	hv_iterinit(hv);
+	while (entry = hv_iternext(ohv)) {
+	    hv_store(hv, HeKEY(entry), HeKLEN(entry), 
+		     SvREFCNT_inc(HeVAL(entry)), HeHASH(entry));
+	}
+	HvRITER(ohv) = hv_riter;
+	HvEITER(ohv) = hv_eiter;
+    }
+    
+    return hv;
+}
+
 void
 hv_free_ent(HV *hv, register HE *entry)
 {
--- ./t/pragma/overload.t~	Tue Nov 25 06:55:48 1997
+++ ./t/pragma/overload.t	Tue Dec 16 15:32:48 1997
@@ -48,7 +48,20 @@ $| = 1;
 print "1..",&last,"\n";
 
 sub test {
-  $test++; if (shift) {print "ok $test\n";1} else {print "not ok $test\n";0}
+  $test++; 
+  if (@_ > 1) {
+    if ($_[0] eq $_[1]) {
+      print "ok $test\n";
+    } else {
+      print "not ok $test: '$_[0]' ne '$_[1]'\n";
+    }
+  } else {
+    if (shift) {
+      print "ok $test\n";
+    } else {
+      print "not ok $test\n";
+    } 
+  }
 }
 
 $a = new Oscalar "087";
@@ -359,5 +372,70 @@ test(($aI | 3) eq '_<<_xx_<<_');	# 114
 # warn $aII << 3;
 test(($aII << 3) eq '_<<_087_<<_');	# 115
 
+{
+  BEGIN { $int = 7; overload::constant 'integer' => sub {$int++; shift}; }
+  $out = 2**10;
+}
+test($int, 9);		# 116
+test($out, 1024);		# 117
+
+$foo = 'foo';
+$foo1 = 'f\'o\\o';
+{
+  BEGIN { $q = $qr = 7; 
+	  overload::constant 'q' => sub {$q++; push @q, shift, ($_[1] || 'none'); shift},
+			     'qr' => sub {$qr++; push @qr, shift, ($_[1] || 'none'); shift}; }
+  $out = 'foo';
+  $out1 = 'f\'o\\o';
+  $out2 = "a\a$foo,\,";
+  /b\b$foo.\./;
+}
+
+test($out, 'foo');		# 118
+test($out, $foo);		# 119
+test($out1, 'f\'o\\o');		# 120
+test($out1, $foo1);		# 121
+test($out2, "a\afoo,\,");	# 122
+test("@q", "foo q f'o\\\\o q a\\a qq ,\\, qq");	# 123
+test($q, 11);			# 124
+test("@qr", "b\\b qq .\\. qq");	# 125
+test($qr, 9);			# 126
+
+{
+  $_ = '!<b>!foo!<-.>!';
+  BEGIN { overload::constant 'q' => sub {push @q1, shift, ($_[1] || 'none'); "_<" . (shift) . ">_"},
+			     'qr' => sub {push @qr1, shift, ($_[1] || 'none'); "!<" . (shift) . ">!"}; }
+  $out = 'foo';
+  $out1 = 'f\'o\\o';
+  $out2 = "a\a$foo,\,";
+  $res = /b\b$foo.\./;
+  $a = <<EOF;
+oups
+EOF
+  $b = <<'EOF';
+oups1
+EOF
+  $c = bareword;
+  m'try it';
+  s'first part'second part';
+  s/yet another/tail here/;
+  tr/z-Z/z-Z/;
+}
+
+test($out, '_<foo>_');		# 117
+test($out1, '_<f\'o\\o>_');		# 128
+test($out2, "_<a\a>_foo_<,\,>_");	# 129
+test("@q1", "foo q f'o\\\\o q a\\a qq ,\\, qq oups
+ qq oups1
+ q second part q tail here s z-Z tr z-Z tr");	# 130
+test("@qr1", "b\\b qq .\\. qq try it q first part q yet another qq");	# 131
+test($res, 1);			# 132
+test($a, "_<oups
+>_");	# 133
+test($b, "_<oups1
+>_");	# 134
+test($c, "bareword");	# 135
+
+
 # Last test is:
-sub last {115}
+sub last {135}
--- ./lib/overload.pm~	Tue Nov 25 06:52:52 1997
+++ ./lib/overload.pm	Tue Dec 16 15:29:00 1997
@@ -100,6 +100,32 @@ sub mycan {				# Real can would leave st
   return undef;
 }
 
+%constants = (
+	      'integer'	  =>  0x1000, 
+	      'float'	  =>  0x2000,
+	      'binary'	  =>  0x4000,
+	      'q'	  =>  0x8000,
+	      'qr'	  => 0x10000,
+	     );
+
+sub constant {
+  # Arguments: what, sub
+  while (@_) {
+    $^H{$_[0]} = $_[1];
+    $^H |= $constants{$_[0]} | 0x20000;
+    shift, shift;
+  }
+}
+
+sub unconstant {
+  # Arguments: what, sub
+  while (@_) {
+    delete $^H{$_[0]};
+    $^H &= ~ $constants{$_[0]};
+    shift, shift;
+  }
+}
+
 1;
 
 __END__
@@ -522,6 +548,72 @@ Returns C<undef> or a reference to the m
 
 =back
 
+=head1 Overloading constants
+
+For some application Perl parser mangles constants too much.  It is possible
+to hook into this process via overload::constant() and overload::unconstant()
+functions.
+
+These functions take a hash as an argument.  The recognized keys of this hash
+are
+
+=over 8
+
+=item integer
+
+to overload integer constants,
+
+=item float
+
+to overload floating point constants,
+
+=item binary
+
+to overload octal and hexadecimal constants,
+
+=item q
+
+to overload C<q>-quoted strings, constant pieces of C<qq>- and C<qx>-quoted
+strings and here-documents,
+
+=item qr
+
+to overload constant pieces of regular expressions.
+
+=back
+
+The corresponding values are references to functions which take three arguments:
+the first one is the I<initial> string form of the constant, the second one
+is how Perl interprets this constant, the third one is how the constant is used.  
+Note that the initial string form does not
+contain string delimiters, and has backslashes in backslash-delimiter 
+combinations stripped (thus the value of delimiter is not relevant for
+processing of this string).  The return value of this function is how this 
+constant is going to be interpreted by Perl.  The third argument is undefined
+unless for overloaded C<q>- and C<qr>- constants, it is C<q> in single-quote
+context (comes from strings, regular expressions, and single-quote HERE
+documents), it is C<tr> for arguments of C<tr>/C<y> operators, 
+it is C<s> for right-hand side of C<s>-operator, and it is C<qq> otherwise.
+
+Since an expression C<"ab$cd,,"> is just a shortcut for C<'ab' . $cd . ',,'>,
+it is expected that overloaded constant strings are equipped with reasonable
+overloaded catenation operator, otherwise absurd results will result.  
+Similarly, negative numbers are considered as negations of positive constants.
+
+Note that it is probably meaningless to call the functions overload::constant()
+and overload::unconstant() from anywhere but import() and unimport() methods.
+From these methods they may be called as
+
+	sub import {
+	  shift;
+	  return unless @_;
+	  die "unknown import: @_" unless @_ == 1 and $_[0] eq ':constant';
+	  overload::constant integer => sub {Math::BigInt->new(shift)};
+	}
+
+B<BUGS> Currently overloaded-ness of constants does not propagate 
+into C<eval '...'>.
+
 =head1 IMPLEMENTATION
 
 What follows is subject to change RSN.
@@ -596,6 +688,8 @@ For the purpose of inheritance every ove
 C<fallback> is present (possibly undefined). This may create
 interesting effects if some package is not overloaded, but inherits
 from two overloaded packages.
+
+Barewords are not covered by overloaded string constants.
 
 This document is confusing.
 
--- ./lib/Math/BigInt.pm~	Tue Nov 25 06:52:32 1997
+++ ./lib/Math/BigInt.pm	Tue Dec 16 03:02:00 1997
@@ -36,6 +36,12 @@ sub stringify { "${$_[0]}" }
 sub numify { 0 + "${$_[0]}" }	# Not needed, additional overhead
 				# comparing to direct compilation based on
 				# stringify
+sub import {
+  shift;
+  return unless @_;
+  die "unknown import: @_" unless @_ == 1 and $_[0] eq ':constant';
+  overload::constant integer => sub {Math::BigInt->new(shift)};
+}
 
 $zero = 0;
 
@@ -383,6 +389,19 @@ are not numbers, as well as the result o
    '   -123 123 123'               canonical value '-123123123'
    '1 23 456 7890'                 canonical value '+1234567890'
 
+
+=head1 Autocreating constants
+
+After C<use Math::BigInt ':constant'> all the integer decimal constants
+in the given scope are converted to C<Math::BigInt>.  This convertion
+happens at compile time.
+
+In particular
+
+  perl -MMath::BigInt=:constant -e 'print 2**100'
+
+print the integer value of C<2**100>.  Note that without convertion of 
+constants the expression 2**100 will be calculatted as floating point number.
 
 =head1 BUGS
 
--- ./global.sym.orig	Fri May 15 09:59:13 1998
+++ ./global.sym	Wed May 27 18:43:26 1998
@@ -474,6 +474,7 @@ newGVREF
 newGVgen
 newHV
 newHVREF
+newHVhv
 newIO
 newLISTOP
 newLOGOP
@@ -921,6 +922,7 @@ save_freesv
 save_gp
 save_hash
 save_helem
+save_hints
 save_hptr
 save_int
 save_item
--- ./proto.h.orig	Fri May 15 10:00:37 1998
+++ ./proto.h	Wed May 27 18:44:32 1998
@@ -330,6 +330,7 @@ GV*	newGVgen _((char* pack));
 OP*	newGVREF _((I32 type, OP* o));
 OP*	newHVREF _((OP* o));
 HV*	newHV _((void));
+HV*	newHVhv _((HV *hv));
 IO*	newIO _((void));
 OP*	newLISTOP _((I32 type, I32 flags, OP* first, OP* last));
 OP*	newPMOP _((I32 type, I32 flags));
@@ -444,6 +445,7 @@ void	save_freepv _((char* pv));
 void	save_gp _((GV* gv, I32 empty));
 HV*	save_hash _((GV* gv));
 void	save_helem _((HV* hv, SV *key, SV **sptr));
+void	save_hints _((void));
 void	save_hptr _((HV** hptr));
 void	save_I16 _((I16* intp));
 void	save_I32 _((I32* intp));
--- ./pp_ctl.c.orig	Fri May 15 10:00:33 1998
+++ ./pp_ctl.c	Wed May 27 18:45:20 1998
@@ -2168,7 +2168,7 @@ sv_compile_2op(SV *sv, OP** startop, cha
        introduced within evals. See force_ident(). GSAR 96-10-12 */
     safestr = savepv(tmpbuf);
     SAVEDELETE(defstash, safestr, strlen(safestr));
-    SAVEI32(hints);
+    save_hints();
 #ifdef OP_IN_REGISTER
     opsave = op;
 #else
@@ -2496,7 +2496,7 @@ PP(pp_require)
     rsfp = tryrsfp;
     name = savepv(name);
     SAVEFREEPV(name);
-    SAVEI32(hints);
+    save_hints();
     hints = 0;
  
     /* switch to eval mode */
@@ -2556,7 +2556,7 @@ PP(pp_entereval)
        introduced within evals. See force_ident(). GSAR 96-10-12 */
     safestr = savepv(tmpbuf);
     SAVEDELETE(defstash, safestr, strlen(safestr));
-    SAVEI32(hints);
+    save_hints();
     hints = op->op_targ;
 
     push_return(op->op_next);
--- ./perl.h~	Wed May 27 18:35:32 1998
+++ ./perl.h	Wed May 27 18:46:22 1998
@@ -1644,6 +1644,13 @@ typedef enum {
 #define HINT_STRICT_VARS	0x00000400
 #define HINT_LOCALE		0x00000800
 
+#define HINT_NEW_INTEGER	0x00001000
+#define HINT_NEW_FLOAT		0x00002000
+#define HINT_NEW_BINARY		0x00004000
+#define HINT_NEW_STRING		0x00008000
+#define HINT_NEW_RE		0x00010000
+#define HINT_LOCALIZE_HH	0x00020000 /* %^H needs to be copied */
+
 /* Various states of an input record separator SV (rs, nrs) */
 #define RsSNARF(sv)   (! SvOK(sv))
 #define RsSIMPLE(sv)  (SvOK(sv) && SvCUR(sv))
--- ./toke.c.orig	Fri May 15 10:02:39 1998
+++ ./toke.c	Wed May 27 18:50:15 1998
@@ -49,6 +49,7 @@ static int uni _((I32 f, char *s));
 #endif
 static char * filter_gets _((SV *sv, PerlIO *fp, STRLEN append));
 static void restore_rsfp _((void *f));
+static SV *new_constant _((char *s, STRLEN len, char *key, SV *sv, SV *pv, char *type));
 static void restore_expect _((void *e));
 static void restore_lex_expect _((void *e));
 
@@ -598,20 +599,23 @@ q(SV *sv)
     register char *s;
     register char *send;
     register char *d;
-    STRLEN len;
+    STRLEN len = 0;
+    SV *pv = sv;
 
     if (!SvLEN(sv))
-	return sv;
+	goto finish;
 
     s = SvPV_force(sv, len);
     if (SvIVX(sv) == -1)
-	return sv;
+	goto finish;
     send = s + len;
     while (s < send && *s != '\\')
 	s++;
     if (s == send)
-	return sv;
+	goto finish;
     d = s;
+    if ( hints & HINT_NEW_STRING )
+	pv = sv_2mortal(newSVpv(SvPVX(pv), len));
     while (s < send) {
 	if (*s == '\\') {
 	    if (s + 1 < send && (s[1] == '\\'))
@@ -621,7 +625,9 @@ q(SV *sv)
     }
     *d = '\0';
     SvCUR_set(sv, d - SvPVX(sv));
-
+  finish:
+    if ( hints & HINT_NEW_STRING )
+       return new_constant(NULL, 0, "q", sv, pv, "q");
     return sv;
 }
 
@@ -637,10 +643,19 @@ sublex_start(void)
     }
     if (op_type == OP_CONST || op_type == OP_READLINE) {
 	SV *sv = q(lex_stuff);
-	STRLEN len;
-	char *p = SvPV(sv, len);
-	yylval.opval = (OP*)newSVOP(op_type, 0, newSVpv(p, len));
-	SvREFCNT_dec(sv);
+
+	if (SvTYPE(sv) == SVt_PVIV) {
+	    /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
+	    STRLEN len;
+	    char *p;
+	    SV *nsv;
+
+	    p = SvPV(sv, len);
+	    nsv = newSVpv(p, len);
+	    SvREFCNT_dec(sv);
+	    sv = nsv;
+	} 
+	yylval.opval = (OP*)newSVOP(op_type, 0, sv);
 	lex_stuff = Nullsv;
 	return THING;
     }
@@ -1033,9 +1048,17 @@ scan_const(char *start)
     }
 
     /* return the substring (via yylval) only if we parsed anything */
-    if (s > bufptr)
+    if (s > bufptr) {
+	if ( hints & ( lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) )
+	    sv = new_constant(start, s - start, (lex_inpat ? "qr" : "q"), 
+			      sv, Nullsv,
+			      ( lex_inwhat == OP_TRANS 
+				? "tr"
+				: ( (lex_inwhat == OP_SUBST && !lex_inpat)
+				    ? "s"
+				    : "qq")));
 	yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
-    else
+    } else
 	SvREFCNT_dec(sv);
     return s;
 }
@@ -1667,6 +1690,8 @@ yylex(void)
 	    SV *sv = newSVsv(linestr);
 	    if (!lex_inpat)
 		sv = q(sv);
+	    else if ( hints & HINT_NEW_RE )
+		sv = new_constant(NULL, 0, "qr", sv, sv, "q");
 	    yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
 	    s = bufend;
 	}
@@ -4700,6 +4725,74 @@ checkcomma(register char *s, char *name,
     }
 }
 
+static SV *
+new_constant(char *s, STRLEN len, char *key, SV *sv, SV *pv, char *type) 
+{
+    HV *table = perl_get_hv("\10", FALSE); /* ^H */
+    dTHR;
+    dSP;
+    BINOP myop;
+    SV *res;
+    bool oldcatch = CATCH_GET;
+    SV **cvp;
+    SV *cv, *typesv;
+    char buf[128];
+	    
+    if (!table) {
+	yyerror("%^H is not defined");
+	return sv;
+    }
+    cvp = hv_fetch(table, key, strlen(key), FALSE);
+    if (!cvp || !SvOK(*cvp)) {
+	sprintf(buf,"$^H{%s} is not defined", key);
+	yyerror(buf);
+	return sv;
+    }
+    sv_2mortal(sv);			/* Parent created it permanently */
+    cv = *cvp;
+    if (!pv)
+	pv = sv_2mortal(newSVpv(s, len));
+    if (type)
+	typesv = sv_2mortal(newSVpv(type, 0));
+    else
+	typesv = &sv_undef;
+    CATCH_SET(TRUE);
+    Zero(&myop, 1, BINOP);
+    myop.op_last = (OP *) &myop;
+    myop.op_next = Nullop;
+    myop.op_flags = OPf_WANT_SCALAR | OPf_STACKED;
+
+    ENTER;
+    SAVEOP();
+    op = (OP *) &myop;
+    if (PERLDB_SUB && curstash != debstash)
+	op->op_private |= OPpENTERSUB_DB;
+    PUTBACK;
+    pp_pushmark(ARGS);
+
+    EXTEND(sp, 3);
+    PUSHs(pv);
+    PUSHs(sv);
+    PUSHs(typesv);
+    PUSHs(cv);
+    PUTBACK;
+
+    if (op = pp_entersub(ARGS))
+      runops();
+    LEAVE;
+    SPAGAIN;
+
+    res = POPs;
+    PUTBACK;
+    CATCH_SET(oldcatch);
+
+    if (!SvOK(res)) {
+	sprintf(buf,"Call to &{$^H{%s}} did not return a defined value", key);
+	yyerror(buf);
+    }
+    return SvREFCNT_inc(res);
+}
+
 static char *
 scan_word(register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
 {
@@ -5552,7 +5645,8 @@ scan_num(char *start)
 
 		  digit:
 		    n = u << shift;	/* make room for the digit */
-		    if (!overflowed && (n >> shift) != u) {
+		    if (!overflowed && (n >> shift) != u
+			&& !(hints & HINT_NEW_BINARY)) {
 			warn("Integer overflow in %s number",
 			     (shift == 4) ? "hex" : "octal");
 			overflowed = TRUE;
@@ -5568,6 +5662,8 @@ scan_num(char *start)
 	  out:
 	    sv = NEWSV(92,0);
 	    sv_setuv(sv, u);
+	    if ( hints & HINT_NEW_BINARY)
+		sv = new_constant(start, s - start, "binary", sv, Nullsv, NULL);
 	}
 	break;
 
@@ -5669,6 +5765,9 @@ scan_num(char *start)
 	    sv_setiv(sv, tryiv);
 	else
 	    sv_setnv(sv, value);
+	if ( floatit ? (hints & HINT_NEW_FLOAT) : (hints & HINT_NEW_INTEGER) )
+	    sv = new_constant(tokenbuf, d - tokenbuf, 
+			      (floatit ? "float" : "integer"), sv, Nullsv, NULL);
 	break;
     }