From 6e73b504491d67ebf9d9cb245115f36b6e04709c Mon Sep 17 00:00:00 2001
From: Andrew Dunstan <andrew@dunslane.net>
Date: Sat, 1 Dec 2007 15:20:34 +0000
Subject: [PATCH] Workaround for perl problem where evaluating UTF8 regexes can
 cause implicit loading of modules, thereby breaking Safe rules. We compile
 and call a tiny perl function on trusted interpreter init, after which the
 problem does not occur.

---
 src/pl/plperl/GNUmakefile |  3 ++-
 src/pl/plperl/plperl.c    | 51 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/src/pl/plperl/GNUmakefile b/src/pl/plperl/GNUmakefile
index 3e1e0487bb..383d479218 100644
--- a/src/pl/plperl/GNUmakefile
+++ b/src/pl/plperl/GNUmakefile
@@ -1,5 +1,5 @@
 # Makefile for PL/Perl
-# $PostgreSQL: pgsql/src/pl/plperl/GNUmakefile,v 1.31 2007/07/25 10:17:46 mha Exp $
+# $PostgreSQL: pgsql/src/pl/plperl/GNUmakefile,v 1.32 2007/12/01 15:20:34 adunstan Exp $
 
 subdir = src/pl/plperl
 top_builddir = ../../..
@@ -27,6 +27,7 @@ override CFLAGS += -Wno-comment
 endif
 
 override CPPFLAGS := -I$(srcdir) $(CPPFLAGS) -I$(perl_archlibexp)/CORE
+override CFLAGS += -fPIC
 
 rpathdir = $(perl_archlibexp)/CORE
 
diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c
index 936bbcc082..5f4677c360 100644
--- a/src/pl/plperl/plperl.c
+++ b/src/pl/plperl/plperl.c
@@ -1,7 +1,7 @@
 /**********************************************************************
  * plperl.c - perl as a procedural language for PostgreSQL
  *
- *	  $PostgreSQL: pgsql/src/pl/plperl/plperl.c,v 1.132 2007/11/15 22:25:17 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plperl/plperl.c,v 1.133 2007/12/01 15:20:34 adunstan Exp $
  *
  **********************************************************************/
 
@@ -149,6 +149,8 @@ static HV  *plperl_spi_execute_fetch_result(SPITupleTable *, int, int);
 static SV  *newSVstring(const char *str);
 static SV **hv_store_string(HV *hv, const char *key, SV *val);
 static SV **hv_fetch_string(HV *hv, const char *key);
+static SV  *plperl_create_sub(char *proname, char *s, bool trusted);
+static SV  *plperl_call_perl_func(plperl_proc_desc *desc, FunctionCallInfo fcinfo);
 
 /*
  * This routine is a crock, and so is everyplace that calls it.  The problem
@@ -504,6 +506,53 @@ plperl_safe_init(void)
 	else
 	{
 		eval_pv(SAFE_OK, FALSE);
+		if (GetDatabaseEncoding() == PG_UTF8)
+		{
+
+			/* 
+			 * Fill in just enough information to set up this perl
+			 * function in the safe container and call it.
+			 * For some reason not entirely clear, it prevents errors that
+			 * can arise from the regex code later trying to load
+			 * utf8 modules.
+			 */
+
+			plperl_proc_desc desc;			
+			FunctionCallInfoData fcinfo;
+			FmgrInfo outfunc;
+			HeapTuple   typeTup;
+			Form_pg_type typeStruct;
+			SV *ret;
+			SV *func;
+
+			/* make sure we don't call ourselves recursively */
+			plperl_safe_init_done = true;
+
+			/* compile the function */
+			func = plperl_create_sub(
+				"utf8fix",
+				"return shift =~ /\\xa9/i ? 'true' : 'false' ;",
+				true);
+
+
+			/* set up to call the function with a single text argument 'a' */
+			desc.reference = func;
+			desc.nargs = 1;
+			desc.arg_is_rowtype[0] = false;
+			fcinfo.argnull[0] = false;
+			fcinfo.arg[0] = 
+				DatumGetTextP(DirectFunctionCall1(textin, 
+												  CStringGetDatum("a")));
+			typeTup = SearchSysCache(TYPEOID,
+									 TEXTOID,
+									 0, 0, 0);
+			typeStruct = (Form_pg_type) GETSTRUCT(typeTup);
+			fmgr_info(typeStruct->typoutput,&(desc.arg_out_func[0]));
+			ReleaseSysCache(typeTup);
+			
+			/* and make the call */
+			ret = plperl_call_perl_func(&desc,&fcinfo);
+		}
 	}
 
 	plperl_safe_init_done = true;