# copy part of /data/debug.txt # FireFox: 29 Mar 2005 - 13:47 URL web.topic is Sandbox.BadMnsterEifel 29 Mar 2005 - 13:47 no encoding with ASCII or UTF8 29 Mar 2005 - 13:47 Final web and topic are Sandbox BadMnsterEifel (Native URL -> iso-8859-1) 29 Mar 2005 - 13:48 URL web.topic is Sandbox.BadMA?nsterEifel 29 Mar 2005 - 13:48 ASCII encoding 29 Mar 2005 - 13:48 Final web and topic are Sandbox BadMA?nsterEifel (ASCII URL -> iso-8859-1) 29 Mar 2005 - 13:48 URL web.topic is Sandbox.BadMAnsterEifel 29 Mar 2005 - 13:48 ASCII encoding 29 Mar 2005 - 13:48 Final web and topic are Sandbox BadMAnsterEifel (ASCII URL -> iso-8859-1) # Internet Explorer (with UTF8 enabled): 29 Mar 2005 - 14:20 URL web.topic is Sandbox.BadenWA¬rttemberg 29 Mar 2005 - 14:20 no encoding with ASCII or UTF8 29 Mar 2005 - 14:20 Final web and topic are Sandbox BadenWA¬rttemberg (Native URL -> iso-8859-1) 29 Mar 2005 - 14:20 URL web.topic is Sandbox.BadenWAAªrttemberg 29 Mar 2005 - 14:20 no encoding with ASCII or UTF8 29 Mar 2005 - 14:20 Final web and topic are Sandbox BadenWAAªrttemberg (Native URL -> iso-8859-1) 29 Mar 2005 - 14:20 URL web.topic is Sandbox.BadenWAA¦rttemberg 29 Mar 2005 - 14:20 no encoding with ASCII or UTF8 29 Mar 2005 - 14:20 Final web and topic are Sandbox BadenWAA¦rttemberg (Native URL -> iso-8859-1) 29 Mar 2005 - 14:20 URL web.topic is Sandbox.TestTopic2 29 Mar 2005 - 14:20 ASCII encoding 29 Mar 2005 - 14:20 Final web and topic are Sandbox TestTopic2 (ASCII URL -> iso-8859-1) # Internet Explorer (without UTF8): 29 Mar 2005 - 14:23 URL web.topic is Sandbox.BadenWrttemberg 29 Mar 2005 - 14:23 no encoding with ASCII or UTF8 29 Mar 2005 - 14:23 Final web and topic are Sandbox BadenWrttemberg (Native URL -> iso-8859-1) 29 Mar 2005 - 14:23 URL web.topic is Sandbox.BadenW?rttemberg 29 Mar 2005 - 14:23 ASCII encoding 29 Mar 2005 - 14:23 Final web and topic are Sandbox BadenW?rttemberg (ASCII URL -> iso-8859-1) 29 Mar 2005 - 14:23 URL web.topic is Sandbox.BadenWrttemberg 29 Mar 2005 - 
# 14:23 ASCII encoding
# 29 Mar 2005 - 14:23 Final web and topic are Sandbox BadenWrttemberg (ASCII URL -> iso-8859-1)
#
# Firefox (after adding some more writeDebug in function setupLocale()):
# 29 Mar 2005 - 17:03 sub setupLocale: $siteLocale is de_DE.ISO-8859-1
# 29 Mar 2005 - 17:03 sub setupLocale: $useLocale is 1
# 29 Mar 2005 - 17:03 URL web.topic is Sandbox.TestTopic2
# 29 Mar 2005 - 17:03 ASCII encoding
# 29 Mar 2005 - 17:03 Final web and topic are Sandbox TestTopic2 (ASCII URL -> iso-8859-1)
# 29 Mar 2005 - 17:03 sub setupLocale: $siteLocale is de_DE.ISO-8859-1
# 29 Mar 2005 - 17:03 sub setupLocale: $useLocale is 1
# 29 Mar 2005 - 17:03 URL web.topic is Sandbox.­berraschungsEi
# 29 Mar 2005 - 17:03 no encoding with ASCII or UTF8
# 29 Mar 2005 - 17:03 Final web and topic are Sandbox ­berraschungsEi (Native URL -> iso-8859-1)
# 29 Mar 2005 - 17:03 sub setupLocale: $siteLocale is de_DE.ISO-8859-1
# 29 Mar 2005 - 17:03 sub setupLocale: $useLocale is 1
# 29 Mar 2005 - 17:03 URL web.topic is Sandbox.?-berraschungsEi
# 29 Mar 2005 - 17:03 ASCII encoding
# 29 Mar 2005 - 17:03 Final web and topic are Sandbox ?-berraschungsEi (ASCII URL -> iso-8859-1)

# copy of relevant parts in TWiki.pm

# Excerpt of the locale set-up routine: logs $siteLocale and $useLocale and
# resets the charset/language variables to their fallback values.  The rest
# of the routine was elided ("...") by the author of this excerpt.
sub setupLocale {
    writeDebug("sub setupLocale: \$siteLocale is $siteLocale");

    $siteCharset  = 'ISO-8859-1';    # Default values if locale mis-configured
    $siteLang     = 'en';
    $siteFullLang = 'en-us';

    writeDebug("sub setupLocale: \$useLocale is $useLocale");

    ...
}

# Convert a web/topic name pair taken from the URL into the site charset.
#
# The combined string "$webName.$topicName" is matched against
# $regex{validAsciiStringRegex} and $regex{validUtf8StringRegex}; the outcome
# ('ASCII', 'UTF-8' or 'Native') is stored in $urlCharEncoding, which is not
# declared in this block (presumably a package variable - confirm).
#
# Only the UTF-8 case modifies the names:
#   - site charset ISO-8859-1: two-byte UTF-8 sequences are folded to single
#     bytes in place;
#   - site charset UTF-8: the bytes are decoded with Encode (Perl 5.8+);
#   - any other site charset: converted with Encode (Perl 5.8+) or
#     Unicode::MapUTF8 (older Perls).
#
# Parameters:
#   $webName, $topicName - names as received from the URL.
# Returns:
#   the list ( $webName, $topicName ), re-split from the converted string in
#   the UTF-8 case and unchanged otherwise.
sub convertUtf8URLtoSiteCharset {
    my ( $webName, $topicName ) = @_;

    # uncommented next line. 2005-03-29. JuditMays
    writeDebug("URL web.topic is $webName.$topicName");
    my $fullTopicName = "$webName.$topicName";

    # Detect character encoding of the full topic name from URL
    if ( $fullTopicName =~ $regex{validAsciiStringRegex} ) {
        $urlCharEncoding = 'ASCII';
        writeDebug("ASCII encoding");
    } elsif ( $fullTopicName =~ $regex{validUtf8StringRegex} ) {
        $urlCharEncoding = 'UTF-8';
        writeDebug("UTF8 encoding");

        # Convert into ISO-8859-1 if it is the site charset
        if ( $siteCharset =~ /^iso-?8859-?1$/i ) {
            # ISO-8859-1 maps onto first 256 codepoints of Unicode
            # (conversion from 'perldoc perluniintro')
            $fullTopicName =~ s/ ([\xC2\xC3]) ([\x80-\xBF]) /
                                 chr( ord($1) << 6 & 0xC0 | ord($2) & 0x3F )
                               /egx;
        } elsif ( $siteCharset =~ /^utf-?8$/i ) {
            # Matched case-insensitively, like the ISO-8859-1 test above, so
            # that a site charset spelled 'UTF-8' is not sent through the
            # generic conversion branch below.

            # inserted next line. 2005-03-29. JuditMays
            writeDebug("Debug: \$siteCharset eq 'utf8', above require Encode");

            # Convert into internal Unicode characters if on Perl 5.8 or higher.
            if ( $] >= 5.008 ) {
                require Encode;    # Perl 5.8 or higher only
                # 'decode' the UTF-8 bytes into Perl's internal characters
                $fullTopicName = Encode::decode( "utf8", $fullTopicName );
            } else {
                writeWarning("UTF-8 not supported on Perl $] - use Perl 5.8 or higher.");
            }
            writeWarning("UTF-8 not yet supported as site charset - TWiki is likely to have problems");
        } else {
            # Convert from UTF-8 into some other site charset
            writeDebug("Converting from UTF-8 to $siteCharset");

            # Use conversion modules depending on Perl version
            if ( $] >= 5.008 ) {
                # inserted next line. 2005-03-29. JuditMays
                writeDebug(" \$] >= 5.008 . require Encode");
                require Encode;    # Perl 5.8 or higher only

                # Map $siteCharset into real encoding name
                my $charEncoding = Encode::resolve_alias($siteCharset);
                if ( not $charEncoding ) {
                    writeWarning("Conversion to \$siteCharset '$siteCharset' not supported, or name not recognised - check 'perldoc Encode::Supported'");
                } else {
                    ##writeDebug "Converting with Encode, valid 'to' encoding is '$charEncoding'";
                    # Convert text using Encode:
                    # - first, convert from UTF8 bytes into internal (UTF-8) characters
                    $fullTopicName = Encode::decode( "utf8", $fullTopicName );

                    # - then convert into site charset from internal UTF-8,
                    #   inserting \x{NNNN} for characters that can't be converted.
                    #   The fallback constant is called fully qualified, so no
                    #   run-time import into this package is needed.
                    $fullTopicName = Encode::encode( $charEncoding,
                        $fullTopicName, Encode::FB_PERLQQ() );
                    ##writeDebug "Encode result is $fullTopicName";
                }
            } else {
                require Unicode::MapUTF8;    # Pre-5.8 Perl versions
                my $charEncoding = $siteCharset;
                if ( not Unicode::MapUTF8::utf8_supported_charset($charEncoding) ) {
                    writeWarning("Conversion to \$siteCharset '$siteCharset' not supported, or name not recognised - check 'perldoc Unicode::MapUTF8'");
                } else {
                    # Convert text
                    # uncommented next line. 2005-03-29. JuditMays
                    writeDebug("Converting with Unicode::MapUTF8, valid encoding is '$charEncoding'");
                    $fullTopicName = Unicode::MapUTF8::from_utf8(
                        {
                            -string  => $fullTopicName,
                            -charset => $charEncoding,
                        }
                    );
                    # FIXME: Check for failed conversion?
                }
            }
        }

        # Split on every dot; only the first two fields are kept.
        ( $webName, $topicName ) = split /\./, $fullTopicName;
    } else {
        # Non-ASCII and non-UTF-8 - assume in site character set,
        # no conversion required
        writeDebug("no encoding with ASCII or UTF8");
        $urlCharEncoding = 'Native';
    }

    # uncommented next line. 2005-03-29. JuditMays
    writeDebug("Final web and topic are $webName $topicName ($urlCharEncoding URL -> $siteCharset)");

    return ( $webName, $topicName );
}