#!/usr/bin/perl -w # # Usage: # # LANG=fr_FR.iso88591 perl charset.pl # # (for some working locale listed by 'locale -a') # Unix/Linux locales # setlocale(LC_CTYPE, "de_AT.ISO8859-15"); # Windows locales # setlocale(LC_CTYPE, "Russian_Russia.866"); # setlocale(LC_CTYPE, "English_United Kingdom.1252"); use strict; use vars qw($lang); my $locale_bit; BEGIN { $lang= $ENV{LANG} || ''; print "LANG is $lang\n"; if ( $lang ) { # Equivalent of 'use locale' but dynamic and propagates to rest of file require locale; import locale (); } print "====== first part =============\n"; print "\$^H is $^H \n"; # Locale bit is 0x4 on ActivePerl build 633 or higher # and also on Perl 5.8.4 on Debian. Use 0x800 on older Perl versions, # e.g. 5.6.x? Check 'locale.pm' for details. $locale_bit = ($^H & 0x4) ? 1 : 0; print "Locale bit is $locale_bit (controls Perl regexes etc)\n"; } # NOTE: $^H not set by the BEGIN block here, but its effects propagate # to rest of file, as shown by the locale-based tests below if ( exists $INC{'locale.pm'} ) { print "locale.pm loaded\n"; } else { print "locale.pm not loaded\n"; } use POSIX qw(locale_h); my $cur_locale = setlocale(LC_CTYPE); print "Locale is $cur_locale\n"; setlocale(LC_CTYPE, $lang); setlocale(LC_COLLATE, $lang); $cur_locale = setlocale(LC_CTYPE); print "Locale now is $cur_locale\n"; # Test collation sequence for sorting print "Sorted: ", +(sort grep /\w/, map { chr() } 0..255), "\n"; print "Unsorted: ", +(grep /\w/, map { chr() } 0..255), "\n"; # Comment out this line to test uc() and lc() with locales exit; # NOTE: ActivePerl's locale support is very broken for the UK locale above! # Weird non-alpha characters appear as if lower case or upper case of # completely different characters... # NOTE: fr_FR.ISO8859-1 on Debian includes '_' and '-' in the \w character # class - en_US includes only '_'... Same locale on 5.8 includes 'mu'. # Sorting of UTF8 characters doesn't seem to work... foreach my $charno ( 0..255 ) { my $char = chr ($charno); if ( (uc $char) eq $char and (lc $char) ne $char ) { printf "upper: %d %c\t", $charno, $charno; my $lower = lc $char; printf "lc: %d %c\n", ord $lower, ord $lower; } if ( lc($char) eq $char and uc $char ne $char ) { printf "lower: %d %c\t", $charno, $charno; my $upper = uc $char; printf "uc: %d %c\n", ord $upper, ord $upper; } }