ex_beshenov wrote in perldev

uniesc.pl

A handy little tool. Can you suggest any improvements or corrections?



#!/usr/bin/perl -w

use Getopt::Long;
use utf8;
use encoding utf8;
use charnames qw(:full);

# uniesc.pl main body.
#
# Reads text from STDIN and writes it to STDOUT with every code point above
# 127 replaced by an escape sequence; code points 0..127 are copied through
# unchanged.  The escape style is selected with --format:
#
#   --format Source   ->  \uFFFF     (lower-case hex, at least 4 digits)
#   --format XML      ->  &#xFFFF;   (XML numeric character reference)
#
# Any other (or missing) --format value prints the usage text and dies.

# Usage text, reproduced exactly as the script has always printed it.
my $usage = join "\n",
	'uniesc.pl, the Unicode Escaper',
	'Usage:',
	"\tuniesc.pl --format XML",
	'for escaping as &#xFFFF;',
	'or',
	"\tuniesc.pl --format Source",
	'for escaping as \uFFFF',
	'',
	'Recommended:',
	"\tcat ifile | uniesc.pl [options] > ofile",
	'';

# printf template for each supported --format value (names are case-sensitive).
my %template_for = (
	Source => "\\u%04x",
	XML    => "&#x%04x;",
);

my $format;
GetOptions( "format=s" => \$format )
	or die $usage;

# Look the template up only when --format was given, so that a missing
# option does not raise "uninitialized value" warnings under -w.
my $template = defined $format ? $template_for{$format} : undef;
die $usage unless defined $template;

# Process the input line by line instead of slurping all of STDIN first.
while ( defined( my $line = <STDIN> ) ) {
	# Decode only raw octets.  If STDIN already delivers decoded characters
	# (for instance through an I/O layer), a second decode would corrupt
	# text such as U+00C3 U+00A9 by collapsing it into U+00E9.
	utf8::decode($line) unless utf8::is_utf8($line);

	foreach my $code ( unpack( "U*", $line ) ) {
		if ( $code > 127 ) {
			printf $template, $code;
		}
		else {
			# Plain print: the character must never be used as a printf
			# format string, or a literal '%' in the input gets mangled.
			print chr($code);
		}
	}
}