I have been working with dawesdust_12 on a new PHP LfsString class to replace the old codepage and color code converter functions I posted here.
The class uses iconv for character conversion. We found that this is the fastest conversion method available and will be supported by many PHP installs.
I suppose support for other conversion libraries could be added, but maybe it's not needed.
This class hopefully does everything right. I've tested it rigorously, but who knows — some bugs may still exist.
I have also benchmarked every change I made and tried many methods of conversion to make sure everything is as fast as it can be.
Can you find more optimisations?
Here is the class. The interface describes which methods you can use:
The class uses iconv for character conversion. We found that this is the fastest conversion method available and will be supported by many PHP installs.
I suppose support for other conversion libraries could be added, but maybe it's not needed.
This class hopefully does everything right. I've tested it rigorously, but who knows — some bugs may still exist.
I have also benchmarked every change I made and tried many methods of conversion to make sure everything is as fast as it can be.
Can you find more optimisations?
Here is the class. The interface describes which methods you can use:
<?php
/**
 * Public API for converting raw LFS strings.
 *
 * All methods are static. Where a method takes $fromCp, it names the charset
 * the caller's copy of the string is in; it is used as the source charset for
 * an iconv() conversion to CP1252 before any further processing.
 */
interface iLfsString
{
/**
 * Converts a raw LFS string into a UTF-8 string with colour markup.
 *
 * Codepage switch codes (^L, ^G, ^C, ^E, ^T, ^B, ^J, ^S, ^K, ^H) select the
 * codepage of the text that follows and each stretch is re-encoded to UTF-8.
 * LFS escape codes (^h, ^d, ^s, ...) are replaced by the characters they stand
 * for. Colour codes ^0-^7 open a coloured <span>; ^8 and ^9 end it.
 *
 * @param string $s      Raw LFS string.
 * @param string $fromCp Charset $s is currently in. Defaults to 'UTF-8';
 *                       pass 'CP1252' to skip the initial conversion.
 * @return string UTF-8 string.
 */
public static function convert($s, $fromCp = 'UTF-8');
/**
 * Same as convert(), except that colour codes (^0-^9) are removed instead of
 * being turned into <span> tags.
 *
 * @param string $s      Raw LFS string.
 * @param string $fromCp Charset $s is currently in. Defaults to 'UTF-8';
 *                       pass 'CP1252' to skip the initial conversion.
 * @return string UTF-8 string without colour markup.
 */
public static function convertWithoutColor($s, $fromCp = 'UTF-8');
/**
 * Replaces the colour codes of a raw LFS string with <span> tags.
 *
 * Only colour codes are handled: no charset conversion and no escape-code
 * translation takes place.
 *
 * @param string $s Raw LFS string.
 * @return string The string with colour codes replaced by <span> tags.
 */
public static function convertColor($s);
/**
 * Removes the colour codes (^0-^9) from a raw LFS string.
 *
 * No charset conversion and no escape-code translation takes place.
 *
 * @param string $s Raw LFS string.
 * @return string The string without colour codes.
 */
public static function stripColor($s);
/**
 * Prepares a raw LFS host name for use in a link.
 *
 * The name is converted to CP1252 (unless $fromCp is already 'CP1252') and
 * then passed through rawurlencode(). Colour codes, if present, are not
 * removed; they are percent-encoded like any other characters.
 *
 * @param string $s      Raw LFS host name, with or without colour codes.
 * @param string $fromCp Charset $s is currently in. Defaults to 'UTF-8'.
 * @return string CP1252 bytes, rawurlencoded.
 */
public static function encodeLfsUrl($s, $fromCp = 'UTF-8');
}
/**
 * Converts raw LFS strings into UTF-8 text, optionally with HTML colour markup.
 *
 * In a raw LFS string
 *   - ^L ^G ^C ^E ^T ^B ^J ^S ^K ^H switch the codepage of the text that
 *     follows (text before the first switch is CP1252),
 *   - ^0 .. ^7 select a colour, ^8 and ^9 end the current colour,
 *   - ^h ^d ^s ^c ^a ^q ^t ^l ^r ^v are escape codes for # \ / : * ? " < > |,
 *   - ^^ is an escaped, literal caret.
 *
 * "^^" is always consumed as one token, so the character after an escaped
 * caret is never mistaken for a code ("^^1" is a caret followed by "1", not
 * colour 1).
 *
 * When $_SERVER['SERVER_NAME'] is set, convert() and convertWithoutColor()
 * additionally HTML-escape & < > " (both literal ones and those produced by
 * escape codes), so untrusted names cannot inject markup.
 *
 * NOTE(review): codepage switches are located on the raw bytes, so a 0x5E
 * trail byte of a double-byte character followed by a codepage letter would be
 * misread as a switch. Confirm against how LFS itself parses such strings.
 */
class LfsString implements iLfsString
{
    /** HTML colours (without the leading #) for colour codes ^0 .. ^7, indexed by digit. */
    private static $colorCodes = [
        '000', // 0
        'F00', // 1
        '0F0', // 2
        'FF0', // 3
        '00F', // 4
        'F0F', // 5
        '0FF', // 6
        'FFF', // 7
    ];

    /** iconv charset names for the LFS codepage switch letters. */
    private static $codepages = [
        'L' => 'CP1252', // Latin 1
        'G' => 'CP1253', // Greek
        'C' => 'CP1251', // Cyrillic
        'E' => 'CP1250', // Central Europe
        'T' => 'CP1254', // Turkish
        'B' => 'CP1257', // Baltic
        'J' => 'CP932',  // Japanese
        'S' => 'CP936',  // Simplified Chinese
        'K' => 'CP949',  // Korean
        'H' => 'CP950',  // Traditional Chinese
    ];

    /** LFS escape codes and the literal characters they stand for. */
    private static $specialChars = [
        '^h' => '#',
        '^d' => '\\',
        '^s' => '/',
        '^c' => ':',
        '^a' => '*',
        '^q' => '?',
        '^t' => '"',
        '^l' => '<',
        '^r' => '>',
        '^v' => '|',
    ];

    /** Characters that must be escaped in HTML output, with their entities. */
    private static $htmlEntities = [
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
    ];

    /**
     * Converts a raw LFS string into a UTF-8 string with <span> colour markup.
     *
     * @param string $s      Raw LFS string.
     * @param string $fromCp Charset $s is currently in; it is converted to
     *                       CP1252 first unless it already is CP1252.
     * @return string UTF-8 string; HTML-escaped when running in a web context.
     */
    public static function convert($s, $fromCp = 'UTF-8')
    {
        return self::render(self::toCp1252($s, $fromCp), true);
    }

    /**
     * Like convert(), but colour codes are dropped instead of rendered.
     *
     * @param string $s      Raw LFS string.
     * @param string $fromCp Charset $s is currently in; it is converted to
     *                       CP1252 first unless it already is CP1252.
     * @return string UTF-8 string; HTML-escaped when running in a web context.
     */
    public static function convertWithoutColor($s, $fromCp = 'UTF-8')
    {
        return self::render(self::toCp1252($s, $fromCp), false);
    }

    /**
     * Replaces the colour codes of a raw LFS string with <span> tags.
     *
     * Everything else, including "^^" and codepage/escape codes, is returned
     * untouched, so the result can still be fed to further LFS processing.
     *
     * @param string $s Raw LFS string.
     * @return string The string with colour codes replaced by <span> tags.
     */
    public static function convertColor($s)
    {
        $spanOpen = false;
        $out = self::colorize($s, $spanOpen);

        // A colour that is still active at the end of the string needs its closing tag.
        return $spanOpen ? $out.'</span>' : $out;
    }

    /**
     * Removes the colour codes (^0 .. ^9) from a raw LFS string.
     *
     * Everything else, including "^^" and codepage/escape codes, is returned
     * untouched.
     *
     * @param string $s Raw LFS string.
     * @return string The string without colour codes.
     */
    public static function stripColor($s)
    {
        // "^^" is matched too (and put back) so that the digit in "^^1" is
        // never seen as the second half of a colour code.
        return preg_replace_callback(
            '#\^[\^0-9]#',
            function (array $m) {
                return $m[0] === '^^' ? '^^' : '';
            },
            $s
        );
    }

    /**
     * Prepares a raw LFS host name for use in a link.
     *
     * @param string $s      Raw LFS host name, with or without colour codes
     *                       (colour codes are not removed).
     * @param string $fromCp Charset $s is currently in.
     * @return string CP1252 bytes, rawurlencoded.
     */
    public static function encodeLfsUrl($s, $fromCp = 'UTF-8')
    {
        // LFS expects the host name in CP1252.
        return rawurlencode(self::toCp1252($s, $fromCp));
    }

    /**
     * Brings the caller's string into CP1252, the byte form the parser works on.
     *
     * @param string $s      Input string.
     * @param string $fromCp Charset of $s (compared case-insensitively).
     * @return string CP1252 bytes; '' when iconv() cannot convert the input
     *                (iconv() raises its own notice in that case).
     */
    private static function toCp1252($s, $fromCp)
    {
        if (strcasecmp($fromCp, 'CP1252') === 0) {
            return $s;
        }

        $converted = iconv($fromCp, 'CP1252', $s);

        return $converted === false ? '' : $converted;
    }

    /**
     * Shared implementation of convert() and convertWithoutColor().
     *
     * @param string $s         Raw LFS string as CP1252 bytes.
     * @param bool   $withColor True to render colour codes as <span> tags,
     *                          false to drop them.
     * @return string UTF-8 string.
     */
    private static function render($s, $withColor)
    {
        $table = self::translationTable(isset($_SERVER['SERVER_NAME']));

        // Even indexes hold text, odd indexes hold the character after the caret:
        // either '^' (escaped caret) or a codepage letter.
        $parts = preg_split('#\^([\^LGCETBJSKH])#', $s, -1, PREG_SPLIT_DELIM_CAPTURE);

        $codepage = self::$codepages['L'];
        $spanOpen = false;
        $out = '';

        foreach ($parts as $index => $part) {
            if ($index % 2 === 1) {
                if ($part === '^') {
                    $out .= '^';
                } else {
                    $codepage = self::$codepages[$part];
                }
                continue;
            }

            if ($part === '') {
                continue;
            }

            $text = iconv($codepage, 'UTF-8', $part);
            if ($text === false) {
                // Undecodable segment: iconv() has raised a notice; contribute nothing.
                continue;
            }

            // Single pass, so replacement output is never re-scanned for codes.
            $text = strtr($text, $table);

            $out .= $withColor
                ? self::colorize($text, $spanOpen)
                : preg_replace('#\^[0-9]#', '', $text);
        }

        return $spanOpen ? $out.'</span>' : $out;
    }

    /**
     * Builds the strtr() table for escape codes and, optionally, HTML escaping.
     *
     * @param bool $html True to emit HTML entities for & < > ".
     * @return array Map of search string => replacement.
     */
    private static function translationTable($html)
    {
        $table = self::$specialChars;

        if ($html) {
            foreach ($table as $code => $char) {
                $table[$code] = strtr($char, self::$htmlEntities);
            }
            $table += self::$htmlEntities;
        }

        return $table;
    }

    /**
     * Replaces colour codes in $s with <span> tags.
     *
     * "^^" is matched and passed through unchanged so that the character after
     * an escaped caret is never read as a colour digit.
     *
     * @param string $s        Text to process.
     * @param bool   $spanOpen In/out: whether a colour <span> is currently open.
     *                         The caller appends the final </span> if it is
     *                         still true after the last call.
     * @return string Text with colour codes replaced.
     */
    private static function colorize($s, &$spanOpen)
    {
        $palette = self::$colorCodes;

        return preg_replace_callback(
            '#\^([\^0-9])#',
            function (array $m) use ($palette, &$spanOpen) {
                if ($m[1] === '^') {
                    return '^^';
                }

                $digit = (int) $m[1];
                $markup = $spanOpen ? '</span>' : '';

                if ($digit < 8) {
                    $markup .= '<span style="color:#'.$palette[$digit].';">';
                    $spanOpen = true;
                } else {
                    // ^8 and ^9 only end the current colour.
                    $spanOpen = false;
                }

                return $markup;
            },
            $s
        );
    }
}
?>