Home
The Toolkit for Online Communities
17119 Community Members, 0 members online, 2060 visitors today
Log In Register
OpenACS Home : Forums : OpenACS Q&A : Conversion from half-width to full-width Japanese kana characters

Forum OpenACS Q&A: Conversion from half-width to full-width Japanese kana characters

Icon of envelope Request notifications

This code converts a string containing half-width Japanese katakana characters into their full-width equivalents, leaving all other characters untouched. It is based on the Java algorithm in Ken Lunde's CJKV Information Processing book.
# Page contract for this test page. The first (empty) block is the page's
# documentation string; the second block declares the accepted query variables.
# The only variable is `kana`, flagged :optional with an empty-string default.
# It is later echoed back and passed to half_to_full_width_katakana.
# NOTE(review): presumably the default lets the page render on a first visit
# with no query string -- confirm against the ad_page_contract documentation.
ad_page_contract {

} {
    {kana:optional ""}
}




# Lookup table: the character at index (code - 0xff61) is the full-width
# equivalent of the half-width character U+FF61 .. U+FF9F (63 entries:
# punctuation, small kana, the prolonged sound mark, the basic katakana,
# and finally the spacing dakuten / handakuten marks).
# The \uXXXX escapes must keep their backslashes; without them the table is
# just ASCII text and every lookup returns the wrong character.
set FWKatakana "\u3002\u300c\u300d\u3001\u30fb"
append FWKatakana "\u30f2\u30a1\u30a3\u30a5\u30a7"
append FWKatakana "\u30a9\u30e3\u30e5\u30e7\u30c3"
append FWKatakana "\u30fc\u30a2\u30a4\u30a6\u30a8"
append FWKatakana "\u30aa\u30ab\u30ad\u30af\u30b1"
append FWKatakana "\u30b3\u30b5\u30b7\u30b9\u30bb"
append FWKatakana "\u30bd\u30bf\u30c1\u30c4\u30c6"
append FWKatakana "\u30c8\u30ca\u30cb\u30cc\u30cd"
append FWKatakana "\u30ce\u30cf\u30d2\u30d5\u30d8"
append FWKatakana "\u30db\u30de\u30df\u30e0\u30e1"
append FWKatakana "\u30e2\u30e4\u30e6\u30e8\u30e9"
append FWKatakana "\u30ea\u30eb\u30ec\u30ed\u30ef"
append FWKatakana "\u30f3\u309b\u309c"

# Publish the table under kana_conversions/half_to_full; the proc below
# reads it back from there.
nsv_set kana_conversions half_to_full $FWKatakana

# half_to_full_width_katakana --
#
#   Convert every half-width katakana character (U+FF61 .. U+FF9F) in str
#   to its full-width equivalent. A base kana followed by a voiced mark
#   (dakuten) or semi-voiced mark (handakuten) is merged into the single
#   precomposed full-width character where one exists. All other characters
#   are copied through unchanged.
#
# Arguments:
#   str   the string to convert (may be empty)
#
# Results:
#   The converted string. Requires the nsv entry
#   kana_conversions/half_to_full to hold the 63-character lookup table.
proc half_to_full_width_katakana {str} {

    set FWKatakana [nsv_get kana_conversions "half_to_full"]
    set fw ""
    set strlen [string length $str]
    set i 0

    while {$i < $strlen} {
        set ch [string index $str $i]
        scan $ch "%c" ix

        # Anything outside the half-width katakana range passes through.
        if {$ix < 0xff61 || $ix > 0xff9f} {
            append fw $ch
            incr i
            continue
        }

        # Plain full-width equivalent of the current character.
        set base [string index $FWKatakana [expr {$ix - 0xff61}]]

        # Code point of the following character, or -1 at end of string.
        set ix1 -1
        if {$i + 1 < $strlen} {
            scan [string index $str [expr {$i + 1}]] "%c" ix1
        }

        # The following character may be a voiced / semi-voiced mark in its
        # half-width, combining, or spacing full-width form.
        set is_dakuten    [expr {$ix1 == 0xff9e || $ix1 == 0x3099 || $ix1 == 0x309b}]
        set is_handakuten [expr {$ix1 == 0xff9f || $ix1 == 0x309a || $ix1 == 0x309c}]

        # Only ka..to and ha..ho take a dakuten; only ha..ho take a handakuten.
        set takes_dakuten    [expr {($ix >= 0xff76 && $ix <= 0xff84) || ($ix >= 0xff8a && $ix <= 0xff8e)}]
        set takes_handakuten [expr {$ix >= 0xff8a && $ix <= 0xff8e}]

        if {$is_dakuten && $ix == 0xff73} {
            # U + dakuten has its own code point (VU); it is not base + 1.
            append fw "\u30f4"
            incr i 2
        } elseif {$is_dakuten && $takes_dakuten} {
            # The voiced form sits one code point after the base kana
            # (e.g. "ki" -> "gi").
            scan $base "%c" z
            append fw [format "%c" [expr {$z + 1}]]
            incr i 2
        } elseif {$is_handakuten && $takes_handakuten} {
            # The semi-voiced form sits two code points after the base kana
            # (e.g. "ha" -> "pa").
            scan $base "%c" z
            append fw [format "%c" [expr {$z + 2}]]
            incr i 2
        } else {
            # No mark follows, or the mark cannot combine with this kana:
            # emit the base only. A following mark is handled on the next
            # pass through the loop.
            append fw $base
            incr i
        }
    }
    return $fw
}




# Render the test page: echo the submitted value, show its full-width
# conversion, and offer a form to submit another value.
#
# `kana` comes straight from the query string, so it is HTML-escaped before
# being placed in the page body or in the double-quoted value attribute;
# emitting it raw would allow markup/script injection.
set html_escapes {& &amp; < &lt; > &gt; \" &quot;}
set kana_html [string map $html_escapes $kana]
set full_html [string map $html_escapes [half_to_full_width_katakana $kana]]

append page "<title>half to full width kana</title>
<h4>half to full width kana</h4>

kana = $kana_html
<br>to-full => $full_html
<br>
<p>


<form action=hankaku.tcl method=get>
<input type=text name=kana value=\"$kana_html\">
<br>
<input type=submit>
</form>
"

ns_return 200 text/html $page
The backslashes didn't make it through that posting. Wherever a Unicode
character appears above as a bare uXXXX, it should be read as the Tcl escape \uXXXX (for example, \u30f4).
Henry,
Arigatou for improving the Japanese version.
In case I forgot to mention it, you need to set LANG=C in the
environment before you run initdb, in order to get Postgres
to use binary sort order on strings. Otherwise, sorting
Japanese kana strings will not give you useful results. Using
binary order on Unicode more or less works for kana.
Henry, Great to hear from you.
Yesterday, at Haruki's place, Rolf showed us how to log in to my PC, on which you installed OpenACS.