(* i have an applescript that generates a PHP script, which it writes out as a string. applescript writes strings (when not unicode) as macroman encoded text, but php seems to expect windows-1252 encoding, so most non-ascii characters like ö, é and ® are interpreted incorrectly. i created this routine to write the characters as windows-1252. when converted, written, then opened on a mac, these characters won't look right (unless you specifically open the file as windows-1252 encoded text), but php should read them correctly. note: some characters in macroman do not have equivalents in Western (Windows Latin 1), so they will be replaced by character 149, which is the solid bullet character (•). if a different character is desired, replace 149 with the desired decimal character code in the "theConverts" list below, e.g. 32 for space *)
-- Demo driver: offer a sample string containing assorted non-ASCII characters,
-- let the user edit or replace it, then show the re-encoded result.
set sampleText to "Testing for Jörg! and Crü? Äll Çlear. ¡go! ©1998 ™Kraft 44°F •99 π=3.1415927"
set dialogReply to display dialog "re-encode what" default answer sampleText
set theText to text returned of dialogReply
-- Run the MacRoman -> Windows-1252 conversion handler defined below.
set theNewText to my convertText(theText)
-- Show the converted text both as the prompt and as the (copyable) default answer.
display dialog theNewText default answer theNewText
on convertText(theText)
	-- Re-encode text from MacRoman character codes to Windows-1252 character codes
	-- (intended for text that will be read by PHP).
	--
	-- Parameter: theText - the text to convert.
	-- Returns:   a new text value in which every character whose MacRoman code is
	--            128-255 has been replaced by the character whose code is the
	--            Windows-1252 equivalent. Codes 0-127 are passed through unchanged.
	--            MacRoman characters with no Windows-1252 equivalent become 149 (bullet).
	--
	-- NOTE: relies on the "ASCII number" / "ASCII character" commands, which operate on
	-- MacRoman codes; they are deprecated in AppleScript 2.0 and later.
	
	-- Lookup table: item N holds the Windows-1252 code for MacRoman code (N + 127).
	-- MacRoman 128-193 (original table).
	set theConverts to {196, 197, 199, 201, 209, 214, 220, 225, 224, 226, 228, 227, 229, 231, 233, 232, 234, 235, 237, 236, 238, 239, 241, 243, 242, 244, 246, 245, 250, 249, 251, 252, 134, 176, 162, 163, 167, 149, 182, 223, 174, 169, 153, 180, 168, 149, 198, 216, 149, 177, 149, 149, 165, 181, 149, 149, 149, 149, 149, 170, 186, 149, 230, 248, 191, 161}
	-- MacRoman 194-207: not-sign, sqrt, florin, approx, delta, guillemets, ellipsis, nbsp, A-grave, A-tilde, O-tilde, OE, oe.
	set theConverts to theConverts & {172, 149, 131, 149, 149, 171, 187, 133, 160, 192, 195, 213, 140, 156}
	-- MacRoman 208-223: dashes, curly quotes, divide, lozenge, y-diaeresis (both cases), fraction slash,
	-- euro (code 219 is the euro sign in the Mac OS 8.5+ table; use 164 if the older currency sign is meant),
	-- single guillemets, fi/fl ligatures.
	set theConverts to theConverts & {150, 151, 147, 148, 145, 146, 247, 149, 255, 159, 149, 128, 139, 155, 149, 149}
	-- MacRoman 224-239: double dagger, middle dot, low quotes, per mille, accented capitals.
	set theConverts to theConverts & {135, 183, 130, 132, 137, 194, 202, 193, 203, 200, 205, 206, 207, 204, 211, 212}
	-- MacRoman 240-255: Apple logo, accented capitals, dotless i, spacing accents.
	set theConverts to theConverts & {149, 210, 218, 219, 217, 149, 136, 152, 175, 149, 149, 149, 184, 149, 149, 149}
	
	-- Derive the covered range from the table itself instead of a hard-coded upper bound.
	set tableSize to count of theConverts
	
	set theTextX to ""
	repeat with eachLetter in theText
		set eachNum to ASCII number of eachLetter
		if eachNum > 127 and (eachNum - 127) <= tableSize then
			-- High half: translate through the table.
			set newNum to item (eachNum - 127) of theConverts
		else
			-- Plain ASCII (0-127) is identical in both encodings.
			set newNum to eachNum
		end if
		set theTextX to theTextX & (ASCII character newNum)
	end repeat
	return theTextX
end convertText
-- applescript page
(*
character | macroman decimal | windows-1252 decimal ("none" = no equivalent; convertText substitutes 149) |
Ä | 128 | 196 |
Å | 129 | 197 |
Ç | 130 | 199 |
É | 131 | 201 |
Ñ | 132 | 209 |
Ö | 133 | 214 |
Ü | 134 | 220 |
á | 135 | 225 |
à | 136 | 224 |
â | 137 | 226 |
ä | 138 | 228 |
ã | 139 | 227 |
å | 140 | 229 |
ç | 141 | 231 |
é | 142 | 233 |
è | 143 | 232 |
ê | 144 | 234 |
ë | 145 | 235 |
í | 146 | 237 |
ì | 147 | 236 |
î | 148 | 238 |
ï | 149 | 239 |
ñ | 150 | 241 |
ó | 151 | 243 |
ò | 152 | 242 |
ô | 153 | 244 |
ö | 154 | 246 |
õ | 155 | 245 |
ú | 156 | 250 |
ù | 157 | 249 |
û | 158 | 251 |
ü | 159 | 252 |
† | 160 | 134 |
° | 161 | 176 |
¢ | 162 | 162 |
£ | 163 | 163 |
§ | 164 | 167 |
• | 165 | 149 |
¶ | 166 | 182 |
ß | 167 | 223 |
® | 168 | 174 |
© | 169 | 169 |
™ | 170 | 153 |
´ | 171 | 180 |
¨ | 172 | 168 |
≠ | 173 | none |
Æ | 174 | 198 |
Ø | 175 | 216 |
∞ | 176 | none |
± | 177 | 177 |
≤ | 178 | none |
≥ | 179 | none |
¥ | 180 | 165 |
µ | 181 | 181 |
∂ | 182 | none |
∑ | 183 | none |
∏ | 184 | none |
π | 185 | none |
∫ | 186 | none |
ª | 187 | 170 |
º | 188 | 186 |
Ω | 189 | none |
æ | 190 | 230 |
ø | 191 | 248 |
¿ | 192 | 191 |
¡ | 193 | 161 |
*)