(* I have an AppleScript that generates a PHP script, which it writes out as a string. AppleScript writes strings (when not Unicode) as MacRoman-encoded text, but PHP seems to expect Windows-1252 encoding, so most non-ASCII characters like ö, é and ® are interpreted incorrectly. I created this routine to write the characters as Windows-1252. When converted, written, then opened on a Mac, these characters won't look right (unless you specifically open the file as Windows-1252 encoded text), but PHP should read them correctly. Note: some characters in MacRoman do not have equivalents in Western (Windows Latin 1), so they will be replaced by character 149, which is the solid bullet character (•). If a different character is desired, replace 149 with the desired decimal character code in the "theConverts" list below, e.g. 32 for space. *)
-- Demo driver: prompt for text, re-encode it with convertText, and show the result.
-- The default sample mixes characters that have Windows-1252 equivalents with
-- one (π) that does not, so both code paths of the conversion are exercised.
set defaultSample to "Testing for Jörg! and Crü? Äll Çlear. ¡go! ©1998 ™Kraft 44°F •99 π=3.1415927"
-- Let the user accept the sample or type/paste their own text.
set dialogReply to display dialog "re-encode what" default answer defaultSample
set theText to text returned of dialogReply
set theNewText to my convertText(theText)
-- Echo the converted text both as the dialog message and in the editable answer field.
display dialog theNewText default answer theNewText
on convertText(theText)
	(*
		Re-encode text so that, when written out as MacRoman bytes, it reads
		correctly as Windows-1252 (e.g. for PHP).

		theText: the text to convert.
		Returns: text in which every character whose MacRoman code is 128-255 has
		been replaced by the character whose MacRoman code equals the original
		character's Windows-1252 code. Codes 0-127 are passed through unchanged.
		Characters with no Windows-1252 equivalent become code 149 (bullet);
		change the 149 entries below to use a different placeholder.

		NOTE(review): like the original routine, this relies on "ASCII number" /
		"ASCII character" working in MacRoman code points.
	*)
	-- Lookup table: item (macRomanCode - 127) is the Windows-1252 code.
	-- Previously only codes 128-193 were mapped and 194-255 were passed through
	-- unchanged, which mis-encoded curly quotes, dashes, the ellipsis and
	-- letters such as À, Õ, Œ, Â, Ê and Ò.
	-- MacRoman 128-143: Ä Å Ç É Ñ Ö Ü á à â ä ã å ç é è
	set theConverts to {196, 197, 199, 201, 209, 214, 220, 225, 224, 226, 228, 227, 229, 231, 233, 232}
	-- MacRoman 144-159: ê ë í ì î ï ñ ó ò ô ö õ ú ù û ü
	set theConverts to theConverts & {234, 235, 237, 236, 238, 239, 241, 243, 242, 244, 246, 245, 250, 249, 251, 252}
	-- MacRoman 160-175: † ° ¢ £ § • ¶ ß ® © ™ ´ ¨ (≠) Æ Ø
	set theConverts to theConverts & {134, 176, 162, 163, 167, 149, 182, 223, 174, 169, 153, 180, 168, 149, 198, 216}
	-- MacRoman 176-191: (∞) ± (≤) (≥) ¥ µ (∂) (∑) (∏) (π) (∫) ª º (Ω) æ ø
	set theConverts to theConverts & {149, 177, 149, 149, 165, 181, 149, 149, 149, 149, 149, 170, 186, 149, 230, 248}
	-- MacRoman 192-207: ¿ ¡ ¬ (√) ƒ (≈) (∆) « » … nbsp À Ã Õ Œ œ
	set theConverts to theConverts & {191, 161, 172, 149, 131, 149, 149, 171, 187, 133, 160, 192, 195, 213, 140, 156}
	-- MacRoman 208-223: – — “ ” ‘ ’ ÷ (◊) ÿ Ÿ (⁄) € ‹ › (ﬁ) (ﬂ)
	-- 219 is the euro sign in current MacRoman; older system versions had ¤ there (Windows-1252 164).
	set theConverts to theConverts & {150, 151, 147, 148, 145, 146, 247, 149, 255, 159, 149, 128, 139, 155, 149, 149}
	-- MacRoman 224-239: ‡ · ‚ „ ‰ Â Ê Á Ë È Í Î Ï Ì Ó Ô
	set theConverts to theConverts & {135, 183, 130, 132, 137, 194, 202, 193, 203, 200, 205, 206, 207, 204, 211, 212}
	-- MacRoman 240-255: (apple) Ò Ú Û Ù (ı) ˆ ˜ ¯ (˘) (˙) (˚) ¸ (˝) (˛) (ˇ)
	set theConverts to theConverts & {149, 210, 218, 219, 217, 149, 136, 152, 175, 149, 149, 149, 184, 149, 149, 149}
	
	set theTextX to ""
	repeat with eachLetter in (characters of theText)
		set eachNum to ASCII number of (contents of eachLetter)
		if eachNum > 127 then
			-- High half: translate through the table (128 maps to item 1).
			set newNum to item (eachNum - 127) of theConverts
		else
			-- 7-bit ASCII is identical in both encodings.
			set newNum to eachNum
		end if
		set theTextX to theTextX & (ASCII character newNum)
	end repeat
	return theTextX
end convertText
-- applescript page
(*
| character | macroman decimal | windows-1252 decimal |
| Ä | 128 | 196 |
| Å | 129 | 197 |
| Ç | 130 | 199 |
| É | 131 | 201 |
| Ñ | 132 | 209 |
| Ö | 133 | 214 |
| Ü | 134 | 220 |
| á | 135 | 225 |
| à | 136 | 224 |
| â | 137 | 226 |
| ä | 138 | 228 |
| ã | 139 | 227 |
| å | 140 | 229 |
| ç | 141 | 231 |
| é | 142 | 233 |
| è | 143 | 232 |
| ê | 144 | 234 |
| ë | 145 | 235 |
| í | 146 | 237 |
| ì | 147 | 236 |
| î | 148 | 238 |
| ï | 149 | 239 |
| ñ | 150 | 241 |
| ó | 151 | 243 |
| ò | 152 | 242 |
| ô | 153 | 244 |
| ö | 154 | 246 |
| õ | 155 | 245 |
| ú | 156 | 250 |
| ù | 157 | 249 |
| û | 158 | 251 |
| ü | 159 | 252 |
| † | 160 | 134 |
| ° | 161 | 176 |
| ¢ | 162 | 162 |
| £ | 163 | 163 |
| § | 164 | 167 |
| • | 165 | 149 |
| ¶ | 166 | 182 |
| ß | 167 | 223 |
| ® | 168 | 174 |
| © | 169 | 169 |
| ™ | 170 | 153 |
| ´ | 171 | 180 |
| ¨ | 172 | 168 |
| ≠ | 173 | none |
| Æ | 174 | 198 |
| Ø | 175 | 216 |
| ∞ | 176 | none |
| ± | 177 | 177 |
| ≤ | 178 | none |
| ≥ | 179 | none |
| ¥ | 180 | 165 |
| µ | 181 | 181 |
| ∂ | 182 | none |
| ∑ | 183 | none |
| ∏ | 184 | none |
| π | 185 | none |
| ∫ | 186 | none |
| ª | 187 | 170 |
| º | 188 | 186 |
| Ω | 189 | none |
| æ | 190 | 230 |
| ø | 191 | 248 |
| ¿ | 192 | 191 |
| ¡ | 193 | 161 |
*)