Talk:Windows code page
Way too big?
This page should link to the individual code pages instead of listing all of the tables at once (for one thing it is missing most of the mappings anyway, like for 932).
Generator
The generator code is given below. readtxt.pas can be obtained from http://bewareserv.sourceforge.net/ and most of the data used came from http://www.unicode.org/PUBLIC/mappings – the .mspx.html files, used where unicode.org didn't have a mapping available for the code page in question, are from http://www.microsoft.com/globaldev/reference/oem.mspx
program byposition;
// Builds a side-by-side comparison table of single-byte code pages.
//
// Each mapping file is parsed into one column of buildarray; the program
// then writes output.txt containing an HTML table (with wiki links) that
// has one row per byte value from startat to 255 and one cell per code page.
//
// Compile-time switches:
//   showallchars         - emit every row; when undefined, only rows where
//                          some column differs from column 0 are emitted
//   breakbeforecodepoint - put the U+XXXX label on its own line in <small>;
//                          when undefined it is written inline in <sub>
//   twocol               - (not defined here) two data rows per table row,
//                          with the header cells written twice

uses sysutils,readtxt;
//we use our own text reader as the delphi one can't handle
//unix format text

const
  maxcharset = 12; //9;  highest column index in use (9 was the earlier value)
  startat = $80;         // first byte value that gets a table row

{$define showallchars}
{$define breakbeforecodepoint}

var
  // buildarray[b,c] = code point read for byte b in column c, -1 if none
  buildarray : array[0..255,0..maxcharset] of longint;
  // names[c] = wiki link target/label for column c, e.g. 'code page 437|437'
  names : array[0..31] of string;

// Reads one mapping file into column `number` of buildarray and records
// `name` as that column's header link.
//
// Two line formats are recognised (all other lines are ignored):
//   unicode.org : 0xBB<sep>0xUUUU...   ('x' at positions 2 and 7)
//   ms (.mspx)  : BB = U+UUUU...
// where BB is the byte value and UUUU the code point, both in hex.
//
// NOTE(review): `number` is not range-checked against maxcharset, and the
// reader is not explicitly closed - the readtxt API is not visible here.
procedure processcharset(name:string;number:byte;filename:string);
var
  t: treadtext;
  line:string;
begin
  names[number] := name;
  readtext_init(t,filename);
  repeat
    line := readtext_line(t);
    if (length(line)>=11) and (line[2]='x') and (line[7]='x') then begin
      //unicode.org format
      buildarray[strtoint('$'+copy(line,3,2)),number] := strtoint('$'+copy(line,8,4));
    end else if (length(line)>=11) and (copy(line,3,5)=' = U+') then begin
      //ms format
      buildarray[strtoint('$'+copy(line,1,2)),number] := strtoint('$'+copy(line,8,4));
    end;
  until readtext_eof(t);
end;

// Returns the wiki markup for one table cell: a named link for the listed
// control and special characters, a numeric character reference link for
// any other code point, and an empty string for -1 (no mapping).
function charlink(codepoint: longint): string;
begin
  case codepoint of
    -1  : charlink := '';
    $00 : charlink := '[[NUL]]';
    $01 : charlink := '[[SOH]]';
    $02 : charlink := '[[STX]]';
    $03 : charlink := '[[ETX]]';
    $04 : charlink := '[[EOT]]';
    $05 : charlink := '[[ENQ]]';
    $06 : charlink := '[[ACK]]';
    $07 : charlink := '[[BEL]]';
    $08 : charlink := '[[BS]]';
    $09 : charlink := '[[TAB]]';
    $0A : charlink := '[[LF]]';
    $0B : charlink := '[[VT]]';
    $0C : charlink := '[[FF]]';
    $0D : charlink := '[[CR]]';
    $0E : charlink := '[[SO]]';
    $0F : charlink := '[[SI]]';
    $10 : charlink := '[[DLE]]';
    $11 : charlink := '[[DC1]]';
    $12 : charlink := '[[DC2]]';
    $13 : charlink := '[[DC3]]';
    $14 : charlink := '[[DC4]]';
    $15 : charlink := '[[NAK]]';
    $16 : charlink := '[[SYN]]';
    $17 : charlink := '[[ETB]]';
    $18 : charlink := '[[CAN]]';
    $19 : charlink := '[[EM]]';
    $1A : charlink := '[[SUB]]';
    $1B : charlink := '[[ESC]]';
    $1C : charlink := '[[FS]]';
    $1D : charlink := '[[GS]]';
    $1E : charlink := '[[RS]]';
    $1F : charlink := '[[US]]';
    $80 : charlink := '[[PAD]]';
    $81 : charlink := '[[HOP]]';
    $82 : charlink := '[[BPH]]';
    $83 : charlink := '[[NBH]]';
    $84 : charlink := '[[IND]]';
    $85 : charlink := '[[NEL]]';
    $86 : charlink := '[[SSA]]';
    $87 : charlink := '[[ESA]]';
    $88 : charlink := '[[HTS]]';
    $89 : charlink := '[[HTJ]]';
    $8A : charlink := '[[VTS]]';
    $8B : charlink := '[[PLD]]';
    $8C : charlink := '[[PLU]]';
    $8D : charlink := '[[RI]]';
    $8E : charlink := '[[SS2]]';
    $8F : charlink := '[[SS3]]';
    $90 : charlink := '[[DCS]]';
    $91 : charlink := '[[PU1]]';
    $92 : charlink := '[[PU2]]';
    $93 : charlink := '[[STS]]';
    $94 : charlink := '[[CCH]]';
    $95 : charlink := '[[MW]]';
    $96 : charlink := '[[SPA]]';
    $97 : charlink := '[[EPA]]';
    $98 : charlink := '[[SOS]]';
    $99 : charlink := '[[SGCI]]';
    $9A : charlink := '[[SCI]]';
    $9B : charlink := '[[CSI]]';
    $9C : charlink := '[[ST]]';
    $9D : charlink := '[[OSC]]';
    $9E : charlink := '[[PM]]';
    $9F : charlink := '[[APC]]';
    $A0 : charlink := '[[NBSP]]';
    $AD : charlink := '[[SHY]]';
  else
    charlink := '[[&#x'+inttohex(codepoint,4)+';]]';
  end;
end;

var
  t: textfile;
  i,j,k : integer;
  goodline : boolean;
  rowcounter : integer;
  comparevalue : integer;
begin
  // -1 marks "no mapping read" for every byte/column pair
  for i := 0 to 255 do
    for j := 0 to maxcharset do
      buildarray[i,j] := -1;

  // Alternative column set (Windows code pages); it fills columns 0..9 only.
  {processcharset('windows-874|874',0,'CP874.txt');
  processcharset('windows-1250|1250',1,'CP1250.txt');
  processcharset('windows-1251|1251',2,'CP1251.txt');
  processcharset('windows-1252|1252',3,'CP1252.txt');
  processcharset('windows-1253|1253',4,'CP1253.txt');
  processcharset('windows-1254|1254',5,'CP1254.txt');
  processcharset('windows-1255|1255',6,'CP1255.txt');
  processcharset('windows-1256|1256',7,'CP1256.txt');
  processcharset('windows-1257|1257',8,'CP1257.txt');
  processcharset('windows-1258|1258',9,'CP1258.txt');}

  processcharset('code page 437|437',0,'CP437.txt');
  processcharset('code page 720|720',1,'720.mspx.html');
  processcharset('code page 737|737',2,'CP737.txt');
  processcharset('code page 775|775',3,'CP775.txt');
  processcharset('code page 850|850',4,'CP850.txt');
  processcharset('code page 852|852',5,'CP852.txt');
  processcharset('code page 855|855',6,'CP855.txt');
  processcharset('code page 857|857',7,'CP857.txt');
  processcharset('code page 858|858',8,'858.mspx.html');
  processcharset('code page 862|862',9,'CP862.txt');
  processcharset('code page 866|866',10,'CP866.txt');
  processcharset('windows-874|874',11,'CP874.txt');
  // Fixed: this column is labelled windows-1258 but previously loaded
  // CP1250.txt, so the data shown did not match the header.
  processcharset('windows-1258|1258',12,'CP1258.txt');

  assignfile(t,'output.txt');
  rewrite(t);
  writeln(t,'<table {{prettytable}}>');
  rowcounter := 0;
  for i := startat to 255 do begin
    goodline := false;
    comparevalue := buildarray[i,0];
    {$ifdef showallchars}
    goodline := true;
    {$else}
    // keep the row only if some column disagrees with column 0
    for j := 1 to maxcharset do begin
      if comparevalue <> buildarray[i,j] then goodline := true;
    end;
    {$endif}
    if goodline then begin
      // Repeat the header row whenever the low bits of rowcounter are zero.
      // The mask is $1F shifted right once for single-column layout and once
      // more for breakbeforecodepoint (mask 7 with the current defines,
      // i.e. a header before every 8th emitted row).
      if (rowcounter and ($1F shr(0{$ifndef twocol}+1{$endif} {$ifdef breakbeforecodepoint}+1{$endif} ))) = 0 then begin
        write(t,'<tr>');
        {$ifdef twocol}for j := 1 to 2 do{$endif} begin
          write(t,'<td>position<br>([[hexadecimal|hex]])');
          for k := 0 to maxcharset do begin
            write(t,'<td>[['+names[k]+']]');
          end;
        end;
      end;
      {$ifdef twocol}if (rowcounter and 1) =0 then{$endif} write(t,'<tr>');
      write(t,'<td>'+inttohex(i,2));
      inc(rowcounter);
      for j := 0 to maxcharset do begin
        write(t,'<td>');
        write(t,charlink(buildarray[i,j]));
        {$ifdef breakbeforecodepoint}
        if buildarray[i,j] >=0 then write(t,'<br><small>U+'+inttohex(buildarray[i,j],4)+'</small>');
        {$else}
        if buildarray[i,j] >=0 then write(t,'<sub>U+'+inttohex(buildarray[i,j],4)+'</sub>');
        {$endif}
      end;
      writeln(t,'</td>');
    end;
  end;
  writeln(t,'</table>');
  closefile(t);
end.
ANSI or not?
Once and for all, is it correct to refer to the Windows code pages as "ANSI"? Currently, some pages on Wikipedia say it's wrong (as ANSI never defined these code pages, but Microsoft just calls them "ANSI" anyway), while this article gives the impression that it is OK. --Abdull 23:53, 17 March 2006 (UTC)
- Well, Microsoft's technical documents use that term all over the place, and I don't believe anyone uses the term "ANSI code page" for anything else. I can't imagine ANSI are particularly happy about having their name put to something that isn't theirs, though. I guess it all depends on how you define right and wrong ;) Plugwash 10:44, 18 March 2006 (UTC)
- Microsoft is now leaning away from "ansi" and uses "active" instead to describe the current code page.
Redundant comment
"Recent Microsoft products and APIs use Unicode internally, but many applications and APIs (including Java) continue to…"
- Does the Java comment seem relevant? I mean, there are a million and one applications that use the older methods; shouldn't we name them here too?
Removal of chart
I have removed the charts of the code pages from this article, and put all the information that was left in a miscellaneous section. I am using the latest version of the screen reader JAWS on a fairly fast computer and JAWS froze for thirty seconds when I entered this page. I almost couldn't edit the section with the chart to remove it. I can understand that happening at somewhere like wikipedia:articles for deletion/yesterday or wikipedia:requests for adminship when it is busy, but JAWS should never freeze for more than a few seconds when I enter an article. Not even the article United States is that taxing on JAWS resources. The charts for code pages are available elsewhere on the Internet in an uneditable form. Graham87 08:29, 21 June 2007 (UTC)