Jump to content

Talk:Windows code page

Page contents not supported in other languages.
From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by 131.107.0.73 (talk) at 20:55, 18 May 2007. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

Way too big?

This page should link to the individual code pages instead of listing all of the tables at once (for one thing it is missing most of the mappings anyway, like for 932).

Generator

The generator code is given below. readtxt.pas can be obtained from http://bewareserv.sourceforge.net/. Most of the data used came from http://www.unicode.org/PUBLIC/mappings; the .mspx.html files, used where unicode.org didn't have a mapping available for the code page in question, are from http://www.microsoft.com/globaldev/reference/oem.mspx

//byposition: reads code page mapping files and writes a wiki table
//(output.txt) with one row per byte position and one column per code page.
program byposition;

uses
  sysutils,readtxt; //we use our own text reader as the delphi one can't handle
                    //unix format text
const
  //highest column index; columns 0..maxcharset are loaded and printed
  //(an earlier value was 9)
  maxcharset =12;//9;
  //first byte position that gets a table row; rows run from startat to 255
  startat = $80;
//showallchars: when defined every position gets a row; when undefined only
//positions where some column differs from column 0 are printed
{$define showallchars}
//breakbeforecodepoint: when defined the U+xxxx value is put on its own line
//in <small>, otherwise it follows the character in <sub>; it also changes
//how often the header row is repeated
{$define breakbeforecodepoint}
//note: the main program also tests a "twocol" symbol, which is not defined here
var
  //buildarray[position,column] = unicode code point, or -1 for no mapping
  buildarray : array[0..255,0..maxcharset] of longint;
  //names[column] = text placed inside [[ ]] in the header row
  names : array[0..31] of string;
//Loads one mapping file into column "number" of buildarray and stores the
//header link text for that column in names[number].
//
//  name     - text written between [[ and ]] in the table header
//  number   - column index; must not exceed maxcharset
//  filename - mapping file to read; two line layouts are recognised:
//               unicode.org:  0xPP?0xUUUU...  ('x' at columns 2 and 7)
//               ms:           PP = U+UUUU...
//             where PP is the byte position and UUUU the code point, both hex.
//
//Lines in any other layout, and lines whose hex fields do not parse, are
//skipped, leaving the corresponding buildarray entry untouched.
procedure processcharset(name:string;number:byte;filename:string);
var
  t: treadtext;
  line:string;
  bytepos,codepoint:longint;
begin
  //buildarray only has columns 0..maxcharset; refuse anything beyond that
  //instead of writing outside the array
  if number>maxcharset then begin
    writeln('processcharset: column ',number,' is out of range, skipping '+filename);
    exit;
  end;
  names[number] := name;

  readtext_init(t,filename);
  repeat
    line := readtext_line(t);
    bytepos := -1;
    codepoint := -1;
    if (length(line)>=11) and (line[2]='x') and (line[7]='x') then begin
      //unicode.org format
      bytepos := strtointdef('$'+copy(line,3,2),-1);
      codepoint := strtointdef('$'+copy(line,8,4),-1);
    end else if (length(line)>=11) and (copy(line,3,5)=' = U+') then begin
      //ms format
      bytepos := strtointdef('$'+copy(line,1,2),-1);
      codepoint := strtointdef('$'+copy(line,8,4),-1);
    end;
    //strtointdef yields -1 for text that is not valid hex, so a line that
    //merely looks like a mapping cannot abort the run or store garbage
    if (bytepos>=0) and (codepoint>=0) then buildarray[bytepos,number] := codepoint;
  until readtext_eof(t);
end;
//Main program: loads every code page column, then writes output.txt as an
//html/wiki table. Each row is one byte position (startat..255); each cell
//shows the mapped character (or a link to the control code's name) followed
//by its U+xxxx value. The header row is repeated at regular intervals.
var
  t: textfile;
  i,j,k : integer;
  goodline : boolean;
  rowcounter : integer;
  comparevalue : integer;
begin
  //start with every position of every column marked as "no mapping"
  for i := 0 to 255 do for j := 0 to maxcharset do buildarray[i,j] := -1;

  //alternative column set (windows code pages); uses columns 0..9
  {processcharset('windows-874|874',0,'CP874.txt');
  processcharset('windows-1250|1250',1,'CP1250.txt');
  processcharset('windows-1251|1251',2,'CP1251.txt');
  processcharset('windows-1252|1252',3,'CP1252.txt');
  processcharset('windows-1253|1253',4,'CP1253.txt');
  processcharset('windows-1254|1254',5,'CP1254.txt');
  processcharset('windows-1255|1255',6,'CP1255.txt');
  processcharset('windows-1256|1256',7,'CP1256.txt');
  processcharset('windows-1257|1257',8,'CP1257.txt');
  processcharset('windows-1258|1258',9,'CP1258.txt');}

  processcharset('code page 437|437',0,'CP437.txt');
  processcharset('code page 720|720',1,'720.mspx.html');
  processcharset('code page 737|737',2,'CP737.txt');
  processcharset('code page 775|775',3,'CP775.txt');
  processcharset('code page 850|850',4,'CP850.txt');
  processcharset('code page 852|852',5,'CP852.txt');
  processcharset('code page 855|855',6,'CP855.txt');
  processcharset('code page 857|857',7,'CP857.txt');
  processcharset('code page 858|858',8,'858.mspx.html');
  processcharset('code page 862|862',9,'CP862.txt');
  processcharset('code page 866|866',10,'CP866.txt');
  processcharset('windows-874|874',11,'CP874.txt');
  //the data file has to match the column label: the 1258 column reads the
  //1258 mapping, not the 1250 one
  processcharset('windows-1258|1258',12,'CP1258.txt');

  assignfile(t,'output.txt');
  rewrite(t);
  writeln(t,'<table {{prettytable}}>');

  rowcounter := 0;
  for i := startat to 255 do begin
    goodline := false;
    {$ifdef showallchars}
      goodline := true;
    {$else}
      //only print positions where some column disagrees with column 0
      comparevalue := buildarray[i,0];
      for j := 1 to maxcharset do begin
        if comparevalue <> buildarray[i,j] then goodline := true;
      end;
    {$endif}
    if goodline then begin
      //repeat the header row whenever the low bits of the row counter are
      //zero; the mask is $1F shifted right once for each of "not twocol" and
      //"breakbeforecodepoint", so taller rows get headers more often
      if (rowcounter and ($1F shr(0{$ifndef twocol}+1{$endif} {$ifdef breakbeforecodepoint}+1{$endif} ))) = 0 then begin
        write(t,'<tr>');
        {$ifdef twocol}for j := 1 to 2 do{$endif} begin

          write(t,'<td>position<br>([[hexadecimal|hex]])');
          for k := 0 to maxcharset do begin
            write(t,'<td>[['+names[k]+']]');
          end;
        end;
      end;
      //in twocol mode two positions share one table row
      {$ifdef twocol}if (rowcounter and 1) =0 then{$endif} write(t,'<tr>');
      write(t,'<td>'+inttohex(i,2));
      inc(rowcounter);

      for j := 0 to maxcharset do begin
        write(t,'<td>');
        //control codes and the two invisible latin-1 characters are shown as
        //links to their names; everything else as a numeric character reference
        case buildarray[i,j] of
          -1  : ; //no mapping: leave the cell empty
          $00 : write(t,'[[NUL]]');
          $01 : write(t,'[[SOH]]');
          $02 : write(t,'[[STX]]');
          $03 : write(t,'[[ETX]]');
          $04 : write(t,'[[EOT]]');
          $05 : write(t,'[[ENQ]]');
          $06 : write(t,'[[ACK]]');
          $07 : write(t,'[[BEL]]');
          $08 : write(t,'[[BS]]');
          $09 : write(t,'[[TAB]]');
          $0A : write(t,'[[LF]]');
          $0B : write(t,'[[VT]]');
          $0C : write(t,'[[FF]]');
          $0D : write(t,'[[CR]]');
          $0E : write(t,'[[SO]]');
          $0F : write(t,'[[SI]]');

          $10 : write(t,'[[DLE]]');
          $11 : write(t,'[[DC1]]');
          $12 : write(t,'[[DC2]]');
          $13 : write(t,'[[DC3]]');
          $14 : write(t,'[[DC4]]');
          $15 : write(t,'[[NAK]]');
          $16 : write(t,'[[SYN]]');
          $17 : write(t,'[[ETB]]');
          $18 : write(t,'[[CAN]]');
          $19 : write(t,'[[EM]]');
          $1A : write(t,'[[SUB]]');
          $1B : write(t,'[[ESC]]');
          $1C : write(t,'[[FS]]');
          $1D : write(t,'[[GS]]');
          $1E : write(t,'[[RS]]');
          $1F : write(t,'[[US]]');

          $80 : write(t,'[[PAD]]');
          $81 : write(t,'[[HOP]]');
          $82 : write(t,'[[BPH]]');
          $83 : write(t,'[[NBH]]');
          $84 : write(t,'[[IND]]');
          $85 : write(t,'[[NEL]]');
          $86 : write(t,'[[SSA]]');
          $87 : write(t,'[[ESA]]');
          $88 : write(t,'[[HTS]]');
          $89 : write(t,'[[HTJ]]');
          $8A : write(t,'[[VTS]]');
          $8B : write(t,'[[PLD]]');
          $8C : write(t,'[[PLU]]');
          $8D : write(t,'[[RI]]');
          $8E : write(t,'[[SS2]]');
          $8F : write(t,'[[SS3]]');

          $90 : write(t,'[[DCS]]');
          $91 : write(t,'[[PU1]]');
          $92 : write(t,'[[PU2]]');
          $93 : write(t,'[[STS]]');
          $94 : write(t,'[[CCH]]');
          $95 : write(t,'[[MW]]');
          $96 : write(t,'[[SPA]]');
          $97 : write(t,'[[EPA]]');
          $98 : write(t,'[[SOS]]');
          $99 : write(t,'[[SGCI]]');
          $9A : write(t,'[[SCI]]');
          $9B : write(t,'[[CSI]]');
          $9C : write(t,'[[ST]]');
          $9D : write(t,'[[OSC]]');
          $9E : write(t,'[[PM]]');
          $9F : write(t,'[[APC]]');

          $A0 : write(t,'[[NBSP]]');
          $AD : write(t,'[[SHY]]');


          else write(t,'[[&#x'+inttohex(buildarray[i,j],4)+';]]');
        end;
        //append the code point itself for every mapped position
        {$ifdef breakbeforecodepoint}
          if buildarray[i,j] >=0 then write(t,'<br><small>U+'+inttohex(buildarray[i,j],4)+'</small>');
        {$else}
          if buildarray[i,j] >=0 then write(t,'<sub>U+'+inttohex(buildarray[i,j],4)+'</sub>');
        {$endif}
      end;
      writeln(t,'</td>');


    end;
  end;
  writeln(t,'</table>');
  closefile(t);

end.

ANSI or not?

Once and for all, is it correct to apply the term "ANSI" to the Windows code pages? Currently, some pages on Wikipedia say it's wrong (as ANSI never defined these code pages, but Microsoft just calls them "ANSI" anyway), while this article gives the impression that it is OK. --Abdull 23:53, 17 March 2006 (UTC)[reply]

Well, Microsoft's technical documents use that term all over the place and I don't believe anyone uses the term "ANSI code page" for anything else. I can't imagine ANSI are particularly happy about having their name put to something that isn't theirs, though. I guess it all depends on how you define right and wrong ;) Plugwash 10:44, 18 March 2006 (UTC)[reply]
Microsoft is now leaning away from "ansi" and uses "active" instead to describe the current code page.

Redundant comment

"Recent Microsoft products and APIs use Unicode internally, but many applications and APIs (including Java) continue to…"

Does the Java comment seem relevant? I mean, there are a million and one applications that use the older methods; shouldn't we name them here too?