Template:US Demographics/perlscript

This template makes a table of US state demographics in the form specified by Template:US DemogTable. The template was constructed by the following script, which extracts numbers from US census data files from http://www.census.gov/popest/states/asrh/tables/
To generate a table for a specified state
{{US Demographics|state=<state>}}
To generate a table for the state based on the pagename
{{US Demographics}}
These tables use the "single or in combination" numbers, so some rows sum to more than 100%.
This uses the CSVsplit function from the CSV module available at: http://www.rath.ca/Misc/Perl_CSV/
Please click on EDIT to grab a copy of this script



# HOW TO USE THIS SCRIPT:

# You will need a copy of Perl on your PC. You can
# download a copy at http://www.activestate.com/
# and install it in about ten minutes. 

# This script is indented one space in order to display 
# properly at Wikipedia. You'll need to outdent that space
# in order to make the perl here-docs function properly. 

# Download all the CSV files. They will be located at
# http://www.census.gov/popest/states/asrh/tables/
# with filenames like SC-EST2005-03-42.csv
# The EST2005-03 indicates which file series is being used.
# The 42 indicates the state being referenced.

# File formats are going to change. The value 7 assigned to
# $basiscol (which the year 2000 get_data calls use) indicates
# that the desired data is in column 7.  When the 2006 estimates
# come out, it'll probably shove everything over, so you would
# change that to 8.

# You'll also need to change the value of SC-EST2005- to whatever
# the new filename is.

# Put all the CSV files in the same directory with this script,
# and execute this script. It will generate a file named
# template.txt, which is the data which needs to be inserted
# in Template:US Demographics

# It's a quick-and-dirty script, which is what Perl was
# intended for. You will want to check your data, to make
# sure you're grabbing the right numbers from the file.
# Otherwise, you're going to get some mighty strange tables.

# This script is NOT subject to the GNU Free Documentation
# License, as it expressly prohibits alteration, and this
# script will be useless if NOT altered.

# Script copyright by user ClairSamoht, August 2006.
# You may download it, modify it, and execute it ONLY for
# maintaining Wikipedia's Template:US Demographics
# You may NOT republish or redistribute this script, or
# use it for any purpose other than intended.
# But then, it's an awfully simple script. If you're
# smart enough to figure out how to modify it for other
# purposes, you can write the script you need from scratch
# in less time than to modify this script.

# CSV module from: http://www.rath.ca/Misc/Perl_CSV/
use CSV;

# pct (VALUE, BASE)
#
# Formats VALUE as a percentage of BASE.
#
#   VALUE - numerator; may contain thousands separators ("1,234")
#   BASE  - denominator; may contain thousands separators
#
# Returns a 7-character string: either " %6.2f" applied to
# 100 * VALUE / BASE, or "    na " when BASE is below 200.
#
# Only lexical variables are used, so repeated calls cannot disturb
# (or be disturbed by) package globals of the same names.
sub pct
{
my ($inval, $base) = @_;
$inval = stripcommas ($inval);
$base  = stripcommas ($base);

# A base under 200 is reported as "na". Because an empty or zero base
# is also caught here, the division below can never divide by zero.
return "    na " if $base < 200;

return sprintf (" %6.2f", 100 * $inval / $base);
}

# stripcommas (VALUE)
#
# Returns VALUE with every comma removed, e.g. "1,234,567" becomes
# "1234567".  An undefined VALUE yields the empty string.
#
# Works on a lexical copy of the argument, so neither the caller's
# variable nor any package global (for instance a loop variable in the
# calling code) is modified.
sub stripcommas
{
my ($text) = @_;
return '' unless defined $text;
$text =~ tr/,//d;    # delete all commas in place on the copy
return $text;
}

# get_data (LINE_INDEX, FIELD_INDEX)
#
# Fetches one field from the CSV file currently held in the package
# array @lines (filled by make_table).
#
#   LINE_INDEX  - 0-based index into @lines
#   FIELD_INDEX - 0-based index into the list CSVsplit returns for
#                 that line
#
# Returns the field with its commas removed, or the empty string when
# the requested line does not exist.
sub get_data
{
my ($useline, $item_number) = @_;

# A file shorter than expected has no such line; report "no data".
return '' unless defined $lines[$useline];

my @fields = CSVsplit ($lines[$useline]);
return stripcommas ($fields[$item_number]);
}

# make_table (FILENAME)
#
# Reads one census CSV file and appends one "| <state> = {{US DemogTable...}}"
# entry to the already-open output handle OFILE.
#
#   FILENAME - name of the CSV file to read
#
# Package globals used:
#   %DATA  - cleared, then filled with 'fname', 'state' and the raw counts
#            keyed like 'white2005', 'nh_AIAN2000', 'h_NHPI2005', 'tot2000'
#   @lines - the lines of the file; get_data reads from this array
#   OFILE  - output handle; must be opened by the caller
#
# If the file cannot be opened a warning is issued and nothing is
# written to OFILE for it.
sub make_table
{
foreach my $key (keys %DATA)
        {
        $DATA{$key} = '';
        }
$DATA{'fname'} = $_[0];

unless (open (FILE, '<', $DATA{'fname'}))
        {
        warn "can't open $DATA{'fname'}: $!\n";
        return;
        }
@lines = <FILE>;
close (FILE);
my $count = @lines;
print "$DATA{'fname'} has $count lines\n";

# The state name is taken from the second line of the file: the text
# after the first " for " and before the following ": ".
my $title = defined $lines[1] ? $lines[1] : '';
my $state = (split (/ for /, $title))[1];
$state = '' unless defined $state;
$state = (split (/: /, $state))[0];
$state = '' unless defined $state;
$DATA{'state'} = $state;

my @races = ('white', 'black', 'AIAN', 'asian', 'NHPI');

# Index into @lines of the first of the five race rows for each group.
# '' is all persons, 'nh_' and 'h_' are the key prefixes used in %DATA.
my %first_line = ('' => 13, 'nh_' => 27, 'h_' => 41);

# Field index holding each year's figures.  $basiscol is the one to
# change when a new file series moves the year 2000 column.
my $basiscol = 7;
my %column = ('2005' => 1, '2000' => $basiscol);

foreach my $year ('2005', '2000')
        {
        $DATA{'tot' . $year} = get_data (4, $column{$year});
        foreach my $group ('', 'nh_', 'h_')
                {
                foreach my $offset (0 .. $#races)
                        {
                        $DATA{$group . $races[$offset] . $year} =
                            get_data ($first_line{$group} + $offset, $column{$year});
                        }
                }
        }

#to examine data, and make sure it matches numbers 
#in spreadsheet version of file, change 0 to 1
if (0) 
        {
        printf "%-11s %s\n", 'state', $DATA{'state'};
        printf "%-11s %s\n", 'fname', $DATA{'fname'};
        my $first_year = 1;
        foreach my $year ('2005', '2000')
                {
                print "\n" unless $first_year;
                $first_year = 0;
                printf "%-11s %s\n", 'tot' . $year, $DATA{'tot' . $year};
                foreach my $group ('', 'h_', 'nh_')
                        {
                        print "\n";
                        foreach my $race (@races)
                                {
                                my $key = $group . $race . $year;
                                (my $label = $key) =~ s/_//;
                                printf "%-11s %s\n", $label, $DATA{$key};
                                }
                        }
                }
        }

# Every word below that matches a placeholder name is replaced further
# down; the terminator must stay in column 0.
my $template = <<EOHEADER;
| state = {{US DemogTable{{!}}state{{!}}fname{{!}}=
{{!}}p_white2000{{!}}p_black2000{{!}}p_AIAN2000{{!}}p_asian2000{{!}}p_NHPI2000{{!}}=
{{!}}h_white2000{{!}}h_black2000{{!}}h_AIAN2000{{!}}h_asian2000{{!}}h_NHPI2000{{!}}=
{{!}}p_white2005{{!}}p_black2005{{!}}p_AIAN2005{{!}}p_asian2005{{!}}p_NHPI2005{{!}}=
{{!}}h_white2005{{!}}h_black2005{{!}}h_AIAN2005{{!}}h_asian2005{{!}}h_NHPI2005{{!}}=
{{!}}d_white{{!}}d_black{{!}}d_AIAN{{!}}d_asian{{!}}d_NHPI{{!}}=
{{!}}x_white{{!}}x_black{{!}}x_AIAN{{!}}x_asian{{!}}x_NHPI{{!}}=
{{!}}h_white{{!}}h_black{{!}}h_AIAN{{!}}h_asian{{!}}h_NHPI}}
EOHEADER

my $fname = $DATA{'fname'};
$fname =~ s/SC-EST2005-//i;

# Ordered list of [placeholder, replacement] pairs.  Order matters:
# the year-suffixed names (h_white2000, h_white2005) must be replaced
# before the bare names (h_white) that are their prefixes.
my @subst = (['state', $state], ['fname', $fname]);

# Share of the year's total population: p_ rows use the all-persons
# counts, h_ rows use the h_ counts.
foreach my $year ('2000', '2005')
        {
        my $total = $DATA{'tot' . $year};
        foreach my $kind (['p_', ''], ['h_', 'h_'])
                {
                my ($placeholder_prefix, $data_prefix) = @$kind;
                foreach my $race (@races)
                        {
                        push @subst, [$placeholder_prefix . $race . $year,
                            scalar pct ($DATA{$data_prefix . $race . $year}, $total)];
                        }
                }
        }

# Change from 2000 to 2005 as a percentage of the 2000 count:
# d_ from the all-persons counts, x_ from nh_, h_ from h_.
foreach my $kind (['d_', ''], ['x_', 'nh_'], ['h_', 'h_'])
        {
        my ($placeholder_prefix, $data_prefix) = @$kind;
        foreach my $race (@races)
                {
                my $new = $DATA{$data_prefix . $race . '2005'};
                my $old = $DATA{$data_prefix . $race . '2000'};
                push @subst, [$placeholder_prefix . $race,
                    scalar pct ($new - $old, $old)];
                }
        }

foreach my $pair (@subst)
        {
        my ($placeholder, $text) = @$pair;
        $template =~ s/$placeholder/$text/gi;
        }
print OFILE $template;
}

# Main program: write template.txt in the current directory, containing
# a {{#switch:}} header, one make_table entry for every *.csv file found
# in the current directory, and the closing wiki markup.
opendir(DIR, ".") or die "can't opendir .: $!";
# readdir returns names in no particular order; sort them so that the
# generated file is the same from run to run.
my @files = sort readdir (DIR);
closedir (DIR);
my $outfile = "template.txt";
# OFILE stays a bareword handle because make_table prints to it by name.
open (OFILE, '>', $outfile) or die "can't write $outfile: $!";
print OFILE <<TOPOFILE;
<noinclude>{{esoteric}}For documentation see [[/perlscript]]
</noinclude><includeonly>{{#switch: {{{state|{{PAGENAME}}}}}
TOPOFILE
foreach my $file (@files)
        {
        if ($file =~ m/\.csv$/i)
                {
                make_table ($file);
                }
        }
print OFILE <<EOFILE;
}}</includeonly>
<noinclude>
[[Category:Templates using ParserFunctions|{{PAGENAME}}]]
[[Category:Demographics of the United States|*]]
</noinclude>
EOFILE
# Buffered write errors only surface when the handle is closed.
close (OFILE) or die "can't close $outfile: $!";