Jump to content

Module:ImportProtein

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Wnt (talk | contribs) at 18:45, 25 March 2013 (I think the 1px border throws things off ever so slightly and isn't really mattering in two examples so far. Might try again at some point.). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- Export table for this module; the entry point p.main is attached to it below.
local p = {}

--- Draw a protein feature diagram (and optional legend table) as wikitext/HTML.
-- Every option is read from frame.args first and then from the parent frame's args.
--
-- Options:
--   file            (required) pasted protein-record text containing "Protein  a..b"
--                   plus "Region"/"Site" features
--   height, width   size of the protein box in pixels (default "50" / "600");
--                   must be numeric because they are used in arithmetic below
--   background      CSS colour of the protein box (default "#333333")
--   vtext           number of characters of a feature name shown vertically (default 25)
--   vwidth          pixels on either side of a vertical label reserved against overlap (default 4)
--   largeonlyregion pixels at the top of the box that only "large" features cover (default 20)
--   tableoutput     "collapsed" (default), "collapsible", "no" (suppress the table);
--                   any other value gives a plain wikitable
--   include         "name" "name" ... : show only these names ("all", the default, shows everything)
--   exclude         "name" "name" ... : never show these names
--   usenotes        "name" ... : for these names use the feature's /note text instead
--   substitute      "old":"new" ... : rename features
--   replaceregion   xx..yy:"text" ... : suppress per-feature labels inside xx..yy and
--                   draw the single label "text" for that range instead
--   toprow          "name" ... : draw these features as a colour-only strip at the top
--   toprowheight    height in pixels of that strip (default 10; only used if toprow matched)
--
-- @param frame Scribunto frame object
-- @return wikitext string, or a short "error: ..." string when the input is unusable
function p.main (frame)
    ---- Set up the initial frame parameters
   local debuglog="|}|}" -- sentinel meaning "nothing logged"; replace/append while debugging
   local args=frame.args
   local parent=frame:getParent()
   local pargs={}
   if parent then pargs=parent.args end
   local height=args.height or pargs.height or "50"
   local width=args.width or pargs.width or "600"
   local background=args.background or pargs.background or "#333333"
    -- numeric options fall back to their defaults when the supplied text is not a number
   local vtext=tonumber(args.vtext or pargs.vtext or 25) or 25 -- characters of legend text to display vertically down from motifs
   local largeonlyregion=tonumber(args.largeonlyregion or pargs.largeonlyregion or 20) or 20
   local tableoutput=args.tableoutput or pargs.tableoutput or "collapsed" -- the table doubles as the legend, so it is shown by default
   if tableoutput=="no" then tableoutput=nil end
   local vwidth=tonumber(args.vwidth or pargs.vwidth or 4) or 4 -- number of PIXELS to tag as not overwriteable with vertical text.
   local vclaim={} --- if vclaim[pixel] is true, that pixel is taken
   local vprotest='' --- list of positions not shown separated by spaces

    ---- These parameters send text that needs to be processed into tables
    ---- include is nil to include everything.  If it exists then only what is in it is used.
    ---- format is "I want only this" (any junk) "and this" etc.  Note weakness - a stray quote would foul up the whole template.
   local include=args.include or pargs.include or "all"
   if include == "all" then include = nil end
   local tinclude={}
   if include then
      for i in mw.ustring.gmatch(include,[[%"(.-)%"]]) do
         tinclude[i]=1
      end
   end
    ---- replaceregion defines a section with too many features to note individually.
    ---- Instead you group them with a new text.
    ---- The format is xx..yy:"Use this text"
   local replaceregion=args.replaceregion or pargs.replaceregion or ""
   local treplaceregion={s={},e={},t={}} -- parallel arrays: start, end, replacement text
   for i,j,k in mw.ustring.gmatch(replaceregion,[[(%d+)%.%.(%d+):%"(.-)%"]]) do
      table.insert(treplaceregion.s,tonumber(i))
      table.insert(treplaceregion.e,tonumber(j))
      table.insert(treplaceregion.t,tostring(k))
   end
    ---- exclude "Forget about this" (junk between ignored).
    ---- this prevents things from showing up even in the table and all motifs of this kind
   local exclude=args.exclude or pargs.exclude or "" -- an empty string simply yields an empty lookup table
   local texclude={}
   for i in mw.ustring.gmatch(exclude,[[%"(.-)%"]]) do
      texclude[i]=1
   end
    ---- usenotes "This is a crummy motif name" (junk between ignored).  Uses /note entries instead
   local usenotes=args.usenotes or pargs.usenotes or ""
   local tusenotes={}
   for i in mw.ustring.gmatch(usenotes,[[%"(.-)%"]]) do
      tusenotes[i]=1
   end
    ---- substitute "Don't like this wording":"That's what I want" (anything between these ignored)
   local substitute=args.substitute or pargs.substitute or ""
   local tsubstitute={}
   for i,j in mw.ustring.gmatch(substitute,[[%"(.-)%":%"(.-)%"]]) do
      tsubstitute[i]=j
   end
    ---- toprow "Put this motif in the top row, no vertical annotation"
    ---- If present, defines an upper part of the graphic to mark certain features by color only - most likely, helices and sheets and turns
   local toprowtext=args.toprow or pargs.toprow or ""
   local ttoprow={}
   local toprowheight=0 -- no height unless one exists
   local toprow -- boolean to mark if anything is actually on the top row
   for i in mw.ustring.gmatch(toprowtext,[[%"(.-)%"]]) do
      ttoprow[i]=1;toprow=true
   end
   if toprow then toprowheight=tonumber(args.toprowheight or pargs.toprowheight or 10) or 10 end
    ---- Check there is a protein sequence file and figure out where the CDS in it starts and ends
   local file=args.file or pargs.file
   if not(file) then return "error: use 'file=some cut-and-pasted NCBI protein sequence' to input a protein to be diagrammed" end
   local cdsstart, cdsend = mw.ustring.match(file,"Protein%s-(%d+)%.%.(%d+)")
   cdsstart=tonumber(cdsstart);cdsend=tonumber(cdsend)
    -- Both numbers are nil when the pattern did not match, so test for that before comparing.
    -- An end at or before the start would make cdswidth zero or negative, and cdswidth is a divisor below.
   if not cdsstart or not cdsend or cdsstart<1 or cdsend<=cdsstart then return [[error: the module expected a line "Protein: ''start amino acid''..''end amino acid''" to define the CDS.]] end
   local cdswidth=cdsend-cdsstart
    ---- Find and replace Site and Region to create unique separators
    ---- so that every one of these sections can be individually processed in the main loop
   file = mw.ustring.gsub(file,"Site%s+","|##|S") -- there are no pipe characters in the input or it would have choked
   file = mw.ustring.gsub(file,"Region%s+","|##|R")
   file = mw.ustring.gsub(file,"$","|##|") --- close last feature at the EOF
    ---- Load a set of colors to use for the different motifs.
    ---- Any run of non-whitespace characters on the page is taken as one color.  No format expectations.
   local colorpage=mw.title.new("Template:ImportProtein/DefaultColors")
   local color={}
   if colorpage then
       local content=colorpage:getContent()
       if content then
          for x in mw.ustring.gmatch(content,"(%S+)") do
              table.insert(color,x)
          end
       end
   end
    -- built-in palette, used when the color page is missing or empty
   if #color<1 then color={"#000055","#000099","#0000CC","#0000FF","#550055","#550099","#5500CC","#5500FF","#990055","#990099","#9900CC","#9900FF","#CC0055","#CC0099","#CC00CC","#CC00FF","#FF0000","#FF0055","#FF0099","#FF00CC","#FF00FF","#005555","#005599","#0055CC","#0055FF","#555555","#555599","#5555CC","#5555FF","#995555","#995599","#9955CC","#9955FF","#CC5555","#CC5599","#CC55CC","#CC55FF","#FF5500","#FF5555","#FF5599","#FF55CC","#FF55FF"} end
   local claim={};local nextcolor=1 -- keeps track of the colors assigned to specific nkeys throughout the loop
       ---- Begin the output and graphics files
   local output
   local tlegend="" -- legend for top row entries only, shown above table
   if tableoutput=="collapsed" then
      output=[[{| class="wikitable collapsible collapsed" style="width:]].. width .. [[px;"]] .. "\n" .. [[!colspan=4|List of protein features]] .. "\n" .. [[|-]]
   elseif tableoutput=="collapsible" then
      output=[[{| class="wikitable collapsible" style="width:]].. width .. [[px;"]] .. "\n" .. [[!colspan=4|List of protein features]] .. "\n" .. [[|-]]
   else
      output=[[{| class="wikitable"]]
   end
   local graphics=[[<div style="position:relative;background-color:]].. background .. [[;width:]] .. width .. [[px;height:]] .. height .. [[px;">]]
    ---- MAIN LOOP ----
    ---- this goes through features one by one and marks them down in "output" (legend/table) and "graphics" (protein box and vertical annotation)
    ---- Note that this does NOT create an array of features to sort, but is purely once through.
    ---- This means, for example, that there is no easy way to reposition adjacent motifs left and right to fit automatically.
    ---- The pattern starts on the second "#" of one "|##|" separator and stops on the first "#" of the next,
    ---- so consecutive features can share a separator.
   for feature in mw.ustring.gmatch(file,"#|(.-)|#") do
       local t=mw.ustring.match(feature,"^(%a)") -- S or R placed in previous find/replace
       local s=mw.ustring.match(feature,"(%d+)") -- first number is the beginning of site or region
       local e=mw.ustring.match(feature,"^.%s-%d+%.%.(%d+)") or s -- second number in xx..yy range; a lone position ends where it starts

       if s then
           ---- decide on the name to be used for the motif and annotation
          local n,c
          if t=="R" then n=mw.ustring.match(feature,[[/region_name=%"(.-)%"]]) end
          if t=="S" then n=mw.ustring.match(feature,[[/site_type=%"(.-)%"]]) end
          n=tostring(n) -- a feature without a name becomes the literal string "nil"
          if tusenotes[n] then n=mw.ustring.match(feature,[[/note=%"(.-)%"]]) or n end
          n=mw.ustring.match(n,"^%s*(.-)%s*$") or n -- trim white space on either end
          n=mw.ustring.gsub(n,"\n"," ") -- remove line feeds (only the first gsub result is kept)
          if tsubstitute[n] then n=tostring(tsubstitute[n]) end
          n = mw.ustring.match(n,"(.+)%.") or n -- drops the last period and anything after it
           ---- from the name (n) pull out an nkey that excludes parenthesized stuff
           ---- each unique nkey can claim its own color to use from here on out
          local nkey=mw.ustring.match(n,"(.+)[%.;,%(%[]") or n
          local newcolor=false -- is this a new color (if so, then if it is toprow, then add to legend for those)
          if claim[nkey] then
             c=claim[nkey]
          else
              -- wrap around the palette so that running out of colors never yields nil
             c=color[(nextcolor-1) % #color + 1]
             claim[nkey]=c;nextcolor=nextcolor+1;newcolor=true
          end
          local cstyle=[[style="color:]] .. c .. [[;"|]]
           ---- decide whether to show the motif, and crop it to the CDS
          local showthismotif=true
          s=tonumber(s);e=tonumber(e)
          if s<cdsstart then s=cdsstart end
          if e>cdsend then e=cdsend end
          if s==cdsstart and e==cdsend then showthismotif=nil end -- a feature spanning the whole CDS is not drawn
          if include and not tinclude[n] then showthismotif=nil end -- if include is set, and n isn't in it, don't add to table or graphic
          if texclude[n] then showthismotif=nil end -- if n is in the exclude list don't add
          if showthismotif then
              ---- update the table output for the legend
             local typelabel="site"
             if t=="R" then typelabel="region" end
             output = output .. "\n|" .. cstyle .. typelabel .. "\n|"
             output = output .. cstyle .. tostring(s) .. "\n|" .. cstyle .. tostring(e) .. "\n|" .. cstyle .. n .. "\n|-"
              ---- update the graphic display: first determine if the block is large to be displayed full height and annotated inside itself
             nkey=mw.ustring.sub(nkey,1,vtext) -- for graphics purposes, truncate the string (default 25 characters)
             local large
             local boxleft=math.floor(width*tonumber(s)/cdswidth)
             local boxwidth=math.floor(width*tonumber(e)/cdswidth)-boxleft
              -- "large" means the box is wider than 8 pixels per character of its label
             if boxwidth>8*tonumber(mw.ustring.len(nkey)) then large=true else large=nil end
              ---- then work out the horizontal or vertical display
             local vertical -- height substring of the drawn block (may also carry a "top:" offset)
             local annot="" -- text contents of a large block
             if ttoprow[n] then
                vertical=tostring(toprowheight)
                if newcolor then tlegend=tlegend..[[<span style="background-color:]] .. c .. [[;">&nbsp;&nbsp;</span> ]] .. nkey .. "\n" end
                nkey=""
             else
                if large then
                    vertical=tostring(height-toprowheight)
                    if toprow then vertical=vertical .. "px;top:" .. tostring(toprowheight) end
                    annot="'''" .. nkey .."'''"
                    nkey="" -- no vertical text display
                else
                    vertical=tostring(math.floor(height) - toprowheight - largeonlyregion) .. "px;top:" .. tostring(toprowheight + largeonlyregion)
                    nkey=mw.ustring.gsub(nkey,"(.)","%1<br />") -- verticalize the text
                end
             end
             local z=10000-1*boxwidth --- smaller elements in front of larger ones
             if not(large) then z=z+10000 end --- large elements reliably to the back
               -- draw graphics within the protein rectangle
             graphics = graphics .. [[<div style="position:absolute;overflow:hidden;z-index:]] .. z .. [[;left:]] .. boxleft .. [[px;border-top:0px;border-bottom:0px;border-left:1px;border-right:1px;border-style:solid;border-color:]].. c .. [[;background-color:]].. c .. [[;width:]] .. boxwidth .. [[px;height:]] .. vertical .. [[px;text-align:center;">]] .. annot .. [[</div>]]
               -- draw annotations vertically below it
               -- don't do at all if no text (nkey=="", such as on the top row or for large blocks)
             if not (nkey=="") then
                  -- first decide if in a replaceregion - if so, don't draw
                local toreplace;local ri=1
                while treplaceregion.s[ri] do
                   local rs=treplaceregion.s[ri]
                   local re=treplaceregion.e[ri]
                   if s>=rs and e<=re then toreplace=true;break end
                   ri=ri+1
                end
                if not toreplace and not large then
                    --- center vt in the feature; then claim pixels one by one around it.
                    --- Don't draw in a claimed pixel, but file a protest at bottom.
                   local vt=math.floor(boxleft+boxwidth/2 - 2) -- vertical text's horizontal position
                   if not vclaim[vt] then
                      for i = vt-vwidth,vt+vwidth,1 do
                          vclaim[i]=true
                      end
                      graphics = graphics .. [[<span style="position:absolute;text-align:center;line-height:90%;font-size:85%;overflow:visible;z-index:100;left:]] .. vt .. [[px;top:]] .. math.floor(height+5) .. [[px;">]] .. nkey .. [[</span>]]
                   else
                      vprotest=vprotest .. s .. "-" .. e .. " "
                   end -- (if not vclaim)
                end -- (if not toreplace and not large)
             end -- (if nkey is not empty)
          end -- (if showthismotif)
       end -- (if s)
   end -- for feature
    --- we're out of the loop - now draw annotations for the chosen replace regions based on user text
   local ri=1
   while treplaceregion.s[ri] do
      local rs=treplaceregion.s[ri]
      local re=treplaceregion.e[ri]
      local rt=mw.ustring.gsub(mw.ustring.sub(treplaceregion.t[ri],1,vtext),"(.)","%1<br />") -- verticalize the text
      local boxleft=math.floor(width*tonumber(rs)/cdswidth)
      local boxwidth=math.floor(width*tonumber(re)/cdswidth)-boxleft
      local vt = math.floor(boxleft+boxwidth/2 -2) -- same centering formula as in the main loop
       -- this ignores vclaim - it's a user input, therefore repositionable field
      graphics = graphics .. [[<span style="position:absolute;text-align:center;line-height:90%;font-size:85%;overflow:visible;z-index:100;left:]] .. vt .. [[px;top:]] .. math.floor(height+5) .. [[px;">]] .. rt .. [[</span>]]
      ri=ri+1
   end
    -- The legend table is opened with "{|" in "output", so its closing "|}" belongs with it.
    -- When the table is suppressed neither part is emitted, leaving only the outer table's closer.
   local legendtable=""
   if tableoutput then legendtable=output .. "\n|}\n" end
   if tlegend ~= "" then tlegend = [[<span style="width:]]..width..[[;">]] .. [[''Top row:'' ]] .. tlegend .. [[</span>]] end
   if vprotest ~= "" then vprotest = "''Overlapping vertical annotations not shown above: " .. vprotest .. "''" end
   if debuglog == "|}|}" then debuglog="" else debuglog = debuglog .. "\n" end
   if vtext>2 then vtext=vtext-2 end -- make up for extra return required to start a table at the end there.
    -- the run of newlines reserves vertical room under the box for the absolutely positioned labels
   local result = [=[{| style="width:]=]..width..[[px;"]] .. "\n|".. graphics .. [[</div><span style="line-height:90%;font-size:85%;">]] .. mw.ustring.rep("\n",vtext) .. "</span>" .. tlegend .. vprotest .. "\n" .. legendtable .. debuglog .. "|}\n"
   return result

end

-- Return the export table as the value of this module chunk.
return p