User:Tsirel/Bot code
Appearance
parse.py
######################################################################
#                                                                    #
# parse.py                                                           #
#                                                                    #
# Main infile "all.in"; its lines: "[[article]] tags".               #
# Tags are space-separated,                                          #
# each either "(rvnumber:rvtype)"                                    #
# or a name (corename or aroundname) and maybe suffix "1" or "2".    #
# Parameters from four infiles.                                      #
# Outfile: all.dat (for format.py).                                  #
#                                                                    #
######################################################################

# The code below deliberately sticks to the syntax subset that is valid
# under both Python 2 and Python 3 (single-argument print(...), no
# tuple-parameter lambdas, no string exceptions).


def load_abbreviations(filename):
    """Read a parameter file and return only the abbreviations.

    The file holds a Python literal: a list of (abbreviation, full text)
    pairs.  Only the abbreviations are needed here; the full texts are
    for "format.py".
    """
    with open(filename) as infile:
        # NOTE(review): eval() executes whatever the file contains; this is
        # only acceptable because the parameter files are written by the
        # operator.  ast.literal_eval would be a safer drop-in if the files
        # are pure literals -- confirm before switching.
        pairs = eval(infile.read())
    return [abbr for (abbr, _text) in pairs]


def split_line(line):
    """Split a stripped "[[article]] tags" line.

    Returns (article, tags) where article has its first letter uppercased
    and tags is the list of space-separated tags (possibly empty).
    Returns None when the line is malformed: it does not start with "[[",
    has no closing "]]", or the article title is empty.
    """
    if not line.startswith("[["):
        return None
    end = line.find("]]")
    if end <= 2:                    # -1: no "]]";  2: empty title "[[]]"
        return None
    article = line[2:end]
    article = article[0].upper() + article[1:]      # article -> Article
    return article, line[end + 2:].split()


def parse_tags(tags, names, rvnumber, rvtype, line=""):
    """Parse the tags of one line.

    tags     -- list of tag strings
    names    -- allowed three-character names (core and around)
    rvnumber -- allowed characters for A in "(A:B)"
    rvtype   -- allowed characters for B in "(A:B)"
    line     -- the whole input line, used only in diagnostics

    Returns (tags0, tags1): tags0 is the list of (A, B) pairs from the
    "(A:B)" tags, tags1 is the list of the other tags (suffix kept).

    A "(A:B)" tag without the closing ")" is reported and stops the
    processing of the remaining tags of the line; an unknown name is
    reported and skipped.  Any other syntax error raises ValueError.
    """
    tags0 = []                      # for "(A:B)" tags converted to (A,B)
    tags1 = []                      # for other tags
    for tag in tags:
        if tag.startswith("("):     # "(A:B)" tag?
            maintag = tag[1:]
            if not maintag.endswith(")"):
                print("!!! " + maintag)
                break               # give up on the rest of this line
            maintag = maintag[:-1]
            if ":" not in maintag:
                print("?? " + tag + " " + line)
                raise ValueError("missing ':' in tag " + tag)
            (first, second) = maintag.split(":", 1)
            for x in first:         # syntax check of A
                if x not in rvnumber:
                    print("?? " + x + " " + line)
                    raise ValueError("oops-1: unknown rvnumber " + repr(x))
            for x in second:        # syntax check of B
                if x not in rvtype:
                    print("?? " + x + " " + line)
                    raise ValueError("oops-2: unknown rvtype " + repr(x))
            tags0.append((first, second))
        else:                       # other tag, not "(A:B)"
            # 3 chars, and maybe 1 char suffix
            if len(tag) not in (3, 4):
                raise ValueError("bad tag length: " + repr(tag)
                                 + " in " + repr(line))
            if tag[0:3] not in names:
                print("BAD TAG: " + tag)
                continue
            if len(tag) == 4 and tag[3] not in ("1", "2"):
                raise ValueError("bad tag suffix: " + repr(tag)
                                 + " in " + repr(line))
            tags1.append(tag)
    return tags0, tags1


def main():
    """Read the parameters and "all.in", write the parsed lines to "all.dat"."""
    corenames = load_abbreviations("corenames.dat")
    aroundnames = load_abbreviations("aroundnames.dat")
    rvnumber = load_abbreviations("rvnumber.dat")
    rvtype = load_abbreviations("rvtype.dat")
    # distinction of "core" and "around" is for "format.py"
    names = corenames + aroundnames

    biglist = []                    # for parsed lines
    notags = []                     # for lines without tags (if any)
    everything = set()              # articles seen so far (duplication check)

    with open("all.in") as infile:
        for line in infile:
            line = line.strip()
            if line == "":
                continue            # ignore empty lines (if any)
            parts = split_line(line)
            if parts is None:
                print("BAD LINE: " + line)
                continue
            (article, rest) = parts
            if article in everything:
                # a duplicate is reported but still processed
                print("AGAIN " + article)
            everything.add(article)
            if not rest:
                notags.append(article)      # no tags: report, do not process
                continue
            (tags0, tags1) = parse_tags(rest, names, rvnumber, rvtype, line)
            biglist.append((tags0, tags1, article))

    # format.py reads this file back with eval(), so write the repr
    with open("all.dat", "w") as outfile:
        outfile.write(repr(biglist) + "\n")

    if notags:                      # report lines with no tags (if any)
        print("*** notags ***")
        print(notags)


if __name__ == "__main__":
    main()

######################## the end ###################################
format.py
######################################################################
#                                                                    #
# format.py                                                          #
#                                                                    #
# Main infile "all.dat" (produced by parse.py).                      #
# A tag not of the form "(A:B)" directs a line to the first          #
# (core:selected) or the third (around the core) section,            #
# according to corenames and aroundnames.                            #
# A tag of the form "(A:B)" directs a line to the second section     #
# (core:other) unless it is directed to the first section.           #
# In the first section a line is special if the tag has a suffix     #
# "1" or "2".                                                        #
# Parameters from five infiles.                                      #
# Outfile: all.out (to be uploaded).                                 #
#                                                                    #
######################################################################

import sys
import copy

# The code below deliberately sticks to the syntax subset that is valid
# under both Python 2 and Python 3.


def load_data(filename):
    """Read one infile and return the Python object it spells out."""
    with open(filename) as infile:
        # NOTE(review): eval() executes whatever the file contains; this is
        # only acceptable because the infiles are produced by the operator
        # and by parse.py.  ast.literal_eval would be a safer drop-in if the
        # files are pure literals -- confirm before switching.
        return eval(infile.read())


def emit(out, text=""):
    """Write one line of text (an empty line by default) to out."""
    out.write(text + "\n")


#                  F U N C T I O N S


def frmt(whole, special):
    """Format one line.

    whole is (tags0, tags1, article):
      tags0 -- list of (A, B) pairs corresponding to "(A:B)" tags
      tags1 -- list of other tags (a suffix, if any, is not shown)
    special is True or False.

    An article of the form "X(Y)" turns into "X(Y)|X",
    but an article of the form "X!(Y)" turns into "X(Y)".
    Only a "(" at index 5 or later is taken into account.

    Returns "[[article]] / tags<br>" if special is False,
    or "'''[[article]]''' / tags<br>" if special is True.
    """
    (tags0, tags1, article) = whole
    k = article.find("(", 5)
    if k > 0:                       # article of the form "X(Y)" or "X!(Y)"?
        if article[k - 1] != "!":
            article += "|" + article[0:k].strip()       # "X(Y)|X"
        else:
            article = article[:k - 1] + article[k:]     # "X(Y)"
    if special:
        link = "'''[[" + article + "]]'''"
    else:
        link = "[[" + article + "]]"
    pieces = [link, "/"]
    # first, other tags, not "(A:B)"; the suffix (if any) is removed
    pieces.extend([z[0:3] for z in tags1])
    # last, "(A:B)" tags; (A,B) is converted back to "(A:B)"
    pieces.extend(["(" + first + ":" + second + ")"
                   for (first, second) in tags0])
    return " ".join(pieces) + "<br>"


def flst(lst, lst1, lst2, out=None):
    """Format (by frmt) and write a list of lines.

    lst        -- list of usual lines
    lst1, lst2 -- lists of special lines;
                  (len(lst1), len(lst2)) must be (0,0), (1,0) or (1,1)
    out        -- file-like object to write to; sys.stdout by default

    Two columns ({{Top}} / {{Mid}} / {{Bottom}}) are produced unless the
    list is short (fewer than 4 usual lines and no second special line).
    """
    assert len(lst2) <= len(lst1), "lst2 must not be longer than lst1"
    assert len(lst1) <= 1, "at most one special line per column"
    if out is None:
        out = sys.stdout
    l = len(lst)
    if l < 4 and lst2 == []:        # one column?
        if lst1 != []:
            emit(out, frmt(lst1[0], True))      # special line
        for whole in lst:
            emit(out, frmt(whole, False))       # usual line
        return
    # two columns; a half of the lines goes to the left column
    k = (l + 1 - len(lst1) + len(lst2)) // 2
    emit(out, "{{Top}}")            # start the left column
    if lst1 != []:
        emit(out, frmt(lst1[0], True))
    for whole in lst[:k]:
        emit(out, frmt(whole, False))
    emit(out, "{{Mid}}")            # start the right column
    if lst2 != []:
        emit(out, frmt(lst2[0], True))
    for whole in lst[k:]:
        emit(out, frmt(whole, False))
    emit(out, "{{Bottom}}")         # finish the right column


def distribute(biglist, corenames, aroundnames, rvnumber, rvtype):
    """Distribute the parsed lines (according to tags) to lists.

    The four parameter lists hold (abbreviation, full text) pairs.

    Returns (A, B, B1, B2, C, counts):
      A[x+y] -- lines containing "(x:y)" and no core-selected tag
      B[z]   -- usual lines containing core tag z
      B1[z]  -- special lines containing core tag z with suffix "1"
      B2[z]  -- special lines containing core tag z with suffix "2"
      C[z]   -- lines containing non-core tag z
      counts -- dict of the six link/article counters
    A, B and C are sorted alphabetically by article.
    """
    A = {}
    for (x, _xx) in rvnumber:
        for (y, _yy) in rvtype:
            A[x + y] = []
    B = dict((z, []) for (z, _text) in corenames)
    B1 = copy.deepcopy(B)
    B2 = copy.deepcopy(B)
    C = dict((z, []) for (z, _text) in aroundnames)
    counts = {
        "sel_links": 0, "sel_articles": 0,
        "other_links": 0, "other_articles": 0,
        "around_links": 0, "around_articles": 0,
    }

    for whole in biglist:
        (tags0, tags1, _article) = whole
        # tag (without suffix) belongs to core?
        tags11 = [z for z in tags1 if z[0:3] in B]          # core-selected
        tags12 = [z for z in tags1 if z[0:3] not in B]      # non-core

        if tags11 != []:            # some core-selected tags?
            counts["sel_articles"] += 1
            counts["sel_links"] += len(tags11)
        elif tags0 != []:           # core-other, but no core-selected, tags?
            counts["other_articles"] += 1
        if tags12 != []:            # some non-core tags?
            counts["around_articles"] += 1
            counts["around_links"] += len(tags12)

        for z in tags11:            # process core-selected tags
            if len(z) == 3:         # tag with no suffix?
                B[z].append(whole)
            elif z[3] == "1":
                B1[z[0:3]].append(whole)
            else:                   # tag with suffix "2"
                B2[z[0:3]].append(whole)

        if tags11 == []:            # "(A:B)" tags count only without core tags
            for (first, second) in tags0:
                for x in first:
                    for y in second:
                        A[x + y].append(whole)
                        counts["other_links"] += 1

        for z in tags12:            # process non-core tags
            # parse.py accepts a "1"/"2" suffix on any name, so index by the
            # bare three-character name rather than by the whole tag
            C[z[0:3]].append(whole)

    # sort each list alphabetically; sort key is article (not tag)
    for table in (A, B, C):
        for z in table:
            table[z].sort(key=(lambda wh: wh[2]))
    return A, B, B1, B2, C, counts


def write_report(out, headings, corenames, aroundnames, rvnumber, rvtype,
                 A, B, B1, B2, C, counts):
    """Write the whole page to out.

    headings is a list of five strings; the other arguments are the
    parameter lists and the results of distribute().
    """
    emit(out, headings[0])
    emit(out)
    emit(out, headings[1])
    emit(out)
    # selected core topics
    for (z, text) in corenames:
        emit(out, "===" + text + " (" + z + ")===")
        emit(out)
        flst(B[z], B1[z], B2[z], out)
        emit(out)
    # other core topics
    emit(out, headings[2])
    emit(out)
    for (x, text1) in rvnumber:
        emit(out, "===" + text1 + " (" + x + ":)===")
        emit(out)
        for (y, text2) in rvtype:
            if A[x + y] != []:      # empty subsections are omitted
                emit(out, "====" + text2 + " (" + x + ":" + y + ")====")
                emit(out)
                flst(A[x + y], [], [], out)
                emit(out)
    # non-core topics
    emit(out, headings[3])
    emit(out)
    for (z, text) in aroundnames:
        emit(out, "===" + text + " (" + z + ")===")
        emit(out)
        flst(C[z], [], [], out)
        emit(out)
    # statistics
    emit(out, headings[4])
    emit(out, "{{Top}}")
    emit(out, '"Core": %i (%i)<br>' % (
        counts["sel_articles"] + counts["other_articles"],
        counts["sel_links"] + counts["other_links"]))
    emit(out, '"Around": %i (%i)<br>' % (
        counts["around_articles"], counts["around_links"]))
    emit(out, "{{Mid}}")
    emit(out, '"Core selected": %i (%i)<br>' % (
        counts["sel_articles"], counts["sel_links"]))
    emit(out, '"Core others": %i (%i)<br>' % (
        counts["other_articles"], counts["other_links"]))
    emit(out, "{{Bottom}}")
    emit(out)
    emit(out, "Here ''k''(''n'') means: ''n'' links to ''k'' articles. "
              "(Some articles are linked more than once.)")


#                  e n d   o f   f u n c t i o n s


def main():
    """Read the parameters and "all.dat", write the page to "all.out"."""
    corenames = load_data("corenames.dat")
    aroundnames = load_data("aroundnames.dat")
    rvnumber = load_data("rvnumber.dat")
    rvtype = load_data("rvtype.dat")
    headings = load_data("headings.dat")
    biglist = load_data("all.dat")

    (A, B, B1, B2, C, counts) = distribute(
        biglist, corenames, aroundnames, rvnumber, rvtype)

    # the outfile is opened only after all input has been processed, and is
    # closed (flushed) explicitly instead of rebinding sys.stdout
    with open("all.out", "w") as out:
        write_report(out, headings, corenames, aroundnames, rvnumber, rvtype,
                     A, B, B1, B2, C, counts)


if __name__ == "__main__":
    main()

######################## the end ####################################