User:Tsirel/Bot code
Appearance
parse.py
######################################################################
#                                                                    #
#   parse.py                                                         #
#                                                                    #
#   Main infile "all.in"; its lines: "[[article]] tags".             #
#   Tags are space-separated,                                        #
#   each either "(rvnumber:rvtype)"                                  #
#   or a name (corename or aroundname) and maybe suffix "1" or "2".  #
#   Parameters from four infiles.                                    #
#   Outfile: all.dat (for format.py).                                #
#                                                                    #
######################################################################

# Every print(...) below is given exactly one argument, so the Python 2
# print statement and the Python 3 print function emit the same text.


def _read_abbreviations(path):
    """Return the abbreviations stored in the parameter file *path*.

    The file holds a Python list of (abbreviation, full text) pairs.
    Only the abbreviations are used here; the full texts are for
    "format.py".
    """
    # NOTE(security): eval() executes whatever the file contains, so the
    # parameter files must come from a trusted source.
    with open(path) as infile:
        pairs = eval(infile.read())
    return [abbr for (abbr, text) in pairs]


def _parse_tags(tags, line, names, rvnumber, rvtype):
    """Split the tags of one input line into "(A:B)" tags and other tags.

    tags     -- list of tag strings of the line
    line     -- the whole (stripped) input line, used in diagnostics only
    names    -- allowed three-character tag names (core and around)
    rvnumber -- allowed characters for A in "(A:B)"
    rvtype   -- allowed characters for B in "(A:B)"

    Returns (tags0, tags1): tags0 is the list of (A, B) pairs, tags1 the
    list of other tags (suffix, if any, included).

    A "(A:B" tag without the closing ")" is reported and ends the
    processing of the line; the tags collected so far are returned.
    An unknown name is reported as "BAD TAG" and skipped.
    Raises ValueError for an unknown character in A or B, for a name tag
    that is not 3 or 4 characters long, for a suffix other than "1" or
    "2", and (from str.index) for a "(...)" tag without ":".
    """
    tags0 = []  # for "(A:B)" tags converted to (A, B)
    tags1 = []  # for other tags
    for tag in tags:
        if tag.startswith("("):  # "(A:B)" tag
            maintag = tag[1:]
            if not maintag.endswith(")"):
                print("!!! " + maintag)  # syntax error
                break
            maintag = maintag[:-1]
            colon = maintag.index(":")
            maintag1 = maintag[:colon]  # A from "(A:B)"
            maintag2 = maintag[colon + 1:]  # B from "(A:B)"
            for x in maintag1:  # syntax check
                if x not in rvnumber:
                    print("?? " + x + " " + line)
                    raise ValueError(
                        "oops-1: unknown rvnumber %r in line %r" % (x, line)
                    )
            for x in maintag2:  # syntax check
                if x not in rvtype:
                    print("?? " + x + " " + line)
                    raise ValueError(
                        "oops-2: unknown rvtype %r in line %r" % (x, line)
                    )
            tags0.append((maintag1, maintag2))
        else:  # other tag: 3 chars, and maybe 1 char suffix
            if len(tag) not in (3, 4):
                raise ValueError(
                    "tag %r must have 3 or 4 characters (line %r)"
                    % (tag, line)
                )
            if tag[0:3] not in names:
                print("BAD TAG: " + tag)  # syntax error
                continue
            if len(tag) == 4 and tag[3] not in ("1", "2"):
                raise ValueError(
                    "suffix of tag %r must be 1 or 2 (line %r)" % (tag, line)
                )
            tags1.append(tag)
    return tags0, tags1


def main():
    """Parse "all.in" and write the list of parsed lines to "all.dat".

    Each parsed line is stored as (tags0, tags1, article).  Problems in
    the input are reported on standard output: "BAD LINE" for a line that
    is not of the form "[[article]] tags", "AGAIN" for a repeated article
    (which is nevertheless processed again), and a final "*** notags ***"
    list of articles that carry no tags (these are not written out).
    """
    # read parameters; distinction of "core" and "around" is for "format.py"
    corenames = _read_abbreviations("corenames.dat")
    aroundnames = _read_abbreviations("aroundnames.dat")
    rvnumber = _read_abbreviations("rvnumber.dat")
    rvtype = _read_abbreviations("rvtype.dat")
    names = corenames + aroundnames

    biglist = []  # for parsed lines
    notags = []  # for lines without tags (if any)
    everything = set()  # articles seen so far (duplication check)

    with open("all.in") as infile:
        for line in infile:
            line = line.strip()
            if line == "":
                continue  # ignore empty lines (if any)
            end = line.find("]]")
            # end == -1: no "]]" at all; end == 2: empty article "[[]]"
            if not line.startswith("[[") or end <= 2:
                print("BAD LINE: " + line)  # syntax error in the infile
                continue
            article = line[2:end]
            article = article[0].upper() + article[1:]  # article -> Article
            if article in everything:
                print("AGAIN " + article)
            everything.add(article)
            rest = line[end + 2:].split()  # list of tags
            if rest == []:
                notags.append(article)  # no tags: report, do not process
                continue
            tags0, tags1 = _parse_tags(rest, line, names, rvnumber, rvtype)
            biglist.append((tags0, tags1, article))

    # write main outfile: the repr of the list followed by a newline
    with open("all.dat", "w") as outfile:
        outfile.write(repr(biglist) + "\n")

    if notags != []:  # report lines with no tags (if any)
        print("*** notags ***")
        print(notags)


if __name__ == "__main__":
    main()

######################## the end ###################################
format.py
######################################################################
#                                                                    #
#   format.py                                                        #
#                                                                    #
#   Main infile "all.dat" (produced by parse.py).                    #
#   A tag not of the form "(A:B)" directs a line to the first        #
#   (core:selected) or the third (around the core) section,          #
#   according to corenames and aroundnames.                          #
#   A tag of the form "(A:B)" directs a line to the second section   #
#   (core:other) unless it is directed to the first section.         #
#   In the first section a line is special if the tag has a suffix   #
#   "1" or "2".                                                      #
#   Parameters from five infiles.                                    #
#   Outfile: all.out (to be uploaded).                               #
#                                                                    #
######################################################################

import sys
import copy

# Every print(...) below is given exactly one argument, so the Python 2
# print statement and the Python 3 print function emit the same text;
# print("") produces an empty line under both.


#   F U N C T I O N S   #

def frmt(whole, special):
    """Format one line.

    whole   -- (tags0, tags1, article), where tags0 is a list of (A, B)
               pairs corresponding to "(A:B)" tags and tags1 is a list of
               other tags
    special -- True or False

    An article of the form "X(Y)" turns into "X(Y)|X", but an article of
    the form "X!(Y)" turns into "X(Y)".  The "(" is searched for from
    position 5 on.

    Returns "[[article]] / tags<br>" if special is False, or
    "'''[[article]]''' / tags<br>" if special is True.  The other tags
    come first, with the suffix (if any) removed; the "(A:B)" tags last.
    """
    (tags0, tags1, article) = whole
    k = article.find("(", 5)
    if k > 0:  # article of the form "X(Y)" or "X!(Y)"
        if article[k - 1] != "!":
            article += "|" + article[0:k].strip()  # "X(Y)|X"
        else:
            article = article[:k - 1] + article[k:]  # "X(Y)"
    if special:
        link = "'''[[" + article + "]]''' /"
    else:
        link = "[[" + article + "]] /"
    parts = [link]
    for z in tags1:  # first, other tags, not "(A:B)"
        parts.append(z[0:3])  # remove suffix (if any)
    for (first, second) in tags0:  # last, "(A:B)" tags
        parts.append("(" + first + ":" + second + ")")
    return " ".join(parts) + "<br>"


def flst(lst, lst1, lst2):
    """Format (by frmt) and print a list of lines.

    lst        -- list of usual lines
    lst1, lst2 -- lists of special lines;
                  (len(lst1), len(lst2)) must be (0,0), (1,0) or (1,1)

    Two columns ({{Top}} / {{Mid}} / {{Bottom}}) are produced unless the
    list is short (fewer than 4 usual lines and no second special line).
    Prints to sys.stdout and returns None.
    """
    assert len(lst2) <= len(lst1)
    assert len(lst1) <= 1
    n = len(lst)
    if n < 4 and lst2 == []:  # one column
        if lst1 != []:
            print(frmt(lst1[0], True))  # special line
        for whole in lst:
            print(frmt(whole, False))  # usual line
    else:  # two columns
        # a half of the lines goes to the left column
        k = (n + 1 - len(lst1) + len(lst2)) // 2
        print("{{Top}}")  # start the left column
        if lst1 != []:
            print(frmt(lst1[0], True))
        for whole in lst[:k]:
            print(frmt(whole, False))
        print("{{Mid}}")  # start the right column
        if lst2 != []:
            print(frmt(lst2[0], True))
        for whole in lst[k:]:
            print(frmt(whole, False))
        print("{{Bottom}}")  # finish the right column
    return


def _read_literal(path):
    """Return the Python literal stored in the file *path*."""
    # NOTE(security): eval() executes whatever the file contains, so the
    # input files must come from a trusted source.
    with open(path) as infile:
        return eval(infile.read())


def _distribute(biglist, corenames, aroundnames, rvnumber, rvtype):
    """Distribute the lines of biglist (according to tags) to lists.

    Returns (pairs, core, core1, core2, around, counts):
    pairs[x+y] -- lines containing "(x:y)" and no core-selected tag
    core[z]    -- usual lines containing core tag z
    core1[z]   -- special lines containing core tag z with suffix 1
    core2[z]   -- special lines containing core tag z with suffix 2
    around[z]  -- lines containing non-core tag z
    counts     -- dict of the six link/article counters
    The lists in pairs, core and around are sorted by article.
    """
    pairs = {}
    for (x, xx) in rvnumber:
        for (y, yy) in rvtype:
            pairs[x + y] = []
    core = dict((z, []) for (z, text) in corenames)
    core1 = copy.deepcopy(core)
    core2 = copy.deepcopy(core)
    around = dict((z, []) for (z, text) in aroundnames)
    counts = {
        "sel_links": 0,
        "sel_articles": 0,
        "other_links": 0,
        "other_articles": 0,
        "around_links": 0,
        "around_articles": 0,
    }

    for whole in biglist:
        (tags0, tags1, article) = whole
        # tag (without suffix) belongs to core?
        tags11 = [z for z in tags1 if z[0:3] in core]  # core-selected tags
        tags12 = [z for z in tags1 if z[0:3] not in core]  # non-core tags

        if tags11 != []:  # some core-selected tags
            counts["sel_articles"] += 1
            counts["sel_links"] += len(tags11)
        elif tags0 != []:  # some core-other (but no core-selected) tags
            counts["other_articles"] += 1
        if tags12 != []:  # some non-core tags
            counts["around_articles"] += 1
            counts["around_links"] += len(tags12)

        for z in tags11:  # process core-selected tags
            if len(z) == 3:  # tag with no suffix: usual line
                core[z].append(whole)
            elif z[3] == "1":  # tag with suffix "1": special line
                core1[z[0:3]].append(whole)
            else:  # tag with suffix "2": special line
                core2[z[0:3]].append(whole)

        if tags11 == []:  # no core-selected tags: process "(A:B)" tags
            for (first, second) in tags0:
                for x in first:
                    for y in second:
                        pairs[x + y].append(whole)
                        counts["other_links"] += 1

        for z in tags12:  # process non-core tags
            around[z].append(whole)

    # sort each list alphabetically; sort key is article (not tag)
    for lists in (pairs, core, around):
        for z in lists:
            lists[z].sort(key=(lambda wh: wh[2]))
    return pairs, core, core1, core2, around, counts


def _print_report(headings, corenames, aroundnames, rvnumber, rvtype,
                  pairs, core, core1, core2, around, counts):
    """Print the three sections and the closing statistics to sys.stdout."""
    print(headings[0])
    print("")
    print(headings[1])
    print("")

    # format and print selected core topics
    for (z, text) in corenames:
        print("===" + text + " (" + z + ")===")
        print("")
        flst(core[z], core1[z], core2[z])
        print("")

    # format and print other core topics
    print(headings[2])
    print("")
    for (x, text1) in rvnumber:
        print("===" + text1 + " (" + x + ":)===")
        print("")
        for (y, text2) in rvtype:
            if pairs[x + y] != []:
                print("====" + text2 + " (" + x + ":" + y + ")====")
                print("")
                flst(pairs[x + y], [], [])
                print("")

    # format and print non-core topics
    print(headings[3])
    print("")
    for (z, text) in aroundnames:
        print("===" + text + " (" + z + ")===")
        print("")
        flst(around[z], [], [])
        print("")

    print(headings[4])
    print("{{Top}}")
    print('"Core": %i (%i)<br>' % (
        counts["sel_articles"] + counts["other_articles"],
        counts["sel_links"] + counts["other_links"]))
    print('"Around": %i (%i)<br>' % (
        counts["around_articles"], counts["around_links"]))
    print("{{Mid}}")
    print('"Core selected": %i (%i)<br>' % (
        counts["sel_articles"], counts["sel_links"]))
    print('"Core others": %i (%i)<br>' % (
        counts["other_articles"], counts["other_links"]))
    print("{{Bottom}}")
    print("")
    print("Here ''k''(''n'') means: ''n'' links to ''k'' articles. "
          "(Some articles are linked more than once.)")


#   e n d   o f   f u n c t i o n s   #


def main():
    """Read the parameters and "all.dat"; write the report to "all.out"."""
    # read parameters
    corenames = _read_literal("corenames.dat")
    aroundnames = _read_literal("aroundnames.dat")
    rvnumber = _read_literal("rvnumber.dat")
    rvtype = _read_literal("rvtype.dat")
    headings = _read_literal("headings.dat")
    # read the main infile
    biglist = _read_literal("all.dat")

    pairs, core, core1, core2, around, counts = _distribute(
        biglist, corenames, aroundnames, rvnumber, rvtype)

    # redirect "print" to the outfile; always restore stdout and close the
    # file so that the output is flushed even if formatting fails
    outfile = open("all.out", "w")
    saved_stdout = sys.stdout
    sys.stdout = outfile
    try:
        _print_report(headings, corenames, aroundnames, rvnumber, rvtype,
                      pairs, core, core1, core2, around, counts)
    finally:
        sys.stdout = saved_stdout
        outfile.close()


if __name__ == "__main__":
    main()

######################## the end ####################################
files of parameters
corenames.dat
[('bsc', 'Basic notions'), ('mnt', 'Moments'), ('inq', 'Inequalities'), ('Mar', 'Markov chains, processes, fields, networks'), ('Gau', 'Gaussian random variables, vectors, functions'), ('cnd', 'Conditioning'), ('spd', 'Specific distributions'), ('emm', 'Empirical measure'), ('lmt', 'Limit theorems'), ('lrd', 'Large deviations'), ('scl', 'Stochastic calculus'), ('Mal', 'Malliavin calculus'), ('anl', 'Analytic aspects (including measure theoretic)')]
aroundnames.dat
[('grl', 'General aspects'), ('fnd', 'Foundations of probability theory'), ('gmb', 'Gambling'), ('cnc', 'Coincidence'), ('alg', 'Algorithmics'), ('Bay', 'Bayesian approach'), ('fnc', 'Financial mathematics'), ('phs', 'Physics'), ('gnt', 'Genetics'), ('spr', 'Stochastic process'), ('geo', 'Geometric probability'), ('emp', 'Empirical findings'), ('hst', 'Historical'), ('msc', 'Miscellany')]
rvnumber.dat
[("1","A single random variable"), ("2","Two random variables"), ("3","Three random variables"), ("F","Finitely many random variables"), ("L","A large number of random variables (finite but tending to infinity)"), ("S","An infinite sequence of random variables"), ("U","Uncountably many random variables (continuous-time processes etc)")]