User:Tsirel/Bot code
Appearance
program
# Wikipedia catalog bot (part 1): read the bot's parameters and the
# catalog source from wiki pages.  The parse/format machinery follows below.
import sys
import ast
import copy

sys.path.append("/home/boris/Wiki/Bot/pywikipedia")
import wikipedia  # pywikipedia framework (project-local, see sys.path above)

mysite = wikipedia.getSite()

# The five parameter "files" are stored together on one wiki page, each
# wrapped as:  === name.dat ===  /  blank line  /  <pre>  ...  </pre>
params_page = wikipedia.Page(mysite, 'User:Tsirel/Bot parameters')
params_txt = params_page.get()
params_txt = params_txt.splitlines()
assert params_txt.pop(0) == "== files of parameters ==", "bad parameters"


def parse_param(name):
    """Consume one '=== name.dat ===' section from params_txt and return
    the Python literal (a list, usually of pairs) it contains.

    Mutates the module-level params_txt in place, so the sections must be
    consumed in the order they appear on the page.
    """
    assert params_txt.pop(0) == "=== "+name+".dat ===", "bad parameter "+name
    assert params_txt.pop(0) == "", "bad parameter "+name
    assert params_txt.pop(0) == "<pre>", "bad parameter "+name
    param = ""
    while params_txt[0] != "</pre>":
        param += params_txt.pop(0)
    params_txt.pop(0)  # discard the "</pre>" line
    if params_txt:  # every section but the last is followed by a blank line
        assert params_txt.pop(0) == "", "bad parameter "+name
    # literal_eval instead of eval: the text comes from an editable wiki
    # page, so evaluating it as arbitrary code would be a security hole.
    return ast.literal_eval(param)


corenames = parse_param("corenames")
aroundnames = parse_param("aroundnames")
rvnumber = parse_param("rvnumber")
rvtype = parse_param("rvtype")
headings = parse_param("headings")

# The catalog source: one "[[article]] tags" line per article, inside <pre>.
source_page = wikipedia.Page(mysite, 'User:Tsirel/Catalog source')
source_txt = source_page.get()
source_txt = source_txt.splitlines()
assert source_txt.pop(0) == "<pre>", "bad source"
assert source_txt.pop() == "</pre>", "bad source"
######################################################################
#                              parse                                 #
#                                                                    #
#  Source lines have the form "[[article]] tags".  Tags are space-   #
#  separated, each either "(rvnumber:rvtype)" or a three-letter name #
#  (corename or aroundname), optionally suffixed by "1" or "2".      #
######################################################################

def parse(source_txt, corenames, aroundnames, rvnumber, rvtype):
    """Parse the catalog source lines into a list of entries for format.

    The four tag parameters are lists of (abbreviation, full text) pairs;
    only the abbreviations are used here (the full texts are for format).

    Returns a list of triples (tags0, tags1, article) where
      tags0   -- list of (A, B) pairs taken from "(A:B)" tags,
      tags1   -- list of the other (name) tags, suffix kept,
      article -- the article title with its first letter capitalized.
    Lines without any tags are recorded in notags and not returned.
    """
    # keep only the abbreviations; the full texts are for "format"
    corenames = [x for (x, y) in corenames]
    aroundnames = [x for (x, y) in aroundnames]
    rvnumber = [x for (x, y) in rvnumber]
    rvtype = [x for (x, y) in rvtype]
    # the core/around distinction matters only to "format"
    names = corenames + aroundnames

    biglist = []     # parsed lines
    notags = []      # lines without tags (if any)
    everything = {}  # seen articles (duplication check)
    for line in source_txt:  # main loop over the source lines
        line = line.strip()
        if line == "":
            continue  # ignore empty lines (if any)
        assert line.startswith("[["), "BAD LINE: "+line  # syntax error
        article = line[2:line.index("]]")]  # "[[article]] tags" -> article
        article = article[0].upper()+article[1:]  # article -> Article
        # if article in everything:  # duplication?
        #     print "AGAIN "+article
        everything[article] = 0  # the value is of no use
        rest = line[line.index("]]")+2:].split()  # the list of tags
        if rest == []:
            notags.append(article)  # no tags: record, do not process
            continue
        tags0 = []  # "(A:B)" tags, converted to (A, B)
        tags1 = []  # all other tags
        for tag in rest:
            if tag.startswith("("):  # "(A:B)" tag?
                maintag = tag[1:]
                assert maintag.endswith(")"), "!!! "+maintag  # syntax error
                maintag = maintag[:-1]
                maintag1 = maintag[:maintag.index(":")]    # A from "(A:B)"
                maintag2 = maintag[maintag.index(":")+1:]  # B from "(A:B)"
                # each character must be a known code; the message used to
                # reference an undefined name "whole_line" (NameError)
                for x in maintag1:
                    assert x in rvnumber, "?? "+x+" "+line
                for x in maintag2:
                    assert x in rvtype, "?? "+x+" "+line
                tags0.append((maintag1, maintag2))  # store (A, B)
            else:  # name tag: 3 chars plus an optional "1"/"2" suffix
                assert len(tag) == 3 or len(tag) == 4, "bad tag"
                assert tag[0:3] in names, "BAD TAG: "+tag
                if len(tag) == 4:
                    assert tag[3] in ["1", "2"], "bad tag"
                tags1.append(tag)
        biglist.append((tags0, tags1, article))  # store the parsed line
    return biglist

######################## end of parse ################################


######################################################################
#                              format                                #
#                                                                    #
#  A name tag directs a line to the first (core: selected) or third  #
#  (around the core) section, per corenames/aroundnames.  A "(A:B)"  #
#  tag directs a line to the second section (core: other) unless it  #
#  is already directed to the first.  In the first section a line is #
#  special if its tag carries the suffix "1" or "2".                 #
######################################################################

def format(biglist):  # name kept for compatibility (shadows the builtin)
    """Format the parsed entries into wiki text, appending to the
    module-level string `formatted` (via pr below).

    Reads the module-level corenames, aroundnames, rvnumber, rvtype
    and headings parameters.
    """

    # ----- internal helpers -------------------------------------------

    def frmt(whole, special):
        """Format one line; whole is (tags0, tags1, article).

        An article "X(Y)" becomes the piped link "X(Y)|X", but "X!(Y)"
        becomes the plain link "X(Y)".  Returns "[[article]] / tags<br>",
        with the link bolded when special is True.
        """
        (tags0, tags1, article) = whole
        k = article.find("(", 5)
        if k > 0:  # article of the form "X(Y)" or "X!(Y)"?
            if article[k-1] != "!":
                article += "|"+article[0:k].strip()  # "X(Y)|X"
            else:
                article = article[:k-1]+article[k:]  # "X(Y)"
        if special:
            res = "'''[["+article+"]]''' / "
        else:
            res = "[["+article+"]] / "
        for z in tags1:  # first the name tags ...
            res += z[0:3]+" "  # ... with the suffix (if any) removed
        for tag0 in tags0:  # ... then the "(A:B)" tags
            (first, second) = tag0
            res += "("+first+":"+second+") "  # (A, B) back to "(A:B)"
        res = res[:-1]+"<br>"  # replace the trailing space
        return res

    def pr(text):
        # append one output line to the global result string
        global formatted
        formatted += text+"\n"

    def flst(lst, lst1, lst2):
        """Format and print a list of lines via frmt.

        lst: usual lines; lst1, lst2: special lines — at most one each,
        and lst2 may be non-empty only when lst1 is.  Short lists come
        out as one column, longer ones as two ({{Top}}/{{Mid}}/{{Bottom}}).
        """
        assert len(lst2) <= len(lst1), "bad special lines"
        assert len(lst1) <= 1, "bad special lines"
        l = len(lst)
        if l < 4 and lst2 == []:  # one column
            if lst1 != []:  # special line?
                pr(frmt(lst1[0], True))
            for whole in lst:
                pr(frmt(whole, False))
        else:  # two columns
            # half of the lines (counting specials) go to the left column
            k = (l+1-len(lst1)+len(lst2))//2
            pr("{{Top}}")  # start the left column
            if lst1 != []:  # special line?
                pr(frmt(lst1[0], True))
            for whole in lst[:k]:
                pr(frmt(whole, False))
            pr("{{Mid}}")  # start the right column
            if lst2 != []:  # special line?
                pr(frmt(lst2[0], True))
            for whole in lst[k:]:
                pr(frmt(whole, False))
            pr("{{Bottom}}")  # finish the right column
        return

    # ----- end of internal helpers ------------------------------------

    # A[x+y] collects all lines tagged "(x:y)"
    A = {}
    for (x, xx) in rvnumber:
        for (y, yy) in rvtype:
            A[x+y] = []
    # B[z]: usual core lines tagged z;
    # B1[z] / B2[z]: special lines tagged z with suffix "1" / "2"
    B = dict([(x, []) for (x, y) in corenames])
    B1 = copy.deepcopy(B)
    B2 = copy.deepcopy(B)
    # C[z]: lines with non-core tag z
    C = dict([(x, []) for (x, y) in aroundnames])

    count_sel_links = 0
    count_sel_articles = 0
    count_other_links = 0
    count_other_articles = 0
    count_around_links = 0
    count_around_articles = 0

    # distribute the lines (according to their tags) over the lists
    for whole in biglist:
        (tags0, tags1, article) = whole
        tags11 = []  # core-selected tags of this line
        tags12 = []  # non-core tags of this line
        for z in tags1:
            if z[0:3] in B:  # tag (without suffix) belongs to the core?
                tags11.append(z)
            else:
                tags12.append(z)
        if tags11 != []:  # some core-selected tags?
            count_sel_articles += 1
            count_sel_links += len(tags11)
        elif tags0 != []:  # some core-other (but no core-selected) tags?
            count_other_articles += 1
        if tags12 != []:  # some non-core tags?
            count_around_articles += 1
            count_around_links += len(tags12)
        for z in tags11:  # process core-selected tags
            if len(z) == 3:  # no suffix: usual line
                B[z].append(whole)
            elif z[3] == "1":  # suffix "1": special line
                B1[z[0:3]].append(whole)
            else:  # suffix "2": special line
                B2[z[0:3]].append(whole)
        if tags11 == []:  # only without core-selected tags:
            for tag0 in tags0:  # process "(A:B)" tags
                (first, second) = tag0
                for x in first:
                    for y in second:
                        A[x+y].append(whole)
                        count_other_links += 1
        for z in tags12:  # process non-core tags
            C[z].append(whole)

    # sort each list alphabetically; the key is the article, not the tag
    for z in A:
        A[z].sort(key=(lambda wh: wh[2]))
    for z in B:
        B[z].sort(key=(lambda wh: wh[2]))
    for z in C:
        C[z].sort(key=(lambda wh: wh[2]))

    # the lists are ready; now generate the output
    pr(headings[0])
    pr("")
    pr(headings[1])
    pr("")
    # selected core topics
    for (z, text) in corenames:
        pr("==="+text+" ("+z+")===")
        pr("")
        flst(B[z], B1[z], B2[z])
        pr("")
    # other core topics, by number and type of random variables
    pr(headings[2])
    pr("")
    for (x, text1) in rvnumber:
        pr("==="+text1+" ("+x+":)===")
        pr("")
        for (y, text2) in rvtype:
            if A[x+y] != []:
                pr("===="+text2+" ("+x+":"+y+")====")
                pr("")
                flst(A[x+y], [], [])
                pr("")
    # non-core topics
    pr(headings[3])
    pr("")
    for (z, text) in aroundnames:
        pr("==="+text+" ("+z+")===")
        pr("")
        flst(C[z], [], [])
        pr("")
    # counters of articles
    pr(headings[4])
    pr("{{Top}}")
    pr('"Core": %i (%i)<br>' % (count_sel_articles+count_other_articles,
                                count_sel_links+count_other_links))
    pr('"Around": %i (%i)<br>' % (count_around_articles, count_around_links))
    pr("{{Mid}}")
    pr('"Core selected": %i (%i)<br>' % (count_sel_articles, count_sel_links))
    pr('"Core others": %i (%i)<br>' % (count_other_articles, count_other_links))
    pr("{{Bottom}}")
    pr("")
    pr("Here ''k''(''n'') means: ''n'' links to ''k'' articles. (Some articles are linked more than once.)")
    return

######################## end of format ###############################


###################### now execute them ##############################
# Guarded so that importing this module does not run the bot (and in
# particular does not write to Wikipedia); running it as a script is
# unchanged.
if __name__ == "__main__":
    parsed = parse(source_txt, corenames, aroundnames, rvnumber, rvtype)
    formatted = ""  # the output accumulates here (see pr above)
    format(parsed)
    out_page = wikipedia.Page(mysite, 'User:Tsirel/Catalog')
    out_page.put(formatted)  # write the catalog back to Wikipedia
########################### the end ##################################
files of parameters
corenames.dat
[('bsc', 'Basic notions'), ('mnt', 'Moments'), ('inq', 'Inequalities'), ('Mar', 'Markov chains, processes, fields, networks'), ('Gau', 'Gaussian random variables, vectors, functions'), ('cnd', 'Conditioning'), ('spd', 'Specific distributions'), ('emm', 'Empirical measure'), ('lmt', 'Limit theorems'), ('lrd', 'Large deviations'), ('scl', 'Stochastic calculus'), ('Mal', 'Malliavin calculus'), ('anl', 'Analytic aspects (including measure theoretic)')]
aroundnames.dat
[('grl', 'General aspects'), ('fnd', 'Foundations of probability theory'), ('gmb', 'Gambling'), ('cnc', 'Coincidence'), ('alg', 'Algorithmics'), ('Bay', 'Bayesian approach'), ('fnc', 'Financial mathematics'), ('phs', 'Physics'), ('gnt', 'Genetics'), ('spr', 'Stochastic process'), ('geo', 'Geometric probability'), ('emp', 'Empirical findings'), ('hst', 'Historical'), ('msc', 'Miscellany')]
rvnumber.dat
[("1","A single random variable"), ("2","Two random variables"), ("3","Three random variables"), ("F","Finitely many random variables"), ("L","A large number of random variables (finite but tending to infinity)"), ("S","An infinite sequence of random variables"), ("U","Uncountably many random variables (continuous-time processes etc)")]
rvtype.dat
[("B","Binary"), ("D","Discrete"), ("C", "Continuous"), ("R","Real-valued, arbitrary"), ("M","Random point of a manifold"), ("G","General (random element of an abstract space)")]
headings.dat
["==Core probability: selected topics==", "'''[[Probability theory]]'''", "==Core probability: other articles, by number and type of random variables==", "==Around the core==", "==Counters of articles=="]