User:Tsirel/Bot code
Appearance
######################################################################
#                                                                    #
#   parse.py                                                         #
#                                                                    #
#   Main infile "all.in"; its lines: "[[article]] tags".             #
#   Tags are space-separated,                                        #
#   each either "(rvnumber:rvtype)"                                  #
#   or a name (corename or aroundname) and maybe suffix "1" or "2".  #
#   Parameters from four infiles.                                    #
#   Outfile: all.dat (for format.py).                                #
#                                                                    #
#   Written to run unchanged under Python 2.6+ and Python 3.         #
#                                                                    #
######################################################################


def read_abbreviations(filename):
    """Return the list of abbreviations stored in a parameter file.

    The file holds a Python expression evaluating to a sequence of
    (abbreviation, full text) pairs.  Only the abbreviations are used
    here; the full texts are for "format.py".
    """
    with open(filename) as infile:
        # NOTE(review): eval() executes whatever the file contains.  That is
        # acceptable only while the *.dat parameter files are trusted.
        pairs = eval(infile.read())
    return [abbreviation for (abbreviation, _fulltext) in pairs]


def split_line(line):
    """Split a stripped '[[article]] tags' line into (Article, tags).

    The first character of the article is upper-cased.  ``tags`` is the
    whitespace-separated list of everything after the first "]]".
    Returns None when the line does not start with "[[", has no "]]",
    or names an empty article.
    """
    if not line.startswith("[["):
        return None
    close = line.find("]]")
    if close <= 2:  # -1: no "]]" at all; 2: empty article "[[]]"
        return None
    article = line[2:close]
    article = article[0].upper() + article[1:]  # article -> Article
    return article, line[close + 2:].split()


def parse_paren_tag(tag, rvnumber, rvtype, whole_line):
    """Convert a complete "(A:B)" tag to the pair (A, B).

    Every character of A must be in ``rvnumber`` and every character of B
    must be in ``rvtype``.  ``whole_line`` is only used in diagnostics.

    Raises ValueError on a missing ":" or on an unknown character.
    """
    head, colon, tail = tag[1:-1].partition(":")
    if not colon:
        raise ValueError("missing ':' in tag " + tag + " of line: " + whole_line)
    for part, allowed, code in ((head, rvnumber, "oops-1"),
                                (tail, rvtype, "oops-2")):
        for char in part:
            if char not in allowed:
                print("?? " + char + " " + whole_line)
                raise ValueError(code)  # syntax error: abort the run
    return head, tail


def parse_tags(tags, names, rvnumber, rvtype, whole_line):
    """Parse the tags of one line into (tags0, tags1).

    ``tags0`` collects "(A:B)" tags converted to (A, B) pairs; ``tags1``
    collects the name tags (3-character name, optional suffix "1"/"2").
    A name tag whose name is not in ``names`` is reported and skipped.
    A "(" tag lacking its closing ")" is reported and ends the processing
    of this line's tags; whatever was parsed before it is still returned.

    Raises ValueError for a name tag of wrong length or with a bad suffix,
    and for the errors described in parse_paren_tag().
    """
    tags0 = []
    tags1 = []
    for tag in tags:
        if tag.startswith("("):
            if not tag[1:].endswith(")"):
                print("!!! " + tag[1:])
                break
            tags0.append(parse_paren_tag(tag, rvnumber, rvtype, whole_line))
            continue
        # Explicit checks rather than assert: asserts vanish under "python -O".
        if len(tag) not in (3, 4):
            raise ValueError("tag must have 3 or 4 characters: " + tag)
        if tag[:3] not in names:
            print("BAD TAG: " + tag)
            continue
        if len(tag) == 4 and tag[3] not in ("1", "2"):
            raise ValueError("tag suffix must be 1 or 2: " + tag)
        tags1.append(tag)
    return tags0, tags1


def parse_lines(lines, names, rvnumber, rvtype):
    """Parse all lines of the main input and return (biglist, notags).

    ``biglist`` holds one (tags0, tags1, article) tuple per line that has
    tags; ``notags`` lists the articles of lines without any tag.  Empty
    lines are ignored, malformed lines are reported and skipped, and a
    repeated article is reported but processed anyway.
    """
    biglist = []
    notags = []
    seen = set()  # articles met so far (duplication check)
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        parsed = split_line(line)
        if parsed is None:
            print("BAD LINE: " + line)
            continue
        article, tags = parsed
        if article in seen:
            print("AGAIN " + article)
        seen.add(article)
        if not tags:
            notags.append(article)  # report only, do not process
            continue
        tags0, tags1 = parse_tags(tags, names, rvnumber, rvtype, line)
        biglist.append((tags0, tags1, article))
    return biglist, notags


def main():
    """Read the parameter files and "all.in", then write "all.dat"."""
    corenames = read_abbreviations("corenames.dat")
    aroundnames = read_abbreviations("aroundnames.dat")
    rvnumber = read_abbreviations("rvnumber.dat")
    rvtype = read_abbreviations("rvtype.dat")
    # The distinction of "core" and "around" is for "format.py" only.
    names = corenames + aroundnames

    with open("all.in") as infile:
        biglist, notags = parse_lines(infile, names, rvnumber, rvtype)

    # Same text the former "print >>file, biglist" produced, but the file
    # is now closed (and therefore flushed) deterministically.
    with open("all.dat", "w") as outfile:
        outfile.write(str(biglist) + "\n")

    if notags:  # report lines with no tags (if any)
        print("*** notags ***")
        print(notags)


if __name__ == "__main__":
    main()

######################## the end ###################################