# User:Commander Keane/wikidump2mapdata.py
"""
wikidump2mapdata.py
A Python script that creates a Wikimedia Commons Data: file for mapframe display
from a wikitext list of 4-figure UK grid references (e.g. "SK 1234"), grouped
under the bold ('''...''') headings found in the list.
Each marker has an empty title and a description giving its heading and grid reference.
Input: grid-refs-from-wiki.txt (wikitext list)
Output: commons_data.json
"""
import json
import re

import OSGridConverter
# File names used by the script.
output_filename = "commons_data.json"  # GeoJSON file written at the end
wiki_dump_filename = "grid-refs-from-wiki.txt"  # Wikitext list that is parsed

decimal_places = 6  # Number of decimal places for lat/lon

# Captures a 4-figure reference such as "SK 1234" (the last one on the line,
# because the leading ".*" is greedy). The \b anchors stop the pattern from
# matching inside a longer token: without them "SK 12345 67890" would be read
# as "SK 1234" and "BOOK 1234" as "OK 1234", placing markers in wrong places.
grid_ref_regex = r'^.*\b([A-Z]{2} [0-9]{4})\b.*$'

# Captures the text between triple quotes, i.e. wikitext bold.
subheading_regex = r"^.*'''(.*)'''.*$"

# Heading that grid references are filed under until a bold heading is seen.
subheading = 'default_subheading'

grid_refs_from_wiki = []  # NOTE(review): not used by the code visible here
data = {}  # Maps heading -> list of [grid_ref, lat, lon] entries
def uk_grid_to_coords(grid_ref):
    """
    Convert a 4-figure UK grid reference (e.g., "SK 0805") to decimal coordinates.

    The two easting digits and the two northing digits are each padded with a
    trailing zero, so "SK 0805" is handed to OSGridConverter as the 6-figure
    reference "SK080050".

    Args:
        grid_ref: String in format "XX 0805" where XX is two letters and 0805
            is 4 digits. Spaces are ignored and letters may be lower case.
    Returns:
        tuple: (latitude, longitude) in decimal degrees (WGS84), each rounded
        to ``decimal_places`` places.
    Raises:
        ValueError: If grid_ref is not two letters followed by four digits.
    """
    compact = grid_ref.replace(" ", "").upper()

    # Validate before slicing: a malformed reference would otherwise be cut
    # into the wrong pieces and silently converted to a bogus position.
    parsed = re.fullmatch(r"([A-Z]{2})([0-9]{2})([0-9]{2})", compact)
    if parsed is None:
        raise ValueError(
            f"Expected a 4-figure grid reference like 'SK 0805', got {grid_ref!r}"
        )
    letters, easting, northing = parsed.groups()

    # Pad easting and northing to 3 figures each (6-figure reference)
    full_grid_ref = f"{letters}{easting}0{northing}0"

    # Convert using OSGridConverter
    coords = OSGridConverter.grid2latlong(full_grid_ref)
    lat = round(coords.latitude, decimal_places)
    lon = round(coords.longitude, decimal_places)
    return lat, lon
# Parse the wikitext dump. A line containing bold text starts a new heading,
# and every grid reference found is stored under the most recent heading.
# The dump is read as UTF-8 rather than the platform default encoding.
with open(wiki_dump_filename, 'r', encoding='utf-8') as f:
    for line in f:
        stripped = line.strip()

        match_subheading = re.match(subheading_regex, stripped)
        if match_subheading:
            subheading = match_subheading.group(1)
            # setdefault (not "= []") so that a heading appearing twice does
            # not throw away the references already collected under it.
            data.setdefault(subheading, [])

        match_grid_ref = re.match(grid_ref_regex, stripped)
        if match_grid_ref:
            grid_ref = match_grid_ref.group(1)
            lat, lon = uk_grid_to_coords(grid_ref)
            # setdefault also covers references that appear before the first
            # heading: they are filed under the initial value of `subheading`
            # instead of raising KeyError.
            data.setdefault(subheading, []).append([grid_ref, lat, lon])
# Create GeoJSON output for Wikimedia Commons.
# The document is built as plain Python data and serialised with the json
# module, so headings containing quotes or backslashes are escaped correctly
# and no hand-placed commas are needed between features.
color = "#555555"
features = [
    {
        "type": "Feature",
        "properties": {
            "marker-color": color,
            "title": "",
            "description": f"{heading}, grid reference: {grid_ref}",
            "marker-size": "small",
        },
        "geometry": {
            "type": "Point",
            # Longitude first, then latitude.
            "coordinates": [lon, lat],
        },
    }
    for heading, items in data.items()
    for grid_ref, lat, lon in items
]
commons_output = {
    "type": "FeatureCollection",
    "features": features,
}

# ensure_ascii=False keeps non-ASCII headings readable; the file is therefore
# written explicitly as UTF-8.
with open(output_filename, 'w', encoding='utf-8') as f:
    json.dump(commons_output, f, ensure_ascii=False, indent=4)
    f.write("\n")