kblayouts/kbtrans/mac_parser.py
2023-11-09 14:16:16 +01:00

331 lines
11 KiB
Python

import base64
from typing import TypeVar, List
from xml.dom import minidom
from xml.parsers.expat import ExpatError
from xml.dom.minidom import Document, Node, Element, getDOMImplementation
from kc import KeyCode
from mackb import StateName, ActionID, MapIndex, Modifier, ModMap, \
State, KeyMap, OnPress, ActionReference, Output, EnterState, Action, \
MacKeyboardLayout
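# Ukelele/macOS escapes "invalid" characters in attribute strings as numeric character references of the form &#xXXXX; (four hex digits), whereas Python's XML tooling writes the character directly. We therefore search for all quoted strings in the input, decode these escapes and base64-encode the result so the document can be parsed now and the real values recovered later.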
def encode_document(inp: str) -> str:
    """Base64-encode the double-quoted strings of a raw XML document.

    The text is scanned character by character.  Everything between a pair
    of double quotes is collected as one string; inside it, an escape of the
    exact form ``&#xXXXX;`` (four characters between ``&#x`` and ``;``) is
    replaced by the character with that hexadecimal code point.  At the
    closing quote the string is written back base64-encoded (of its UTF-8
    bytes) when the most recent ``<`` was not followed by ``?`` or ``!``;
    otherwise it is written back as collected.

    Look-aheads use slicing rather than indexing, so truncated input (a
    trailing ``<`` or an ``&`` close to the end of the text) no longer
    raises ``IndexError``.

    Args:
        inp: The raw document text.

    Returns:
        The rewritten document.  A string that is still open when the input
        ends is dropped from the output.

    Raises:
        ValueError: If the four characters of an ``&#xXXXX;`` escape are not
            a valid hexadecimal number.
    """
    # TODO: This should be a bytes object. The input seems to be utf-16
    # However, I don't think we'll be parsing anything outside 0x0-0xffff
    pieces = []        # output fragments, joined once at the end
    string_chars = []  # characters of the quoted string being collected
    in_str = False
    inside_proper_tag = False
    i = 0
    while i < len(inp):
        ch = inp[i]
        if not in_str:
            if ch == "<":
                # A slice is empty (not an error) when "<" is the last char.
                inside_proper_tag = inp[i + 1:i + 2] not in "?!"
            if ch == '"':
                in_str = True
            else:
                pieces.append(ch)
            i += 1
        elif inp.startswith("&#x", i) and inp[i + 7:i + 8] == ";":
            # Fixed-width escape: "&#x" + 4 hex digits + ";" = 8 characters.
            string_chars.append(chr(int(inp[i + 3:i + 7], 16)))
            i += 8
        elif ch == '"':
            string_contents = "".join(string_chars)
            if inside_proper_tag:
                encoded = base64.b64encode(string_contents.encode()).decode()
                pieces.append('"' + encoded + '"')
            else:
                pieces.append('"' + string_contents + '"')
            string_chars = []
            in_str = False
            i += 1
        else:
            string_chars.append(ch)
            i += 1
    return "".join(pieces)
def percent_encode(inp: str) -> str:
    """Escape control and markup characters as ``&#xXXXX;`` references.

    Characters with a code point below 0x20 and the characters ``&``, ``"``,
    ``<`` and ``>`` are replaced by ``&#x`` + their lowercase hexadecimal
    code point, zero-padded to at least four digits, + ``;``.  Every other
    character is copied unchanged.

    Args:
        inp: The text to escape.

    Returns:
        The escaped text.
    """
    def escape(char: str) -> str:
        code_point = ord(char)
        if code_point >= 0x20 and char not in '&"<>':
            return char
        return f"&#x{code_point:04x};"

    return "".join(map(escape, inp))
def decode_document(inp: str) -> str:
    """Turn base64 attribute strings of a serialised document back into text.

    This is the counterpart of ``encode_document``: the text is scanned
    character by character and everything between a pair of double quotes is
    collected as one string.  At the closing quote, if the most recent ``<``
    was not followed by ``?`` or ``!``, the string is base64-decoded, decoded
    as UTF-8 and passed through ``percent_encode`` before being written back;
    otherwise it is written back unchanged.

    The look-ahead after ``<`` uses slicing rather than indexing, so a ``<``
    at the very end of the input no longer raises ``IndexError``.

    Args:
        inp: The serialised document whose attribute values are base64.

    Returns:
        The rewritten document.  A string that is still open when the input
        ends is dropped from the output.

    Raises:
        ValueError: If a string inside a regular tag is not valid base64
            (``binascii.Error``) or does not decode as UTF-8
            (``UnicodeDecodeError``).
    """
    pieces = []        # output fragments, joined once at the end
    string_chars = []  # characters of the quoted string being collected
    in_str = False
    inside_proper_tag = False
    for i, ch in enumerate(inp):
        if not in_str:
            if ch == "<":
                # A slice is empty (not an error) when "<" is the last char.
                inside_proper_tag = inp[i + 1:i + 2] not in "?!"
            if ch == '"':
                in_str = True
            else:
                pieces.append(ch)
        elif ch == '"':
            string_contents = "".join(string_chars)
            if inside_proper_tag:
                decoded = base64.b64decode(string_contents.encode()).decode()
                pieces.append('"' + percent_encode(decoded) + '"')
            else:
                pieces.append('"' + string_contents + '"')
            string_chars = []
            in_str = False
        else:
            string_chars.append(ch)
    return "".join(pieces)
def decode_tree(d: Document) -> Document:
    """Base64-decode every attribute value in the tree, in place.

    All descendants of *d* are visited in document order.  For each
    ``Element`` every attribute value is replaced by the UTF-8 text obtained
    from base64-decoding it.  Other node types are left untouched.

    Args:
        d: The document whose attributes hold base64 text.

    Returns:
        The same *d* object, after modification.
    """
    # Explicit stack; children are pushed reversed so they pop in order.
    pending = list(reversed(d.childNodes))
    while pending:
        node = pending.pop()
        if isinstance(node, Element):
            for attr_name, encoded in node.attributes.items():
                plain = base64.b64decode(encoded.encode()).decode()
                node.setAttribute(attr_name, plain)
        pending.extend(reversed(node.childNodes))
    return d
def encode_tree(d: Document) -> Document:
    """Base64-encode every attribute value in the tree, in place.

    All descendants of *d* are visited in document order.  For each
    ``Element`` every attribute value is replaced by the base64 encoding of
    its UTF-8 bytes.  Other node types are left untouched.

    Args:
        d: The document whose attributes hold plain text.

    Returns:
        The same *d* object, after modification.
    """
    # Explicit stack; children are pushed reversed so they pop in order.
    pending = list(reversed(d.childNodes))
    while pending:
        node = pending.pop()
        if isinstance(node, Element):
            for attr_name, plain in node.attributes.items():
                encoded = base64.b64encode(plain.encode()).decode()
                node.setAttribute(attr_name, encoded)
        pending.extend(reversed(node.childNodes))
    return d
def load_file(path: str) -> Document:
    """Read a layout file and return it as a DOM with plain attribute values.

    The file text is run through ``encode_document`` (so that attribute
    strings become base64), parsed with ``minidom.parseString`` and the
    resulting tree is passed through ``decode_tree`` to restore the real
    attribute values.

    A copy of the pre-processed text is also written to ``/tmp/mjau-i`` as a
    debugging aid.  That write is best effort: failing to create the dump
    (for example when ``/tmp`` does not exist) no longer aborts the load.

    Args:
        path: Path of the file to read.

    Returns:
        The parsed document.

    Raises:
        OSError: If *path* cannot be read.
        xml.parsers.expat.ExpatError: If the pre-processed text is not
            well-formed XML.
    """
    with open(path, "r") as f:
        data = f.read()
    parsed = encode_document(data)
    try:
        with open("/tmp/mjau-i", "w") as o:
            o.write(parsed)
    except OSError:
        # The dump is only a debugging aid; loading must not depend on it.
        pass
    return decode_tree(minidom.parseString(parsed))
def save_file(path: str, x: Document):
    """Serialise a DOM to *path* with ``&#xXXXX;``-style escaped attributes.

    The tree is passed through ``encode_tree``, pretty-printed with
    ``toprettyxml`` and the resulting text is run through
    ``decode_document`` before being written to *path*.

    A copy of the intermediate (base64) text is also written to
    ``/tmp/mjau-o`` as a debugging aid.  That write is best effort: failing
    to create the dump no longer aborts the save.

    NOTE(review): ``encode_tree`` is applied to *x* itself, so the caller's
    document appears to be left with base64 attribute values after this
    call - confirm whether callers reuse *x* afterwards.

    Args:
        path: Destination file path.
        x: The document to write.

    Raises:
        OSError: If *path* cannot be written.
    """
    gen = encode_tree(x).toprettyxml(indent=" ")
    try:
        with open("/tmp/mjau-o", "w") as o:
            o.write(gen)
    except OSError:
        # The dump is only a debugging aid; saving must not depend on it.
        pass
    with open(path, "w") as out:
        out.write(decode_document(gen))
A = TypeVar("A")


def get_only(l: List[A], name: str) -> A:
    """Return the single element of *l*, insisting that there is exactly one.

    The emptiness check uses ``len`` rather than comparison with ``[]``, so
    any empty sequence (not only an empty ``list``) is reported with the
    intended ``ValueError`` instead of falling through to an ``IndexError``.

    Args:
        l: The sequence expected to hold exactly one element.
        name: Human-readable description of the element, used in the error
            message.

    Returns:
        The only element of *l*.

    Raises:
        ValueError: If *l* is empty or holds more than one element.
    """
    count = len(l)
    if count == 0:
        raise ValueError(f"No {name}")
    if count > 1:
        raise ValueError(f"Too many {name} ({count})")
    return l[0]
def parse_modmap(node: Node) -> ModMap:
    """Build a ``ModMap`` from the ``keyMapSelect`` children of *node*.

    For each ``keyMapSelect`` child, the ``mapIndex`` attribute is read and
    its single ``modifier`` child supplies a space-separated ``keys``
    attribute.  Each key name is converted with ``Modifier.from_name``;
    names for which that raises ``ValueError`` are ignored.  If the
    resulting set of modifiers equals one already recorded for an earlier
    select, a message is printed and the select is skipped.

    Args:
        node: The node whose children are examined.

    Returns:
        A ``ModMap`` built from the mapping of map index to modifier set.

    Raises:
        ValueError: If a ``keyMapSelect`` does not have exactly one
            ``modifier`` child (raised by ``get_only``).
    """
    seen_key_sets = []
    selects = {}
    for select in node.childNodes:
        if select.nodeName == "keyMapSelect":
            map_index = MapIndex(int(select.attributes["mapIndex"].value))
            modifier_nodes = [ch for ch in select.childNodes if ch.nodeName == "modifier"]
            modifier = get_only(modifier_nodes, f"modifier of {map_index}")

            modifier_keys = set()
            for key_name in modifier.attributes["keys"].value.split(" "):
                try:
                    parsed = Modifier.from_name(key_name)
                except ValueError:
                    # Names that from_name rejects are left out of the set.
                    continue
                modifier_keys.add(parsed)

            if modifier_keys in seen_key_sets:
                print(f"modifier select {map_index} already used, skipping")
            else:
                seen_key_sets.append(modifier_keys)
                selects[map_index] = modifier_keys
    return ModMap(selects)
def parse_onpress(node: Element) -> OnPress:
    """Convert an element's ``action``/``output``/``next`` attribute.

    The attributes are checked in that order and the first one present
    decides the result: ``action`` gives an ``ActionReference``, ``output``
    an ``Output`` and ``next`` an ``EnterState``, each constructed from the
    attribute's value.

    Args:
        node: The element to inspect.

    Returns:
        The on-press object for the first matching attribute.

    Raises:
        ValueError: If none of the three attributes is present.
    """
    # (attribute name, constructor) pairs, in priority order.
    candidates = (
        ("action", ActionReference),
        ("output", Output),
        ("next", EnterState),
    )
    for attr_name, build in candidates:
        if attr_name in node.attributes:
            return build(node.attributes[attr_name].value)
    raise ValueError(f"node {node} with attributes {dict(node.attributes)} has no on-press")
def parse_keymap(node: Node) -> KeyMap:
    """Build a ``KeyMap`` from the ``key`` children of *node*.

    Each ``key`` child contributes one entry: its integer ``code`` attribute
    wrapped in ``KeyCode`` maps to the result of ``parse_onpress`` for that
    child.  Children with any other node name are ignored; a later ``key``
    with the same code replaces an earlier one.

    Args:
        node: The node whose children are examined.

    Returns:
        A ``KeyMap`` built from the collected entries.
    """
    key_nodes = (child for child in node.childNodes if child.nodeName == "key")
    return KeyMap({
        KeyCode(int(key.attributes["code"].value)): parse_onpress(key)
        for key in key_nodes
    })
def parse_keyboard_layout(path: str) -> MacKeyboardLayout:
    """Parse the layout file at *path* into a ``MacKeyboardLayout``.

    The document is loaded with ``load_file`` and the following parts are
    read from it:

    * the ``name`` attribute of the single ``keyboard`` element;
    * the ``when`` children of the single ``terminators`` element, giving
      one ``State`` (holding the ``output`` attribute) per ``state`` name;
    * the single ``modifierMap`` element, via ``parse_modmap``;
    * the ``keyMap`` children of the single ``keyMapSet`` whose ``id`` is
      ``"ANSI"``, via ``parse_keymap``, keyed by their ``index``;
    * the ``action`` children of the single ``actions`` element, each
      mapping the ``state`` of its ``when`` children to the result of
      ``parse_onpress``.

    Args:
        path: Path of the layout file.

    Returns:
        The parsed layout.

    Raises:
        ValueError: If one of the elements above is missing or occurs more
            than once (raised by ``get_only``), or if a ``key``/``when``
            element carries none of the expected on-press attributes.
        KeyError: If a required attribute is missing from an element.
    """
    dom: Document = load_file(path)

    # Locate every section first so structural problems surface early.
    keyboard = get_only(dom.getElementsByTagName("keyboard"), "keyboard")
    name = keyboard.attributes["name"].value
    terminators_node = get_only(dom.getElementsByTagName("terminators"), "terminators")
    modmap_node = get_only(dom.getElementsByTagName("modifierMap"), "modifierMap")
    ansi_keymaps = [
        keymap_set
        for keymap_set in dom.getElementsByTagName("keyMapSet")
        if keymap_set.attributes["id"].value == "ANSI"
    ]
    keymapset_node = get_only(ansi_keymaps, "keymapSet (ANSI)")
    actions_node = get_only(dom.getElementsByTagName("actions"), "actions")

    # State name -> State carrying that state's terminator output.
    states = {}
    for state_node in terminators_node.childNodes:
        if state_node.nodeName != "when":
            continue
        state_name = StateName(state_node.attributes["state"].value)
        states[state_name] = State(state_node.attributes["output"].value)

    modmap = parse_modmap(modmap_node)

    # Map index -> KeyMap for every keyMap of the ANSI set.
    keymaps = {}
    for keymap_node in keymapset_node.childNodes:
        if keymap_node.nodeName != "keyMap":
            continue
        map_index = MapIndex(int(keymap_node.attributes["index"].value))
        keymaps[map_index] = parse_keymap(keymap_node)

    # Action id -> Action holding one on-press per state.
    actions = {}
    for action in actions_node.childNodes:
        if action.nodeName != "action":
            continue
        action_id = ActionID(action.attributes["id"].value)
        state_actions = {}
        for on_press_node in action.childNodes:
            if on_press_node.nodeName != "when":
                continue
            state = StateName(on_press_node.attributes["state"].value)
            state_actions[state] = parse_onpress(on_press_node)
        actions[action_id] = Action(state_actions)

    return MacKeyboardLayout(
        name=name,
        states=states,
        modmap=modmap,
        keymaps=keymaps,
        actions=actions,
    )
def write_on_press(doc: Document, name: str, on_press: OnPress) -> Element:
    """Create an element named *name* carrying *on_press* as one attribute.

    An ``ActionReference`` is written as ``action`` (its ``ref``), an
    ``Output`` as ``output`` (its ``output``) and an ``EnterState`` as
    ``next`` (its ``state_name``).  The element is created with *doc* but is
    not attached to any parent here.

    Args:
        doc: The document used to create the element.
        name: Tag name of the new element.
        on_press: The on-press value to serialise.

    Returns:
        The new element.

    Raises:
        ValueError: If *on_press* is none of the three supported types.
    """
    element = doc.createElement(name)
    if isinstance(on_press, ActionReference):
        attribute, value = "action", on_press.ref
    elif isinstance(on_press, Output):
        attribute, value = "output", on_press.output
    elif isinstance(on_press, EnterState):
        attribute, value = "next", on_press.state_name
    else:
        raise ValueError(on_press)
    element.setAttribute(attribute, value)
    return element
def unparse(kb: MacKeyboardLayout) -> Document:
    """Build the XML document for *kb*.

    The root ``keyboard`` element receives fixed ``group``/``id`` values,
    the layout name and ``maxout``, which is the largest number of UTF-16
    code units of any ``Output`` found in the actions or keymaps (0 when
    there is none).  Its children are, in order: ``layouts`` (one fixed
    ``layout`` entry), ``modifierMap``, ``keyMapSet`` (id ``ANSI``),
    ``actions`` and ``terminators``.

    Args:
        kb: The layout to serialise.

    Returns:
        A new document; attribute values are plain text.
    """
    impl = getDOMImplementation()
    assert impl is not None

    def utf16_units(text):
        # Two bytes per UTF-16 code unit.
        return len(text.encode("utf-16-le")) // 2

    action_presses = [
        press
        for act in kb.actions.values()
        for press in act.state_actions.values()
    ]
    key_presses = [
        press
        for key_map in kb.keymaps.values()
        for press in key_map.keys.values()
    ]
    maxout = max(
        (
            utf16_units(press.output)
            for press in action_presses + key_presses
            if isinstance(press, Output)
        ),
        default=0,
    )

    doc = impl.createDocument(None, "keyboard", None)

    def add_child(parent, tag, attrs=()):
        # Create <tag>, set its attributes in order, attach it to parent.
        node = doc.createElement(tag)
        for attr_name, attr_value in attrs:
            node.setAttribute(attr_name, attr_value)
        parent.appendChild(node)
        return node

    root = doc.documentElement
    root.setAttribute("group", "126")
    root.setAttribute("id", "123456")
    root.setAttribute("name", kb.name)
    root.setAttribute("maxout", str(maxout))

    layouts_elem = add_child(root, "layouts")
    add_child(layouts_elem, "layout", (
        ("first", "0"),
        ("last", "207"),
        ("mapSet", "ANSI"),
        ("modifiers", "Modifiers"),
    ))

    modifier_map = add_child(root, "modifierMap", (
        ("id", "Modifiers"),
        ("defaultIndex", "0"),
    ))
    for map_index, modifiers in kb.modmap.selects.items():
        select_elem = add_child(modifier_map, "keyMapSelect", (("mapIndex", str(map_index)),))
        keys_text = " ".join(m.value for m in modifiers)
        add_child(select_elem, "modifier", (("keys", keys_text),))

    keymap_set = add_child(root, "keyMapSet", (("id", "ANSI"),))
    for map_index, key_map in kb.keymaps.items():
        keymap_elem = add_child(keymap_set, "keyMap", (("index", str(map_index)),))
        for key_code, press in key_map.keys.items():
            # Each key is preceded by a comment holding the key code's str().
            keymap_elem.appendChild(doc.createComment(str(key_code)))
            key_elem = write_on_press(doc, "key", press)
            key_elem.setAttribute("code", str(key_code.kc))
            keymap_elem.appendChild(key_elem)

    actions_elem = add_child(root, "actions")
    for action_id, action in kb.actions.items():
        action_elem = add_child(actions_elem, "action", (("id", action_id),))
        for state_name, press in action.state_actions.items():
            when_elem = write_on_press(doc, "when", press)
            when_elem.setAttribute("state", state_name)
            action_elem.appendChild(when_elem)

    terminators_elem = add_child(root, "terminators")
    for state_name, state in kb.states.items():
        add_child(terminators_elem, "when", (
            ("state", state_name),
            ("output", state.terminator),
        ))
    return doc