Skip to content

Commit 6abe9c0

Browse files
authored
adding script to tune camera (ARISE-Initiative#64)
* adding helper script to tune camera positions with keyboard inputs
1 parent ee51df5 commit 6abe9c0

File tree

1 file changed

+265
-0
lines changed

1 file changed

+265
-0
lines changed

robosuite/scripts/tune_camera.py

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
"""
2+
Convenience script to tune a camera view in a mujoco environment.
3+
Allows keyboard presses to move a camera around in the viewer, and
4+
then prints the final position and quaternion you should set
5+
for your camera in the mujoco XML file.
6+
"""
7+
8+
import time
9+
import argparse
10+
import glfw
11+
import xml.etree.ElementTree as ET
12+
import numpy as np
13+
14+
import robosuite
15+
import robosuite.utils.transform_utils as T
16+
17+
# some settings
18+
DELTA_POS_KEY_PRESS = 0.05 # delta camera position per key press
19+
DELTA_ROT_KEY_PRESS = 1 # delta camera angle per key press
20+
21+
def modify_xml_for_camera_movement(xml, camera_name):
22+
"""
23+
Cameras in mujoco are 'fixed', so they can't be moved by default.
24+
Although it's possible to hack position movement, rotation movement
25+
does not work. An alternative is to attach a camera to a mocap body,
26+
and move the mocap body.
27+
28+
This function modifies the camera with name @camera_name in the xml
29+
by attaching it to a mocap body that can move around freely. In this
30+
way, we can move the camera by moving the mocap body.
31+
32+
See http://www.mujoco.org/forum/index.php?threads/move-camera.2201/ for
33+
further details.
34+
"""
35+
tree = ET.fromstring(xml)
36+
wb = tree.find("worldbody")
37+
38+
# find the correct camera
39+
camera_elem = None
40+
cameras = wb.findall("camera")
41+
for camera in cameras:
42+
if camera.get("name") == camera_name:
43+
camera_elem = camera
44+
break
45+
assert(camera_elem is not None)
46+
47+
# add mocap body
48+
mocap = ET.SubElement(wb, "body")
49+
mocap.set("name", "cameramover")
50+
mocap.set("mocap", "true")
51+
mocap.set("pos", camera.get("pos"))
52+
mocap.set("quat", camera.get("quat"))
53+
new_camera = ET.SubElement(mocap, "camera")
54+
new_camera.set("mode", "fixed")
55+
new_camera.set("name", camera.get("name"))
56+
new_camera.set("pos", "0 0 0")
57+
58+
# remove old camera element
59+
wb.remove(camera_elem)
60+
61+
return ET.tostring(tree, encoding="utf8").decode("utf8")
62+
63+
def move_camera(env, direction, scale, camera_id):
64+
"""
65+
Move the camera view along a direction (in the camera frame).
66+
:param direction: a 3-dim numpy array for where to move camera in camera frame
67+
:param scale: a float for how much to move along that direction
68+
:param camera_id: which camera to modify
69+
"""
70+
71+
# current camera pose
72+
camera_pos = np.array(env.sim.data.get_mocap_pos("cameramover"))
73+
camera_rot = T.quat2mat(T.convert_quat(env.sim.data.get_mocap_quat("cameramover"), to='xyzw'))
74+
75+
# move along camera frame axis and set new position
76+
camera_pos += scale * camera_rot.dot(direction)
77+
env.sim.data.set_mocap_pos("cameramover", camera_pos)
78+
env.sim.forward()
79+
80+
def rotate_camera(env, direction, angle, camera_id):
81+
"""
82+
Rotate the camera view about a direction (in the camera frame).
83+
:param direction: a 3-dim numpy array for where to move camera in camera frame
84+
:param angle: a float for how much to rotate about that direction
85+
:param camera_id: which camera to modify
86+
"""
87+
88+
# current camera rotation
89+
camera_rot = T.quat2mat(T.convert_quat(env.sim.data.get_mocap_quat("cameramover"), to='xyzw'))
90+
91+
# rotate by angle and direction to get new camera rotation
92+
rad = np.pi * angle / 180.0
93+
R = T.rotation_matrix(rad, direction, point=None)
94+
camera_rot = camera_rot.dot(R[:3, :3])
95+
96+
# set new rotation
97+
env.sim.data.set_mocap_quat("cameramover", T.convert_quat(T.mat2quat(camera_rot), to='wxyz'))
98+
env.sim.forward()
99+
100+
101+
class KeyboardHandler:
102+
def __init__(self, env, camera_id):
103+
"""
104+
Store internal state here.
105+
"""
106+
self.env = env
107+
self.camera_id = camera_id
108+
109+
def on_press(self, window, key, scancode, action, mods):
110+
"""
111+
Key handler for key presses.
112+
"""
113+
114+
# controls for moving position
115+
if key == glfw.KEY_W:
116+
# move forward
117+
move_camera(env=self.env, direction=[0., 0., -1.], scale=DELTA_POS_KEY_PRESS, camera_id=self.camera_id)
118+
elif key == glfw.KEY_S:
119+
# move backward
120+
move_camera(env=self.env, direction=[0., 0., 1.], scale=DELTA_POS_KEY_PRESS, camera_id=self.camera_id)
121+
elif key == glfw.KEY_A:
122+
# move left
123+
move_camera(env=self.env, direction=[-1., 0., 0.], scale=DELTA_POS_KEY_PRESS, camera_id=self.camera_id)
124+
elif key == glfw.KEY_D:
125+
# move right
126+
move_camera(env=self.env, direction=[1., 0., 0.], scale=DELTA_POS_KEY_PRESS, camera_id=self.camera_id)
127+
elif key == glfw.KEY_R:
128+
# move up
129+
move_camera(env=self.env, direction=[0., 1., 0.], scale=DELTA_POS_KEY_PRESS, camera_id=self.camera_id)
130+
elif key == glfw.KEY_F:
131+
# move down
132+
move_camera(env=self.env, direction=[0., -1., 0.], scale=DELTA_POS_KEY_PRESS, camera_id=self.camera_id)
133+
134+
135+
# controls for moving rotation
136+
elif key == glfw.KEY_UP:
137+
# rotate up
138+
rotate_camera(env=self.env, direction=[1., 0., 0.], angle=DELTA_ROT_KEY_PRESS, camera_id=self.camera_id)
139+
elif key == glfw.KEY_DOWN:
140+
# rotate down
141+
rotate_camera(env=self.env, direction=[-1., 0., 0.], angle=DELTA_ROT_KEY_PRESS, camera_id=self.camera_id)
142+
elif key == glfw.KEY_LEFT:
143+
# rotate left
144+
rotate_camera(env=self.env, direction=[0., 1., 0.], angle=DELTA_ROT_KEY_PRESS, camera_id=self.camera_id)
145+
elif key == glfw.KEY_RIGHT:
146+
# rotate right
147+
rotate_camera(env=self.env, direction=[0., -1., 0.], angle=DELTA_ROT_KEY_PRESS, camera_id=self.camera_id)
148+
elif key == glfw.KEY_PERIOD:
149+
# rotate counterclockwise
150+
rotate_camera(env=self.env, direction=[0., 0., 1.], angle=DELTA_ROT_KEY_PRESS, camera_id=self.camera_id)
151+
elif key == glfw.KEY_SLASH:
152+
# rotate clockwise
153+
rotate_camera(env=self.env, direction=[0., 0., -1.], angle=DELTA_ROT_KEY_PRESS, camera_id=self.camera_id)
154+
155+
156+
def on_release(self, window, key, scancode, action, mods):
157+
"""
158+
Key handler for key releases.
159+
"""
160+
pass
161+
162+
def print_command(char, info):
163+
char += " " * (10 - len(char))
164+
print("{}\t{}".format(char, info))
165+
166+
if __name__ == "__main__":
167+
parser = argparse.ArgumentParser()
168+
parser.add_argument(
169+
"--env",
170+
type=str,
171+
default="SawyerLift",
172+
)
173+
args = parser.parse_args()
174+
175+
print("\nWelcome to the camera tuning script! You will be able to tune a camera view")
176+
print("by moving it around using your keyboard. The controls are printed below.")
177+
178+
print("")
179+
print_command("Keys", "Command")
180+
print_command("w-s", "zoom the camera in/out")
181+
print_command("a-d", "pan the camera left/right")
182+
print_command("r-f", "pan the camera up/down")
183+
print_command("arrow keys", "rotate the camera to change view direction")
184+
print_command(".-/", "rotate the camera view without changing view direction")
185+
print("")
186+
187+
# read camera XML tag from user input
188+
inp = input("\nPlease paste a camera xml tag below (e.g. <camera ... />) \nOR leave blank for an example:\n")
189+
190+
if len(inp) == 0:
191+
if args.env != "SawyerLift":
192+
raise Exception("ERROR: env must be SawyerLift to run default example.")
193+
print("\nUsing an example tag corresponding to the frontview camera.")
194+
print("This xml tag was copied from robosuite/models/assets/arenas/table_arena.xml")
195+
inp = '<camera mode="fixed" name="frontview" pos="1.6 0 1.45" quat="0.56 0.43 0.43 0.56"/>'
196+
197+
print("NOTE: using the following xml tag:\n")
198+
print("{}\n".format(inp))
199+
200+
# remember the tag and infer some properties
201+
cam_tree = ET.fromstring(inp)
202+
CAMERA_NAME = cam_tree.get("name")
203+
initial_file_camera_pos = np.array(cam_tree.get("pos").split(" ")).astype(float)
204+
initial_file_camera_quat = T.convert_quat(np.array(cam_tree.get("quat").split(" ")).astype(float), to='xyzw')
205+
initial_file_camera_pose = T.make_pose(initial_file_camera_pos, T.quat2mat(initial_file_camera_quat))
206+
207+
# make the environment
208+
env = robosuite.make(
209+
args.env,
210+
has_renderer=True,
211+
ignore_done=True,
212+
use_camera_obs=False,
213+
control_freq=100,
214+
)
215+
env.reset()
216+
initial_mjstate = env.sim.get_state().flatten()
217+
xml = env.model.get_xml()
218+
219+
# add mocap body to camera to be able to move it around
220+
xml = modify_xml_for_camera_movement(xml, camera_name=CAMERA_NAME)
221+
env.reset_from_xml_string(xml)
222+
env.sim.reset()
223+
env.sim.set_state_from_flattened(initial_mjstate)
224+
env.sim.forward()
225+
226+
camera_id = env.sim.model.camera_name2id(CAMERA_NAME)
227+
env.viewer.set_camera(camera_id=camera_id)
228+
229+
# remember difference between camera pose in initial tag
230+
# and absolute camera pose in world
231+
initial_world_camera_pos = np.array(env.sim.data.get_mocap_pos("cameramover"))
232+
initial_world_camera_quat = T.convert_quat(env.sim.data.get_mocap_quat("cameramover"), to='xyzw')
233+
initial_world_camera_pose = T.make_pose(initial_world_camera_pos, T.quat2mat(initial_world_camera_quat))
234+
world_in_file = initial_file_camera_pose.dot(T.pose_inv(initial_world_camera_pose))
235+
236+
# register callbacks to handle key presses in the viewer
237+
key_handler = KeyboardHandler(env=env, camera_id=camera_id)
238+
env.viewer.add_keypress_callback("any", key_handler.on_press)
239+
env.viewer.add_keyup_callback("any", key_handler.on_release)
240+
env.viewer.add_keyrepeat_callback("any", key_handler.on_press)
241+
242+
# just spin to let user interact with glfw window
243+
spin_count = 0
244+
while True:
245+
action = np.zeros(env.dof)
246+
obs, reward, done, _ = env.step(action)
247+
env.render()
248+
spin_count += 1
249+
if spin_count % 500 == 0:
250+
# convert from world coordinates to file coordinates (xml subtree)
251+
camera_pos = env.sim.data.get_mocap_pos("cameramover")
252+
camera_quat = T.convert_quat(env.sim.data.get_mocap_quat("cameramover"), to='xyzw')
253+
world_camera_pose = T.make_pose(camera_pos, T.quat2mat(camera_quat))
254+
file_camera_pose = world_in_file.dot(world_camera_pose)
255+
camera_pos, camera_quat = T.mat2pose(file_camera_pose)
256+
camera_quat = T.convert_quat(camera_quat, to='wxyz')
257+
258+
print("\n\ncurrent camera tag you should copy")
259+
cam_tree.set("pos", "{} {} {}".format(camera_pos[0], camera_pos[1], camera_pos[2]))
260+
cam_tree.set("quat", "{} {} {} {}".format(camera_quat[0], camera_quat[1], camera_quat[2], camera_quat[3]))
261+
print(ET.tostring(cam_tree, encoding="utf8").decode("utf8"))
262+
263+
264+
265+

0 commit comments

Comments
 (0)