-
Notifications
You must be signed in to change notification settings - Fork 28
/
main.py
executable file
·2785 lines (2457 loc) · 178 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
import time
import pickle
import bz2
import os
import signal
import sys
import random
import threading
import argparse
import torch
from torch.autograd import Variable
import numpy as np
import scipy as sc
import cv2
from collections import namedtuple
from robot import Robot
from trainer import Trainer
from logger import Logger
import utils
from utils import ACTION_TO_ID
from utils import ID_TO_ACTION
from utils import StackSequence
from utils import compute_demo_dist, compute_cc_dist
from utils import annotate_success_manually
from utils_torch import action_space_argmax
from utils_torch import action_space_explore_random
from demo import Demonstration, load_all_demos
import plot
import json
import copy
import shutil
import matplotlib
import matplotlib.pyplot as plt
from data import DatasetReader, GoodRobotDatasetReader
from generate_logoblocks_images import BlockSetter
from annotate_data import Pair
def run_title(args):
    """Build the human-readable run title and a time-stamped directory name.

    # Arguments

        args: parsed command line options. The fields read here are is_sim,
            task_type, check_row, place, use_demo, trial_reward,
            discounted_reward, common_sense, test_preset_cases, is_testing
            and depth_channels_history.

    # Returns

        title, dirname
    """
    # Labels for the explicitly named task types.
    task_type_labels = {
        'vertical_square': 'Vertical Square, ',
        'unstack': 'Unstacking, ',
        'stack': 'Stack, ',
        'row': 'Row, ',
    }

    pieces = ['Sim ' if args.is_sim else 'Real ']

    # Task description: an explicit task_type takes priority over the
    # check_row / place flags.
    if args.task_type is not None:
        # A task_type that is not in the table contributes no label.
        pieces.append(task_type_labels.get(args.task_type, ''))
    elif args.check_row:
        pieces.append('Rows, ')
    elif args.place:
        pieces.append('Stack, ')
    else:
        # Neither place nor check_row is set at this point.
        pieces.append('Push and Grasp, ')

    # Reward / learning scheme, first match wins.
    if args.use_demo:
        reward_label = 'Imitation, '
    elif args.trial_reward:
        reward_label = 'SPOT Trial Reward, '
    elif args.discounted_reward:
        reward_label = 'Discounted Reward, '
    else:
        reward_label = 'Two Step Reward, '
    pieces.append(reward_label)

    if args.common_sense:
        pieces.append('Masked, ')

    # Run phase.
    if args.test_preset_cases:
        pieces.append('Challenging Arrangements')
    elif args.is_testing:
        pieces.append('Testing')
    else:
        pieces.append('Training')

    if args.depth_channels_history:
        pieces.append(', Three Step History')

    title = ''.join(pieces)

    # Turn the title into a file-system friendly name: ':', '.' and ' '
    # become '-', and ',' is dropped.
    filename_table = str.maketrans({':': '-', '.': '-', ',': None, ' ': '-'})
    save_file = os.path.basename(title).translate(filename_table)
    dirname = utils.timeStamped(save_file)
    return title, dirname
def main(args):
# TODO(ahundt) move main and process_actions() to a class?
num_problems_detected = 0
# --------------- Setup options ---------------
is_sim = args.is_sim # Run in simulation?
obj_mesh_dir = os.path.abspath(args.obj_mesh_dir) if is_sim else None # Directory containing 3D mesh files (.obj) of objects to be added to simulation
num_obj = args.num_obj if is_sim or args.check_row or args.use_demo or args.static_language_mask else None # Number of objects to add to simulation
num_extra_obj = args.num_extra_obj if is_sim or args.check_row or args.use_demo or args.static_language_mask else None
goal_num_obj = args.goal_num_obj
timeout = args.timeout # time to wait before simulator reset
if num_obj is not None:
num_obj += num_extra_obj
if args.check_row:
print('Overriding --num_obj to 4 because we have --check_row and will expect 4 blocks in a row.')
num_obj = 4
tcp_host_ip = args.tcp_host_ip if not is_sim else None # IP and port to robot arm as TCP client (UR5)
tcp_port = args.tcp_port # TODO(killeen) change the rest of these?
rtc_host_ip = args.rtc_host_ip if not is_sim else None # IP and port to robot arm as real-time client (UR5)
rtc_port = args.rtc_port if not is_sim else None
if is_sim:
workspace_limits = np.asarray([[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.5]]) # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
sim_workspace_limits = workspace_limits
else:
# Corner near window on robot base side
# [0.47984089 0.34192974 0.02173636]
# Corner on the side of the cameras and far from the window
# [ 0.73409861 -0.45199446 -0.00229499]
# Dimensions of workspace should be 448 mm x 448 mm. That's 224x224 pixels with each pixel being 2mm x2mm.
workspace_limits = np.asarray([[0.376, 0.824], [-0.264, 0.184], [-0.07, 0.4]]) # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
if args.place:
# The object sets differ for stacking, so add a bit to min z.
# TODO(ahundt) this keeps the real gripper from colliding with the block and causing a security stop when it misses a grasp on top of blocks. However, it makes the stacks appear shorter than they really are too, so this needs to be fixed in a more nuanced way.
workspace_limits[2][0] += 0.02
if args.use_demo:
# define sim_workspace limits if use_demo is set
sim_workspace_limits = np.asarray([[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.5]]) # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
# Original visual pushing grasping paper workspace definition
# workspace_limits = np.asarray([[0.3, 0.748], [-0.224, 0.224], [-0.255, -0.1]]) # Cols: min max, Rows: x y z (define workspace limits in robot coordinates)
heightmap_resolution = args.heightmap_resolution # Meters per pixel of heightmap
random_seed = args.random_seed
force_cpu = args.force_cpu
flops = args.flops
show_heightmap = args.show_heightmap
max_train_actions = args.max_train_actions
human_reset = args.human_reset
# ------------- Algorithm options -------------
method = args.method # 'reactive' (supervised learning) or 'reinforcement' (reinforcement learning ie Q-learning)
push_rewards = args.push_rewards if method == 'reinforcement' else None # Use immediate rewards (from change detection) for pushing?
future_reward_discount = args.future_reward_discount
experience_replay_enabled = args.experience_replay # Use prioritized experience replay?
trial_reward = args.trial_reward
discounted_reward = args.discounted_reward
heuristic_bootstrap = args.heuristic_bootstrap # Use handcrafted grasping algorithm when grasping fails too many times in a row?
explore_rate_decay = args.explore_rate_decay
grasp_only = args.grasp_only
check_row = args.check_row
check_z_height = args.check_z_height
check_z_height_goal = args.check_z_height_goal
check_z_height_max = args.check_z_height_max
pretrained = not args.random_weights
max_iter = args.max_iter
no_height_reward = args.no_height_reward
transfer_grasp_to_place = args.transfer_grasp_to_place
neural_network_name = args.nn
num_dilation = args.num_dilation
disable_situation_removal = args.disable_situation_removal
evaluate_random_objects = args.evaluate_random_objects
skip_noncontact_actions = args.skip_noncontact_actions
common_sense = args.common_sense
place_common_sense = common_sense and ((args.task_type is None) or ('unstack' not in args.task_type))
print('main.py using common sense:', common_sense, 'using place common sense:', place_common_sense)
common_sense_backprop = not args.no_common_sense_backprop
disable_two_step_backprop = args.disable_two_step_backprop
random_trunk_weights_max = args.random_trunk_weights_max
random_trunk_weights_reset_iters = args.random_trunk_weights_reset_iters
random_trunk_weights_min_success = args.random_trunk_weights_min_success
random_actions = args.random_actions
# TODO(zhe) Added static language mask option
static_language_mask = args.static_language_mask
baseline_language_mask = args.baseline_language_mask
is_bisk = args.is_bisk
human_annotation = args.human_annotation
randomized = static_language_mask or not args.place # If we are using the language mask, we are using the logoblock_dataset. randomized refers to the object meshes
obj_scale = 0.00018 if args.is_bisk else 1 # Hard coded value based on logoblock mesh size.
language_model_config = args.language_model_config
language_model_weights = args.language_model_weights
end_on_incorrect_order = args.end_on_incorrect_order
separation_threshold = args.separation_threshold
distance_threshold = args.distance_threshold
# -------------- Demo options -----------------------
use_demo = args.use_demo
demo_path = args.demo_path
task_type = args.task_type
primitive_distance_method = args.primitive_distance_method
cycle_consistency = args.cycle_consistency
depth_channels_history = args.depth_channels_history
# load example demos, load embeddings if they exist
if use_demo:
example_demos = load_all_demos(demo_path=args.demo_path, check_z_height=check_z_height,
task_type=args.task_type)
if cycle_consistency:
pickle_filename = os.path.join(demo_path, 'embeddings', 'embed_dict.pickle')
else:
pickle_filename = os.path.join(demo_path, 'embeddings', 'embed_dict_single.pickle')
if os.path.exists(pickle_filename):
with open(pickle_filename, 'rb') as data:
example_actions_dict = pickle.load(data)
print("main.py: loaded example_actions_dict")
else:
example_actions_dict = None
# NOTE(adit98) HACK, make sure we set task_type to 'unstack' and not 'unstacking'
if task_type is not None and 'unstack' in args.task_type:
args.task_type = 'unstack'
task_type = 'unstack'
# -------------- Test grasping options --------------
is_testing = args.is_testing
if is_testing:
print('Testing mode detected, automatically disabling situation removal.')
disable_situation_removal = True
max_test_trials = args.max_test_trials # Maximum number of test runs per case/scenario
# set max trial actions
if is_testing:
max_trial_actions = args.max_trial_actions_test
else:
max_trial_actions = args.max_trial_actions_train
test_preset_cases = args.test_preset_cases
trials_per_case = 1
show_preset_cases_then_exit = args.show_preset_cases_then_exit
if show_preset_cases_then_exit:
test_preset_cases = True
if test_preset_cases:
if args.test_preset_file:
# load just one specific file
preset_files = [os.path.abspath(args.test_preset_file)]
else:
# load a directory of files
preset_files = os.listdir(args.test_preset_dir)
preset_files = [os.path.abspath(os.path.join(args.test_preset_dir, filename)) for filename in preset_files]
preset_files = sorted(preset_files)
trials_per_case = max_test_trials
# run each preset file max_test_trials times.
max_test_trials *= len(preset_files)
test_preset_file = preset_files[0]
else:
preset_files = None
test_preset_file = None
unstack = args.unstack
if args.place and not args.is_sim:
unstack = True
args.unstack = True
print('--unstack is automatically enabled')
if args.task_type is not None and args.task_type != 'stack':
# don't do auto unstacking
save_history = False
else:
# do auto unstacking
save_history = True
else:
save_history = False
# ------ Pre-loading and logging options ------
snapshot_file, multi_task_snapshot_files, continue_logging, logging_directory = parse_resume_and_snapshot_file_args(args)
save_visualizations = args.save_visualizations # Save visualizations of FCN predictions? Takes 0.6s per training step if set to True
plot_window = args.plot_window
# ------ Stacking Blocks and Grasping Specific Colors -----
grasp_color_task = args.grasp_color_task
place = args.place
if grasp_color_task:
if not is_sim:
raise NotImplementedError('Real execution goal conditioning is not yet implemented')
goal_condition_len = num_obj
else:
goal_condition_len = 0
# Set random seed
np.random.seed(random_seed)
# Initialize pick-and-place system (camera and robot)
# TODO(zhe) modify the None here to ensure that the test_preset_arr option is set correctly
robot = Robot(is_sim, obj_mesh_dir, num_obj, workspace_limits,
tcp_host_ip, tcp_port, rtc_host_ip, rtc_port,
is_testing, test_preset_cases, test_preset_file, None,
place, grasp_color_task, unstack=unstack,
heightmap_resolution=heightmap_resolution, randomized=randomized, obj_scale=obj_scale, task_type=task_type,
language=static_language_mask, human_reset=human_reset)
# Set the "common sense" dynamic action space region around objects,
# which defines where place actions are permitted. Units are in meters.
if check_row:
place_dilation = 0.05
elif task_type == 'stack':
place_dilation = 0.00
elif task_type is not None:
place_dilation = 0.05
else:
place_dilation = 0.00
# Initialize trainer(s)
if use_demo:
assert task_type is not None, ("Must provide task_type if using demo")
assert is_testing, ("Must run in testing mode if using demo")
trainer = None
stack_trainer, row_trainer, unstack_trainer, vertical_square_trainer = None, None, None, None
# store list of trainers
trainers = []
if 'row' in multi_task_snapshot_files:
row_trainer = Trainer(method, push_rewards, future_reward_discount,
is_testing, multi_task_snapshot_files['row'], force_cpu,
goal_condition_len, place, pretrained, flops,
network=neural_network_name, common_sense=common_sense,
place_common_sense=place_common_sense, show_heightmap=show_heightmap,
place_dilation=place_dilation, common_sense_backprop=common_sense_backprop,
trial_reward='discounted' if discounted_reward else 'spot',
num_dilation=num_dilation)
# add row trainer to list of trainers
trainers.append(row_trainer)
# set trainer if not already set
if trainer is None:
trainer = row_trainer
if 'stack' in multi_task_snapshot_files:
stack_trainer = Trainer(method, push_rewards, future_reward_discount,
is_testing, multi_task_snapshot_files['stack'], force_cpu,
goal_condition_len, place, pretrained, flops,
network=neural_network_name, common_sense=common_sense,
place_common_sense=place_common_sense, show_heightmap=show_heightmap,
place_dilation=place_dilation, common_sense_backprop=common_sense_backprop,
trial_reward='discounted' if discounted_reward else 'spot',
num_dilation=num_dilation)
# add stack trainer to list of trainers
trainers.append(stack_trainer)
# set trainer if not already set
if trainer is None:
trainer = stack_trainer
if 'unstack' in multi_task_snapshot_files:
unstack_trainer = Trainer(method, push_rewards, future_reward_discount,
is_testing, multi_task_snapshot_files['unstack'], force_cpu,
goal_condition_len, place, pretrained, flops,
network=neural_network_name, common_sense=common_sense,
place_common_sense=place_common_sense, show_heightmap=show_heightmap,
place_dilation=place_dilation, common_sense_backprop=common_sense_backprop,
trial_reward='discounted' if discounted_reward else 'spot',
num_dilation=num_dilation)
# add unstack trainer to list of trainers
trainers.append(unstack_trainer)
# set trainer if not already set
if trainer is None:
trainer = unstack_trainer
if 'vertical_square' in multi_task_snapshot_files:
vertical_square_trainer = Trainer(method, push_rewards, future_reward_discount,
is_testing, multi_task_snapshot_files['vertical_square'], force_cpu,
goal_condition_len, place, pretrained, flops,
network=neural_network_name, common_sense=common_sense,
place_common_sense=place_common_sense, show_heightmap=show_heightmap,
place_dilation=place_dilation, common_sense_backprop=common_sense_backprop,
trial_reward='discounted' if discounted_reward else 'spot',
num_dilation=num_dilation)
# add vertical_square trainer to list of trainers
trainers.append(vertical_square_trainer)
# set trainer if not already set
if trainer is None:
trainer = vertical_square_trainer
else:
trainer = Trainer(method, push_rewards, future_reward_discount,
is_testing, snapshot_file, force_cpu,
goal_condition_len, place, pretrained, flops,
network=neural_network_name, common_sense=common_sense,
place_common_sense=place_common_sense, show_heightmap=show_heightmap,
place_dilation=place_dilation, common_sense_backprop=common_sense_backprop,
trial_reward='discounted' if discounted_reward else 'spot',
num_dilation=num_dilation, static_language_mask=static_language_mask, check_row = check_row, baseline_language_mask = baseline_language_mask)
if transfer_grasp_to_place:
# Transfer pretrained grasp weights to the place action.
trainer.model.transfer_grasp_to_place()
# Initialize data logger
title, dir_name = run_title(args)
logger = Logger(continue_logging, logging_directory, args=args, dir_name=dir_name)
logger.save_camera_info(robot.cam_intrinsics, robot.cam_pose, robot.cam_depth_scale) # Save camera intrinsics and pose
logger.save_heightmap_info(workspace_limits, heightmap_resolution) # Save heightmap parameters
# Quick hack for nonlocal memory between threads in Python 2
# Most of these variables are saved to a json file during a run, and reloaded during resume.
nonlocal_variables = {'executing_action': False,
'primitive_action': None,
'best_pix_ind': None,
'push_success': False,
'grasp_success': False,
'color_success': False,
'place_success': False,
'partial_stack_success': False,
'stack_height': 1,
'stack_rate': np.inf,
'trial_success_rate': np.inf,
'replay_iteration': 0,
'trial_complete': False,
'finalize_prev_trial_log': False,
'prev_stack_height': 1,
'save_state_this_iteration': False,
'example_actions_dict': None,
'language_metadata': {},
'best_trainer_log': []}
# load example_actions_dict if it exists
if use_demo:
nonlocal_variables['example_actions_dict'] = example_actions_dict
# Ignore these nonlocal_variables when saving/loading and resuming a run.
# They will always be initialized to their default values
always_default_nonlocals = ['executing_action',
'primitive_action',
'save_state_this_iteration']
# These variables handle pause and exit state. Also a quick hack for nonlocal memory.
nonlocal_pause = {'pause': 0,
'pause_time_start': time.time(),
# setup KeyboardInterrupt signal handler for pausing
'original_sigint': signal.getsignal(signal.SIGINT),
'exit_called': False,
'process_actions_exit_called': False}
# Find last executed iteration of pre-loaded log, and load execution info and RL variables
if continue_logging:
trainer.preload(logger.transitions_directory)
# when resuming, load nonlocal_variables from the previous point at which the log was finalized in the run
nonlocal_vars_filename = os.path.join(logger.base_directory, 'data', 'variables', 'nonlocal_vars_%d.json' % (trainer.iteration))
if os.path.exists(nonlocal_vars_filename):
with open(nonlocal_vars_filename, 'r') as f:
nonlocals_to_load = json.load(f)
# copy loaded values to nonlocals
for k, v in nonlocals_to_load.items():
if k not in always_default_nonlocals:
if k in nonlocal_variables: # ignore any entries in the saved data which aren't in nonlocal_variables
nonlocal_variables[k] = v
else:
print('WARNING: Missing /data/variables/nonlocal_vars_%d.json on resume. Default values initialized. Inconsistencies' % (trainer.iteration))
num_trials = trainer.end_trial()
# trainer.iteration += 1 # Begin next trial after loading
else:
num_trials = 0
# Initialize variables for heuristic bootstrapping and exploration probability
no_change_count = [2, 2] if not is_testing else [0, 0]
explore_prob = 0.5 if not is_testing else 0.0
if check_z_height:
nonlocal_variables['stack_height'] = 0.0
nonlocal_variables['prev_stack_height'] = 0.0
best_stack_rate = np.inf
prev_grasp_success = False
if check_z_height:
if place:
is_goal_conditioned = True
else:
is_goal_conditioned = False
else:
is_goal_conditioned = grasp_color_task or place
# Choose the first color block to grasp, or None if not running in goal conditioned mode
# color_names = ["red","blue","green","yellow", "brown", "orange", "gray", "purple", "cyan", "pink"]
if is_sim:
color_names = robot.object_colors
else:
# TODO(ahundt) allow command line setting of block colors for real experiments
color_names = ['red', 'blue', 'green','yellow']
print('Real robot mode, setting color names as follows, edit color_names in the code in main.py and robot.py if this is not correct: ' + str(color_names))
print('num_obj: ' + str(num_obj) + ' num_extra_obj: ' + str(num_extra_obj) + ' goal_num_obj: ' + str(goal_num_obj))
if num_obj is not None:
nonlocal_variables['stack'] = StackSequence(num_obj - num_extra_obj, goal_num_obj=goal_num_obj, is_goal_conditioned_task=is_goal_conditioned, trial=num_trials, total_steps=trainer.iteration, color_names=color_names)
else:
nonlocal_variables['stack'] = StackSequence(20, is_goal_conditioned, trial=num_trials, total_steps=trainer.iteration, color_names=color_names)
num_trials = 0
if continue_logging:
num_trials = int(max(trainer.trial_log)[0])
nonlocal_variables['stack'].trial = num_trials + 1
if place:
# If we are stacking we actually skip to the second block which needs to go on the first
nonlocal_variables['stack'].next()
trainer_iteration_of_most_recent_model_reload = 0
def pause(signum, frame):
    """SIGINT (Ctrl+C) handler which updates the pause / exit state in nonlocal_pause.

    A Ctrl+C arriving more than 5 seconds after nonlocal_pause['pause_time_start']
    starts a new window: the pause count is reset to 0 and the window start
    time is updated. Each further Ctrl+C inside the window increments the
    pause count. When the pause count reaches 3, 'exit_called' is set to
    request a clean exit and the pause count is cleared so the run can finish.
    When 5 presses have been counted inside one window, sys.exit(1) is called.

    The hard exit used to be unreachable, because the kill threshold was only
    tested after the lower stop threshold and the pause count was cleared
    whenever the stop threshold was hit. Presses are now also counted in
    nonlocal_pause['ctrl_c_count'], which is not cleared by a clean exit
    request, and the kill threshold is tested first.

    NOTE(review): the code that reads nonlocal_pause['pause'] and
    nonlocal_pause['exit_called'] is outside of this function, confirm how
    those values are consumed before relying on the exact press counts.

    # Arguments

        signum: signal number supplied by the signal module (unused).
        frame: current stack frame supplied by the signal module (unused).
    """
    # TODO(ahundt) come up with a cleaner pause resume API, maybe use an OpenCV interface.
    ctrl_c_stop_threshold = 3
    ctrl_c_kill_threshold = 5
    try:
        # restore the original signal handler as otherwise evil things will happen
        # in input when CTRL+C is pressed, and our signal handler is not re-entrant
        signal.signal(signal.SIGINT, nonlocal_pause['original_sigint'])
        # Time elapsed since the current window of Ctrl+C presses was started.
        time_since_last_ctrl_c = time.time() - nonlocal_pause['pause_time_start']
        if time_since_last_ctrl_c > 5:
            # The window expired: start a new one with all counts cleared.
            nonlocal_pause['pause'] = 0
            nonlocal_pause['ctrl_c_count'] = 0
            nonlocal_pause['pause_time_start'] = time.time()
            print('More than 5 seconds since last ctrl+c, Unpausing. '
                  'Press again within 5 seconds to pause.'
                  ' Ctrl+C Count: ' + str(nonlocal_pause['pause']))
        else:
            nonlocal_pause['pause'] += 1
            # .get() keeps this handler working when the key was never initialized.
            nonlocal_pause['ctrl_c_count'] = nonlocal_pause.get('ctrl_c_count', 0) + 1
            print('\n\nPaused, press ctrl-c 3 total times in less than 5 seconds '
                  'to stop the run cleanly, 5 to do a hard stop. '
                  'Pressing Ctrl + C after 5 seconds will resume.'
                  'Remember, you can always press Ctrl+\\ to hard kill the program at any time.'
                  ' Ctrl+C Count: ' + str(nonlocal_pause['pause']))
        # The larger threshold must be tested first, otherwise the stop branch
        # always shadows it.
        if nonlocal_pause.get('ctrl_c_count', 0) >= ctrl_c_kill_threshold:
            print('Triggering a Hard exit now.')
            sys.exit(1)
        elif nonlocal_pause['pause'] >= ctrl_c_stop_threshold:
            print('Starting a clean exit, wait a few seconds for the robot and code to finish.')
            nonlocal_pause['exit_called'] = True
            # we need to unpause to complete the exit
            nonlocal_pause['pause'] = 0
    except KeyboardInterrupt:
        # The original handler was active while this one was running, so a
        # Ctrl+C in that time arrives as KeyboardInterrupt. Count it as a press.
        nonlocal_pause['pause'] += 1
        nonlocal_pause['ctrl_c_count'] = nonlocal_pause.get('ctrl_c_count', 0) + 1
    # restore the pause handler here
    signal.signal(signal.SIGINT, pause)
# Set up the pause signal
signal.signal(signal.SIGINT, pause)
def set_nonlocal_success_variables_false():
    """Set every action success flag stored in nonlocal_variables to False."""
    success_flags = ('push_success',
                     'grasp_success',
                     'place_success',
                     'grasp_color_success',
                     'place_color_success',
                     'partial_stack_success')
    for flag_name in success_flags:
        nonlocal_variables[flag_name] = False
def check_stack_update_goal(place_check=False, top_idx=-1, depth_img=None, use_imitation=False, task_type=None, check_z_height=False):
    """ Check nonlocal_variables for a good stack and reset if it does not match the current goal.

    Measures task progress with the checker that matches the active configuration, stores the
    result in nonlocal_variables ('stack_height', 'partial_stack_success', ...), and when progress
    was reversed either repositions the objects and restarts the goal sequence, or (when situation
    removal is disabled) only syncs the goal progress to the measured height.

    # Params

    place_check: If place check is True we should match the current stack goal,
        all other actions should match the stack check excluding the top goal block,
        which will not have been placed yet.
    top_idx: The index of blocks sorted from high to low which is expected to contain the top stack block.
        -1 will be the highest object in the scene, -2 will be the second highest in the scene, etc.
    depth_img: Depth image forwarded to robot.check_z_height, robot.unstacking_partial_success,
        and robot.check_row (for task_type 'row').
    use_imitation: If use_imitation is True, we are doing an imitation task.
        This flag is not read inside this function; task_type selects the imitation checkers.
    task_type: Needs to be set if use_imitation is set. Handled values are
        'vertical_square', 'unstack', 'stack', and 'row'.
    check_z_height: Selects, or is forwarded to, the progress checker. When True the
        goal-length based reset test is skipped and the checker's own reset verdict is used.

    # Returns

    needed_to_reset boolean which is True if a reset was needed and False otherwise.

    # Raises

    NotImplementedError: if task_type is not None, human_annotation is disabled,
        and task_type is not one of the handled values.
    """
    current_stack_goal = nonlocal_variables['stack'].current_sequence_progress()
    if static_language_mask or grasp_color_task:
        print(f'CURRENT ACTION GOAL: {current_stack_goal}, associated colors: ' + str(np.array(robot.color_names)[np.array(current_stack_goal).astype(int)]) + ' FINAL GOAL: ' + str(nonlocal_variables['stack'].color_idx_sequence_to_string_list()))
    # no need to reset by default
    needed_to_reset = False
    toppled = None
    # Only the language-mask robot.check_stack(..., return_inds=True) call below produces the
    # predicted block order. Defaulting to None lets the order check further down be skipped
    # (instead of raising UnboundLocalError) when a different progress checker was used.
    pred_stack_goal = None
    if place_check:
        # Only reset while placing if the stack decreases in height!
        stack_shift = 1
    elif current_stack_goal is not None:
        # only the place check expects the current goal to be met
        current_stack_goal = current_stack_goal[:-1]
        stack_shift = 0
    if check_z_height:
        # placeholder text, only used when building the mismatch message below
        max_workspace_height = ' (see max_workspace_height printout above) '
    # TODO(ahundt) BUG Figure out why a real stack of size 2 or 3 and a push which touches no blocks does not pass the stack_check and ends up a MISMATCH in need of reset. (update: may now be fixed, double check then delete when confirmed)
    if task_type is not None:
        # based on task type, call partial success function from robot, 'stack_height' represents task progress in these cases
        if human_annotation:
            stack_matches_goal, nonlocal_variables['stack_height'], needed_to_reset = robot.manual_progress_check(nonlocal_variables['prev_stack_height'], task_type)
            print('human annotation manual_progress_check results: stack_matches_goal: ' + str(stack_matches_goal) + ' stack_height: ' + str(nonlocal_variables['stack_height']) + ' needed_to_reset: ' + str(needed_to_reset) +
                  ' input to robot.manual_progress_check: prev_stack_height: ' + str(nonlocal_variables['prev_stack_height']) + ' task_type: ' + str(task_type))
        elif task_type == 'vertical_square':
            if check_z_height:
                stack_matches_goal, nonlocal_variables['stack_height'], needed_to_reset = robot.manual_progress_check(nonlocal_variables['prev_stack_height'], task_type)
            else:
                # NOTE(adit98) explicitly set a lower distance threshold for vertical square
                stack_matches_goal, nonlocal_variables['stack_height'] = \
                    robot.vertical_square_partial_success(current_stack_goal,
                                                          check_z_height=check_z_height, stack_dist_thresh=0.04)
        elif task_type == 'unstack':
            # structure size (stack_height) is 1 + # of blocks removed from stack (1, 2, 3, 4)
            stack_matches_goal, nonlocal_variables['stack_height'] = \
                robot.unstacking_partial_success(nonlocal_variables['prev_stack_height'],
                                                 check_z_height=check_z_height, depth_img=depth_img)
        elif task_type == 'stack':
            if check_z_height:
                # decrease_threshold = None  # None means decrease_threshold will be disabled
                stack_matches_goal, nonlocal_variables['stack_height'], needed_to_reset = robot.check_z_height(depth_img, nonlocal_variables['prev_stack_height'])
                # TODO(ahundt) add a separate case for incremental height where continuous heights are converted back to height where 1.0 is the height of a block.
                # stack_matches_goal, nonlocal_variables['stack_height'] = robot.check_incremental_height(input_img, current_stack_goal)
            else:
                # TODO(adit98) make sure we have path for real robot here
                stack_matches_goal, nonlocal_variables['stack_height'] = \
                    robot.check_stack(current_stack_goal, top_idx=top_idx)
        elif task_type == 'row':
            if not check_z_height:
                stack_matches_goal, nonlocal_variables['stack_height'] = robot.check_row(
                    current_stack_goal,
                    check_z_height=check_z_height, valid_depth_heightmap=depth_img,
                    num_obj=num_obj, separation_threshold=separation_threshold, distance_threshold=distance_threshold,
                    prev_z_height=nonlocal_variables['prev_stack_height'])
                # Note that for rows, a single action can make a row (horizontal stack) go from size 1 to a much larger number like 4.
                stack_matches_goal = nonlocal_variables['stack_height'] >= len(current_stack_goal)
            else:
                stack_matches_goal, nonlocal_variables['stack_height'], needed_to_reset = robot.manual_progress_check(nonlocal_variables['prev_stack_height'], task_type)
        else:
            # TODO(adit98) trigger graceful exit here
            raise NotImplementedError(task_type)
    elif check_row:
        stack_matches_goal, nonlocal_variables['stack_height'] = robot.check_row(
            current_stack_goal,
            num_obj=num_obj, distance_threshold=distance_threshold, separation_threshold=separation_threshold, check_z_height=check_z_height, valid_depth_heightmap=valid_depth_heightmap[:, :, 0],
            prev_z_height=nonlocal_variables['prev_stack_height'])
        # Note that for rows, a single action can make a row (horizontal stack) go from size 1 to a much larger number like 4.
        if not check_z_height and not static_language_mask:
            stack_matches_goal = nonlocal_variables['stack_height'] >= len(current_stack_goal)
    elif check_z_height:
        # decrease_threshold = None  # None means decrease_threshold will be disabled
        stack_matches_goal, nonlocal_variables['stack_height'], needed_to_reset = robot.check_z_height(depth_img, nonlocal_variables['prev_stack_height'])
        # TODO(ahundt) add a separate case for incremental height where continuous heights are converted back to height where 1.0 is the height of a block.
        # stack_matches_goal, nonlocal_variables['stack_height'] = robot.check_incremental_height(input_img, current_stack_goal)
    else:
        if static_language_mask:
            # current_stack_goal = nonlocal_variables['stack'].object_color_sequence[0:4]
            stack_matches_goal, nonlocal_variables['stack_height'], pred_stack_goal = robot.check_stack(current_stack_goal, top_idx=top_idx, goal_num_obj=goal_num_obj, return_inds=True)
        else:
            stack_matches_goal, nonlocal_variables['stack_height'] = robot.check_stack(current_stack_goal, top_idx=top_idx, goal_num_obj=goal_num_obj)

    nonlocal_variables['partial_stack_success'] = stack_matches_goal
    if not check_z_height:
        if nonlocal_variables['stack_height'] == 1:
            # A stack of size 1 does not meet the criteria for a partial stack success
            nonlocal_variables['partial_stack_success'] = False
            nonlocal_variables['stack_success'] = False
        max_workspace_height = len(current_stack_goal) - stack_shift
        # Has that stack gotten shorter than it was before? If so we need to reset
        needed_to_reset = nonlocal_variables['stack_height'] < max_workspace_height or nonlocal_variables['stack_height'] < nonlocal_variables['prev_stack_height']
        # NOTE(review): the topple check is kept inside the 'not check_z_height' branch, next to
        # the reset test it extends — confirm this nesting is the intended one.
        if task_type is not None and task_type == 'unstack':
            # also reset if we toppled while unstacking
            if nonlocal_variables['primitive_action'] == 'place':
                # can't progress unstacking with place action, so this must have been a topple
                toppled = nonlocal_variables['stack_height'] > nonlocal_variables['prev_stack_height']
            elif nonlocal_variables['primitive_action'] == 'grasp' and not nonlocal_variables['grasp_success']:
                # can't progress legally if we have failed grasp
                toppled = nonlocal_variables['stack_height'] > nonlocal_variables['prev_stack_height']
            else:
                # caused decrease of more than 1 block during push/grasp
                toppled = nonlocal_variables['stack_height'] > (nonlocal_variables['prev_stack_height'] + 1)

    insufficient_objs_in_scene = False
    # add check for num_obj in scene
    if is_sim and task_type in ['row', 'vertical_square', 'unstack']:
        if task_type == 'unstack' and is_testing:
            pass
        else:
            buffer_meters = 0.0
            if task_type == 'row' and is_testing:
                buffer_meters = 0.025
            objs = robot.get_objects_in_scene(buffer_meters=buffer_meters)
            if len(objs) < nonlocal_variables['stack'].num_obj:
                needed_to_reset = True
                insufficient_objs_in_scene = True

    if human_annotation and static_language_mask:
        # there is no previous heightmap to show the annotator on the very first iteration
        if trainer.iteration == 0:
            success_code, comment = annotate_success_manually(" ".join(command), None, color_heightmap)
        else:
            success_code, comment = annotate_success_manually(" ".join(command), prev_color_heightmap, color_heightmap)
        annotated_success = success_code == "success"
        # TODO: all place successes will have this set to True, but can be postprocessed out, since we can match to action by line in the log
        nonlocal_variables['grasp_color_success'] = annotated_success and nonlocal_variables['primitive_action'] == 'grasp'
        nonlocal_variables['color_success'] = annotated_success
        if not nonlocal_variables['color_success']:
            nonlocal_variables['partial_stack_success'] = False
        if success_code == 'quit':
            needed_to_reset = True
        print('human_annotation: color_success: ' + str(nonlocal_variables['color_success']) + ' grasp_color_success: ' + str(nonlocal_variables['grasp_color_success']))

    print('check_stack() stack_height: ' + str(nonlocal_variables['stack_height']) + ' stack matches current goal: ' + str(stack_matches_goal) + ' partial_stack_success: ' +
          str(nonlocal_variables['partial_stack_success']) + ' Does the code think a reset is needed: ' + str(needed_to_reset) + ' Does the code think the stack toppled: ' +
          str(toppled))

    # The order check needs the predicted block order; it is skipped when no predicted order
    # was produced by the progress checker used above.
    if static_language_mask and end_on_incorrect_order and not check_row and pred_stack_goal is not None:
        # check order, if it is the right length and bad then kill this trial
        # if there is a mistake further down in the stack than the top-most block, there's no way to recover
        # but if it's the top-most one, we can still unstack it, so don't kill the trial quite yet
        if len(pred_stack_goal) == 2:
            # if there are only 2 blocks and it places it on the wrong block, it can recover
            # just check that it isn't buried
            pred_matches_goal_up_to_top = pred_stack_goal[0] != current_stack_goal[1]
        else:
            pred_matches_goal_up_to_top = np.array_equal(pred_stack_goal, current_stack_goal)
        # only check for this if the predicted stack is at least as tall as the goal stack
        lens_match = len(pred_stack_goal) >= len(current_stack_goal)
        # also check for toppling, but only if the previous stack was more than 1 block high
        was_over_one = nonlocal_variables['prev_stack_height'] > 1
        decreased_height = len(pred_stack_goal) < nonlocal_variables['prev_stack_height']
        # conditions for ending a trial:
        # either a mistake is made in the bottom of the stack, or the stack has toppled
        if (not pred_matches_goal_up_to_top and lens_match) or (was_over_one and decreased_height):
            print('ERROR: Stack is irreparably misordered or has toppled, trial is a failure. Ending...')
            needed_to_reset = True
            toppled = True

    # toppled stays None unless one of the topple checks above ran; treat None as "not toppled"
    has_toppled = bool(toppled)
    # if place and needed_to_reset:
    # TODO(ahundt) BUG may reset push/grasp success too aggressively. If statement above and below for debugging, remove commented line after debugging complete
    if needed_to_reset or evaluate_random_objects or has_toppled:
        # we are two blocks off the goal, reset the scene.
        mismatch_str = 'main.py check_stack() DETECTED PROGRESS REVERSAL, mismatch between the goal height: ' + str(max_workspace_height) + ' and current workspace stack height: ' + str(nonlocal_variables['stack_height'])
        if insufficient_objs_in_scene:
            mismatch_str += ', INSUFFICIENT OBJECTS IN SCENE'
        if has_toppled:
            mismatch_str += ', TOPPLED stack'
        if not disable_situation_removal or insufficient_objs_in_scene or has_toppled:
            mismatch_str += ', RESETTING the objects, goals, and action success to FALSE...'
            print(mismatch_str)
            # this reset is appropriate for stacking, but not checking rows
            get_and_save_images(robot, workspace_limits, heightmap_resolution, logger, trainer, '1')
            robot.reposition_objects()
            nonlocal_variables['stack'].reset_sequence()
            nonlocal_variables['stack'].next()
            # We needed to reset, so the stack must have been knocked over!
            # all rewards and success checks are False!
            set_nonlocal_success_variables_false()
            nonlocal_variables['trial_complete'] = True
            if check_row or (task_type is not None and ((task_type == 'row') or (task_type == 'vertical_square'))):
                # on reset get the current row state
                _, nonlocal_variables['stack_height'] = robot.check_row(current_stack_goal, num_obj=num_obj, check_z_height=check_z_height, valid_depth_heightmap=valid_depth_heightmap[:, :, 0], separation_threshold=separation_threshold, distance_threshold=distance_threshold)
                nonlocal_variables['prev_stack_height'] = copy.deepcopy(nonlocal_variables['stack_height'])
        else:
            # not resetting, so set stack goal to proper value
            nonlocal_variables['stack'].set_progress(int(nonlocal_variables['stack_height']))
            print(mismatch_str)
    return needed_to_reset
# Parallel thread to process network output and execute actions
# -------------------------------------------------------------
def process_actions():
last_iteration_saved = -1 # used so the loop only saves one time while waiting
action_count = 0
grasp_count = 0
successful_grasp_count = 0
successful_color_grasp_count = 0
place_count = 0
place_rate = 0
# short stacks of blocks
partial_stack_count = 0
partial_stack_rate = np.inf
# all the blocks stacked
stack_count = 0
stack_rate = np.inf
# will need to reset if something went wrong with stacking
needed_to_reset = False
grasp_str = ''
successful_trial_count = int(np.max(trainer.trial_success_log)) if continue_logging and len(trainer.trial_success_log) > 0 else 0
trial_rate = np.inf
# when resuming a previous run, load variables saved from previous run
if continue_logging:
process_vars = None
resume_var_values_path = os.path.join(logger.base_directory, 'data', 'variables','process_action_var_values_%d.json' % (trainer.iteration))
if os.path.exists(resume_var_values_path):
with open(resume_var_values_path, 'r') as f:
process_vars = json.load(f)
# TODO(ahundt) the loop below should be a simpler way to do the same thing, but it doesn't seem to work
# for k, v in process_vars.items():
# # initialize all the local variables based on the dictionary entries
# setattr(sys.modules[__name__], k, v)
action_count = process_vars['action_count']
grasp_count = process_vars['grasp_count']
successful_grasp_count = process_vars['successful_grasp_count']
successful_color_grasp_count = process_vars['successful_color_grasp_count']
place_count = process_vars['place_count']
place_rate = process_vars['place_rate']
partial_stack_count = process_vars['partial_stack_count']
partial_stack_rate = process_vars['partial_stack_rate']
stack_count = process_vars['stack_count']
stack_rate = process_vars['stack_rate']
needed_to_reset = process_vars['needed_to_reset']
grasp_str = process_vars['grasp_str']
successful_trial_count = process_vars['successful_trial_count']
trial_rate = process_vars['trial_rate']
else:
print("WARNING: Missing /data/variables/process_action_var_values_%d.json on resume. Default values initialized. May cause log inconsistencies" % (trainer.iteration))
# NOTE(zhe) The loop continues to run until an exit signal appears. The loop doesn't run when not "executing action"
while not nonlocal_pause['process_actions_exit_called']:
if nonlocal_variables['executing_action']:
action_count += 1
# Determine whether grasping or pushing should be executed based on network predictions OR with demo
if use_demo:
# initialize preds array
preds = []
# figure out primitive action (limited to grasp or place)
if nonlocal_variables['primitive_action'] != 'grasp':
# next action is grasp if we didn't grasp already
nonlocal_variables['primitive_action'] = 'grasp'
# get grasp predictions (since next action is grasp)
# fill the masked arrays and add to preds
if row_trainer is not None:
preds.append(grasp_feat_row)
else:
preds.append(None)
if stack_trainer is not None:
preds.append(grasp_feat_stack)
else:
preds.append(None)
if unstack_trainer is not None:
preds.append(grasp_feat_unstack)
else:
preds.append(None)
if vertical_square_trainer is not None:
preds.append(grasp_feat_vertical_square)
else:
preds.append(None)
else:
if nonlocal_variables['grasp_success']:
# if we had a successful grasp, set next action to place
nonlocal_variables['primitive_action'] = 'place'
# get place predictions (since next action is place)
# fill the masked arrays and add to preds
if row_trainer is not None:
preds.append(place_feat_row)
else:
preds.append(None)
if stack_trainer is not None:
preds.append(place_feat_stack)
else:
preds.append(None)
if unstack_trainer is not None:
preds.append(place_feat_unstack)
else:
preds.append(None)
if vertical_square_trainer is not None:
preds.append(place_feat_vertical_square)
else:
preds.append(None)
else:
# last grasp was unsuccessful, so we need to grasp again
nonlocal_variables['primitive_action'] = 'grasp'
# get grasp predictions (since next action is grasp)
# fill the masked arrays and add to preds
if row_trainer is not None:
preds.append(grasp_feat_row)
else:
preds.append(None)
if stack_trainer is not None:
preds.append(grasp_feat_stack)
else:
preds.append(None)
if unstack_trainer is not None:
preds.append(grasp_feat_unstack)
else:
preds.append(None)
if vertical_square_trainer is not None:
preds.append(grasp_feat_vertical_square)
else:
preds.append(None)
print("main.py: running demo.get_action for stack height",
nonlocal_variables['stack_height'], "and primitive action",
nonlocal_variables['primitive_action'])
# first check if nonlocal_variables['example_actions_dict'] is none
if nonlocal_variables['example_actions_dict'] is None:
nonlocal_variables['example_actions_dict'] = {}
# check if embeddings for demo for progress n and primitive action p_a already exists
task_progress = nonlocal_variables['stack_height']
if check_z_height:
# NOTE(adit98) check if we should round or cut off float -> int
task_progress = int(np.rint(task_progress))
# NOTE(adit98) HACK, make sure task_progress starts at 1 when stack_height is initialized to 0.0
if task_progress == 0:
task_progress = 1
if task_type in ['row', 'vertical_square', 'unstack']:
# HACK: max task_progress at 3 (in case of simulator bugs or for the final place in unstacking)
if task_progress > 3:
print('WARNING: main.py +Activating HACK workaround, limiting max task progress value to 3.')
task_progress = min(3, task_progress)
# NOTE(adit98) add is in dict checks to trigger graceful exits
action = nonlocal_variables['primitive_action']
if task_progress not in nonlocal_variables['example_actions_dict']:
nonlocal_variables['example_actions_dict'][task_progress] = {}
if action not in nonlocal_variables['example_actions_dict'][task_progress]:
nonlocal_variables['example_actions_dict'][task_progress][action] = {}
for ind, d in enumerate(example_demos):
# get action embeddings from example demo
if ind not in nonlocal_variables['example_actions_dict'][task_progress][action]:
demo_row_action, demo_stack_action, demo_unstack_action, demo_vertical_square_action, action_id, demo_action_ind = \
d.get_action(sim_workspace_limits, action, task_progress, stack_trainer,
row_trainer, unstack_trainer, vertical_square_trainer, use_hist=depth_channels_history,
cycle_consistency=cycle_consistency)
nonlocal_variables['example_actions_dict'][task_progress][action][ind] = [demo_row_action,
demo_stack_action, demo_unstack_action, demo_vertical_square_action, demo_action_ind]
print("main.py nonlocal_variables['executing_action']: got demo actions")
else:
best_push_conf = np.ma.max(push_predictions)
best_grasp_conf = np.ma.max(grasp_predictions)
if place:
best_place_conf = np.ma.max(place_predictions)
print('Primitive confidence scores: %f (push), %f (grasp), %f (place)' % (best_push_conf, best_grasp_conf, best_place_conf))
else:
print('Primitive confidence scores: %f (push), %f (grasp)' % (best_push_conf, best_grasp_conf))
# Exploitation (do best action) vs exploration (do random action)
if is_testing:
explore_actions = False
else:
explore_actions = np.random.uniform() < explore_prob
if explore_actions:
print('Strategy: explore (exploration probability: %f)' % (explore_prob))
else:
print('Strategy: exploit (exploration probability: %f)' % (explore_prob))
if not use_demo:
# NOTE(zhe) Designate action type (grasp vs place) based on previous action.
# If we just did a successful grasp, we always need to place
if place and nonlocal_variables['primitive_action'] == 'grasp' and nonlocal_variables['grasp_success']:
nonlocal_variables['primitive_action'] = 'place'
else:
nonlocal_variables['primitive_action'] = 'grasp'
# NOTE(zhe) Switch grasp to push if push has better score. NO PUSHING IN LANGUAGE MODEL.
# determine if the network indicates we should do a push or a grasp
# otherwise if we are exploring and not placing choose between push and grasp randomly
if not grasp_only and not nonlocal_variables['primitive_action'] == 'place':
if is_testing and method == 'reactive':
if best_push_conf > 2 * best_grasp_conf:
nonlocal_variables['primitive_action'] = 'push'