diff --git a/libs/boxes/__init__.py b/libs/boxes/__init__.py index db58691..297954b 100644 --- a/libs/boxes/__init__.py +++ b/libs/boxes/__init__.py @@ -6,8 +6,8 @@ # -------------------------------------------------------- from . import cython_nms from . import cython_bbox -import nms -import timer +from . import nms +from . import timer from .anchor import anchors from .anchor import anchors_plane from .roi import roi_cropping diff --git a/libs/boxes/anchor.py b/libs/boxes/anchor.py index 136a7d0..adcf6d7 100644 --- a/libs/boxes/anchor.py +++ b/libs/boxes/anchor.py @@ -35,7 +35,7 @@ def generate_anchors(base_size=16, ratios=[0.5, 1, 2], base_anchor = np.array([1, 1, base_size, base_size]) - 1 ratio_anchors = _ratio_enum(base_anchor, ratios) anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) - for i in xrange(ratio_anchors.shape[0])]) + for i in range(ratio_anchors.shape[0])]) return anchors def _whctrs(anchor): diff --git a/libs/boxes/blob.py b/libs/boxes/blob.py index b3f165c..a3b9f5a 100644 --- a/libs/boxes/blob.py +++ b/libs/boxes/blob.py @@ -20,7 +20,7 @@ def im_list_to_blob(ims): num_images = len(ims) blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), dtype=np.float32) - for i in xrange(num_images): + for i in range(num_images): im = ims[i] blob[i, 0:im.shape[0], 0:im.shape[1], :] = im diff --git a/libs/boxes/gprof2dot.py b/libs/boxes/gprof2dot.py index c7e87f1..f920c91 100644 --- a/libs/boxes/gprof2dot.py +++ b/libs/boxes/gprof2dot.py @@ -2681,7 +2681,7 @@ def parse(self): caller = self.get_function(fn) call = Call(callee.id) if isinstance(value, tuple): - for i in xrange(0, len(value), 4): + for i in range(0, len(value), 4): nc, cc, tt, ct = value[i:i+4] if CALLS in call: call[CALLS] += cc diff --git a/libs/datasets/coco.py b/libs/datasets/coco.py index 464a8c5..f14289e 100644 --- a/libs/datasets/coco.py +++ b/libs/datasets/coco.py @@ -22,6 +22,18 @@ } +def correct_decode_raw(data, dtype): + + # BUG: THERE WAS A BUG HERE, 
tf.decode_raw('', tf.float32) returns a [0.] tensor, not an empty [] tensor + # So we use correct_decode_raw instead of tf.decode_raw + + result = tf.cond(tf.equal(data, tf.constant("")), + lambda: tf.constant([], dtype=dtype), + lambda: tf.decode_raw(data, dtype)) + + return result + + def get_split(split_name, dataset_dir, file_pattern=None, reader=None): if split_name not in SPLITS_TO_SIZES: raise ValueError('split name %s was not recognized.' % split_name) @@ -48,7 +60,7 @@ } def _masks_decoder(keys_to_tensors): - masks = tf.decode_raw(keys_to_tensors['label/gt_masks'], tf.uint8) + masks = correct_decode_raw(keys_to_tensors['label/gt_masks'], tf.uint8) width = tf.cast(keys_to_tensors['image/width'], tf.int32) height = tf.cast(keys_to_tensors['image/height'], tf.int32) instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32) @@ -56,7 +68,7 @@ return tf.reshape(masks, mask_shape) def _gt_boxes_decoder(keys_to_tensors): - bboxes = tf.decode_raw(keys_to_tensors['label/gt_boxes'], tf.float32) + bboxes = correct_decode_raw(keys_to_tensors['label/gt_boxes'], tf.float32) instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32) bboxes_shape = tf.stack([instances, 5]) return tf.reshape(bboxes, bboxes_shape) @@ -117,15 +129,15 @@ ih = tf.cast(features['image/height'], tf.int32) iw = tf.cast(features['image/width'], tf.int32) num_instances = tf.cast(features['label/num_instances'], tf.int32) - image = tf.decode_raw(features['image/encoded'], tf.uint8) + image = correct_decode_raw(features['image/encoded'], tf.uint8) imsize = tf.size(image) image = tf.cond(tf.equal(imsize, ih * iw), \ lambda: tf.image.grayscale_to_rgb(tf.reshape(image, (ih, iw, 1))), \ lambda: tf.reshape(image, (ih, iw, 3))) - gt_boxes = tf.decode_raw(features['label/gt_boxes'], tf.float32) + gt_boxes = correct_decode_raw(features['label/gt_boxes'],
tf.float32) gt_boxes = tf.reshape(gt_boxes, [num_instances, 5]) - gt_masks = tf.decode_raw(features['label/gt_masks'], tf.uint8) + gt_masks = correct_decode_raw(features['label/gt_masks'], tf.uint8) gt_masks = tf.cast(gt_masks, tf.int32) gt_masks = tf.reshape(gt_masks, [num_instances, ih, iw]) diff --git a/libs/datasets/dataset_factory.py b/libs/datasets/dataset_factory.py index f4fa449..6005209 100644 --- a/libs/datasets/dataset_factory.py +++ b/libs/datasets/dataset_factory.py @@ -16,6 +16,8 @@ def get_dataset(dataset_name, split_name, dataset_dir, file_pattern = dataset_name + '_' + split_name + '*.tfrecord' tfrecords = glob.glob(dataset_dir + '/records/' + file_pattern) + assert len(tfrecords)>0, "haven't found any tfrecord(did you run train.py from code root?). we were looking at %s." % dataset_dir + '/records/' + file_pattern + image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = coco.read(tfrecords) image, gt_boxes, gt_masks = coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training) diff --git a/libs/datasets/download_and_convert_coco.py b/libs/datasets/download_and_convert_coco.py index 3d0ec94..540b009 100644 --- a/libs/datasets/download_and_convert_coco.py +++ b/libs/datasets/download_and_convert_coco.py @@ -31,8 +31,7 @@ ] FLAGS = tf.app.flags.FLAGS -tf.app.flags.DEFINE_boolean('vis', False, - 'Show some visual masks') +tf.app.flags.DEFINE_boolean('vis', False, 'Show some visual masks') def download_and_uncompress_zip(zip_url, dataset_dir): @@ -305,15 +304,16 @@ def _add_to_tfrecord(record_dir, image_dir, annotation_dir, split_name): img_raw = img.tostring() mask_raw = mask.tostring() - + example = _to_tfexample_coco_raw( img_id, img_raw, mask_raw, height, width, gt_boxes.shape[0], gt_boxes.tostring(), masks.tostring()) - + tfrecord_writer.write(example.SerializeToString()) + sys.stdout.write('\n') sys.stdout.flush() diff --git a/libs/layers/sample.py b/libs/layers/sample.py index fa31e14..7a8e2ff 100644 --- a/libs/layers/sample.py 
+++ b/libs/layers/sample.py @@ -114,14 +114,14 @@ def sample_rpn_outputs_wrt_gt_boxes(boxes, scores, gt_boxes, is_training=False, gt_argmax_overlaps = overlaps.argmax(axis=0) # G fg_inds = np.union1d(gt_argmax_overlaps, fg_inds) - fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction)) - if fg_inds.size > 0 and fg_rois < fg_inds.size: - fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False) - - # TODO: sampling strategy - bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0] - bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 8)#64 - if bg_inds.size > 0 and bg_rois < bg_inds.size: + fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction)) + if fg_inds.size > 0 and fg_rois < fg_inds.size: + fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False) + + # TODO: sampling strategy + bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0] + bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 8)#64 + if bg_inds.size > 0 and bg_rois < bg_inds.size: bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False) keep_inds = np.append(fg_inds, bg_inds) diff --git a/libs/nets/pyramid_network.py b/libs/nets/pyramid_network.py index 567c2be..94b2d71 100644 --- a/libs/nets/pyramid_network.py +++ b/libs/nets/pyramid_network.py @@ -123,7 +123,9 @@ def _filter_negative_samples(labels, tensors): filtered = [] for t in tensors: - tf.assert_equal(tf.shape(t)[0], tf.shape(labels)[0]) + # tf.assert_equal(tf.shape(t)[0], tf.shape(labels)[0]) - I removed this assertion because it was never used. 
+ # an assertion is not automatically checked; you must execute it in the graph like any other operation + f = tf.gather(t, keeps) filtered.append(f) diff --git a/libs/setup.py b/libs/setup.py index fde2069..026cf7e 100644 --- a/libs/setup.py +++ b/libs/setup.py @@ -47,7 +47,7 @@ def locate_cuda(): cudaconfig = {'home':home, 'nvcc':nvcc, 'include': pjoin(home, 'include'), 'lib64': pjoin(home, 'lib64')} - for k, v in cudaconfig.iteritems(): + for k, v in cudaconfig.items(): if not os.path.exists(v): raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) @@ -81,7 +81,7 @@ def customize_compiler_for_nvcc(self): # object but distutils doesn't have the ability to change compilers # based on source extension: we add it. def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): - print extra_postargs + print(extra_postargs) if os.path.splitext(src)[1] == '.cu': # use the cuda for .cu files self.set_executable('compiler_so', CUDA['nvcc']) diff --git a/train/train.py b/train/train.py index c171b92..a9361a5 100644 --- a/train/train.py +++ b/train/train.py @@ -170,11 +170,11 @@ def train(): FLAGS.im_batch, is_training=True) - data_queue = tf.RandomShuffleQueue(capacity=32, min_after_dequeue=16, - dtypes=( - image.dtype, ih.dtype, iw.dtype, - gt_boxes.dtype, gt_masks.dtype, - num_instances.dtype, img_id.dtype)) + data_queue = tf.RandomShuffleQueue(capacity=32, min_after_dequeue=16, dtypes=( + image.dtype, ih.dtype, iw.dtype, + gt_boxes.dtype, gt_masks.dtype, + num_instances.dtype, img_id.dtype)) + enqueue_op = data_queue.enqueue((image, ih, iw, gt_boxes, gt_masks, num_instances, img_id)) data_queue_runner = tf.train.QueueRunner(data_queue, [enqueue_op] * 4) tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, data_queue_runner)