# File size: 2,999 Bytes

# d3dbf03

# Copyright (c) OpenMMLab. All rights reserved.
import argparse

import mmengine


def parse_args():
    """Parse the command-line arguments of the conversion script.

    Returns:
        argparse.Namespace: Namespace with the attributes ``annofile``
            (positional path of the txt annotation file), ``format``
            ('rawframes' or 'videos', default 'rawframes') and ``output``
            (output file name, ``None`` when not given).
    """
    arg_parser = argparse.ArgumentParser(
        description='Convert txt annotation list to json')

    # Positional: the annotation list that should be converted.
    arg_parser.add_argument(
        'annofile', type=str, help='the txt annotation file to convert')

    # Layout of each line in the txt file.
    arg_parser.add_argument(
        '--format',
        type=str,
        default='rawframes',
        choices=['rawframes', 'videos'],
        help='the format of the txt annotation file')

    # Optional destination; the caller derives one when this stays None.
    output_help = (
        "the output file name, use annofile.replace('.txt', '.json') "
        "if the arg value is None")
    arg_parser.add_argument(
        '--output', type=str, default=None, help=output_help)

    return arg_parser.parse_args()


def lines2dictlist(lines, format):
    """Convert lines in 'txt' format to dictionaries in 'json' format.

    Currently support single-label and multi-label. Each line is split on
    whitespace; blank (empty or whitespace-only) lines are skipped.

    Example of a single-label rawframes annotation txt file:

    .. code-block:: txt

        (frame_dir num_frames label)
        some/directory-1 163 1
        some/directory-2 122 1
        some/directory-3 258 2

    Example of a multi-label rawframes annotation txt file:

    .. code-block:: txt

        (frame_dir num_frames label1 label2 ...)
        some/directory-1 163 1 3 5
        some/directory-2 122 1 2
        some/directory-3 258 2

    Example of a single-label videos annotation txt file:

    .. code-block:: txt

        (filename label)
        some/path/000.mp4 1
        some/path/001.mp4 1
        some/path/002.mp4 2

    Example of a multi-label videos annotation txt file:

    .. code-block:: txt

        (filename label1 label2 ...)
        some/path/000.mp4 1 3 5
        some/path/001.mp4 1 4 8
        some/path/002.mp4 2 4 9

    Args:
        lines (list): List of lines in 'txt' label format.
        format (str): Data format, choices are 'rawframes' and 'videos'.

    Returns:
        list[dict]: For rawframes format, each dict has keys: frame_dir,
            total_frames, label; for videos format, each dict has keys:
            filename, label. ``label`` is always a list of ints.

    Raises:
        ValueError: If ``format`` is neither 'rawframes' nor 'videos'.
    """
    # Fail loudly on an unknown format instead of hitting an
    # UnboundLocalError on the return statement.
    if format not in ('rawframes', 'videos'):
        raise ValueError(
            f"Unsupported format '{format}', "
            "choices are 'rawframes' and 'videos'")

    # A blank line splits into an empty list; drop those so that e.g. a
    # trailing empty line in the annotation file does not raise IndexError.
    records = [
        fields for fields in (line.split() for line in lines) if fields
    ]

    if format == 'rawframes':
        return [
            dict(
                frame_dir=fields[0],
                total_frames=int(fields[1]),
                label=[int(x) for x in fields[2:]]) for fields in records
        ]

    # format == 'videos'
    return [
        dict(filename=fields[0], label=[int(x) for x in fields[1:]])
        for fields in records
    ]


if __name__ == '__main__':
    # Convert a txt annotation list to json.
    args = parse_args()

    # Read inside a context manager so the file handle is always closed.
    # Blank lines are dropped here because they carry no annotation.
    with open(args.annofile) as anno_file:
        stripped = (raw.strip() for raw in anno_file)
        lines = [line for line in stripped if line]

    result = lines2dictlist(lines, args.format)

    if args.output is None:
        args.output = args.annofile.replace('.txt', '.json')
        if args.output == args.annofile:
            # The input path contains no '.txt', so the replacement was a
            # no-op; append the suffix instead of overwriting the input.
            args.output = args.annofile + '.json'

    mmengine.dump(result, args.output)