mmaction2 / tests /models /backbones /test_timesformer.py

mmaction2

d3dbf03 verified 3 months ago

2.46 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import pytest
	import torch

	from mmaction.models import TimeSformer
	from mmaction.testing import generate_backbone_demo_inputs


	def test_timesformer_backbone():
	input_shape = (1, 3, 8, 64, 64)
	imgs = generate_backbone_demo_inputs(input_shape)

	# divided_space_time
	timesformer = TimeSformer(
	8, 64, 16, embed_dims=768, attention_type='divided_space_time')
	timesformer.init_weights()
	from mmaction.models.common import (DividedSpatialAttentionWithNorm,
	DividedTemporalAttentionWithNorm,
	FFNWithNorm)
	assert isinstance(timesformer.transformer_layers.layers[0].attentions[0],
	DividedTemporalAttentionWithNorm)
	assert isinstance(timesformer.transformer_layers.layers[11].attentions[1],
	DividedSpatialAttentionWithNorm)
	assert isinstance(timesformer.transformer_layers.layers[0].ffns[0],
	FFNWithNorm)
	assert hasattr(timesformer, 'time_embed')
	assert timesformer.patch_embed.num_patches == 16

	cls_tokens = timesformer(imgs)
	assert cls_tokens.shape == torch.Size([1, 768])

	# space_only
	timesformer = TimeSformer(
	8, 64, 16, embed_dims=512, num_heads=8, attention_type='space_only')
	timesformer.init_weights()

	assert not hasattr(timesformer, 'time_embed')
	assert timesformer.patch_embed.num_patches == 16

	cls_tokens = timesformer(imgs)
	assert cls_tokens.shape == torch.Size([1, 512])

	# joint_space_time
	input_shape = (1, 3, 2, 64, 64)
	imgs = generate_backbone_demo_inputs(input_shape)
	timesformer = TimeSformer(
	2,
	64,
	8,
	embed_dims=256,
	num_heads=8,
	attention_type='joint_space_time')
	timesformer.init_weights()

	assert hasattr(timesformer, 'time_embed')
	assert timesformer.patch_embed.num_patches == 64

	cls_tokens = timesformer(imgs)
	assert cls_tokens.shape == torch.Size([1, 256])

	with pytest.raises(AssertionError):
	# unsupported attention type
	timesformer = TimeSformer(
	8, 64, 16, attention_type='wrong_attention_type')

	with pytest.raises(AssertionError):
	# Wrong transformer_layers type
	timesformer = TimeSformer(8, 64, 16, transformer_layers='wrong_type')