| | """
|
| | ISL Sign Language Translation - TechMatrix Solvers Initiative
|
| | Model definitions for body pose and hand pose estimation
|
| | Developed by: TechMatrix Solvers Team
|
| | """
|
| |
|
| | import torch
|
| | from collections import OrderedDict
|
| | import torch.nn as nn
|
| |
|
| |
|
def construct_layers(layer_config, no_relu_layers, prelu_layers=None):
    """
    Build an ``nn.Sequential`` from an ordered layer configuration.

    Entries whose name contains ``'pool'`` become ``nn.MaxPool2d`` layers;
    every other entry becomes an ``nn.Conv2d``. Each convolution is followed
    by an activation unless its name appears in ``no_relu_layers``.

    Args:
        layer_config: Mapping of layer name to parameter list, consumed in
            iteration order. Pool entries are read as
            ``[kernel_size, stride, padding]``; conv entries as
            ``[in_channels, out_channels, kernel_size, stride, padding]``.
        no_relu_layers: Names of convolutions that get no activation.
        prelu_layers: Names of convolutions that are followed by
            ``nn.PReLU(out_channels)`` instead of an in-place ``nn.ReLU``.
            ``None`` (the default) means no layer uses PReLU.

    Returns:
        An ``nn.Sequential`` whose named children follow the configuration
        order, with activations inserted directly after their convolution.
    """
    # A None sentinel replaces the former shared mutable ``[]`` default.
    if prelu_layers is None:
        prelu_layers = ()

    layers = []

    for layer_name, params in layer_config.items():
        if 'pool' in layer_name:
            layer = nn.MaxPool2d(kernel_size=params[0], stride=params[1], padding=params[2])
            layers.append((layer_name, layer))
            continue

        conv2d = nn.Conv2d(
            in_channels=params[0],
            out_channels=params[1],
            kernel_size=params[2],
            stride=params[3],
            padding=params[4],
        )
        layers.append((layer_name, conv2d))

        if layer_name in no_relu_layers:
            continue

        if layer_name in prelu_layers:
            # The first four characters of the layer name are replaced by
            # 'prelu' (e.g. 'conv4_2' -> 'prelu4_2'); PReLU gets one
            # parameter per output channel of the preceding convolution.
            layers.append(('prelu' + layer_name[4:], nn.PReLU(params[1])))
        else:
            layers.append(('relu_' + layer_name, nn.ReLU(inplace=True)))

    return nn.Sequential(OrderedDict(layers))
|
| |
|
| |
|
def construct_multi_conv_layers(layer_config, no_relu_layers):
    """
    Build one small ``nn.Sequential`` per configuration entry.

    Unlike a single flat sequence, every entry of ``layer_config`` becomes
    its own module so callers can run them one at a time and keep the
    intermediate outputs.

    Args:
        layer_config: Mapping of layer name to parameter list, consumed in
            iteration order. Names containing ``'pool'`` are read as
            ``[kernel_size, stride, padding]``; all others as
            ``[in_channels, out_channels, kernel_size, stride, padding]``.
        no_relu_layers: Names of convolutions that get no PReLU appended.

    Returns:
        An ``nn.ModuleList`` with one ``nn.Sequential`` per entry.
    """

    def build_unit(name, spec):
        # Pooling entries are wrapped on their own, without activation.
        if 'pool' in name:
            pooling = nn.MaxPool2d(kernel_size=spec[0], stride=spec[1], padding=spec[2])
            return nn.Sequential(OrderedDict([(name, pooling)]))

        members = [(
            name,
            nn.Conv2d(
                in_channels=spec[0],
                out_channels=spec[1],
                kernel_size=spec[2],
                stride=spec[3],
                padding=spec[4],
            ),
        )]
        if name not in no_relu_layers:
            # The first five characters of the name are replaced by 'Mprelu';
            # PReLU gets one parameter per output channel.
            members.append(('Mprelu' + name[5:], nn.PReLU(spec[1])))
        return nn.Sequential(OrderedDict(members))

    return nn.ModuleList([build_unit(name, spec) for name, spec in layer_config.items()])
|
| |
|
| |
|
class BodyPose25Model(nn.Module):
    """
    Body pose estimation network for a 25-point skeleton.

    A convolutional backbone feeds four cascaded ``L2`` stages (52 output
    channels each) followed by two ``L1`` stages (26 output channels each).
    All parameters are frozen at construction time.
    Developed by TechMatrix Solvers for ISL translation.
    """

    def __init__(self):
        super(BodyPose25Model, self).__init__()

        def conv3x3(c_in, c_out):
            # [in_channels, out_channels, kernel_size, stride, padding]
            return [c_in, c_out, 3, 1, 1]

        def conv1x1(c_in, c_out):
            return [c_in, c_out, 1, 1, 0]

        def pool2x2():
            # [kernel_size, stride, padding]
            return [2, 2, 0]

        # Convolutions named here are not followed by any activation.
        linear_heads = [
            'Mconv7_stage0_L1', 'Mconv7_stage0_L2',
            'Mconv7_stage1_L1', 'Mconv7_stage1_L2',
            'Mconv7_stage2_L2', 'Mconv7_stage3_L2',
        ]
        # Backbone convolutions that use PReLU instead of ReLU.
        backbone_prelu = ['conv4_2', 'conv4_3_CPM', 'conv4_4_CPM']

        backbone_spec = OrderedDict([
            ('conv1_1', conv3x3(3, 64)),
            ('conv1_2', conv3x3(64, 64)),
            ('pool1_stage1', pool2x2()),
            ('conv2_1', conv3x3(64, 128)),
            ('conv2_2', conv3x3(128, 128)),
            ('pool2_stage1', pool2x2()),
            ('conv3_1', conv3x3(128, 256)),
            ('conv3_2', conv3x3(256, 256)),
            ('conv3_3', conv3x3(256, 256)),
            ('conv3_4', conv3x3(256, 256)),
            ('pool3_stage1', pool2x2()),
            ('conv4_1', conv3x3(256, 512)),
            ('conv4_2', conv3x3(512, 512)),
            ('conv4_3_CPM', conv3x3(512, 256)),
            ('conv4_4_CPM', conv3x3(256, 128)),
        ])
        self.base_model = construct_layers(backbone_spec, linear_heads, backbone_prelu)

        def triple_conv(prefix, c_in, width):
            # Three chained 3x3 convolutions; only the first one sees c_in.
            return OrderedDict(
                (f'{prefix}_{pos}', conv3x3(c_in if pos == 0 else width, width))
                for pos in range(3)
            )

        def add_stage(registry, stage, branch, c_in, width, hidden, c_out):
            # One stage = five triple-conv blocks plus a two-layer 1x1 head.
            # Blocks 2-5 and the head take 3 * width channels because each
            # triple-conv block's three outputs are concatenated in forward.
            suffix = f'stage{stage}_{branch}'
            registry[f'Mconv1_{suffix}'] = triple_conv(f'Mconv1_{suffix}', c_in, width)
            for unit in range(2, 6):
                registry[f'Mconv{unit}_{suffix}'] = triple_conv(
                    f'Mconv{unit}_{suffix}', 3 * width, width
                )
            registry[f'Mconv6_7_{suffix}'] = OrderedDict([
                (f'Mconv6_{suffix}', conv1x1(3 * width, hidden)),
                (f'Mconv7_{suffix}', conv1x1(hidden, c_out)),
            ])

        stage_specs = {}

        # L2 branch: stage 0 reads the 128-channel backbone output; stages
        # 1-3 read backbone (128) + previous L2 output (52) = 180 channels.
        add_stage(stage_specs, 0, 'L2', 128, 96, 256, 52)
        for stage in range(1, 4):
            add_stage(stage_specs, stage, 'L2', 180, 128, 512, 52)

        # L1 branch: stage 0 reads backbone + L2 (180); stage 1 reads
        # backbone + stage-0 L1 (26) + L2 (52) = 206 channels.
        add_stage(stage_specs, 0, 'L1', 180, 96, 256, 26)
        add_stage(stage_specs, 1, 'L1', 206, 128, 512, 26)

        self.stage_models = nn.ModuleDict({
            block_name: construct_multi_conv_layers(spec, linear_heads)
            for block_name, spec in stage_specs.items()
        })

        # Freeze every weight of the network.
        for weight in self.parameters():
            weight.requires_grad = False

    def _multi_conv_forward(self, x, models):
        """Run ``models`` in sequence and concatenate every intermediate output on dim 1."""
        collected = []
        for block in models:
            x = block(x)
            collected.append(x)
        return torch.cat(collected, 1)

    def _run_stage(self, features, stage, branch):
        """Apply blocks Mconv1..Mconv5 and the Mconv6/Mconv7 head of one stage."""
        for unit in range(1, 6):
            features = self._multi_conv_forward(
                features, self.stage_models[f'Mconv{unit}_stage{stage}_{branch}']
            )
        head = self.stage_models[f'Mconv6_7_stage{stage}_{branch}']
        features = head[0](features)
        return head[1](features)

    def forward(self, x):
        """
        Run the backbone, the four L2 stages and the two L1 stages.

        Returns:
            Tuple ``(l2_output, stage1_l1_output)``: the output of the last
            L2 stage and the output of the second L1 stage.
            NOTE(review): presumably part-affinity fields and keypoint
            heatmaps respectively - confirm against the consumer.
        """
        backbone = self.base_model(x)

        # L2 branch: every stage after the first sees the backbone features
        # concatenated with the previous stage's L2 output.
        stage_input = backbone
        for stage in range(4):
            l2_maps = self._run_stage(stage_input, stage, 'L2')
            stage_input = torch.cat([backbone, l2_maps], 1)

        # L1 branch, stage 0: consumes backbone + final L2 output.
        first_l1_maps = self._run_stage(stage_input, 0, 'L1')

        # L1 branch, stage 1: consumes backbone + stage-0 L1 + final L2.
        refined_input = torch.cat([backbone, first_l1_maps, l2_maps], 1)
        final_l1_maps = self._run_stage(refined_input, 1, 'L1')

        return l2_maps, final_l1_maps
|
| |
|
| |
|
class HandPoseModel(nn.Module):
    """
    Hand pose estimation network for 21-point hand landmarks.

    A convolutional trunk produces 128-channel features, a first prediction
    head emits 22 channels, and five refinement stages each re-predict 22
    channels from the previous prediction concatenated with the trunk
    features. All parameters are frozen at construction time.
    Developed by TechMatrix Solvers for ISL translation.
    """

    def __init__(self):
        super(HandPoseModel, self).__init__()

        def conv3x3(c_in, c_out):
            # [in_channels, out_channels, kernel_size, stride, padding]
            return [c_in, c_out, 3, 1, 1]

        def conv7x7(c_in, c_out):
            return [c_in, c_out, 7, 1, 3]

        def conv1x1(c_in, c_out):
            return [c_in, c_out, 1, 1, 0]

        def pool2x2():
            # [kernel_size, stride, padding]
            return [2, 2, 0]

        # The last convolution of every stage is not followed by ReLU.
        linear_heads = ['conv6_2_CPM'] + [f'Mconv7_stage{k}' for k in range(2, 7)]

        specs = {}

        specs['stage1_base'] = OrderedDict([
            ('conv1_1', conv3x3(3, 64)),
            ('conv1_2', conv3x3(64, 64)),
            ('pool1_stage1', pool2x2()),
            ('conv2_1', conv3x3(64, 128)),
            ('conv2_2', conv3x3(128, 128)),
            ('pool2_stage1', pool2x2()),
            ('conv3_1', conv3x3(128, 256)),
            ('conv3_2', conv3x3(256, 256)),
            ('conv3_3', conv3x3(256, 256)),
            ('conv3_4', conv3x3(256, 256)),
            ('pool3_stage1', pool2x2()),
            ('conv4_1', conv3x3(256, 512)),
            ('conv4_2', conv3x3(512, 512)),
            ('conv4_3', conv3x3(512, 512)),
            ('conv4_4', conv3x3(512, 512)),
            ('conv5_1', conv3x3(512, 512)),
            ('conv5_2', conv3x3(512, 512)),
            ('conv5_3_CPM', conv3x3(512, 128)),
        ])

        specs['stage1_prediction'] = OrderedDict([
            ('conv6_1_CPM', conv1x1(128, 512)),
            ('conv6_2_CPM', conv1x1(512, 22)),
        ])

        # Refinement stages 2-6 share one layout; their 150 input channels
        # are the 22 predicted channels plus the 128 trunk channels.
        for k in range(2, 7):
            refine_spec = OrderedDict([(f'Mconv1_stage{k}', conv7x7(150, 128))])
            for unit in range(2, 6):
                refine_spec[f'Mconv{unit}_stage{k}'] = conv7x7(128, 128)
            refine_spec[f'Mconv6_stage{k}'] = conv1x1(128, 128)
            refine_spec[f'Mconv7_stage{k}'] = conv1x1(128, 22)
            specs[f'stage{k}'] = refine_spec

        built = {
            block_name: construct_layers(spec, linear_heads)
            for block_name, spec in specs.items()
        }

        self.stage1_base_model = built['stage1_base']
        self.stage1_prediction_model = built['stage1_prediction']
        self.stage2_model = built['stage2']
        self.stage3_model = built['stage3']
        self.stage4_model = built['stage4']
        self.stage5_model = built['stage5']
        self.stage6_model = built['stage6']

        # Freeze every weight of the network.
        for weight in self.parameters():
            weight.requires_grad = False

    def forward(self, x):
        """
        Run the trunk, the first prediction head and five refinement stages.

        Returns:
            The output of the sixth stage.
        """
        trunk = self.stage1_base_model(x)
        prediction = self.stage1_prediction_model(trunk)

        refinement_stages = (
            self.stage2_model,
            self.stage3_model,
            self.stage4_model,
            self.stage5_model,
            self.stage6_model,
        )
        for refine in refinement_stages:
            # Each stage sees the previous prediction stacked in front of
            # the shared trunk features along the channel dimension.
            prediction = refine(torch.cat([prediction, trunk], 1))

        return prediction
|
| |
|
| |
|
| |
|
def create_bodypose_model():
    """Build a fresh ``BodyPose25Model`` instance and hand it back to the caller."""
    body_model = BodyPose25Model()
    return body_model
|
| |
|
| |
|
def create_handpose_model():
    """Build a fresh ``HandPoseModel`` instance and hand it back to the caller."""
    hand_model = HandPoseModel()
    return hand_model