gen_image.py

import matplotlib.pyplot as plt
import numpy as np
import torch
import glm
import time
from .my import util
from .my import sample_in_pupil

class RetinalGen(object):
    '''
    Class for retinal generation process
    
    Properties
    --------
    conf - multi-layers' parameters configuration
    u    - M x 3 tensor, M sample positions in pupil
    p_r  - H_r x W_r x 3 tensor, retinal pixel grid, [H_r, W_r] is the retinal resolution
    Phi  - N x H_r x W_r x M x 2 tensor, retinal to layers mapping, N is number of layers
    mask - N x H_r x W_r x M x 2 tensor, indicates invalid (out-of-range) mapping
    
    Methods
    --------
    '''
    def __init__(self, conf):
        '''
        Initialize retinal generator instance

        Parameters
        --------
        conf - multi-layers' parameters configuration
        u    - a M x 3 tensor stores M sample positions in pupil
        '''
        self.conf = conf
        self.u = sample_in_pupil.CircleGen(conf.pupil_size, 5)
        # self.u = u.to(cuda_dev)
        # self.u = u # M x 3 M sample positions 
        self.D_r = conf.retinal_res # retinal res 480 x 640 
        self.N = conf.GetNLayers() # 2 
        self.M = self.u.size()[0] # samples
        # p_rx, p_ry = torch.meshgrid(torch.tensor(range(0, self.D_r[0])),
        #                             torch.tensor(range(0, self.D_r[1])))
        # self.p_r = torch.cat([
        #     ((torch.stack([p_rx, p_ry], 2) + 0.5) / self.D_r - 0.5) * conf.GetEyeViewportSize(), # 眼球视野
        #     torch.ones(self.D_r[0], self.D_r[1], 1)
        # ], 2)

        self.p_r = torch.cat([
            ((util.MeshGrid(self.D_r) + 0.5) / self.D_r - 0.5) * conf.GetEyeViewportSize(),
            torch.ones(self.D_r[0], self.D_r[1], 1)
        ], 2)

        # self.Phi = torch.empty(N, D_r[0], D_r[1], M, 2, device=cuda_dev, dtype=torch.long)
        # self.mask = torch.empty(self.N, self.D_r[0], self.D_r[1], self.M, 2, dtype=torch.float) # 2 x 480 x 640 x 41 x 2
        
    def CalculateRetinal2LayerMappings(self, position, gaze_dir, df):
        '''
        Calculate the mapping matrix from retinal to layers.

        Parameters
        --------
        position - 1 x 3 tensor, eye's position
        gaze_dir - 1 x 2 tensor, gaze forward vector (with z normalized)
        df       - focus distance

        Returns
        --------
        phi             - N x H_r x W_r x M x 2, retinal to layers mapping, N is number of layers
        phi_invalid     - N x H_r x W_r x M x 1, indicates invalid (out-of-range) mapping
        retinal_invalid - 1 x H_r x W_r, indicates invalid pixels in retinal image
        '''
        D = self.conf.layer_res 
        c = torch.tensor([ D[1] / 2, D[0] / 2 ])     # c: Center of layers (pixel)

        D_r = self.conf.retinal_res        # D_r: Resolution of retinal 480 640
        V = self.conf.GetEyeViewportSize() # V: Viewport size of eye 
        p_f = self.p_r * df                # p_f: H x W x 3, focus positions of retinal pixels on focus plane
        
        # Calculate transformation from eye to display
        gvec_lookat = glm.dvec3(gaze_dir[0], -gaze_dir[1], 1)
        gmat_eye = glm.inverse(glm.lookAtLH(glm.dvec3(), gvec_lookat, glm.dvec3(0, 1, 0)))
        eye_rot = util.Glm2Tensor(glm.dmat3(gmat_eye))
        eye_center = torch.tensor([ position[0], -position[1], position[2] ])

        u_rot = torch.mm(self.u, eye_rot)
        v_rot = torch.matmul(p_f, eye_rot).unsqueeze(2).expand(
            -1, -1, self.M, -1) - u_rot # v_rot: H x W x M x 3, rotated rays' direction vector
        u_rot.add_(eye_center)                            # translate by eye's center
        v_rot = v_rot.div(v_rot[:, :, :, 2].unsqueeze(3)) # make z = 1 for each direction vector in v_rot
        
        phi = torch.empty(self.N, self.D_r[0], self.D_r[1], self.M, 2, dtype=torch.long)

        for i in range(0, self.N):
            dp_i = self.conf.GetPixelSizeOfLayer(i)     # dp_i: Pixel size of layer i
            d_i = self.conf.d_layer[i]                  # d_i: Distance of layer i
            k = (d_i - u_rot[:, 2]).unsqueeze(1)
            pi_r = (u_rot[:, 0:2] + v_rot[:, :, :, 0:2] * k) / dp_i      # pi_r: H x W x M x 2, rays' pixel coord on layer i
            phi[i, :, :, :, :] = torch.floor(pi_r + c)
        
        # Calculate invalid mask (out-of-range elements in phi) and reduced to retinal
        phi_invalid = (phi[:, :, :, :, 0] < 0) | (phi[:, :, :, :, 0] >= D[1]) | \
                       (phi[:, :, :, :, 1] < 0) | (phi[:, :, :, :, 1] >= D[0])
        phi_invalid = phi_invalid.unsqueeze(4)
        # print("phi_invalid:",phi_invalid.shape) 
        retinal_invalid = phi_invalid.amax((0, 3)).squeeze().unsqueeze(0)
        # print("retinal_invalid:",retinal_invalid.shape)
        # Fix invalid elements in phi
        phi[phi_invalid.expand(-1, -1, -1, -1, 2)] = 0

        return [ phi, phi_invalid, retinal_invalid  ]
    
    
    def GenRetinalFromLayers(self, layers, Phi):
        '''
        Generate retinal image from layers, using precalculated mapping matrix
        
        Parameters
        --------
        layers       - 3N x H x W, stacked layer images, with 3 channels in each layer
        phi          - N x H_r x W_r x M x 2, retinal to layers mapping, N is number of layers
        
        Returns
        --------
        3 x H_r x W_r, 3 channels retinal image
        '''
        # FOR GRAYSCALE 1 FOR RGB 3
        mapped_layers = torch.empty(self.N, 3, self.D_r[0], self.D_r[1], self.M) # 2 x 3 x 480 x 640 x 41
        # print("mapped_layers:",mapped_layers.shape)
        for i in range(0, Phi.size()[0]):
            # torch.Size([3, 2, 320, 320, 2])
            # print("gather layers:",layers[(i * 3) : (i * 3 + 3),Phi[i, :, :, :, 0],Phi[i, :, :, :, 1]].shape)
            mapped_layers[i, :, :, :, :] = layers[(i * 3) : (i * 3 + 3),
                                                    Phi[i, :, :, :, 1],
                                                    Phi[i, :, :, :, 0]]
        # print("mapped_layers:",mapped_layers.shape)
        retinal = mapped_layers.prod(0).sum(3).div(Phi.size()[3])
        # print("retinal:",retinal.shape)
        return retinal

    def GenRetinalFromLayersBatch(self, layers, Phi):
        '''
        Generate retinal image from layers, using precalculated mapping matrix
        
        Parameters
        --------
        layers - 3N x H_l x W_l tensor, stacked layer images, with 3 channels in each layer
        
        Returns
        --------
        3 x H_r x W_r tensor, 3 channels retinal image
        H_r x W_r tensor, retinal image mask, indicates pixels valid or not
        
        '''
        mapped_layers = torch.empty(layers.size()[0], self.N, 3, self.D_r[0], self.D_r[1], self.M) #BS x Layers x C x H x W x Sample
        
        # truth = torch.empty(layers.size()[0], self.N, 3, self.D_r[0], self.D_r[1], self.M)
        # layers_truth = layers.clone()
        # Phi_truth = Phi.clone()
        layers = torch.stack((layers[:,0:3,:,:],layers[:,3:6,:,:]),dim=1) ## torch.Size([BS, Layer, RGB 3, 320, 320])
        
        # Phi = Phi[:,:,None,:,:,:,:].expand(-1,-1,3,-1,-1,-1,-1)
        # print("mapped_layers:",mapped_layers.shape) #torch.Size([2, 2, 3, 320, 320, 41])
        # print("input layers:",layers.shape) ## torch.Size([2, 2, 3, 320, 320])
        # print("input Phi:",Phi.shape) #torch.Size([2, 2, 320, 320, 41, 2])
        
        # #没优化

        # for i in range(0, Phi_truth.size()[0]):
        #     for j in range(0, Phi_truth.size()[1]):
        #         truth[i, j, :, :, :, :] = layers_truth[i, (j * 3) : (j * 3 + 3),
        #                                                 Phi_truth[i, j, :, :, :, 0],
        #                                                 Phi_truth[i, j, :, :, :, 1]]

        #优化2
        # start = time.time() 
        mapped_layers_op1 = mapped_layers.reshape(-1,
                mapped_layers.shape[2],mapped_layers.shape[3],mapped_layers.shape[4],mapped_layers.shape[5])
                # BatchSizexLayer Channel 3 320 320 41
        layers_op1 = layers.reshape(-1,layers.shape[2],layers.shape[3],layers.shape[4]) # 2x2 3 320 320
        Phi_op1 = Phi.reshape(-1,Phi.shape[2],Phi.shape[3],Phi.shape[4],Phi.shape[5]) # 2x2 320 320 41 2
        x = Phi_op1[:,:,:,:,0] # 2x2 320 320 41
        y = Phi_op1[:,:,:,:,1] # 2x2 320 320 41
        # print("reshape:",time.time() - start)

        # start = time.time()
        mapped_layers_op1 = layers_op1[torch.arange(layers_op1.shape[0])[:, None, None, None], :, y, x] # x,y 切换
        #2x2 320 320 41 3
        # print("mapping one step:",time.time() - start)
        
        # print("mapped_layers:",mapped_layers_op1.shape) # torch.Size([4, 3, 320, 320, 41])
        # start = time.time()
        mapped_layers_op1 = mapped_layers_op1.permute(0,4,1,2,3)
        mapped_layers = mapped_layers_op1.reshape(mapped_layers.shape[0],mapped_layers.shape[1],
                    mapped_layers.shape[2],mapped_layers.shape[3],mapped_layers.shape[4],mapped_layers.shape[5])
        # print("reshape end:",time.time() - start)

        # print("test:")
        # print((truth.cpu() == mapped_layers.cpu()).all())
        #优化1
        # start = time.time()
        # mapped_layers_op1 = mapped_layers.reshape(-1,
        #         mapped_layers.shape[2],mapped_layers.shape[3],mapped_layers.shape[4],mapped_layers.shape[5])
        # layers_op1 = layers.reshape(-1,layers.shape[2],layers.shape[3],layers.shape[4])
        # Phi_op1 = Phi.reshape(-1,Phi.shape[2],Phi.shape[3],Phi.shape[4],Phi.shape[5])
        # print("reshape:",time.time() - start)


        # for i in range(0, Phi_op1.size()[0]):
        #     start = time.time()
        #     mapped_layers_op1[i, :, :, :, :] = layers_op1[i,:,
        #                                             Phi_op1[i, :, :, :, 0],
        #                                             Phi_op1[i, :, :, :, 1]]
        #     print("mapping one step:",time.time() - start)
        # print("mapped_layers:",mapped_layers_op1.shape) # torch.Size([4, 3, 320, 320, 41])
        # start = time.time()
        # mapped_layers = mapped_layers_op1.reshape(mapped_layers.shape[0],mapped_layers.shape[1],
        #             mapped_layers.shape[2],mapped_layers.shape[3],mapped_layers.shape[4],mapped_layers.shape[5])
        # print("reshape end:",time.time() - start)

        # print("mapped_layers:",mapped_layers.shape) # torch.Size([2, 2, 3, 320, 320, 41])
        retinal = mapped_layers.prod(1).sum(4).div(Phi.size()[4])
        # print("retinal:",retinal.shape) # torch.Size([BatchSize, 3, 320, 320])
        return retinal
        
    ## TO BE CHECK
    def GenFoveaLayers(self, b_retinal, is_mask):
        '''
        Generate foveated layers for retinal images or masks
        
        Parameters
        --------
        b_retinal - B x C x H_r x W_r, Batch of retinal images/masks
        is_mask   - Whether b_retinal is masks or images
        
        Returns
        --------
        b_fovea_layers - N_f x (B x C x H[f] x W[f]) list of batch of foveated layers
        '''
        b_fovea_layers = []
        for i in range(0, len(self.conf.eye_fovea_angles)):
            k = self.conf.eye_fovea_downsamples[i]
            region = self.conf.GetRegionOfFoveaLayer(i)
            b_roi = b_retinal[:, :, region, region]
            if k == 1:
                b_fovea_layers.append(b_roi)
            elif is_mask:
                b_fovea_layers.append(torch.nn.functional.max_pool2d(b_roi.to(torch.float), k).to(torch.bool))
            else:
                b_fovea_layers.append(torch.nn.functional.avg_pool2d(b_roi, k))
        return b_fovea_layers
        # fovea_layers = []
        # fovea_layer_masks = []
        # fov = self.conf.eye_fovea_angles[-1]
        # retinal_res = int(self.conf.retinal_res[0])
        # for i in range(0, len(self.conf.eye_fovea_angles)):
        #     angle = self.conf.eye_fovea_angles[i]
        #     k = self.conf.eye_fovea_downsamples[i]
        #     roi_size = int(np.ceil(retinal_res * angle / fov))
        #     roi_offset = int((retinal_res - roi_size) / 2)
        #     roi_img = retinal[:, roi_offset:(roi_offset + roi_size), roi_offset:(roi_offset + roi_size)]
        #     roi_mask = retinal_mask[roi_offset:(roi_offset + roi_size), roi_offset:(roi_offset + roi_size)]
        #     if k == 1:
        #         fovea_layers.append(roi_img)
        #         fovea_layer_masks.append(roi_mask)
        #     else:
        #         fovea_layers.append(torch.nn.functional.avg_pool2d(roi_img.unsqueeze(0), k).squeeze(0))
        #         fovea_layer_masks.append(1 - torch.nn.functional.max_pool2d((1 - roi_mask).unsqueeze(0), k).squeeze(0))
        # return [ fovea_layers, fovea_layer_masks ]

    ## TO BE CHECK
    def GenFoveaLayersBatch(self, retinal, retinal_mask):
        '''
        Generate foveated layers and corresponding masks
        
        Parameters
        --------
        retinal      - Retinal image generated by GenRetinalFromLayers()
        retinal_mask - Mask of retinal image, also generated by GenRetinalFromLayers()
        
        Returns
        --------
        fovea_layers      - list of foveated layers
        fovea_layer_masks - list of mask images, corresponding to foveated layers
        '''
        fovea_layers = []
        fovea_layer_masks = []
        fov = self.conf.eye_fovea_angles[-1]
        # print("fov:",fov)
        retinal_res = int(self.conf.retinal_res[0])
        # print("retinal_res:",retinal_res)
        # print("len(self.conf.eye_fovea_angles):",len(self.conf.eye_fovea_angles))
        for i in range(0, len(self.conf.eye_fovea_angles)):
            angle = self.conf.eye_fovea_angles[i]
            k = self.conf.eye_fovea_downsamples[i]
            roi_size = int(np.ceil(retinal_res * angle / fov))
            roi_offset = int((retinal_res - roi_size) / 2)
            # [2, 3, 320, 320]
            roi_img = retinal[:, :, roi_offset:(roi_offset + roi_size), roi_offset:(roi_offset + roi_size)]
            # print("roi_img:",roi_img.shape)
            # [2, 320, 320]
            roi_mask = retinal_mask[:, roi_offset:(roi_offset + roi_size), roi_offset:(roi_offset + roi_size)]
            # print("roi_mask:",roi_mask.shape)
            if k == 1:
                fovea_layers.append(roi_img)
                fovea_layer_masks.append(roi_mask)
            else:
                fovea_layers.append(torch.nn.functional.avg_pool2d(roi_img, k))
                fovea_layer_masks.append(1 - torch.nn.functional.max_pool2d((1 - roi_mask), k))
        return [ fovea_layers, fovea_layer_masks ]
    
    ## TO BE CHECK
    def GenFoveaRetinal(self, b_fovea_layers):
        '''
        Generate foveated retinal image by blending fovea layers
        **Note: current implementation only support two fovea layers**
        
        Parameters
        --------
        b_fovea_layers - N_f x (B x 3 x H[f] x W[f]), list of batch of (masked) foveated layers
        
        Returns
        --------
        B x 3 x H_r x W_r, batch of foveated retinal images
        '''
        b_fovea_retinal = torch.nn.functional.interpolate(b_fovea_layers[1],
            scale_factor=self.conf.eye_fovea_downsamples[1],
            mode='bilinear', align_corners=False)
        region = self.conf.GetRegionOfFoveaLayer(0)
        blend = self.conf.eye_fovea_blend[0]
        b_roi = b_fovea_retinal[:, :, region, region]
        b_roi.mul_(1 - blend).add_(b_fovea_layers[0] * blend)
        return b_fovea_retinal