mpeg_encode_decode/init.m at master · amitkm2603/mpeg_encode_decode · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
%%
%@author : Amit Mandal
%Assignment 2
%Date: 22-Feb-2016
%%

function time_arr = init(mmdata,frames_to_process,quality,fps,pattern)
%INIT Run the full encode -> decode -> playback pipeline and time it.
%
%   mmdata            - struct whose 'frames' field is handed to the encoder
%   frames_to_process - number of frames passed on to encoder and decoder
%   quality           - value forwarded to init_global_var (used there as QP)
%   fps               - frame rate forwarded to implay
%   pattern           - frame-type pattern forwarded to the encoder,
%                       e.g. 'IPBBPI'
%
%   time_arr          - 1x2 row vector: [encoding seconds, decoding seconds]
clc;
frames = mmdata.frames;

% Build the integer transform / quantization / scaling matrices once,
% before any frame is touched.
progress = waitbar(0,'Please wait... Setting up Integer Transform and Quantization matrices');
init_global_var(quality);
waitbar(1,progress,'Setting up Integer Transform and Quantization matrices complete. Encoding will start now');
close(progress);

% --- encode -------------------------------------------------------------
disp('encoding the video ..');
tic;
encoded_mpeg = encoder(frames, pattern, frames_to_process);
encode_seconds = toc;

% --- decode -------------------------------------------------------------
disp('decoding the video..');
tic;
decoded_mpeg = decoder(encoded_mpeg, frames_to_process);
decode_seconds = toc;

time_arr = [encode_seconds, decode_seconds];

% Play the reconstructed sequence.
implay(decoded_mpeg,fps);
end
%%

function encoded_mpeg = encoder(frames,pattern,frames_to_process)
%ENCODER Encode the first frames_to_process frames following a frame-type pattern.
%
%   frames            - struct array; frames(1,k).cdata is the k-th picture
%                       (height x width x 3, uint8 per the original notes)
%   pattern           - char vector of frame types ('I', 'P', 'B') that is
%                       repeated cyclically over the frames, e.g. 'IPBBPI'
%   frames_to_process - number of frames to encode, starting at frame 1
%
%   encoded_mpeg      - 1 x n cell array holding, per frame, the struct
%                       returned by encode_frame
%
%   Two passes are made. Pass 1 encodes the I and P frames in order and
%   leaves a placeholder for every B frame. Pass 2 encodes each B frame
%   against the nearest already-encoded non-B frame on either side.
%
%   A B frame needs a later I/P frame as reference. When the sequence is
%   cut off so that trailing B frames have no such frame, they are encoded
%   as P frames instead (a warning is issued). A pattern whose first frame
%   is 'B' is rejected because no earlier reference can exist.

frame_ids = 1:frames_to_process;
n = numel(frame_ids);
encoded_mpeg = cell(1, n);   % also guarantees the output exists when n == 0
if n == 0
    return;
end
if isempty(pattern)
    error('encoder:emptyPattern', 'The frame pattern must contain at least one frame type.');
end

% Frame type of every frame: the pattern repeated cyclically.
frame_types = pattern(mod(frame_ids - 1, numel(pattern)) + 1);

if frame_types(1) == 'B'
    error('encoder:leadingBFrame', ...
        'Pattern "%s" starts with a B frame, which has no earlier reference frame.', pattern);
end

% Trailing B frames have no future reference inside the processed range.
last_anchor = find(frame_types ~= 'B', 1, 'last');
if last_anchor < n
    warning('encoder:trailingBFrames', ...
        'Frames %d to %d are B frames without a following I/P frame; encoding them as P frames.', ...
        last_anchor + 1, n);
    frame_types(last_anchor + 1:n) = 'P';
end

% ---- pass 1: I and P frames ---------------------------------------------
% previous_frame carries the reconstructed picture of the last I/P frame.
previous_frame = [];
h = waitbar(0,'Please wait... Encoding of P and I frames');
for frame_index = frame_ids
    frame_type = frame_types(frame_index);

    % sprintf instead of strcat: strcat drops trailing blanks of char
    % inputs, which removed the padding of these messages.
    waitbar(frame_index / n, h, ...
        sprintf('Encoding frame number:    %d of type:    %s', frame_index, frame_type));

    % B frames are deferred until their future reference has been encoded.
    if frame_type == 'B'
        encoded_mpeg{frame_index} = struct('type','B','data',[],'motion_estimation',[] );
        continue;
    end

    current_frame = to_subsampled_yuv(frames(1,frame_index));
    [encoded_mpeg{frame_index}, previous_frame] = ...
        encode_frame(current_frame, frame_type, previous_frame, []);
end
close(h);

% ---- pass 2: B frames ---------------------------------------------------
h = waitbar(0,'Please wait... Encoding B frames..');
for frame_index = frame_ids
    frame_type = frame_types(frame_index);

    if frame_type == 'B'
        waitbar(frame_index / n, h, ...
            sprintf('Encoding frame number:    %d of type:    %s', frame_index, frame_type));

        current_frame = to_subsampled_yuv(frames(1,frame_index));

        % Nearest non-B frame on each side. Both exist: frame 1 is not a
        % B frame and trailing B frames were converted to P above.
        prev_idx = find(frame_types(1:frame_index - 1) ~= 'B', 1, 'last');
        next_idx = frame_index + find(frame_types(frame_index + 1:n) ~= 'B', 1, 'first');

        [encoded_mpeg{frame_index}, ~] = encode_frame(current_frame, frame_type, ...
            encoded_mpeg{prev_idx}, encoded_mpeg{next_idx});
    end

    % Generate the motion vector plot for the 2nd frame, using the raw
    % (unconverted) pictures of frames 1 and 2.
    if frame_index == 2
        raw_current  = double(frames(1,frame_index).cdata);
        raw_previous = double(frames(1,frame_index - 1).cdata);
        display_mv(encoded_mpeg{frame_index}, frame_index, frame_type, raw_current, raw_previous);
    end
end
close(h);
end

function yuv_frame = to_subsampled_yuv(frame_data)
%TO_SUBSAMPLED_YUV Convert one input frame to the Y/U/V struct used by encode_frame.
%   The picture is converted with convert_rgb_yuv; the U and V planes are
%   then passed through sample_down (4:2:0 chroma subsampling according to
%   the original comments).
    yuv = convert_rgb_yuv(double(frame_data.cdata));
    yuv_frame = struct('Y_comp', yuv(:,:,1), ...
                       'U_comp', sample_down(yuv(:,:,2)), ...
                       'V_comp', sample_down(yuv(:,:,3)));
end

%% Decoder
function mov = decoder(encoded_frames,frames_to_process)
%DECODER Decode encoded frames into a movie struct array playable by implay.
%
%   encoded_frames    - cell array of frame structs as produced by the
%                       encoder (fields used here: type, prev_frame, plus
%                       whatever decode_frame reads)
%   frames_to_process - number of frames to decode, starting at frame 1
%
%   mov               - struct array with fields 'cdata' and 'colormap',
%                       one element per decoded frame
n = frames_to_process;

% Preallocate the movie. At least one (empty) element is kept so the result
% for n == 0 is the same single empty struct as before.
mov = repmat(struct('cdata',[],'colormap',[]), 1, max(n, 1));

h = waitbar(0,'Please wait... Decoding in progress ..');

for frame_index = 1:n
    current_frame = encoded_frames{frame_index};

    % sprintf instead of strcat: strcat drops trailing blanks of char
    % inputs, which removed the padding of this message.
    waitbar(frame_index / n, h, ...
        sprintf('Decoding frame number:    %d of type:    %s', frame_index, current_frame.type));

    % Each encoded frame carries its own reference picture in prev_frame.
    planes = decode_frame(current_frame, current_frame.prev_frame, []);

    % Reverse the chroma subsampling and stack the three planes. The
    % picture is built fresh for every frame so nothing from the previous
    % iteration can leak into it.
    yuv_picture = cat(3, planes.Y_comp, sample_up(planes.U_comp), sample_up(planes.V_comp));

    % Convert YUV back to RGB and store the frame.
    mov(frame_index) = struct('cdata', convert_yuv_rgb(yuv_picture), 'colormap', []);
end
close(h);
end

%%
function [encoded_mpeg, reconst_mpeg] = encode_frame(current_frame,frame_pattern,previous_frame, next_frame)
%ENCODE_FRAME Encode one frame as an I, P or B frame.
%
%   current_frame  - struct with fields Y_comp, U_comp, V_comp (the picture
%                    to encode)
%   frame_pattern  - 'I', 'P' or 'B'
%   previous_frame - 'I': unused.
%                    'P': reconstructed reference picture (Y/U/V struct).
%                    'B': ENCODED frame struct (type/data/motion_estimation/
%                         prev_frame) of the preceding I or P frame.
%   next_frame     - 'B' only: ENCODED frame struct of the following I or P
%                    frame; ignored otherwise.
%
%   encoded_mpeg   - struct with fields type, data, motion_estimation and
%                    prev_frame
%   reconst_mpeg   - reconstructed picture (Y/U/V struct), usable as the
%                    reference for later frames

    encoded_mpeg = struct('type',[],'data',[],'motion_estimation',[] );
    reconst_mpeg = struct('Y_comp',[],'U_comp',[] ,'V_comp'  ,[]);

    switch frame_pattern
        case 'I'
            % Intra coding (H.264 style integer transform), see
            % http://iphome.hhi.de/wiegand/assets/pdfs/h264-AVC-Standard.pdf
            intra = struct('Y_comp',[],'U_comp',[] ,'V_comp'  ,[]);
            % luma is intra-coded with parameters (4,16) ...
            intra.Y_comp = encode_intra_frame(current_frame.Y_comp,4,16);
            % ... and both chroma planes with parameters (8,8)
            intra.U_comp = encode_intra_frame(current_frame.U_comp,8,8);
            intra.V_comp = encode_intra_frame(current_frame.V_comp,8,8);

            % integer transform + quantization
            enc_i_frame.Y_comp = integer_dct_quant(double(intra.Y_comp));
            enc_i_frame.U_comp = integer_dct_quant(double(intra.U_comp));
            enc_i_frame.V_comp = integer_dct_quant(double(intra.V_comp));

            % decode again so later frames predict from what the decoder sees
            reconst_mpeg.Y_comp = integer_idct_dequant(double(enc_i_frame.Y_comp));
            reconst_mpeg.U_comp = integer_idct_dequant(double(enc_i_frame.U_comp));
            reconst_mpeg.V_comp = integer_idct_dequant(double(enc_i_frame.V_comp));

            encoded_mpeg.type = 'I';
            encoded_mpeg.data = enc_i_frame;
            encoded_mpeg.motion_estimation = 0; % no motion estimation for I frames
            encoded_mpeg.prev_frame = [];       % no reference frame for I frames

        case 'P'
            if isempty(previous_frame)
                error('encode_frame:missingReference', ...
                    'A P frame needs a reconstructed previous frame as reference.');
            end

            difference = struct('Y_comp',[],'U_comp',[] ,'V_comp'  ,[]);

            % motion search uses the luma (Y) plane only
            [~, difference.Y_comp, motion_est] = encode_p_frame(current_frame.Y_comp,previous_frame.Y_comp);

            % chroma: plain difference against the reference
            difference.U_comp = current_frame.U_comp - previous_frame.U_comp;
            difference.V_comp = current_frame.V_comp - previous_frame.V_comp;

            % integer transform + quantization of the residual
            difference.Y_comp = integer_dct_quant(difference.Y_comp);
            difference.U_comp = integer_dct_quant(difference.U_comp);
            difference.V_comp = integer_dct_quant(difference.V_comp);

            encoded_mpeg.type = 'P';
            encoded_mpeg.data = difference;
            encoded_mpeg.motion_estimation = motion_est;
            encoded_mpeg.prev_frame = previous_frame;

            % decode again to obtain the reference for following frames
            reconst_mpeg = decode_p_frame(difference,motion_est,previous_frame);

        case 'B'
            % http://dsp.stackexchange.com/questions/2486/video-compression-when-would-an-average-of-the-previous-and-next-i-or-p-frame?lq=1

            % Both references arrive in encoded form and must be decoded
            % first. (The next reference used to be decoded from
            % previous_frame when it was an I frame.)
            previous_frame = decode_reference_frame(previous_frame);
            next_frame     = decode_reference_frame(next_frame);

            % forward: current frame predicted from the previous reference
            [~, fwd_Y, motion_est_forward] = encode_p_frame(current_frame.Y_comp,previous_frame.Y_comp);
            fwd_U = current_frame.U_comp - previous_frame.U_comp;
            fwd_V = current_frame.V_comp - previous_frame.V_comp;

            % backward: next reference predicted from the current frame
            [~, bwd_Y, motion_est_backward] = encode_p_frame(next_frame.Y_comp,current_frame.Y_comp);
            bwd_U = next_frame.U_comp - current_frame.U_comp;
            bwd_V = next_frame.V_comp - current_frame.V_comp;

            % residual = average of both differences, transformed + quantized
            difference = struct('Y_comp',[],'U_comp',[] ,'V_comp'  ,[]);
            difference.Y_comp = integer_dct_quant( (fwd_Y + bwd_Y) / 2 );
            difference.U_comp = integer_dct_quant( (fwd_U + bwd_U) / 2 );
            difference.V_comp = integer_dct_quant( (fwd_V + bwd_V) / 2 );

            % Average the forward and backward motion vectors block by
            % block. The bounds come from the motion vector array itself:
            % the earlier loop derived them from size() of a 1x1 struct and
            % therefore never executed.
            motion_est = motion_est_forward;
            for blk = 1:numel(motion_est_forward)
                motion_est(blk).mvx = round( (motion_est_forward(blk).mvx + motion_est_backward(blk).mvx ) / 2 );
                motion_est(blk).mvy = round( (motion_est_forward(blk).mvy + motion_est_backward(blk).mvy ) / 2 );
            end

            % reconstruct the picture
            reconst_mpeg = decode_p_frame(difference,motion_est,previous_frame);

            encoded_mpeg.data = difference;
            encoded_mpeg.motion_estimation = motion_est;
            encoded_mpeg.prev_frame = previous_frame;
            encoded_mpeg.type = 'B';

        otherwise
            error('encode_frame:unknownFrameType', ...
                'Unsupported frame type "%s"; expected ''I'', ''P'' or ''B''.', frame_pattern);
    end

end

function decoded = decode_reference_frame(ref)
%DECODE_REFERENCE_FRAME Reconstruct the picture of an encoded I or P frame.
%   ref is an encoded frame struct (type/data/motion_estimation/prev_frame);
%   the result is a Y/U/V struct.
    if ~isstruct(ref) || ~isfield(ref,'type')
        error('encode_frame:missingReference', ...
            'A B frame needs encoded previous and next reference frames.');
    end

    switch ref.type
        case 'I'
            decoded = struct('Y_comp', integer_idct_dequant(ref.data.Y_comp), ...
                             'U_comp', integer_idct_dequant(ref.data.U_comp), ...
                             'V_comp', integer_idct_dequant(ref.data.V_comp));
        case 'P'
            decoded = decode_p_frame(ref.data, ref.motion_estimation, ref.prev_frame);
        otherwise
            error('encode_frame:invalidReference', ...
                'A reference frame must be of type ''I'' or ''P''.');
    end
end


%%
% Decoding the frames
%
function decoded_frame = decode_frame(current_frame,previous_frame,~)
%DECODE_FRAME Reconstruct the picture of one encoded frame.
%
%   current_frame  - encoded frame struct; fields read here: type, data and
%                    (for P/B frames) motion_estimation
%   previous_frame - reference picture handed to decode_p_frame for P and B
%                    frames; not used for I frames
%   (third input)  - ignored
%
%   decoded_frame  - struct with fields Y_comp, U_comp, V_comp. All three
%                    stay empty when the type is none of 'I', 'P', 'B'.
decoded_frame = struct('Y_comp',[],'U_comp',[] ,'V_comp'  ,[]);
frame_kind = current_frame.type;

% I frame: inverse transform / dequantize each plane on its own.
if (frame_kind == 'I')
    coeffs = current_frame.data;
    decoded_frame.Y_comp = integer_idct_dequant(double(coeffs.Y_comp));
    decoded_frame.U_comp = integer_idct_dequant(double(coeffs.U_comp));
    decoded_frame.V_comp = integer_idct_dequant(double(coeffs.V_comp));
    return;
end

% P and B frames: stored residual + motion vectors applied to the reference.
if (frame_kind == 'P' || frame_kind == 'B')
    payload = current_frame.data;

    residual = struct('Y_comp',[],'U_comp',[] ,'V_comp'  ,[]);
    residual.Y_comp = payload.Y_comp;
    residual.U_comp = payload.U_comp;
    residual.V_comp = payload.V_comp;

    decoded_frame = decode_p_frame(residual, current_frame.motion_estimation, previous_frame);
end
end
%%

%http://www2.cs.sfu.ca/CourseCentral/820/li/material/source/H264_Codec_Notes.pdf
%% calculating Integer dct-quant-scaling and reverse matrices
function init_global_var(QP)
%INIT_GLOBAL_VAR Set up the global integer-transform and quantization matrices.
%
%   QP - quantization parameter; selects table row mod(QP,6)+1 and the
%        power-of-two scale 2^floor(QP/6)
%
%   Globals written:
%     H     - 4x4 forward integer transform matrix
%     H_inv - 4x4 inverse integer transform matrix
%     M_f   - 4x4 forward quantization multipliers
%     V_i   - 4x4 inverse (rescaling) multipliers
%
%   Usage (from the reference notes):
%     f_hat   = round( (H * f * H') .* (M_f ./ power(2,15)) );          % eq 12.5
%     f_tilde = round( (H_inv * (f_hat .* V_i) * H_inv') ./ power(2,6) ); % eq 12.7
global H H_inv M_f V_i

H = [1  1  1  1;
     2  1 -1 -2;
     1 -1 -1  1;
     1 -2  2 -1];

H_inv = [1  1    1  0.5;
         1  0.5 -1 -1;
         1 -0.5 -1  1;
         1 -1    1 -0.5];

% One row per value of mod(QP,6); three distinct multipliers per row.
m = [13107 5243 8066;
     11916 4660 7490;
     10082 4194 6554;
      9362 3647 5825;
      8192 3355 5243;
      7282 2893 4559];

v = [10 16 13;
     11 18 14;
     13 20 16;
     14 23 18;
     16 25 20;
     18 29 23];

% Which of the three table columns goes to which position of the 4x4 block.
layout = [1 3 1 3;
          3 2 3 2;
          1 3 1 3;
          3 2 3 2];

qp_row = mod(QP,6) + 1;
m_row = m(qp_row, :);
v_row = v(qp_row, :);

% Indexing a row vector with a 4x4 index matrix yields a 4x4 result.
M_f = m_row(layout);
V_i = v_row(layout);

% From QP = 6 upwards the tables are scaled by a power of two.
if (QP >= 6)
    scale = power(2, floor(QP/6));
    M_f = round(M_f ./ scale); %eq 12.6
    V_i = round(V_i .* scale); %eq 12.8
end

display_matrix(M_f,V_i);
end