-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathbaseline.m
More file actions
138 lines (94 loc) · 4.73 KB
/
baseline.m
File metadata and controls
138 lines (94 loc) · 4.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
% Disable the 'MATLAB:hg:EraseModeIgnored' warning for the rest of the session.
warning('off', 'MATLAB:hg:EraseModeIgnored')
% Passed as the dataBaseDir argument to the feature-extraction functions below.
dataBaseDir = 'data';
rng(0); % Seed RNG so that randomization is deterministic
%% Get all filenames from imageBaseDir
% rdir is the recursive directory listing from the MATLAB File Exchange:
% http://www.mathworks.com/matlabcentral/fileexchange/19550-recursive-directory-listing
imageBaseDir = '../scene_categories';
% Build the glob from imageBaseDir so the dataset location is configured in
% exactly one place. Layout matched: <imageBaseDir>/<class>/<image>.jpg
structList = rdir(fullfile(imageBaseDir, '*', '*.jpg'));
if isempty(structList)
    % Fail early with a clear message instead of erroring deep inside the
    % split / feature-extraction code on an empty file list.
    error('baseline:noImages', ...
          'No .jpg images found in subdirectories of "%s".', imageBaseDir);
end
imageFileList = {structList.name}; % Get filenames from struct
numExamples = length(imageFileList);
%% Subsample from image filenames to test whether code functions
% Runs only when a variable named doSubsample exists in the workspace and is
% true; otherwise the full file list is kept.
if (exist('doSubsample', 'var') && doSubsample)
    % Shuffle image filenames
    imageFileList = imageFileList(randperm(numel(imageFileList)));
    % Retain a subset. Clamp to the number of available files so that a
    % dataset with fewer than 100 images does not cause an out-of-range index.
    subsample_size = min(100, numel(imageFileList)); % number of filenames to retain
    imageFileList = imageFileList(1:subsample_size);
end
%% Get labels of every image filename
% Get subdirectory of each image filename (first output of fileparts)
subdirs = cellfun(@fileparts, imageFileList, 'UniformOutput', 0);
% Subdirectories are string labels for each class. The third output of
% unique maps every entry of subdirs to its position in unique_subdirs,
% which is the integer class label. This replaces a per-example ismember
% lookup that also grew the labels array one element at a time.
[unique_subdirs, ~, labels] = unique(subdirs);
% Keep labels as a 1-by-N row vector, regardless of the orientation in which
% unique returns its index output.
labels = reshape(labels, 1, []);
numExamples = length(imageFileList);
%% Split data into train and test sets
numTrainPerClass = 100; % Number of training examples requested per class
% NOTE(review): when doSubsample keeps only 100 files in total, fewer than
% numTrainPerClass examples exist per class -- confirm how TrainTestSplit
% behaves in that case.
[fTrain, fTest, yTrain, yTest] = TrainTestSplit( imageFileList, labels, numTrainPerClass );

%% Print histogram of label distribution
% One row per report: header format, instance count, label vector.
% Reports are printed in the order: entire dataset, training set, test set.
reportHeaders = { ...
    'Distribution of labels in entire dataset of %i instances:\n', ...
    'Distribution of labels in training set of %i instances:\n', ...
    'Distribution of labels in test set of %i instances:\n'};
reportCounts = [numExamples, length(yTrain), length(yTest)];
reportLabels = {labels, yTrain, yTest};
for reportIdx = 1 : numel(reportHeaders)
    fprintf(reportHeaders{reportIdx}, reportCounts(reportIdx));
    PrintLabelDistribution(reportLabels{reportIdx});
end
%% Define parameters of feature extraction
% The params struct is handed unchanged to every feature-extraction function
% called later in this script.
params.maxImageSize    = 1000;
params.gridSpacing     = 8;
params.patchSize       = 16;
params.dictionarySize  = 200;
params.numTextonImages = 50;
params.pyramidLevels   = 3;

% Both values below are forwarded to each feature-extraction call.
canSkip = 1;
pfig    = figure;

% Local shorthands used only to build the file names below.
dictSize  = params.dictionarySize;
numLevels = params.pyramidLevels;

% Default suffix where SIFT features are stored. One mat file is generated per image.
featureSuffix = '_sift.mat';

% Default dictionary created by CalculateDictionary. This file has to be
% deleted if a new dictionary should be created.
dictFilename = sprintf('dictionary_%d.mat', dictSize);

% Default suffixes of files created by BuildHistograms
textonSuffix = sprintf('_texton_ind_%d.mat', dictSize);
histSuffix   = sprintf('_hist_%d.mat', dictSize);

% Default suffix of files created by CompilePyramid
pyramidSuffix = sprintf('_pyramid_%d_%d.mat', dictSize, numLevels);
%% Extract features of both training and test images
% Start with empty feature matrices for the training and test sets.
[xTrain, xTest] = deal([]);

% From here on imageFileList holds the training files followed by the test files.
imageFileList = [fTrain fTest];

% Step 1: SIFT descriptors for every image (training and test).
GenerateSiftDescriptors( imageFileList, imageBaseDir, dataBaseDir, params, canSkip, pfig );

% Step 2: dictionary from the training images ONLY. IMPORTANT: test images
% must not be passed here.
CalculateDictionary( fTrain, imageBaseDir, dataBaseDir, featureSuffix, params, canSkip, pfig );

% Step 3: histograms for every image (training and test).
H_all = BuildHistograms( imageFileList, imageBaseDir, dataBaseDir, featureSuffix, params, canSkip, pfig );

% Step 4: feature vectors, computed separately for the two sets.
xTrain = CompilePyramid( fTrain, dataBaseDir, textonSuffix, params, canSkip, pfig );
xTest  = CompilePyramid( fTest,  dataBaseDir, textonSuffix, params, canSkip, pfig );
%% Train SVM
% Compute kernel matrix so that we can train a kernel SVM
% (hist_isect is presumably a histogram-intersection kernel -- confirm).
% From the libSVM README:
%   To use precomputed kernel, you must include sample serial number as
%   the first column of the training and testing data
kernelMatrixTrain = hist_isect(xTrain, xTrain);
kernelMatrixTrain = [(1:size(kernelMatrixTrain, 1))' kernelMatrixTrain];
% The LIBSVM MATLAB interface requires label vectors of type double. Convert
% both label sets here so training and prediction are treated the same way.
yTrain = double(yTrain);
yTest = double(yTest);
% '-t 4' is the kernel-type option used together with the precomputed
% kernel matrix built above.
svmModel = svmtrain(yTrain, kernelMatrixTrain, '-t 4');
%% Predict labels for test images
% Kernel between every test example (rows) and every training example (columns).
kernelMatrixTest = hist_isect(xTest, xTrain);
kernelMatrixTest = [(1:size(kernelMatrixTest, 1))' kernelMatrixTest];
% The true test labels are passed only so that svmpredict can compute the
% accuracy; this is a convenience feature of svmpredict.
[predTest, accuracy, ~] = svmpredict(yTest, kernelMatrixTest, svmModel);
meanAccuracyTest = ComputeMeanAccuracy(yTest, predTest);
%% Train and predict with linear SVM
% The progress message ends with a newline so that whatever is printed next
% starts on its own line.
fprintf('Train and predict with linear SVM\n');
% train / predict are presumably the LIBLINEAR MATLAB functions (confirm);
% the feature matrices are converted to sparse before being passed in.
linearModel = train(yTrain, sparse(xTrain));
predictions = predict(yTest, sparse(xTest), linearModel);
% Report the mean accuracy of both classifiers, as computed by ComputeMeanAccuracy.
fprintf('Mean accuracy for spatial pyramid + kernel SVM = %.1f%%\n', 100 * meanAccuracyTest);
fprintf('Mean accuracy for spatial pyramid + linear SVM = %.1f%%\n', 100 * ComputeMeanAccuracy(yTest, predictions));