Commit f131e34e authored by Adriaens, Ines's avatar Adriaens, Ines ✌🏼
Browse files

update penalty exploration

parent d6ad9e1d
%% NewCHP analysis
% ------------------------------------------------------------------------
% Created September 29, 2021 by Ines Adriaens @adria036
% ------------------------------------------------------------------------
% Working in new git branch elegant_penalty
% ------------------------------------------------------------------------
% Context:
% previously, a changepoint analysis was implemented to detect changes of
% distributions within the z- and CP distance time series. A changepoint
% analysis detects sudden changes based on the minimalisation of a cost
% function togehter with a penalty for the number of changes to be
% detected. The number of changes can be either a fixed number, or can be
% based on a minimal improvement of the goodness-of-fit. The fixed
% number approach is not very elegant, but penalty are, in literature,
% typically based on 'expert knowledge'. In this part of the analysis,
% following steps are implemented.
% ------------------------------------------------------------------------
% Steps:
% 1) set constants and filepaths
% 2) load data
% 3) summarize lying bouts
% 4) calculate minimum "residue" for no changepoint found + plot
% 5) create function to evaluate performance cp detection
% 6) determine and test penalties based on residue of no cp
% 7) visualise
% ------------------------------------------------------------------------
clear variables
close all
clc
%% STEP 1: constants and filepaths
% this folder contains the results of the smoothing and data imputation
% scripts
% results directory
init_.resdir = ['C:\Users\adria036\OneDrive - Wageningen '...
'University & Research\iAdriaens_doc\Projects\' ...
'eEllen\B4F_indTracking_cattle\B4F_results\'];
%% STEP 2: load data
% data resulting from data imputation script / reference data
% the 'ground truth' = comparison base = data_meta.selection = from first
% to last bout
% load data
load([init_.resdir 'D3_sdata_IQ_UWB.mat']) % from first to last icebout
load([init_.resdir 'D2_data_meta.mat'])
% delete cow 280 (no data)
sdata = rmfield(sdata,'cow_280');
%% STEP 3: calculate summary of lying bouts for manuscript and penalty
% based on data_meta.selection = selection
% step 1: select data_meta.selection based on sdata available
for i = 1:height(data_meta.selection)
% check whether all the data is available in sdata
fields_ = ['cow_' num2str(data_meta.selection.cow(i))]; % field of sdat
start = find(datenum(sdata.(fields_).date) == ...
datenum(data_meta.selection.bDat(i)) + data_meta.selection.bTim(i));
endb = find(datenum(sdata.(fields_).date) == ...
datenum(data_meta.selection.eDat(i)) + data_meta.selection.eTim(i));
if ~isempty(start) && ~isempty(endb)
data_meta.selection.include(i) = 1;
else
data_meta.selection.include(i) = 0;
end
end
% summary
data_meta.sums = table(unique(data_meta.selection.cow),'VariableNames',...
{'cow'});
for i = 1:height(data_meta.sums)
% no of days
data_meta.sums.NoDays(i) = length(unique(floor(...
data_meta.selection.numtime(...
data_meta.selection.cow ==data_meta.sums.cow(i)))));
% no of lying bouts
data_meta.sums.NoBouts(i) = sum(data_meta.selection.cow == ...
data_meta.sums.cow(i));
% max no of bouts per day
% days = unique(
% for j = 1:un
% min no of bouts per day
% avg number of bouts per day
% no of days sdata
field_ = ['cow_' num2str(data_meta.sums.cow(i))];
data_meta.sums.NoDaysSd(i) = length(...
unique(floor(datenum(sdata.(field_).date))));
end
% TO DO: if not all data are available? Discrepancy between icecube data
% and the UWB data
% TO DO: Per day - summary // both datasets
%% STEP 4: calculate the minimum residu per day
%
%
%
% fieldnames sdata
fields_ = fieldnames(sdata);
% select a dataset per cow
for i = 1:length(fields_)
sdata.(fields_{i}).numtime = datenum(sdata.(fields_{i}).date);
% unique days per cow
days = unique(floor(sdata.(fields_{i}).numtime));
% random day select
randindx = randi(length(days));
selday = days(randindx);
% select in seldata
seldata.(fields_{i}) = sdata.(fields_{i})(...
floor(sdata.(fields_{i}).numtime) == selday,:);
end
clear ans days endb field_ i randindx start selday
% plot both time series
close all
for i = 1:10%length(fields_)
% figure;
figure('Units','centimeters','OuterPosition',[1 1 28 20]);
subplot(2,1,1); hold on;box on; title([fields_{i} ', Z position'],...
'Interpreter','none');
xlabel('day [h]'); ylabel('Z-position [m]')
plot(seldata.(fields_{i}).numtime-min(seldata.(fields_{i}).numtime),...
seldata.(fields_{i}).avg_z_sm,'Color',[178/255 34/255 34/255],...
'LineWidth',1)
plot(seldata.(fields_{i}).numtime-min(seldata.(fields_{i}).numtime),...
seldata.(fields_{i}).islying*(-2.3)+2.3,'Color',[0/255 0/255 128/255],...
'LineWidth',2)
subplot(2,1,2); hold on;box on; title('Center distance',...
'Interpreter','none');
xlabel('day [h]'); ylabel('Center distance [m]')
plot(seldata.(fields_{i}).numtime-min(seldata.(fields_{i}).numtime),...
seldata.(fields_{i}).centerdist,'Color',[178/255 34/255 34/255],...
'LineWidth',1)
plot(seldata.(fields_{i}).numtime-min(seldata.(fields_{i}).numtime),...
seldata.(fields_{i}).islying*(-13)+13,'Color',[0/255 0/255 128/255],...
'LineWidth',2)
end
% summarize and calculate the metrics of the CP analysis
% for "mean" ==> residue = n*sum(var(x,1,2));
% for "std" ==> residue = sum(n*log(var(x,1,2)));
% for "rms" ==> residue = sum(n*log(sum(x.^2,2)/n));
% calculate residu when only one CP
%
cpsums = table((1:length(fields_))','VariableNames',{'No'});
for i = 1:height(cpsums)
% cowid
cpsums.cow(i,:) = seldata.(fields_{i}).object_name(1);
% residus for each of the stats - Z data
z = seldata.(fields_{i}).avg_z_sm(~isnan(seldata.(fields_{i}).avg_z_sm))';
p = seldata.(fields_{i}).numtime(~isnan(seldata.(fields_{i}).avg_z_sm))-...
min(seldata.(fields_{i}).numtime);
n = length(z);
cpsums.n(i) = n;
cpsums.ZmeanR(i) = n*sum(var(z,1,2));
cpsums.ZstdR(i) = sum(n*log(var(z,1,2)));
cpsums.ZrmsR(i) = sum(n*log(sum(z.^2,2)/n));
% residus for each of the stats with one CP - Z data
[~,cpsums.ZmeanR1(i)] = F2_cpsingle(z,'mean',300);
[~,cpsums.ZstdR1(i)] = F2_cpsingle(z,'std',300);
[~,cpsums.ZrmsR1(i)] = F2_cpsingle(z,'rms',300);
%
cdd = seldata.(fields_{i}).centerdist(~isnan(seldata.(fields_{i}).centerdist))';
p = seldata.(fields_{i}).numtime(~isnan(seldata.(fields_{i}).centerdist))-...
min(seldata.(fields_{i}).numtime);
n = length(cdd);
cpsums.n(i) = n;
cpsums.CDmeanR(i) = n*sum(var(cdd,1,2));
cpsums.CDstdR(i) = sum(n*log(var(cdd,1,2)));
cpsums.CDrmsR(i) = sum(n*log(sum(cdd.^2,2)/n));
% residus for each of the stats with one CP - Z data
[~,cpsums.CDmeanR1(i)] = F2_cpsingle(cdd,'mean',300);
[~,cpsums.CDstdR1(i)] = F2_cpsingle(cdd,'std',300);
[~,cpsums.CDrmsR1(i)] = F2_cpsingle(cdd,'rms',300);
end
% difference between zero and one cp
cpsums.ZmeanDif = cpsums.ZmeanR-cpsums.ZmeanR1;
cpsums.ZstdDif = cpsums.ZstdR-cpsums.ZstdR1;
cpsums.ZrmsDif = cpsums.ZrmsR-cpsums.ZrmsR1;
cpsums.CDmeanDif = cpsums.CDmeanR-cpsums.CDmeanR1;
cpsums.CDstdDif = cpsums.CDstdR-cpsums.CDstdR1;
cpsums.CDrmsDif = cpsums.CDrmsR-cpsums.CDrmsR1;
% plot penalty
figure('Units','centimeters','OuterPosition',[4 1 28 10]);
subplot(1,3,1); hold on; box on; ylim([0 30000]); xlim([0.5 10.5])
plot(cpsums.ZmeanR,'d','Color',[51/255 0 102/255], ...
'MarkerFaceColor',[51/255 0 102/255],...
'MarkerSize',8)
plot(cpsums.ZmeanR1,'o','Color',[153/255 0 76/255], ...
'MarkerFaceColor',[153/255 0 76/255],...
'MarkerSize',8)
title('Z, 0 CP - 1 CP, mean');xlabel('Example')
subplot(1,3,2); hold on; box on; ylim([-150000 0]); xlim([0.5 10.5])
plot(cpsums.ZstdR,'d','Color',[51/255 0 102/255], ...
'MarkerFaceColor',[51/255 0 102/255],...
'MarkerSize',8)
plot(cpsums.ZstdR1,'o','Color',[153/255 0 76/255], ...
'MarkerFaceColor',[153/255 0 76/255],...
'MarkerSize',8)
title('Z, 0 CP - 1 CP, std');xlabel('Example')
subplot(1,3,3); hold on; box on; ylim([-30000 30000]); xlim([0.5 10.5])
plot(cpsums.ZrmsR,'d','Color',[51/255 0 102/255], ...
'MarkerFaceColor',[51/255 0 102/255],...
'MarkerSize',8)
plot(cpsums.ZrmsR1,'o','Color',[153/255 0 76/255], ...
'MarkerFaceColor',[153/255 0 76/255],...
'MarkerSize',8)
title('Z, 0 CP - 1 CP, rms'); xlabel('Example')
figure('Units','centimeters','OuterPosition',[4 1 28 10]);
subplot(1,3,1); hold on; box on; ylim([0 700000]); xlim([0.5 10.5])
plot(cpsums.CDmeanR,'d','Color',[51/255 0 102/255], ...
'MarkerFaceColor',[51/255 0 102/255],...
'MarkerSize',8)
plot(cpsums.CDmeanR1,'o','Color',[153/255 0 76/255], ...
'MarkerFaceColor',[153/255 0 76/255],...
'MarkerSize',8)
title('CD, 0 CP - 1 CP, mean');xlabel('Example')
subplot(1,3,2); hold on; box on; ylim([0 180000]); xlim([0.5 10.5])
plot(cpsums.CDstdR,'d','Color',[51/255 0 102/255], ...
'MarkerFaceColor',[51/255 0 102/255],...
'MarkerSize',8)
plot(cpsums.CDstdR1,'o','Color',[153/255 0 76/255], ...
'MarkerFaceColor',[153/255 0 76/255],...
'MarkerSize',8)
title('CD, 0 CP - 1 CP, std');xlabel('Example')
subplot(1,3,3); hold on; box on; ylim([150000 350000]); xlim([0.5 10.5])
plot(cpsums.CDrmsR,'d','Color',[51/255 0 102/255], ...
'MarkerFaceColor',[51/255 0 102/255],...
'MarkerSize',8)
plot(cpsums.CDrmsR1,'o','Color',[153/255 0 76/255], ...
'MarkerFaceColor',[153/255 0 76/255],...
'MarkerSize',8)
title('CD, 0 CP - 1 CP, rms'); xlabel('Example')
% plot max penalty
figure('Units','centimeters','OuterPosition',[4 1 28 20]);
subplot(2,3,1); h = histogram(cpsums.ZmeanDif);h.FaceColor = [102/255 0 102/255];
subplot(2,3,2); h = histogram(cpsums.ZstdDif);h.FaceColor = [102/255 0 102/255];
subplot(2,3,3); h = histogram(cpsums.ZrmsDif);h.FaceColor = [102/255 0 102/255];
subplot(2,3,4); h = histogram(cpsums.CDmeanDif);h.FaceColor = [102/255 0 102/255];
subplot(2,3,5); h = histogram(cpsums.CDstdDif);h.FaceColor = [102/255 0 102/255];
subplot(2,3,6); h = histogram(cpsums.CDrmsDif);h.FaceColor = [102/255 0 102/255];
% plot number of data points ifv empirical estimate of data with no cp
figure('Units','centimeters','OuterPosition',[4 1 28 20]);
subplot(2,3,1); plot(cpsums.n, cpsums.ZmeanR,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
title('Z, N vs. mean'); xlabel('n');ylabel('ES')
subplot(2,3,2); plot(cpsums.n, cpsums.ZstdR,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
title('Z, N vs. std'); xlabel('n');ylabel('ES')
subplot(2,3,3); plot(cpsums.n, cpsums.ZrmsR,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
title('Z, N vs. rms'); xlabel('n');ylabel('ES')
subplot(2,3,4); plot(cpsums.n, cpsums.CDmeanR,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
title('CD, N vs. mean'); xlabel('n');ylabel('ES')
subplot(2,3,5); plot(cpsums.n, cpsums.CDstdR,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
title('CD, N vs. std'); xlabel('n');ylabel('ES')
subplot(2,3,6); plot(cpsums.n, cpsums.CDrmsR,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
title('CD, N vs. rms'); xlabel('n');ylabel('ES')
% plot max penalty ifo n -- the more data points, the
figure('Units','centimeters','OuterPosition',[4 1 28 20]);
subplot(2,3,1); plot(cpsums.n, cpsums.ZmeanDif,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
title('Z, N vs. max penalty, mean'); xlabel('n');ylabel('max penalty')
subplot(2,3,2); plot(cpsums.n, cpsums.ZstdDif,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
title('Z, N vs. max penalty, std'); xlabel('n');ylabel('max penalty')
subplot(2,3,3); plot(cpsums.n, cpsums.ZrmsDif,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
title('Z, N vs. max penalty, rms'); xlabel('n');ylabel('max penalty')
subplot(2,3,4); plot(cpsums.n, cpsums.CDmeanDif,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
title('CD, N vs. max penalty, mean'); xlabel('n');ylabel('max penalty')
subplot(2,3,5); plot(cpsums.n, cpsums.CDstdDif,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
title('CD, N vs. max penalty, std'); xlabel('n');ylabel('max penalty')
subplot(2,3,6); plot(cpsums.n, cpsums.CDrmsDif,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
title('CD, N vs. max penalty, rms'); xlabel('n');ylabel('max penalty')
% add to cpsums the number of cp we are looking for
for i = 1:length(fields_)
% number of changes
cpsums.nochanges(i) = length(...
find(seldata.(fields_{i}).islying(2:end)- ...
seldata.(fields_{i}).islying(1:end-1)~=0));
end
% plot number of changes vs diff
figure('Units','centimeters','OuterPosition',[4 1 28 20]);
subplot(2,3,1); plot(cpsums.nochanges,cpsums.ZmeanDif,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
subplot(2,3,2); plot(cpsums.nochanges,cpsums.ZstdDif,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
subplot(2,3,3); plot(cpsums.nochanges,cpsums.ZrmsDif,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
subplot(2,3,4); plot(cpsums.nochanges,cpsums.CDmeanDif,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
subplot(2,3,5); plot(cpsums.nochanges,cpsums.CDstdDif,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
subplot(2,3,6); plot(cpsums.nochanges,cpsums.CDrmsDif,'o','Color',[102/255 0 51/255],...
'MarkerSize',6,'LineWidth',1.5)
% try a different approach based on a moving STD window of 30 minutes
tic
ind = find(~isnan(seldata.cow_99.avg_z_sm));
n = length(ind);
x = seldata.cow_99.avg_z_sm(ind);
test = movsum(30*60*log(movvar(x,30*60)),30*60);
toc
figure;
plot(test)
sum(n*log(var(x,1,2)));
[~,b] = findchangepts(x,'Statistic','std','MinThreshold',100000000000)
figure,
findchangepts(x,'Statistic','std','MinThreshold',abs(b)*0.05, 'MinDistance',600)
figure,
findchangepts(x,'Statistic','std','MaxNumChanges',20, 'MinDistance',600)
% for "mean" ==> residue = n*sum(var(x,1,2));
% for "std" ==> residue = sum(n*log(var(x,1,2)));
% for "rms" ==> residue = sum(n*log(sum(x.^2,2)/n));
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment