Commit 342487d9 authored by Roelofsen, Hans's avatar Roelofsen, Hans

Merge branch 'dev' into 'master'

DEV

See merge request !1
parents 14cc7c1c b998a02a
"""
Determine draagkracht for vogel and vlinder using NDFF database
Hans Roelofsen, May 2020
"""
import argparse
import numpy as np
import pandas as pd
import geopandas as gp
import rasterstats as rast
import rasterio as rio
from mnp import helpers as hlp
# from sample.mnp import helpers as hlp
# sp = hlp.Species('Tapuit')
# methode = 'centroid'
parser = argparse.ArgumentParser()
parser.add_argument('species', type=str, help='species name')
parser.add_argument('method', choices=['orig', 'norm', 'centroid'])
parser.add_argument('--totals', action='store_true', default=False, help='include total hectares of beheertypen?')
parser.add_argument('--indx', action='store_true', default=False, help='include index?')
parser.add_argument('--mnp', action='store_true', default=False, help='include MNP draagkracht referentie?')
parser.add_argument('--pxls', help='output in pixel count instead of hectares', action='store_true', default=False)
parser.add_argument('--ref_plys', help='path to referentie polygonen')
# Parse arguments
args = parser.parse_args()
sp = hlp.Species(args.species)
totals = args.totals
methode = args.method
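# one 25 m pixel covers 25 m x 25 m = 625 m2 = 0.0625 ha, hence the conversion factor below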
output_factor = 1 if args.pxls else 0.0625
output_unit = 'pixels' if args.pxls else 'hectares'
indx = args.indx
mnp = args.mnp
# Path to beheertypenkaart
bt_tiff = r'c:\apps\temp_geodata\nbt\nbt_20190730_111603.tif'
bt_kaart = rio.open(bt_tiff, mode='r')
# Table with pixel values, beheertypecodes etc
nbts = hlp.nbts()
pxl_2_btB = dict(zip(nbts.pxl, nbts.bt_B))
btA_2_btB = dict(zip(nbts.bt_A, nbts.bt_B))
# Read NDFF shapefile
ndff = hlp.read_ndff_shp(sp.ndff_src)
# define queries that isolate NDFF data of interest
queries = {'q1': ndff.loc[(ndff.year >= 2000) & (ndff.year.notna())].index,
'q2': ndff.loc[ndff.nl_name.isin([sp.name])].index,
'q3': ndff.loc[ndff.area < 10000].index}
sel = set.intersection(*[set(idx) for idx in queries.values()])
assert len(sel) > 0, 'no observations remaining'
print('Found {0} NDFF observations for {1}'.format(len(sel), sp.name))
# Create GDF from NDFF DF following requested method
if methode == 'norm':
ndff_gdf = gp.GeoDataFrame({'ID': [1] * len(sel), 'soort': [sp.name] * len(sel),
'geometry': ndff.loc[sel].centroid.buffer(12.5)}).dissolve(by='ID')
elif methode == 'centroid':
ndff_gdf = gp.GeoDataFrame({'ID': [1] * len(sel), 'soort': [sp.name] * len(sel),
'geometry': ndff.loc[sel].centroid})
else:  # methode == 'orig'
    ndff_gdf = gp.GeoDataFrame({'ID': [1] * len(sel), 'soort': [sp.name] * len(sel),
                                'geometry': ndff.loc[sel].geometry}).dissolve(by='ID')
if methode in ['norm', 'orig']:
    # Overlay the NDFF observations with the nbt kaart
sp_stats = rast.zonal_stats(vectors=ndff_gdf, raster=bt_kaart.read(1), categorical=True, all_touched=False,
category_map=pxl_2_btB, affine=bt_kaart.affine)
else:  # methode == 'centroid'
# Couple each NDFF observation to a Col-Row index from the bt_kaart using the Affine transformation.
# Then drop duplicates to ensure only 1 NDFF observation is counted per bt_kaart pixel.
# See: https://www.perrygeo.com/python-affine-transforms.html
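    # A minimal sketch (assumption, not the actual helper) of what hlp.xy_2_colrow
    # could do: the inverse of the raster's affine transform maps an (x, y) coordinate
    # to a fractional (col, row) pixel index, which is truncated into a hashable key.
    #
    #   def xy_2_colrow(geom, rio_object):
    #       col, row = ~rio_object.affine * (geom.x, geom.y)
    #       return '{0}_{1}'.format(int(col), int(row))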
ndff_gdf.loc[:, 'nbt_row_col'] = ndff_gdf.geometry.apply(hlp.xy_2_colrow, rio_object=bt_kaart)
ndff_gdf.drop_duplicates(subset='nbt_row_col', inplace=True)
sp_stats = rast.zonal_stats(vectors=ndff_gdf, raster=bt_kaart.read(1), categorical=True, all_touched=False,
category_map=pxl_2_btB, affine=bt_kaart.affine)
# Presence of species as pixel count for all beheertypen in the beheertypenkaart, i.e. not only those with observations.
species_stats = dict(zip(set(nbts.bt_B), [0] * len(set(nbts.bt_B))))
for waarneming in sp_stats:
    for beheertype in waarneming.keys():
        species_stats[beheertype] += np.multiply(waarneming[beheertype], output_factor)
# Total acreage of each beheertype
if totals:
bt_u, bt_c = np.unique(bt_kaart.read(1), return_counts=True)
bt_stats_prelim = dict(zip(bt_u, [np.multiply(bt_count, output_factor) for bt_count in bt_c]))
    # Note that more than one pixel value can map to the same beheertype!
bt_stats = dict(zip(set(nbts.bt_B), np.zeros(nbts.shape[0])))
for k, v in bt_stats_prelim.items():
bt = pxl_2_btB[k]
bt_stats[bt] += v
# Check totals of beheertypen!
    assert np.sum([v for _, v in bt_stats.items()]) == np.multiply(np.prod(bt_kaart.shape), output_factor)
# Read draagkrachten as they currently are from MNP parameterset
if mnp:
mnp_stats_prelim = hlp.get_mnp_dk(sp) # MNP draagkrachten are provided for beheertype_A, ie codes from 20190730
mnp_stats = dict(zip(set(nbts.bt_B), np.zeros(nbts.shape[0])))
for k, v in mnp_stats_prelim.items():
try:
mnp_stats[k] += v
except KeyError:
pass
# Summary dataframe w. Beheertypen as columns and species acreage as rows
datas = [species_stats]
indexes = ['{0}_{1}'.format(sp.name, output_unit)]
if mnp:
datas.append(mnp_stats)
indexes.append('03_MNP_{0}'.format(sp.name))
if totals:
datas.append(bt_stats)
indexes.append('total_{0}'.format(output_unit))
pd.DataFrame(data=datas, index=indexes).T.to_clipboard(sep=';', index=indx)
print('\nResults are on the clipboard')
"""
Program for calculating draagkracht (dk) for MNP parameterisation. Sourced from the NDFF species observations dataset,
combined with a nominal raster of beheertypen (bt). DK is defined for each bt <-> species combination as the ratio
between the area of a bt holding 1 or more observations of the species and the overall area of that bt.
Variables:
species_name: name of the species, either vogel or vlinder
ply_src: polygon shapefile for spatial differentiation of dk (areas)
nbt_src: path to neergeschaalde beheertypenkaart (bt) (nominal raster)
Method:
1. total pixels per bt per area
2. Average N-deposition in mol/(ha*year) per area
3. reduce NDFF observations to 1 observation per nbt pixel using forward affine transform
4. couple each NDFF observation to 1 area
5. couple each NDFF observation to 1 bt
6. rework tables to output
<SPECIES NAME>
AreaName|BT1_pxl-count|BT1_ndff-pxl-count|BT2_pxl-count|BT2_ndff-pxl-count|...|BTn_pxl-count|BTn_ndff-pxl-count
By: Hans Roelofsen, WEnR, 29 June 2020
"""
import argparse
import numpy as np
import pandas as pd
import geopandas as gp
import rasterio as rio
import rasterstats as rast
from mnp import helpers as hlp
# from sample.mnp import helpers as hlp
parser = argparse.ArgumentParser()
parser.add_argument('species', type=str, help='species name')
parser.add_argument('--bt', type=str, help='beheertype code Nxx.yy')
parser.add_argument('--shp', action='store_true', default=False, help='write shapefile?')
# Parse arguments
args = parser.parse_args()
sp = hlp.Species(args.species)
#sp = hlp.Species('Tapuit')
bt = args.bt
shp = args.shp
# TODO 20200707: implement optional arguments in the process flow
# Path to beheertypenkaart
bt_tiff = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\nBT\nbt_20190730_111603.tif'
bt_kaart = rio.open(bt_tiff, mode='r')
# Path to NDepkaart
ndep_src = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\MNP\draakgracht\depo_ntot_2020\depo_ntot_BBR2020_1902.asc'
ndep_kaart = rio.open(ndep_src)
# Table with pixel values, beheertypecodes etc
nbts = hlp.nbts()
pxl2btA = dict(zip(nbts.pxl, nbts.bt_A))  # btA: categorisation following BT codes 20190730
pxl2btB = dict(zip(nbts.pxl, nbts.bt_B))  # btB: categorisation following BT codes nBT2016
btA2btB = dict(zip(nbts.bt_A, nbts.bt_B))
'''1. Total btB pixels per area. NOTE: the values of pxl2btB contain duplicates, i.e. more than one pixel value can
refer to e.g. N12.02. This causes problems when the values of pxl2btB are used as the KEYS of the rasterstats output.
Therefore use the btA categories.'''
areas = hlp.gen_squares(x_ul=0, y_ul=625000, ncol=28, nrow=33, size=10000)
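# A minimal sketch of a gen_squares-style helper, assuming hlp.gen_squares tiles square
# cells of `size` m from an upper-left origin (RD New coordinates) and returns a
# GeoDataFrame with an ID column. Named _gen_squares_sketch so it cannot shadow the real helper.
def _gen_squares_sketch(x_ul, y_ul, ncol, nrow, size):
    from shapely.geometry import box  # shapely ships with geopandas
    squares = [box(x_ul + i * size, y_ul - (j + 1) * size, x_ul + (i + 1) * size, y_ul - j * size)
               for j in range(nrow) for i in range(ncol)]
    return gp.GeoDataFrame({'ID': range(len(squares)), 'geometry': squares})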
bt_stats_prelim1 = rast.zonal_stats(vectors=areas, raster=bt_kaart.read(1), categorical=True, category_map=pxl2btA,
affine=bt_kaart.affine)
# Reclass btA categories to btB categories
bt_stats_prelim2 = []
for stat in bt_stats_prelim1:
btB_stat = {}
for k, v in stat.items():
btB_key = btA2btB[k]
try:
btB_stat[btB_key] += v
except KeyError:
btB_stat[btB_key] = v
bt_stats_prelim2.append(btB_stat)
# pixel counts for each area should sum to 10000^2 / 25^2 = 160,000, or 0 when the area is outside the BT-kaart
assert all([sum(area_dict.values()) in [160000, 0] for area_dict in bt_stats_prelim2])
# make sure all btB categories are represented as dict keys
bt_stats = []
for area_stats in bt_stats_prelim2:
empty_stats = dict(zip(set(nbts.bt_B), [0] * len(set(nbts.bt_B))))
# See: https://stackoverflow.com/questions/38987/how-do-i-merge-two-dictionaries-in-a-single-expression-in-python
bt_stats.append({**empty_stats, **area_stats})
areas = areas.join(pd.DataFrame.from_dict(bt_stats, orient='columns'))
'''2. Average NDep per area'''
ndep_stats = rast.zonal_stats(vectors=areas, raster=ndep_kaart.read(1), stats='min max mean', affine=ndep_kaart.affine)
areas.loc[:, 'ndep_max'] = [x['max'] for x in ndep_stats]
areas.loc[:, 'ndep_min'] = [x['min'] for x in ndep_stats]
areas.loc[:, 'ndep_mean'] = [x['mean'] for x in ndep_stats]
'''3. Construct GDF with NDFF observations reduced to 1 observation per BT-kaart pixel. Use the forward affine
transform to tie NDFF observation centroids to a BT-kaart pixel'''
ndff_all = hlp.read_ndff_shp(sp.ndff_src)
queries = {'q1': ndff_all.loc[(ndff_all.year >= 2000) & (ndff_all.year.notna())].index,
'q2': ndff_all.loc[ndff_all.nl_name.isin([sp.name])].index,
'q3': ndff_all.loc[ndff_all.area < 10000].index}
sel = set.intersection(*[set(idx) for idx in queries.values()])
assert len(sel) > 0, ' no observations remaining'
print(' found {0} NDFF observations for {1}'.format(len(sel), sp.name))
ndff = gp.GeoDataFrame({'ndff_id': [1] * len(sel), 'soort': [sp.name] * len(sel), 'geometry': ndff_all.loc[sel].centroid})
ndff.loc[:, 'nbt_row_col'] = ndff.geometry.apply(hlp.xy_2_colrow, rio_object=bt_kaart)
ndff.drop_duplicates(subset='nbt_row_col', inplace=True)
'''4. Join each NDFF observation to an area'''
ndff = gp.sjoin(left_df=ndff, right_df=areas.loc[:, ['ID', 'geometry']], how='left', op='within')
'''5. Join each NDFF observation to a BT'''
sp_stats = rast.zonal_stats(vectors=ndff, raster=bt_kaart.read(1), categorical=True, all_touched=False,
category_map=pxl2btB, affine=bt_kaart.affine)
# not more than 1 BT per NDFF observation and not more than 1 pixel per NDFF observation
assert all([len(x) == 1 for x in sp_stats])
assert all([v == 1 for x in sp_stats for v in x.values()])
ndff.loc[:, 'btB'] = [k for x in sp_stats for k in x.keys()]
'''6. Pivot NDFF observations to area IDs and sum the number of observations. Then merge with the areas df.'''
assert set(ndff.btB).issubset(areas.columns)
ndff_per_area = pd.pivot_table(data=ndff, index='ID', columns='btB', values='ndff_id', aggfunc='sum')
ndff_per_area.columns = ['{0}_ndffpxls'.format(x) for x in ndff_per_area.columns.tolist()]
# Drop btB columns from areas where there are no NDFF observations
bt_drop = [col for col in areas.columns.difference(set(ndff.btB)).tolist() if col.startswith(('N', 'W', 'A', 'L'))]
'''Merge NDFF observations df with areas df for full dataframe'''
out = pd.merge(left=areas, right=ndff_per_area, left_on='ID', right_index=True, how='left')
# calculate the ratio ndff-pixels / bt-pixels per area
for btB in set(nbts.bt_B):
    try:
        dk = out.loc[:, '{}_ndffpxls'.format(btB)].divide(out.loc[:, btB])
    except KeyError:
        continue
    out = out.join(pd.DataFrame(data=dk.rename('{}_dk'.format(btB))))
out = out.reindex(sorted(out.columns), axis=1)
# Copy to clipboard dropping areas entirely nodata and BT columns with no matching NDFF observation
out.loc[out.nodata < 160000, :].drop(labels=['geometry'] + bt_drop, axis=1).fillna(0).to_clipboard(sep=';', index=False)
print('\nResults for {} are on the clipboard'.format(sp.name))
if shp:
out.to_file(r'c:\apps\temp_geodata\tapuit_dk_adv.shp')
@@ -7,9 +7,13 @@ Target format
Species codes and set are sourced from MNP parameter files.
Relation between species presence and vegetation types is sourced from DVN data, with thanks to Stephan Hennekes.
This script is completely SEPARATE from dk_vogel_vlinder.py and makes NO use of the Species and BT classes in
mnp/helpers.py. It would be cleaner if it did, but I will not get around to that now (29-Oct-2020 10:39).
Hans Roelofsen, 23 June 2020
"""
import numpy as np
import itertools
import pandas as pd
from sample.mnp import helpers as hlp
@@ -17,77 +21,101 @@ from sample.mnp import helpers as hlp
"""Source data"""
# MNP param file with all species-codes and Taxonomic group
src_mnp_sp_tax = r'W:\PROJECTS\qmar\MNP-SNL-ParameterSet\Parameters_v06_2019_12_09\07_MNP_versie4_par_population_factors.csv'
# MNP param file with species code, local name, scientific name
src_mnp_sp_nms = r'w:\PROJECTS\qmar\MNP-SNL-ParameterSet\Parameters_v06_2019_12_09\09_MNP_versie4_Group_Species_valid model_468.csv'
# MNP param file with species code, BT code (versie Marlies), BT Code (versie nbt2016) and draagkracht
src_mnp_dk = r'w:\PROJECTS\qmar\MNP-SNL-ParameterSet\Parameters_v06_2019_12_09\03_MNP_versie6_par_density_factors_BT2019_v2.csv'
src_asso_x_bts = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\MNP\draakgracht\plant\src_data\vertaling_IPO_ASSO_2009.txt'
src_asso_x_sps = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\MNP\draakgracht\plant\src_data\q_syntaxa_soorten_frequentie_trouwsoorten.txt'
mnp_sp_tax = pd.read_csv(src_mnp_sp_tax, sep=',')
mnp_sp_nms = pd.read_csv(src_mnp_sp_nms, sep=',')
mnp = pd.merge(left=mnp_sp_nms, right=mnp_sp_tax, how='left', left_on='Species_code', right_on='Species_code')
# Source file van Wieger W met vertaling tussen Associaties en Beheertypen
src_assoXbts = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\MNP\draakgracht\plant\src_data\vertaling_IPO_ASSO_2009.txt'
# Source file van Wieger W met trouwgraad en frequentie van plantensoorten binnen een Associatie
src_assoXsps = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\MNP\draakgracht\plant\src_data\q_syntaxa_soorten_frequentie_trouwsoorten.txt'
# Merge MNP files with species information. Verify species codes and names occur once and once only in the dataset.
mnp_sp = pd.merge(left=pd.read_csv(src_mnp_sp_nms, sep=','),
right=pd.read_csv(src_mnp_sp_tax, sep=','),
how='left', left_on='Species_code', right_on='Species_code')
assert all([mnp_sp.shape[0] == len(set(getattr(mnp_sp, x))) for x in ['Species_code', 'Scientific_name', 'Local_name']])
# Read MNP Draagkrachten
mnp_dk = pd.read_csv(src_mnp_dk, sep=',', usecols=['Species_code', 'Land_type_code', 'Land_type_quality'])
asso_x_bts = pd.read_csv(src_asso_x_bts, sep=';', comment='#')
asso_x_sps = pd.read_csv(src_asso_x_sps, sep=';', comment='#', usecols=["Syntaxon_code", "Syntaxon_naam", "Soortnummer",
"Soortnaam", "Frequentie", "Trouw"])
# Read other source files.
assoXbts = pd.read_csv(src_assoXbts, sep=';', comment='#')
assoXsps = pd.read_csv(src_assoXsps, sep=';', comment='#', usecols=["Syntaxon_code", "Syntaxon_naam", "Soortnummer",
"Soortnaam", "Frequentie", "Trouw"])
'''Reduce sub-associations 00XX11a to 00XX11 and repair Beheertype codes in Input sheet vertaling_IPO_ASSO_2009.txt'''
asso_x_bts.loc[:, 'syntaxon_gen'] = asso_x_bts.short.str.slice(stop=6).str.upper()
asso_x_bts.loc[:, 'BT'] = asso_x_bts.Bc.apply(hlp.fix_bt)
assoXbts.loc[:, 'syntaxon_gen'] = assoXbts.short.str.slice(stop=6).str.upper()
assoXbts.loc[:, 'btC'] = assoXbts.Bc.apply(hlp.fix_bt, as_mnp=True)
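# A minimal sketch (assumption: the real hlp.fix_bt and its as_mnp flag may well differ)
# of the kind of repair hinted at above, e.g. zero-padding 'N12.2' to the Nxx.yy pattern 'N12.02':
#
#   def fix_bt(code, as_mnp=False):
#       major, _, minor = str(code).strip().upper().partition('.')
#       return '{0}.{1}'.format(major, minor.zfill(2))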
'''Reduce sub-associations in Input sheet q_syntaxa_soorten_frequentie_trouwsoorten.txt'''
asso_x_sps.loc[:, 'syntaxon_gen'] = asso_x_sps.Syntaxon_code.str.slice(stop=6).str.upper()
'''Reduce neergeschaalde (downscaled) beheertypen in the MNP reference draagkrachten table'''
sel = mnp_dk.loc[mnp_dk.Land_type_code.str.slice(start=7) == '00', :].index
mnp_dk.loc[:, 'BT'] = None
mnp_dk.loc[sel, 'BT'] = mnp_dk.loc[sel, 'Land_type_code'].str.slice(stop=6)
assoXsps.loc[:, 'syntaxon_gen'] = assoXsps.Syntaxon_code.str.slice(stop=6).str.upper()
'''Mappings between i) Associaties and Beheertypen (n:n) ii) name and species code'''
bt2asso = {}
for bt in list(set(asso_x_bts.BT)):
bt2asso[bt] = list(set(asso_x_bts.loc[asso_x_bts.BT == bt, 'syntaxon_gen']))
name2code = dict(zip(mnp_sp_nms.Scientific_name, mnp_sp_nms.Species_code))
beheertypen = list(set(assoXbts.btC))
bt2asso = {bt: list(set(assoXbts.loc[assoXbts.btC == bt, 'syntaxon_gen'])) for bt in beheertypen}
scientificname2code = dict(zip(mnp_sp.Scientific_name, mnp_sp.Species_code))
code2scientificname = dict(zip(mnp_sp.Species_code, mnp_sp.Scientific_name))
code2localname = dict(zip(mnp_sp.Species_code, mnp_sp.Local_name))
'''New dataframe with all species <-> Beheertype combinations.'''
dk = pd.DataFrame(data=[(a, b) for (a, b) in itertools.product(mnp.loc[mnp.Taxon_group == 'P', 'Scientific_name'],
set(asso_x_bts.BT))], columns=['sp_name', 'BT'])
dk.loc[:, 'sp_code'] = dk.sp_name.map(name2code)
'''Summarize Frequentie for each BT <> Species combination'''
frequenties = []
trouws = []
mnp_draagkracht = []
dk = pd.DataFrame(columns=['Scientific_name', 'Land_type_code'],
data=[(a, b) for (a, b) in itertools.product(mnp_sp.loc[mnp_sp.Taxon_group == 'P', 'Scientific_name'],
set(assoXbts.btC))])
dk = pd.concat([dk, pd.DataFrame(columns=['mean_frequentie', 'mean_trouw', 'Land_type_quality'])], sort=True)
dk.loc[:, 'Species_code'] = dk.Scientific_name.map(scientificname2code)
'''Calculate draagkracht for each BT <> Species combination as a function of Frequentie and Trouw'''
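# A minimal sketch of hlp.draagkracht_ww, assuming it implements the formula quoted in the
# comment inside the loop below; defined under a different name so the real helper is not shadowed.
def _draagkracht_ww_sketch(frequentie, trouw):
    # dk = min(max(frequentie, trouw) / 100 * 5, 1)
    return np.min([np.multiply(np.divide(np.max([frequentie, trouw]), 100), 5), 1])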
for row in dk.itertuples():
    # Land_type_quality = mean frequency of the species in the associations belonging to the requested BT
sel = asso_x_sps.loc[(asso_x_sps.Soortnaam == row.sp_name) & (asso_x_sps.syntaxon_gen.isin(bt2asso[row.BT])), :]
mean_freq = sel.Frequentie.mean()
mean_trouw = sel.Trouw.mean()
mean_mnp_dk = mnp_dk.loc[(mnp_dk.Species_code == row.sp_code) & (mnp_dk.BT == row.BT), 'Land_type_quality'].mean()
frequenties.append(mean_freq)
trouws.append(mean_trouw)
mnp_draagkracht.append(mean_mnp_dk)
    # Land_type_quality == draagkracht == dk == Wieger Wamelink's function == f(trouw, frequentie) ==
    # np.min([np.multiply(np.divide(np.max([frequentie, trouw]), 100), 5), 1])
sel = assoXsps.loc[(assoXsps.Soortnaam == row.Scientific_name) &
(assoXsps.syntaxon_gen.isin(bt2asso[row.Land_type_code])), :]
if sel.empty:
print('{0} X {1}: failed'.format(row.Scientific_name, row.Land_type_code))
mean_freq, mean_trouw, dk_ww = np.nan, np.nan, np.nan
        # this could probably be done more simply with groupby or similar
else:
mean_freq = sel.Frequentie.mean()
mean_trouw = sel.Trouw.mean()
dk_ww = hlp.draagkracht_ww(frequentie=mean_freq, trouw=mean_trouw)
print('{0} X {1}: n={2} freq={3} trouw={4} dk={5}'
.format(row.Scientific_name, row.Land_type_code, sel.shape[0], mean_freq, mean_trouw, dk_ww))
if sel.shape[0] > 0:
print('Found {0} records for species {1} and BT {2}, with mean '
'Frequentie: {3} and mean Trouw: {4}'.format(sel.shape[0], row.sp_name, row.BT, mean_freq, mean_trouw))
dk.loc[row.Index, 'mean_frequentie'] = mean_freq
dk.loc[row.Index, 'mean_trouw'] = mean_trouw
dk.loc[row.Index, 'Land_type_quality'] = dk_ww
'''Write draagkracht file per species'''
# TODO: minimum draagkracht is 0.01
dk.loc[:, 'mean_frequentie'] = frequenties
dk.loc[:, 'mean_trouw'] = trouws
dk.loc[:, 'mnp_land_type_q'] = mnp_draagkracht
dk.loc[:, 'dk_ww'] = dk.apply(lambda row: hlp.draagkracht_ww(row.mean_frequentie, row.mean_trouw), axis=1)
dk.loc[dk.mnp_land_type_q.notna(), :].to_clipboard(sep=';', index=False)
for species_code in list(set(dk.Species_code)):
local_name = code2localname[species_code].replace(' ', '_')
    sel = dk.loc[(dk.Species_code == species_code) & dk.Land_type_quality.notna() & (dk.Land_type_quality >= 0.01),
                 ['Species_code', 'Land_type_code', 'Land_type_quality']].round({'Land_type_quality': 3})
if not sel.empty:
sel.to_csv(r'c:\apps\z_temp\mnp_dk\03_MNP_versie7_par_density_factors_{}.csv'.format(local_name),
index=False, header=True, sep=',')
'''5. Reporting'''
'''5. Reporting
sp_done = len(set(dk.loc[dk.dk_ww.notna(), 'sp_name']))
bt_done = len(set(dk.loc[dk.dk_ww.notna(), 'BT']))
sp_all = len(set(dk.sp_name))
bt_all = len(set(dk.BT))
u_bt = len(set(asso_x_bts.BT)) # nr of unique Beheertypen in WW's excel
u_sp = len(set(asso_x_sps.Soortnaam)) # nr of unique species in WW's excel
r_sp = len(set(mnp_sp_nms.Scientific_name).intersection(set(asso_x_sps.Soortnaam)))
u_bt = len(set(assoXbts.BT)) # nr of unique Beheertypen in WW's excel
u_sp = len(set(assoXsps.Soortnaam)) # nr of unique species in WW's excel
r_sp = len(set(mnp_sp.Scientific_name).intersection(set(assoXsps.Soortnaam)))
'''
\ No newline at end of file
"""
Determine draagkracht (DK) for vogel and vlinder using the NDFF database
Hans Roelofsen, May 2020
DK exists for each species (sp) x beheertype (bt) combination as a number between 0 and 1
bt source data: dominant beheertype map at 25 m, including downscaled (neergeschaalde) types
sp source data: NDFF extracts.
Method:
1. Reduce NDFF observations to centroids
2. Cluster 1) to 25m cells matching the BT kaart grid. Ignore observation count per cell, retain spatial distribution
of NDFF observations only
3. Impose 5000m square grid over 2) and BT kaart
4. For each 5000m sq calculate: area_ndff_observations_within_BT / area_BT_type
5. Assign highest value of 4 for the species - BT combination
This has become a rather ugly little script. Apologies.
"""
import argparse
import numpy as np
import pandas as pd
import geopandas as gp
import rasterstats as rast
from mnp import helpers as hlp
# from sample.mnp import helpers as hlp
# sp = hlp.Species('Hooibeestje')
parser = argparse.ArgumentParser()
parser.add_argument('tax', type=str, help='taxonomic group', choices=['broedvogel', 'dagvlinder', 'test'])
parser.add_argument('species', type=str, help='species name', nargs='+')
parser.add_argument('--th', type=float, help='Minimum draagkracht', default=0.01)
parser.add_argument('--out_dir', type=hlp.valid_dir, help='output dir', default='./')
args = parser.parse_args()
species = args.species
tax = args.tax
out_dir = args.out_dir
if tax == 'test':
species = ['Scholekster', 'Zanglijster', 'Braamsluiper']
# Gather data that can be used for all species
ndff_all = hlp.read_ndff_shp(src=hlp.ndff_sources()[args.tax])
bt = hlp.BT20190612()  # beheertypenkaart (version 20190612, 25 m)
for i, species_name in enumerate(species, start=1):
    print('Doing {0} ({1}/{2})'.format(species_name, i, len(species)))
    '''Query NDFF shapefile'''
    try:
        sp = hlp.Species(species_name)
        ndff = hlp.query_ndff(gdf=ndff_all, species=sp, max_area=10000)  # NDFF observations of the selected species.
    except (AssertionError, ImportError) as e:
        hlp.write2file(df=pd.Series({species_name: e}), out_name='error_{}'.format(species_name), out_dir=args.out_dir)
        continue
out_name = '03_MNP_versie7_par_density_factors_{0}'.format(sp.naam_ned)
areas = hlp.gen_squares(x_ul=0, y_ul=625000, ncol=57, nrow=64, size=5000) # Square grid over NL
'''Reduce NDFF observations to NDFF grid cells. Then couple with Areas and BT kaart'''
# Couple each NDFF observation to a Col-Row index from the bt_kaart using the Affine transformation.
# Then drop duplicates to ensure only 1 NDFF observation is counted per bt_kaart pixel.
ndff.loc[:, 'nbt_row_col'] = ndff.geometry.centroid.apply(hlp.xy_2_colrow, rio_object=bt.raster)
pre = ndff.shape[0]
ndff.drop_duplicates(subset='nbt_row_col', inplace=True)
    print(' aggregating to 25 m pixels reduces observations from {0} to {1}'.format(pre, ndff.shape[0]))
# Couple NDFF observations to one of the squares. Drop observations that are outside the squares.
pre = ndff.shape[0]
ndff = gp.sjoin(left_df=ndff, right_df=areas.loc[:, ['ID', 'geometry']], how='inner', op='within')
post = ndff.shape[0]
if pre != post:
print(' dropping {} NDFF observations outside NL'.format(str(np.subtract(pre, post))))
    # Couple each NDFF observation to a BT. No more than 1 BT per NDFF observation
    # and no more than 1 pixel per NDFF observation
ndff__nbt = rast.zonal_stats(vectors=ndff.geometry.centroid, raster=bt.raster.read(1), categorical=True,
all_touched=False, category_map=bt.pxl2nbt, affine=bt.raster.affine, nodata=0)
try:
assert all([len(x) == 1 for x in ndff__nbt]), 'NDFF observations are matched to > 1 BT'
        assert all([v == 1 for x in ndff__nbt for v in x.values()]), 'Some NDFF observations cover more than 1 pixel'
except AssertionError as e:
hlp.write2file(df=pd.Series({sp.naam_ned: e}), out_name='error_{}'.format(out_name), out_dir=args.out_dir)
continue
    ndff.loc[:, 'nbt'] = [k for x in ndff__nbt for k in x.keys()]
ndff.dropna(subset=['nbt'], inplace=True)
    '''Determine acreage per BT for each grid square.'''
areas__nbt = rast.zonal_stats(vectors=areas, raster=bt.raster.read(1), categorical=True, all_touched=False,
category_map=bt.pxl2nbt, affine=bt.raster.affine, nodata=0)
    # pixel counts for each area should sum to 5000^2 / 25^2 = 40,000, or 0 when the area is outside the BT-kaart
    try:
        assert all([sum(area_dict.values()) in [40000, 0] for area_dict in areas__nbt])
except AssertionError as e:
print(e)
continue
# Make a list with len == areas.shape[0] where each item is a dictionary w. bt.units.nbt as keys and
# pixel count of nbt X area as values. Then join to the areas gdf
bt_stats = []
for area_stats in areas__nbt:
empty_stats = dict.fromkeys(bt.units.nbt, np.nan)
# See: https://stackoverflow.com/questions/38987/how-do-i-merge-two-dictionaries-in-a-single-expression-in-python
bt_stats.append({**empty_stats, **area_stats})
areas = areas.join(pd.DataFrame.from_dict(bt_stats, orient='columns'))
    # areas df now has all nbt units as columns with pixel count per area as values
    '''Pivot NDFF observations to area IDs, nbt as columns and count of ndff pixel IDs as values. Merge with areas df.'''
try:
assert set(ndff.nbt).issubset(areas.columns), "NDFF columns are not a subset of beheertypen, weird..."
except AssertionError as e:
print(e)
continue
ndff_per_area = pd.pivot_table(data=ndff, index='ID', columns='nbt', values='id', aggfunc='count')
ndff_per_area.columns = ['{0}_ndffpxls'.format(x) for x in ndff_per_area.columns.tolist()]
'''Merge NDFF observations df with areas df for final df where rows=areas, with cols for BT count and BTxNDFF count'''
areas = pd.merge(left=areas, right=ndff_per_area, left_on='ID', right_index=True, how='left')
    '''Calculate draagkracht as the largest ratio ndff-pixels / bt-pixels per area. Ignore NoData'''
dks = {}
for beheertype in set(bt.units.nbt).difference(set(['nodata'])):
try:
draagkracht = areas.loc[:, '{}_ndffpxls'.format(beheertype)].divide(areas.loc[:, beheertype]).max().round(3)
if draagkracht >= args.th:
dks[beheertype] = {'Species_code': sp.mnp_species_code, 'Land_type_code': beheertype,
'Land_type_quality': draagkracht}
except KeyError:
continue
out = pd.DataFrame.from_dict(dks, orient='index')
hlp.write2file(df=out, out_name=out_name, out_dir=args.out_dir)
python dk_vogel_vlinder.py dagvlinder "grote weerschijnvlinder" "Grote parelmoervlinder" Duinparelmoervlinder "bruin blauwtje" Veenbesparelmoervlinder "zilveren maan" purperstreepparelmoervlinder Groentje "bont dikkopje" "Tweekleurig hooibeestje" Hooibeestje Veenhooibeestje "bruin dikkopje" Kommavlinder Heivlinder "Kleine heivlinder" "kleine parelmoervlinder" argusvlinder "kleine ijsvogelvlinder" "grote vuurvlinder" "Bruine vuurvlinder" Gentiaanblauwtje "bruin zandoogje" veldparelmoervlinder "Groot dikkopje" koninginnenpage "donker pimpernelblauwtje" pimpernelblauwtje Heideblauwtje "Vals heideblauwtje" Veenbesblauwtje Aardbeivlinder zwartsprietdikkopje geelsprietdikkopje --out_dir c:\apps\z_temp\mnp_dk\
\ No newline at end of file