Commit 876036f6 authored by Roelofsen, Hans
Browse files

last minute updates to dk methods

parent 1051c2fc
Script om neergeschaalde beheertypen toe te kennen aan plantassociaties, op basis van relatie associatie met
Hans Roelofsen, WEnR november 2020
import pandas as pd
from sample.mnp import helpers as hlp
# Read the Excel workbook vertaling_IPO_ASSO_2009_ps_copyHDR.xls into `asso`.
# NOTE(review): this read_excel call is cut off in this view (no closing parenthesis or further
# arguments are visible) - restore the remaining arguments from the original script.
asso = pd.read_excel(r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\MNP\draakgracht\plant\vertaling_IPO_ASSO_2009_ps_copyHDR.xls',
# Add column 'bt': hlp.fix_bt(..., as_mnp=True) applied to every value of column 'Bc'.
asso.loc[:, 'bt'] = asso.Bc.apply(hlp.fix_bt, as_mnp=True)
# Beheertypen helper object; its `units` table supplies the columns nbt, desc and neergeschaald used below.
bt = hlp.BT20190612()
# Lookup: beheertype code (units.nbt) -> description (units.desc).
btcode2btomschrijving = dict(zip(bt.units.nbt, bt.units.desc))
# Codes of all units flagged neergeschaald == 'ja'.
nbts = bt.units.loc[bt.units.neergeschaald == 'ja', 'nbt']
# Parent code of each of those: its first six characters, passed through hlp.fix_bt(as_mnp=True).
parents = nbts.str.slice(start=0, stop=6).apply(hlp.fix_bt, as_mnp=True)
nbt2parent = dict(zip(nbts, parents))
# Collects the per-nbt selections that are concatenated after the loop.
holder = []
# For every (nbt, parent) pair: select rows of `asso` for the parent and relabel them with the nbt.
# NOTE(review): the indentation of the loop body was lost in this view.
for nbt, parent in nbt2parent.items():
# NOTE(review): the left operand of the comparison is missing; presumably asso.bt == parent - confirm.
sel = asso.loc[ == parent, :]
if sel.empty:
print('fail for parent-nbt {0} {1}'.format(parent, nbt))
# Overwrite code ('Bc') and description ('Bn') with those of the nbt.
sel.loc[:, 'Bc'] = nbt
sel.loc[:, 'Bn'] = btcode2btomschrijving[nbt]
# NOTE(review): no visible line appends `sel` to `holder`, so as shown the list passed to concat
# is empty - confirm against the original script.
asso_updated = pd.concat(holder)
# Write the result only when every nbt code occurs in the updated 'Bc' column; the helper column
# 'bt' is dropped first and the CSV is written with ';' as separator.
if set(nbts).issubset(set(asso_updated.Bc)):
asso_updated.drop(columns=['bt']).to_csv(r'c:/apps/z_temp/fuck.csv', index=False, header=True, sep=';')
......@@ -13,6 +13,7 @@ zou netter zijn als dit wel gebeurt, maar daar kom ik nu (29-Oct-2020 10:39) nie
Hans Roelofsen, 23 June 2020
import os
import numpy as np
import itertools
import pandas as pd
......@@ -21,36 +22,35 @@ from sample.mnp import helpers as hlp
"""Source data"""
# NOTE(review): this view interleaves removed and added diff lines. The five src_* names assigned
# with full paths below are rebound further down to bare file names, which the later read calls
# join with `mnp_params_dir` / `plant_dir`.
# MNP param file with all species-codes and Taxonomic group
src_mnp_sp_tax = r'W:\PROJECTS\qmar\MNP-SNL-ParameterSet\Parameters_v06_2019_12_09\07_MNP_versie4_par_population_factors.csv'
# Directories holding the plant source data and the MNP parameter files
plant_dir = r'W:\PROJECTS\MNP2020\c_fases\f7_draagkracht\a_source_data\plant'
mnp_params_dir = r'w:\PROJECTS\qmar\MNP-SNL-ParameterSet\Parameters_v06_2019_12_09'
# MNP param file with species code, local name, scientific name
src_mnp_sp_nms = r'w:\PROJECTS\qmar\MNP-SNL-ParameterSet\Parameters_v06_2019_12_09\09_MNP_versie4_Group_Species_valid model_468.csv'
# MNP param file with species code, BT code (Marlies version), BT code (nbt2016 version) and draagkracht
src_mnp_dk = r'w:\PROJECTS\qmar\MNP-SNL-ParameterSet\Parameters_v06_2019_12_09\03_MNP_versie6_par_density_factors_BT2019_v2.csv'
# Source file from Wieger W with the translation between Associaties and Beheertypen
src_assoXbts = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\MNP\draakgracht\plant\src_data\vertaling_IPO_ASSO_2009.txt'
# Source file from Wieger W with fidelity (trouwgraad) and frequency of plant species within an Associatie
src_assoXsps = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\MNP\draakgracht\plant\src_data\q_syntaxa_soorten_frequentie_trouwsoorten.txt'
src_mnp_sp_tax = r'07_MNP_versie4_par_population_factors.csv' # species-codes and Taxonomic group
src_mnp_sp_nms = r'09_MNP_versie4_Group_Species_valid model_468.csv' # species code, local name, scientific name
src_mnp_dk = r'03_MNP_versie6_par_density_factors_BT2019_v2.csv' # current MNP Draagkrachten
src_assoXbts = r'vertaling_IPO_ASSO_2009_ps.xls' # Translation Associatie --> Beheertype
src_assoXsps = r'q_syntaxa_soorten_frequentie_trouwsoorten.xlsx' # Fidelity (trouwgraad) & frequency per species/association
nbt = hlp.BT20190612() # All information on the beheertypen map
# Merge MNP files with species information. Verify species codes and names occur once and once only in the dataset.
# NOTE(review): the next two lines are the removed diff version of the merge; that call is never
# closed in this view. The version that follows reads the same two files via mnp_params_dir.
mnp_sp = pd.merge(left=pd.read_csv(src_mnp_sp_nms, sep=','),
right=pd.read_csv(src_mnp_sp_tax, sep=','),
mnp_sp = pd.merge(left=pd.read_csv(os.path.join(mnp_params_dir, src_mnp_sp_nms), sep=','),
right=pd.read_csv(os.path.join(mnp_params_dir, src_mnp_sp_tax), sep=','),
how='left', left_on='Species_code', right_on='Species_code')
# For each of the three columns, the number of unique values must equal the number of rows.
assert all([mnp_sp.shape[0] == len(set(getattr(mnp_sp, x))) for x in ['Species_code', 'Scientific_name', 'Local_name']])
# TODO: use mnp_sp.duplicated()
# Read MNP Draagkrachten
# NOTE(review): mnp_dk is assigned twice; the first read uses src_mnp_dk as-is, the second joins
# it with mnp_params_dir. Both keep only the three listed columns.
mnp_dk = pd.read_csv(src_mnp_dk, sep=',', usecols=['Species_code', 'Land_type_code', 'Land_type_quality'])
mnp_dk = pd.read_csv(os.path.join(mnp_params_dir, src_mnp_dk), sep=',',
usecols=['Species_code', 'Land_type_code', 'Land_type_quality'])
# Read other source files.
# NOTE(review): assoXbts and assoXsps are first read as ';'-separated text (lines starting with
# '#' treated as comments) and then re-read from Excel sheets in plant_dir; the Excel reads are
# the assignments that remain in effect.
assoXbts = pd.read_csv(src_assoXbts, sep=';', comment='#')
assoXsps = pd.read_csv(src_assoXsps, sep=';', comment='#', usecols=["Syntaxon_code", "Syntaxon_naam", "Soortnummer",
"Soortnaam", "Frequentie", "Trouw"])
# Read Source data on plants
assoXbts = pd.read_excel(os.path.join(plant_dir, src_assoXbts), sheet_name='vertaling_IPO_ASSO_2009')
assoXsps = pd.read_excel(os.path.join(plant_dir, src_assoXsps), sheet_name='q_soorten_verbonden_Wieger')
'''Reduce sub-associations 00XX11a to 00XX11 and repair Beheertype codes in Input sheet vertaling_IPO_ASSO_2009.txt'''
Reduce sub-associations: 00XX11a --> 00XX11
Repair Beheertype codes in Input sheet vertaling_IPO_ASSO_2009.txt 04.02 --> N04.02.00
# Generalised syntaxon code: the first six characters of column `short`, upper-cased.
assoXbts.loc[:, 'syntaxon_gen'] = assoXbts.short.str.slice(stop=6).str.upper()
# Repaired beheertype code: hlp.fix_bt(..., as_mnp=True) applied to every value of column 'Bc'.
assoXbts.loc[:, 'btC'] = assoXbts.Bc.apply(hlp.fix_bt, as_mnp=True)
......@@ -58,7 +58,7 @@ assoXbts.loc[:, 'btC'] = assoXbts.Bc.apply(hlp.fix_bt, as_mnp=True)
# Generalised syntaxon code for the species table: the first six characters of Syntaxon_code,
# upper-cased.
assoXsps.loc[:, 'syntaxon_gen'] = assoXsps.Syntaxon_code.str.slice(stop=6).str.upper()

'''Mappings between i) Associaties and Beheertypen (n:n) ii) name and species code'''
# The beheertypen codes are taken from the beheertypen map (nbt.units.nbt). The earlier
# assignment from assoXbts.btC was overwritten immediately and has been dropped.
beheertypen = list(set(nbt.units.nbt))
# Start from an empty dict: dict.fromkeys(keys, []) would bind ONE shared list object to every
# key, which is only harmless as long as every key is reassigned afterwards.
bt2asso = {}
for bt in beheertypen:
    # Unique generalised syntaxon codes linked to this beheertype (empty list when there are none).
    bt2asso[bt] = list(set(assoXbts.loc[assoXbts.btC == bt, 'syntaxon_gen']))
......@@ -69,7 +69,7 @@ code2localname = dict(zip(mnp_sp.Species_code, mnp_sp.Local_name))
'''New dataframe with all species <-> Beheertype combinations.'''
# One row per element of an itertools.product whose first factor is the Scientific_name of every
# species with Taxon_group == 'P'.
# NOTE(review): the DataFrame call is cut off in this view - the second product argument and the
# closing brackets are not visible.
dk = pd.DataFrame(columns=['Scientific_name', 'Land_type_code'],
data=[(a, b) for (a, b) in itertools.product(mnp_sp.loc[mnp_sp.Taxon_group == 'P', 'Scientific_name'],
# Add the (still empty) columns mean_frequentie, mean_trouw and Land_type_quality by concatenating
# with an empty frame; sort=True orders the resulting columns.
dk = pd.concat([dk, pd.DataFrame(columns=['mean_frequentie', 'mean_trouw', 'Land_type_quality'])], sort=True)
# NOTE(review): the right-hand side of this assignment is missing in this view.
dk.loc[:, 'Species_code'] =
......@@ -19,6 +19,7 @@ Dit is een behoorlijk lelijk scriptje geworden. Excuses.
import os
import argparse
import numpy as np
import pandas as pd
......@@ -32,6 +33,7 @@ parser = argparse.ArgumentParser()
# Command-line interface. `parser` itself is created on a line that is only visible as
# hunk-header context in this view.
# Positional: taxonomic group, restricted to the listed choices.
parser.add_argument('tax', type=str, help='taxonomic group', choices=['broedvogel', 'dagvlinder', 'test'])
# Positional: one or more species names (nargs='+' yields a list).
parser.add_argument('species', type=str, help='species name', nargs='+')
# Optional thresholds: minimum draagkracht and minimum number of NDFF observations.
parser.add_argument('--th', type=float, help='Minimum draagkracht', default=0.01)
parser.add_argument('--ndff_th', type=int, help='Minimum NDFF obs per BT per Uurhol', default=10)
# Output directory; the value is converted/validated by hlp.valid_dir.
parser.add_argument('--out_dir', type=hlp.valid_dir, help='output dir', default='./')
args = parser.parse_args()
# List of species names to process.
species = args.species
......@@ -57,7 +59,8 @@ for i, species in enumerate(species, start=1):
# Record the error `e` for this species in a file named 'error_<species>' in the output directory.
# NOTE(review): the try/except that binds `e` is not visible in this view.
hlp.write2file(df=pd.Series({species: e}), out_name='error_{}'.format(species), out_dir=args.out_dir)
# Base names of the output files for this species.
# NOTE(review): `out_name` and `csv_out_name` hold the same value; they are the removed and added
# diff versions of one line.
out_name = '03_MNP_versie7_par_density_factors_{0}'.format(sp.naam_ned)
csv_out_name = '03_MNP_versie7_par_density_factors_{0}'.format(sp.naam_ned)
# Template for the shapefile names; the two placeholders are filled with species name and beheertype.
shp_out_basename = 'draagkracht_{}X{}.shp'
areas = hlp.gen_squares(x_ul=0, y_ul=625000, ncol=57, nrow=64, size=5000) # Square grid over NL
'''Reduce NDFF observations to NDFF grid cells. Then couple with Areas and BT kaart'''
......@@ -84,7 +87,7 @@ for i, species in enumerate(species, start=1):
# Sanity checks on `ndff__nbt`, a sequence of dicts: every dict must hold exactly one key ...
# NOTE(review): the `try:` that belongs to the `except` below is not visible in this view.
assert all([len(x) == 1 for x in ndff__nbt]), 'NDFF observations are matched to > 1 BT'
# ... and every value in every dict must equal 1.
assert all([item for sublist in [[v == 1 for v in x.values()] for x in ndff__nbt] for item in sublist]), 'Some NDFF obs are not matched to a BT'
except AssertionError as e:
# A failed check is written to an error file instead of stopping the run.
# NOTE(review): the two calls below are the removed and added diff versions of the same line.
hlp.write2file(df=pd.Series({sp.naam_ned: e}), out_name='error_{}'.format(out_name), out_dir=args.out_dir)
hlp.write2file(df=pd.Series({sp.naam_ned: e}), out_name='error_{}'.format(csv_out_name), out_dir=args.out_dir)
# Flatten the dict keys (one per dict when the checks above hold) into column 'nbt' of `ndff`.
ndff.loc[:, 'nbt'] = [item for sublist in [[k for k in x.keys()] for x in ndff__nbt] for item in sublist]
......@@ -128,19 +131,34 @@ for i, species in enumerate(species, start=1):
# Collects one result dict per beheertype, keyed by beheertype code.
dks = {}
# Loop over every beheertype code in bt.units.nbt except 'nodata'.
# NOTE(review): the indentation of the loop body was lost in this view.
for beheertype in set(bt.units.nbt).difference(set(['nodata'])):
# Removed diff version of the computation: the maximum over all rows of `areas` of the ratio
# '<beheertype>_ndffpxls' / '<beheertype>', rounded to 3 decimals. `draagkracht` is assigned
# again further down.
draagkracht = areas.loc[:, '{}_ndffpxls'.format(beheertype)].divide(areas.loc[:, beheertype]).max().round(3)
# Column names in `areas` for this beheertype (presumably NDFF and BT pixel counts per square -
# TODO confirm against the code that fills `areas`).
ndff_pxls_col = '{}_ndffpxls'.format(beheertype)
bt_pxls_col = beheertype
# Name of the flag column derived from the --ndff_th threshold ('dk_gte_10' for the default of 10).
dk_gte_col = 'dk_gte_{}'.format(args.ndff_th)
# Subset from areas to Columns relating to beheertype and rows where both NDFF and BT are not NA
area_sel = areas.loc[:, [ndff_pxls_col, bt_pxls_col, 'geometry', 'ID']] \
    .dropna(axis=0, subset=[ndff_pxls_col, bt_pxls_col], how='any')
# Per-row draagkracht: ratio of the NDFF column to the BT column, rounded to 3 decimals.
area_sel['uh_dk'] = area_sel[ndff_pxls_col].divide(area_sel[bt_pxls_col]).round(3)
# Flag (1/0) the rows whose NDFF value reaches the --ndff_th threshold.
area_sel[dk_gte_col] = np.where(area_sel[ndff_pxls_col] >= args.ndff_th, 1, 0)
# Final draagkracht for the species X beheertype combination: the highest per-row value among
# the flagged rows. The flag column is looked up by its generated name; the previous hard-coded
# attribute `dk_gte_10` only existed when --ndff_th was left at 10.
draagkracht = area_sel.loc[area_sel[dk_gte_col] == 1, 'uh_dk'].max()
# Keep the final value only on the row(s) that attain it; all other rows get 0.
area_sel['fin_dk'] = np.where(area_sel['uh_dk'] == draagkracht, draagkracht, 0)
# Write the selection to a shapefile named after species and beheertype (dots removed from the
# beheertype code), with the two count columns renamed to fixed names.
area_sel.rename(columns={ndff_pxls_col: 'ndff_count', bt_pxls_col: 'bt_count'})\
    .to_file(os.path.join(args.out_dir, shp_out_basename.format(sp.naam_ned, beheertype.replace('.', ''))))
# NOTE(review): the condition below is cut off in this view (right-hand side of >= missing);
# presumably it compares against the --th threshold - confirm.
if draagkracht >=
# Store the result for this species X beheertype combination.
# NOTE(review): the 'Land_type_quality' line appears twice (removed and added diff versions).
dks[beheertype] = {'Species_code': sp.mnp_species_code, 'Land_type_code': beheertype,
'Land_type_quality': draagkracht}
'Land_type_quality': draagkracht}
# NOTE(review): two except headers follow (removed and added diff versions); the matching `try:`
# and the handler body are not visible in this view.
except KeyError:
except (KeyError, ValueError): # ValueError when area_sel.empty
# One row per stored beheertype.
out = pd.DataFrame.from_dict(dks, orient='index')
if not out.empty:
print(' {0} matched to {1} beheertypen'.format(sp.naam_ned, out.shape[0]))
# NOTE(review): the two write2file calls are the removed and added diff versions of one line.
hlp.write2file(df=out, out_name=out_name, out_dir=args.out_dir)
hlp.write2file(df=out, out_name=csv_out_name, out_dir=args.out_dir)
# NOTE(review): presumably the `else:` branch of the check above; the `else:` line itself is not visible.
print(' zero dk for {}'.format(sp.naam_ned))
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment