Commit bfceb10b authored by Roelofsen, Hans's avatar Roelofsen, Hans
Browse files

writing vog vli dk to tabel

parent f15dbfe1
......@@ -17,6 +17,20 @@ Methode:
Dit is een behoorlijk lelijk scriptje geworden. Excuses.
prep of ndff data:
source data: W:\PROJECTS\MNP2020\c_fases\f7_draagkracht\a_source_data\NDFF_levering_okt2020\ndff_vogels_dagvlinders_sep_okt2020.shp
W:\PROJECTS\MNP2020\c_fases\f7_draagkracht\a_source_data\NDFF_levering_2018\def_broedvogels.shp
W:\PROJECTS\MNP2020\c_fases\f7_draagkracht\a_source_data\NDFF_levering_2018\def_dagvlinders.shp
# Split 2020 data into vogels and vlinders
ogr2ogr -where "srtgroepen = 'Vogels'" ndff_vogels_sep_okt2020.shp ndff_vogels_dagvlinders_sep_okt2020.shp
ogr2ogr -where "srtgroepen = 'Dagvlinders'" ndff_dagvlinders_sep_okt2020.shp ndff_vogels_dagvlinders_sep_okt2020.shp
# merge vogels 2020 and vogels 2018 into single file
"""
import os
......@@ -54,7 +68,7 @@ for i, species in enumerate(species, start=1):
'''Query NDFF shapefile'''
try:
sp = hlp.Species(species)
ndff = hlp.query_ndff(gdf=ndff_all, species=sp, max_area=10000) # NDFF shapefile for selected species.
ndff = hlp.query_ndff(gdf=ndff_all, species=sp, max_area=10000, min_year=2010) # NDFF shapefile for selected species.
except (AssertionError, ImportError) as e:
hlp.write2file(df=pd.Series({species: e}), out_name='error_{}'.format(species), out_dir=args.out_dir)
continue
......@@ -121,7 +135,7 @@ for i, species in enumerate(species, start=1):
print(e)
continue
ndff_per_area = pd.pivot_table(data=ndff, index='ID', columns='nbt', values='id', aggfunc='count')
ndff_per_area = pd.pivot_table(data=ndff.assign(id=1), index='ID', columns='nbt', values='id', aggfunc='count')
ndff_per_area.columns = ['{0}_ndffpxls'.format(x) for x in ndff_per_area.columns.tolist()]
'''Merge NDFF observations df with areas df for final df where rows=areas, with cols for BT count and BTxNDFF count'''
......
"""
Determine draagkracht (DK) for vogel and vlinder using NDFF database
Hans Roelofsen, mei 2020
DK bestaat voor elke soort (sp)- beheertype (bt) combinatie als een getal tussen 0-1
bt brondata: dominantie beheertype kaart op 25m incl neerschaling
sp brondata: NDFF uittreksels.
Methode:
1. Reduce NDFF observations to centroids
2. Cluster 1) to 25m cells matching the BT kaart grid. Ignore observation count per cell, retain spatial distribution
of NDFF observations only
3. Impose 5000m square grid over 2) and BT kaart
4. For each 5000m sq calculate: area_ndff_observations_within_BT / area_BT_type
5. Compile table with MNP draagkracht and top10 hoogste draagkrachten van de 5000m squares
prep of ndff data:
source data: W:\\PROJECTS\\MNP2020\\c_fases\\f7_draagkracht\\a_source_data\\NDFF_levering_okt2020\\ndff_vogels_dagvlinders_sep_okt2020.shp
W:\\PROJECTS\\MNP2020\\c_fases\\f7_draagkracht\\a_source_data\\NDFF_levering_2018\\def_broedvogels.shp
W:\\PROJECTS\\MNP2020\\c_fases\\f7_draagkracht\\a_source_data\\NDFF_levering_2018\\def_dagvlinders.shp
# Split 2020 data into vogels and vlinders
ogr2ogr -where "srtgroepen = 'Vogels'" ndff_vogels_sep_okt2020.shp ndff_vogels_dagvlinders_sep_okt2020.shp
ogr2ogr -where "srtgroepen = 'Dagvlinders'" ndff_dagvlinders_sep_okt2020.shp ndff_vogels_dagvlinders_sep_okt2020.shp
# merge vogels 2020 and vogels 2018 into single file
# merge vlinders 2020 and vlinders 2018 into single file and add year col
# Add Year and Oppervlakte columns manually
"""
import os
import argparse
import numpy as np
import pandas as pd
import geopandas as gp
import rasterstats as rast
from mnp import helpers as hlp
# from sample.mnp import helpers as hlp
# --- Command-line interface ------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument('tax', type=str, help='taxonomic group', choices=['broedvogel', 'dagvlinder', 'test'])
parser.add_argument('species', type=str, help='species name', nargs='+')
parser.add_argument('--th', type=float, help='Minimum draagkracht', default=0.01)
parser.add_argument('--ndff_th', type=int, help='Minimum NDFF obs per BT per Uurhol', default=10)
parser.add_argument('--out_dir', type=hlp.valid_dir, help='output dir', default='./')
args = parser.parse_args()

tax = args.tax
out_dir = args.out_dir
# In test mode the command-line species list is ignored in favour of a fixed trio.
species = ['Groentje', 'Aarbeivlinder', 'Bruin zandoogje'] if tax == 'test' else args.species

# --- Data shared by all species --------------------------------------------
# Read the (large) NDFF shapefile once, reuse it for every species below.
ndff_all = hlp.read_ndff_shp(src=hlp.ndff_sources()[args.tax])
bt = hlp.BT20190612()  # Beheertypenkaart (version 20190612, 25 m resolution)
with open(os.path.join(args.out_dir, '{0}XNBT20190612_th{1}.csv'.format(args.tax, args.ndff_th)), 'w') as f:
    for i, speci in enumerate(species, start=1):
        # BUGFIX: count against len(species), not len(args.species) — in test mode
        # the species list is replaced and the two lengths differ.
        print('Doing {0} ({1}/{2})'.format(speci, i, len(species)))

        '''Query NDFF shapefile'''
        try:
            sp = hlp.Species(speci)
            ndff = hlp.query_ndff(gdf=ndff_all, species=sp, max_area=10000, min_year=2010)  # NDFF shapefile for selected species.
        except (AssertionError, ImportError) as e:
            # Record the failure per species and move on to the next one.
            hlp.write2file(df=pd.Series({speci: e}), out_name='error_{}'.format(speci), out_dir=args.out_dir)
            continue

        areas = hlp.gen_squares(x_ul=0, y_ul=625000, ncol=57, nrow=64, size=5000)  # Square grid over NL

        '''Reduce NDFF observations to NDFF grid cells. Then couple with Areas and BT kaart'''
        # Couple each NDFF observation to a Col-Row index from the bt_kaart using the Affine transformation.
        # Then drop duplicates to ensure only 1 NDFF observation is counted per bt_kaart pixel.
        ndff.loc[:, 'nbt_row_col'] = ndff.geometry.centroid.apply(hlp.xy_2_colrow, rio_object=bt.raster)
        pre = ndff.shape[0]
        ndff.drop_duplicates(subset='nbt_row_col', inplace=True)
        print(' aggregate to 25m pixels reduces from {0} to {1}'.format(pre, ndff.shape[0]))

        # Couple NDFF observations to one of the squares. Drop observations that are outside the squares.
        pre = ndff.shape[0]
        ndff = gp.sjoin(left_df=ndff, right_df=areas.loc[:, ['ID', 'geometry']], how='inner', op='within')
        post = ndff.shape[0]
        if pre != post:
            print(' dropping {} NDFF observations outside NL'.format(str(np.subtract(pre, post))))

        # Couple NDFF waarneming to a BT. Centroid sampling with all_touched=False means
        # no more than 1 pixel (hence 1 BT) per NDFF observation.
        ndff__nbt = rast.zonal_stats(vectors=ndff.geometry.centroid, raster=bt.raster.read(1), categorical=True,
                                     all_touched=False, category_map=bt.pxl2nbt, affine=bt.raster.affine, nodata=0)
        try:
            assert all([len(x) == 1 for x in ndff__nbt]), 'NDFF observations are matched to > 1 BT'
            assert all([item for sublist in [[v == 1 for v in x.values()] for x in ndff__nbt] for item in sublist]), 'Some NDFF obs are not matched to a BT'
        except AssertionError:
            continue
        ndff.loc[:, 'nbt'] = [item for sublist in [[k for k in x.keys()] for x in ndff__nbt] for item in sublist]
        ndff.dropna(subset=['nbt'], inplace=True)

        '''Determine acreage per BT for each hok.'''
        areas__nbt = rast.zonal_stats(vectors=areas, raster=bt.raster.read(1), categorical=True, all_touched=False,
                                      category_map=bt.pxl2nbt, affine=bt.raster.affine, nodata=0)
        # pixel counts for each area should sum to 5000^2 / 25^2 = 40.000 or 0 when area is outside the BT-kaart
        try:
            # BUGFIX: np.sum() on a generator does not consume it — it returns the
            # generator object itself, so the membership test was always False and
            # every species was skipped. Use the builtin sum() on the dict values.
            assert all([sum(area_dict.values()) in [40000, 0] for area_dict in areas__nbt]), \
                'area pixel counts do not sum to 40000 or 0'
        except AssertionError as e:
            print(e)
            continue

        # Make a list with len == areas.shape[0] where each item is a dictionary w. bt.units.nbt as keys and
        # pixel count of nbt X area as values. Then join to the areas gdf
        bt_stats = []
        for area_stats in areas__nbt:
            empty_stats = dict.fromkeys(bt.units.nbt, np.nan)
            # Observed counts overwrite the NaN placeholders.
            # See: https://stackoverflow.com/questions/38987/how-do-i-merge-two-dictionaries-in-a-single-expression-in-python
            bt_stats.append({**empty_stats, **area_stats})
        areas = areas.join(pd.DataFrame.from_dict(bt_stats, orient='columns'))
        # areas df now has all nbt units as columns with pixel count per area as values

        '''Pivot NDFF waarnemingen to Area ids, nbt as columns and count of ndff pixel IDs as values. Merge with areas df.'''
        try:
            assert set(ndff.nbt).issubset(areas.columns), "NDFF columns are not a subset of beheertypen, weird..."
        except AssertionError as e:
            print(e)
            continue
        ndff_per_area = pd.pivot_table(data=ndff, index='ID', columns='nbt', values='nbt_row_col', aggfunc='count')
        ndff_per_area.columns = ['{0}_ndffpxls'.format(x) for x in ndff_per_area.columns.tolist()]

        '''Merge NDFF observations df with areas df for final df where rows=areas, with cols for BT count and BTxNDFF count'''
        areas = pd.merge(left=areas, right=ndff_per_area, left_on='ID', right_index=True, how='left')

        '''Calculate Draagkracht as the largest ratio bt-pixels/ndff-pixels per area. Ignore NoData'''
        beheertypen = list(set(bt.units.nbt).difference(set(['nodata', '0'])))
        beheertypen.sort()
        for j, beheertype in enumerate(beheertypen):
            s1 = pd.Series(data=[sp.naam_ned, sp.mnp_species_code, beheertype, args.ndff_th, args.th,
                                 hlp.get_mnp_dk(sp, beheertype)], index=['Species_name', 'Species_code', 'bt',
                                                                         'th_ndff', 'min_dk', 'mnp_dk'])
            # Subset from areas to Columns relating to beheertype and rows where both NDFF and BT are not NA
            ndff_pxls_col = '{}_ndffpxls'.format(beheertype)
            bt_pxls_col = beheertype
            if ndff_pxls_col not in areas.columns:
                # BUGFIX: beheertypen iterates over *all* BT units, but the pivot only
                # produced a *_ndffpxls column for units with observations. Indexing a
                # missing column raised KeyError; treat it as an empty selection instead.
                area_sel = pd.DataFrame()
            else:
                # .copy() so the derived-column assignments below do not trigger
                # pandas' SettingWithCopyWarning.
                area_sel = areas.loc[:, [ndff_pxls_col, bt_pxls_col, 'geometry', 'ID']] \
                    .dropna(axis=0, subset=[ndff_pxls_col, bt_pxls_col], how='any').copy()
            if area_sel.empty:
                # NaNs when there is no NDFF X Beheertype combination.
                top_valid_dks = pd.Series([np.nan] * 10)
            else:
                # Calculate draagkracht
                area_sel['uh_dk'] = area_sel[ndff_pxls_col].divide(area_sel[bt_pxls_col]).round(3)
                area_sel['valid_dk'] = np.where((area_sel[ndff_pxls_col] >= args.ndff_th) & (area_sel.uh_dk >= args.th),
                                                1, 0)
                # Top 10 dks of uurhokken where threshold is exceeded
                top_valid_dks = area_sel.loc[area_sel['valid_dk'] == 1, 'uh_dk'].sort_values(ascending=False)[:10]
            fill = 10 - len(top_valid_dks)
            s2 = pd.concat([top_valid_dks, pd.Series([0] * fill)])  # Pad with zeros up to 10 entries
            s2.index = ['UH{}'.format(k) for k in range(1, 11)]
            out = pd.concat([s1, s2])
            # Write one CSV row per species X beheertype; header only on the very first row.
            f.write(pd.DataFrame(out).T.to_csv(sep=';', index=False, header=True if i == 1 and j == 0 else False))
......@@ -47,11 +47,11 @@ def read_ndff_shp(src):
"""
ndff = gp.read_file(src, encoding='UTF-8')
ndff['nl_naam_lower'] = ndff['nl_naam'].str.lower()
ndff['nl_naam_lower'] = ndff['soort_ned'].str.lower()
return ndff
def query_ndff(gdf, species, max_area, min_year=2010):
    """
    Query an existing NDFF geodataframe for a species, area and observation year.
    :param gdf: ndff gdf; must have columns year, nl_naam_lower, opp_m2
    :param species: Species object; matched on its lowercase Dutch name (naam_ned)
    :param max_area: maximum observation area in m2 (inclusive)
    :param min_year: earliest observation year to keep (default 2010)
    :return: subset of gdf satisfying all three criteria
    """
    queries = {'q1': gdf.loc[(gdf.year >= min_year) & (gdf.year.notna())].index,
               'q2': gdf.loc[gdf.nl_naam_lower == species.naam_ned].index,
               'q3': gdf.loc[gdf.opp_m2 <= max_area].index}
    sel = set.intersection(*[set(queries[x]) for x in ['q1', 'q2', 'q3']])  # TODO: select queries w **kwargs
    assert len(sel) > 0, 'no observations remaining'
    print(' found {0} NDFF observations for {1}'.format(len(sel), species.name))
    # BUGFIX: pandas .loc rejects a set as an indexer (TypeError); materialise to a
    # sorted list, which also makes the row order deterministic.
    return gdf.loc[sorted(sel)]
......@@ -496,11 +496,10 @@ class BT20190730:
def ndff_sources():
    """
    Return the path to the merged NDFF source shapefile per taxonomic group.
    Keys match the 'tax' command-line choices of the draagkracht script.
    """
    return {'broedvogel': r'W:\PROJECTS\MNP2020\c_fases\f7_draagkracht\a_source_data\vogels_ndff_shapefiles\ndff_vogels_merged_mk2.shp',
            'dagvlinder': r'W:\PROJECTS\MNP2020\c_fases\f7_draagkracht\a_source_data\vlinders_ndff_shapefiles\ndff_dagvlinders_merged_mk2.shp',
            'test': r'c:\apps\temp_geodata\ndff\ndff_vogel_sample.shp'}
class Species:
"""
Class holding all info related to a species
......@@ -568,17 +567,18 @@ def xy_2_colrow(point, rio_object):
# NOTE(review): marked "is this useful?" by the original author — confirm callers still need this.
def get_mnp_dk(species, bt):
    """
    Look up the MNP draagkracht (Land_type_quality) for a species X beheertype combination
    in the MNP parameter CSV on the network share.
    :param species: Species object; its mnp_species_code attribute is used for the lookup
    :param bt: beheertype code, matched against the Land_type_code column
    :return: draagkracht as float, or 0 when the combination is absent from the table
    :raises AttributeError: when species is not a Species-like object
    """
    mnp = pd.read_csv(r'\\wur\dfs-root\PROJECTS\QMAR\MNP-SNL-ParameterSet\Parameters_v05_2019_06_12\03_MNP_versie5_par_density_factors_BT2019_v2.csv', sep=',',
                      usecols=['Species_code', 'Land_type_code', 'Land_type_quality'])
    try:
        dk = mnp.loc[(mnp.Species_code == species.mnp_species_code) & (mnp.Land_type_code == bt), 'Land_type_quality']
        if dk.empty:
            return 0
        # BUGFIX: float() on a (length-1) Series is deprecated and removed in modern
        # pandas; take the first (single expected) match explicitly.
        return float(dk.iloc[0])
    except AttributeError:
        print('Make sure to provide a Species object, thanks.')
        raise
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment