Commit 5015005f authored by roelo008

writing table as shapefile

parent bca41cf6
@@ -25,30 +25,31 @@ cols = np.array([i for i in range(0, ncols)] * nrows).reshape(np.product(shape))
# affine transformation between row-col and RD-New coords for grid
affine_trans = affine.Affine.from_gdal(0, 250, 0, 625000, 0, -250) # (x topleft, width, 0, y topleft, 0, height)
db = pd.DataFrame({'row': rows, 'col':cols})
db = pd.DataFrame({'row': rows, 'col': cols})
db['x_topleft'] = db.apply(lambda x: ((x.col, x.row) * affine_trans)[0], axis=1).astype(np.int32)
db['y_topleft'] = db.apply(lambda x: ((x.col, x.row) * affine_trans)[1], axis=1).astype(np.int32)
db['hok_id'] = db.apply(lambda x: '{0}_{1}'.format(x.x_topleft, x.y_topleft), axis=1)
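For clarity, a minimal sketch (assuming only the `affine` package) of how this geotransform maps a (col, row) grid index to the RD-New top-left coordinate of its 250 m cell:

import affine

t = affine.Affine.from_gdal(0, 250, 0, 625000, 0, -250)  # x topleft, cell width, 0, y topleft, 0, -cell height
print(t * (0, 0))  # (0.0, 625000.0): top-left cell of the grid
print(t * (2, 3))  # (500.0, 624250.0): x = col * 250, y = 625000 - row * 250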
'''
Add pickled ASC rasters showing fractional cover of LU class X as columns
Add pickled csv versions of the SNL rasters showing fractional cover of LU class X as columns
Never change nrow(db), pad missing cells with 0
'''
pkl_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\PGO_Hotspots\a_broedvogels\SNL_grids\csv_format'
for source in [x for x in os.listdir(pkl_dir) if x.endswith('csv')]:
for source in [x for x in os.listdir(pkl_dir) if x.endswith('csv') and not x.startswith('prov')]:
src_name = os.path.splitext(source)[0]
col_name = src_name.split('_')[0]
try:
print('Now adding {0}'.format(src_name))
source_df = pd.read_csv(os.path.join(pkl_dir, source), sep=';', comment='#')
source_df.set_index('hok_id', inplace=True, verify_integrity=True)
source_df.rename(columns={'area_m2': col_name}, inplace=True)
db = pd.merge(db, source_df, left_on='hok_id', right_index=True, how='left')
print('DB shape now: {0} rows, {1} cols'.format(db.shape[0], db.shape[1]))
print(' DB shape now: {0} rows, {1} cols'.format(db.shape[0], db.shape[1]))
except FileNotFoundError:
print('Warning, file not found: {0}'.format(source))
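The intent stated in the comment above (never change nrow(db), pad missing cells with 0) comes down to a how='left' merge followed by a fill of the resulting NaNs. A minimal sketch with made-up hok_ids and a hypothetical class column name:

import pandas as pd

# three hok_ids, only two of which occur in the source csv
db = pd.DataFrame({'hok_id': ['0_625000', '250_625000', '500_625000']})
src = pd.DataFrame({'hok_id': ['0_625000', '500_625000'], 'area_m2': [62500, 1200]}).set_index('hok_id')
src = src.rename(columns={'area_m2': 'N1202'})  # column named after the hypothetical LU class
db = pd.merge(db, src, left_on='hok_id', right_index=True, how='left')  # nrow(db) stays 3
db['N1202'] = db['N1202'].fillna(0)  # pad hokken without coverage with 0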
@@ -13,25 +13,25 @@ Hans Roelofsen, September 2019
import os
import pandas as pd
import datetime
import numpy as np
from z_utils import clo
'''
Model run identifier
'''
id = 'MS02a'
identifier = 'HD06'
'''
Query PGO data for a specific SNL soortenlijst and specific spatial filter
'''
soortgroepen = ['vogel', 'vlinder', 'vaatplant']
snl_soortlijst = ['N1202', 'N1301'] # species list belonging to an SNL type
sub_soortlijst = ['SNL'] # SNL, Bijlage 1 or EcoSysLijst?
snl_soortlijst = ['HalfnatuurlijkGrasland'] # species list belonging to an SNL type
sub_soortlijst = ['EcoSysLijst'] # SNL, Bijlage 1 or EcoSysLijst?
snl_gebieden_wel = ['N1900'] # 250m cells with which SNL type?
snl_gebieden_niet = [] # 250m cells without which SNL type?
snl_gebieden_wel = ['N1202', 'N1301'] # 250m cells with which SNL type?
snl_gebied_drempelwaarde = 0
snl_gebieden_niet = ['N1900']
# Define the numeric range for the categories Afname, Stabiel, Toename:
# [a, b, c, d]
@@ -49,7 +49,7 @@ periode_B = '2002-2009'
'''
SPATIAL SELECTION
'''
hokken, spat_query = clo.query_250m_hokken(snl_gebieden_wel, 62499, snl_gebieden_niet)
hokken, spat_query = clo.query_250m_hokken(snl_gebieden_wel, snl_gebied_drempelwaarde, snl_gebieden_niet)
'''
SELECTION FROM PGO DATA
@@ -63,59 +63,64 @@ Pivot to show plant, vogel, vlinder count per hok. Columns are soortgroep, then periode
dat_piv = pd.pivot_table(pgo_dat, index='hok_id', columns=['soortgroep', 'periode'], values='n', aggfunc='sum')
print('PGO data cover {0} 250m hokken'.format(dat_piv.shape[0]))
'''
Calc CLO1543 between periode A - periode B for plant, vogel, vlinder.
'''
'''Calc CLO1543 between periode A - periode B for plant, vogel, vlinder'''
idx = pd.IndexSlice
plant_trend, plant_score = clo.calc_clo_1543(s_periode_A=dat_piv.loc[:, idx['vaatplant', periode_A]],
s_periode_B=dat_piv.loc[:, idx['vaatplant', periode_B]],
bins=dif_cats, labels=dif_labs)
bins=dif_cats, labels=dif_labs, species='vaatplant')
vogel_trend, vogel_score = clo.calc_clo_1543(s_periode_A=dat_piv.loc[:, idx['vogel', periode_A]],
s_periode_B=dat_piv.loc[:, idx['vogel', periode_B]],
bins=dif_cats, labels=dif_labs)
bins=dif_cats, labels=dif_labs, species='vogel')
vlinder_trend, vlinder_score = clo.calc_clo_1543(s_periode_A=dat_piv.loc[:, idx['vlinder', periode_A]],
s_periode_B=dat_piv.loc[:, idx['vlinder', periode_B]],
bins=dif_cats, labels=dif_labs)
bins=dif_cats, labels=dif_labs, species='vlinder')
'''Create new df containing scores and trends for all soortgroepen. Then attach to hokken.'''
foo_df = pd.concat([plant_trend, plant_score, vogel_trend, vogel_score, vlinder_trend, vlinder_score],
axis=1)
'''Create new df containing counts-per-periode, scores and trends for all soortgroepen. Then attach to hokken.'''
flat_cols = ['_'.join(tuple_col) for tuple_col in list(dat_piv)]
dat_piv.columns = flat_cols
foo_df = pd.concat([dat_piv, plant_trend, plant_score, vogel_trend, vogel_score, vlinder_trend, vlinder_score], axis=1)
# note observations are here reduced to valid hokken only, how=left!
clo_df = pd.merge(left=hokken, right=foo_df, how='left', right_index=True, left_index=True)
'''Not every hok will have a PGO observation. Fill the score columns with `onbekend`.'''
clo_df.filter(regex='*score*').fillna(value='onbekend', axis=1)
clo_df.fillna(value={'score_vaatplant': 'onbekend', 'score_vogel': 'onbekend', 'score_vlinder': 'onbekend',
'trend_vaatplant': 'onbekend', 'trend_vogel': 'onbekend', 'trend_vlinder': 'onbekend'},
inplace=True)
'''
Compute the total area per soortgroep for the CLO scores Toename, Stabiel, Afname and Onbekend.
Also compute the net increase (Toename - Afname) and the score as netto / total areaal
'''
plant_out = pd.pivot_table(clo_df, index='vaatplant_score', values='areaal_ha', aggfunc='sum')
plant_out.loc[:, 'netto'] = plant_out.apply(lambda row: row.toename - row.afname, axis=1)
plant_out.loc[:, 'score'] = plant_out.apply(lambda row: row.netto / hokken.areaal_ha.sum() * 100, axis=1)
plant_out = pd.pivot_table(clo_df, index='score_vaatplant', values='areaal_ha', aggfunc='sum').T \
.rename({'areaal_ha': 'vaatplant'}, axis=0)
vogel_out = pd.pivot_table(clo_df, index='vogel_score', values='areaal_ha', aggfunc='sum')
vogel_out.loc[:, 'netto'] = vogel_out.apply(lambda row: row.toename - row.afname, axis=1)
vogel_out.loc[:, 'score'] = vogel_out.apply(lambda row: row.netto / hokken.areaal_ha.sum() * 100, axis=1)
vogel_out = pd.pivot_table(clo_df, index='score_vogel', values='areaal_ha', aggfunc='sum').T \
.rename({'areaal_ha': 'vogel'}, axis=0)
vlinder_out = pd.pivot_table(clo_df, index='vlinder_score', values='areaal_ha', aggfunc='sum')
vlinder_out.loc[:, 'netto'] = vlinder_out.apply(lambda row: row.toename - row.afname, axis=1)
vlinder_out.loc[:, 'score'] = vlinder_out.apply(lambda row: row.netto / hokken.areaal_ha.sum() * 100, axis=1)
vlinder_out = pd.pivot_table(clo_df, index='score_vlinder', values='areaal_ha', aggfunc='sum').T \
.rename({'areaal_ha': 'vlinder'}, axis=0)
# concatenate into single dataframe. Note: index = [vaatplant, vogel, vlinder], cols = [afname, onbekend, stabiel,
# toename, netto, score, tot_areaal_ha]
clo_out = pd.concat([plant_out, vogel_out, vlinder_out])
clo_out.loc[:, 'netto'] = clo_out.apply(lambda row: row.toename - row.afname, axis=1)
clo_out.loc[:, 'score'] = clo_out.apply(lambda row: row.netto / hokken.areaal_ha.sum() * 100, axis=1)
clo_out.loc[:, 'tot_areaal_ha'] = clo_out.apply(lambda row: row.toename + row.afname + row.stabiel + row.onbekend,
axis=1)
# TODO: ignore NAs in the summation
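# As a worked illustration with made-up numbers: a soortgroep with toename = 400 ha, afname = 150 ha,
# stabiel = 300 ha and onbekend = 150 ha in a selection of hokken totalling 1000 ha gets
# netto = 400 - 150 = 250 ha, score = 250 / 1000 * 100 = 25, and tot_areaal_ha = 400 + 150 + 300 + 150 = 1000 ha.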
'''
Write report
'''
timestamp = datetime.datetime.now().strftime("%y%m%d-%H%M")
out_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\CLO1543'
basename = 'clo1543_{0}_{1}'.format(id, timestamp)
out_base_dir = r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\CLO1543'
out_dir = os.path.join(out_base_dir, 't{0}'.format(timestamp))
os.mkdir(out_dir)
basename = 'clo1543_{0}_{1}'.format(identifier, timestamp)
pgo_dat_summary = pd.pivot_table(pgo_dat, index='soortgroep', columns=['snl', 'periode'], values='n', aggfunc='count')
cat_lims = [(dif_cats[i-1], dif_cats[i]) for i in range(1, len(dif_cats))]
lo_lims = [l for (l,_) in cat_lims]
up_lims = [u for (_,u) in cat_lims]
# TODO: turn this into a nicer string!
# category_limits = ['{0} t/m {1}'.format(min(x), max(x)) for x in dif_cats]
# categories = ', '.join('{0}: {1}'.format(k, v) for k,v in dict(zip(dif_labs, category_limits)).items())
clo_scores =
clo_scores = ['{0} <= {1} < {2}'.format(l, x, u) for x, (l, u) in zip(dif_labs, cat_lims)]
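With hypothetical dif_cats = [-999, -2, 2, 999] and dif_labs = ['afname', 'stabiel', 'toename'] (the actual values are defined earlier in the script), this comprehension yields strings such as '-999 <= afname < -2', '-2 <= stabiel < 2' and '2 <= toename < 999', which are substituted into the report header below.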
header = '#Model run dated: {0} by {1}\n#\n' \
'#Extract from PGO species distribution data with PGO Query:\n' \
'# Soortgroepen: {2}\n' \
@@ -125,15 +130,13 @@ header = '#Model run dated: {0} by {1}\n#\n' \
'#Selection from 250m hokken grid with Beheertypen Query:\n' \
'# {7}\n' \
'# ==> {8} 250m hokken with total {9} hectare.\n' \
'#Trend refers to species difference between {10}-{11}}\n' \
'#Scores are as follows:\n' \
''
.format(timestamp, os.environ.get('USERNAME'), ', '.join([soort for soort in soortgroepen]),
', '.join(snl for snl in snl_soortlijst), '-'.join(sub for sub in sub_soortlijst),
pgo_dat.shape[0], dat_piv.shape[0], spat_query, hokken.shape[0], hokken.areaal_ha.sum(),
periode_A, periode_A,
dif_cats,
periode_A, periode_B)
'#Trend refers to species difference between {10}-{11}\n' \
'#Scores are as follows: {12}\n' \
.format(timestamp, os.environ.get('USERNAME'), ', '.join([soort for soort in soortgroepen]),
', '.join(snl for snl in snl_soortlijst), '-'.join(sub for sub in sub_soortlijst),
pgo_dat.shape[0], dat_piv.shape[0], spat_query, hokken.shape[0], hokken.areaal_ha.sum(),
periode_A, periode_B, clo_scores)
footer = '\nMade with Python 3.5 using pandas, geopandas, by Hans Roelofsen, WEnR team B&B, dated {0}'.format(timestamp)
with open(os.path.join(out_dir, basename + '.txt'), 'w') as f:
@@ -141,13 +144,15 @@ with open(os.path.join(out_dir, basename + '.txt'), 'w') as f:
f.write('\n###PGO DATA SUMMARY###\n')
f.write(pgo_dat_summary.to_csv(sep='\t', line_terminator='\r'))
f.write('\n##### HECTARE-TOENAME-STABIEL-AFNAME #####\n')
f.write(clo_out.to_csv(sep='\t', line_terminator='\r'))
f.write('\n###DATA per HOK###\n')
f.write(clo_df.to_csv(sep='\t', line_terminator='\r'))
f.write('\n\n')
f.write(plant_out.to_csv(sep='\t', line_terminator='\r'))
f.write('\n\n')
f.write(vogel_out.to_csv(sep='\t', line_terminator='\r'))
f.write('\n\n')
f.write(vlinder_out.to_csv(sep='\t', line_terminator='\r'))
f.write(footer)
# Write selected hokken to shapefile
clo_df.index = clo_df.index.rename('dummy')
clo_gdf = clo.df_2_gdf(clo_df)
clo_gdf.to_file(os.path.join(out_dir, '{0}.shp'.format(basename)))
@@ -40,7 +40,33 @@ def classifier(x, categories, labels):
return np.nan
'''
def hok_id_df_to_shp(df, out_name):
def df_2_gdf(df):
'''
:param df: pandas dataframe with column hok_id
:return: df as geodataframe with column geometry attached
'''
if not isinstance(df, pd.core.frame.DataFrame):
raise Exception('I only accept panda dataframes')
if 'hok_id' not in list(df):
raise Exception('No column named hok_id')
# full extent shp
try:
nlgrid = gp.read_file(r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\PGO_Hotspots\geodata\250mhok\shp\hok250m_fullextent.shp')
except OSError:
raise Exception('Requesting file from laptop Hans Roelofsen. A copy is stored here'
': wur\dfs-root/PROJECTS/hotspots_pgos/b_Prelim_Results/250_grid_shp/hok250m_fullextent.shp')
out_df = pd.merge(left=df, right=nlgrid.loc[:, ['ID', 'geometry']], left_on='hok_id', right_on='ID',
how='left')
out_gdf = gp.GeoDataFrame(out_df, geometry='geometry')
return out_gdf
def hok_id_df_to_shp(df, out_dir, out_name):
'''
:param df: pandas dataframe with column hok_id
out_name: base name of output shapefile
@@ -61,8 +87,7 @@ def hok_id_df_to_shp(df, out_name):
': wur\dfs-root/PROJECTS/hotspots_pgos/b_Prelim_Results/250_grid_shp/hok250m_fullextent.shp')
sel_grid = nlgrid.loc[nlgrid['ID'].isin(hokken)]
sel_grid.to_file(os.path.join(r'c:\Users\roelo008\OneDrive - WageningenUR\a_projects\PGO_Hotspots\geodata\shp',
out_name + '.shp'))
sel_grid.to_file(os.path.join(out_dir, out_name + '.shp'))
@@ -222,7 +247,7 @@ def calc_clo_1518(df, periode, labs):
print(e)
def calc_clo_1543(s_periode_A, s_periode_B, bins, labels):
def calc_clo_1543(s_periode_A, s_periode_B, bins, labels, species):
"""Function to calculate CLOR1543 score, i.e. trend in species number and corresponding score
s_periode_A: pd Series with species count prior periode
s_periode_B: pd Series with species count posterior periode (calculation is: periodeA - periodeB
@@ -233,9 +258,9 @@ def calc_clo_1543(s_periode_A, s_periode_B, bins, labels):
try:
sp_diff = s_periode_A.sub(s_periode_B)
sp_diff.name = 'rend_{0}'.format(s_periode_A.name)
sp_diff_score =sp_diff.apply(classifier, bins=bins, labels=labels)
sp_diff_score.name = 'score_{0}'.format(s_periode_A.name)
sp_diff.name = 'trend_{0}'.format(species)
sp_diff_score = sp_diff.apply(classifier, bins=bins, labels=labels)
sp_diff_score.name = 'score_{0}'.format(species)
return sp_diff, sp_diff_score
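A minimal usage sketch (made-up counts, hypothetical bins and labels) of what the function returns: the per-hok difference between the two periodes and its classified score, both labelled with the species name:

import pandas as pd

a = pd.Series({'0_625000': 12, '250_625000': 5})  # species counts in periode A
b = pd.Series({'0_625000': 9, '250_625000': 7})   # species counts in periode B
trend, score = calc_clo_1543(a, b, bins=[-999, -2, 2, 999],
                             labels=['afname', 'stabiel', 'toename'], species='vogel')
# trend: per-hok differences (3 and -2 here), named 'trend_vogel'
# score: the matching bin labels from `classifier`, named 'score_vogel'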