Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Wit, Allard de
grompy
Commits
2536cae3
Commit
2536cae3
authored
Oct 21, 2021
by
Wit, Allard de
Browse files
Upgrade to grompy 1.2
parent
8353c37f
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
README.md
View file @
2536cae3
...
...
@@ -126,6 +126,8 @@ run `grompy check` first.
### loading
*See information below for converting grompy 1.1 to 1.2 databases*
The final step is to load the parcel information and satellite observations into the database tables. This can be
done with the
`grompy load <grompy_yaml>`
command. Grompy will now show the following output:
```
commandline
...
...
@@ -147,6 +149,11 @@ server with sufficient capacity to handle multiple streams of data. Note that gr
into one SQLite database, but write locks on the database will cause delays in processing so this is not
recommended.
*Important*
: grompy 1.2 offers additional parcel selection options compared to grompy 1.2. To convert a grompy 1.1 database
to a grompy 1.2 database it is only necessary to reload the parcel information. The structure of the Sentinel1/2
parcel observations did not change from 1.1 to 1.2. A special option has been added to only reload the parcel
info in a grompy database:
`grompy load --parcel_info_only <grompy_yaml>`
## Accessing data processed by grompy
See the jupyter notebooks in the
`notebooks/`
directory in the grompy git repository.
\ No newline at end of file
grompy/__init__.py
View file @
2536cae3
# -*- coding: utf-8 -*-
# Copyright (c) 2021 Wageningen Environmental Research
# Allard de Wit (allard.dewit@wur.nl),
April
2021
# Allard de Wit (allard.dewit@wur.nl),
October
2021
"""Grompy is a tool to process and access parcel-based satellite observations from GroenMonitor.nl.
"""
__version__
=
"1.2.0"
from
.dap
import
DataAccessProvider
from
.cmd
import
cli
__version__
=
"1.1.1"
\ No newline at end of file
grompy/cmd.py
View file @
2536cae3
...
...
@@ -8,6 +8,7 @@ import click
from
.load_data
import
load_data
from
.check_files
import
check_grompy_inputs
from
.init
import
init
from
.
import
__version__
@
click
.
group
()
def
cli
():
...
...
@@ -35,9 +36,17 @@ def check_grompy(grompy_yaml):
@
click
.
command
(
"load"
)
@
click
.
argument
(
"grompy_yaml"
,
type
=
click
.
Path
(
exists
=
True
))
def
load_grompy
(
grompy_yaml
):
@
click
.
option
(
"--parcel_info_only"
,
is_flag
=
True
)
def
load_grompy
(
grompy_yaml
,
parcel_info_only
):
grompy_yaml
=
Path
(
grompy_yaml
)
load_data
(
grompy_yaml
)
load_data
(
grompy_yaml
,
parcel_info_only
)
@
click
.
group
()
@
click
.
version_option
(
__version__
)
@
click
.
pass_context
def
cli
(
ctx
):
pass
cli
.
add_command
(
init_grompy
)
...
...
grompy/dap.py
View file @
2536cae3
...
...
@@ -15,8 +15,27 @@ class Container:
class
DataAccessProvider
:
def
__init__
(
self
,
grompy_yaml
,
fieldID
=
None
,
area_gt
=
None
,
pixcount_gt
=
None
,
provincie
=
None
,
limit
=
None
,
gws_gewasc
=
None
):
"""grompy.DataAccessProvider allow to query grompy databases and iterate through the selected parcels.
:param grompy_yaml: path to the grompy YAML file
:param fieldID: A list of fieldIDs that should be selected. Note that the number of fieldIDs that can be
provided is limited but unknown (depending on the SQL parser). Above 1000 FieldIDs warning
messages will be displayed. When this keyword is used, all other selection criteria will
be ignored.
:param area_gt: Only selected parcels greater than X ha
:param pixcount_gt: Only select parcels with a pixel count greater than this number
:param provincie: Only select parcels within this province
:param limit: Limit the number of parcels selected.
:param gws_gewasc: Select only parcels with given gws_gewasc
:param cat_gewasc: Select only parcels with given cat_gewasc
:param rdx_bounds: Select only parcels within the [min, max] RD X coordinates
:param rdy_bounds: Select only parcels within the [min, max] RD Y coordinates
:param lon_bounds: Select only parcels within the [min, max] longitude
:param lat_bounds: Select only parcels within the [min, max] latitude
"""
def
__init__
(
self
,
grompy_yaml
,
fieldID
=
None
,
area_gt
=
None
,
pixcount_gt
=
None
,
provincie
=
None
,
limit
=
None
,
gws_gewasc
=
None
,
cat_gewasc
=
None
,
rdx_bounds
=
None
,
rdy_bounds
=
None
,
lon_bounds
=
None
,
lat_bounds
=
None
):
grompy_yaml
=
Path
(
grompy_yaml
)
if
not
grompy_yaml
.
exists
():
...
...
@@ -46,6 +65,10 @@ class DataAccessProvider:
if
not
isinstance
(
fieldID
,
(
list
,
tuple
,
set
)):
msg
=
"FieldID must be provide as a list, set or tuple"
raise
RuntimeError
(
msg
)
if
len
(
fieldID
)
>
1000
:
msg
=
f
"Warning: your are passing
{
len
(
fieldID
)
}
fieldIDs, this may be too many. "
\
f
"Consider splitting your selected fieldIDs in smaller batches or use another "
\
f
"selection criteria."
self
.
perc_stmt
.
append_whereclause
(
self
.
tbl_perc_info
.
c
.
fieldID
.
in_
(
fieldID
))
else
:
if
area_gt
is
not
None
:
...
...
@@ -56,6 +79,17 @@ class DataAccessProvider:
self
.
perc_stmt
.
append_whereclause
(
self
.
tbl_perc_info
.
c
.
pixcount
>
pixcount_gt
)
if
provincie
is
not
None
:
self
.
perc_stmt
.
append_whereclause
(
self
.
tbl_perc_info
.
c
.
provincie
==
provincie
)
if
cat_gewasc
is
not
None
:
cat_gewasc
=
str
(
cat_gewasc
).
strip
()
self
.
perc_stmt
.
append_whereclause
(
self
.
tbl_perc_info
.
c
.
cat_gewasc
==
cat_gewasc
)
if
rdx_bounds
is
not
None
:
self
.
perc_stmt
.
append_whereclause
(
sa
.
between
(
self
.
tbl_perc_info
.
c
.
rdx
,
rdx_bounds
[
0
],
rdx_bounds
[
1
]))
if
rdy_bounds
is
not
None
:
self
.
perc_stmt
.
append_whereclause
(
sa
.
between
(
self
.
tbl_perc_info
.
c
.
rdy
,
rdy_bounds
[
0
],
rdy_bounds
[
1
]))
if
lon_bounds
is
not
None
:
self
.
perc_stmt
.
append_whereclause
(
sa
.
between
(
self
.
tbl_perc_info
.
c
.
longitude
,
lon_bounds
[
0
],
lon_bounds
[
1
]))
if
lat_bounds
is
not
None
:
self
.
perc_stmt
.
append_whereclause
(
sa
.
between
(
self
.
tbl_perc_info
.
c
.
latitude
,
lat_bounds
[
0
],
lat_bounds
[
1
]))
s
=
sa
.
select
([
sa
.
func
.
count
()]).
select_from
(
self
.
perc_stmt
)
self
.
parcel_count
=
s
.
execute
().
fetchone
()[
0
]
...
...
@@ -104,4 +138,3 @@ class DataAccessProvider:
def
__len__
(
self
):
return
self
.
parcel_count
grompy/load_data.py
View file @
2536cae3
...
...
@@ -15,7 +15,7 @@ import geopandas as gpd
import
numpy
as
np
import
yaml
from
.util
import
prepare_db
,
printProgressBar
,
Process
,
make_path_absolute
,
open_DB_connection
from
.util
import
prepare_db
,
printProgressBar
,
Process
,
make_path_absolute
,
open_DB_connection
,
get_latlon_from_RD
dummy_date
=
"19000101"
...
...
@@ -42,21 +42,33 @@ def load_parcel_info(grompy_yaml, dsn, counts_file, shape_file, table_name):
"""
shp_fname
=
make_path_absolute
(
grompy_yaml
,
Path
(
shape_file
))
df
=
gpd
.
read_file
(
shp_fname
)
df
[
"area_ha"
]
=
df
.
geometry
.
area
/
1e4
df
=
df
.
set_index
(
"fieldid"
)
# Compute latitude/longitude of field centroids
r
=
[]
for
row
in
df
.
itertuples
():
x
,
y
=
float
(
row
.
geometry
.
centroid
.
x
),
float
(
row
.
geometry
.
centroid
.
y
)
lon
,
lat
=
get_latlon_from_RD
(
x
,
y
)
r
.
append
({
"fieldID"
:
row
.
Index
,
"latitude"
:
lat
,
"longitude"
:
lon
,
"rdx"
:
x
,
"rdy"
:
y
})
df_latlon
=
pd
.
DataFrame
(
r
).
set_index
(
"fieldID"
)
df
=
df
.
join
(
df_latlon
)
# add pixel counts
fname_counts
=
make_path_absolute
(
grompy_yaml
,
Path
(
counts_file
))
df_counts
=
pd
.
read_csv
(
fname_counts
)
df_counts
.
set_index
(
"field_ID"
,
inplace
=
True
)
df
[
"pixcount"
]
=
df_counts
[
"pixcount"
]
df
=
df
.
join
(
df_counts
)
df_out
=
pd
.
DataFrame
({
"fieldID"
:
df
.
index
,
"year"
:
df
.
year
.
astype
(
np
.
int32
),
"pixcount"
:
df
.
pixcount
.
astype
(
np
.
int32
),
"area_ha"
:
df
.
area_ha
,
"area_ha"
:
df
.
geometry
.
area
/
1e4
,
"rdx"
:
df
.
rdx
,
"rdy"
:
df
.
rdy
,
"longitude"
:
df
.
longitude
,
"latitude"
:
df
.
latitude
,
"cat_gewasc"
:
df
.
cat_gewasc
.
apply
(
str
),
"gws_gewasc"
:
df
.
gws_gewasc
.
astype
(
np
.
int32
),
"gws_gewas"
:
df
.
gws_gewas
.
apply
(
str
),
"provincie"
:
df
.
provincie
.
apply
(
str
),
"gemeente"
:
df
.
gemeente
.
apply
(
str
),
"regio"
:
df
.
regio
.
apply
(
str
),
...
...
@@ -72,9 +84,12 @@ def load_parcel_info(grompy_yaml, dsn, counts_file, shape_file, table_name):
sa
.
Column
(
"year"
,
sa
.
Integer
),
sa
.
Column
(
"pixcount"
,
sa
.
Integer
),
sa
.
Column
(
"area_ha"
,
sa
.
Float
),
sa
.
Column
(
"rdx"
,
sa
.
Float
),
sa
.
Column
(
"rdy"
,
sa
.
Float
),
sa
.
Column
(
"longitude"
,
sa
.
Float
),
sa
.
Column
(
"latitude"
,
sa
.
Float
),
sa
.
Column
(
"cat_gewasc"
,
sa
.
Text
),
sa
.
Column
(
"gws_gewasc"
,
sa
.
Integer
),
sa
.
Column
(
"gws_gewas"
,
sa
.
Text
),
sa
.
Column
(
"provincie"
,
sa
.
Text
),
sa
.
Column
(
"gemeente"
,
sa
.
Text
),
sa
.
Column
(
"regio"
,
sa
.
Text
),
...
...
@@ -226,7 +241,7 @@ def monitor_parallel_loading(process_list, parent_conn, lines_per_dataset):
print
(
e
.
traceback
)
def
load_data
(
grompy_yaml
):
def
load_data
(
grompy_yaml
,
parcel_info_only
):
"""Loads data point to by the YAML config file.
:param grompy_yaml:
...
...
@@ -240,5 +255,6 @@ def load_data(grompy_yaml):
parcel_info
=
grompy_conf
.
pop
(
"parcel_info"
)
load_parcel_info
(
grompy_yaml
,
**
parcel_info
)
process_list
,
parent_conn
,
lines_per_dataset
=
start_parallel_loading
(
grompy_yaml
,
grompy_conf
[
"datasets"
])
monitor_parallel_loading
(
process_list
,
parent_conn
,
lines_per_dataset
)
if
not
parcel_info_only
:
process_list
,
parent_conn
,
lines_per_dataset
=
start_parallel_loading
(
grompy_yaml
,
grompy_conf
[
"datasets"
])
monitor_parallel_loading
(
process_list
,
parent_conn
,
lines_per_dataset
)
grompy/util.py
View file @
2536cae3
...
...
@@ -7,6 +7,7 @@ from pathlib import Path
import
sqlalchemy.exc
from
sqlalchemy
import
MetaData
,
Table
,
Column
,
Integer
,
Date
,
Float
,
Text
,
create_engine
import
pyproj
def
take_first
(
iter
):
...
...
@@ -126,3 +127,11 @@ def open_DB_connection(grompy_yaml, dsn):
return
engine
def
get_latlon_from_RD
(
x
,
y
,
_cache
=
{}):
rd_stelsel
=
"epsg:7415"
try
:
rd2wgs84
=
_cache
[
"rd2wgs84"
]
except
KeyError
:
rd2wgs84
=
_cache
[
"rd2wgs84"
]
=
pyproj
.
Proj
(
init
=
rd_stelsel
)
return
rd2wgs84
(
x
,
y
,
inverse
=
True
)
notebooks/accessing data with grompy.ipynb
View file @
2536cae3
This diff is collapsed.
Click to expand it.
pyproject.toml
View file @
2536cae3
...
...
@@ -12,10 +12,10 @@ description-file = "README.md"
requires
=
[
"click>=7.1"
,
"geopandas>= 0.8"
,
"pyyaml>= 5.
4
"
,
"pyyaml>= 5.
3
"
,
"sqlalchemy>= 1.4"
,
]
requires-python=">
=
3.6
"
[tool.flit.scripts]
grompy
=
"grompy:cli"
\ No newline at end of file
grompy
=
"grompy:cli"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment