I recently discovered (due to new aerial imagery becoming available) that a portion of all the geometry data we have in farmOS was both translated and rotated relative to its real-life location.
I thought I’d share how I fixed it since it might be interesting to other folks.
Notes;
- Coordinates have been replaced with example values.
- The code isn’t super beautiful and there’s no comments since this was a one-off script, but feel free to ask questions and I’ll spend time answering them and/or adding comments according to folks’ interest.
- This approach reads/writes directly to the database and could destroy all your data - you’ve now been warned!
- This only works in farmOS 1.x. In 2.x the script would need to manipulate several tables since intrinsic/movement geometries are stored separately in 2.x.
rewrite_lower_field_geometries.py
#!/bin/env python3
import argparse
import logging
import traceback
import asyncio
from math import sin, cos, tan, pi
import math
import re
import os.path
import time
import datetime
import pyproj
from geomet import wkb, wkt
import pygeohash
import shapely
from shapely.geometry import shape
from shapely.ops import transform
from sqlalchemy import (
select,
)
from sqlalchemy import func, and_, distinct, Float
import sqlalchemy as sa
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.sql.expression import select, join, Label, cast, update, bindparam
from sqlalchemy_aio import ASYNCIO_STRATEGY
class CallableAccessor(object):
def __init__(self, c):
self._c = c
def __getattr__(self, name):
return self._c(name)
def DictAccessor(d):
return CallableAccessor(lambda name: d.get(name))
EPSG4326 = pyproj.CRS('EPSG:4326')
EPSG3857 = pyproj.CRS('EPSG:3857')
ADJ_BB_WKT = 'POLYGON ((-64.9264683076 22.3661204274, -64.9252666779 22.3661204274, -64.9252666779 22.3650212285, -64.9264683076 22.3650212285, -64.9264683076 22.3661204274))'
SRC_NW_WKT = "POINT(-64.92607867061 22.36609202721699)"
SRC_SW_WKT = "POINT(-64.9260662941 22.365790632528)"
DST_NW_WKT = "POINT(-64.92610355235188 22.36606679499702)"
DST_SW_WKT = "POINT(-64.92612232781495 22.365766532392354)"
def angle_between(line_a, line_b):
(ax, ay), (cx, cy) = line_a[0].coords[0], line_a[1].coords[0]
(bx, by), (dx, dy) = line_b[0].coords[0], line_b[1].coords[0]
α0 = math.atan2(cy - ay, cx - ax)
α1 = math.atan2(dy - by, dx - bx)
return α1 - α0
async def async_main(db):
t = db.tables
adj_bb = shape(wkt.loads(ADJ_BB_WKT))
project = pyproj.Transformer.from_crs(EPSG4326, EPSG3857, always_xy=True).transform
iproject = pyproj.Transformer.from_crs(EPSG3857, EPSG4326, always_xy=True).transform
src_nw = transform(project, shape(wkt.loads(SRC_NW_WKT)))
src_sw = transform(project, shape(wkt.loads(SRC_SW_WKT)))
dst_nw = transform(project, shape(wkt.loads(DST_NW_WKT)))
dst_sw = transform(project, shape(wkt.loads(DST_SW_WKT)))
xoff = dst_nw.coords[0][0] - src_nw.coords[0][0]
yoff = dst_nw.coords[0][1] - src_nw.coords[0][1]
translated_but_unrotated_src_sw = shapely.affinity.translate(src_sw, xoff, yoff)
rotation_radians = angle_between([dst_nw, translated_but_unrotated_src_sw], [dst_nw, dst_sw])
def fix_geometry(g):
g = transform(project, g)
g = shapely.affinity.translate(g, xoff, yoff)
g = shapely.affinity.rotate(g, rotation_radians, origin=dst_nw, use_radians=True)
g = transform(iproject, g)
return g
query = (
select([
t.FieldDataFieldFarmGeofield.entity_type,
t.FieldDataFieldFarmGeofield.entity_id,
t.FieldDataFieldFarmGeofield.deleted,
t.FieldDataFieldFarmGeofield.delta,
t.FieldDataFieldFarmGeofield.language,
t.FieldDataFieldFarmGeofield.field_farm_geofield_geom,
])
.select_from(t.FieldDataFieldFarmGeofield)
)
async with db.engine.connect() as conn:
async with conn.begin():
res = await (await conn.execute(query)).fetchall()
for geofield in res:
geometry = geofield['field_farm_geofield_geom']
if geometry is None:
continue
geometry = wkb.loads(geometry)
geometry_shape = shape(geometry)
if not geometry_shape.intersects(adj_bb):
continue
geometry_shape = fix_geometry(geometry_shape)
geometry_wkb = geometry_shape.wkb
geometry_type = geometry['type'].lower()
geohash = pygeohash.encode(geometry_shape.centroid.y, geometry_shape.centroid.x)
geometry_bounds = geometry_shape.bounds
data = dict(
b_entity_type=geofield['entity_type'],
b_entity_id=geofield['entity_id'],
b_deleted=geofield['deleted'],
b_delta=geofield['delta'],
b_language=geofield['language'],
field_farm_geofield_geom=geometry_wkb,
field_farm_geofield_geo_type=geometry_type,
field_farm_geofield_lat="{:.12f}".format(geometry_shape.centroid.y),
field_farm_geofield_lon="{:.12f}".format(geometry_shape.centroid.x),
field_farm_geofield_left="{:.12f}".format(geometry_bounds[0]),
field_farm_geofield_top="{:.12f}".format(geometry_bounds[1]),
field_farm_geofield_right="{:.12f}".format(geometry_bounds[2]),
field_farm_geofield_bottom="{:.12f}".format(geometry_bounds[3]),
field_farm_geofield_geohash=geohash
)
await conn.execute(update(t.FieldDataFieldFarmGeofield).where(and_(
t.FieldDataFieldFarmGeofield.entity_type == bindparam('b_entity_type'),
t.FieldDataFieldFarmGeofield.entity_id == bindparam('b_entity_id'),
t.FieldDataFieldFarmGeofield.deleted == bindparam('b_deleted'),
t.FieldDataFieldFarmGeofield.delta == bindparam('b_delta'),
t.FieldDataFieldFarmGeofield.language == bindparam('b_language'))), [data])
await conn.execute(update(t.FieldRevisionFieldFarmGeofield).where(and_(
t.FieldRevisionFieldFarmGeofield.entity_type == bindparam('b_entity_type'),
t.FieldRevisionFieldFarmGeofield.entity_id == bindparam('b_entity_id'),
t.FieldRevisionFieldFarmGeofield.deleted == bindparam('b_deleted'),
t.FieldRevisionFieldFarmGeofield.delta == bindparam('b_delta'),
t.FieldRevisionFieldFarmGeofield.language == bindparam('b_language'))), [data])
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--db-connect-spec", help="The specification for connecting to the db", type=str,
default='mysql+pymysql://farm:farm@db:3306/farm')
args = parser.parse_args()
logging.basicConfig(level=logging.DEBUG)
engine = sa.create_engine(
args.db_connect_spec,
# echo=True,
strategy=ASYNCIO_STRATEGY,
pool_pre_ping=True,
)
engine.hide_parameters = False
Base = automap_base()
# Copied from https://docs.sqlalchemy.org/en/13/orm/extensions/automap.html#overriding-naming-schemes
def camelize_classname(base, tablename, table):
"Produce a 'camelized' class name, e.g. "
"'words_and_underscores' -> 'WordsAndUnderscores'"
return str(tablename[0].upper() + \
re.sub(r'_([a-z])', lambda m: m.group(1).upper(), tablename[1:]))
Base.prepare(engine.sync_engine, reflect=True, classname_for_table=camelize_classname)
db = DictAccessor({
'engine': engine,
'Base': Base,
'tables': Base.classes,
})
import handlers # @UnusedImport
loop = asyncio.get_event_loop()
loop.run_until_complete(async_main(db))
if __name__ == '__main__':
main()
pyproject.toml
[tool.poetry]
name = "farmos-ad-hoc-script"
version = "0.1.0"
description = ""
authors = ["Your Name <you@example.com>"]
[tool.poetry.dependencies]
python = "^3.8"
sqlalchemy = "^1.3.16"
sqlalchemy_aio = "^0.15.0"
PyMySQL = "^0.9.3"
aiohttp = "^3.6.2"
aiohttp_jinja2 = "^1.2.0"
geomet = "^0.2.1"
pygeohash = "^1.2.0"
shapely = "^1.7.0"
pyproj = "^3.0.1"
[tool.poetry.dev-dependencies]
[build-system]
requires = ["poetry>=0.12"]
build-backend = "poetry.masonry.api"