Translating + Rotating Historical Geometry

I recently discovered (due to new aerial imagery becoming available) that a portion of all the geometry data we have in farmOS was both translated and rotated relative to its real-life location.

I thought I’d share how I fixed it since it might be interesting to other folks.

Notes;

  • Coordinates have been replaced with example values.
  • The code isn’t super beautiful and there’s no comments since this was a one-off script, but feel free to ask questions and I’ll spend time answering them and/or adding comments according to folks’ interest.
  • This approach reads/writes directly to the database and could destroy all your data - you’ve now been warned!
  • This only works in farmOS 1.x. In 2.x the script would need to manipulate several tables since intrinsic/movement geometries are stored separately in 2.x.

rewrite_lower_field_geometries.py

#!/bin/env python3

import argparse
import logging
import traceback
import asyncio
from math import sin, cos, tan, pi
import math
import re
import os.path
import time
import datetime

import pyproj

from geomet import wkb, wkt
import pygeohash
import shapely
from shapely.geometry import shape
from shapely.ops import transform
from sqlalchemy import (
    select,
)
from sqlalchemy import func, and_, distinct, Float
import sqlalchemy as sa
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.sql.expression import select, join, Label, cast, update, bindparam
from sqlalchemy_aio import ASYNCIO_STRATEGY


class CallableAccessor(object):
    def __init__(self, c):
        self._c = c

    def __getattr__(self, name):
        return self._c(name)

def DictAccessor(d):
    return CallableAccessor(lambda name: d.get(name))

EPSG4326 = pyproj.CRS('EPSG:4326')
EPSG3857 = pyproj.CRS('EPSG:3857')

ADJ_BB_WKT = 'POLYGON ((-64.9264683076 22.3661204274, -64.9252666779 22.3661204274, -64.9252666779 22.3650212285, -64.9264683076 22.3650212285, -64.9264683076 22.3661204274))'

SRC_NW_WKT = "POINT(-64.92607867061 22.36609202721699)"
SRC_SW_WKT = "POINT(-64.9260662941 22.365790632528)"
DST_NW_WKT = "POINT(-64.92610355235188 22.36606679499702)"
DST_SW_WKT = "POINT(-64.92612232781495 22.365766532392354)"

def angle_between(line_a, line_b):
    (ax, ay), (cx, cy) = line_a[0].coords[0], line_a[1].coords[0]
    (bx, by), (dx, dy) = line_b[0].coords[0], line_b[1].coords[0]

    α0 = math.atan2(cy - ay, cx - ax)
    α1 = math.atan2(dy - by, dx - bx)

    return α1 - α0

async def async_main(db):
    t = db.tables

    adj_bb = shape(wkt.loads(ADJ_BB_WKT))

    project = pyproj.Transformer.from_crs(EPSG4326, EPSG3857, always_xy=True).transform
    iproject = pyproj.Transformer.from_crs(EPSG3857, EPSG4326, always_xy=True).transform

    src_nw = transform(project, shape(wkt.loads(SRC_NW_WKT)))
    src_sw = transform(project, shape(wkt.loads(SRC_SW_WKT)))

    dst_nw = transform(project, shape(wkt.loads(DST_NW_WKT)))
    dst_sw = transform(project, shape(wkt.loads(DST_SW_WKT)))

    xoff = dst_nw.coords[0][0] - src_nw.coords[0][0]
    yoff = dst_nw.coords[0][1] - src_nw.coords[0][1]

    translated_but_unrotated_src_sw = shapely.affinity.translate(src_sw, xoff, yoff)

    rotation_radians = angle_between([dst_nw, translated_but_unrotated_src_sw], [dst_nw, dst_sw])

    def fix_geometry(g):
        g = transform(project, g)
        g = shapely.affinity.translate(g, xoff, yoff)
        g = shapely.affinity.rotate(g, rotation_radians, origin=dst_nw, use_radians=True)
        g = transform(iproject, g)
        return g

    query = (
        select([
            t.FieldDataFieldFarmGeofield.entity_type,
            t.FieldDataFieldFarmGeofield.entity_id,
            t.FieldDataFieldFarmGeofield.deleted,
            t.FieldDataFieldFarmGeofield.delta,
            t.FieldDataFieldFarmGeofield.language,
            t.FieldDataFieldFarmGeofield.field_farm_geofield_geom,
        ])
        .select_from(t.FieldDataFieldFarmGeofield)
    )

    async with db.engine.connect() as conn:
        async with conn.begin():

            res = await (await conn.execute(query)).fetchall()

            for geofield in res:
                geometry = geofield['field_farm_geofield_geom']
                if geometry is None:
                    continue

                geometry = wkb.loads(geometry)
        
                geometry_shape = shape(geometry)
        
                if not geometry_shape.intersects(adj_bb):
                    continue
        
                geometry_shape = fix_geometry(geometry_shape)
        
                geometry_wkb = geometry_shape.wkb
                geometry_type = geometry['type'].lower()

                geohash = pygeohash.encode(geometry_shape.centroid.y, geometry_shape.centroid.x)
        
                geometry_bounds = geometry_shape.bounds
        
                data = dict(
                    b_entity_type=geofield['entity_type'],
                    b_entity_id=geofield['entity_id'],
                    b_deleted=geofield['deleted'],
                    b_delta=geofield['delta'],
                    b_language=geofield['language'],
                    field_farm_geofield_geom=geometry_wkb,
                    field_farm_geofield_geo_type=geometry_type,
                    field_farm_geofield_lat="{:.12f}".format(geometry_shape.centroid.y),
                    field_farm_geofield_lon="{:.12f}".format(geometry_shape.centroid.x),
                    field_farm_geofield_left="{:.12f}".format(geometry_bounds[0]),
                    field_farm_geofield_top="{:.12f}".format(geometry_bounds[1]),
                    field_farm_geofield_right="{:.12f}".format(geometry_bounds[2]),
                    field_farm_geofield_bottom="{:.12f}".format(geometry_bounds[3]),
                    field_farm_geofield_geohash=geohash
                )

                await conn.execute(update(t.FieldDataFieldFarmGeofield).where(and_(
                              t.FieldDataFieldFarmGeofield.entity_type == bindparam('b_entity_type'),
                              t.FieldDataFieldFarmGeofield.entity_id == bindparam('b_entity_id'),
                              t.FieldDataFieldFarmGeofield.deleted == bindparam('b_deleted'),
                              t.FieldDataFieldFarmGeofield.delta == bindparam('b_delta'),
                              t.FieldDataFieldFarmGeofield.language == bindparam('b_language'))), [data])

                await conn.execute(update(t.FieldRevisionFieldFarmGeofield).where(and_(
                              t.FieldRevisionFieldFarmGeofield.entity_type == bindparam('b_entity_type'),
                              t.FieldRevisionFieldFarmGeofield.entity_id == bindparam('b_entity_id'),
                              t.FieldRevisionFieldFarmGeofield.deleted == bindparam('b_deleted'),
                              t.FieldRevisionFieldFarmGeofield.delta == bindparam('b_delta'),
                              t.FieldRevisionFieldFarmGeofield.language == bindparam('b_language'))), [data])

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--db-connect-spec", help="The specification for connecting to the db", type=str,
                        default='mysql+pymysql://farm:farm@db:3306/farm')
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG)

    engine = sa.create_engine(
        args.db_connect_spec,
        # echo=True,
        strategy=ASYNCIO_STRATEGY,
        pool_pre_ping=True,
    )
    engine.hide_parameters = False
    Base = automap_base()

    # Copied from https://docs.sqlalchemy.org/en/13/orm/extensions/automap.html#overriding-naming-schemes
    def camelize_classname(base, tablename, table):
        "Produce a 'camelized' class name, e.g. "
        "'words_and_underscores' -> 'WordsAndUnderscores'"

        return str(tablename[0].upper() + \
                re.sub(r'_([a-z])', lambda m: m.group(1).upper(), tablename[1:]))

    Base.prepare(engine.sync_engine, reflect=True, classname_for_table=camelize_classname)

    db = DictAccessor({
        'engine': engine,
        'Base': Base,
        'tables': Base.classes,
    })

    import handlers  # @UnusedImport

    loop = asyncio.get_event_loop()
    loop.run_until_complete(async_main(db))

if __name__ == '__main__':
    main()

pyproject.toml

[tool.poetry]
name = "farmos-ad-hoc-script"
version = "0.1.0"
description = ""
authors = ["Your Name <you@example.com>"]

[tool.poetry.dependencies]
python = "^3.8"
sqlalchemy = "^1.3.16"
sqlalchemy_aio = "^0.15.0"
PyMySQL = "^0.9.3"
aiohttp = "^3.6.2"
aiohttp_jinja2 = "^1.2.0"
geomet = "^0.2.1"
pygeohash = "^1.2.0"
shapely = "^1.7.0"
pyproj = "^3.0.1"

[tool.poetry.dev-dependencies]

[build-system]
requires = ["poetry>=0.12"]
build-backend = "poetry.masonry.api"
2 Likes

Interesting! Thanks for sharing :smiley:

I’m surprised how straight forward the translation + rotation logic is (which is easy to say considering I didn’t have to figure it out from scratch - ha!). Most of the code is just wiring up sqlalchemy and is equally interesting…I’ve been curious how that might be done.

Question: would it be possible to do something like this in QGIS and update the database via farm_wfs or similar? Assuming there’s a reason you took this approach though.

1 Like

I’ll admit there were a few head-scratching moments while I was working it out for sure. The biggest one was that I needed to reproject the geometries to EPSG:4326 in order to avoid some horrible skewing distortion. (presumably this would depend on where in the world you are)

Good question!

Yeah, that would work but the 1.x version of my WFS module only supports areas and I wanted to update all the areas/plantings/logs (historical and current) within the bounding box to match their actual locations.

With 2.x the story would be better. The 2.x WFS module could update any asset’s location, but it still wouldn’t update the historical log geometries.

2 Likes