Rewriting farmOS 1.x geometry WKB to be little-endian

Recently I ran into an issue where my 1.x database included some geometry which was in big-endian WKB format. I think in my case this was a result of custom tooling I have that interacts directly with the database, but I also think it may be possible for vanilla farmOS 1.x to write such geometry in very rare cases.

Anyway, the 2.x geometry handling only works with little-endian WKB so I needed to fix my data.

You can see if you’re affected with an SQL query like this (on MariaDB - other RDBMS may require a slightly different query);

SELECT entity_type, entity_id, SUBSTR(HEX(field_farm_geofield_geom), 1, 20)
  FROM field_data_field_farm_geofield
  WHERE SUBSTR(HEX(field_farm_geofield_geom), 1, 2) = '00';

If that query returns any rows, it is likely that the 2.x migration will fail to copy those geometries.

To fix the problem, I used a python script that runs directly against the database;

WARNING: This code operates directly on the database and should not be run without adequate testing/validation on your part. You’ve been warned so it’s not my fault if this script ruins your data.

rewrite_fix_big_endian_geometries.py

#!/bin/env python3

# Usage: poetry run python rewrite_fix_big_endian_geometries.py --db-connect-spec='mysql+pymysql://farm:farm@localhost:3306/farm'

import datetime
from math import sin, cos, tan, pi
import math
import os.path
import sys
import time

import argparse
import asyncio
import logging
import re
import shapely.wkb
import shapely.wkt
from sqlalchemy import func, and_, distinct, Float
import sqlalchemy as sa
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.sql.expression import select, join, Label, cast, update, bindparam
from sqlalchemy_aio import ASYNCIO_STRATEGY


class CallableAccessor(object):
    def __init__(self, c):
        self._c = c

    def __getattr__(self, name):
        return self._c(name)

def DictAccessor(d):
    return CallableAccessor(lambda name: d.get(name))


async def async_main(db):
    t = db.tables

    query = (
        select([
            t.FieldDataFieldFarmGeofield.entity_type,
            t.FieldDataFieldFarmGeofield.entity_id,
            t.FieldDataFieldFarmGeofield.deleted,
            t.FieldDataFieldFarmGeofield.delta,
            t.FieldDataFieldFarmGeofield.language,
            t.FieldDataFieldFarmGeofield.field_farm_geofield_geom,
        ])
        .select_from(t.FieldDataFieldFarmGeofield)
    )

    async with db.engine.connect() as conn:
        async with conn.begin():

            res = await (await conn.execute(query)).fetchall()

            for geofield in res:
                geometry_wkb = geofield['field_farm_geofield_geom']
                if geometry_wkb is None:
                    continue

                if geometry_wkb[:1] == b'\x01':
                    continue

                geometry = shapely.wkb.loads(geometry_wkb)

                print("Rewriting geometry for:", geofield['entity_type'], geofield['entity_id'], geometry)

                new_geometry_wkb = shapely.wkb.dumps(geometry)

                if new_geometry_wkb[:1] != b'\x01':
                    print("ERROR: Updated geometry not little-endian:", new_geometry_wkb[0], new_geometry_wkb)
                    sys.exit(1)

                data = dict(
                    b_entity_type=geofield['entity_type'],
                    b_entity_id=geofield['entity_id'],
                    b_deleted=geofield['deleted'],
                    b_delta=geofield['delta'],
                    b_language=geofield['language'],
                    field_farm_geofield_geom=new_geometry_wkb,
                )

                await conn.execute(update(t.FieldDataFieldFarmGeofield).where(and_(
                              t.FieldDataFieldFarmGeofield.entity_type == bindparam('b_entity_type'),
                              t.FieldDataFieldFarmGeofield.entity_id == bindparam('b_entity_id'),
                              t.FieldDataFieldFarmGeofield.deleted == bindparam('b_deleted'),
                              t.FieldDataFieldFarmGeofield.delta == bindparam('b_delta'),
                              t.FieldDataFieldFarmGeofield.language == bindparam('b_language'))), [data])

                await conn.execute(update(t.FieldRevisionFieldFarmGeofield).where(and_(
                              t.FieldRevisionFieldFarmGeofield.entity_type == bindparam('b_entity_type'),
                              t.FieldRevisionFieldFarmGeofield.entity_id == bindparam('b_entity_id'),
                              t.FieldRevisionFieldFarmGeofield.deleted == bindparam('b_deleted'),
                              t.FieldRevisionFieldFarmGeofield.delta == bindparam('b_delta'),
                              t.FieldRevisionFieldFarmGeofield.language == bindparam('b_language'))), [data])

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--db-connect-spec", help="The specification for connecting to the db", type=str,
                        default='mysql+pymysql://farm:farm@db:3306/farm')
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG)

    engine = sa.create_engine(
        args.db_connect_spec,
        # echo=True,
        strategy=ASYNCIO_STRATEGY,
        pool_pre_ping=True,
    )
    engine.hide_parameters = False
    Base = automap_base()

    # Copied from https://docs.sqlalchemy.org/en/13/orm/extensions/automap.html#overriding-naming-schemes
    def camelize_classname(base, tablename, table):
        "Produce a 'camelized' class name, e.g. "
        "'words_and_underscores' -> 'WordsAndUnderscores'"

        return str(tablename[0].upper() + \
                re.sub(r'_([a-z])', lambda m: m.group(1).upper(), tablename[1:]))

    Base.prepare(engine.sync_engine, reflect=True, classname_for_table=camelize_classname)

    db = DictAccessor({
        'engine': engine,
        'Base': Base,
        'tables': Base.classes,
    })

    loop = asyncio.get_event_loop()
    loop.run_until_complete(async_main(db))

if __name__ == '__main__':
    main()

I’ve uploaded the full source as a gist to GitHub: rewrite_fix_big_endian_geometries · GitHub for reference.

3 Likes