A production Elasticsearch index got out of sync with the development schema. I wanted to migrate the data to the new schema, so the first step was to find all the differences between the two mappings. I couldn’t find an existing tool online, so I decided to write a small script using the Python deepdiff module.
First, we export each index's mapping (its schema) to a JSON file:
# Export the mapping of the local development index (GET is curl's default method)
curl "http://localhost:9200/my_index/_mapping?pretty" -o my_index_schema-dev.json
# Export the mapping of the same index from the production server
curl "http://prod_host:9200/my_index/_mapping?pretty" -o my_index_schema-prod.json
Now we can use the Python deepdiff module to compare the two schemas:
import argparse
import json
from deepdiff import DeepDiff
def load_json_file(file_path: str):
    """Load and parse JSON data from a file.

    Args:
        file_path: Path to the JSON file to read.

    Returns:
        The decoded JSON document, as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Read as UTF-8 explicitly: without it, open() falls back to the
    # platform's locale encoding, which can mis-decode or reject JSON
    # containing non-ASCII characters.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
def compare_schemas(schema1: dict, schema2: dict) -> str:
    """Compare two Elasticsearch index schemas.

    Args:
        schema1: The first schema, already parsed from JSON (not a file path
            or raw JSON string).
        schema2: The second schema, already parsed from JSON.

    Returns:
        The human-readable report produced by ``DeepDiff.pretty()``.
    """
    # ignore_order=True asks DeepDiff to disregard the ordering of items in
    # lists, so only content differences are reported.
    diff = DeepDiff(schema1, schema2, ignore_order=True)
    return diff.pretty()
def main(schema1_path: str, schema2_path: str):
    """Load both schema files, diff them, and print the report to stdout.

    Args:
        schema1_path: Path to the first schema JSON file.
        schema2_path: Path to the second schema JSON file.
    """
    # Files are loaded in argument order; the report is computed before
    # anything is printed.
    first, second = (
        load_json_file(path) for path in (schema1_path, schema2_path)
    )
    report = compare_schemas(first, second)
    print("Differences between the two schemas:", report, sep="\n")
if __name__ == '__main__':
    # Command-line entry point: two positional arguments, one per schema file.
    cli = argparse.ArgumentParser(
        description='Compare Elasticsearch index schemas.',
    )
    cli.add_argument(
        'schema1_path',
        type=str,
        help='Path to the first schema.',
    )
    cli.add_argument(
        'schema2_path',
        type=str,
        help='Path to the second schema.',
    )
    options = cli.parse_args()
    main(options.schema1_path, options.schema2_path)
Be First to Comment