# fayezor of Gamalon
# 4/21/2020 - 12:03 AM
#
# Remove node descriptions that match node labels

import os
import warnings
import json
from pathlib import Path

from typing import Dict, Tuple

import deku
from deku.catalog import Catalog
from deku.fb_util import fetch_catalog
from deku.catalog import json_to_catalog, catalog_to_json

# Directory, relative to the working directory, where `main` writes the
# cleaned catalog JSON.
FIXTURES = Path('tests/fixtures/')

# Organization passed to `fetch_catalog`: the 'dev' profile maps to
# 'askcodyondev'; every other profile, or an unset PROFILE, maps to 'askcody'.
# `.get` is used instead of indexing so that importing this module does not
# raise KeyError when PROFILE is unset -- `main` treats a missing PROFILE as a
# warning, which could never be reached if the import itself failed.
ORGANIZATION_ID = (
    'askcodyondev' if os.environ.get('PROFILE') == 'dev'
    else 'askcody'
)


def get_catalog(
        app_id: int,
) -> Tuple[
        Catalog,
        Dict[str, Dict[str, str]],
        Dict[Tuple[str, str], str],
]:
    """Load a saved catalog from Firestore together with its descriptions.

    The catalog is looked up with `fetch_catalog` using the module-level
    ORGANIZATION_ID and the given `app_id`; the raw catalog is read from the
    'dekuCatalog' key of the fetched record.

    Returns a 3-tuple:
    - catalog: the Catalog object built from the raw catalog via
        `json_to_catalog`
    - node_descriptions: node descriptions grouped by entity type, e.g.
        {
            'entity_type': {
                'product1': 'description of product1',
                'product2': 'description of product2',
            }
        }
    - edge_descriptions: edge descriptions keyed by (source, target), e.g.
        {
            ('origin', 'target'): 'description of edge'
        }

    """
    raw_catalog = fetch_catalog(ORGANIZATION_ID, app_id)['dekuCatalog']

    # Each raw entity unpacks into four fields; the fourth is not needed here.
    node_descriptions: Dict[str, Dict[str, str]] = {}
    for entity_type, name, description, _ in raw_catalog['entities']:
        if entity_type not in node_descriptions:
            node_descriptions[entity_type] = {}
        node_descriptions[entity_type][name] = description

    # Each graph link carries its description under 'attr_dict'.
    edge_descriptions = {}
    for link in raw_catalog['graph']['links']:
        link_description = link['attr_dict']['description']
        edge_descriptions[link['source'], link['target']] = link_description

    return json_to_catalog(raw_catalog), node_descriptions, edge_descriptions


def update_entity_by_description(
        catalog: deku.catalog.Catalog,
        node_descriptions: Dict[str, Dict[str, str]],
) -> deku.catalog.Catalog:
    """Blank out entity descriptions that merely repeat the entity label.

    `node_descriptions` maps entity type -> {label: description}. Every label
    whose description is exactly equal to the label is looked up in `catalog`
    with `catalog.find_entity`, removed from `catalog.entities`, and added
    back through `catalog.add_entity` with an empty description; its type,
    label and synonyms are carried over.

    The catalog is modified in place and the same object is returned.

    Raises ValueError if `catalog.find_entity` returns None for a label.
    """
    # Collect all affected labels up front, before the catalog is touched.
    redundant_labels = [
        label
        for descriptions in node_descriptions.values()
        for label, description in descriptions.items()
        if label == description
    ]

    for redundant_label in redundant_labels:
        entity = catalog.find_entity(redundant_label)
        if entity is None:
            raise ValueError('No entity with this label was found.')

        # The entity is replaced rather than edited: drop the old one and
        # re-add it with '' as its description.
        entity_type, entity_label, _, synonyms = entity
        catalog.entities.remove(entity)
        catalog.add_entity(entity_type, entity_label, '', set(synonyms))

    return catalog


def main():
    """Fetch the catalog for the current PROFILE, clean it, and save it.

    The PROFILE environment variable selects the app id. If PROFILE is not
    set, a warning is issued and the fetch proceeds with an app id of None;
    the 'staging' profile also maps to None. After descriptions equal to
    their labels are blanked, the catalog is written as JSON to
    FIXTURES / 'askcody_catalog.json'.

    Raises ValueError if PROFILE is set to an unrecognized value.
    """
    # App id for each recognized profile.
    app_ids = {
        'dev': 27,
        'integration': 19,
        'staging': None,
        'production': 23,
    }

    app_id = None
    profile = os.environ.get('PROFILE')
    if profile is None:
        warnings.warn('No PROFILE environment variable is set.')
    elif profile in app_ids:
        app_id = app_ids[profile]
    else:
        raise ValueError(
            f'PROFILE env variable {profile} '
            'not recognized. PROFILE must be one of: '
            'dev, integration, staging, production'
        )

    # Edge descriptions are returned by get_catalog but not needed here.
    catalog, node_descriptions, _ = get_catalog(app_id)

    update_entity_by_description(catalog, node_descriptions)

    output_path = FIXTURES / 'askcody_catalog.json'
    with output_path.open('w') as out_file:
        json.dump(catalog_to_json(catalog), out_file)


# Run the catalog clean-up only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()