Skip to content

Graph Module

The core module containing the NarrativeGraph class and related data structures.

NarrativeGraph

Bases: BaseGraph

Full narrative graph with triplet extraction, relations, and co-occurrences.

NarrativeGraph extracts subject-predicate-object triplets from text documents and builds both a directed relation graph and an undirected co-occurrence graph.

Source code in narrativegraphs/graphs.py
class NarrativeGraph(BaseGraph):
    """Full narrative graph with triplet extraction, relations, and co-occurrences.

    NarrativeGraph extracts subject-predicate-object triplets from text documents
    and builds both a directed relation graph and an undirected co-occurrence graph.
    """

    def __init__(
        self,
        triplet_extractor: TripletExtractor | None = None,
        cooccurrence_extractor: CooccurrenceExtractor | None = None,
        entity_mapper: Mapper | None = None,
        predicate_mapper: Mapper | None = None,
        sqlite_db_path: str | None = None,
        on_existing_db: Literal["stop", "overwrite", "reuse"] = "stop",
        n_cpu: int = 1,
    ):
        """Initialize a NarrativeGraph.

        Args:
            triplet_extractor: Extractor for subject-predicate-object triplets.
            cooccurrence_extractor: Extractor for entity co-occurrences.
            entity_mapper: Mapper for entity normalization.
            predicate_mapper: Mapper for predicate normalization.
            sqlite_db_path: Path to SQLite database file. If None, uses in-memory DB.
            on_existing_db: Behavior when database exists:
                - "stop": Raise error if DB contains data
                - "overwrite": Delete existing DB
                - "reuse": Use existing DB data
            n_cpu: Number of CPUs for parallel processing (-1 for all).
        """
        super().__init__(sqlite_db_path, on_existing_db)
        # The pipeline is bound to this graph's engine (self._engine, presumably
        # set up by BaseGraph.__init__ -- confirm) and receives every extractor
        # and mapper unchanged, including None values.
        self._pipeline = Pipeline(
            self._engine,
            triplet_extractor=triplet_extractor,
            cooccurrence_extractor=cooccurrence_extractor,
            entity_mapper=entity_mapper,
            predicate_mapper=predicate_mapper,
            n_cpu=n_cpu,
        )

    def fit(
        self,
        docs: list[str],
        doc_ids: list[int | str] | None = None,
        timestamps: list[datetime | date] | None = None,
        categories: (
            list[str | list[str]]
            | dict[str, list[str | list[str]]]
            | list[dict[str, str | list[str]]]
            | None
        ) = None,
    ) -> "NarrativeGraph":
        """Fit a narrative graph from documents.

        The docs can be accompanied by lists of IDs, timestamps and categories,
        each with the same length as docs. All arguments are forwarded unchanged
        to the pipeline's ``run`` method.

        Args:
            docs: Required argument, a list of documents as strings.
            doc_ids: Optional list of document ids. Same length as docs.
            timestamps: Optional list of document timestamps. Same length as docs.
            categories: Optional list of document categories. Supports single or
                multiple categories. A document can have a single or multiple labels
                per category. See further down for examples.

        Returns:
            A fitted NarrativeGraph instance (``self``, so calls can be chained).

        """
        self._pipeline.run(
            docs,
            doc_ids=doc_ids,
            timestamps=timestamps,
            categories=categories,
        )
        return self

    @property
    def predicates_(self) -> pd.DataFrame:
        """Predicates as a pandas DataFrame."""
        return self.predicates.as_df()

    @property
    def relations_(self) -> pd.DataFrame:
        """Relations as a pandas DataFrame."""
        return self.relations.as_df()

    @property
    def triplets_(self) -> pd.DataFrame:
        """Triplets as a pandas DataFrame."""
        return self.triplets.as_df()

    @property
    def relation_graph_(self) -> nx.DiGraph:
        """The full relation graph as a directed NetworkX graph.

        A new ``nx.DiGraph`` is built on every access. Nodes are keyed by the
        ``id`` of each node in the "relation" graph; edges run from ``from_id``
        to ``to_id``. The node and edge objects themselves are handed to
        NetworkX as the data element of each entry.
        """
        rg = self.graph.get_graph("relation")
        g = nx.DiGraph()
        # (key, data) pairs for nodes and (source, target, data) triples for edges.
        g.add_nodes_from((n.id, n) for n in rg.nodes)
        g.add_edges_from((e.from_id, e.to_id, e) for e in rg.edges)
        return g

    @classmethod
    def load(cls, file_path: str) -> "NarrativeGraph":
        """Load a NarrativeGraph from a SQLite database file.

        Delegates to the parent class's ``load``.

        Args:
            file_path: Path to a SQLite database to load a NarrativeGraph from.

        Returns:
            A NarrativeGraph object.
        """
        return super().load(file_path)  # noqa

predicates_ property

Predicates as a pandas DataFrame.

relations_ property

Relations as a pandas DataFrame.

triplets_ property

Triplets as a pandas DataFrame.

relation_graph_ property

The full relation graph as a directed NetworkX graph.

__init__(triplet_extractor=None, cooccurrence_extractor=None, entity_mapper=None, predicate_mapper=None, sqlite_db_path=None, on_existing_db='stop', n_cpu=1)

Initialize a NarrativeGraph.

Parameters:

Name Type Description Default
triplet_extractor TripletExtractor

Extractor for subject-predicate-object triplets.

None
cooccurrence_extractor CooccurrenceExtractor

Extractor for entity co-occurrences.

None
entity_mapper Mapper

Mapper for entity normalization.

None
predicate_mapper Mapper

Mapper for predicate normalization.

None
sqlite_db_path str

Path to SQLite database file. If None, uses in-memory DB.

None
on_existing_db Literal['stop', 'overwrite', 'reuse']

Behavior when the database already exists: "stop" raises an error if the DB contains data; "overwrite" deletes the existing DB; "reuse" uses the existing DB data.

'stop'
n_cpu int

Number of CPUs for parallel processing (-1 for all).

1
Source code in narrativegraphs/graphs.py
def __init__(
    self,
    triplet_extractor: TripletExtractor | None = None,
    cooccurrence_extractor: CooccurrenceExtractor | None = None,
    entity_mapper: Mapper | None = None,
    predicate_mapper: Mapper | None = None,
    sqlite_db_path: str | None = None,
    on_existing_db: Literal["stop", "overwrite", "reuse"] = "stop",
    n_cpu: int = 1,
):
    """Initialize a NarrativeGraph.

    Args:
        triplet_extractor: Extractor for subject-predicate-object triplets.
        cooccurrence_extractor: Extractor for entity co-occurrences.
        entity_mapper: Mapper for entity normalization.
        predicate_mapper: Mapper for predicate normalization.
        sqlite_db_path: Path to SQLite database file. If None, uses in-memory DB.
        on_existing_db: Behavior when database exists:
            - "stop": Raise error if DB contains data
            - "overwrite": Delete existing DB
            - "reuse": Use existing DB data
        n_cpu: Number of CPUs for parallel processing (-1 for all).
    """
    super().__init__(sqlite_db_path, on_existing_db)
    # The pipeline is bound to this graph's engine (self._engine, presumably
    # set up by the parent initializer -- confirm) and receives every
    # extractor and mapper unchanged, including None values.
    self._pipeline = Pipeline(
        self._engine,
        triplet_extractor=triplet_extractor,
        cooccurrence_extractor=cooccurrence_extractor,
        entity_mapper=entity_mapper,
        predicate_mapper=predicate_mapper,
        n_cpu=n_cpu,
    )

fit(docs, doc_ids=None, timestamps=None, categories=None)

Fit a narrative graph from documents. The docs can be accompanied by lists of IDs, timestamps and categories, each with the same length as docs.

Parameters:

Name Type Description Default
docs list[str]

Required argument, a list of documents as strings.

required
doc_ids list[int | str]

Optional list of document ids. Same length as docs.

None
timestamps list[datetime | date]

Optional list of document timestamps. Same length as docs.

None
categories list[str | list[str]] | dict[str, list[str | list[str]]] | list[dict[str, str | list[str]]]

Optional list of document categories. Supports single or multiple categories. A document can have a single or multiple labels per category. See further down for examples.

None

Returns:

Type Description
NarrativeGraph

A fitted NarrativeGraph instance.

Source code in narrativegraphs/graphs.py
def fit(
    self,
    docs: list[str],
    doc_ids: list[int | str] = None,
    timestamps: list[datetime | date] = None,
    categories: (
        list[str | list[str]]
        | dict[str, list[str | list[str]]]
        | list[dict[str, str | list[str]]]
    ) = None,
) -> "NarrativeGraph":
    """
    Fit a narrative graph from documents. The docs can be accompanied by lists with
    the same length of IDs, timestamps and categories.

    Args:
        docs: Required argument, a list of documents as strings.
        doc_ids: Optional list of document ids. Same length as docs.
        timestamps: Optional list of document timestamps. Same length as docs.
        categories: Optional list of document categories. Supports single or
            multiple categories. A document can have a single or multiple labels
            per category. See further down for examples.

    Returns:
        A fitted NarrativeGraph instance.

    """
    self._pipeline.run(
        docs,
        doc_ids=doc_ids,
        timestamps=timestamps,
        categories=categories,
    )
    return self

load(file_path) classmethod

Parameters:

Name Type Description Default
file_path str

Path to a SQLite database to load a NarrativeGraph from.

required

Returns:

Type Description
NarrativeGraph

A NarrativeGraph object

Source code in narrativegraphs/graphs.py
@classmethod
def load(cls, file_path: str) -> "NarrativeGraph":
    """Load a NarrativeGraph from a SQLite database file.

    Delegates to the parent class's ``load``.

    Args:
        file_path: Path to a SQLite database to load a NarrativeGraph from.

    Returns:
        A NarrativeGraph object.
    """
    loaded_graph = super().load(file_path)  # noqa
    return loaded_graph