# Source code for pyUSPTO.models.enriched_citations

"""models.enriched_citations - Data models for USPTO Enriched Citations API.

This module provides data models for representing responses from the USPTO
Enriched Cited Reference Metadata API (v3). These models cover enriched citation
records extracted from patent office actions using AI/NLP algorithms.
"""

import json
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from typing import Any

from pyUSPTO.models.utils import (
    parse_to_datetime_utc,
    serialize_datetime_as_naive,
)


# --- Enums for Categorical Data ---

class CitationCategoryCode(Enum):
    """Citation category codes indicating the relevance of cited documents.

    These are standard patent citation categories used in search reports:
        X - Particularly relevant if taken alone
        Y - Particularly relevant if combined with another document
        A - Technological background
        E - Earlier patent document published on or after the filing date
        L - Document cited for other reasons
        O - Non-written disclosure
        T - Theory or principle underlying the invention
        P - Intermediate document
        & - Member of the same patent family
        D - Document cited in the application

    Lookup by value is forgiving: surrounding whitespace and letter case are
    ignored, and the member name ``"AMPERSAND"`` is accepted as an alias for
    the ``"&"`` code.
    """

    X = "X"
    Y = "Y"
    A = "A"
    E = "E"
    L = "L"
    O = "O"  # noqa: E741
    T = "T"
    P = "P"
    AMPERSAND = "&"
    D = "D"

    @classmethod
    def _missing_(cls, value: Any) -> "CitationCategoryCode":
        """Resolve a value that failed the exact-match lookup.

        Called by ``Enum`` only after the direct value lookup has failed.
        String inputs are stripped and upper-cased, then compared against
        every member's value and name.

        Args:
            value: The value passed to ``CitationCategoryCode(...)``.

        Returns:
            CitationCategoryCode: The matching member.

        Raises:
            ValueError: If ``value`` is not a string or matches no member.
        """
        if isinstance(value, str):
            key = value.strip().upper()
            for member in cls:
                # The name only differs from the value for AMPERSAND ("&"),
                # so matching on it is what provides the ampersand alias.
                if key == member.value.upper() or key == member.name:
                    return member
        raise ValueError(f"{value!r} is not a valid {cls.__name__}")



# --- Data Models ---

[docs]
@dataclass(frozen=True)
class EnrichedCitation:
    """Represent a single enriched citation record from an office action.

    Attributes:
        id: Unique identifier for this citation record.
        patent_application_number: The application number (series code + serial number).
        cited_document_identifier: Identification of the cited patent document.
        publication_number: Publication number of the cited document.
        kind_code: Kind code of the cited document (e.g., "A1", "B2").
        country_code: Country code of the cited document.
        inventor_name_text: Inventor or owner name from the cited document.
        office_action_date: The date the office action was recorded.
        office_action_category: Category of the office action (e.g., "CTNF", "CTFR").
        citation_category_code: Relevance category code (X, Y, A, E, L, O, T, P, &, D).
        related_claim_number_text: Comma-separated claim numbers related to this citation.
        examiner_cited_reference_indicator: Whether the reference was cited by the examiner (Form PTO-892).
        applicant_cited_examiner_reference_indicator: Whether the citation was from Form PTO-1449.
        npl_indicator: Whether this is a non-patent literature citation.
        work_group_number: The work group number.
        group_art_unit_number: Four-digit art unit code for examiner assignment.
        tech_center: Technology center code (first two digits of art unit).
        quality_summary_text: Quality summary of the review status.
        passage_location_text: Pipe-delimited passage locations related to the citation.
        obsolete_document_identifier: Legacy document identifier from the IFW repository.
        create_user_identifier: Job identifier that created this record.
        create_date_time: Date and time the record was inserted in the database.
    """

    id: str = ""
    patent_application_number: str | None = None
    cited_document_identifier: str | None = None
    publication_number: str | None = None
    kind_code: str | None = None
    country_code: str | None = None
    inventor_name_text: str | None = None
    office_action_date: datetime | None = None
    office_action_category: str | None = None
    citation_category_code: str | None = None
    related_claim_number_text: str | None = None
    examiner_cited_reference_indicator: bool | None = None
    applicant_cited_examiner_reference_indicator: bool | None = None
    npl_indicator: bool | None = None
    work_group_number: str | None = None
    group_art_unit_number: str | None = None
    tech_center: str | None = None
    quality_summary_text: str | None = None
    passage_location_text: list[str] = field(default_factory=list)
    obsolete_document_identifier: str | None = None
    create_user_identifier: str | None = None
    create_date_time: datetime | None = None


[docs]
    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "EnrichedCitation":
        """Create an EnrichedCitation instance from a dictionary.

        Args:
            data: Dictionary containing enriched citation data from API response.

        Returns:
            EnrichedCitation: An instance of EnrichedCitation.
        """
        # Defensive handling for passage_location_text
        passage_location = data.get("passageLocationText", [])
        if not isinstance(passage_location, list):
            passage_location = []

        return cls(
            id=data.get("id", ""),
            patent_application_number=data.get("patentApplicationNumber"),
            cited_document_identifier=data.get("citedDocumentIdentifier"),
            publication_number=data.get("publicationNumber"),
            kind_code=data.get("kindCode"),
            country_code=data.get("countryCode"),
            inventor_name_text=data.get("inventorNameText"),
            office_action_date=parse_to_datetime_utc(data.get("officeActionDate")),
            office_action_category=data.get("officeActionCategory"),
            citation_category_code=data.get("citationCategoryCode"),
            related_claim_number_text=data.get("relatedClaimNumberText"),
            examiner_cited_reference_indicator=data.get(
                "examinerCitedReferenceIndicator"
            ),
            applicant_cited_examiner_reference_indicator=data.get(
                "applicantCitedExaminerReferenceIndicator"
            ),
            npl_indicator=data.get("nplIndicator"),
            work_group_number=data.get("workGroupNumber"),
            group_art_unit_number=data.get("groupArtUnitNumber"),
            tech_center=data.get("techCenter"),
            quality_summary_text=data.get("qualitySummaryText"),
            passage_location_text=passage_location,
            obsolete_document_identifier=data.get("obsoleteDocumentIdentifier"),
            create_user_identifier=data.get("createUserIdentifier"),
            create_date_time=parse_to_datetime_utc(data.get("createDateTime")),
        )



[docs]
    def to_dict(self) -> dict[str, Any]:
        """Convert the EnrichedCitation instance to a dictionary.

        Returns:
            Dict[str, Any]: Dictionary representation with camelCase keys.
        """
        d = {
            "id": self.id,
            "patentApplicationNumber": self.patent_application_number,
            "citedDocumentIdentifier": self.cited_document_identifier,
            "publicationNumber": self.publication_number,
            "kindCode": self.kind_code,
            "countryCode": self.country_code,
            "inventorNameText": self.inventor_name_text,
            "officeActionDate": (
                serialize_datetime_as_naive(self.office_action_date)
                if self.office_action_date
                else None
            ),
            "officeActionCategory": self.office_action_category,
            "citationCategoryCode": self.citation_category_code,
            "relatedClaimNumberText": self.related_claim_number_text,
            "examinerCitedReferenceIndicator": self.examiner_cited_reference_indicator,
            "applicantCitedExaminerReferenceIndicator": self.applicant_cited_examiner_reference_indicator,
            "nplIndicator": self.npl_indicator,
            "workGroupNumber": self.work_group_number,
            "groupArtUnitNumber": self.group_art_unit_number,
            "techCenter": self.tech_center,
            "qualitySummaryText": self.quality_summary_text,
            "passageLocationText": self.passage_location_text,
            "obsoleteDocumentIdentifier": self.obsolete_document_identifier,
            "createUserIdentifier": self.create_user_identifier,
            "createDateTime": (
                serialize_datetime_as_naive(self.create_date_time)
                if self.create_date_time
                else None
            ),
        }
        return {
            k: v
            for k, v in d.items()
            if v is not None and (not isinstance(v, list) or v)
        }





@dataclass(frozen=True)
class EnrichedCitationResponse:
    """Response from the Enriched Citations API search endpoint.

    The API returns a Solr-style response with `start`, `numFound`, and `docs`.
    The outer envelope key is `"response"`.

    Attributes:
        num_found: Total number of matching records.
        start: The start index of the first result in this page.
        docs: List of enriched citation records in this page.
        raw_data: Optional raw JSON data from the API response (for debugging).
            Excluded from equality comparison and from ``repr``.
    """

    num_found: int = 0
    start: int = 0
    docs: list[EnrichedCitation] = field(default_factory=list)
    raw_data: str | None = field(default=None, compare=False, repr=False)

    @property
    def count(self) -> int:
        """Return total result count for pagination compatibility."""
        return self.num_found

    @classmethod
    def from_dict(
        cls, data: dict[str, Any], include_raw_data: bool = False
    ) -> "EnrichedCitationResponse":
        """Create an EnrichedCitationResponse instance from a dictionary.

        Handles both the raw API envelope (``{"response": {...}}``) and
        a pre-unwrapped dictionary. A ``"response"`` value that is not a
        dictionary (for example ``None``) is treated as an empty envelope.

        Args:
            data: Dictionary containing API response data.
            include_raw_data: If True, store the raw JSON for debugging.

        Returns:
            EnrichedCitationResponse: An instance of EnrichedCitationResponse.
        """
        # Unwrap the outer "response" envelope if present
        inner = data.get("response", data)
        if not isinstance(inner, dict):
            # A null or malformed envelope would otherwise fail on .get() below.
            inner = {}

        # Parse citation docs, skipping entries that are not dictionaries
        docs_data = inner.get("docs", [])
        docs = (
            [
                EnrichedCitation.from_dict(doc)
                for doc in docs_data
                if isinstance(doc, dict)
            ]
            if isinstance(docs_data, list)
            else []
        )

        return cls(
            num_found=inner.get("numFound", 0),
            start=inner.get("start", 0),
            docs=docs,
            # Serialise the full payload as received, envelope included
            raw_data=json.dumps(data) if include_raw_data else None,
        )

    def to_dict(self) -> dict[str, Any]:
        """Convert the EnrichedCitationResponse instance to a dictionary.

        ``raw_data`` is not included in the output.

        Returns:
            dict[str, Any]: Dictionary representation with camelCase keys,
                wrapped in the ``"response"`` envelope matching the API format.
        """
        return {
            "response": {
                "numFound": self.num_found,
                "start": self.start,
                "docs": [doc.to_dict() for doc in self.docs],
            }
        }





[docs]
@dataclass(frozen=True)
class EnrichedCitationFieldsResponse:
    """Response from the Enriched Citations API fields endpoint.

    Contains metadata about the API including available field names
    and the last data update timestamp.

    Attributes:
        api_key: The dataset key (e.g., "enriched_cited_reference_metadata").
        api_version_number: API version (e.g., "v3").
        api_url: The URL of this fields endpoint.
        api_documentation_url: URL to the Swagger documentation.
        api_status: Publication status (e.g., "PUBLISHED").
        field_count: Number of available fields.
        fields: List of available field names.
        last_data_updated_date: Timestamp of the last data update (non-standard format).
    """

    api_key: str | None = None
    api_version_number: str | None = None
    api_url: str | None = None
    api_documentation_url: str | None = None
    api_status: str | None = None
    field_count: int = 0
    fields: list[str] = field(default_factory=list)
    last_data_updated_date: str | None = None


[docs]
    @classmethod
    def from_dict(
        cls, data: dict[str, Any], include_raw_data: bool = False
    ) -> "EnrichedCitationFieldsResponse":
        """Create an EnrichedCitationFieldsResponse instance from a dictionary.

        Args:
            data: Dictionary containing API response data.
            include_raw_data: Unused. Present for FromDictProtocol conformance.

        Returns:
            EnrichedCitationFieldsResponse: An instance of EnrichedCitationFieldsResponse.
        """
        fields_data = data.get("fields", [])
        if not isinstance(fields_data, list):
            fields_data = []

        return cls(
            api_key=data.get("apiKey"),
            api_version_number=data.get("apiVersionNumber"),
            api_url=data.get("apiUrl"),
            api_documentation_url=data.get("apiDocumentationUrl"),
            api_status=data.get("apiStatus"),
            field_count=data.get("fieldCount", 0),
            fields=fields_data,
            last_data_updated_date=data.get("lastDataUpdatedDate"),
        )



[docs]
    def to_dict(self) -> dict[str, Any]:
        """Convert the EnrichedCitationFieldsResponse instance to a dictionary.

        Returns:
            Dict[str, Any]: Dictionary representation with camelCase keys.
        """
        d = {
            "apiKey": self.api_key,
            "apiVersionNumber": self.api_version_number,
            "apiUrl": self.api_url,
            "apiDocumentationUrl": self.api_documentation_url,
            "apiStatus": self.api_status,
            "fieldCount": self.field_count,
            "fields": self.fields,
            "lastDataUpdatedDate": self.last_data_updated_date,
        }
        return {
            k: v
            for k, v in d.items()
            if v is not None and (not isinstance(v, list) or v)
        }