"""clients.patent_data - Client for USPTO patent data API.
This module provides a client for interacting with the USPTO Patent Data API.
It allows you to search for and retrieve patent application data.
"""
import dataclasses
import os
import tempfile
import warnings
import zipfile
from collections.abc import Iterator
from typing import Any
import requests
from pyUSPTO.clients.base import BaseUSPTOClient
from pyUSPTO.config import USPTOConfig
from pyUSPTO.exceptions import FormatNotAvailableError
from pyUSPTO.models.patent_data import (
ApplicationContinuityData,
ApplicationMetaData,
Assignment,
Document,
DocumentBag,
DocumentFormat,
DocumentMimeType,
EventData,
ForeignPriority,
IFWResult,
PatentDataResponse,
PatentFileWrapper,
PatentTermAdjustmentData,
PrintedMetaData,
PrintedPublication,
RecordAttorney,
StatusCodeSearchResponse,
)
from pyUSPTO.warnings import USPTODataMismatchWarning
[docs]
class PatentDataClient(BaseUSPTOClient[PatentDataResponse]):
"""Client for interacting with the USPTO Patent Data API."""
ENDPOINTS = {
"search_applications": "api/v1/patent/applications/search",
"get_search_results": "api/v1/patent/applications/search/download",
"get_application_by_number": "api/v1/patent/applications/{application_number}",
"get_application_metadata": "api/v1/patent/applications/{application_number}/meta-data",
"get_application_adjustment": "api/v1/patent/applications/{application_number}/adjustment",
"get_application_assignment": "api/v1/patent/applications/{application_number}/assignment",
"get_application_attorney": "api/v1/patent/applications/{application_number}/attorney",
"get_application_continuity": "api/v1/patent/applications/{application_number}/continuity",
"get_application_foreign_priority": "api/v1/patent/applications/{application_number}/foreign-priority",
"get_application_transactions": "api/v1/patent/applications/{application_number}/transactions",
"get_application_documents": "api/v1/patent/applications/{application_number}/documents",
"get_application_associated_documents": "api/v1/patent/applications/{application_number}/associated-documents",
"download_application_document": "api/v1/download/applications/{application_number}/{document_id}",
"status_codes": "api/v1/patent/status-codes",
}
[docs]
def __init__(
self,
config: USPTOConfig | None = None,
base_url: str | None = None,
):
"""Initialize the PatentDataClient.
Args:
config: USPTOConfig instance containing API key and settings. If not provided,
creates config from environment variables (requires USPTO_API_KEY).
base_url: Optional base URL override for the USPTO Patent Data API.
If not provided, uses config.patent_data_base_url or default.
"""
# Use provided config or create from environment
if config is None:
self.config = USPTOConfig.from_env()
else:
self.config = config
# Determine effective base URL
effective_base_url = base_url or self.config.patent_data_base_url
# Initialize base client
super().__init__(
base_url=effective_base_url,
config=self.config,
)
[docs]
def sanitize_application_number(self, input_number: str) -> str:
"""Sanitize and validate a USPTO application number.
Application numbers are either:
- 8 digits (e.g., "16123456")
- Series code format: 2 digits + "/" + 6 digits (e.g., "08/123456")
- PCT format: "PCT/US2024/012345" → "PCTUS2412345"
This method removes common separators (commas, spaces) while preserving
the "/" in series code format.
Args:
input_number: Raw application number input. May include commas,
spaces, or other formatting.
Returns:
str: Sanitized application number (either "NNNNNNNN" or "NN/NNNNNN").
Raises:
ValueError: If the format is invalid.
Examples:
>>> client.sanitize_application_number("16123456")
"16123456"
>>> client.sanitize_application_number("16,123,456")
"16123456"
>>> client.sanitize_application_number("08/123456")
"08/123456"
>>> client.sanitize_application_number("08/123,456")
"08/123456"
"""
if not input_number or not input_number.strip():
raise ValueError("Application number cannot be empty")
raw = input_number.strip()
# --- NEW: Handle PCT formats ---
# Example: "PCT/US2024/012345" -> "PCTUS2412345"
if raw.startswith("PCT"):
parts = raw.split("/")
if len(parts) == 1:
# Already sanitized (e.g. "PCTUS0812705"), return as-is
return raw
if len(parts) != 3:
raise ValueError(
f"Invalid PCT application format: {input_number}. "
"Expected PCT/CCYYYY/NNNNNN"
)
_, country_year, serial = parts
# country_year can be "US2024" or "US24"
country = country_year[:2]
year_part = country_year[2:]
if not year_part.isdigit():
raise ValueError(
f"Invalid PCT year in: {country_year}. Must be digits."
)
# Normalize:
# "2024" -> "24"
# "24" -> "24"
if len(year_part) == 4:
year = year_part[-2:]
elif len(year_part) == 2:
year = year_part
else:
raise ValueError(
f"Invalid PCT year length in: {country_year}. "
"Expected CCYYYY or CCYY."
)
# Serial must be digits only
if not serial.isdigit():
raise ValueError(f"Invalid PCT serial: {serial}. Must be numeric.")
return f"PCT{country}{year}{serial.lstrip('0')}"
# Strip whitespace and remove commas/spaces
cleaned = raw.replace(",", "").replace(" ", "")
# Check if this is series code format (NN/NNNNNN)
if "/" in cleaned:
parts = cleaned.split("/")
if len(parts) != 2:
raise ValueError(
f"Invalid application number format: {input_number}. "
"Expected format: NNNNNNNN or NN/NNNNNN"
)
series, serial = parts
if not series.isdigit() or not serial.isdigit():
raise ValueError(
f"Invalid application number format: {input_number}. "
"Series and serial must be numeric."
)
if len(series) != 2 or len(serial) != 6:
raise ValueError(
f"Invalid application number format: {input_number}. "
"Expected series code format: NN/NNNNNN (2 digits / 6 digits)"
)
return cleaned
# Standard 8-digit format
if not cleaned.isdigit():
raise ValueError(
f"Invalid application number format: {input_number}. "
"Must contain only digits."
)
if len(cleaned) != 8:
raise ValueError(
f"Invalid application number format: {input_number}. Expected 8 digits."
)
return cleaned
def _get_wrapper_from_response(
self,
response_data: PatentDataResponse,
application_number_for_validation: str | None = None,
) -> PatentFileWrapper | None:
"""Extract a single PatentFileWrapper, optionally validating the app number."""
if not response_data or not response_data.patent_file_wrapper_data_bag:
return None
wrapper = response_data.patent_file_wrapper_data_bag[0]
if (
application_number_for_validation
and wrapper.application_number_text
!= self.sanitize_application_number(application_number_for_validation)
):
warnings.warn(
f"API returned application number '{wrapper.application_number_text}' "
f"but requested '{application_number_for_validation}'. "
f"This may indicate an API data inconsistency.",
USPTODataMismatchWarning,
stacklevel=2,
)
return wrapper
[docs]
def search_applications(
self,
query: str | None = None,
sort: str | None = None,
offset: int | None = 0,
limit: int | None = 25,
facets: str | None = None,
fields: str | None = None,
filters: str | None = None,
range_filters: str | None = None,
post_body: dict[str, Any] | None = None,
application_number_q: str | None = None,
patent_number_q: str | None = None,
inventor_name_q: str | None = None,
applicant_name_q: str | None = None,
assignee_name_q: str | None = None,
filing_date_from_q: str | None = None,
filing_date_to_q: str | None = None,
grant_date_from_q: str | None = None,
grant_date_to_q: str | None = None,
classification_q: str | None = None,
earliestPublicationNumber_q: str | None = None,
pctPublicationNumber_q: str | None = None,
additional_query_params: dict[str, Any] | None = None,
) -> PatentDataResponse:
"""Search for patent applications.
Can perform a GET request based on OpenAPI query parameters or a POST request if post_body is specified.
"""
endpoint = self.ENDPOINTS["search_applications"]
if post_body is not None:
result = self._get_model(
method="POST",
endpoint=endpoint,
response_class=PatentDataResponse,
json_data=post_body,
params=additional_query_params,
)
else:
params: dict[str, Any] = {}
final_q = query
if final_q is None:
q_parts = []
if application_number_q:
q_parts.append(f"applicationNumberText:{application_number_q}")
if patent_number_q:
q_parts.append(
f"applicationMetaData.patentNumber:{patent_number_q}"
)
if inventor_name_q:
v = (
f'"{inventor_name_q}"'
if " " in inventor_name_q
else inventor_name_q
)
q_parts.append(
f"applicationMetaData.inventorBag.inventorNameText:{v}"
)
if applicant_name_q:
v = (
f'"{applicant_name_q}"'
if " " in applicant_name_q
else applicant_name_q
)
q_parts.append(f"applicationMetaData.firstApplicantName:{v}")
if assignee_name_q:
v = (
f'"{assignee_name_q}"'
if " " in assignee_name_q
else assignee_name_q
)
q_parts.append(f"assignmentBag.assigneeBag.assigneeNameText:{v}")
if classification_q:
v = (
f'"{classification_q}"'
if any(c in classification_q for c in [" ", "/"])
else classification_q
)
q_parts.append(f"applicationMetaData.cpcClassificationBag:{v}")
if earliestPublicationNumber_q:
q_parts.append(
f"applicationMetaData.earliestPublicationNumber:{earliestPublicationNumber_q}"
)
if pctPublicationNumber_q:
q_parts.append(
f"applicationMetaData.pctPublicationNumber:{pctPublicationNumber_q}"
)
if filing_date_from_q and filing_date_to_q:
q_parts.append(
f"applicationMetaData.filingDate:[{filing_date_from_q} TO {filing_date_to_q}]"
)
elif filing_date_from_q:
q_parts.append(
f"applicationMetaData.filingDate:>={filing_date_from_q}"
)
elif filing_date_to_q:
q_parts.append(
f"applicationMetaData.filingDate:<={filing_date_to_q}"
)
if grant_date_from_q and grant_date_to_q:
q_parts.append(
f"applicationMetaData.grantDate:[{grant_date_from_q} TO {grant_date_to_q}]"
)
elif grant_date_from_q:
q_parts.append(
f"applicationMetaData.grantDate:>={grant_date_from_q}"
)
elif grant_date_to_q:
q_parts.append(f"applicationMetaData.grantDate:<={grant_date_to_q}")
if q_parts:
final_q = " AND ".join(q_parts)
if final_q is not None:
params["q"] = final_q
if sort is not None:
params["sort"] = sort
if offset is not None:
params["offset"] = offset
if limit is not None:
params["limit"] = limit
if facets is not None:
params["facets"] = facets
if fields is not None:
params["fields"] = fields
if filters is not None:
params["filters"] = filters
if range_filters is not None:
params["rangeFilters"] = range_filters
if additional_query_params:
params.update(additional_query_params)
result = self._get_model(
method="GET",
endpoint=endpoint,
response_class=PatentDataResponse,
params=params,
)
return result
[docs]
def get_search_results(
self,
query: str | None = None,
sort: str | None = None,
offset: int | None = 0,
limit: int | None = 25,
fields_param: str | None = None,
filters_param: str | None = None,
range_filters_param: str | None = None,
post_body: dict[str, Any] | None = None,
application_number_q: str | None = None,
patent_number_q: str | None = None,
inventor_name_q: str | None = None,
applicant_name_q: str | None = None,
assignee_name_q: str | None = None,
filing_date_from_q: str | None = None,
filing_date_to_q: str | None = None,
grant_date_from_q: str | None = None,
grant_date_to_q: str | None = None,
classification_q: str | None = None,
additional_query_params: dict[str, Any] | None = None,
) -> list[ApplicationMetaData]:
"""Fetch a dataset of patent applications based on search criteria, always requesting JSON format.
For GET, parameters align with OpenAPI for /api/v1/patent/applications/search/download.
For POST, post_body should conform to PatentDownloadRequest schema.
"""
endpoint = self.ENDPOINTS["get_search_results"]
if post_body is not None:
if "format" not in post_body:
post_body["format"] = "json"
result = self._get_json(
method="POST",
endpoint=endpoint,
json_data=post_body,
params=additional_query_params,
)
else:
params: dict[str, Any] = {}
final_q = query
if final_q is None:
q_parts = []
if application_number_q:
q_parts.append(f"applicationNumberText:{application_number_q}")
if patent_number_q:
q_parts.append(
f"applicationMetaData.patentNumber:{patent_number_q}"
)
if inventor_name_q:
v = (
f'"{inventor_name_q}"'
if " " in inventor_name_q
else inventor_name_q
)
q_parts.append(
f"applicationMetaData.inventorBag.inventorNameText:{v}"
)
if applicant_name_q:
v = (
f'"{applicant_name_q}"'
if " " in applicant_name_q
else applicant_name_q
)
q_parts.append(f"applicationMetaData.firstApplicantName:{v}")
if assignee_name_q:
v = (
f'"{assignee_name_q}"'
if " " in assignee_name_q
else assignee_name_q
)
q_parts.append(f"assignmentBag.assigneeBag.assigneeNameText:{v}")
if classification_q:
v = (
f'"{classification_q}"'
if any(c in classification_q for c in [" ", "/"])
else classification_q
)
q_parts.append(f"applicationMetaData.cpcClassificationBag:{v}")
if filing_date_from_q and filing_date_to_q:
q_parts.append(
f"applicationMetaData.filingDate:[{filing_date_from_q} TO {filing_date_to_q}]"
)
elif filing_date_from_q:
q_parts.append(
f"applicationMetaData.filingDate:>={filing_date_from_q}"
)
elif filing_date_to_q:
q_parts.append(
f"applicationMetaData.filingDate:<={filing_date_to_q}"
)
if grant_date_from_q and grant_date_to_q:
q_parts.append(
f"applicationMetaData.grantDate:[{grant_date_from_q} TO {grant_date_to_q}]"
)
elif grant_date_from_q:
q_parts.append(
f"applicationMetaData.grantDate:>={grant_date_from_q}"
)
elif grant_date_to_q:
q_parts.append(f"applicationMetaData.grantDate:<={grant_date_to_q}")
if q_parts:
final_q = " AND ".join(q_parts)
if final_q is not None:
params["q"] = final_q
if sort is not None:
params["sort"] = sort
if offset is not None:
params["offset"] = offset
if limit is not None:
params["limit"] = limit
if fields_param is not None:
params["fields"] = fields_param
if filters_param is not None:
params["filters"] = filters_param
if range_filters_param is not None:
params["rangeFilters"] = range_filters_param
params["format"] = "json"
if additional_query_params:
params.update(additional_query_params)
result = self._get_json(
method="GET",
endpoint=endpoint,
params=params,
)
amd_list = [
ApplicationMetaData.from_dict(item["applicationMetaData"])
for item in result["patentdata"]
]
return amd_list
[docs]
def get_application_by_number(
self, application_number: str
) -> PatentFileWrapper | None:
"""Retrieve the full details for a specific patent application by its number.
This method fetches comprehensive information for a single patent application
identified by its unique application number.
Args:
application_number (str): The USPTO application number for the patent
application (e.g., "16123456" or "18/915,708"). The application
number will be automatically sanitized to remove commas and spaces.
Returns:
Optional[PatentFileWrapper]: A `PatentFileWrapper` object representing
the complete file wrapper for the application if found. This object
contains all data sections related to the application, such as
metadata, addresses, assignments, attorney/agent data, continuity
data, PTA/PTE data, transactions, and associated documents.
Returns None if the application cannot be found or if the response
does not contain the expected data.
"""
endpoint = self.ENDPOINTS["get_application_by_number"].format(
application_number=self.sanitize_application_number(application_number)
)
response_data = self._get_model(
method="GET", endpoint=endpoint, response_class=PatentDataResponse
)
ret = self._get_wrapper_from_response(
response_data=response_data,
application_number_for_validation=application_number,
)
return ret
[docs]
def get_application_adjustment(
self, application_number: str
) -> PatentTermAdjustmentData | None:
"""Retrieve patent term adjustment (PTA) data for a specific application.
This method fetches the `PatentTermAdjustmentData` component from the
full patent file wrapper. This data includes details on various delay
quantities (e.g., A, B, C delays, applicant delays), the total
calculated adjustment, and a history of PTA events that influenced the
term.
Args:
application_number (str): The USPTO application number for which PTA
data is being requested (e.g., "16123456").
Returns:
Optional[PatentTermAdjustmentData]: A `PatentTermAdjustmentData`
object containing the PTA details if the application is found
and has such data. Returns None if the application cannot be
found or if PTA data is not available in the response.
"""
endpoint = self.ENDPOINTS["get_application_adjustment"].format(
application_number=self.sanitize_application_number(application_number)
)
response_data = self._get_model(
method="GET", endpoint=endpoint, response_class=PatentDataResponse
)
wrapper = self._get_wrapper_from_response(response_data, application_number)
return wrapper.patent_term_adjustment_data if wrapper else None
[docs]
def get_application_assignment(
self, application_number: str
) -> list[Assignment] | None:
"""Retrieve a list of patent assignments for a specific application.
This method fetches the `assignment_bag` from the patent file wrapper,
which contains a list of `Assignment` objects. Each `Assignment` object
details an assignment including information such as reel and frame numbers,
recording dates, conveyance text, and details about the assignors and assignees.
Args:
application_number (str): The USPTO application number for which
assignment data is being requested (e.g., "16123456").
Returns:
Optional[List[Assignment]]: A list of `Assignment` objects, each
representing a recorded assignment for the application. Returns
None if the application cannot be found, or if no assignment
data is available in the response. An empty list may be
returned if the application is found but has no recorded
assignments.
"""
endpoint = self.ENDPOINTS["get_application_assignment"].format(
application_number=self.sanitize_application_number(application_number)
)
response_data = self._get_model(
method="GET", endpoint=endpoint, response_class=PatentDataResponse
)
wrapper = self._get_wrapper_from_response(response_data, application_number)
return wrapper.assignment_bag if wrapper else None
[docs]
def get_application_attorney(
self, application_number: str
) -> RecordAttorney | None:
"""Retrieve data for the attorney(s) of record for a specific application.
This method fetches the `RecordAttorney` object associated with the
patent application. This object contains details about the attorney(s)
of record, including customer number correspondence data, power of attorney
information, and a list of listed attorneys.
Args:
application_number (str): The USPTO application number for which
attorney data is being requested (e.g., "16123456").
Returns:
Optional[RecordAttorney]: A `RecordAttorney` object with details
about the attorney(s) of record if the application is found
and such data exists. Returns None if the application cannot
be found or if no attorney data is available in the response.
"""
endpoint = self.ENDPOINTS["get_application_attorney"].format(
application_number=self.sanitize_application_number(application_number)
)
response_data = self._get_model(
method="GET", endpoint=endpoint, response_class=PatentDataResponse
)
wrapper = self._get_wrapper_from_response(response_data, application_number)
return wrapper.record_attorney if wrapper else None
[docs]
def get_application_continuity(
self, application_number: str
) -> ApplicationContinuityData | None:
"""Retrieve continuity data (parent/child applications) for a specific application.
This method fetches the lineage of the specified application, returning an
`ApplicationContinuityData` object. This object consolidates lists of
`ParentContinuity` (applications to which the current one claims priority)
and `ChildContinuity` (applications claiming priority to the current one)
objects, each detailing the related application's key identifiers and status.
Args:
application_number (str): The USPTO application number for which
continuity data is being requested (e.g., "16123456").
Returns:
Optional[ApplicationContinuityData]: An `ApplicationContinuityData`
object containing lists of parent and child continuity relationships.
Returns None if the application cannot be found or if the underlying
data to construct continuity is not available. The lists within
the returned object may be empty if no parent or child continuity
links exist.
"""
endpoint = self.ENDPOINTS["get_application_continuity"].format(
application_number=self.sanitize_application_number(application_number)
)
response_data = self._get_model(
method="GET", endpoint=endpoint, response_class=PatentDataResponse
)
wrapper = self._get_wrapper_from_response(response_data, application_number)
return ApplicationContinuityData.from_wrapper(wrapper) if wrapper else None
[docs]
def get_application_foreign_priority(
self, application_number: str
) -> list[ForeignPriority] | None:
"""Retrieve a list of foreign priority claims for a specific application.
This method fetches the `foreign_priority_bag` from the patent file
wrapper. This bag contains a list of `ForeignPriority` objects, each
representing a claim to a foreign patent application's priority date.
Details include the IP office name, filing date, and application number
of the foreign priority application.
Args:
application_number (str): The USPTO application number for which
foreign priority data is being requested (e.g., "16123456").
Returns:
Optional[List[ForeignPriority]]: A list of `ForeignPriority` objects,
each detailing a claimed foreign priority. Returns None if the
application cannot be found or if no foreign priority data is
available. An empty list may be returned if the application
is found but has no foreign priority claims.
"""
endpoint = self.ENDPOINTS["get_application_foreign_priority"].format(
application_number=self.sanitize_application_number(application_number)
)
response_data = self._get_model(
method="GET", endpoint=endpoint, response_class=PatentDataResponse
)
wrapper = self._get_wrapper_from_response(response_data, application_number)
return wrapper.foreign_priority_bag if wrapper else None
[docs]
def get_application_transactions(
self, application_number: str
) -> list[EventData] | None:
"""Retrieve the transaction history (events) for a specific application.
This method fetches the `event_data_bag` from the patent file wrapper.
This bag contains a list of `EventData` objects, each representing a
single recorded event in the prosecution history of the patent application.
Events include details like an event code, a textual description, and
the date the event was recorded.
Args:
application_number (str): The USPTO application number for which
transaction history is being requested (e.g., "16123456").
Returns:
Optional[List[EventData]]: A list of `EventData` objects, each
detailing a transaction or event in the application's history.
Returns None if the application cannot be found or if no
transaction data is available. An empty list may be returned if
the application is found but has no recorded transaction events.
"""
endpoint = self.ENDPOINTS["get_application_transactions"].format(
application_number=self.sanitize_application_number(application_number)
)
response_data = self._get_model(
method="GET", endpoint=endpoint, response_class=PatentDataResponse
)
wrapper = self._get_wrapper_from_response(response_data, application_number)
return wrapper.event_data_bag if wrapper else None
[docs]
def get_application_documents(
self,
application_number: str,
document_codes: list[str] | None = None,
official_date_from: str | None = None,
official_date_to: str | None = None,
) -> DocumentBag:
"""Retrieve metadata for documents associated with a specific application.
This method fetches a collection of document metadata related to the given
patent application. The result is a `DocumentBag` object, which is an
iterable collection of `Document` instances. Each `Document` object
contains metadata such as its identifier, official date, document code
and description, direction (incoming/outgoing), and available download
formats.
Args:
application_number (str): The USPTO application number for which
document metadata is being requested (e.g., "16123456").
document_codes (Optional[List[str]]): Filter by specific document type
codes. If provided, only documents with these codes will be returned.
Examples: ['ABST', 'CLM', 'SPEC', 'DRWD'].
official_date_from (Optional[str]): Filter documents from this date
(inclusive). Date format: YYYY-MM-DD (e.g., "2020-01-15").
official_date_to (Optional[str]): Filter documents to this date
(inclusive). Date format: YYYY-MM-DD (e.g., "2023-12-31").
Returns:
DocumentBag: A `DocumentBag` object containing metadata for all
publicly available documents associated with the application
that match the provided filters. The bag will be empty if no
documents are found or if the API response indicates no documents.
It does not return None for "not found" cases; an empty collection
is returned instead.
"""
endpoint = self.ENDPOINTS["get_application_documents"].format(
application_number=self.sanitize_application_number(application_number)
)
params = {}
if document_codes:
params["documentCodes"] = ",".join(document_codes)
if official_date_from:
params["officialDateFrom"] = official_date_from
if official_date_to:
params["officialDateTo"] = official_date_to
return self._get_model(
method="GET",
endpoint=endpoint,
response_class=DocumentBag,
params=params if params else None,
)
[docs]
def get_application_associated_documents(
self, application_number: str
) -> PrintedPublication | None:
"""Retrieve metadata for Pre-Grant Publication and Grant documents.
This method fetches metadata specifically for published documents associated
with the patent application, such as Pre-Grant Publications (PGPUBs)
and granted patent documents. It does not retrieve the prosecution
history documents (see `get_application_documents` for that).
The result is a `PrintedPublication` object, which holds
`PrintedMetaData` including file URIs and names. Download with download_archive.
Args:
application_number (str): The USPTO application number for which
associated PGPUB/Grant document metadata is being requested
(e.g., "16123456").
Returns:
Optional[PrintedPublication]: A `PrintedPublication` object
containing `PrintedMetaData` for the Pre-Grant Publication
and/or the Grant document, if available. Returns None if the
application cannot be found or if no such associated document
metadata is available. The fields within the returned object
(`pgpub_document_meta_data`, `grant_document_meta_data`)
may themselves be None if a particular type of document
(e.g., PGPUB) does not exist for the application.
"""
endpoint = self.ENDPOINTS["get_application_associated_documents"].format(
application_number=self.sanitize_application_number(application_number)
)
response_data = self._get_model(
method="GET", endpoint=endpoint, response_class=PatentDataResponse
)
wrapper = self._get_wrapper_from_response(response_data, application_number)
return PrintedPublication.from_wrapper(wrapper) if wrapper else None
[docs]
def paginate_applications(
self, post_body: dict[str, Any] | None = None, **kwargs: Any
) -> Iterator[PatentFileWrapper]:
"""Provide an iterator to easily paginate through patent application search results.
This method simplifies the process of fetching all patent applications
that match a given search query by automatically handling pagination.
Supports both GET and POST requests.
For GET requests, provide search parameters as keyword arguments.
For POST requests, provide the search criteria in `post_body`.
The `offset` and `limit` parameters are managed by the pagination logic;
setting them directly in `kwargs` or `post_body` might lead to unexpected behavior.
Args:
post_body: Optional POST body for complex search queries. If provided,
performs POST-based pagination.
**kwargs: Keyword arguments for GET-based pagination or additional
query parameters for POST requests.
Returns:
Iterator[PatentFileWrapper]: An iterator that yields `PatentFileWrapper`
objects, allowing iteration over all matching patent applications
across multiple pages of results.
Examples:
# GET-based pagination
for wrapper in client.paginate_applications(
query="applicationNumberText:16*",
limit=50
):
print(wrapper.application_number_text)
# POST-based pagination
for wrapper in client.paginate_applications(
post_body={
"q": "applicationNumberText:16*",
"facets": "true",
"fields": "applicationNumberText,applicationMetaData"
}
):
print(wrapper.application_number_text)
"""
return self.paginate_results(
method_name="search_applications",
response_container_attr="patent_file_wrapper_data_bag",
post_body=post_body,
**kwargs,
)
[docs]
def get_status_codes(
self, params: dict[str, Any] | None = None
) -> StatusCodeSearchResponse:
"""Retrieve USPTO patent application status codes and their descriptions.
This method fetches a list of defined USPTO patent application status codes
(e.g., codes for "Pending," "Abandoned," "Issued") using a GET request.
The request can be customized with query parameters to filter or paginate
the results if supported by the API endpoint.
Args:
params (Optional[Dict[str, Any]], optional): A dictionary of query
parameters to be sent with the GET request. These parameters can
be used to filter or control the output of the status codes
list. Defaults to None, which typically retrieves all available
status codes or the API's default set.
Returns:
StatusCodeSearchResponse: An object containing a count of matching
status codes, a `StatusCodeCollection` of the `StatusCode`
objects (code and description), and a request identifier.
"""
return self._get_model(
method="GET",
endpoint=self.ENDPOINTS["status_codes"],
response_class=StatusCodeSearchResponse,
params=params,
)
[docs]
def search_status_codes(
self, search_request: dict[str, Any]
) -> StatusCodeSearchResponse:
"""Search USPTO patent application status codes using POST criteria.
Performs targeted searches for USPTO patent application status codes
(e.g., for "Pending," "Abandoned," "Issued") by sending a POST request
with a JSON body containing the `search_request` criteria. This method
is suited for more complex queries than the GET-based `get_status_codes`.
Args:
search_request (Dict[str, Any]): A dictionary with search criteria,
sent as the JSON POST body. The structure must conform to USPTO
API requirements for this endpoint (e.g., for searching by code
or description keywords).
Returns:
StatusCodeSearchResponse: An object containing a count of matching
status codes, a `StatusCodeCollection` of the `StatusCode`
objects (code and description), and a request identifier.
"""
return self._get_model(
method="POST",
endpoint=self.ENDPOINTS["status_codes"],
response_class=StatusCodeSearchResponse,
json_data=search_request,
)
[docs]
def download_document(
self,
document: Document,
format: str | DocumentMimeType = DocumentMimeType.PDF,
destination: str | None = None,
file_name: str | None = None,
overwrite: bool = False,
) -> str:
"""Download document in specified format.
Automatically extracts if USPTO sends TAR/ZIP.
Args:
document: Document with document_formats list
format: Which format (PDF, XML, MS_WORD). Can be string or DocumentMimeType enum.
Defaults to PDF.
destination: Directory to save to (default: current directory)
file_name: Override filename (default: from Content-Disposition)
overwrite: Overwrite existing file
Returns:
Path to downloaded file (extracted if was in archive)
Raises:
FormatNotAvailableError: If format not available for this document.
The exception includes `requested_format`, `available_formats`,
and `document` attributes for programmatic error handling.
Example:
>>> docs = client.get_application_documents("19312841", document_codes=["CTNF"])
>>> path = client.download_document(docs[0], format="XML")
>>> # Or using enum:
>>> path = client.download_document(docs[0], format=DocumentMimeType.XML)
"""
doc_format = self._resolve_document_format(document=document, format=format)
assert doc_format.download_url is not None
return self._download_and_extract(
url=doc_format.download_url,
destination=destination,
file_name=file_name,
overwrite=overwrite,
)
def _resolve_document_format(
self,
document: Document,
format: str | DocumentMimeType,
) -> DocumentFormat:
"""Resolve a format specifier to a DocumentFormat with a download URL.
Args:
document: Document with document_formats list.
format: MIME type string or DocumentMimeType enum.
Returns:
Matching DocumentFormat.
Raises:
FormatNotAvailableError: If no format matches.
ValueError: If the matched format has no download_url.
"""
format_str = format.value if isinstance(format, DocumentMimeType) else format
doc_format = next(
(
f
for f in document.document_formats
if f.mime_type_identifier == format_str
),
None,
)
if not doc_format:
available = [
f.mime_type_identifier
for f in document.document_formats
if f.mime_type_identifier
]
raise FormatNotAvailableError(
requested_format=format_str,
available_formats=available,
document=document,
)
if not doc_format.download_url:
raise ValueError("DocumentFormat has no download URL")
return doc_format
[docs]
def stream_document(
self,
document: Document,
format: str | DocumentMimeType = DocumentMimeType.PDF,
) -> requests.Response:
"""Stream a document in the specified format without saving to disk.
Returns a streaming ``requests.Response``. The caller is responsible
for consuming and closing it — use as a context manager or call
``response.close()`` when done.
Args:
document: Document with document_formats list.
format: Which format (PDF, XML, MS_WORD). Can be string or DocumentMimeType enum.
Defaults to PDF.
Returns:
Streaming requests.Response object.
Raises:
FormatNotAvailableError: If format not available for this document.
ValueError: If the matched DocumentFormat has no download URL.
Example:
>>> docs = client.get_application_documents("19312841", document_codes=["CTNF"])
>>> with client.stream_document(docs[0]) as response:
... for chunk in response.iter_content(chunk_size=8192):
... process(chunk)
"""
doc_format = self._resolve_document_format(document=document, format=format)
return self._stream_request(
method="GET",
endpoint="",
custom_url=doc_format.download_url,
)
def _resolve_by_search(self, **search_kwargs: Any) -> PatentFileWrapper | None:
"""Search for an application and return the first matching wrapper.
This is a shared helper for convenience methods that resolve
non-application-number identifiers to a PatentFileWrapper.
Args:
**search_kwargs: Keyword arguments passed to search_applications().
Returns:
Optional[PatentFileWrapper]: The first matching wrapper, or None.
"""
pdr = self.search_applications(**search_kwargs, limit=1)
if pdr.patent_file_wrapper_data_bag:
return pdr.patent_file_wrapper_data_bag[0]
return None
[docs]
def get_patent(self, patent_number: str) -> PatentFileWrapper | None:
"""Retrieve application metadata by patent number.
Searches the USPTO API for the given patent number and returns
the corresponding PatentFileWrapper. This is a lightweight lookup
that does not fetch the full document bag.
Args:
patent_number (str): The USPTO patent number (e.g., "11000000").
Returns:
Optional[PatentFileWrapper]: The matching patent file wrapper,
or None if not found.
"""
return self._resolve_by_search(patent_number_q=patent_number)
[docs]
def get_publication(self, publication_number: str) -> PatentFileWrapper | None:
"""Retrieve application metadata by publication number.
Searches the USPTO API for the given pre-grant publication number
and returns the corresponding PatentFileWrapper. This is a lightweight
lookup that does not fetch the full document bag.
Args:
publication_number (str): The USPTO publication number
(e.g., "20230123456").
Returns:
Optional[PatentFileWrapper]: The matching patent file wrapper,
or None if not found.
"""
return self._resolve_by_search(earliestPublicationNumber_q=publication_number)
[docs]
def get_pct(self, pct_number: str) -> PatentFileWrapper | None:
"""Retrieve application metadata by PCT number.
Accepts both PCT application numbers and PCT publication numbers.
The format is auto-detected:
- PCT application numbers (starting with "PCT") are resolved via
direct lookup using get_application_by_number.
- PCT publication numbers (e.g., "WO2024012345A1") are resolved
via search.
Args:
pct_number (str): A PCT application number (e.g.,
"PCT/US2024/012345") or PCT publication number
(e.g., "WO2024012345A1").
Returns:
Optional[PatentFileWrapper]: The matching patent file wrapper,
or None if not found.
"""
if pct_number.strip().upper().startswith("PCT"):
return self.get_application_by_number(application_number=pct_number)
return self._resolve_by_search(pctPublicationNumber_q=pct_number)
[docs]
def get_IFW(
self,
*,
application_number: str | None = None,
publication_number: str | None = None,
patent_number: str | None = None,
PCT_app_number: str | None = None,
PCT_pub_number: str | None = None,
destination: str | None = None,
overwrite: bool = False,
as_zip: bool = True,
) -> IFWResult | None:
"""Retrieve IFW metadata and download all prosecution documents.
Combines `get_IFW_metadata` with a bulk download of all available prosecution
history documents (PDF preferred, DOCX fallback). Documents with no downloadable
format (e.g., NPL references) are silently skipped. A warning is issued only
if a document has a download URL but the download itself fails.
Args:
application_number: USPTO application number (e.g., "16123456").
publication_number: USPTO pre-grant publication number.
patent_number: USPTO patent number.
PCT_app_number: PCT application number.
PCT_pub_number: PCT publication number.
destination: Directory for output. Defaults to current directory.
overwrite: Whether to overwrite an existing output. Default False.
as_zip: If True (default), package all downloads into a ZIP archive
at ``{destination}/{app_no}_ifw.zip``. If False, download files
directly into ``{destination}/{app_no}_ifw/``.
Returns:
IFWResult with the PatentFileWrapper, the output path, and a mapping
of document_identifier to filename for each downloaded document.
Returns None if no application was found.
Raises:
FileExistsError: If the output path already exists and overwrite=False.
"""
wrapper = self.get_IFW_metadata(
application_number=application_number,
publication_number=publication_number,
patent_number=patent_number,
PCT_app_number=PCT_app_number,
PCT_pub_number=PCT_pub_number,
)
if wrapper is None:
return None
dest_dir = destination or "."
app_no = wrapper.application_number_text or "unknown"
downloaded_documents: dict[str, str] = {}
if as_zip:
output_path = os.path.join(dest_dir, f"{app_no}_ifw.zip")
if os.path.exists(output_path) and not overwrite:
raise FileExistsError(
f"ZIP archive already exists: {output_path}. Use overwrite=True to replace."
)
os.makedirs(dest_dir, exist_ok=True)
with tempfile.TemporaryDirectory() as tmp_dir:
with zipfile.ZipFile(
output_path, "w", compression=zipfile.ZIP_DEFLATED
) as zf:
for doc in wrapper.document_bag or []:
if not doc.document_identifier:
continue
fmt_obj = next(
(
f
for f in doc.document_formats
if f.mime_type_identifier in ("PDF", "MS_WORD")
and f.download_url
),
None,
)
if fmt_obj is None or not fmt_obj.download_url:
continue
try:
downloaded = self._download_and_extract(
url=fmt_obj.download_url,
destination=tmp_dir,
overwrite=True,
)
arcname = os.path.basename(downloaded)
zf.write(downloaded, arcname=arcname)
downloaded_documents[doc.document_identifier] = arcname
except Exception as exc:
warnings.warn(
f"Failed to download document {doc.document_identifier} "
f"({doc.document_code}): {exc}",
stacklevel=2,
)
else:
output_path = os.path.join(dest_dir, f"{app_no}_ifw")
if os.path.exists(output_path) and not overwrite:
raise FileExistsError(
f"Output directory already exists: {output_path}. Use overwrite=True to replace."
)
os.makedirs(output_path, exist_ok=True)
for doc in wrapper.document_bag or []:
if not doc.document_identifier:
continue
fmt_obj = next(
(
f
for f in doc.document_formats
if f.mime_type_identifier in ("PDF", "MS_WORD")
and f.download_url
),
None,
)
if fmt_obj is None or not fmt_obj.download_url:
continue
try:
downloaded = self._download_and_extract(
url=fmt_obj.download_url,
destination=output_path,
overwrite=overwrite,
)
downloaded_documents[doc.document_identifier] = os.path.basename(
downloaded
)
except Exception as exc:
warnings.warn(
f"Failed to download document {doc.document_identifier} "
f"({doc.document_code}): {exc}",
stacklevel=2,
)
return IFWResult(
wrapper=wrapper,
output_path=os.path.abspath(output_path),
downloaded_documents=downloaded_documents,
)
[docs]
def download_archive(
self,
printed_metadata: PrintedMetaData,
destination: str | None = None,
file_name: str | None = None,
overwrite: bool = False,
) -> str:
"""Download Printed Metadata (XML data).
These are XML files of the patent as printed. Auto-extracts if the server
sends a TAR/ZIP archive.
Note:
See also `download_publication()` for a clearer method name with identical functionality.
Args:
printed_metadata: ArchiveMetaData object containing download URL and metadata
destination: Optional directory path to save the file
file_name: Optional filename. If not provided, uses Content-Disposition header
overwrite: Whether to overwrite existing files. Default False
Returns:
str: Path to the downloaded file (extracted if was in archive)
Raises:
ValueError: If printed_metadata has no download URL
FileExistsError: If file exists and overwrite=False
"""
if not printed_metadata.file_location_uri:
raise ValueError("PrintedMetaData has no file_location_uri")
return self._download_and_extract(
url=printed_metadata.file_location_uri,
destination=destination,
file_name=file_name,
overwrite=overwrite,
)
[docs]
def download_publication(
self,
printed_metadata: PrintedMetaData,
destination: str | None = None,
file_name: str | None = None,
overwrite: bool = False,
) -> str:
"""Download a publication XML file (grant or pre-grant publication).
This method downloads publication XML files from PrintedMetaData objects,
such as grant documents or pre-grant publications (pgpub). Auto-extracts
if the server sends a TAR/ZIP archive.
Args:
printed_metadata: PrintedMetaData object containing the publication
download URL and filename information. Typically obtained from
`get_application_associated_documents()` or from PatentFileWrapper's
`grant_document_meta_data` or `pg_publication_document_meta_data`.
destination: Optional directory path where the file should be saved.
If not provided, saves to the current directory. The directory will
be created if it doesn't exist.
file_name: Optional custom filename. If not provided, uses the
`xml_file_name` from the metadata (e.g., "18915708_12307527.xml").
overwrite: Whether to overwrite an existing file at the destination.
Default is False, which raises FileExistsError if file exists.
Returns:
str: Absolute path to the downloaded publication file (extracted if was in archive).
Raises:
ValueError: If printed_metadata has no file_location_uri (download URL).
FileExistsError: If the file already exists and overwrite=False.
Examples:
Download grant XML to a specific directory (auto-filename):
>>> response = client.get_application_by_number("18/915,708")
>>> ifw = response
>>> grant_metadata = ifw.grant_document_meta_data
>>> path = client.download_publication(grant_metadata, destination="./downloads")
>>> print(path)
'./downloads/18915708_12307527.xml'
Download pgpub XML with custom filename:
>>> pgpub_metadata = ifw.pg_publication_document_meta_data
>>> path = client.download_publication(
... pgpub_metadata,
... file_name="my_publication.xml",
... destination="./downloads"
... )
>>> print(path)
'./downloads/my_publication.xml'
Download to current directory:
>>> path = client.download_publication(grant_metadata)
>>> print(path)
'./18915708_12307527.xml'
"""
if not printed_metadata.file_location_uri:
raise ValueError("PrintedMetaData has no file_location_uri")
return self._download_and_extract(
url=printed_metadata.file_location_uri,
destination=destination,
file_name=file_name,
overwrite=overwrite,
)