Bulk Data Examples

  1"""Example usage of the BulkDataClient.
  2
  3This example demonstrates how to use the BulkDataClient to interact with the USPTO Bulk Data API.
  4It shows how to search for products, retrieve product details, and download files.
  5"""
  6
  7import os
  8
  9from pyUSPTO.clients import BulkDataClient
 10from pyUSPTO.config import USPTOConfig
 11from pyUSPTO.models.bulk_data import FileData
 12
 13
 14def format_size(size_bytes: int | float) -> str:
 15    """Format a size in bytes to a human-readable string (KB, MB, GB, etc.).
 16
 17    Args:
 18        size_bytes: The size in bytes to format
 19
 20    Returns:
 21        A human-readable string representation of the size
 22    """
 23    if size_bytes == 0:
 24        return "0 B"
 25
 26    size_names = ["B", "KB", "MB", "GB", "TB", "PB"]
 27    i = 0
 28    while size_bytes >= 1024 and i < len(size_names) - 1:
 29        size_bytes /= 1024
 30        i += 1
 31
 32    # Round to 2 decimal places
 33    return f"{size_bytes:.2f} {size_names[i]}"
 34
 35
 36# ============================================================================
 37# Client Initialization Methods
 38# ============================================================================
 39
 40# Method 1: Initialize with API key directly
 41print("Method 1: Initialize with direct API key")
 42api_key = "YOUR_API_KEY_HERE"  # Replace with your actual API key
 43client = BulkDataClient(api_key=api_key)
 44
 45# Method 2: Initialize with USPTOConfig object
 46print("\nMethod 2: Initialize with USPTOConfig")
 47config = USPTOConfig(api_key="YOUR_API_KEY_HERE")
 48client = BulkDataClient(config=config)
 49
 50# Method 3: Initialize from environment variables (recommended)
 51print("\nMethod 3: Initialize from environment variables")
 52os.environ["USPTO_API_KEY"] = "YOUR_API_KEY_HERE"  # Set this outside your script
 53config_from_env = USPTOConfig.from_env()
 54client = BulkDataClient(config=config_from_env)
 55
 56print("\n" + "=" * 60)
 57print("Beginning API requests with configured client")
 58print("=" * 60)
 59
 60
 61# ============================================================================
 62# Example 1: Search for Products
 63# ============================================================================
 64
 65print("\n--- Example 1: Search for Products ---")
 66# The Bulk Data API supports full-text search via the query parameter
 67# Field-specific queries (e.g., "productIdentifier:value") are not supported
 68
 69# Search for patent-related products
 70response = client.search_products(query="patent", limit=5)
 71print(f"Found {response.count} products matching 'patent'")
 72
 73for product in response.bulk_data_product_bag:
 74    print(f"\n  Product: {product.product_title_text}")
 75    print(f"  ID: {product.product_identifier}")
 76    print(f"  Description: {product.product_description_text[:100]}...")
 77    print(f"  Total files: {product.product_file_total_quantity}")
 78    print(f"  Total size: {format_size(product.product_total_file_size)}")
 79
 80
 81# ============================================================================
 82# Example 2: Paginate Through All Products
 83# ============================================================================
 84
 85print("\n--- Example 2: Paginate Through Products ---")
 86# Use pagination to iterate through all matching products
 87
 88count = 0
 89for product in client.paginate_products(query="trademark", limit=10):
 90    count += 1
 91    print(f"  {count}. {product.product_title_text} ({product.product_identifier})")
 92    if count >= 20:  # Limit output for example
 93        print("  ... (stopping after 20 products)")
 94        break
 95
 96
 97# ============================================================================
 98# Example 3: Get Product Details by ID
 99# ============================================================================
100
101print("\n--- Example 3: Get Product by ID ---")
102# Retrieve a specific product by its identifier
103# Use include_files=True to get file listing
104
105product_id = "PTGRXML"  # Patent Grant Full-Text Data (No Images) - XML
106product = client.get_product_by_id(product_id, include_files=True, latest=True)
107
108print(f"Product: {product.product_title_text}")
109print(f"Description: {product.product_description_text}")
110print(f"Frequency: {product.product_frequency_text}")
111print(f"Labels: {product.product_label_array_text}")
112print(f"Categories: {product.product_dataset_category_array_text}")
113print(f"Date range: {product.product_from_date} to {product.product_to_date}")
114
115
# ============================================================================
# Example 4: List Files for a Product
# ============================================================================

print("\n--- Example 4: List Files for a Product ---")
# Print the details of every file attached to `product`.

file_bag = product.product_file_bag
listed_files = file_bag.file_data_bag if file_bag else None

if not listed_files:
    # Either the product carries no file bag or the bag is empty.
    print("No files found for this product")
else:
    print(f"Found {len(listed_files)} file(s):")

    for file_data in listed_files:
        print(f"\n  File: {file_data.file_name}")
        print(f"  Size: {format_size(file_data.file_size)}")
        print(f"  Type: {file_data.file_type_text}")
        print(
            f"  Data range: {file_data.file_data_from_date} to {file_data.file_data_to_date}"
        )
        print(f"  Released: {file_data.file_release_date}")
        print(f"  Download URI: {file_data.file_download_uri}")
137
138
# ============================================================================
# Example 5: Download a File
# ============================================================================

print("\n--- Example 5: Download a File ---")
# Download the smallest file of the product so the example stays quick.

candidate_bag = product.product_file_bag
candidate_files = candidate_bag.file_data_bag if candidate_bag else None

# min() returns the first file with the lowest size; default=None covers a
# product whose file list is missing or empty.
min_file: FileData | None = min(
    candidate_files or [],
    key=lambda candidate: candidate.file_size,
    default=None,
)

if min_file is not None:
    print(f"Downloading smallest file: {min_file.file_name}")
    print(f"Size: {format_size(min_file.file_size)}")

    # Broad catch is deliberate: this is the top level of an example script,
    # and a failed download should be reported without aborting the run.
    try:
        # Download with extraction (default behavior for archives)
        downloaded_path = client.download_file(
            file_data=min_file,
            destination="./downloads",
            overwrite=True,
            extract=True,  # Auto-extract if it's a tar.gz or zip
        )
        print(f"SUCCESS: Downloaded to {downloaded_path}")
    except Exception as e:
        print(f"ERROR: {e}")
170
171
# ============================================================================
# Example 6: Download Without Extraction
# ============================================================================

print("\n--- Example 6: Download Without Extraction ---")
# Fetch the same file again, this time leaving the archive as downloaded.

has_file_listing = bool(
    product.product_file_bag and product.product_file_bag.file_data_bag
)

if has_file_listing and min_file:
    archive_options = {
        "destination": "./downloads",
        "overwrite": True,
        "extract": False,  # Keep archive compressed
    }
    try:
        # Download without extraction
        downloaded_path = client.download_file(file_data=min_file, **archive_options)
        print(f"SUCCESS: Archive saved to {downloaded_path}")
    except Exception as e:
        print(f"ERROR: {e}")
191
192
# Closing banner: a 60-character rule above and below the final message.
closing_rule = "=" * 60
print("\n" + closing_rule)
print("Examples complete!")
print(closing_rule)