Bulk Data Examples

"""Example usage of pyUSPTO for the BulkDataClient.

This example demonstrates how to use the BulkDataClient to interact with the USPTO Bulk Data API.
It shows how to search for products, retrieve product details, and download files.
"""
  6
  7import os
  8
  9from pyUSPTO.clients import BulkDataClient
 10from pyUSPTO.config import USPTOConfig
 11from pyUSPTO.models.bulk_data import FileData
 12
 13
 14def format_size(size_bytes: int | float) -> str:
 15    """Format a size in bytes to a human-readable string (KB, MB, GB, etc.).
 16
 17    Args:
 18        size_bytes: The size in bytes to format
 19
 20    Returns:
 21        A human-readable string representation of the size
 22    """
 23    if size_bytes == 0:
 24        return "0 B"
 25
 26    size_names = ["B", "KB", "MB", "GB", "TB", "PB"]
 27    i = 0
 28    while size_bytes >= 1024 and i < len(size_names) - 1:
 29        size_bytes /= 1024
 30        i += 1
 31
 32    # Round to 2 decimal places
 33    return f"{size_bytes:.2f} {size_names[i]}"
 34
 35
 36# ============================================================================
 37# Client Initialization Methods
 38# ============================================================================
 39
 40# Method 1: Initialize with USPTOConfig object
 41print("\nMethod 1: Initialize with USPTOConfig")
 42config = USPTOConfig(api_key="YOUR_API_KEY_HERE")
 43client = BulkDataClient(config=config)
 44
 45# Method 2: Initialize from environment variables (recommended)
 46print("\nMethod 2: Initialize from environment variables")
 47os.environ["USPTO_API_KEY"] = "YOUR_API_KEY_HERE"  # Set this outside your script
 48config_from_env = USPTOConfig.from_env()
 49client = BulkDataClient(config=config_from_env)
 50
 51print("\n" + "=" * 60)
 52print("Beginning API requests with configured client")
 53print("=" * 60)
 54
 55
 56# ============================================================================
 57# Example 1: Search for Products
 58# ============================================================================
 59
 60print("\n--- Example 1: Search for Products ---")
 61# The Bulk Data API supports full-text search via the query parameter
 62# Field-specific queries (e.g., "productIdentifier:value") are not supported
 63
 64# Search for patent-related products
 65response = client.search_products(query="patent", limit=5)
 66print(f"Found {response.count} products matching 'patent'")
 67
 68for product in response.bulk_data_product_bag:
 69    print(f"\n  Product: {product.product_title_text}")
 70    print(f"  ID: {product.product_identifier}")
 71    print(f"  Description: {product.product_description_text[:100]}...")
 72    print(f"  Total files: {product.product_file_total_quantity}")
 73    print(f"  Total size: {format_size(product.product_total_file_size)}")
 74
 75
 76# ============================================================================
 77# Example 2: Paginate Through All Products
 78# ============================================================================
 79
 80print("\n--- Example 2: Paginate Through Products ---")
 81# Use pagination to iterate through all matching products
 82
 83count = 0
 84for product in client.paginate_products(query="trademark", limit=10):
 85    count += 1
 86    print(f"  {count}. {product.product_title_text} ({product.product_identifier})")
 87    if count >= 20:  # Limit output for example
 88        print("  ... (stopping after 20 products)")
 89        break
 90
 91
 92# ============================================================================
 93# Example 3: Get Product Details by ID
 94# ============================================================================
 95
 96print("\n--- Example 3: Get Product by ID ---")
 97# Retrieve a specific product by its identifier
 98# Use include_files=True to get file listing
 99
100product_id = "PTGRXML"  # Patent Grant Full-Text Data (No Images) - XML
101product = client.get_product_by_id(product_id, include_files=True, latest=True)
102
103print(f"Product: {product.product_title_text}")
104print(f"Description: {product.product_description_text}")
105print(f"Frequency: {product.product_frequency_text}")
106print(f"Labels: {product.product_label_array_text}")
107print(f"Categories: {product.product_dataset_category_array_text}")
108print(f"Date range: {product.product_from_date} to {product.product_to_date}")
109
110
# ============================================================================
# Example 4: List Files for a Product
# ============================================================================

print("\n--- Example 4: List Files for a Product ---")
# Show the details of every file attached to the product fetched in Example 3

file_bag = product.product_file_bag
if not (file_bag and file_bag.file_data_bag):
    # Either no file bag came back or it holds no entries.
    print("No files found for this product")
else:
    print(f"Found {len(file_bag.file_data_bag)} file(s):")

    for file_data in file_bag.file_data_bag:
        data_range = (
            f"{file_data.file_data_from_date} to {file_data.file_data_to_date}"
        )
        details = [
            f"\n  File: {file_data.file_name}",
            f"  Size: {format_size(file_data.file_size)}",
            f"  Type: {file_data.file_type_text}",
            f"  Data range: {data_range}",
            f"  Released: {file_data.file_release_date}",
            f"  Download URI: {file_data.file_download_uri}",
        ]
        print("\n".join(details))
132
133
# ============================================================================
# Example 5: Download a File
# ============================================================================

print("\n--- Example 5: Download a File ---")
# Download the smallest file of the product fetched in Example 3

file_bag = product.product_file_bag
# min() with default=None replaces a manual "smallest so far" loop. It yields
# None when there is no file bag or the bag is empty, and the first file with
# the smallest size otherwise.
min_file: FileData | None = min(
    (file_bag.file_data_bag if file_bag else None) or [],
    key=lambda candidate: candidate.file_size,
    default=None,
)

if min_file is not None:
    print(f"Downloading smallest file: {min_file.file_name}")
    print(f"Size: {format_size(min_file.file_size)}")

    try:
        # Download with extraction (default behavior for archives)
        downloaded_path = client.download_file(
            file_data=min_file,
            destination="./downloads",
            overwrite=True,
            extract=True,  # Auto-extract if it's a tar.gz or zip
        )
    except Exception as e:
        # Broad on purpose: this is the top level of an example script, so any
        # download failure is reported and the remaining examples still run.
        print(f"ERROR: {e}")
    else:
        print(f"SUCCESS: Downloaded to {downloaded_path}")
163    except Exception as e:
164        print(f"ERROR: {e}")
165
166
# ============================================================================
# Example 6: Download Without Extraction
# ============================================================================

print("\n--- Example 6: Download Without Extraction ---")
# Fetch the same file again, this time leaving the archive as downloaded

has_files = product.product_file_bag and product.product_file_bag.file_data_bag
if has_files and min_file:
    # Same call as Example 5 except that extraction is switched off.
    download_options = {
        "file_data": min_file,
        "destination": "./downloads",
        "overwrite": True,
        "extract": False,  # Keep archive compressed
    }
    try:
        downloaded_path = client.download_file(**download_options)
        print(f"SUCCESS: Archive saved to {downloaded_path}")
    except Exception as e:
        print(f"ERROR: {e}")


separator = "=" * 60
print("\n" + separator)
print("Examples complete!")
print(separator)