Bulk Data Examples

  1"""Example usage of pyUSPTO for bulk data products.
  2
  3Demonstrates the BulkDataClient for searching products, listing files,
  4and downloading bulk data archives.
  5"""
  6
  7import os
  8
  9from pyUSPTO import BulkDataClient, FileData, USPTOConfig
 10
 11DEST_PATH = "./notes/download-example"
 12
 13
 14def format_size(size_bytes: int | float) -> str:
 15    """Format a size in bytes to a human-readable string (KB, MB, GB, etc.)."""
 16    if size_bytes == 0:
 17        return "0 B"
 18
 19    size_names = ["B", "KB", "MB", "GB", "TB", "PB"]
 20    i = 0
 21    while size_bytes >= 1024 and i < len(size_names) - 1:
 22        size_bytes /= 1024
 23        i += 1
 24
 25    return f"{size_bytes:.2f} {size_names[i]}"
 26
 27
 28# --- Client Initialization ---
 29api_key = os.environ.get("USPTO_API_KEY", "YOUR_API_KEY_HERE")
 30if api_key == "YOUR_API_KEY_HERE":
 31    raise ValueError(
 32        "API key is not set. Set the USPTO_API_KEY environment variable."
 33    )
 34config = USPTOConfig(api_key=api_key)
 35client = BulkDataClient(config=config)
 36
 37print("-" * 40)
 38print("Example 1: Search for products")
 39print("-" * 40)
 40
 41response = client.search_products(query="patent", limit=5)
 42print(f"Found {response.count} products matching 'patent'")
 43
 44for product in response.bulk_data_product_bag:
 45    print(f"\n  Product: {product.product_title_text}")
 46    print(f"  ID: {product.product_identifier}")
 47    print(f"  Description: {product.product_description_text[:100]}...")
 48    print(f"  Total files: {product.product_file_total_quantity}")
 49    print(f"  Total size: {format_size(product.product_total_file_size)}")
 50
 51print("-" * 40)
 52print("Example 2: Paginate through products")
 53print("-" * 40)
 54
 55max_items = 20
 56count = 0
 57for product in client.paginate_products(query="trademark", limit=10):
 58    count += 1
 59    print(f"  {count}. {product.product_title_text} ({product.product_identifier})")
 60    if count >= max_items:
 61        print(f"  ... (stopping at {max_items} products)")
 62        break
 63
 64print("-" * 40)
 65print("Example 3: Get product by ID")
 66print("-" * 40)
 67
 68product_id = "PTGRXML"  # Patent Grant Full-Text Data (No Images) - XML
 69product = client.get_product_by_id(product_id, include_files=True, latest=True)
 70
 71print(f"Product: {product.product_title_text}")
 72print(f"Description: {product.product_description_text}")
 73print(f"Frequency: {product.product_frequency_text}")
 74print(f"Labels: {product.product_label_array_text}")
 75print(f"Categories: {product.product_dataset_category_array_text}")
 76print(f"Date range: {product.product_from_date} to {product.product_to_date}")
 77
 78print("-" * 40)
 79print("Example 4: List files for a product")
 80print("-" * 40)
 81
 82if product.product_file_bag and product.product_file_bag.file_data_bag:
 83    print(f"Found {len(product.product_file_bag.file_data_bag)} file(s):")
 84
 85    for file_data in product.product_file_bag.file_data_bag:
 86        print(f"\n  File: {file_data.file_name}")
 87        print(f"  Size: {format_size(file_data.file_size)}")
 88        print(f"  Type: {file_data.file_type_text}")
 89        print(
 90            f"  Data range: {file_data.file_data_from_date} to {file_data.file_data_to_date}"
 91        )
 92        print(f"  Released: {file_data.file_release_date}")
 93        print(f"  Download URI: {file_data.file_download_uri}")
 94else:
 95    print("No files found for this product")
 96
 97print("-" * 40)
 98print("Example 5: Download a file (with extraction)")
 99print("-" * 40)
100
101min_file: FileData | None = None
102last_bytes: float = float("inf")
103
104if product.product_file_bag and product.product_file_bag.file_data_bag:
105    for file_data in product.product_file_bag.file_data_bag:
106        if file_data.file_size < last_bytes:
107            last_bytes = file_data.file_size
108            min_file = file_data
109
110if min_file:
111    print(f"Downloading smallest file: {min_file.file_name}")
112    print(f"Size: {format_size(min_file.file_size)}")
113
114    downloaded_path = client.download_file(
115        file_data=min_file,
116        destination=DEST_PATH,
117        overwrite=True,
118        extract=True,
119    )
120    print(f"Downloaded to {downloaded_path}")
121
print("-" * 40, "Example 6: Download without extraction", "-" * 40, sep="\n")

# Download only when the product lists files and a file has been selected.
files_listed = product.product_file_bag and product.product_file_bag.file_data_bag
if files_listed and min_file:
    # Same call as the extracting download, but with extract=False.
    downloaded_path = client.download_file(
        file_data=min_file, destination=DEST_PATH, overwrite=True, extract=False
    )
    print(f"Archive saved to {downloaded_path}")
133    print(f"Archive saved to {downloaded_path}")