Bulk Data Examples

  1"""Example usage of the pyUSPTO package for bulk data.
  2
  3This example demonstrates how to use the BulkDataClient to interact with the USPTO Bulk Data API.
  4It shows how to retrieve product information, search for products, and download files.
  5"""
  6
  7import os
  8
  9import requests
 10
 11from pyUSPTO.clients import BulkDataClient  # Import from top-level package
 12from pyUSPTO.config import USPTOConfig
 13
 14
 15def format_size(size_bytes: int | float) -> str:
 16    """Format a size in bytes to a human-readable string (KB, MB, GB, etc.).
 17
 18    Args:
 19        size_bytes: The size in bytes to format
 20
 21    Returns:
 22        A human-readable string representation of the size
 23    """
 24    if size_bytes == 0:
 25        return "0 B"
 26
 27    size_names = ["B", "KB", "MB", "GB", "TB", "PB"]
 28    i = 0
 29    while size_bytes >= 1024 and i < len(size_names) - 1:
 30        size_bytes /= 1024
 31        i += 1
 32
 33    # Round to 2 decimal places
 34    return f"{size_bytes:.2f} {size_names[i]}"
 35
 36
 37# Method 1: Initialize the client with direct API key
 38# This approach is simple but less flexible
 39print("Method 1: Initialize with direct API key")
 40api_key = "YOUR_API_KEY_HERE"  # Replace with your actual API key
 41client = BulkDataClient(api_key=api_key)
 42
 43# Method 2: Initialize the client with USPTOConfig
 44# This approach provides more configuration options
 45print("\nMethod 2: Initialize with USPTOConfig")
 46config = USPTOConfig(
 47    api_key="YOUR_API_KEY_HERE",  # Replace with your actual API key
 48    bulk_data_base_url="https://api.uspto.gov/api/v1/datasets",
 49    patent_data_base_url="https://api.uspto.gov/api/v1/patent",
 50)
 51client = BulkDataClient(config=config)
 52
 53# Method 3: Initialize the client with environment variables
 54# This is the most secure approach for production use
 55print("\nMethod 3: Initialize with environment variables")
 56# Set environment variable (in a real scenario, this would be set outside the script)
 57os.environ["USPTO_API_KEY"] = "YOUR_API_KEY_HERE"  # Replace with your actual API key
 58config_from_env = USPTOConfig.from_env()
 59client = BulkDataClient(config=config_from_env)
 60
 61print("\nBeginning API requests with configured client:")
 62
 63# Get all available products
 64response = client.get_products()
 65print(f"Found {response.count} products")
 66
 67# Display information about each product
 68for product in response.bulk_data_product_bag:
 69    print(f"\nProduct: {product.product_title_text}")
 70    print(f"ID: {product.product_identifier}")
 71    print(f"Description: {product.product_description_text}")
 72    print(f"Date range: {product.product_from_date} to {product.product_to_date}")
 73    print(f"Total files: {product.product_file_total_quantity}")
 74    print(f"Total size: {format_size(size_bytes=product.product_total_file_size)}")
 75
 76    # Get detailed product info with files included
 77    try:
 78        detailed_product = client.get_product_by_id(
 79            product.product_identifier, include_files=True
 80        )
 81        if (
 82            detailed_product.product_file_bag
 83            and detailed_product.product_file_bag.file_data_bag
 84        ):
 85            print(f"\nFiles ({detailed_product.product_file_bag.count}):")
 86            for file_data in detailed_product.product_file_bag.file_data_bag:
 87                print(f"  - {file_data.file_name} ({format_size(file_data.file_size)})")
 88                print(f"    Type: {file_data.file_type_text}")
 89                print(f"    Released: {file_data.file_release_date}")
 90                if file_data.file_download_uri:
 91                    print(f"    Download URI: {file_data.file_download_uri}")
 92        else:
 93            print("\nNo files available for this product")
 94    except Exception as e:
 95        print(f"\nError retrieving detailed product info: {e}")
 96
 97# Search for products by date range
 98date_filtered = client.search_products(from_date="2025-01-01", to_date="2025-03-31")
 99print(f"\nFound {date_filtered.count} products in date range")
100
101# Search for products by label
102try:
103    # Using labels we saw in the API response
104    label_filtered = client.search_products(labels=["Patent"])
105    print(f"\nFound {label_filtered.count} products with label 'Patent'")
106except requests.exceptions.HTTPError as e:
107    print(f"Error searching by labels: {e}")
108
# Get a specific product by ID and download its first file
product_id = "PEDSJSON"  # Using a real product ID from the output
try:
    product = client.get_product_by_id(product_id, include_files=True)
    print(f"\nRetrieved product: {product.product_title_text}")

    # Download a file from this product
    if product.product_file_bag and product.product_file_bag.file_data_bag:
        file_to_download = product.product_file_bag.file_data_bag[0]
        print(f"File download URI: {file_to_download.file_download_uri}")
        # Handle download failures separately so they are not reported as a
        # product-retrieval error by the outer handler.
        try:
            downloaded_path = client.download_file(
                file_data=file_to_download, destination="./downloads"
            )
        except Exception as e:
            print(f"Error downloading file {file_to_download.file_name}: {e}")
        else:
            print(f"Downloaded file to: {downloaded_path}")
            print(f"File size: {format_size(size_bytes=file_to_download.file_size)}")
    else:
        print("No files available for this product")

except Exception as e:
    # Best-effort example: report the failure instead of crashing the script.
    print(f"Error retrieving product {product_id}: {e}")