"""Example usage of the pyUSPTO package for bulk data.

This example demonstrates how to use the BulkDataClient to interact with the USPTO Bulk Data API.
It shows how to retrieve product information, search for products, and download files.
"""
6
7import os
8
9import requests
10
11from pyUSPTO.clients import BulkDataClient # Import from top-level package
12from pyUSPTO.config import USPTOConfig
13
14
15def format_size(size_bytes: int | float) -> str:
16 """Format a size in bytes to a human-readable string (KB, MB, GB, etc.).
17
18 Args:
19 size_bytes: The size in bytes to format
20
21 Returns:
22 A human-readable string representation of the size
23 """
24 if size_bytes == 0:
25 return "0 B"
26
27 size_names = ["B", "KB", "MB", "GB", "TB", "PB"]
28 i = 0
29 while size_bytes >= 1024 and i < len(size_names) - 1:
30 size_bytes /= 1024
31 i += 1
32
33 # Round to 2 decimal places
34 return f"{size_bytes:.2f} {size_names[i]}"
35
36
# Method 1: Initialize the client with direct API key
# This approach is simple but less flexible
print("Method 1: Initialize with direct API key")
api_key = "YOUR_API_KEY_HERE"  # Replace with your actual API key
client = BulkDataClient(api_key=api_key)

# Method 2: Initialize the client with USPTOConfig
# This approach provides more configuration options
print("\nMethod 2: Initialize with USPTOConfig")
config = USPTOConfig(
    api_key="YOUR_API_KEY_HERE",  # Replace with your actual API key
    bulk_data_base_url="https://api.uspto.gov/api/v1/datasets",
    patent_data_base_url="https://api.uspto.gov/api/v1/patent",
)
client = BulkDataClient(config=config)

# Method 3: Initialize the client with environment variables
# This is the most secure approach for production use
print("\nMethod 3: Initialize with environment variables")
# In a real scenario USPTO_API_KEY is set outside the script. setdefault only
# installs the placeholder when the variable is missing, so a real key that is
# already exported in the environment is not overwritten by this example.
os.environ.setdefault("USPTO_API_KEY", "YOUR_API_KEY_HERE")
config_from_env = USPTOConfig.from_env()
# Each method rebinds `client`; the requests below use this last one.
client = BulkDataClient(config=config_from_env)
60
print("\nBeginning API requests with configured client:")

# Fetch the product listing and report how many entries it contains
response = client.get_products()
print(f"Found {response.count} products")

# Print a summary for every product, followed by its file details
for product in response.bulk_data_product_bag:
    print(f"\nProduct: {product.product_title_text}")
    print(f"ID: {product.product_identifier}")
    print(f"Description: {product.product_description_text}")
    print(f"Date range: {product.product_from_date} to {product.product_to_date}")
    print(f"Total files: {product.product_file_total_quantity}")
    print(f"Total size: {format_size(product.product_total_file_size)}")

    # Re-request the product with include_files=True to get its file list.
    # Any failure is reported and the loop moves on to the next product.
    try:
        detailed_product = client.get_product_by_id(
            product.product_identifier, include_files=True
        )
        file_bag = detailed_product.product_file_bag

        # Guard clause: nothing to list when the bag or its file list is empty
        if not file_bag or not file_bag.file_data_bag:
            print("\nNo files available for this product")
            continue

        print(f"\nFiles ({file_bag.count}):")
        for file_data in file_bag.file_data_bag:
            print(
                f" - {file_data.file_name} "
                f"({format_size(size_bytes=file_data.file_size)})"
            )
            print(f" Type: {file_data.file_type_text}")
            print(f" Released: {file_data.file_release_date}")
            if file_data.file_download_uri:
                print(f" Download URI: {file_data.file_download_uri}")
    except Exception as e:
        print(f"\nError retrieving detailed product info: {e}")
96
# Search for products by date range
date_filtered = client.search_products(
    from_date="2025-01-01",
    to_date="2025-03-31",
)
print(f"\nFound {date_filtered.count} products in date range")

# Search for products by label; an HTTP error is reported instead of
# stopping the script
try:
    # Using labels we saw in the API response
    label_filtered = client.search_products(labels=["Patent"])
except requests.exceptions.HTTPError as e:
    print(f"Error searching by labels: {e}")
else:
    print(f"\nFound {label_filtered.count} products with label 'Patent'")
108
# Get a specific product by ID
product_id = "PEDSJSON"  # Using a real product ID from the output
try:
    product = client.get_product_by_id(product_id, include_files=True)
    print(f"\nRetrieved product: {product.product_title_text}")

    # Pick the file list only when the product actually carries a file bag
    file_bag = product.product_file_bag
    available_files = file_bag.file_data_bag if file_bag else None

    # Download the first listed file, if there is one
    if available_files:
        file_to_download = available_files[0]
        print(f"File download URI: {file_to_download.file_download_uri}")
        downloaded_path = client.download_file(
            file_data=file_to_download,
            destination="./downloads",
        )
        print(f"Downloaded file to: {downloaded_path}")
        print(f"File size: {format_size(file_to_download.file_size)}")

except Exception as e:
    # Covers both the product lookup and the download step
    print(f"Error retrieving product {product_id}: {e}")