1"""Example usage of the BulkDataClient.
2
3This example demonstrates how to use the BulkDataClient to interact with the USPTO Bulk Data API.
4It shows how to search for products, retrieve product details, and download files.
5"""
6
7import os
8
9from pyUSPTO.clients import BulkDataClient
10from pyUSPTO.config import USPTOConfig
11from pyUSPTO.models.bulk_data import FileData
12
13
14def format_size(size_bytes: int | float) -> str:
15 """Format a size in bytes to a human-readable string (KB, MB, GB, etc.).
16
17 Args:
18 size_bytes: The size in bytes to format
19
20 Returns:
21 A human-readable string representation of the size
22 """
23 if size_bytes == 0:
24 return "0 B"
25
26 size_names = ["B", "KB", "MB", "GB", "TB", "PB"]
27 i = 0
28 while size_bytes >= 1024 and i < len(size_names) - 1:
29 size_bytes /= 1024
30 i += 1
31
32 # Round to 2 decimal places
33 return f"{size_bytes:.2f} {size_names[i]}"
34
35
# ============================================================================
# Client Initialization Methods
# ============================================================================
# The three methods below are alternatives. Each one rebinds ``client``, so
# the client built by Method 3 is the one used by the examples that follow.

# Method 1: Initialize with API key directly
print("Method 1: Initialize with direct API key")
api_key = "YOUR_API_KEY_HERE"  # Replace with your actual API key
client = BulkDataClient(api_key=api_key)

# Method 2: Initialize with USPTOConfig object
print("\nMethod 2: Initialize with USPTOConfig")
config = USPTOConfig(api_key="YOUR_API_KEY_HERE")
client = BulkDataClient(config=config)

# Method 3: Initialize from environment variables (recommended for production)
print("\nMethod 3: Initialize from environment variables")
# USPTO_API_KEY should normally be set outside your script. setdefault() only
# fills in the placeholder when the variable is missing, so a real key that is
# already exported is not overwritten by this example.
os.environ.setdefault("USPTO_API_KEY", "YOUR_API_KEY_HERE")
config_from_env = USPTOConfig.from_env()
client = BulkDataClient(config=config_from_env)

print("\n" + "=" * 60)
print("Beginning API requests with configured client")
print("=" * 60)
59
60
# ============================================================================
# Example 1: Search for Products
# ============================================================================

print("\n--- Example 1: Search for Products ---")
# The query parameter performs a full-text search; field-specific queries
# (e.g., "productIdentifier:value") are not supported by the Bulk Data API.

# Look up patent-related products and report how many matched
response = client.search_products(query="patent", limit=5)
print(f"Found {response.count} products matching 'patent'")

# One summary per product; the description is cut to its first 100 characters
for product in response.bulk_data_product_bag:
    print(
        f"\n Product: {product.product_title_text}",
        f" ID: {product.product_identifier}",
        f" Description: {product.product_description_text[:100]}...",
        f" Total files: {product.product_file_total_quantity}",
        f" Total size: {format_size(product.product_total_file_size)}",
        sep="\n",
    )
79
80
# ============================================================================
# Example 2: Paginate Through All Products
# ============================================================================

print("\n--- Example 2: Paginate Through Products ---")
# Use pagination to iterate through all matching products

# enumerate(start=1) supplies the running 1-based position, so no separate
# counter variable has to be maintained by hand.
for count, product in enumerate(
    client.paginate_products(query="trademark", limit=10), start=1
):
    print(f" {count}. {product.product_title_text} ({product.product_identifier})")
    if count >= 20:  # Limit output for example
        print(" ... (stopping after 20 products)")
        break
95
96
# ============================================================================
# Example 3: Get Product Details by ID
# ============================================================================

print("\n--- Example 3: Get Product by ID ---")
# Fetch a single product by its identifier.
# include_files=True asks for the file listing as well.

product_id = "PTGRXML"  # Patent Grant Full-Text Data (No Images) - XML
product = client.get_product_by_id(
    product_id,
    include_files=True,
    latest=True,
)

# Print the product's descriptive fields, one per line
print(
    f"Product: {product.product_title_text}",
    f"Description: {product.product_description_text}",
    f"Frequency: {product.product_frequency_text}",
    f"Labels: {product.product_label_array_text}",
    f"Categories: {product.product_dataset_category_array_text}",
    f"Date range: {product.product_from_date} to {product.product_to_date}",
    sep="\n",
)
114
115
# ============================================================================
# Example 4: List Files for a Product
# ============================================================================

print("\n--- Example 4: List Files for a Product ---")
# Show the details of every file attached to the product fetched above

# Either the file bag itself or its file list may be missing/empty
listed_bag = product.product_file_bag
listed_files = listed_bag.file_data_bag if listed_bag else None

if not listed_files:
    print("No files found for this product")
else:
    print(f"Found {len(listed_files)} file(s):")

    for file_data in listed_files:
        print(
            f"\n File: {file_data.file_name}",
            f" Size: {format_size(file_data.file_size)}",
            f" Type: {file_data.file_type_text}",
            f" Data range: {file_data.file_data_from_date} to {file_data.file_data_to_date}",
            f" Released: {file_data.file_release_date}",
            f" Download URI: {file_data.file_download_uri}",
            sep="\n",
        )
137
138
# ============================================================================
# Example 5: Download a File
# ============================================================================

print("\n--- Example 5: Download a File ---")
# Download the smallest file of the product fetched above

# Fall back to an empty list when the product has no file bag or no files
download_bag = product.product_file_bag
candidate_files = (download_bag.file_data_bag if download_bag else None) or []

# min() returns the first file with the smallest size; default=None covers
# the "no files" case without needing a float("inf") sentinel.
min_file: FileData | None = min(
    candidate_files,
    key=lambda candidate: candidate.file_size,
    default=None,
)

if min_file is not None:
    print(f"Downloading smallest file: {min_file.file_name}")
    print(f"Size: {format_size(min_file.file_size)}")

    try:
        # Download with extraction
        downloaded_path = client.download_file(
            file_data=min_file,
            destination="./downloads",
            overwrite=True,
            extract=True,  # Auto-extract if it's a tar.gz or zip
        )
    except Exception as e:
        # Kept broad on purpose: report the failure and let the script
        # continue with the remaining examples.
        print(f"ERROR: {e}")
    else:
        # Only reached when the download call itself did not raise
        print(f"SUCCESS: Downloaded to {downloaded_path}")
170
171
# ============================================================================
# Example 6: Download Without Extraction
# ============================================================================

print("\n--- Example 6: Download Without Extraction ---")
# Fetch the same file again, this time leaving the archive unextracted

has_listed_files = (
    product.product_file_bag and product.product_file_bag.file_data_bag
)

if has_listed_files and min_file:
    # Same call as the previous example except for extract=False
    archive_options = dict(
        file_data=min_file,
        destination="./downloads",
        overwrite=True,
        extract=False,  # Keep archive compressed
    )
    try:
        downloaded_path = client.download_file(**archive_options)
        print(f"SUCCESS: Archive saved to {downloaded_path}")
    except Exception as exc:
        print(f"ERROR: {exc}")
191
192
# Closing banner: a 60-character rule above and below the final message
closing_rule = "=" * 60
print("\n" + closing_rule)
print("Examples complete!")
print(closing_rule)