1"""Example usage of pyUSPTO for the BulkDataClient.
2
3This example demonstrates how to use the BulkDataClient to interact with the USPTO Bulk Data API.
4It shows how to search for products, retrieve product details, and download files.
5"""
6
7import os
8
9from pyUSPTO.clients import BulkDataClient
10from pyUSPTO.config import USPTOConfig
11from pyUSPTO.models.bulk_data import FileData
12
13
14def format_size(size_bytes: int | float) -> str:
15 """Format a size in bytes to a human-readable string (KB, MB, GB, etc.).
16
17 Args:
18 size_bytes: The size in bytes to format
19
20 Returns:
21 A human-readable string representation of the size
22 """
23 if size_bytes == 0:
24 return "0 B"
25
26 size_names = ["B", "KB", "MB", "GB", "TB", "PB"]
27 i = 0
28 while size_bytes >= 1024 and i < len(size_names) - 1:
29 size_bytes /= 1024
30 i += 1
31
32 # Round to 2 decimal places
33 return f"{size_bytes:.2f} {size_names[i]}"
34
35
36# ============================================================================
37# Client Initialization Methods
38# ============================================================================
39
# Method 1: Initialize with an explicit USPTOConfig object
print("\nMethod 1: Initialize with USPTOConfig")
config = USPTOConfig(api_key="YOUR_API_KEY_HERE")
client = BulkDataClient(config=config)

# Method 2: Initialize from environment variables (recommended)
print("\nMethod 2: Initialize from environment variables")
# Normally USPTO_API_KEY is set outside the script. setdefault() only fills in
# the placeholder when the variable is missing, so a real key that is already
# present in the environment is not overwritten by this example.
os.environ.setdefault("USPTO_API_KEY", "YOUR_API_KEY_HERE")
config_from_env = USPTOConfig.from_env()
# Rebinds `client`: the requests below use the environment-based configuration
client = BulkDataClient(config=config_from_env)

print("\n" + "=" * 60)
print("Beginning API requests with configured client")
print("=" * 60)
54
55
56# ============================================================================
57# Example 1: Search for Products
58# ============================================================================
59
print("\n--- Example 1: Search for Products ---")
# The Bulk Data API supports full-text search via the query parameter
# Field-specific queries (e.g., "productIdentifier:value") are not supported

# Search for patent-related products
response = client.search_products(query="patent", limit=5)
print(f"Found {response.count} products matching 'patent'")

for product in response.bulk_data_product_bag:
    # Show at most 100 characters of the description and add an ellipsis only
    # when text was actually cut off. `or ""` keeps the listing going if a
    # product has no description (assumption: the field may be empty/None).
    description = product.product_description_text or ""
    if len(description) > 100:
        description = f"{description[:100]}..."

    print(f"\n Product: {product.product_title_text}")
    print(f" ID: {product.product_identifier}")
    print(f" Description: {description}")
    print(f" Total files: {product.product_file_total_quantity}")
    print(f" Total size: {format_size(product.product_total_file_size)}")
74
75
76# ============================================================================
77# Example 2: Paginate Through All Products
78# ============================================================================
79
print("\n--- Example 2: Paginate Through Products ---")
# Walk the products yielded by paginate_products(), numbering them from 1

max_listed = 20  # Limit output for example
count = 0  # Remains 0 when the query yields nothing
trademark_products = client.paginate_products(query="trademark", limit=10)
for count, product in enumerate(trademark_products, start=1):
    print(f" {count}. {product.product_title_text} ({product.product_identifier})")
    if count >= max_listed:
        print(" ... (stopping after 20 products)")
        break
90
91
92# ============================================================================
93# Example 3: Get Product Details by ID
94# ============================================================================
95
print("\n--- Example 3: Get Product by ID ---")
# Fetch one product by its identifier
# include_files=True asks for the file listing as well

product_id = "PTGRXML"  # Patent Grant Full-Text Data (No Images) - XML
product = client.get_product_by_id(product_id, latest=True, include_files=True)

# Collect the summary lines, then print them in order
summary_lines = (
    f"Product: {product.product_title_text}",
    f"Description: {product.product_description_text}",
    f"Frequency: {product.product_frequency_text}",
    f"Labels: {product.product_label_array_text}",
    f"Categories: {product.product_dataset_category_array_text}",
    f"Date range: {product.product_from_date} to {product.product_to_date}",
)
for summary_line in summary_lines:
    print(summary_line)
109
110
111# ============================================================================
112# Example 4: List Files for a Product
113# ============================================================================
114
print("\n--- Example 4: List Files for a Product ---")
# Print the details of every file attached to the product fetched above

file_bag = product.product_file_bag
listed_files = file_bag.file_data_bag if file_bag else None

if not listed_files:
    # Either the product has no file bag or the bag holds no files
    print("No files found for this product")
else:
    file_count = len(listed_files)
    print(f"Found {file_count} file(s):")

    for file_data in listed_files:
        print(f"\n File: {file_data.file_name}")
        print(f" Size: {format_size(file_data.file_size)}")
        print(f" Type: {file_data.file_type_text}")
        print(
            f" Data range: {file_data.file_data_from_date} to {file_data.file_data_to_date}"
        )
        print(f" Released: {file_data.file_release_date}")
        print(f" Download URI: {file_data.file_download_uri}")
132
133
134# ============================================================================
135# Example 5: Download a File
136# ============================================================================
137
print("\n--- Example 5: Download a File ---")
# Download the smallest file listed for the product

file_bag = product.product_file_bag
candidate_files = (file_bag.file_data_bag if file_bag else None) or []

# min() returns the first entry with the lowest file_size; default=None covers
# the case where the product lists no files at all.
min_file: FileData | None = min(
    candidate_files,
    key=lambda file_data: file_data.file_size,
    default=None,
)

if min_file is not None:
    print(f"Downloading smallest file: {min_file.file_name}")
    print(f"Size: {format_size(min_file.file_size)}")

    try:
        # Download with extraction (default behavior for archives)
        downloaded_path = client.download_file(
            file_data=min_file,
            destination="./downloads",
            overwrite=True,
            extract=True,  # Auto-extract if it's a tar.gz or zip
        )
    except Exception as e:
        # Example script boundary: report the failure and carry on
        print(f"ERROR: {e}")
    else:
        print(f"SUCCESS: Downloaded to {downloaded_path}")
165
166
167# ============================================================================
168# Example 6: Download Without Extraction
169# ============================================================================
170
print("\n--- Example 6: Download Without Extraction ---")
# Fetch the same file again, this time leaving the archive as downloaded

product_has_files = bool(
    product.product_file_bag and product.product_file_bag.file_data_bag
)

if product_has_files and min_file:
    try:
        # extract=False keeps the archive compressed
        downloaded_path = client.download_file(
            file_data=min_file,
            destination="./downloads",
            overwrite=True,
            extract=False,
        )
        print(f"SUCCESS: Archive saved to {downloaded_path}")
    except Exception as exc:
        print(f"ERROR: {exc}")
186
187
# Closing banner: a 60-character rule above and below the final message
separator = "=" * 60
print("\n" + separator)
print("Examples complete!")
print(separator)