1"""Example usage of pyUSPTO for bulk data products.
2
3Demonstrates the BulkDataClient for searching products, listing files,
4and downloading bulk data archives.
5"""
6
7import os
8
9from pyUSPTO import BulkDataClient, FileData, USPTOConfig
10
# Destination handed to client.download_file() by the download examples below.
# It is a relative path, so it resolves against the current working directory.
DEST_PATH = "./notes/download-example"
12
13
14def format_size(size_bytes: int | float) -> str:
15 """Format a size in bytes to a human-readable string (KB, MB, GB, etc.)."""
16 if size_bytes == 0:
17 return "0 B"
18
19 size_names = ["B", "KB", "MB", "GB", "TB", "PB"]
20 i = 0
21 while size_bytes >= 1024 and i < len(size_names) - 1:
22 size_bytes /= 1024
23 i += 1
24
25 return f"{size_bytes:.2f} {size_names[i]}"
26
27
# --- Client Initialization ---
# The API key is read from the USPTO_API_KEY environment variable. An unset,
# blank, or still-placeholder value is rejected here, before any client is
# built, so a missing key is reported as a configuration error up front.
api_key = os.environ.get("USPTO_API_KEY", "")
if not api_key.strip() or api_key == "YOUR_API_KEY_HERE":
    raise ValueError(
        "API key is not set. Set the USPTO_API_KEY environment variable."
    )
config = USPTOConfig(api_key=api_key)
# `client` is shared by every example below.
client = BulkDataClient(config=config)
36
print("-" * 40)
print("Example 1: Search for products")
print("-" * 40)

# Search with query="patent" and limit=5, then report the count the response carries.
response = client.search_products(query="patent", limit=5)
print(f"Found {response.count} products matching 'patent'")

for product in response.bulk_data_product_bag:
    # Show at most the first 100 characters of the description. The ellipsis
    # is added only when text was actually cut off, and a missing description
    # is printed as empty instead of failing on the slice.
    description = product.product_description_text or ""
    if len(description) > 100:
        description = f"{description[:100]}..."

    print(f"\n Product: {product.product_title_text}")
    print(f" ID: {product.product_identifier}")
    print(f" Description: {description}")
    print(f" Total files: {product.product_file_total_quantity}")
    print(f" Total size: {format_size(product.product_total_file_size)}")
50
print("-" * 40, "Example 2: Paginate through products", "-" * 40, sep="\n")

# Walk the paginated results lazily and stop after `max_items` entries so the
# example does not iterate through every matching product.
max_items = 20
count = 0  # stays 0 when the paginator yields nothing
trademark_products = client.paginate_products(query="trademark", limit=10)
for count, product in enumerate(trademark_products, start=1):
    print(f" {count}. {product.product_title_text} ({product.product_identifier})")
    if count < max_items:
        continue
    print(f" ... (stopping at {max_items} products)")
    break
63
print("-" * 40, "Example 3: Get product by ID", "-" * 40, sep="\n")

# Patent Grant Full-Text Data (No Images) - XML
product_id = "PTGRXML"
# The `product` fetched here is reused by the file-listing and download
# examples further down.
product = client.get_product_by_id(product_id, include_files=True, latest=True)

# Label/value pairs, printed one per line as "<label>: <value>".
summary = (
    ("Product", product.product_title_text),
    ("Description", product.product_description_text),
    ("Frequency", product.product_frequency_text),
    ("Labels", product.product_label_array_text),
    ("Categories", product.product_dataset_category_array_text),
    ("Date range", f"{product.product_from_date} to {product.product_to_date}"),
)
for label, value in summary:
    print(f"{label}: {value}")
77
print("-" * 40, "Example 4: List files for a product", "-" * 40, sep="\n")

# The file list sits two levels down (product -> file bag -> file data bag);
# either level may be missing or empty, which is reported as "no files".
file_bag = product.product_file_bag
files = file_bag.file_data_bag if file_bag else None

if not files:
    print("No files found for this product")
else:
    print(f"Found {len(files)} file(s):")

    for bulk_file in files:
        date_range = f"{bulk_file.file_data_from_date} to {bulk_file.file_data_to_date}"
        print(f"\n File: {bulk_file.file_name}")
        print(f" Size: {format_size(bulk_file.file_size)}")
        print(f" Type: {bulk_file.file_type_text}")
        print(f" Data range: {date_range}")
        print(f" Released: {bulk_file.file_release_date}")
        print(f" Download URI: {bulk_file.file_download_uri}")
96
print("-" * 40, "Example 5: Download a file (with extraction)", "-" * 40, sep="\n")

# Pick the file with the smallest reported size (the first one on ties).
# `min_file` stays None when the product has no file bag or no files.
file_bag = product.product_file_bag
candidates = (file_bag.file_data_bag or []) if file_bag else []
min_file: FileData | None = min(
    candidates,
    key=lambda candidate: candidate.file_size,
    default=None,
)

if min_file:
    print(f"Downloading smallest file: {min_file.file_name}")
    print(f"Size: {format_size(min_file.file_size)}")

    # extract=True is the point of this example; overwrite=True is also passed.
    downloaded_path = client.download_file(
        file_data=min_file,
        destination=DEST_PATH,
        overwrite=True,
        extract=True,
    )
    print(f"Downloaded to {downloaded_path}")
121
print("-" * 40, "Example 6: Download without extraction", "-" * 40, sep="\n")

# Reuses the `min_file` chosen in Example 5; the only difference from that
# download is extract=False.
file_bag = product.product_file_bag
has_files = bool(file_bag and file_bag.file_data_bag)

if has_files and min_file:
    downloaded_path = client.download_file(
        file_data=min_file,
        destination=DEST_PATH,
        overwrite=True,
        extract=False,
    )
    print(f"Archive saved to {downloaded_path}")