mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[pytorch] Remove numpy dependency from Knapsack Evaluator (#150825)
Summary: The two implementations are functionally equivalent. They both calculate the memory budget at the knee point in the Pareto frontier using the same algorithm. 1. np.linspace -> basic list comprehension 2. runtime and memory values -> lists instead of numpy arrays 3. np.ptp -> max - min 4. vectorized normalization -> per-element (value - min) / range 5. np.sqrt -> **0.5 6. np.argmin -> .index(min(_)) Test Plan: # Unit Testing ``` buck test mode/opt //caffe2/test/functorch:test_ac_knapsack; pingme "tests done" Buck UI: https://www.internalfb.com/buck2/f4e41eb8-e775-4f04-b4e7-8e567599deb8 Test UI: https://www.internalfb.com/intern/testinfra/testrun/10133099236155875 Network: Up: 24KiB Down: 1.9GiB (reSessionID-7cd11487-f3e7-43ab-982a-805510771c8d) Executing actions. Remaining 0/259826 98:15:40.5s exec time total Command: test. Finished 3 local, 5 remote, 103467 cache (99% hit) 98:15:14.8s exec time cached (99%) Time elapsed: 1:09.9s Tests finished: Pass 15. Fail 0. Fatal 0. Skip 0. Build failure 0 ``` # End to End Testing ### Baseline Run with DP Let's confirm everything we are running on works. 
- Optimization Algo: DP - Memory Budget: 0.05 - AIX Link: apf_local-basilwong-2025-03-22_20:39:10 - TLParse rank 0: https://manifold.edge.x2p.facebook.net/v0/read/tree/logs/.tmpDJaWp5/rank_0/index.html?bucketName=tlparse_reports&apiKey=tlparse_reports-key&withPayload=1&timeoutMsec=10000 - TLParse rank 1: https://manifold.edge.x2p.facebook.net/v0/read/tree/logs/.tmpDJaWp5/rank_1/index.html?bucketName=tlparse_reports&apiKey=tlparse_reports-key&withPayload=1&timeoutMsec=10000 ### Dynamic Memory Budget (Before Change) - Revision: 2c95489b7f79 - Optimization Algo: Dynamic Memory Budget - Memory Budget: 0.05 - AIX Link: https://www.internalfb.com/mlhub/pipeline/4088035428184866 - TLParse: - https://manifold.edge.x2p.facebook.net/v0/read/tree/logs/.tmpykEy8U/rank_0/index.html?bucketName=tlparse_reports&apiKey=tlparse_reports-key&withPayload=1&timeoutMsec=10000 - https://manifold.edge.x2p.facebook.net/v0/read/tree/logs/.tmpykEy8U/rank_1/index.html?bucketName=tlparse_reports&apiKey=tlparse_reports-key&withPayload=1&timeoutMsec=10000 ### Dynamic Memory Budget (After Change) - Revision: 14353eef3c9e - Optimization Algo: Dynamic Memory Budget - Memory Budget: 0.05 - AIX Link: https://www.internalfb.com/mlhub/pipeline/1613558749306737 - TLParse Links: - https://manifold.edge.x2p.facebook.net/v0/read/tree/logs/.tmpZKNWFw/rank_0/index.html?bucketName=tlparse_reports&apiKey=tlparse_reports-key&withPayload=1&timeoutMsec=10000 - https://manifold.edge.x2p.facebook.net/v0/read/tree/logs/.tmpZKNWFw/rank_1/index.html?bucketName=tlparse_reports&apiKey=tlparse_reports-key&withPayload=1&timeoutMsec=10000 As a sanity check, let's take the AC information for the following compile id: 7_0_0 from the rank 0 of each TLParse. 
{F1976883124} * Baseline: P1779400819 * Saved node values show we are storing much more compared to dynamic memory: ``` "Knapsack Saved Nodes": [ 16, 17, 19, 20, 21, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60 ] ``` * Before Change: P1779401775 * Saved nodes are similar to after change but not exactly. ``` "Knapsack Saved Nodes": [ 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50 ] ``` * After Change: P1779402106 * Here we see that the largest nodes that are saved are around the same, but there is a small discrepancy for the smallest nodes. ``` "Knapsack Saved Nodes": [ 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 57, 58, 59, 60, 61, 62 ], ``` The discrepancy can be explained by looking at the estimated memory values. This is the non-deterministic part (below are the top 5 memory values for considered candidates): ``` 0.05774741703905514, 0.007333005338292718, 0.007333005338292718, 0.007333005338292718, 0.007333005338292718, ``` vs ``` 0.049254204820440746, 0.006254502199421049, 0.006254502199421049, 0.006254502199421049, 0.006254502199421049, ``` Given that the dynamic memory implementations performed similarly in an E2E test and that the memory estimates are non-deterministic, we should be good to land. Differential Revision: D71692245 Pull Request resolved: https://github.com/pytorch/pytorch/pull/150825 Approved by: https://github.com/seemethere, https://github.com/jansel
This commit is contained in:
committed by
PyTorch MergeBot
parent
5471e80fb4
commit
1250106630
@ -279,16 +279,33 @@ class TestKnapsackEvaluator(TestCase):
|
||||
)
|
||||
|
||||
def test_get_knee_point_memory_budget(self):
|
||||
max_mem_budget = 1.0
|
||||
min_mem_budget = 0.1
|
||||
iterations = 10
|
||||
knee_point_memory_budget = self.knapsack_evaluator.get_knee_point_memory_budget(
|
||||
knapsack_algo=self.knapsack_algo,
|
||||
max_mem_budget=max_mem_budget,
|
||||
min_mem_budget=min_mem_budget,
|
||||
iterations=iterations,
|
||||
)
|
||||
self.assertEqual(knee_point_memory_budget, 0.4)
|
||||
"""
|
||||
Checks if the method correctly estimates the knee point in the memory budget
|
||||
where the trade-off between memory usage and recomputation runtime is optimal.
|
||||
|
||||
If memory budget and runtime are considered as equal cost, then the knee point
|
||||
is where the distance from 0 is smallest.
|
||||
"""
|
||||
max_mem_budget_to_expected_knee_point = {
|
||||
0.1: 0.1,
|
||||
0.2: 0.1,
|
||||
0.3: 0.3,
|
||||
0.4: 0.4, # 0.3 and 0.4 provide the same algo output so this is arbitrary
|
||||
0.5: 0.4,
|
||||
}
|
||||
for (
|
||||
max_mem_budget,
|
||||
expected_knee_point,
|
||||
) in max_mem_budget_to_expected_knee_point.items():
|
||||
knee_point_memory_budget = (
|
||||
self.knapsack_evaluator.get_knee_point_memory_budget(
|
||||
knapsack_algo=self.knapsack_algo,
|
||||
max_mem_budget=max_mem_budget,
|
||||
min_mem_budget=0.1,
|
||||
iterations=5,
|
||||
)
|
||||
)
|
||||
self.assertEqual(knee_point_memory_budget, expected_knee_point)
|
||||
|
||||
def test_get_backward_memory_from_topologically_sorted_graph(self):
|
||||
result = self.knapsack_evaluator._get_backward_memory_from_topologically_sorted_graph(
|
||||
|
@ -236,26 +236,37 @@ class KnapsackEvaluator:
|
||||
Returns:
|
||||
float: Memory budget at the knee point.
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
results = self.evaluate_distribution_of_results_for_knapsack_algo(
|
||||
knapsack_algo=knapsack_algo,
|
||||
memory_budget_values=np.linspace( # type: ignore[arg-type]
|
||||
min_mem_budget, max_mem_budget, iterations
|
||||
).tolist(),
|
||||
memory_budget_values=[
|
||||
min_mem_budget
|
||||
+ i * (max_mem_budget - min_mem_budget) / (iterations - 1)
|
||||
for i in range(iterations)
|
||||
],
|
||||
)
|
||||
runtime_values = np.array(
|
||||
[result["percentage_of_theoretical_peak_runtime"] for result in results]
|
||||
)
|
||||
memory_values = np.array(
|
||||
[result["percentage_of_theoretical_peak_memory"] for result in results]
|
||||
)
|
||||
runtime_range = np.ptp(runtime_values)
|
||||
memory_range = np.ptp(memory_values)
|
||||
runtime_values = [
|
||||
result["percentage_of_theoretical_peak_runtime"] for result in results
|
||||
]
|
||||
memory_values = [
|
||||
result["percentage_of_theoretical_peak_memory"] for result in results
|
||||
]
|
||||
runtime_range = max(runtime_values) - min(runtime_values)
|
||||
memory_range = max(memory_values) - min(memory_values)
|
||||
if runtime_range == 0 or memory_range == 0:
|
||||
return max_mem_budget
|
||||
runtime_norm = (runtime_values - runtime_values.min()) / runtime_range
|
||||
memory_norm = (memory_values - memory_values.min()) / memory_range
|
||||
distances = np.sqrt(runtime_norm**2 + memory_norm**2)
|
||||
knee_index = np.argmin(distances)
|
||||
|
||||
# Normalize values
|
||||
runtime_min = min(runtime_values)
|
||||
memory_min = min(memory_values)
|
||||
runtime_norm = [
|
||||
(value - runtime_min) / runtime_range for value in runtime_values
|
||||
]
|
||||
memory_norm = [(value - memory_min) / memory_range for value in memory_values]
|
||||
# Calculate Euclidean distance
|
||||
distances = [
|
||||
(runtime_norm[i] ** 2 + memory_norm[i] ** 2) ** 0.5
|
||||
for i in range(len(runtime_norm))
|
||||
]
|
||||
# Find the knee point(shortest distance from the origin)
|
||||
knee_index = distances.index(min(distances))
|
||||
return results[knee_index]["memory_budget"]
|
||||
|
Reference in New Issue
Block a user