Files
pytorch/torchgen/_autoheuristic/mm/train_decision_mm.py
Xuehai Pan b6bdb67f82 [BE][Easy] use pathlib.Path instead of dirname / ".." / pardir (#129374)
Changes, in the order they were applied:

1. Replace all `".."` and `os.pardir` usage with `os.path.dirname(...)`.
2. Replace nested `os.path.dirname(os.path.dirname(...))` call with `str(Path(...).parent.parent)`.
3. Reorder `.absolute()` ~/ `.resolve()`~ and `.parent`: always resolve the path first.

    `.parent{...}.absolute()` -> `.absolute().parent{...}`

4. Replace chained `.parent x N` with `.parents[${N - 1}]`: the code is easier to read (see 5.)

    `.parent.parent.parent.parent` -> `.parents[3]`

5. ~Replace `.parents[${N - 1}]` with `.parents[${N} - 1]`: the code is easier to read and does not introduce any runtime overhead.~

    ~`.parents[3]` -> `.parents[4 - 1]`~

6. ~Replace `.parents[2 - 1]` with `.parent.parent`: because the code is shorter and easier to read.~

Pull Request resolved: https://github.com/pytorch/pytorch/pull/129374
Approved by: https://github.com/justinchuby, https://github.com/malfet
2024-12-29 17:23:13 +00:00

65 lines
1.9 KiB
Python

# mypy: ignore-errors
import sys
from pathlib import Path
import pandas as pd # type: ignore[import-untyped]
sys.path.append(str(Path(__file__).absolute().parents[1]))
from train_decision import AHTrainDecisionTree
from torch._inductor.autoheuristic.autoheuristic_utils import mm_operations
class AHTrainDecisionTreeMM(AHTrainDecisionTree):
    """Decision-tree trainer for the mm heuristic.

    Builds on ``AHTrainDecisionTree`` and supplies the mm-specific feature
    columns, constants and training-data assembly defined below.
    """

    def __init__(self):
        """Initialise the trainer by delegating to the base class."""
        super().__init__()

    def add_new_features(self, results):
        """Add one column to ``results`` per operation from ``mm_operations()``.

        Each operation's ``func`` is applied row-wise (``axis=1``) and the
        outcome is stored under the operation's ``name``.

        Returns a ``(results, names)`` tuple, where ``names`` lists the added
        columns whose operation is flagged ``is_categorical``.
        """
        categorical_columns = []
        for operation in mm_operations():
            column = operation.name
            results[column] = results.apply(operation.func, axis=1)
            if operation.is_categorical:
                categorical_columns.append(column)
        return (results, categorical_columns)

    def get_default_config(self, row):
        """Return ``"extern_mm"`` as the default config for every row."""
        return "extern_mm"

    def get_allowed_wrong_prediction_pct(self):
        """Return the allowed wrong-prediction percentage, fixed at ``1.0``."""
        return 1.0

    def get_test_and_val_size(self):
        """Return the pair ``(0.01, 0.19)``.

        Going by the method name, the first entry is the test size and the
        second the validation size -- confirm against the base class.
        """
        return (0.01, 0.19)

    def get_grid_search_values(self):
        """Return the grid-search candidates; every list has a single value."""
        return {"max_depth": [5], "min_samples_leaf": [0.01], "criterion": ["entropy"]}

    def add_training_data(self, df_train, datasets):
        """Assemble the training frame from ``datasets``.

        The rows of ``datasets["train_timm"]`` and ``datasets["train_hf"]`` are
        each repeated three times and appended after ``datasets["train"]``,
        with a fresh index on the combined frame.

        Note that the incoming ``df_train`` argument is not read; the base
        frame is always taken from ``datasets["train"]``.
        """

        def repeat_rows_three_times(frame):
            # Every row appears 3 times so that these datasets carry more
            # weight: the heuristic really has to perform well on them.
            return frame.loc[frame.index.repeat(3)].reset_index(drop=True)

        timm_rows = repeat_rows_three_times(datasets["train_timm"])
        hf_rows = repeat_rows_three_times(datasets["train_hf"])
        base_rows = datasets["train"]
        return pd.concat([base_rows, timm_rows, hf_rows], ignore_index=True)

    def ranking_always_included_choices(self):
        """Return the choices always included in the ranking: ``extern_mm``."""
        return ["extern_mm"]
if __name__ == "__main__":
    # Script entry point: build the mm trainer and generate its heuristic.
    AHTrainDecisionTreeMM().generate_heuristic()