torch.Package zipfile debugging printer (#52176)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/52176

Added tooling to print out zipfile structure for PackageExporter and PackageImporter.

API looks like:
```
exporter.print_file_structure("sss" /*only include files with this in the path*/)
importer3.print_file_structure(False /*don't print storage*/, "sss" /*only include files with this in the path*/)
```

The output looks like this with the storage hidden by default:
```
─── resnet.zip
    ├── .data
    │   ├── extern_modules
    │   └── version
    ├── models
    │   └── models1.pkl
    └── torchvision
        └── models
            ├── resnet.py
            └── utils.py
```
The output looks like this with the storage being printed out:
```
─── resnet_added_attr_test.zip
    ├── .data
    │   ├── 94574437434544.storage
    │   ├── 94574468343696.storage
    │   ├── 94574470147744.storage
    │   ├── 94574470198784.storage
    │   ├── 94574470267968.storage
    │   ├── 94574474917984.storage
    │   ├── extern_modules
    │   └── version
    ├── models
    │   └── models1.pkl
    └── torchvision
        └── models
            ├── resnet.py
            └── utils.py
```

If the output is filtered with the string 'utils', it looks like this:
```
─── resnet_added_attr_test.zip
    └── torchvision
        └── models
            └── utils.py
```

Test Plan: Imported from OSS

Reviewed By: suo

Differential Revision: D26429795

Pulled By: Lilyjjo

fbshipit-source-id: 4fa25b0426912f939c7b52cedd6e217672891f21
This commit is contained in:
Lillian Johnson
2021-02-22 15:00:34 -08:00
committed by Facebook GitHub Bot
parent b72a72a477
commit 0bc57f47f0
5 changed files with 209 additions and 41 deletions

View File

@ -82,6 +82,59 @@ the_math = math
self.assertEqual(package_a_i.result, 'package_a')
self.assertIsNot(package_a_i, package_a)
def test_file_structure(self):
# Verifies the string rendering returned by file_structure() on both
# PackageExporter and PackageImporter: once with no filter, once with
# `include` glob patterns, and once with an `exclude` glob pattern.
filename = self.temp()
# Expected exporter rendering with no filters applied (first line omitted).
export_plain = """\
├── main
│ └── main
├── obj
│ └── obj.pkl
├── package_a
│ ├── __init__.py
│ └── subpackage.py
└── module_a.py
"""
# Expected exporter rendering when only paths matching
# "**/subpackage.py" or "**/*.pkl" are included (first line omitted).
export_include = """\
├── obj
│ └── obj.pkl
└── package_a
└── subpackage.py
"""
# Expected importer rendering with "**/*.storage" excluded (first line
# omitted). Unlike the exporter expectations above, this one also lists
# the `.data/extern_modules` and `.data/version` entries.
import_exclude = """\
├── .data
│ ├── extern_modules
│ └── version
├── main
│ └── main
├── obj
│ └── obj.pkl
├── package_a
│ ├── __init__.py
│ └── subpackage.py
└── module_a.py
"""
with PackageExporter(filename, verbose=False) as he:
# Fixture modules that get saved into the package below.
import module_a
import package_a
import package_a.subpackage
# Object pickled into the package as obj/obj.pkl.
obj = package_a.subpackage.PackageASubpackageObject()
he.save_module(module_a.__name__)
he.save_module(package_a.__name__)
he.save_pickle('obj', 'obj.pkl', obj)
he.save_text('main', 'main', "my string")
# Unfiltered view of everything saved so far.
export_file_structure = he.file_structure()
# Compare everything after the first line only: the first line holds the
# archive filename, which is rendered differently across platforms.
self.assertEqual('\n'.join(str(export_file_structure).split('\n')[1:]), export_plain)
# `include` is given a list of glob patterns here.
export_file_structure = he.file_structure(include=["**/subpackage.py", "**/*.pkl"])
self.assertEqual('\n'.join(str(export_file_structure).split('\n')[1:]), export_include)
# Re-open the written package and check the importer's view of it.
# NOTE(review): presumably the importer is created after the exporter's
# `with` block has exited, so the archive is finalized — confirm.
hi = PackageImporter(filename)
# `exclude` is given a single glob pattern string here.
import_file_structure = hi.file_structure(exclude="**/*.storage")
self.assertEqual('\n'.join(str(import_file_structure).split('\n')[1:]), import_exclude)
def test_save_module_binary(self):
f = BytesIO()
with PackageExporter(f, verbose=False) as he: