diff --git a/.github/workflows/requirements-dev.txt b/.github/workflows/requirements-dev.txt
new file mode 100644
index 0000000..42bce9e
--- /dev/null
+++ b/.github/workflows/requirements-dev.txt
@@ -0,0 +1,5 @@
+# Copyright (c) 2023 Mitsubishi Electric Research Laboratories (MERL)
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+pre-commit
diff --git a/.github/workflows/static_checks.yaml b/.github/workflows/static_checks.yaml
new file mode 100644
index 0000000..885bd18
--- /dev/null
+++ b/.github/workflows/static_checks.yaml
@@ -0,0 +1,76 @@
+# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL)
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+name: Static code checks
+
+on: # yamllint disable-line rule:truthy
+  pull_request:
+  push:
+    branches:
+      - '**'
+    tags-ignore:
+      - '**'
+
+env:
+  LICENSE: AGPL-3.0-or-later
+  FETCH_DEPTH: 1
+  FULL_HISTORY: 0
+  SKIP_WORD_PRESENCE_CHECK: 0
+
+jobs:
+  static-code-check:
+    if: endsWith(github.event.repository.name, 'private')
+
+    name: Run static code checks
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    steps:
+      - name: Setup history
+        if: github.ref == 'refs/heads/oss'
+        run: |
+          echo "FETCH_DEPTH=0" >> $GITHUB_ENV
+          echo "FULL_HISTORY=1" >> $GITHUB_ENV
+
+      - name: Setup version
+        if: github.ref == 'refs/heads/melco'
+        run: |
+          echo "SKIP_WORD_PRESENCE_CHECK=1" >> $GITHUB_ENV
+
+      - name: Check out code
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: ${{ env.FETCH_DEPTH }} # '0' to check full history
+
+      - name: Set up environment
+        run: git config user.email github-bot@merl.com
+
+      - name: Set up python
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3
+          cache: 'pip'
+          cache-dependency-path: '.github/workflows/requirements-dev.txt'
+
+      - name: Install python packages
+        run: pip install -r .github/workflows/requirements-dev.txt
+
+      - name: Ensure lint and pre-commit steps have been run
+        uses: pre-commit/action@v3.0.0
+
+      - name: Check files
+        uses: merl-oss-private/merl-file-check-action@v1
+        with:
+          license: ${{ env.LICENSE }}
+          full-history: ${{ env.FULL_HISTORY }} # If true, use fetch-depth 0 above
+          skip-word-presence-check: ${{ env.SKIP_WORD_PRESENCE_CHECK }}
+
+      - name: Check license compatibility
+        if: github.ref != 'refs/heads/melco'
+        uses: merl-oss-private/merl_license_compatibility_checker@v1
+        with:
+          input-filename: environment.yml
+          license: ${{ env.LICENSE }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5be1dcb
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+# Copyright (C) 2024 Mitsubishi Electric Research Laboratories (MERL)
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+.DS_Store
+.idea/
+*pycache*
+checkpoints/
+logs/
+outputs/
+pre_trained/
+*.pyc
+*.pdf
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..b0dd40e
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,64 @@
+# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL)
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+#
+# Pre-commit configuration. See https://pre-commit.com
+
+default_language_version:
+  python: python3
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+      - id: check-yaml
+      - id: check-added-large-files
+        args: ['--maxkb=5000']
+
+  - repo: https://gitlab.com/bmares/check-json5
+    rev: v1.0.0
+    hooks:
+      - id: check-json5
+
+  - repo: https://github.com/homebysix/pre-commit-macadmin
+    rev: v1.12.3
+    hooks:
+      - id: check-git-config-email
+        args: ['--domains', 'merl.com']
+
+  - repo: https://github.com/psf/black
+    rev: 22.12.0
+    hooks:
+      - id: black
+        args:
+          - --line-length=120
+
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        args: ["--profile", "black", "--filter-files", "--line-length", "120", "--skip-gitignore"]
+
+  # Uncomment to use pyupgrade (https://github.com/asottile/pyupgrade) to automatically upgrade syntax for newer python
+  # - repo: https://github.com/asottile/pyupgrade
+  #   rev: v3.3.1
+  #   hooks:
+  #     - id: pyupgrade
+
+  # To stop flake8 error from causing a failure, use --exit-zero. By default, pre-commit will not show the warnings,
+  # so use verbose: true to see them.
+  - repo: https://github.com/pycqa/flake8
+    rev: 5.0.4
+    hooks:
+      - id: flake8
+        # Black compatibility, Eradicate options
+        args: ["--max-line-length=120", "--extend-ignore=E203",
+               "--eradicate-whitelist-extend", "eradicate:\\s*no",
+               "--exit-zero"]
+        verbose: true
+        additional_dependencies: [
+          # https://github.com/myint/eradicate, https://github.com/wemake-services/flake8-eradicate
+          "flake8-eradicate"
+        ]
diff --git a/.reuse/dep5 b/.reuse/dep5
new file mode 100644
index 0000000..e36be38
--- /dev/null
+++ b/.reuse/dep5
@@ -0,0 +1,9 @@
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+
+Files: images/pipeline.png
+Copyright: 2024 Mitsubishi Electric Research Laboratories (MERL)
+License: AGPL-3.0-or-later
+
+Files: scripts/input_pose.json
+Copyright: 2024 Mitsubishi Electric Research Laboratories (MERL)
+License: AGPL-3.0-or-later
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..95eb1e8
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,9 @@
+
+# Contributing
+
+Sorry, but we do not currently accept contributions in the form of pull requests
+to this repository. However, you are welcome to post issues (bug reports, feature requests, questions, etc).
diff --git a/Install.sh b/Install.sh
new file mode 100644
index 0000000..a696bb7
--- /dev/null
+++ b/Install.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL)
+#
+# SPDX-License-Identifier: AGPL-3.0-or-later
+
+conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.6 -c pytorch -c conda-forge
+pip install setuptools==59.5.0 kornia==0.7.1 torchmetrics==0.11.4 timm matplotlib dearpygui hydra-core==1.1.1 imageio iopath==0.1.9 lpips==0.1.4 omegaconf==2.1.1 opencv-python plyfile gdown segment-anything-hq
+conda install -c fvcore -c iopath -c conda-forge fvcore iopath
+conda install pytorch3d -c pytorch3d
+pip install torch_geometric
+pip install pytorch-lightning==1.7.6
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 0ad25db..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,661 +0,0 @@
- GNU AFFERO GENERAL PUBLIC LICENSE
- Version 3, 19 November 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc.
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
- - Preamble - - The GNU Affero General Public License is a free, copyleft license for -software and other kinds of works, specifically designed to ensure -cooperation with the community in the case of network server software. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -our General Public Licenses are intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - Developers that use our General Public Licenses protect your rights -with two steps: (1) assert copyright on the software, and (2) offer -you this License which gives you legal permission to copy, distribute -and/or modify the software. - - A secondary benefit of defending all users' freedom is that -improvements made in alternate versions of the program, if they -receive widespread use, become available for other developers to -incorporate. Many developers of free software are heartened and -encouraged by the resulting cooperation. However, in the case of -software used on network servers, this result may fail to come about. -The GNU General Public License permits making a modified version and -letting the public access it on a server without ever releasing its -source code to the public. - - The GNU Affero General Public License is designed specifically to -ensure that, in such cases, the modified source code becomes available -to the community. It requires the operator of a network server to -provide the source code of the modified version running there to the -users of that server. Therefore, public use of a modified version, on -a publicly accessible server, gives the public access to the source -code of the modified version. - - An older license, called the Affero General Public License and -published by Affero, was designed to accomplish similar goals. This is -a different license, not a version of the Affero GPL, but Affero has -released a new version of the Affero GPL which permits relicensing under -this license. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU Affero General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. 
- - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. 
This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. 
This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. 
- - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. - - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. 
- - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. 
- - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. - - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. 
- - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. - - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. 
If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Remote Network Interaction; Use with the GNU General Public License. - - Notwithstanding any other provision of this License, if you modify the -Program, your modified version must prominently offer all users -interacting with it remotely through a computer network (if your version -supports such interaction) an opportunity to receive the Corresponding -Source of your version by providing access to the Corresponding Source -from a network server at no charge, through some standard or customary -means of facilitating copying of software. This Corresponding Source -shall include the Corresponding Source for any work covered by version 3 -of the GNU General Public License that is incorporated pursuant to the -following paragraph. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU General Public License into a single -combined work, and to convey the resulting work. The terms of this -License will continue to apply to the part which is the covered work, -but the work with which it is combined will remain governed by version -3 of the GNU General Public License. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU Affero General Public License from time to time. Such new versions -will be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU Affero General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU Affero General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU Affero General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
- - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - - If your software can interact with users remotely through a computer -network, you should also make sure that it provides a way for users to -get its source. For example, if your program is a web application, its -interface could display a "Source" link that leads users to an archive -of the code. There are many ways you could offer source, and different -solutions will be better for different programs; see section 13 for the -specific requirements. - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU AGPL, see -. diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..cba6f6a --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,660 @@ +### GNU AFFERO GENERAL PUBLIC LICENSE + +Version 3, 19 November 2007 + +Copyright (C) 2007 Free Software Foundation, Inc. + + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. 
+ +### Preamble + +The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + +The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains +free software for all its users. + +When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + +Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + +A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about. +The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + +The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + +An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing +under this license. + +The precise terms and conditions for copying, distribution and +modification follow. + +### TERMS AND CONDITIONS + +#### 0. Definitions. + +"This License" refers to version 3 of the GNU Affero General Public +License. + +"Copyright" also means copyright-like laws that apply to other kinds +of works, such as semiconductor masks. + +"The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + +To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of +an exact copy. The resulting work is called a "modified version" of +the earlier work or a work "based on" the earlier work. + +A "covered work" means either the unmodified Program or a work based +on the Program. 
+ +To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + +To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user +through a computer network, with no transfer of a copy, is not +conveying. + +An interactive user interface displays "Appropriate Legal Notices" to +the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + +#### 1. Source Code. + +The "source code" for a work means the preferred form of the work for +making modifications to it. "Object code" means any non-source form of +a work. + +A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + +The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + +The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + +The Corresponding Source need not include anything that users can +regenerate automatically from other parts of the Corresponding Source. + +The Corresponding Source for a work in source code form is that same +work. + +#### 2. Basic Permissions. + +All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. 
This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + +You may make, run and propagate covered works that you do not convey, +without conditions so long as your license otherwise remains in force. +You may convey covered works to others for the sole purpose of having +them make modifications exclusively for you, or provide you with +facilities for running those works, provided that you comply with the +terms of this License in conveying all material for which you do not +control copyright. Those thus making or running the covered works for +you must do so exclusively on your behalf, under your direction and +control, on terms that prohibit them from making any copies of your +copyrighted material outside their relationship with you. + +Conveying under any other circumstances is permitted solely under the +conditions stated below. Sublicensing is not allowed; section 10 makes +it unnecessary. + +#### 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + +No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + +When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such +circumvention is effected by exercising rights under this License with +respect to the covered work, and you disclaim any intention to limit +operation or modification of the work as a means of enforcing, against +the work's users, your or third parties' legal rights to forbid +circumvention of technological measures. + +#### 4. Conveying Verbatim Copies. + +You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + +You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + +#### 5. Conveying Modified Source Versions. + +You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these +conditions: + +- a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. +- b) The work must carry prominent notices stating that it is + released under this License and any conditions added under + section 7. This requirement modifies the requirement in section 4 + to "keep intact all notices". +- c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. +- d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + +A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + +#### 6. Conveying Non-Source Forms. + +You may convey a covered work in object code form under the terms of +sections 4 and 5, provided that you also convey the machine-readable +Corresponding Source under the terms of this License, in one of these +ways: + +- a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. +- b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the Corresponding + Source from a network server at no charge. +- c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. +- d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. +- e) Convey the object code using peer-to-peer transmission, + provided you inform other peers where the object code and + Corresponding Source of the work are being offered to the general + public at no charge under subsection 6d. 
+ +A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + +A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, +family, or household purposes, or (2) anything designed or sold for +incorporation into a dwelling. In determining whether a product is a +consumer product, doubtful cases shall be resolved in favor of +coverage. For a particular product received by a particular user, +"normally used" refers to a typical or common use of that class of +product, regardless of the status of the particular user or of the way +in which the particular user actually uses, or expects or is expected +to use, the product. A product is a consumer product regardless of +whether the product has substantial commercial, industrial or +non-consumer uses, unless such uses represent the only significant +mode of use of the product. + +"Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to +install and execute modified versions of a covered work in that User +Product from a modified version of its Corresponding Source. The +information must suffice to ensure that the continued functioning of +the modified object code is in no case prevented or interfered with +solely because modification has been made. + +If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + +The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or +updates for a work that has been modified or installed by the +recipient, or for the User Product in which it has been modified or +installed. Access to a network may be denied when the modification +itself materially and adversely affects the operation of the network +or violates the rules and protocols for communication across the +network. + +Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + +#### 7. Additional Terms. + +"Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. 
+ +When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + +Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders +of that material) supplement the terms of this License with terms: + +- a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or +- b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or +- c) Prohibiting misrepresentation of the origin of that material, + or requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or +- d) Limiting the use for publicity purposes of names of licensors + or authors of the material; or +- e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or +- f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions + of it) with contractual assumptions of liability to the recipient, + for any liability that these contractual assumptions directly + impose on those licensors and authors. + +All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + +If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + +Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; the +above requirements apply either way. + +#### 8. Termination. + +You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + +However, if you cease all violation of this License, then your license +from a particular copyright holder is reinstated (a) provisionally, +unless and until the copyright holder explicitly and finally +terminates your license, and (b) permanently, if the copyright holder +fails to notify you of the violation by some reasonable means prior to +60 days after the cessation. 
+ +Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + +Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + +#### 9. Acceptance Not Required for Having Copies. + +You are not required to accept this License in order to receive or run +a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + +#### 10. Automatic Licensing of Downstream Recipients. + +Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + +An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + +You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + +#### 11. Patents. + +A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + +A contributor's "essential patent claims" are all patent claims owned +or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+ +Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + +In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + +If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + +If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + +A patent license is "discriminatory" if it does not include within the +scope of its coverage, prohibits the exercise of, or is conditioned on +the non-exercise of one or more of the rights that are specifically +granted under this License. You may not convey a covered work if you +are a party to an arrangement with a third party that is in the +business of distributing software, under which you make payment to the +third party based on the extent of your activity of conveying the +work, and under which the third party grants, to any of the parties +who would receive the covered work from you, a discriminatory patent +license (a) in connection with copies of the covered work conveyed by +you (or copies made from those copies), or (b) primarily for and in +connection with specific products or compilations that contain the +covered work, unless you entered into that arrangement, or that patent +license was granted, prior to 28 March 2007. + +Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + +#### 12. No Surrender of Others' Freedom. + +If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
If you cannot convey a +covered work so as to satisfy simultaneously your obligations under +this License and any other pertinent obligations, then as a +consequence you may not convey it at all. For example, if you agree to +terms that obligate you to collect a royalty for further conveying +from those to whom you convey the Program, the only way you could +satisfy both those terms and this License would be to refrain entirely +from conveying the Program. + +#### 13. Remote Network Interaction; Use with the GNU General Public License. + +Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your +version supports such interaction) an opportunity to receive the +Corresponding Source of your version by providing access to the +Corresponding Source from a network server at no charge, through some +standard or customary means of facilitating copying of software. This +Corresponding Source shall include the Corresponding Source for any +work covered by version 3 of the GNU General Public License that is +incorporated pursuant to the following paragraph. + +Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + +#### 14. Revised Versions of this License. + +The Free Software Foundation may publish revised and/or new versions +of the GNU Affero General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever +published by the Free Software Foundation. + +If the Program specifies that a proxy can decide which future versions +of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + +Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + +#### 15. Disclaimer of Warranty. + +THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT +WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE PROGRAM IS WITH YOU. 
SHOULD THE PROGRAM PROVE +DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR +CORRECTION. + +#### 16. Limitation of Liability. + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR +CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT +NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR +LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM +TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER +PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +#### 17. Interpretation of Sections 15 and 16. + +If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + +END OF TERMS AND CONDITIONS + +### How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these +terms. + +To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively state +the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper +mail. + +If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code. There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for +the specific requirements. + +You should also get your employer (if you work as a programmer) or +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. For more information on this, and how to apply and follow +the GNU AGPL, see . diff --git a/LICENSES/MIT.txt b/LICENSES/MIT.txt new file mode 100644 index 0000000..cb36536 --- /dev/null +++ b/LICENSES/MIT.txt @@ -0,0 +1,88 @@ +MIT License + +Copyright (c) Meta Platforms, Inc. and affiliates. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +MIT License + +Copyright (c) 2020 Quei-An Chen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +MIT License + +Copyright (c) 2022 Anpei Chen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+MIT License
+
+Copyright (c) 2022 hawkey
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index 190e118..3e84087 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,244 @@
-# Gear-NeRF
-This repository contains the implementation of the paper: "Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling", CVPR 2024 (Highlight)
+
+
+# Gear-NeRF (CVPR 2024)
+
+This repository contains the implementation of the paper:
+> **Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling**
+> [Xinhang Liu](http://xinhangliu.com/), [Yu-Wing Tai](https://yuwingtai.github.io/), [Chi-Keung Tang](https://cse.hkust.edu.hk/admin/people/faculty/profile/cktang), [Pedro Miraldo](https://pmiraldo.github.io/), [Suhas Lohit](https://www.merl.com/people/slohit), [Moitreya Chatterjee](https://sites.google.com/site/metrosmiles/)
+
+IEEE/CVF Conference on Computer Vision and Pattern Recognition (**CVPR**), 2024 (Highlight)
+
+
+## [Website](https://merl.com/research/highlights/gear-nerf) | [Paper](https://www.merl.com/publications/docs/TR2024-042.pdf) | [Video](https://www.youtube.com/watch?v=3Pg92mfENds)
+
+## Summary
+
+To make free-viewpoint rendering systems ubiquitous, it is essential that they be capable of handling dynamic scenes, i.e., those where objects change in their position or configuration or both over time. Existing approaches for this task propose pipelines that are agnostic to the semantic content of the scene and thus treat every region in 3D space as being equally important when rendering. This results in the system struggling to render the regions of the scene that have high motion. In this paper, we depart from such a simplistic rendering pipeline by adjusting the spatio-temporal sampling resolution of the different semantic regions of the scene based on the extent of their motion. These regions are grouped based on this criterion, and each such region is called a "Gear". We propose to sample more densely from regions with high motion, i.e., those that are assigned higher gears. This results in a noticeable improvement in rendering quality over state-of-the-art approaches across a wide variety of dynamic scenes. Furthermore, almost for free, our proposed method enables free-viewpoint tracking of objects of interest starting from a single mouse click, a functionality not yet achieved by prior methods.
+
+![pipeline](images/pipeline.png)
+
+
+
+## Table of contents
+-----
+ * [Installation](#Installation)
+ * [Dynamic Datasets](#dynamic-datasets)
+ * [Running the Code on Dynamic Scenes](#running-the-code-on-dynamic-scenes)
+ * [Acknowledgements](#Acknowledgements)
+ * [Citation](#Citation)
+ * [License](#License)
+------
+
+
+# Installation
+
+To install all required Python dependencies, run:
+
+```
+conda env create -f environment.yml
+conda activate gearnerf
+sh Install.sh
+```
+
+Download the [SAM-HQ checkpoint](https://drive.google.com/file/d/1qobFYrI4eyIANfBSmYcGuWRaSIXfMOQ8/view?usp=sharing) and put it in the `~/pre_trained` folder.
+
+Note that we tested the Gear-NeRF codebase on a machine running Ubuntu 22.04, with an NVIDIA RTX 3090 GPU, CUDA 11.6, and 128 GB of RAM.
+
+
+
+# Dynamic Datasets
+
+By default, we assume that:
+
+1. All datasets are located in the `~/data` folder (specified by the `experiment.params.data_dir` argument), e.g., `data/immersive/05_Horse/camera_0001.mp4`.
+2. The subdirectory for each individual dataset is specified by the `experiment.dataset.data_subdir` argument (e.g., see `conf/experiment/params/local.yaml` and `conf/experiment/dataset/technicolor.yaml`).
+
+
+## Google Immersive
+
+Download the Google Immersive sequences from their [release page](https://github.com/augmentedperception/deepview_video_dataset). For example, to download the Flames sequence, run:
+
+```
+wget https://storage.googleapis.com/deepview_video_raw_data/02_Flames.zip
+```
+Then extract the sequence to the `~/data` folder as stated above.
+
+## Example: Running the Code on the Horse Scene
+
+By default:
+
+1. 
Checkpoints are written to the `~/checkpoints` folder (specified by the `experiment.params.ckpt_dir` argument)
+2. Logs are written to the `~/logs` folder (specified by the `experiment.params.log_dir` argument).
+
+In order to train Gear-NeRF on the Horse scene from the Google Immersive dataset, run:
+
+```
+sh scripts/train_horse.sh
+```
+
+After training, you should see the rendered RGB images as well as visualizations of the SAM feature maps. These feature maps can be turned into segmentation masks according to various user inputs.
+
+To render a video of RGB and SAM feature maps with an automatically generated camera trajectory, run:
+
+```
+sh scripts/render_horse.sh
+```
+
+To render with a given camera pose and time step specified in `scripts/input_pose.json`, run:
+```
+sh scripts/render_horse_given_pose.sh
+```
+
+## Tracking
+
+In order to track a region in novel views, run the following command:
+
+```
+python demo_tracking.py
+```
+The coordinates of the point where the user clicks on a given frame must be provided in line 108 of `demo_tracking.py`.
+
+## Contributing
+
+See [CONTRIBUTING.md](CONTRIBUTING.md) for our policy on contributions.
+
+## Acknowledgements
+
+The code of Gear-NeRF is inspired by [HyperReel](https://github.com/facebookresearch/hyperreel), which in turn was inspired by [nerf_pl](https://github.com/kwea123/nerf_pl), [TensoRF](https://github.com/apchenstu/TensoRF), and [torch-ngp](https://github.com/ashawkey/torch-ngp). We are also grateful to the authors of the following repositories: [SAM](https://github.com/facebookresearch/segment-anything) and [SAM-HQ](https://github.com/SysCV/sam-hq).
+
+## Citation
+
+```
+@inproceedings{liu2024gear,
+  title={Gear-NeRF: Free-Viewpoint Rendering and Tracking with Motion-aware Spatio-Temporal Sampling},
+  author={Liu, Xinhang and Tai, Yu-Wing and Tang, Chi-Keung and Miraldo, Pedro and Lohit, Suhas and Chatterjee, Moitreya},
+  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+  pages={19667--19679},
+  year={2024}
+}
+```
+
+## License
+
+Released under the `AGPL-3.0-or-later` license, as found in the [LICENSE.md](LICENSE.md) file.
+
+All files, except as noted below:
+```
+Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL)
+SPDX-License-Identifier: AGPL-3.0-or-later
+```
+
+The files under the folders:
+
+* `~/datasets/`
+* `~/scripts/`
+
+were taken without modification from [here](https://github.com/facebookresearch/hyperreel/tree/main) (license included in [LICENSES/MIT.txt](LICENSES/MIT.txt)), with the following copyrights:
+```
+Copyright (c) Meta Platforms, Inc. and affiliates.
+```
+
+The files under the folder:
+
+* `~/utils/`
+
+were taken without modification from [here](https://github.com/facebookresearch/hyperreel/tree/main) (license included in [LICENSES/MIT.txt](LICENSES/MIT.txt)), with the following copyrights:
+```
+Copyright (c) Meta Platforms, Inc. and affiliates.
+Copyright (c) 2022 hawkey
+```
+
+except the following files:
+
+* `~/utils/sh_utils.py`
+* `~/utils/tensorf_utils.py`
+
+which were also taken without modification from [here](https://github.com/facebookresearch/hyperreel/tree/main) (license included in [LICENSES/MIT.txt](LICENSES/MIT.txt)), with the following copyrights:
+```
+Copyright (c) Meta Platforms, Inc. and affiliates.
+Copyright (c) 2022 Anpei Chen +``` + +The files under the folder: + +* `~/conf/` + +were taken without modification from [here](https://github.com/facebookresearch/hyperreel/tree/main) (license included in [LICENSES/MIT.txt](LICENSES/MIT.txt)), with the following copyrights: +``` +Copyright (c) Meta Platforms, Inc. and affiliates. +Copyright (c) 2020 Quei-An Chen +``` + +The files under the folder: + +* `~/nlf/embedding` +* `~/nlf/intersect` +* `~/nlf/models` +* `~/nlf/regularizers` +* `~/nlf/visualizers` + +were taken without modification from [here](https://github.com/facebookresearch/hyperreel/tree/main) (license included in [LICENSES/MIT.txt](LICENSES/MIT.txt)), with the following copyrights: +``` +Copyright (c) Meta Platforms, Inc. and affiliates. +``` + +The files under the folder: + +* `~/nlf/conf/` + +were taken without modification from [here](https://github.com/facebookresearch/hyperreel/tree/main) (license included in [LICENSES/MIT.txt](LICENSES/MIT.txt)), with the following copyrights: +``` +Copyright (c) Meta Platforms, Inc. and affiliates. +Copyright (c) 2020 Quei-An Chen +``` + +The following files: + +* `~/nlf/nets/array_nd.py` +* `~/nlf/nets/__init__.py` +* `~/nlf/nets/mlp.py` +* `~/nlf/nets/nets.py` +* `~/nlf/nets/siren.py` +* `~/nlf/nets/tensorf.py` +* `~/nlf/nets/tensorf_density.py` +* `~/nlf/nets/tensorf_hybrid.py` +* `~/nlf/nets/tensorf_no_sample.py` +* `~/nlf/nets/tensorf_reflect.py` + +were taken without modification from [here](https://github.com/facebookresearch/hyperreel/tree/main) (license included in [LICENSES/MIT.txt](LICENSES/MIT.txt)), with the following copyrights: +``` +Copyright (c) Meta Platforms, Inc. and affiliates. +Copyright (c) 2022 Anpei Chen +``` + +The following files: + +* `~/nlf/activations.py` +* `~/nlf/contract.py` +* `~/nlf/param.py` +* `~/nlf/pe.py` +* `~/nlf/rendering.py` +* `~/nlf/subdivision.py` + +were taken without modification from [here](https://github.com/facebookresearch/hyperreel/tree/main) (license included in [LICENSES/MIT.txt](LICENSES/MIT.txt)), with the following copyrights: +``` +Copyright (c) Meta Platforms, Inc. and affiliates. +``` + +The following files: + +* `~/nlf/__init__.py` +* `~/losses.py` +* `~/main.py` +* `~/metrics.py` +* `~/environment.yml` + +were adapted from [here](https://github.com/facebookresearch/hyperreel/tree/main) (license included in [LICENSES/MIT.txt](LICENSES/MIT.txt)), with the following copyrights: +``` +# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL) +# Copyright (c) Meta Platforms, Inc. and affiliates. +``` diff --git a/conf/__init__.py b/conf/__init__.py new file mode 100644 index 0000000..238fb1f --- /dev/null +++ b/conf/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT diff --git a/conf/config.yaml b/conf/config.yaml new file mode 100644 index 0000000..c300ae6 --- /dev/null +++ b/conf/config.yaml @@ -0,0 +1,9 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +defaults: + - experiment: local diff --git a/conf/experiment/dataset/blender.yaml b/conf/experiment/dataset/blender.yaml new file mode 100644 index 0000000..38157e1 --- /dev/null +++ b/conf/experiment/dataset/blender.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: blender +collection: lego +data_subdir: nerf_synthetic +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [400, 400] +spherical_poses: True +use_ndc: False + +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/blender_large.yaml b/conf/experiment/dataset/blender_large.yaml new file mode 100644 index 0000000..c8c6155 --- /dev/null +++ b/conf/experiment/dataset/blender_large.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: blender +collection: lego +data_subdir: nerf_synthetic +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [800, 800] +spherical_poses: True +use_ndc: False + +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/blender_open_movies.yaml b/conf/experiment/dataset/blender_open_movies.yaml new file mode 100644 index 0000000..cb5c85b --- /dev/null +++ b/conf/experiment/dataset/blender_open_movies.yaml @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: video3d_time +collection: 10_03_B-agent_dodges_box +data_subdir: blender_open_movies +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1280, 720] +spherical_poses: False +use_ndc: False +correct_poses: True + +# Validation +val_num: 8 +val_skip: 2 +val_set: 'lightfield' + +val_all: False +val_pairs: [[2,2]] + +lightfield_step: 1 +lightfield_rows: 5 +lightfield_cols: 5 + +#val_all: False +#val_set: [0] + +# Video +start_frame: 0 +num_frames: 50 +keyframe_step: 4 + +#load_full_step: 8 +#subsample_keyframe_step: 4 +#subsample_keyframe_frac: 0.25 +#subsample_frac: 0.125 + +load_full_step: 8 +subsample_keyframe_step: 4 +subsample_keyframe_frac: 0.25 +subsample_frac: 0.125 + +# Render +render_params: + interpolate_time: False + interpolate: False + supersample: 2 + crop: 1.0 diff --git a/conf/experiment/dataset/blender_small.yaml b/conf/experiment/dataset/blender_small.yaml new file mode 100644 index 0000000..b73978b --- /dev/null +++ b/conf/experiment/dataset/blender_small.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: blender +collection: lego +data_subdir: nerf_synthetic +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [200, 200] +spherical_poses: True +use_ndc: False + +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/catacaustics.yaml b/conf/experiment/dataset/catacaustics.yaml new file mode 100644 index 0000000..4e1c888 --- /dev/null +++ b/conf/experiment/dataset/catacaustics.yaml @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: catacaustics +collection: compost +data_subdir: catacaustics +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1000, 666] +spherical_poses: False +use_ndc: False +correct_poses: True + +val_num: 8 +val_skip: -1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/donerf.yaml b/conf/experiment/dataset/donerf.yaml new file mode 100644 index 0000000..6171507 --- /dev/null +++ b/conf/experiment/dataset/donerf.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: donerf +collection: barbershop +data_subdir: donerf +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [400, 400] +spherical_poses: True +use_ndc: False +correct_poses: False +center_poses: True + +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/donerf_large.yaml b/conf/experiment/dataset/donerf_large.yaml new file mode 100644 index 0000000..229e3b7 --- /dev/null +++ b/conf/experiment/dataset/donerf_large.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: donerf +collection: barbershop +data_subdir: donerf +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [800, 800] +spherical_poses: True +use_ndc: False +correct_poses: True +center_poses: True + +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/eikonal.yaml b/conf/experiment/dataset/eikonal.yaml new file mode 100644 index 0000000..38e95eb --- /dev/null +++ b/conf/experiment/dataset/eikonal.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: eikonal +collection: Pen +data_subdir: eikonal +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [672, 504] +spherical_poses: False +use_ndc: False + +num_views: -1 +val_num: 1000 +val_skip: 10 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/immersive.yaml b/conf/experiment/dataset/immersive.yaml new file mode 100644 index 0000000..1b30f4e --- /dev/null +++ b/conf/experiment/dataset/immersive.yaml @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: immersive +collection: 05_Horse +data_subdir: immersive +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1280, 960] +spherical_poses: False +use_ndc: False +use_reference: False +correct_poses: True + +# Validation +val_num: 8 +val_skip: 2 + +val_all: False +val_set: [0] + +# Video +start_frame: 0 +num_frames: 50 +keyframe_step: 4 + +# load_full_step: 8 +# subsample_keyframe_step: 4 +# subsample_keyframe_frac: 0.25 +# subsample_frac: 0.125 + +load_full_step: 4 +subsample_keyframe_step: 2 +subsample_keyframe_frac: 0.25 +subsample_frac: 0.125 + +# Render +render_params: + interpolate_time: False + interpolate: False + supersample: 2 + crop: 1.0 + +static: False diff --git a/conf/experiment/dataset/llff.yaml b/conf/experiment/dataset/llff.yaml new file mode 100644 index 0000000..dbefddc --- /dev/null +++ b/conf/experiment/dataset/llff.yaml @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: llff +collection: fern +data_subdir: nerf_llff_data +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [504, 378] +spherical_poses: False +use_ndc: True + +val_num: 1000 +val_skip: 8 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/llff_360.yaml b/conf/experiment/dataset/llff_360.yaml new file mode 100644 index 0000000..4a058dd --- /dev/null +++ b/conf/experiment/dataset/llff_360.yaml @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: llff +collection: vasedeck +data_subdir: nerf_real_360 +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [504, 378] +spherical_poses: True +use_ndc: False +centered_pixels: True + +val_num: 1000 +val_skip: inf +val_all: True + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/llff_large.yaml b/conf/experiment/dataset/llff_large.yaml new file mode 100644 index 0000000..6f52052 --- /dev/null +++ b/conf/experiment/dataset/llff_large.yaml @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: llff +collection: fern +data_subdir: nerf_llff_data +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1008, 756] +spherical_poses: False +use_ndc: True + +val_num: 1000 +val_skip: 8 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/llff_undistort.yaml b/conf/experiment/dataset/llff_undistort.yaml new file mode 100644 index 0000000..dfd44b5 --- /dev/null +++ b/conf/experiment/dataset/llff_undistort.yaml @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: shiny +collection: fern_undistort +data_subdir: shiny +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [504, 378] +spherical_poses: False +use_ndc: True + +val_num: 1000 +val_skip: 8 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/llff_undistort_large.yaml b/conf/experiment/dataset/llff_undistort_large.yaml new file mode 100644 index 0000000..aa15cdc --- /dev/null +++ b/conf/experiment/dataset/llff_undistort_large.yaml @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: shiny +collection: fern_undistort +data_subdir: shiny +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1008, 756] +spherical_poses: False +use_ndc: True + +val_num: 1000 +val_skip: 8 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/neural_3d.yaml b/conf/experiment/dataset/neural_3d.yaml new file mode 100644 index 0000000..426c7a9 --- /dev/null +++ b/conf/experiment/dataset/neural_3d.yaml @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: neural_3d +collection: coffee_martini +data_subdir: neural_3d +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +#img_wh: [2704, 2028] +img_wh: [1352, 1014] +spherical_poses: False +use_ndc: True +use_reference: False +correct_poses: False + +# Validation +val_num: 8 +val_skip: 2 + +val_all: False +val_set: [0] + +# Video +start_frame: 0 +num_frames: 50 +keyframe_step: 4 + +#load_full_step: 4 +#subsample_keyframe_step: 2 +#subsample_keyframe_frac: 0.25 +#subsample_frac: 0.125 + +load_full_step: 4 +subsample_keyframe_step: 2 +subsample_keyframe_frac: 0.25 +subsample_frac: 0.125 + +# Rendering +render_params: + interpolate_time: False + interpolate: False + supersample: 2 + crop: 0.85 + +static: False +num_chunks: 1 diff --git a/conf/experiment/dataset/refnerf_large.yaml b/conf/experiment/dataset/refnerf_large.yaml new file mode 100644 index 0000000..e07e362 --- /dev/null +++ b/conf/experiment/dataset/refnerf_large.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: blender +collection: ball +data_subdir: refnerf +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [800, 800] +spherical_poses: True +use_ndc: False + +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/shiny.yaml b/conf/experiment/dataset/shiny.yaml new file mode 100644 index 0000000..f8971af --- /dev/null +++ b/conf/experiment/dataset/shiny.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: shiny +collection: food +data_subdir: shiny +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [504, 378] +use_full_image: False +spherical_poses: False +use_ndc: True + +val_num: 1000 +val_skip: 8 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/shiny_dense.yaml b/conf/experiment/dataset/shiny_dense.yaml new file mode 100644 index 0000000..b5b36ac --- /dev/null +++ b/conf/experiment/dataset/shiny_dense.yaml @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: shiny +collection: cd +data_subdir: shiny +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [504, 284] +use_ndc: True + +val_num: 1000 +val_skip: 8 + +render_params: + interpolate: False + supersample: 4 + max_frames: 400 + crop: 0.85 diff --git a/conf/experiment/dataset/shiny_dense_large.yaml b/conf/experiment/dataset/shiny_dense_large.yaml new file mode 100644 index 0000000..1ddfe35 --- /dev/null +++ b/conf/experiment/dataset/shiny_dense_large.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: shiny +collection: cd +data_subdir: shiny +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1008, 567] +spherical_poses: False +use_ndc: True + +val_num: 8 +#val_skip: 8 +val_skip: 2 +val_all: False + +render_params: + interpolate: False + supersample: 4 + max_frames: 400 + crop: 0.8 diff --git a/conf/experiment/dataset/shiny_large.yaml b/conf/experiment/dataset/shiny_large.yaml new file mode 100644 index 0000000..e3a34d4 --- /dev/null +++ b/conf/experiment/dataset/shiny_large.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: shiny +collection: food +data_subdir: shiny +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1008, 756] +spherical_poses: False +use_ndc: True + +val_num: 8 +val_skip: 8 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/spaces.yaml b/conf/experiment/dataset/spaces.yaml new file mode 100644 index 0000000..5c8dcf2 --- /dev/null +++ b/conf/experiment/dataset/spaces.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: spaces +collection: scene_000 +data_subdir: spaces +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [800, 480] +spherical_poses: False +use_ndc: True + +val_num: 8 +val_skip: 8 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/stanford.yaml b/conf/experiment/dataset/stanford.yaml new file mode 100644 index 0000000..96d2635 --- /dev/null +++ b/conf/experiment/dataset/stanford.yaml @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: gem +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: +downsample: 1 +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_beans.yaml b/conf/experiment/dataset/stanford_beans.yaml new file mode 100644 index 0000000..9707487 --- /dev/null +++ b/conf/experiment/dataset/stanford_beans.yaml @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: beans +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [512, 256] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_beans_large.yaml b/conf/experiment/dataset/stanford_beans_large.yaml new file mode 100644 index 0000000..e1d5d0e --- /dev/null +++ b/conf/experiment/dataset/stanford_beans_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: beans +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1024, 512] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_bracelet.yaml b/conf/experiment/dataset/stanford_bracelet.yaml new file mode 100644 index 0000000..656230e --- /dev/null +++ b/conf/experiment/dataset/stanford_bracelet.yaml @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: bracelet +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [512, 320] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.1 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_bracelet_large.yaml b/conf/experiment/dataset/stanford_bracelet_large.yaml new file mode 100644 index 0000000..d8966d0 --- /dev/null +++ b/conf/experiment/dataset/stanford_bracelet_large.yaml @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: bracelet +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1024, 640] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +#render_params: +# interpolate: False +# supersample: 4 +# crop: 1.0 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_bulldozer.yaml b/conf/experiment/dataset/stanford_bulldozer.yaml new file mode 100644 index 0000000..a64ddb5 --- /dev/null +++ b/conf/experiment/dataset/stanford_bulldozer.yaml @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: bulldozer +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [768, 576] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.35 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_bulldozer_large.yaml b/conf/experiment/dataset/stanford_bulldozer_large.yaml new file mode 100644 index 0000000..b061757 --- /dev/null +++ b/conf/experiment/dataset/stanford_bulldozer_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: bulldozer +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1536, 1152] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_bunny.yaml b/conf/experiment/dataset/stanford_bunny.yaml new file mode 100644 index 0000000..8dc0b95 --- /dev/null +++ b/conf/experiment/dataset/stanford_bunny.yaml @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: bunny +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [512, 512] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_bunny_large.yaml b/conf/experiment/dataset/stanford_bunny_large.yaml new file mode 100644 index 0000000..ae446ba --- /dev/null +++ b/conf/experiment/dataset/stanford_bunny_large.yaml @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: bunny +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1024, 1024] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + start_col: 0 + end_col: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 4 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_chess.yaml b/conf/experiment/dataset/stanford_chess.yaml new file mode 100644 index 0000000..74f21f7 --- /dev/null +++ b/conf/experiment/dataset/stanford_chess.yaml @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: chess +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [700, 400] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + step: 4 + supersample: 4 + disp_row: 8 + + use_file_coords: True + st_scale: 0.1 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_chess_large.yaml b/conf/experiment/dataset/stanford_chess_large.yaml new file mode 100644 index 0000000..10b41ae --- /dev/null +++ b/conf/experiment/dataset/stanford_chess_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: chess +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1400, 800] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_epi.yaml b/conf/experiment/dataset/stanford_epi.yaml new file mode 100644 index 0000000..c0219b8 --- /dev/null +++ b/conf/experiment/dataset/stanford_epi.yaml @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford_epi +collection: bunny +data_subdir: stanford_epi +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: +downsample: 1 +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + start_col: 0 + end_col: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 16 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_flowers.yaml b/conf/experiment/dataset/stanford_flowers.yaml new file mode 100644 index 0000000..30338ed --- /dev/null +++ b/conf/experiment/dataset/stanford_flowers.yaml @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
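The lightfield blocks above all describe the 17x17 Stanford camera grid (rows/cols), with step: 4 and disp_row: 8. One plausible reading, assuming step simply subsamples the grid to pick training views and the remaining views are held out; the selection logic below is illustrative only, not the repository's loader:

    rows, cols, step = 17, 17, 4   # values taken from the lightfield blocks above

    train_views = [(r, c) for r in range(0, rows, step) for c in range(0, cols, step)]
    held_out = [(r, c) for r in range(rows) for c in range(cols) if (r, c) not in train_views]
    print(len(train_views), len(held_out))   # 25 and 264 (a 5x5 subset of the 17x17 grid)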
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: flowers +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [640, 768] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.25 + +lightfield: + rows: 17 + cols: 17 + step: 4 + supersample: 4 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_flowers_large.yaml b/conf/experiment/dataset/stanford_flowers_large.yaml new file mode 100644 index 0000000..43884d2 --- /dev/null +++ b/conf/experiment/dataset/stanford_flowers_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: flowers +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1280, 1536] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_gem.yaml b/conf/experiment/dataset/stanford_gem.yaml new file mode 100644 index 0000000..9c55f83 --- /dev/null +++ b/conf/experiment/dataset/stanford_gem.yaml @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: gem +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [384, 512] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_gem_large.yaml b/conf/experiment/dataset/stanford_gem_large.yaml new file mode 100644 index 0000000..3fe3b9c --- /dev/null +++ b/conf/experiment/dataset/stanford_gem_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: gem +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [768, 1024] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_knights.yaml b/conf/experiment/dataset/stanford_knights.yaml new file mode 100644 index 0000000..1a1d8b4 --- /dev/null +++ b/conf/experiment/dataset/stanford_knights.yaml @@ -0,0 +1,45 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: knights +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [512, 512] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + start_col: 0 + end_col: 17 + + step: 4 + supersample: 4 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_knights_large.yaml b/conf/experiment/dataset/stanford_knights_large.yaml new file mode 100644 index 0000000..d412268 --- /dev/null +++ b/conf/experiment/dataset/stanford_knights_large.yaml @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: knights +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1024, 1024] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + start_col: 0 + end_col: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 4 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_large.yaml b/conf/experiment/dataset/stanford_large.yaml new file mode 100644 index 0000000..72a09bd --- /dev/null +++ b/conf/experiment/dataset/stanford_large.yaml @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: gem +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: +downsample: 1 +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_llff.yaml b/conf/experiment/dataset/stanford_llff.yaml new file mode 100644 index 0000000..7962b13 --- /dev/null +++ b/conf/experiment/dataset/stanford_llff.yaml @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford_llff +collection: tarot +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: +spherical_poses: False +use_ndc: False + +val_num: 8 +val_skip: 1 +val_pairs: [] +val_all: False + +val_set: lightfield +lightfield_step: 1 +lightfield_rows: 17 +lightfield_cols: 17 + +# Rendering +render_params: + interpolate_time: False + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/conf/experiment/dataset/stanford_tarot.yaml b/conf/experiment/dataset/stanford_tarot.yaml new file mode 100644 index 0000000..c7bf6e9 --- /dev/null +++ b/conf/experiment/dataset/stanford_tarot.yaml @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: tarot +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [512, 512] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +#render_params: +# interpolate: False +# supersample: 4 +# crop: 1.0 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.5 + uv_scale: 1.0 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_tarot_large.yaml b/conf/experiment/dataset/stanford_tarot_large.yaml new file mode 100644 index 0000000..008c1ed --- /dev/null +++ b/conf/experiment/dataset/stanford_tarot_large.yaml @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: tarot +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1024, 1024] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +#render_params: +# interpolate: False +# supersample: 4 +# crop: 1.0 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.5 + uv_scale: 1.0 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_tarot_small.yaml b/conf/experiment/dataset/stanford_tarot_small.yaml new file mode 100644 index 0000000..91f8a98 --- /dev/null +++ b/conf/experiment/dataset/stanford_tarot_small.yaml @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: tarot_small +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [512, 512] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_tarot_small_large.yaml b/conf/experiment/dataset/stanford_tarot_small_large.yaml new file mode 100644 index 0000000..1dc03a3 --- /dev/null +++ b/conf/experiment/dataset/stanford_tarot_small_large.yaml @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: tarot_small +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1024, 1024] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +#render_params: +# interpolate: False +# supersample: 4 +# crop: 1.0 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_treasure.yaml b/conf/experiment/dataset/stanford_treasure.yaml new file mode 100644 index 0000000..2d9aad9 --- /dev/null +++ b/conf/experiment/dataset/stanford_treasure.yaml @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: treasure +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [768, 640] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.75 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_treasure_large.yaml b/conf/experiment/dataset/stanford_treasure_large.yaml new file mode 100644 index 0000000..01e07d5 --- /dev/null +++ b/conf/experiment/dataset/stanford_treasure_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: treasure +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1536, 1280] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_truck.yaml b/conf/experiment/dataset/stanford_truck.yaml new file mode 100644 index 0000000..aa89aa8 --- /dev/null +++ b/conf/experiment/dataset/stanford_truck.yaml @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: truck +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [640, 480] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.1 + vis_st_scale: diff --git a/conf/experiment/dataset/stanford_truck_large.yaml b/conf/experiment/dataset/stanford_truck_large.yaml new file mode 100644 index 0000000..eb90e41 --- /dev/null +++ b/conf/experiment/dataset/stanford_truck_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: truck +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1280, 960] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/conf/experiment/dataset/technicolor.yaml b/conf/experiment/dataset/technicolor.yaml new file mode 100644 index 0000000..552a001 --- /dev/null +++ b/conf/experiment/dataset/technicolor.yaml @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: technicolor +collection: painter +data_subdir: technicolor +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [2048, 1088] +spherical_poses: False +use_ndc: True +use_reference: False +correct_poses: False + +# Validation +val_num: 8 +val_skip: 2 +val_set: 'lightfield' + +val_all: False +val_pairs: [[2, 2]] + +lightfield_step: 1 +lightfield_rows: 4 +lightfield_cols: 4 + +# Video +start_frame: 0 +num_frames: 50 # 1.3 seconds +keyframe_step: 4 # every 5 frames + +# load_full_step: 8 # every 4 frames +# subsample_keyframe_step: 4 # every 2 frames +# subsample_keyframe_frac: 0.25 # load 1/4 of the pixels from each keyframe +# subsample_frac: 0.125 # load 1/20 of the pixels from every other frame + +load_full_step: 1 # every 4 frames +subsample_keyframe_step: 1 # every 2 frames +subsample_keyframe_frac: 1.0 # load 1/4 of the pixels from each keyframe +subsample_frac: 1.0 # load 1/20 of the pixels from every other frame + +#load_full_step: 4 # every 4 frames +#subsample_keyframe_step: 2 # every 2 frames +#subsample_keyframe_frac: 0.25 # load 1/4 of the pixels from each keyframe +#subsample_frac: 0.25 # load 1/20 of the pixels from every other frame + +# Rendering +render_params: + interpolate_time: False + interpolate: False + supersample: 2 + crop: 1.0 + +num_chunks: 1 diff --git a/conf/experiment/dataset/technicolor_subsample.yaml b/conf/experiment/dataset/technicolor_subsample.yaml new file mode 100644 index 0000000..51a815b --- /dev/null +++ b/conf/experiment/dataset/technicolor_subsample.yaml @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: technicolor +collection: painter +data_subdir: technicolor +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [2048, 1088] +spherical_poses: False +use_ndc: True +use_reference: False +correct_poses: False + +# Validation +val_num: 8 +val_skip: 2 +val_set: 'lightfield' + +val_all: False +val_pairs: [[2, 2]] + +lightfield_step: 1 +lightfield_rows: 4 +lightfield_cols: 4 + +# Video +start_frame: 0 +num_frames: 50 # 1.3 seconds +keyframe_step: 4 # every 5 frames + +load_full_step: 8 # every 4 frames +subsample_keyframe_step: 4 # every 2 frames +subsample_keyframe_frac: 0.25 # load 1/4 of the pixels from each keyframe +subsample_frac: 0.125 # load 1/20 of the pixels from every other frame + +# load_full_step: 1 # every 4 frames +# subsample_keyframe_step: 1 # every 2 frames +# subsample_keyframe_frac: 1.0 # load 1/4 of the pixels from each keyframe +# subsample_frac: 1.0 # load 1/20 of the pixels from every other frame + +#load_full_step: 4 # every 4 frames +#subsample_keyframe_step: 2 # every 2 frames +#subsample_keyframe_frac: 0.25 # load 1/4 of the pixels from each keyframe +#subsample_frac: 0.25 # load 1/20 of the pixels from every other frame + +# Rendering +render_params: + interpolate_time: False + interpolate: False + supersample: 2 + crop: 1.0 + +num_chunks: 1 diff --git a/conf/experiment/local.yaml b/conf/experiment/local.yaml new file mode 100644 index 0000000..c4449ad --- /dev/null +++ b/conf/experiment/local.yaml @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +defaults: + - params: local + - dataset: donerf_large + - model: donerf_cylinder + - training: donerf_tensorf + - regularizers: all + - visualizers: all diff --git a/conf/experiment/model/blender_voxel.yaml b/conf/experiment/model/blender_voxel.yaml new file mode 100644 index 0000000..5ea6a76 --- /dev/null +++ b/conf/experiment/model/blender_voxel.yaml @@ -0,0 +1,161 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
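The two technicolor configs above differ only in the load_full_step / subsample_* block: the first loads every pixel of every frame, while the subsample variant keeps a fraction of pixels per frame. A rough back-of-the-envelope sketch of the pixel budget implied by the subsample variant, assuming keyframes fall every keyframe_step frames and ignoring load_full_step for simplicity (both assumptions, not taken from the loader code):

    w, h = 2048, 1088                       # img_wh from the technicolor configs above
    num_frames, keyframe_step = 50, 4       # num_frames / keyframe_step from above
    frac_key, frac_other = 0.25, 0.125      # subsample_keyframe_frac / subsample_frac

    keyframes = len(range(0, num_frames, keyframe_step))   # assumed: one keyframe every keyframe_step frames
    others = num_frames - keyframes
    loaded = (keyframes * frac_key + others * frac_other) * w * h
    total = num_frames * w * h
    print(f"{loaded / total:.1%} of all pixels")            # ~15.8% with these settings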
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + # 1) Per ray outputs + emb0: + type: ray_prediction + + # Parameterization + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + # Net + net: + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + #type: zero + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 192 + + outputs: + z_vals: 1 + sigma: 1 + point_offset: 3 + + # 2) Ray density + emb1: + type: point_density + shift: 2.0 + activation: + type: sigmoid + fac: 1.0 + + # 3) Intersection + emb2: + type: ray_intersect + + # Intersect + z_channels: 192 + + intersect: + type: voxel_grid + + sort: True + outward_facing: False + use_disparity: False + use_sigma: True + + origin: [0.0, 0.0, 0.0] + initial: [-2.0, -2.0, -2.0] + end: [2.0, 2.0, 2.0] + + near: 2.0 + far: 6.0 + + activation: + type: identity + fac: 0.5 + + # 5) Add extra outputs + emb3: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + # 7) Ray density + emb5: + type: point_density + shift: 2.0 + activation: + type: sigmoid + fac: 1.0 + + # 8) Add point offset + emb6: + type: point_offset + use_sigma: True + activation: + type: identity + fac: 0.25 + + # 9) Extract + emb7: + type: extract_fields + fields: ['points', 'distances', 'viewdirs'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 1 + ndc_ray: 0 + + # Density activation + fea2denseAct: softplus + distance_scale: 25.0 + density_shift: -10.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 1000000 # 100**3 + N_voxel_final: 27000000 # 300**3 + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 1e-4 + alpha_mask_thre: 1e-4 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + num_frames: 8 + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/conf/experiment/model/bom_cylinder.yaml b/conf/experiment/model/bom_cylinder.yaml new file mode 100644 index 0000000..7e7e327 --- /dev/null +++ b/conf/experiment/model/bom_cylinder.yaml @@ -0,0 +1,261 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 4 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 # NOTE: Changed from 32 + + intersect: + type: cylinder + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor 
sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + #shadingMode: SH + #data_dim_color: 27 + + shadingMode: RGB + data_dim_color: 3 + + # Density + densityMode: Density diff --git a/conf/experiment/model/bom_sphere.yaml b/conf/experiment/model/bom_sphere.yaml new file mode 100644 index 0000000..cc48d50 --- /dev/null +++ b/conf/experiment/model/bom_sphere.yaml @@ -0,0 +1,265 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 8 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 # NOTE: Changed from 32 + + intersect: + type: sphere_new + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + resize_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 
'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 + + # shadingMode: RGB + # data_dim_color: 3 + + # shadingMode: MLP_Fea + # data_dim_color: 27 + + # Density + densityMode: Density diff --git a/conf/experiment/model/bom_z_plane.yaml b/conf/experiment/model/bom_z_plane.yaml new file mode 100644 index 0000000..fdc085a --- /dev/null +++ b/conf/experiment/model/bom_z_plane.yaml @@ -0,0 +1,7 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ diff --git a/conf/experiment/model/catacaustics_cylinder.yaml b/conf/experiment/model/catacaustics_cylinder.yaml new file mode 100644 index 0000000..19865af --- /dev/null +++ b/conf/experiment/model/catacaustics_cylinder.yaml @@ -0,0 +1,195 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 4 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: cylinder + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale_global', 'color_shift_global'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [150, 150, 150] + end: [600, 600, 600] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/conf/experiment/model/catacaustics_distance.yaml b/conf/experiment/model/catacaustics_distance.yaml new file mode 100644 index 0000000..84ed705 --- /dev/null +++ b/conf/experiment/model/catacaustics_distance.yaml @@ -0,0 +1,195 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
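The ray_prediction blocks above parameterize input rays with param.fn: pluecker and n_dims: 6. For reference, the standard Pluecker encoding of a ray is the pair (unit direction, moment = origin x direction); the sketch below is that textbook definition only — the direction_multiplier / moment_multiplier keys presumably just rescale the two halves, which is an assumption here:

    import numpy as np

    def pluecker(origin, direction):
        # Standard 6D Pluecker ray encoding: (unit direction, moment = origin x direction).
        d = direction / np.linalg.norm(direction)
        return np.concatenate([d, np.cross(origin, d)])   # 6 values, matching n_dims: 6 above

    print(pluecker(np.array([1.0, 0.0, 0.0]), np.array([0.0, 1.0, 0.0])))
    # [0. 1. 0. 0. 0. 1.]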
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: euclidean_distance_unified + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale_global', 'color_shift_global'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [100, 100, 100] + end: [400, 400, 400] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/conf/experiment/model/catacaustics_sphere.yaml b/conf/experiment/model/catacaustics_sphere.yaml new file mode 100644 index 0000000..106d1ab --- /dev/null +++ b/conf/experiment/model/catacaustics_sphere.yaml @@ -0,0 +1,250 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 8 + #channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + #window_epochs: 0 + #wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + #window_epochs: 0 + #wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + #normal: + # channels: 3 + + # activation: + # type: identity + + #ref_viewdirs_offset: + # channels: 3 + + # activation: + # type: identity + + #ref_distance: + # channels: 1 + + # activation: + # type: identity + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: sphere + #type: euclidean_distance_unified + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -2.0 + end: 2.0 + + use_dataset_bounds: True + #origin_scale_factor: 1.0 + #resize_scale_factor: 1.0 + origin_scale_factor: 0.0 + resize_scale_factor: 0.0 + + contract: + type: bbox + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + #reflect_0: + # type: reflect + # direction_init: True + + # out_points_field: points_temp + # out_direction_field: viewdirs + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 1 + black_bg: 0 + + # Density activation + fea2denseAct: relu + #distance_scale: 16.0 + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + #N_voxel_final: 216000000 + N_voxel_final: 64000000 + + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [] + #update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + shadingMode: RGB + data_dim_color: 3 + 
#shadingMode: SH + #data_dim_color: 27 diff --git a/conf/experiment/model/catacaustics_voxel.yaml b/conf/experiment/model/catacaustics_voxel.yaml new file mode 100644 index 0000000..84b7fe7 --- /dev/null +++ b/conf/experiment/model/catacaustics_voxel.yaml @@ -0,0 +1,195 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 96 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 96 + + intersect: + type: voxel_grid + + sort: True + outward_facing: False + use_disparity: False + use_sigma: False + max_axis: False + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale_global', 'color_shift_global'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [150, 150, 150] + end: [600, 600, 600] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/conf/experiment/model/catacaustics_z_plane.yaml b/conf/experiment/model/catacaustics_z_plane.yaml new file mode 100644 index 0000000..8c463a1 --- /dev/null +++ b/conf/experiment/model/catacaustics_z_plane.yaml @@ -0,0 +1,200 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, 
Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.0 + contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale_global', 'color_shift_global'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [150, 150, 150] + end: [400, 400, 400] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 + + #shadingMode: MLP_Fea + #data_dim_color: 27 diff --git a/conf/experiment/model/donerf_cylinder.yaml b/conf/experiment/model/donerf_cylinder.yaml new file mode 100644 index 0000000..505bbca --- /dev/null +++ b/conf/experiment/model/donerf_cylinder.yaml @@ -0,0 +1,208 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 4 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: cylinder + #type: sphere + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 3375000 + N_voxel_final: 216000000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + ## Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/conf/experiment/model/donerf_cylinder_no_point.yaml b/conf/experiment/model/donerf_cylinder_no_point.yaml new file mode 100644 index 0000000..83fdc5a --- /dev/null +++ b/conf/experiment/model/donerf_cylinder_no_point.yaml @@ -0,0 +1,204 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 4 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + #type: cylinder + type: sphere + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 3375000 + N_voxel_final: 216000000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + ## Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/conf/experiment/model/donerf_cylinder_small.yaml b/conf/experiment/model/donerf_cylinder_small.yaml new file mode 100644 index 0000000..d74bba4 --- /dev/null +++ b/conf/experiment/model/donerf_cylinder_small.yaml @@ -0,0 +1,210 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 4 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + #type: cylinder + type: sphere + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + #N_voxel_init: 1000000 + #N_voxel_final: 64000000 + N_voxel_init: 262144000 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + ## Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/conf/experiment/model/donerf_sphere.yaml b/conf/experiment/model/donerf_sphere.yaml new file mode 100644 index 0000000..0957e93 --- /dev/null +++ b/conf/experiment/model/donerf_sphere.yaml @@ -0,0 +1,207 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
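The intersect blocks in these configs all enable a mip-NeRF 360 style contraction (contract: type: mipnerf, contract_samples: True), so unbounded sample points are squashed into a bounded volume before being looked up in the voxel/tensor grid. A rough sketch of the standard unit-radius contraction; the contract_start_radius / contract_end_radius and use_dataset_bounds options above generalize this and are not modeled here:

import torch

def contract_mipnerf(x, eps=1e-9):
    # Identity inside the unit ball; points outside are mapped to radius < 2,
    # so the whole (unbounded) scene fits inside the grid's aabb.
    norm = x.norm(dim=-1, keepdim=True).clamp_min(eps)
    squashed = (2.0 - 1.0 / norm) * (x / norm)
    return torch.where(norm <= 1.0, x, squashed)
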
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 4 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: sphere + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 3375000 + N_voxel_final: 216000000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + ## Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/conf/experiment/model/donerf_voxel.yaml b/conf/experiment/model/donerf_voxel.yaml new file mode 100644 index 0000000..227d5cf --- /dev/null +++ b/conf/experiment/model/donerf_voxel.yaml @@ -0,0 +1,232 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + #param: + # fn: contract_points + + # param: + # n_dims: 6 + # fn: pluecker + # direction_multiplier: 1.0 + # moment_multiplier: 1.0 + + # contract: + # type: mipnerf + # contract_samples: True + # use_dataset_bounds: True + # + # contract_start_channel: 3 + # contract_end_channel: 6 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 48 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 48 + + intersect: + type: voxel_grid + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + #N_voxel_init: 2097152 + N_voxel_init: 3375000 # TODO: original + #N_voxel_init: 8000000 + + N_voxel_final: 216000000 # TODO: original + #N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + #update_AlphaMask_list: [] + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + ## Shading + #shadingMode: SH + #data_dim_color: 27 + + shadingMode: RGB + data_dim_color: 3 diff --git a/conf/experiment/model/donerf_z.yaml b/conf/experiment/model/donerf_z.yaml new file mode 100644 index 0000000..367d1c9 --- /dev/null +++ b/conf/experiment/model/donerf_z.yaml @@ 
-0,0 +1,121 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + +embedding: + type: epipolar + + # Parameterization + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + net: + pe: + type: windowed + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Intersection + z_channels: 96 + preds_per_z: 1 + + intersect: + type: z_plane + out_channels_per_z: 3 + extra_outputs: ['distance'] + + stratified: True + sort: True + use_disparity: False + + initial: -1.0 + end: 1.0 + + add_random: False + random_per_sample: 1 + + add_point_offset: True + + add_flow: False + flow_keyframes: 1 + flow_scale: 1.0 + + z_activation: + type: identity + fac: 0.5 + point_activation: + type: tanh + fac: 0.25 + flow_activation: + type: identity + fac: 0.25 + sigma_activation: + type: sigmoid + fac: 1.0 + + sigma_shift: 5.0 + + # Transform + extra_out_channels: 3 + +color: + type: base + + net: + type: tensor_vm_split_no_sample + + white_bg: 0 + ndc_ray: 0 + nSamples: 96 + step_ratio: 0.5 + + fea2denseAct: relu + distance_scale: 4.0 + density_shift: 0.0 + + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + N_voxel_init: 64000000 # 400**3 + N_voxel_final: 512000000 # 800**3 + upsamp_list: [2000,3000,4000,5500,7000] + update_AlphaMask_list: [] + + rm_weight_mask_thre: 1e-4 + alpha_mask_thre: 1e-4 + + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + shadingMode: SH + data_dim_color: 27 + + view_pe: 2 + fea_pe: 2 + featureC: 128 + + lr_upsample_reset: True diff --git a/conf/experiment/model/immersive_cylinder.yaml b/conf/experiment/model/immersive_cylinder.yaml new file mode 100644 index 0000000..7ec3d6f --- /dev/null +++ b/conf/experiment/model/immersive_cylinder.yaml @@ -0,0 +1,272 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
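The pe: type: windowed blocks above describe a coarse-to-fine positional encoding: higher frequency bands are faded in over training, controlled by n_freqs, wait_iters and max_freq_epoch. A sketch of the usual windowing scheme (Nerfies/BARF style); the exact schedule and the freq_multiplier handling in this codebase may differ, so treat this as illustrative only:

import math
import torch

def windowed_pe(x, n_freqs, alpha, include_identity=True):
    # `alpha` ramps from 0 to n_freqs over training; band k is weighted by a
    # cosine ease-in so low frequencies become available first.
    feats = [x] if include_identity else []  # exclude_identity: False keeps the raw input
    for k in range(n_freqs):
        w = 0.5 * (1.0 - math.cos(math.pi * min(max(alpha - k, 0.0), 1.0)))
        freq = 2.0 ** k
        feats.append(w * torch.sin(freq * x))
        feats.append(w * torch.cos(freq * x))
    return torch.cat(feats, dim=-1)
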
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 # NOTE: Changed from 32 + + outputs: + z_vals: + channels: 4 # NOTE: Changed from 4 + + spatial_flow: + channels: 3 + + activation: + type: identity + #outer_fac: 8.0 + #outer_fac: 4.0 + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 64 # NOTE: Changed from 32 + z_channels: 32 # NOTE: Changed from 32 + + intersect: + type: cylinder # NOTE: Changed from cylinder + #type: sphere # NOTE: Changed from cylinder + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + #near: 0.5 # NOTE: Changed from 0.5 + #initial: 0.75 # NOTE: Changed from 0.75 + #end: 64.0 + #origin_scale_factor: 0.0 + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + #contract_start_radius: 1.75 # NOTE: Changed from 1.75 + #contract_end_radius: 64.0 + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density 
activation + fea2denseAct: relu + distance_scale: 16.0 + #distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 + #shadingMode: RGB + #data_dim_color: 3 + + # Density + densityMode: Density diff --git a/conf/experiment/model/immersive_cylinder_pe.yaml b/conf/experiment/model/immersive_cylinder_pe.yaml new file mode 100644 index 0000000..d9587ce --- /dev/null +++ b/conf/experiment/model/immersive_cylinder_pe.yaml @@ -0,0 +1,269 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 4 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 # NOTE: Changed from 32 + + outputs: + z_vals: + channels: 4 # NOTE: Changed from 4 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 8.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 64 # NOTE: Changed from 32 + z_channels: 32 # NOTE: Changed from 32 + + intersect: + type: cylinder # NOTE: Changed from cylinder + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + #near: 0.5 # NOTE: Changed from 0.5 + #initial: 0.75 # NOTE: Changed from 0.75 + #end: 64.0 + #origin_scale_factor: 0.0 + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + #contract_start_radius: 1.75 # NOTE: Changed from 1.75 + #contract_end_radius: 64.0 + 
use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + #distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 + #shadingMode: RGB + #data_dim_color: 3 + + # Density + densityMode: Density diff --git a/conf/experiment/model/immersive_sphere.yaml b/conf/experiment/model/immersive_sphere.yaml new file mode 100644 index 0000000..fbe6afd --- /dev/null +++ b/conf/experiment/model/immersive_sphere.yaml @@ -0,0 +1,278 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
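The shading settings in these color nets come in pairs: shadingMode: SH with data_dim_color: 27, or shadingMode: RGB with data_dim_color: 3. The 27 is simply 3 color channels times the 9 basis functions of degree-2 spherical harmonics; a one-line check of that bookkeeping (hypothetical helper, shown only to make the constant explicit):

def sh_feature_dim(sh_degree: int, channels: int = 3) -> int:
    # (degree + 1)^2 spherical-harmonic basis functions per color channel.
    return (sh_degree + 1) ** 2 * channels

assert sh_feature_dim(2) == 27  # shadingMode: SH pairs with data_dim_color: 27
# shadingMode: RGB stores the raw color directly, hence data_dim_color: 3.

The MLP_Fea variant seen in some configs likewise carries a 27-D appearance feature, but decodes it with a small MLP instead of an analytic SH basis.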
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 # NOTE: Changed from 32 + + outputs: + z_vals: + channels: 4 # NOTE: Changed from 4 + + spatial_flow: + channels: 3 + + activation: + type: identity + #outer_fac: 8.0 + #outer_fac: 4.0 + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 259 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 259 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 # NOTE: Changed from 32 + # z_channels: 32 # NOTE: Changed from 32 + + intersect: + #type: cylinder # NOTE: Changed from cylinder + type: sphere # NOTE: Changed from cylinder + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + #near: 0.5 # NOTE: Changed from 0.5 + #initial: 0.75 # NOTE: Changed from 0.75 + #end: 64.0 + #origin_scale_factor: 0.0 + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + #contract_start_radius: 1.75 # NOTE: Changed from 1.75 + #contract_end_radius: 64.0 + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # 
Density activation + fea2denseAct: relu + distance_scale: 16.0 + #distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + # shadingMode: SH + # data_dim_color: 27 + # #shadingMode: RGB + # #data_dim_color: 3 + shadingMode: MLP_Fea + data_dim_color: 27 + + # Density + densityMode: Density + + gear_num: 3 diff --git a/conf/experiment/model/immersive_sphere_new.yaml b/conf/experiment/model/immersive_sphere_new.yaml new file mode 100644 index 0000000..e03db90 --- /dev/null +++ b/conf/experiment/model/immersive_sphere_new.yaml @@ -0,0 +1,273 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 # NOTE: Changed from 32 + + outputs: + z_vals: + channels: 8 + + spatial_flow: + channels: 3 + + activation: + type: identity + #outer_fac: 8.0 + #outer_fac: 4.0 + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 64 # NOTE: Changed from 32 + z_channels: 32 # NOTE: Changed from 32 + + intersect: + #type: cylinder # NOTE: Changed from cylinder + type: sphere_new # NOTE: Changed from cylinder + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + #near: 0.5 # NOTE: Changed from 0.5 + #initial: 0.75 # NOTE: Changed from 0.75 + #end: 64.0 + #origin_scale_factor: 0.0 + + use_dataset_bounds: True + resize_scale_factor: 1.0 + 
origin_scale_factor: 1.0 + + contract: + type: mipnerf + contract_samples: True + #contract_start_radius: 1.75 # NOTE: Changed from 1.75 + #contract_end_radius: 64.0 + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + #distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 + #shadingMode: RGB + #data_dim_color: 3 + + # Density + densityMode: Density diff --git a/conf/experiment/model/immersive_sphere_test.yaml b/conf/experiment/model/immersive_sphere_test.yaml new file mode 100644 index 0000000..f41bb0c --- /dev/null +++ b/conf/experiment/model/immersive_sphere_test.yaml @@ -0,0 +1,240 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
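The N_voxel_init / N_voxel_final values above are total voxel budgets rather than per-axis resolutions: 2097152 is 128^3 and 262144000 is 640^3 here, and likewise 3375000 = 150^3 and 216000000 = 600^3 in the donerf configs. A small sketch of one common way (TensoRF-style) to turn such a budget into a per-axis grid size for the aabb used in these configs; the repository's own conversion may differ:

import numpy as np

def voxels_to_resolution(n_voxels, aabb_min, aabb_max):
    # Pick a cubic voxel edge so the box holds roughly n_voxels voxels, then
    # derive the per-axis resolution (this also handles non-cubic boxes).
    size = np.asarray(aabb_max, float) - np.asarray(aabb_min, float)
    edge = (size.prod() / n_voxels) ** (1.0 / 3.0)
    return np.round(size / edge).astype(int)

print(voxels_to_resolution(2_097_152, [-2, -2, -2], [2, 2, 2]))    # [128 128 128]
print(voxels_to_resolution(262_144_000, [-2, -2, -2], [2, 2, 2]))  # [640 640 640]
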
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 # NOTE: Changed from 32 + + outputs: + z_vals: + channels: 4 # NOTE: Changed from 4 + + spatial_flow: + channels: 3 + + activation: + type: identity + #outer_fac: 8.0 + #outer_fac: 4.0 + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 64 # NOTE: Changed from 32 + z_channels: 32 # NOTE: Changed from 32 + + intersect: + #type: cylinder # NOTE: Changed from cylinder + type: sphere # NOTE: Changed from cylinder + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + #near: 0.5 # NOTE: Changed from 0.5 + #initial: 0.75 # NOTE: Changed from 0.75 + #end: 64.0 + #origin_scale_factor: 0.0 + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + #contract_start_radius: 1.75 # NOTE: Changed from 1.75 + #contract_end_radius: 64.0 + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + #distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + 
alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: RGB + data_dim_color: 3 + + # Density + densityMode: Density diff --git a/conf/experiment/model/immersive_z_plane.yaml b/conf/experiment/model/immersive_z_plane.yaml new file mode 100644 index 0000000..099f8e4 --- /dev/null +++ b/conf/experiment/model/immersive_z_plane.yaml @@ -0,0 +1,242 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + near: 0.5 + initial: -1.0 + end: -100.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 8.0 # NOTE: Changed from 2 + contract_end_radius: 100.0 + + activation: + type: identity + fac: 0.5 + + color_transform: + type: color_transform + + transform_activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 0.1 + outer_fac: 1.0 + + shift_activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 0.1 + outer_fac: 1.0 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size 
and upsampling + grid_size: + start: [192, 192, 192] # NOTE: Changed from 192 + end: [800, 800, 800] # NOTE: Changed from 800 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 + + #shadingMode: SH + #data_dim_color: 27 + + #shadingMode: MLP_Fea + #data_dim_color: 27 + + # Density + densityMode: Density diff --git a/conf/experiment/model/llff_z_plane.yaml b/conf/experiment/model/llff_z_plane.yaml new file mode 100644 index 0000000..9af11db --- /dev/null +++ b/conf/experiment/model/llff_z_plane.yaml @@ -0,0 +1,212 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + #param: + # n_dims: 4 + # fn: two_plane + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.0 + contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-1.5, 
-1.67, -1.0], [1.5, 1.67, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 diff --git a/conf/experiment/model/llff_z_plane_small.yaml b/conf/experiment/model/llff_z_plane_small.yaml new file mode 100644 index 0000000..33fa7e8 --- /dev/null +++ b/conf/experiment/model/llff_z_plane_small.yaml @@ -0,0 +1,213 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + #param: + # n_dims: 4 + # fn: two_plane + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.0 + contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-1.5, -1.67, -1.0], [1.5, 1.67, 1.0]] + + # Grid size and upsampling + N_voxel_init: 1000000 + #N_voxel_final: 64000000 + N_voxel_final: 
125000000 + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 diff --git a/conf/experiment/model/neural_3d_z_plane.yaml b/conf/experiment/model/neural_3d_z_plane.yaml new file mode 100644 index 0000000..532aaa0 --- /dev/null +++ b/conf/experiment/model/neural_3d_z_plane.yaml @@ -0,0 +1,268 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + #param: + # n_dims: 4 + # fn: two_plane + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 128 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 4.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + #shift: 4.0 + shift: 1.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 128 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.0 + contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + 
extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + #distance_scale: 8.0 + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -1.5, -1.25], [2.0, 1.5, 1.25]] + + # Grid size and upsampling + N_voxel_init: 2097152 # TODO: original + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: MLP_Fea + data_dim_color: 27 + + # Density + densityMode: Density diff --git a/conf/experiment/model/neural_3d_z_plane_static.yaml b/conf/experiment/model/neural_3d_z_plane_static.yaml new file mode 100644 index 0000000..9427765 --- /dev/null +++ b/conf/experiment/model/neural_3d_z_plane_static.yaml @@ -0,0 +1,238 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + #n_freqs: 6 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 256 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + #window_epochs: 1 + #wait_epochs: 1 + + activation: + type: sigmoid + #shift: 2.0 + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale_global: + channels: 3 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 256 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.0 + contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + #color_transform: + # type: color_transform + + # transform_activation: + # type: ease_value + # start_value: 0.0 + # 
window_epochs: 0 + # wait_epochs: 0 + + # activation: + # type: identity + # shift: 0.0 + # inner_fac: 0.1 + # outer_fac: 1.0 + + # shift_activation: + # type: ease_value + # start_value: 0.0 + # window_epochs: 0 + # wait_epochs: 0 + + # activation: + # type: identity + # shift: 0.0 + # inner_fac: 0.1 + # outer_fac: 1.0 + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', 'color_scale_global', 'color_transform_global', 'color_shift_global'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [100, 100, 100] + end: [300, 300, 300] + #grid_size: + # start: [192, 192, 192] + # end: [800, 800, 800] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [12000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + #shadingMode: RGBtFourier + data_dim_color: 3 + + #shadingMode: SH + #data_dim_color: 27 + + # Density + densityMode: Density + #densityMode: DensityFourier diff --git a/conf/experiment/model/neural_3d_z_plane_world.yaml b/conf/experiment/model/neural_3d_z_plane_world.yaml new file mode 100644 index 0000000..98ccb50 --- /dev/null +++ b/conf/experiment/model/neural_3d_z_plane_world.yaml @@ -0,0 +1,267 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 48 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 48 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + #distance_scale: 8.0 + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 # TODO: original + #N_voxel_init: 3375000 + #N_voxel_init: 8000000 + + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + 
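+  # Note: these thresholding options appear to follow TensoRF's conventions --
+  # update_AlphaMask_list lists training iterations at which the alpha/occupancy mask is
+  # recomputed, alpha_mask_thre is the opacity cutoff below which voxels are masked out,
+  # and rm_weight_mask_thre filters low-weight samples during ray marching.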
#update_AlphaMask_list: [] + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 + + # Density + densityMode: Density diff --git a/conf/experiment/model/refnerf_sphere.yaml b/conf/experiment/model/refnerf_sphere.yaml new file mode 100644 index 0000000..09ea7be --- /dev/null +++ b/conf/experiment/model/refnerf_sphere.yaml @@ -0,0 +1,246 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 8 + #channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + #window_epochs: 0 + #wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + #window_epochs: 0 + #wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + #normal: + # channels: 3 + + # activation: + # type: identity + + #ref_viewdirs_offset: + # channels: 3 + + # activation: + # type: identity + + #ref_distance: + # channels: 1 + + # activation: + # type: identity + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: sphere + #type: euclidean_distance_unified + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -2.0 + end: 2.0 + + use_dataset_bounds: True + #origin_scale_factor: 1.0 + #resize_scale_factor: 1.0 + origin_scale_factor: 0.0 + resize_scale_factor: 0.0 + + activation: + type: identity + fac: 0.5 + + #reflect_0: + # type: reflect + # direction_init: True + + # out_points_field: points_temp + # out_direction_field: viewdirs + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: 
tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 1 + black_bg: 0 + + # Density activation + fea2denseAct: relu + #distance_scale: 16.0 + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + #N_voxel_final: 216000000 + N_voxel_final: 64000000 + + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [] + #update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + shadingMode: RGB + data_dim_color: 3 + #shadingMode: SH + #data_dim_color: 27 diff --git a/conf/experiment/model/shiny_z_deformable.yaml b/conf/experiment/model/shiny_z_deformable.yaml new file mode 100644 index 0000000..558a0ee --- /dev/null +++ b/conf/experiment/model/shiny_z_deformable.yaml @@ -0,0 +1,160 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: basic + n_freqs: 2 + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 4 + + sigma: + channels: 1 + + activation: + type: sigmoid + fac: 1.0 + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + fac: 0.25 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: deformable_voxel_grid + + sort: True + outward_facing: False + use_disparity: False + use_sigma: False + max_axis: False + + out_points: raw_points + out_distance: raw_distance + + start_normal: [ + [ 0.0, 0.0, 1.0 ] + ] + normal_scale_factor: 1.0 + + initial: [-1.0] + end: [1.0] + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + #fea2denseAct: softplus + #distance_scale: 25.0 + #density_shift: -10.0 + + fea2denseAct: relu + distance_scale: 4.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [150, 150, 150] + end: [600, 600, 600] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/conf/experiment/model/shiny_z_depth.yaml b/conf/experiment/model/shiny_z_depth.yaml new file mode 100644 index 0000000..697af6d --- /dev/null +++ 
b/conf/experiment/model/shiny_z_depth.yaml @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + +embedding: + type: epipolar + + # Parameterization + param: + n_dims: 4 + fn: two_plane + + # MLP + net: + # PE + pe_channels: 4 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_epochs: 0 + max_freq_epoch: 4 + exclude_identity: False + + type: base + depth: 6 + hidden_channels: 256 + skips: [3] + + layer_activation: + type: leaky_relu + sigma: 0.25 + + # Z outputs + z_channels: 1 + preds_per_z: 1 + z_activation: identity + + intersect: + type: z_plane + forward_facing: True + stratified: False + sort: False + out_channels_per_z: 3 + + # Transform + tform_in_channels: 0 + tform_out_channels: 3 + tform_activation: + type: row_l2_norm + param_channels: 3 + bias_activation: zero + activation: identity + +color: + type: base + + net: + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 8 + wait_epochs: 0 + max_freq_epoch: 16 + exclude_identity: False + + type: base + depth: 6 + hidden_channels: 384 + skips: [3] + + layer_activation: + type: leaky_relu + sigma: 0.25 + + activation: sigmoid diff --git a/conf/experiment/model/shiny_z_plane.yaml b/conf/experiment/model/shiny_z_plane.yaml new file mode 100644 index 0000000..83a3a2c --- /dev/null +++ b/conf/experiment/model/shiny_z_plane.yaml @@ -0,0 +1,220 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + #z_channels: 64 + #z_channels: 128 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + #z_channels: 64 + #z_channels: 128 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + num_samples_for_scale: 32 + #num_samples_for_scale: 64 + #num_samples_for_scale: 128 + + activation: + type: identity + fac: 0.5 + + #generate_samples_0: + # type: generate_samples + + # sample_range: [32, 32] + # inference_samples: 32 + # total_samples: 32 + + #select_points_0: + # type: select_points + # fields: ['points', 'distances', 'sigma', 'point_sigma', 'point_offset', 'weights', 'color_scale', 'color_shift'] + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/conf/experiment/model/shiny_z_plane_cascaded.yaml b/conf/experiment/model/shiny_z_plane_cascaded.yaml new file mode 100644 index 0000000..a62b31c --- /dev/null +++ b/conf/experiment/model/shiny_z_plane_cascaded.yaml @@ -0,0 +1,272 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: basic + n_freqs: 0 + + # Net + net: + group: embedding_impl + type: zero + + # Outputs + z_channels: 8 + + outputs: + z_vals: + channels: 1 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 8 + + intersect: + type: z_plane + + mask: + stop_iters: -1 + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: False + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_prediction_0: + type: point_prediction + + # Inputs + in_z_channels: 8 + + inputs: + points: 3 + viewdirs: 3 + times: 1 + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: identity + + pe: + type: basic + n_freqs: 1 + + z: + start: 2 + end: 3 + + param: + n_dims: 1 + fn: identity + + pe: + type: basic + n_freqs: 8 + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + out_z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_1: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + #N_voxel_init: 512000 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: 
[8,4,4] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/conf/experiment/model/shiny_z_plane_feedback.yaml b/conf/experiment/model/shiny_z_plane_feedback.yaml new file mode 100644 index 0000000..c155579 --- /dev/null +++ b/conf/experiment/model/shiny_z_plane_feedback.yaml @@ -0,0 +1,231 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 0 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 8 + + outputs: + z_vals: + channels: 1 + + ray_intersect_0: + wait_iters: 0 + type: ray_intersect + + # Intersect + z_channels: 8 + + intersect: + mask: + stop_iters: -1 + + type: z_plane + + sort: False + outward_facing: False + use_disparity: False + use_sigma: False + + initial: -1.0 + end: 1.0 + + out_points: raw_points + out_distance: raw_distance + + activation: + type: identity + fac: 0.5 + + point_prediction_0: + type: point_prediction + + # Inputs + in_z_channels: 8 + + inputs: + points: 3 + viewdirs: 3 + + # Parameterization + params: + all: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: identity + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + out_z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: sigmoid + fac: 1.0 + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + fac: 0.25 + + ray_intersect_1: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + use_sigma: True + max_axis: False + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + #fea2denseAct: softplus + #distance_scale: 25.0 + #density_shift: -10.0 + + fea2denseAct: relu + distance_scale: 4.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [150, 150, 150] + end: [600, 600, 600] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + 
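+  # Note: with SH shading, data_dim_color: 27 presumably encodes 9 spherical-harmonic
+  # coefficients (up to degree 2, (2+1)^2 = 9) for each of the 3 color channels: 3 x 9 = 27.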
shadingMode: SH + data_dim_color: 27 diff --git a/conf/experiment/model/shiny_z_plane_no_point.yaml b/conf/experiment/model/shiny_z_plane_no_point.yaml new file mode 100644 index 0000000..f45b44e --- /dev/null +++ b/conf/experiment/model/shiny_z_plane_no_point.yaml @@ -0,0 +1,213 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + #contract: + # type: mipnerf + # contract_samples: True + # contract_start_radius: 1.0 + # contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + #aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + #grid_size: + # start: [128, 128, 128] + # end: [640, 640, 640] + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + #n_lamb_sigma: [8,0,0] + #n_lamb_sh: [8,0,0] + #n_lamb_sigma: [8,8,8] + #n_lamb_sh: [8,8,8] + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git 
a/conf/experiment/model/shiny_z_plane_small.yaml b/conf/experiment/model/shiny_z_plane_small.yaml new file mode 100644 index 0000000..75fa048 --- /dev/null +++ b/conf/experiment/model/shiny_z_plane_small.yaml @@ -0,0 +1,207 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + #depth: 6 + #hidden_channels: 256 + #skips: [3] + + depth: 4 + hidden_channels: 256 + skips: [2] + + # Outputs + #z_channels: 32 + z_channels: 16 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 32 + z_channels: 16 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/conf/experiment/model/shiny_z_plane_tiny.yaml b/conf/experiment/model/shiny_z_plane_tiny.yaml new file mode 100644 index 0000000..2f9ce1c --- /dev/null +++ b/conf/experiment/model/shiny_z_plane_tiny.yaml @@ -0,0 +1,207 @@ +#!/usr/bin/env python 
+# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + #depth: 6 + #hidden_channels: 256 + #skips: [3] + + depth: 4 + hidden_channels: 128 + skips: [2] + + # Outputs + #z_channels: 32 + z_channels: 8 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 32 + z_channels: 8 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/conf/experiment/model/shiny_z_tensorf.yaml b/conf/experiment/model/shiny_z_tensorf.yaml new file mode 100644 index 0000000..1e58cd2 --- /dev/null +++ b/conf/experiment/model/shiny_z_tensorf.yaml @@ -0,0 +1,161 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + # 1) Per ray outputs + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: two_plane + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 96 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: sigmoid + fac: 1.0 + shift: 2.0 + + point_offset: + channels: 3 + + activation: + type: tanh + fac: 0.25 + + # 2) Intersection + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 96 + + intersect: + type: z + + sort: True + outward_facing: False + use_disparity: False + use_sigma: False + + initial: -1.0 + end: 1.0 + near: 0.25 + + activation: + type: identity + fac: 0.5 + + # 4) Add point offset + point_offset_0: + type: point_offset + use_sigma: True + + # 5) Add extra outputs + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + # 6) Extract + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + ndc_ray: 0 + + # Density activation + #fea2denseAct: softplus + #distance_scale: 25.0 + #density_shift: -10.0 + fea2denseAct: relu + distance_scale: 4.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + #N_voxel_init: 8000000 # 200**3 + N_voxel_init: 1000000 # 100**3 + N_voxel_final: 64000000 # 400**3 + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + #update_AlphaMask_list: [] + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 1e-4 + alpha_mask_thre: 1e-4 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + num_frames: 8 + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/conf/experiment/model/shiny_z_tensorf_cascaded.yaml b/conf/experiment/model/shiny_z_tensorf_cascaded.yaml new file mode 100644 index 0000000..e16ea45 --- /dev/null +++ b/conf/experiment/model/shiny_z_tensorf_cascaded.yaml @@ -0,0 +1,230 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + # 1) Per ray outputs + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + n_freqs: 0 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 24 + + outputs: + z_vals: + channels: 1 + + # 2) Intersection + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 24 + + intersect: + type: z_plane + + mask: + stop_iters: -1 + + sort: True + outward_facing: False + use_disparity: False + use_sigma: False + + initial: -1.0 + end: 1.0 + near: 0.25 + + activation: + type: identity + fac: 0.5 + + point_prediction_0: + type: point_prediction + + # Inputs + in_z_channels: 24 + + inputs: + points: 3 + viewdirs: 3 + + # Parameterization + params: + all: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + out_z_channels: 96 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: sigmoid + fac: 1.0 + shift: 2.0 + + point_offset: + channels: 3 + + activation: + type: tanh + fac: 0.25 + + # 2) Intersection + ray_intersect_1: + type: ray_intersect + + # Intersect + z_channels: 96 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + use_sigma: False + + initial: -1.0 + end: 1.0 + near: 0.25 + + activation: + type: identity + fac: 0.5 + + # 4) Add point offset + point_offset_0: + type: point_offset + use_sigma: True + + # 5) Add extra outputs + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + # 6) Extract + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + ndc_ray: 0 + + # Density activation + #fea2denseAct: softplus + #distance_scale: 25.0 + #density_shift: -10.0 + fea2denseAct: relu + distance_scale: 4.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + #N_voxel_init: 8000000 # 200**3 + N_voxel_init: 1000000 # 100**3 + N_voxel_final: 64000000 # 400**3 + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + #update_AlphaMask_list: [] + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 1e-4 + alpha_mask_thre: 1e-4 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + num_frames: 8 + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/conf/experiment/model/spaces_z_plane.yaml b/conf/experiment/model/spaces_z_plane.yaml new file mode 100644 index 0000000..11b64ca --- /dev/null +++ b/conf/experiment/model/spaces_z_plane.yaml @@ -0,0 +1,206 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.0 + contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 diff --git a/conf/experiment/model/spaces_z_plane_world.yaml b/conf/experiment/model/spaces_z_plane_world.yaml new file mode 100644 index 0000000..b17cb65 --- /dev/null +++ b/conf/experiment/model/spaces_z_plane_world.yaml @@ -0,0 +1,253 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + near: 0.35 + use_dataset_bounds: True + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + #dropout: + # frequency: 2 + # stop_epoch: 10 + + #color_transform: + # type: color_transform + + # transform_activation: + # type: ease_value + # start_value: 0.0 + # window_epochs: 0 + # wait_epochs: 0 + + # activation: + # type: identity + # shift: 0.0 + # inner_fac: 0.1 + # outer_fac: 1.0 + + # shift_activation: + # type: ease_value + # start_value: 0.0 + # window_epochs: 0 + # wait_epochs: 0 + + # activation: + # type: identity + # shift: 0.0 + # inner_fac: 0.1 + # outer_fac: 1.0 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + #dropout: + # frequency: 2 + # stop_epoch: 10 + + #random_offset_0: + # type: random_offset + + # frequency: 2 + # random_per_sample: 4 + # stop_epoch: 10 + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [100, 100, 100] + end: [600, 600, 600] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + #n_lamb_sigma: [8,0,0] + #n_lamb_sh: [8,0,0] + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + 
shadingMode: SH + data_dim_color: 27 diff --git a/conf/experiment/model/stanford_llff_z_plane.yaml b/conf/experiment/model/stanford_llff_z_plane.yaml new file mode 100644 index 0000000..a3b7387 --- /dev/null +++ b/conf/experiment/model/stanford_llff_z_plane.yaml @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + near: -1.0 + far: 0.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + mask: + stop_iters: -1 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 512000 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + #upsamp_list: [8000, 12000, 16000, 20000, 24000] + lr_upsample_reset: True + + # Thresholding + #update_AlphaMask_list: [] + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/conf/experiment/model/stanford_z_plane.yaml b/conf/experiment/model/stanford_z_plane.yaml new file mode 100644 index 0000000..1ceab3d --- /dev/null 
+++ b/conf/experiment/model/stanford_z_plane.yaml @@ -0,0 +1,201 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -0.65 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/conf/experiment/model/stanford_z_plane_mem.yaml b/conf/experiment/model/stanford_z_plane_mem.yaml new file mode 100644 index 0000000..cb0fff5 --- /dev/null +++ b/conf/experiment/model/stanford_z_plane_mem.yaml @@ -0,0 +1,205 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 6 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -0.65 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + #distance_scale: 16.0 + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 + + #shadingMode: SH + #data_dim_color: 27 diff --git a/conf/experiment/model/stanford_z_plane_small.yaml b/conf/experiment/model/stanford_z_plane_small.yaml new file mode 100644 index 0000000..dee0e4f --- /dev/null +++ b/conf/experiment/model/stanford_z_plane_small.yaml @@ -0,0 +1,207 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + #depth: 6 + #hidden_channels: 256 + #skips: [3] + + depth: 4 + hidden_channels: 256 + skips: [2] + + # Outputs + #z_channels: 32 + z_channels: 16 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 32 + z_channels: 16 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -0.65 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/conf/experiment/model/technicolor_cascaded.yaml b/conf/experiment/model/technicolor_cascaded.yaml new file mode 100644 index 0000000..5fb562e --- /dev/null +++ b/conf/experiment/model/technicolor_cascaded.yaml @@ -0,0 +1,325 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 0 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 8 + + outputs: + z_vals: + channels: 1 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 8 + + intersect: + type: z_plane + + mask: + stop_iters: -1 + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_prediction_0: + type: point_prediction + + # Inputs + in_z_channels: 8 + + inputs: + points: 3 + viewdirs: 3 + times: 1 + + # Parameterization + params: + ray: + start: 0 + end: 3 + + param: + n_dims: 3 + fn: identity + + pe: + type: basic + n_freqs: 2 + + time: + start: 3 + end: 4 + + param: + n_dims: 1 + fn: identity + + pe: + type: basic + n_freqs: 4 + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + out_z_channels: 32 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_1: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_1: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 
'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: SH + data_dim_color: 27 + + # Density + densityMode: Density diff --git a/conf/experiment/model/technicolor_z_plane.yaml b/conf/experiment/model/technicolor_z_plane.yaml new file mode 100644 index 0000000..3aab19e --- /dev/null +++ b/conf/experiment/model/technicolor_z_plane.yaml @@ -0,0 +1,268 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + #param: + # n_dims: 6 + # fn: pluecker + # direction_multiplier: 1.0 + # moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 0 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 0.25 # TODO: Original + #outer_fac: 1.0 # TODO: New + #outer_fac: 4.0 # TODO: New + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 # TODO: original + #outer_fac: 0.125 # TODO: New (but not used) + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + 
out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 # TODO: Original + #distance_scale: 8.0 # TODO: New + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + #shadingMode: RGB # TODO: Changed + #data_dim_color: 3 + + # shadingMode: SH + # data_dim_color: 27 + + + shadingMode: MLP_Fea + data_dim_color: 27 + + # Density + densityMode: Density diff --git a/conf/experiment/model/technicolor_z_plane_ff.yaml b/conf/experiment/model/technicolor_z_plane_ff.yaml new file mode 100644 index 0000000..b53884e --- /dev/null +++ b/conf/experiment/model/technicolor_z_plane_ff.yaml @@ -0,0 +1,251 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 0 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: SH + data_dim_color: 27 + + # Density + densityMode: Density diff --git 
a/conf/experiment/model/technicolor_z_plane_large.yaml b/conf/experiment/model/technicolor_z_plane_large.yaml new file mode 100644 index 0000000..080af0d --- /dev/null +++ b/conf/experiment/model/technicolor_z_plane_large.yaml @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: basic + n_freqs: 1 + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: basic + n_freqs: 2 + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 4.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 + + # Density + densityMode: Density diff --git a/conf/experiment/model/technicolor_z_plane_mem.yaml b/conf/experiment/model/technicolor_z_plane_mem.yaml new file mode 
100644 index 0000000..e862403 --- /dev/null +++ b/conf/experiment/model/technicolor_z_plane_mem.yaml @@ -0,0 +1,251 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 4 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 # NOTE: Changed from 32 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 2.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 2.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 # NOTE: Changed from 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 7077888 + N_voxel_final: 1728000000 + #grid_size: + # start: [192, 192, 
192] + # end: [1200, 1200, 1200] + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB # NOTE: Changed from RGB + data_dim_color: 3 + + # Density + densityMode: Density # NOTE: Changed from Density + #densityMode: DensityFourier diff --git a/conf/experiment/model/technicolor_z_plane_no_sample.yaml b/conf/experiment/model/technicolor_z_plane_no_sample.yaml new file mode 100644 index 0000000..5d50060 --- /dev/null +++ b/conf/experiment/model/technicolor_z_plane_no_sample.yaml @@ -0,0 +1,222 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 0 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: zero + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 128 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 0.25 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 128 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: False + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0
+ density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + #N_voxel_init: 2097152 + N_voxel_init: 512000000 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: SH + data_dim_color: 27 + + # Density + densityMode: Density diff --git a/conf/experiment/model/technicolor_z_plane_small.yaml b/conf/experiment/model/technicolor_z_plane_small.yaml new file mode 100644 index 0000000..dec7066 --- /dev/null +++ b/conf/experiment/model/technicolor_z_plane_small.yaml @@ -0,0 +1,220 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + #param: + # n_dims: 6 + # fn: pluecker + # direction_multiplier: 1.0 + # moment_multiplier: 1.0 + + pe: + type: basic + n_freqs: 1 + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: basic + n_freqs: 2 + + # Net + net: + type: base + group: embedding_impl + + depth: 4 + hidden_channels: 256 + skips: [2] + + # Outputs + z_channels: 16 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 4.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 16 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + 
distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 + + # Density + densityMode: Density diff --git a/conf/experiment/model/technicolor_z_plane_tiny.yaml b/conf/experiment/model/technicolor_z_plane_tiny.yaml new file mode 100644 index 0000000..bd78aff --- /dev/null +++ b/conf/experiment/model/technicolor_z_plane_tiny.yaml @@ -0,0 +1,220 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + #param: + # n_dims: 6 + # fn: pluecker + # direction_multiplier: 1.0 + # moment_multiplier: 1.0 + + pe: + type: basic + n_freqs: 1 + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: basic + n_freqs: 2 + + # Net + net: + type: base + group: embedding_impl + + depth: 4 + hidden_channels: 128 + skips: [2] + + # Outputs + z_channels: 8 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 4.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 8 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + 
density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 + + # Density + densityMode: Density diff --git a/conf/experiment/model/technicolor_z_plane_world.yaml b/conf/experiment/model/technicolor_z_plane_world.yaml new file mode 100644 index 0000000..a4005dc --- /dev/null +++ b/conf/experiment/model/technicolor_z_plane_world.yaml @@ -0,0 +1,284 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 0 + #n_freqs: 4 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 6 + end: 7 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + #n_freqs: 4 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + near: 0.5 + + initial: -1.25 + end: -5.0 + + contract: + type: bbox + contract_samples: True + bbox_min: [-2.0, -2.0, 0.0] + bbox_max: [2.0, 2.0, -5.0] + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 2.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + use_sigma: True + + #color_transform: + # type: color_transform + + # transform_activation: + # type: ease_value + # start_value: 0.0 + # window_epochs: 0 + # wait_epochs: 0 + + # activation: + # type: identity + # shift: 0.0 + # 
inner_fac: 0.1 + # outer_fac: 1.0 + + # shift_activation: + # type: ease_value + # start_value: 0.0 + # window_epochs: 0 + # wait_epochs: 0 + + # activation: + # type: identity + # shift: 0.0 + # inner_fac: 0.1 + # outer_fac: 1.0 + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + #grid_size: + # start: [150, 150, 150] + # end: [600, 600, 600] + grid_size: + start: [192, 192, 192] + end: [800, 800, 800] + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + #shadingMode: RGBtFourier + data_dim_color: 3 + + #shadingMode: SH + #data_dim_color: 27 + + # Density + densityMode: Density + #densityMode: DensityFourier + + #filter: + # weight_thresh: 1e-3 + # max_samples: 16 + # wait_epochs: 3 diff --git a/conf/experiment/params/local.yaml b/conf/experiment/params/local.yaml new file mode 100644 index 0000000..c69fb91 --- /dev/null +++ b/conf/experiment/params/local.yaml @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +tensor: True + +ckpt_dir: checkpoints +log_dir: logs +data_dir: data +name: "${experiment.dataset.name}_${experiment.dataset.collection}" + +print_loss: False +save_results: True + +tensorboard: False +log_images: False +log_videos: False + +show_embedding: False + +test_only: False +render_only: False +load_from_weights: False +start_epoch: 0 + +seed: 1 + +save_video_dir: "val_videos" +save_image_dir: "val_images" + +input_pose: "" diff --git a/conf/experiment/regularizers/all.yaml b/conf/experiment/regularizers/all.yaml new file mode 100644 index 0000000..ea4b51e --- /dev/null +++ b/conf/experiment/regularizers/all.yaml @@ -0,0 +1,8 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ diff --git a/conf/experiment/regularizers/blurry_teacher/default.yaml b/conf/experiment/regularizers/blurry_teacher/default.yaml new file mode 100644 index 0000000..6511870 --- /dev/null +++ b/conf/experiment/regularizers/blurry_teacher/default.yaml @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree.
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: blurry_teacher +batch_size: 4096 + +dataset: + name: "dense_${experiment.dataset.name}" + collection: "${experiment.dataset.collection}_dense_blurry" + split: train + root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}_dense_blurry/" + + num_rows: 10 + num_cols: 10 + train_row_skip: 1 + train_col_skip: 1 + size: 1000 + + use_patches: True + blur_radius: 0 + patch_width: 64 + +blur_radius: 0 + +weight: + type: exponential_decay + start: 0.5 + decay: 0.1 + stop_weight: 0.025 + num_epochs: 250 + +loss: + type: mse diff --git a/conf/experiment/regularizers/coarse/default.yaml b/conf/experiment/regularizers/coarse/default.yaml new file mode 100644 index 0000000..88e8e47 --- /dev/null +++ b/conf/experiment/regularizers/coarse/default.yaml @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: coarse +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.1 + stop_weight: 0.01 + + stop_epochs: 100 + num_epochs: 50 + +loss: + type: mse diff --git a/conf/experiment/regularizers/depth_classification/default.yaml b/conf/experiment/regularizers/depth_classification/default.yaml new file mode 100644 index 0000000..c7c9e2d --- /dev/null +++ b/conf/experiment/regularizers/depth_classification/default.yaml @@ -0,0 +1,85 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + + +type: depth_classification +ray_chunk: 32768 +batch_size: 4096 +weight: 0.5 +use_color_embedding: True + +use_disparity: True +offset: 0.0 +near: 1.0 +far: 16.0 +num_slices: 128 + +dataset: + name: random_view + num_views: all + +lookup_loss: + type: mae + weight: 0.5 + warmup_iters: 1000 + + angle_std: 10.0 + dist_std: 0.5 + +color_loss: + type: mae + weight: 0.25 + + angle_std: 5.0 + dist_std: 0.25 + +depth_loss: + type: mae + weight: 0.25 + + angle_std: 5.0 + dist_std: 0.25 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.25 + dir: 0.25 + +param: + n_dims: 6 + fn: pluecker + +pe: + n_freqs: 10 + warmup_iters: 0 + max_freq_iter: 0 + exclude_identity: False + +net: + depth: 3 + hidden_channels: 128 + skips: [] + activation: 'identity' + +optimizer: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + lr_scheduler: steplr + warmup_multipler: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.1 diff --git a/conf/experiment/regularizers/feedback/donerf.yaml b/conf/experiment/regularizers/feedback/donerf.yaml new file mode 100644 index 0000000..300b541 --- /dev/null +++ b/conf/experiment/regularizers/feedback/donerf.yaml @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry_feedback +ray_chunk: 32768 +batch_size: 8192 +wait_epochs: 1 + +student_fields: ['raw_points'] +teacher_fields: ['points'] +num_points: -1 + +weight: + type: exponential_decay + start: 0.1 + decay: 0.25 + num_epochs: 50 diff --git a/conf/experiment/regularizers/feedback/reflect.yaml b/conf/experiment/regularizers/feedback/reflect.yaml new file mode 100644 index 0000000..7190cd9 --- /dev/null +++ b/conf/experiment/regularizers/feedback/reflect.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry_feedback +ray_chunk: 32768 +batch_size: 8192 +wait_epochs: 0 + +student_fields: ['normal'] +teacher_fields: ['render_normal'] +sizes: [3] + +weights: [[0.001, 1.0]] +num_points: -1 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 diff --git a/conf/experiment/regularizers/feedback/stanford.yaml b/conf/experiment/regularizers/feedback/stanford.yaml new file mode 100644 index 0000000..5245604 --- /dev/null +++ b/conf/experiment/regularizers/feedback/stanford.yaml @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry_feedback +ray_chunk: 32768 +batch_size: 8192 +wait_epochs: 1 + +student_fields: ['raw_points', 'raw_distance'] +teacher_fields: ['points'] +num_points: -1 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 diff --git a/conf/experiment/regularizers/flow/video3d.yaml b/conf/experiment/regularizers/flow/video3d.yaml new file mode 100644 index 0000000..305439d --- /dev/null +++ b/conf/experiment/regularizers/flow/video3d.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: flow +ray_chunk: 32768 +batch_size: 8192 + +fields: ['raw_points_start', 'raw_points_end', 'raw_distance'] +origin: [0.0, 0.0, 0.0] +num_points: -1 + +contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.5 + contract_end_radius: 6.0 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 diff --git a/conf/experiment/regularizers/fourier/default.yaml b/conf/experiment/regularizers/fourier/default.yaml new file mode 100644 index 0000000..13f43ed --- /dev/null +++ b/conf/experiment/regularizers/fourier/default.yaml @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: fourier +ray_chunk: 131072 +net_chunk: 131072 +batch_size: 1 +use_inp_freq: inf +wait_iters: 0 + +weight: + type: exponential_decay + start: 1.92 + decay: 0.1 + num_epochs: 500 + +dataset: + name: fourier_lightfield + num_views: all + +fourier_loss: + type: mse + weight: 1 + wait_iters: 0 + +range: + pos: 1.0 + dir: 1.0 diff --git a/conf/experiment/regularizers/geometry/donerf_barbershop.yaml b/conf/experiment/regularizers/geometry/donerf_barbershop.yaml new file mode 100644 index 0000000..dc300fb --- /dev/null +++ b/conf/experiment/regularizers/geometry/donerf_barbershop.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry +ray_chunk: 32768 +batch_size: 8192 +#wait_iters: -16000 +#stop_iters: 16000 + +fields: ['raw_points', 'raw_distance'] +origin: [2.25, 7.75, 1.5] +num_points: -1 + +contract: + type: mipnerf + contract_samples: True + contract_start_radius: 2.0 + contract_end_radius: 16.0 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 diff --git a/conf/experiment/regularizers/geometry/donerf_classroom.yaml b/conf/experiment/regularizers/geometry/donerf_classroom.yaml new file mode 100644 index 0000000..bdcfc9c --- /dev/null +++ b/conf/experiment/regularizers/geometry/donerf_classroom.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry +ray_chunk: 32768 +batch_size: 8192 +#wait_iters: -16000 +#stop_iters: 16000 + +fields: ['raw_points', 'raw_distance'] +origin: [0.783, -3.19, 1.39] +num_points: -1 + +contract: + type: mipnerf + contract_samples: True + contract_start_radius: 2.0 + contract_end_radius: 48.0 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 diff --git a/conf/experiment/regularizers/geometry/donerf_pavillon.yaml b/conf/experiment/regularizers/geometry/donerf_pavillon.yaml new file mode 100644 index 0000000..5e5ceb6 --- /dev/null +++ b/conf/experiment/regularizers/geometry/donerf_pavillon.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry +ray_chunk: 32768 +batch_size: 8192 +#wait_iters: -16000 +#stop_iters: 16000 + +fields: ['raw_points', 'raw_distance'] +origin: [-17.5, -9.5, 2.4] +num_points: 24 + +contract: + type: mipnerf + contract_samples: True + contract_start_radius: 2.0 + contract_end_radius: 120.0 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.125 + num_epochs: 50 diff --git a/conf/experiment/regularizers/geometry/lf.yaml b/conf/experiment/regularizers/geometry/lf.yaml new file mode 100644 index 0000000..49b51fe --- /dev/null +++ b/conf/experiment/regularizers/geometry/lf.yaml @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry +ray_chunk: 32768 +batch_size: 32768 +use_inp_freq: 0 +wait_epochs: 0 + +z_channels: 12 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 500 + +color_loss: + type: mae + weight: 0.0 + wait_epochs: 0 + +geometry_loss: + type: mae + weight: 0.1 + wait_epochs: 0 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.05 + dir: 0.05 + bundle_size: 1 diff --git a/conf/experiment/regularizers/geometry/video3d.yaml b/conf/experiment/regularizers/geometry/video3d.yaml new file mode 100644 index 0000000..eac90fe --- /dev/null +++ b/conf/experiment/regularizers/geometry/video3d.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry +ray_chunk: 32768 +batch_size: 8192 + +fields: ['raw_points', 'raw_distance'] +origin: [0.0, 0.0, 0.0] +num_points: -1 + +contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.5 + contract_end_radius: 6.0 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 diff --git a/conf/experiment/regularizers/geometry_tv/lf.yaml b/conf/experiment/regularizers/geometry_tv/lf.yaml new file mode 100644 index 0000000..eea4931 --- /dev/null +++ b/conf/experiment/regularizers/geometry_tv/lf.yaml @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry_tv +ray_chunk: 32768 +batch_size: 32768 +use_inp_freq: 0 +wait_epochs: 0 + +z_channels: 12 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 500 + +color_loss: + type: mae + weight: 10000.0 + wait_epochs: 0 + +geometry_loss: + type: mae + weight: 0.0 + wait_epochs: 0 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.1 + dir: 0.1 + bundle_size: 1 diff --git a/conf/experiment/regularizers/inverse_ray_depth/default.yaml b/conf/experiment/regularizers/inverse_ray_depth/default.yaml new file mode 100644 index 0000000..74ab365 --- /dev/null +++ b/conf/experiment/regularizers/inverse_ray_depth/default.yaml @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: inverse_ray_depth +ray_chunk: 32768 +net_chunk: 32768 +batch_size: 4096 + +wait_iters: 0 +warmup_iters: 0 +use_inp_freq: 2 + +dataset: + name: random_view + num_views: all + +weight: + type: exponential_decay + start: 0.5 + decay: 0.5 + num_epochs: 500 + +use_disparity: False +num_samples: 8 + +range: + pos: 1.0 + dir: 1.0 + +lookup_weight_map: + angle_std: 5.0 + dist_std: 0.125 + rgb_std: 0.125 + +embedding_lookup_loss: + type: mae + weight: 1.0 + wait_iters: 0 diff --git a/conf/experiment/regularizers/multiple_ray_depth/default.yaml b/conf/experiment/regularizers/multiple_ray_depth/default.yaml new file mode 100644 index 0000000..7bf1edb --- /dev/null +++ b/conf/experiment/regularizers/multiple_ray_depth/default.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: multiple_ray_depth +ray_chunk: 131072 +net_chunk: 131072 +batch_size: 32768 + +wait_iters: 1000 +warmup_iters: 10000 +use_inp_freq: 2 + +use_disparity: False +occlusion_aware: False +num_slices: 8 +num_filler: 0 + +dataset: + name: random_view + num_views: all + +weight: + type: exponential_decay + start: 0.5 + decay: 0.1 + num_epochs: 500 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.25 + dir: 0.25 + bundle_size: 1 + +lookup_weight_map: + angle_std: 10.0 + dist_std: 0.1 + + rgb_std: + type: linear_decay + num_epochs: 100 + start: 0.1 + end: 0.01 + +color_weight_map: + angle_std: 5.0 + dist_std: 0.05 + + rgb_std: + type: linear_decay + num_epochs: 100 + start: 0.1 + end: 0.01 + +depth_weight_map: + angle_std: 10.0 + dist_std: 0.25 + +color_lookup_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +color_loss: + type: mae + weight: 0.5 + wait_iters: 0 + +depth_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +param: + n_dims: 4 + fn: two_plane + +pe: + n_freqs: 8 + wait_iters: 0 + max_freq_iter: 50000 + exclude_identity: False + +net: + depth: 8 + hidden_channels: 256 + skips: [4] + activation: identity + +optimizer: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.1 diff --git a/conf/experiment/regularizers/point/default.yaml b/conf/experiment/regularizers/point/default.yaml new file mode 100644 index 0000000..d7491bf --- /dev/null +++ b/conf/experiment/regularizers/point/default.yaml @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: point +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 0.2 + decay: 0.1 + num_epochs: 100 + +loss: + type: mae + weight: 1.0 + wait_iters: 0 diff --git a/conf/experiment/regularizers/random_pixel/default.yaml b/conf/experiment/regularizers/random_pixel/default.yaml new file mode 100644 index 0000000..a8e0eaf --- /dev/null +++ b/conf/experiment/regularizers/random_pixel/default.yaml @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: teacher +batch_size: 4096 + +dataset: + name: random_pixel + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + stop_epochs: 100 + num_epochs: 100 + +loss: + type: mse diff --git a/conf/experiment/regularizers/ray_bundle/default.yaml b/conf/experiment/regularizers/ray_bundle/default.yaml new file mode 100644 index 0000000..c1dbe47 --- /dev/null +++ b/conf/experiment/regularizers/ray_bundle/default.yaml @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: ray_bundle +ray_chunk: 131072 +net_chunk: 131072 +batch_size: 32768 +use_inp_freq: inf +wait_iters: 0 + +weight: + type: exponential_decay + start: 0.5 + decay: 0.1 + num_epochs: 500 + +dataset: + name: random_lightfield + st_plane: -1.0 + uv_plane: 0.0 + num_views: all + +color_weight_map: + angle_std: 5.0 + dist_std: 0.1 + +color_loss: + type: mse + wait_iters: 0 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + dir: 0.25 + bundle_size: 8 diff --git a/conf/experiment/regularizers/ray_bundle/embedding.yaml b/conf/experiment/regularizers/ray_bundle/embedding.yaml new file mode 100644 index 0000000..484de6c --- /dev/null +++ b/conf/experiment/regularizers/ray_bundle/embedding.yaml @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: ray_bundle +ray_chunk: 131072 +net_chunk: 131072 +batch_size: 8192 +use_inp_freq: inf + +wait_epochs: 0 + +weight: + type: exponential_decay + start: 10.0 + decay: 0.1 + stop_weight: 0.01 + num_epochs: 100 + +dataset: + name: random_lightfield + st_plane: -1.0 + uv_plane: 0.0 + num_views: all + +color_weight_map: + angle_std: 5.0 + dist_std: 0.1 + +color_loss: + type: mse + wait_iters: inf + weight: 0.0 + +embed_weight_map: + angle_std: 20.0 + dist_std: 0.25 + +embed_loss: + type: mse + wait_iters: 0 + weight: 1.0 + +range: + pos: 1.5 + dir: 1.0 + +jitter: + pos: 0.1 + dir: 0.1 + bundle_size: 4 diff --git a/conf/experiment/regularizers/ray_density/default.yaml b/conf/experiment/regularizers/ray_density/default.yaml new file mode 100644 index 0000000..fb314b5 --- /dev/null +++ b/conf/experiment/regularizers/ray_density/default.yaml @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: ray_density + +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 0.125 + decay: 0.25 + stop_weight: 0.00001 + + stop_epochs: 100 + num_epochs: 50 + +loss: + type: huber + delta: 0.25 + +num_views_for_random: 16 +num_views_for_ray: 16 + +extrapolate_freq: 2 +extrapolate_scale: 2.0 + +use_jitter: True +jitter: + dir_std: 0.1 + pos_std: 0.1 + +use_ndc: True +angle_std: 120.0 +dot_std: 120.0 diff --git a/conf/experiment/regularizers/ray_density/simple.yaml b/conf/experiment/regularizers/ray_density/simple.yaml new file mode 100644 index 0000000..3a1b96b --- /dev/null +++ b/conf/experiment/regularizers/ray_density/simple.yaml @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: simple_ray_density + +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 0.125 + decay: 0.25 + stop_weight: 0.00001 + + stop_epochs: 100 + num_epochs: 50 + +loss: + type: mae + delta: 0.25 + +num_views_for_random: 16 +num_views_for_ray: 16 + +extrapolate_freq: 1 +extrapolate_scale: 2.0 + +use_jitter: True +jitter: + dir_std: 0.1 + pos_std: 0.1 + +use_ndc: True +angle_std: 10.0 +dot_std: 10.0 diff --git a/conf/experiment/regularizers/ray_depth/default.yaml b/conf/experiment/regularizers/ray_depth/default.yaml new file mode 100644 index 0000000..b7ddfdc --- /dev/null +++ b/conf/experiment/regularizers/ray_depth/default.yaml @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
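The `loss: type: huber, delta: 0.25` entry in the ray-density config above selects a robust loss, while the `simple` variant uses `mae`. As an illustration only (the repository's own loss factory may construct these differently), PyTorch's built-in losses reproduce the usual meaning of those fields:

import torch

pred = torch.randn(8)
target = torch.randn(8)

# Huber: quadratic for |pred - target| <= delta, linear beyond it.
huber = torch.nn.HuberLoss(delta=0.25)
mae = torch.nn.L1Loss()  # the `mae` loss used by the `simple` variant

print(huber(pred, target), mae(pred, target))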
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: ray_depth +ray_chunk: 131072 +net_chunk: 131072 +batch_size: 4096 + +wait_iters: 1000 +warmup_iters: 1000 +use_inp_freq: 2 + +occlusion_aware: False + +dataset: + name: random_view + num_views: all + +weight: + type: exponential_decay + start: 0.5 + decay: 0.1 + num_epochs: 500 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.1 + dir: 0.1 + bundle_size: 1 + +lookup_weight_map: + angle_std: 10.0 + dist_std: 0.25 + + rgb_std: + type: linear_decay + num_epochs: 100 + start: 0.1 + end: 0.01 + +color_weight_map: + angle_std: 10.0 + dist_std: 0.25 + + rgb_std: + type: linear_decay + num_epochs: 100 + start: 0.1 + end: 0.01 + +depth_weight_map: + angle_std: 10.0 + dist_std: 0.25 + depth_std: 0.5 + +color_lookup_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +color_loss: + type: mae + weight: 0.5 + wait_iters: 10000 + +depth_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +param: + n_dims: 6 + fn: pluecker + +pe: + n_freqs: 8 + wait_iters: 0 + max_freq_iter: 50000 + exclude_identity: False + +net: + depth: 8 + hidden_channels: 256 + skips: [4] + activation: identity + +optimizer: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.1 diff --git a/conf/experiment/regularizers/ray_depth_blending/default.yaml b/conf/experiment/regularizers/ray_depth_blending/default.yaml new file mode 100644 index 0000000..bac64e2 --- /dev/null +++ b/conf/experiment/regularizers/ray_depth_blending/default.yaml @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + + +type: ray_depth_blending +ray_chunk: 32768 +batch_size: 4096 +use_inp_freq: 0 + +use_depth_embedding: False +use_color_embedding: True + +dataset: + name: random_view + num_views: 8 + +warmup_iters: 1000 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.5 + num_epochs: 500 + +lookup: + angle_std: 5.0 + dist_std: 0.1 + +color_loss: + type: mae + weight: 0.5 + + angle_std: 5.0 + dist_std: 0.05 + +depth_loss: + type: mae + weight: 0.5 + + angle_std: 5.0 + dist_std: 0.05 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.1 + dir: 0.1 + +param: + n_dims: 6 + fn: pluecker + +pe: + n_freqs: 10 + warmup_iters: 0 + max_freq_iter: 120000 + exclude_identity: False + +net: + depth: 8 + hidden_channels: 256 + skips: [4] + activation: 'identity' + +optimizer: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multipler: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.5 diff --git a/conf/experiment/regularizers/ray_depth_occ_dir/default.yaml b/conf/experiment/regularizers/ray_depth_occ_dir/default.yaml new file mode 100644 index 0000000..880a123 --- /dev/null +++ b/conf/experiment/regularizers/ray_depth_occ_dir/default.yaml @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: ray_depth_occ_dir +ray_chunk: 131072 +net_chunk: 131072 +batch_size: 4096 + +wait_iters: 1000 +warmup_iters: 1000 +use_inp_freq: 2 + +dataset: + name: random_view + num_views: all + +weight: + type: exponential_decay + start: 0.25 + decay: 0.1 + num_epochs: 500 + +use_disparity: False +num_features: 128 +num_samples: 2 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.25 + dir: 0.25 + bundle_size: 1 + +lookup_weight_map: + angle_std: 10.0 + dist_std: 0.25 + +color_weight_map: + angle_std: 5.0 + dist_std: 0.1 + +depth_weight_map: + angle_std: 5.0 + dist_std: 0.1 + +color_lookup_loss: + type: mae + weight: 0.1 + wait_iters: 0 + +color_loss: + type: mae + weight: 0.5 + wait_iters: 0 + +depth_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +occ_loss: + type: mae + weight: 0.05 + wait_iters: 0 + +param: + n_dims: 6 + fn: pluecker + +pe: + n_freqs: 4 + wait_iters: 0 + max_freq_iter: 0 + exclude_identity: False + +net: + depth: 8 + hidden_channels: 256 + skips: [4] + activation: identity + +optimizer: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.1 diff --git a/conf/experiment/regularizers/ray_interpolation/default.yaml b/conf/experiment/regularizers/ray_interpolation/default.yaml new file mode 100644 index 0000000..58d6775 --- /dev/null +++ b/conf/experiment/regularizers/ray_interpolation/default.yaml @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: ray_interpolation +ray_chunk: 32768 +net_chunk: 32768 +batch_size: 4096 + +warmup_iters: 0 +use_inp_freq: 0 + +dataset: + name: random_view + num_views: 8 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 500 + +use_disparity: True +points_per_view: 8 + +range: + pos: 1.0 + dir: 1.0 + +color_loss: + type: mse + weight: 1.0 + warmup_iters: 0 + + angle_std: 25.0 + dist_std: 0.5 + +blending_net: + type: base + depth: 8 + hidden_channels: 256 + skips: [4] + activation: sigmoid + #activation: softmax + #activation: identity + +optimizer: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multipler: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.5 diff --git a/conf/experiment/regularizers/render_weight/default.yaml b/conf/experiment/regularizers/render_weight/default.yaml new file mode 100644 index 0000000..46b6b42 --- /dev/null +++ b/conf/experiment/regularizers/render_weight/default.yaml @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: render_weight +ray_chunk: 32768 +batch_size: 8192 + +wait_epochs: 0 +window_epochs: 1.5 + +num_points: -1 + +weight: + type: exponential_decay + start: 0.1 + decay: 0.125 + num_epochs: 100 diff --git a/conf/experiment/regularizers/render_weight/entropy.yaml b/conf/experiment/regularizers/render_weight/entropy.yaml new file mode 100644 index 0000000..23db389 --- /dev/null +++ b/conf/experiment/regularizers/render_weight/entropy.yaml @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: render_weight +ray_chunk: 32768 +batch_size: 8192 + +num_points: -1 +window_epochs: 5 + +weight: + type: exponential_decay + start: 0.001 + decay: 0.125 + num_epochs: 20 diff --git a/conf/experiment/regularizers/teacher/default.yaml b/conf/experiment/regularizers/teacher/default.yaml new file mode 100644 index 0000000..e7070fb --- /dev/null +++ b/conf/experiment/regularizers/teacher/default.yaml @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: teacher +batch_size: 4096 + +dataset: + name: "dense_${experiment.dataset.name}" + collection: "${experiment.dataset.collection}_teacher" + split: train + root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}_teacher/" + + num_rows: 10 + num_cols: 10 + train_row_skip: 1 + train_col_skip: 1 + size: 1000 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + stop_weight: 0.025 + num_epochs: 500 + +loss: + type: mse diff --git a/conf/experiment/regularizers/teacher_model/default.yaml b/conf/experiment/regularizers/teacher_model/default.yaml new file mode 100644 index 0000000..8bd6ce9 --- /dev/null +++ b/conf/experiment/regularizers/teacher_model/default.yaml @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: teacher_model +ray_chunk: 32768 +batch_size: 16384 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 + +model_ckpt_path: shiny_lab_temp/last.ckpt +model_start_epoch: 100 + +origin_range: [[-2.0, -2.0, -1.0], [2.0, 2.0, -1.0]] +#origin_range: [[-1.0, -1.0, -1.0], [1.0, 1.0, -1.0]] +direction_range: [[-0.5, -0.5, 2.0], [0.5, 0.5, 2.0]] + +use_ndc: True +convert_ndc: False + +defaults: + - model: ../../../model/shiny_z_plane diff --git a/conf/experiment/regularizers/tensor_tv/l1.yaml b/conf/experiment/regularizers/tensor_tv/l1.yaml new file mode 100644 index 0000000..786be90 --- /dev/null +++ b/conf/experiment/regularizers/tensor_tv/l1.yaml @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
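The quoted values in the teacher configs above, such as "dense_${experiment.dataset.name}" and "${experiment.dataset.collection}_teacher", are OmegaConf interpolations that Hydra resolves against the composed experiment config at load time. A minimal, self-contained illustration of that resolution follows; the concrete keys and values are made up for the example, only the interpolation syntax matches the configs above.

from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "experiment": {"dataset": {"name": "llff", "collection": "fern"}},
    "regularizer": {"dataset": {
        "name": "dense_${experiment.dataset.name}",
        "collection": "${experiment.dataset.collection}_teacher",
    }},
})

print(OmegaConf.to_container(cfg, resolve=True)["regularizer"]["dataset"])
# {'name': 'dense_llff', 'collection': 'fern_teacher'}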
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensor_tv +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 0.00005 + decay: 0.25 + num_epochs: 100 + +use_tv: False +opacity_weight: 1.0 +color_weight: 0.1 + +skip_row: -1 +skip_col: -1 diff --git a/conf/experiment/regularizers/tensor_tv/tv.yaml b/conf/experiment/regularizers/tensor_tv/tv.yaml new file mode 100644 index 0000000..ada2edc --- /dev/null +++ b/conf/experiment/regularizers/tensor_tv/tv.yaml @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensor_tv +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 0.05 + decay: 0.25 + num_epochs: 100 + +use_tv: True +opacity_weight: 1.0 +color_weight: 0.1 diff --git a/conf/experiment/regularizers/tensor_tv/tv_subdivided.yaml b/conf/experiment/regularizers/tensor_tv/tv_subdivided.yaml new file mode 100644 index 0000000..c16a147 --- /dev/null +++ b/conf/experiment/regularizers/tensor_tv/tv_subdivided.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensor_tv +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 0.05 + decay: 0.25 + num_epochs: 100 + +use_tv: True +opacity_weight: 1.0 +color_weight: 0.1 + + +skip_row: -1 +skip_col: -1 diff --git a/conf/experiment/regularizers/tensorf/l1_2000.yaml b/conf/experiment/regularizers/tensorf/l1_2000.yaml new file mode 100644 index 0000000..64c820a --- /dev/null +++ b/conf/experiment/regularizers/tensorf/l1_2000.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [2000,4000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +lr_upsample_reset: True + +TV_weight_density: 0.0 +TV_weight_app: 0.0 diff --git a/conf/experiment/regularizers/tensorf/l1_4000.yaml b/conf/experiment/regularizers/tensorf/l1_4000.yaml new file mode 100644 index 0000000..b52e887 --- /dev/null +++ b/conf/experiment/regularizers/tensorf/l1_4000.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +lr_upsample_reset: True + +TV_weight_density: 0.0 +TV_weight_app: 0.0 diff --git a/conf/experiment/regularizers/tensorf/tv.yaml b/conf/experiment/regularizers/tensorf/tv.yaml new file mode 100644 index 0000000..25a8d69 --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv.yaml @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [2000,4000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 + +#L1_weight_initial: 0 +#L1_weight_rest: 0 +#TV_weight_density: 0.1 +#TV_weight_app: 0.1 diff --git a/conf/experiment/regularizers/tensorf/tv_2000.yaml b/conf/experiment/regularizers/tensorf/tv_2000.yaml new file mode 100644 index 0000000..3b996ce --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_2000.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [2000,4000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/conf/experiment/regularizers/tensorf/tv_4000.yaml b/conf/experiment/regularizers/tensorf/tv_4000.yaml new file mode 100644 index 0000000..ee6f296 --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_4000.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 +#total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/conf/experiment/regularizers/tensorf/tv_4000_donerf.yaml b/conf/experiment/regularizers/tensorf/tv_4000_donerf.yaml new file mode 100644 index 0000000..ee6f296 --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_4000_donerf.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
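In the TensoRF regularizer configs above, `TV_weight_density` / `TV_weight_app` scale a total-variation penalty on the tensorial feature grids, and `L1_weight_initial` / `L1_weight_rest` scale an L1 sparsity term. The repository's exact implementation is not shown in this diff; the sketch below gives the standard form of a TV term on a dense grid, with the (C, D, H, W) grid shape assumed purely for illustration.

import torch

def tv_loss(grid: torch.Tensor) -> torch.Tensor:
    """Total variation over a (C, D, H, W) feature grid: mean squared
    difference between neighbouring entries along each spatial axis."""
    tv = (grid[:, 1:] - grid[:, :-1]).pow(2).mean()
    tv = tv + (grid[:, :, 1:] - grid[:, :, :-1]).pow(2).mean()
    tv = tv + (grid[:, :, :, 1:] - grid[:, :, :, :-1]).pow(2).mean()
    return tv

# Illustrative combination of the weights configured above:
# loss = mse + TV_weight_density * tv_loss(density_grid) \
#            + TV_weight_app * tv_loss(appearance_grid) \
#            + L1_weight_initial * density_grid.abs().mean()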
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 +#total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/conf/experiment/regularizers/tensorf/tv_4000_immersive.yaml b/conf/experiment/regularizers/tensorf/tv_4000_immersive.yaml new file mode 100644 index 0000000..ee6f296 --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_4000_immersive.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 +#total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/conf/experiment/regularizers/tensorf/tv_4000_large.yaml b/conf/experiment/regularizers/tensorf/tv_4000_large.yaml new file mode 100644 index 0000000..6f812af --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_4000_large.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [] +lr_decay_target_ratio: 0.1 +n_iters: 20000 +total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 0.0 +L1_weight_rest: 0.0 +TV_weight_density: 0.25 +TV_weight_app: 0.25 diff --git a/conf/experiment/regularizers/tensorf/tv_4000_large_small.yaml b/conf/experiment/regularizers/tensorf/tv_4000_large_small.yaml new file mode 100644 index 0000000..6f812af --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_4000_large_small.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [] +lr_decay_target_ratio: 0.1 +n_iters: 20000 +total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 0.0 +L1_weight_rest: 0.0 +TV_weight_density: 0.25 +TV_weight_app: 0.25 diff --git a/conf/experiment/regularizers/tensorf/tv_4000_llff.yaml b/conf/experiment/regularizers/tensorf/tv_4000_llff.yaml new file mode 100644 index 0000000..1b967a4 --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_4000_llff.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [] +lr_decay_target_ratio: 0.1 +n_iters: 20000 +total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 0.0 +L1_weight_rest: 0.0 +TV_weight_density: 1.0 +TV_weight_app: 1.0 diff --git a/conf/experiment/regularizers/tensorf/tv_4000_many.yaml b/conf/experiment/regularizers/tensorf/tv_4000_many.yaml new file mode 100644 index 0000000..238fb1f --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_4000_many.yaml @@ -0,0 +1,7 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT diff --git a/conf/experiment/regularizers/tensorf/tv_4000_no_app.yaml b/conf/experiment/regularizers/tensorf/tv_4000_no_app.yaml new file mode 100644 index 0000000..c0b14a7 --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_4000_no_app.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.0 diff --git a/conf/experiment/regularizers/tensorf/tv_4000_small.yaml b/conf/experiment/regularizers/tensorf/tv_4000_small.yaml new file mode 100644 index 0000000..ee6f296 --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_4000_small.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 +#total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/conf/experiment/regularizers/tensorf/tv_4000_stanford_llff.yaml b/conf/experiment/regularizers/tensorf/tv_4000_stanford_llff.yaml new file mode 100644 index 0000000..8010378 --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_4000_stanford_llff.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [20000,24000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/conf/experiment/regularizers/tensorf/tv_4000_technicolor.yaml b/conf/experiment/regularizers/tensorf/tv_4000_technicolor.yaml new file mode 100644 index 0000000..d2d6d44 --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_4000_technicolor.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 +total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.0125 +TV_weight_app: 0.0125 diff --git a/conf/experiment/regularizers/tensorf/tv_8000.yaml b/conf/experiment/regularizers/tensorf/tv_8000.yaml new file mode 100644 index 0000000..9834112 --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_8000.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [8000,16000] +lr_decay_target_ratio: 0.1 +n_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/conf/experiment/regularizers/tensorf/tv_donerf.yaml b/conf/experiment/regularizers/tensorf/tv_donerf.yaml new file mode 100644 index 0000000..b8c4611 --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_donerf.yaml @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 + +#L1_weight_initial: 0 +#L1_weight_rest: 0 +#TV_weight_density: 0.1 +#TV_weight_app: 0.1 diff --git a/conf/experiment/regularizers/tensorf/tv_shiny.yaml b/conf/experiment/regularizers/tensorf/tv_shiny.yaml new file mode 100644 index 0000000..07064dd --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_shiny.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [2000,4000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.25 +TV_weight_app: 0.05 diff --git a/conf/experiment/regularizers/tensorf/tv_video3d.yaml b/conf/experiment/regularizers/tensorf/tv_video3d.yaml new file mode 100644 index 0000000..bb5008c --- /dev/null +++ b/conf/experiment/regularizers/tensorf/tv_video3d.yaml @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [2000,4000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +#L1_weight_initial: 4e-5 +#L1_weight_rest: 2e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 + +#L1_weight_initial: 0 +#L1_weight_rest: 0 +#TV_weight_density: 0.1 +#TV_weight_app: 0.1 diff --git a/conf/experiment/regularizers/voxel_sparsity/default.yaml b/conf/experiment/regularizers/voxel_sparsity/default.yaml new file mode 100644 index 0000000..e6a0b31 --- /dev/null +++ b/conf/experiment/regularizers/voxel_sparsity/default.yaml @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: voxel_sparsity +ray_chunk: 32768 +batch_size: 32768 +use_inp_freq: inf + +weight: + type: exponential_decay + start: 0.01 + decay: 0.5 + num_epochs: 500 + +loss: + type: mse diff --git a/conf/experiment/regularizers/warp_level/lf.yaml b/conf/experiment/regularizers/warp_level/lf.yaml new file mode 100644 index 0000000..b19aa89 --- /dev/null +++ b/conf/experiment/regularizers/warp_level/lf.yaml @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: warp_level +ray_chunk: 32768 +batch_size: 8192 +use_inp_freq: inf +wait_iters: 10000 + +weight: + type: exponential_decay + start: 0.1 + decay: 1.0 + num_epochs: 500 + +dataset: + name: random_lightfield + st_plane: -1.0 + uv_plane: 0.0 + st_scale: + +color_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +svd_loss: + type: mae + weight: 0.0 + wait_iters: 0 + +level_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.1 + dir: 0.1 + bundle_size: 1 + +param: + n_dims: 4 + fn: two_plane diff --git a/conf/experiment/regularizers/warp_level/subdivided.yaml b/conf/experiment/regularizers/warp_level/subdivided.yaml new file mode 100644 index 0000000..cb40bb2 --- /dev/null +++ b/conf/experiment/regularizers/warp_level/subdivided.yaml @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: warp_level +ray_chunk: 32768 +batch_size: 4096 +use_inp_freq: inf +wait_iters: 10000 + +weight: + type: exponential_decay + start: 0.1 + decay: 1.0 + num_epochs: 500 + +dataset: + name: random_lightfield + st_plane: -1.0 + uv_plane: 0.0 + st_scale: + +color_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +svd_loss: + type: mae + weight: 0.0 + wait_iters: 0 + +level_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.1 + dir: 0.1 + bundle_size: 1 + +param: + n_dims: 4 + fn: two_plane diff --git a/conf/experiment/regularizers/weak_teacher/lf.yaml b/conf/experiment/regularizers/weak_teacher/lf.yaml new file mode 100644 index 0000000..d597b4a --- /dev/null +++ b/conf/experiment/regularizers/weak_teacher/lf.yaml @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
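The `param` blocks in these configs name a ray parameterization: `fn: two_plane` with `n_dims: 4` (as in the warp_level configs above, whose datasets also set `st_plane: -1.0` and `uv_plane: 0.0`) and `fn: pluecker` with `n_dims: 6` (as in the ray-depth configs earlier). The repository's exact conventions are defined elsewhere; the sketch below shows the standard form these names usually refer to, with plane positions and normalization assumed for illustration.

import torch

def two_plane(o: torch.Tensor, d: torch.Tensor, st_plane: float = -1.0, uv_plane: float = 0.0):
    """4D light-field coordinates: intersect each ray o + t*d with the planes
    z = st_plane and z = uv_plane and keep the (x, y) intersection points."""
    t_st = (st_plane - o[..., 2:3]) / d[..., 2:3]
    t_uv = (uv_plane - o[..., 2:3]) / d[..., 2:3]
    st = (o + t_st * d)[..., :2]
    uv = (o + t_uv * d)[..., :2]
    return torch.cat([st, uv], dim=-1)  # n_dims: 4

def pluecker(o: torch.Tensor, d: torch.Tensor):
    """6D Pluecker coordinates: normalized direction plus moment o x d."""
    d = torch.nn.functional.normalize(d, dim=-1)
    return torch.cat([d, torch.cross(o, d, dim=-1)], dim=-1)  # n_dims: 6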
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: teacher +batch_size: 8192 + +dataset: + name: "dense_${experiment.dataset.name}" + collection: "${experiment.dataset.collection}_dense" + split: train + root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}_teacher/" + + num_rows: 10 + num_cols: 10 + train_row_skip: 1 + train_col_skip: 1 + size: 1000 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + stop_weight: 0.025 + num_epochs: 500 + +loss: + type: mse diff --git a/conf/experiment/regularizers/weak_teacher/subdivided.yaml b/conf/experiment/regularizers/weak_teacher/subdivided.yaml new file mode 100644 index 0000000..9d335c4 --- /dev/null +++ b/conf/experiment/regularizers/weak_teacher/subdivided.yaml @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: teacher +batch_size: 4096 + +dataset: + name: "dense_${experiment.dataset.name}" + collection: "${experiment.dataset.collection}_dense" + split: train + root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}_teacher/" + + num_rows: 10 + num_cols: 10 + train_row_skip: 1 + train_col_skip: 1 + size: 1000 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + stop_weight: 0.025 + num_epochs: 500 + +loss: + type: mse diff --git a/conf/experiment/training/bom_tensorf.yaml b/conf/experiment/training/bom_tensorf.yaml new file mode 100644 index 0000000..afbaeff --- /dev/null +++ b/conf/experiment/training/bom_tensorf.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 16384 +net_chunk: 16384 +render_ray_chunk: 16384 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 40 +ckpt_every: 20 +test_every: 20 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 20 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/conf/experiment/training/catacaustics_tensorf.yaml b/conf/experiment/training/catacaustics_tensorf.yaml new file mode 100644 index 0000000..497722a --- /dev/null +++ b/conf/experiment/training/catacaustics_tensorf.yaml @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 65536 +net_chunk: 65536 +render_ray_chunk: 65536 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 100 +render_every: 1000 +ckpt_every: 100 +test_every: 200 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/conf/experiment/training/default.yaml b/conf/experiment/training/default.yaml new file mode 100644 index 0000000..37e0933 --- /dev/null +++ b/conf/experiment/training/default.yaml @@ -0,0 +1,62 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 4096 +ray_chunk: 131072 +net_chunk: 131072 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 25 +render_every: 100 +ckpt_every: 100 +test_every: 200 +flush_logs: 1000 +num_epochs: 5000 + +num_workers: 16 +num_gpus: 1 + +weight_init: + type: none + +loss: + type: mse + +color: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: steplr + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 2000 + decay_gamma: 0.5 + +embedding: + optimizer: adam + lr: 0.0005 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.1 diff --git a/conf/experiment/training/donerf_tensorf.yaml b/conf/experiment/training/donerf_tensorf.yaml new file mode 100644 index 0000000..e321697 --- /dev/null +++ b/conf/experiment/training/donerf_tensorf.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
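Each optimizer block above (the standalone `color` / `embedding` blocks in training/default.yaml and the `optimizers.*` blocks in the TensoRF variants) carries an Adam learning rate plus a scheduler choice (`steplr` or `exp`) with `decay_epoch` and `decay_gamma`. How these fields are consumed is defined by the training code elsewhere in the repository; the following is only a hedged sketch of one way such a block could be turned into a PyTorch optimizer and scheduler.

import torch

def build_optimizer(params, cfg: dict):
    # cfg is assumed to mirror one block above, e.g.
    # {"lr": 0.001, "weight_decay": 0, "lr_scheduler": "steplr",
    #  "decay_epoch": 2000, "decay_gamma": 0.5}
    opt = torch.optim.Adam(params, lr=cfg["lr"], weight_decay=cfg["weight_decay"])
    if cfg["lr_scheduler"] == "steplr":
        # Multiply the lr by decay_gamma every decay_epoch epochs.
        sched = torch.optim.lr_scheduler.StepLR(
            opt, step_size=cfg["decay_epoch"], gamma=cfg["decay_gamma"])
    else:
        # "exp": decay smoothly so the lr reaches decay_gamma after decay_epoch epochs.
        gamma = cfg["decay_gamma"] ** (1.0 / cfg["decay_epoch"])
        sched = torch.optim.lr_scheduler.ExponentialLR(opt, gamma=gamma)
    return opt, sched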
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 16384 +net_chunk: 16384 +render_ray_chunk: 16384 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 50 +ckpt_every: 20 +test_every: 50 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 80 + +optimizers: + color: + optimizer: adam + lr: 0.025 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/conf/experiment/training/eikonal_tensorf.yaml b/conf/experiment/training/eikonal_tensorf.yaml new file mode 100644 index 0000000..81676a9 --- /dev/null +++ b/conf/experiment/training/eikonal_tensorf.yaml @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 4096 +ray_chunk: 16384 +net_chunk: 16384 +render_ray_chunk: 16384 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 100 +render_every: 1000 +ckpt_every: 100 +test_every: 200 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.25 + + reset_opt_list: [2000, 3000, 4000, 5500, 7000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.25 + + reset_opt_list: [2000, 3000, 4000, 5500, 7000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + #reset_opt_list: [2000, 3000, 4000, 5500, 7000] + + embedding_impl: + optimizer: adam + lr: 0.0005 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + #reset_opt_list: [2000, 3000, 4000, 5500, 7000] diff --git a/conf/experiment/training/immersive_tensorf.yaml b/conf/experiment/training/immersive_tensorf.yaml new file mode 100644 index 0000000..e42a9cf --- /dev/null +++ b/conf/experiment/training/immersive_tensorf.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 5000 +ray_chunk: 5000 +net_chunk: 5000 +render_ray_chunk: 5000 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 40 +ckpt_every: 20 +test_every: 20 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 20 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/conf/experiment/training/llff_tensorf.yaml b/conf/experiment/training/llff_tensorf.yaml new file mode 100644 index 0000000..91a5683 --- /dev/null +++ b/conf/experiment/training/llff_tensorf.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 65536 +net_chunk: 65536 +render_ray_chunk: 65536 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 10 +ckpt_every: 10 +test_every: 10 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 20 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/conf/experiment/training/neural_3d_tensorf.yaml b/conf/experiment/training/neural_3d_tensorf.yaml new file mode 100644 index 0000000..afbaeff --- /dev/null +++ b/conf/experiment/training/neural_3d_tensorf.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 16384 +net_chunk: 16384 +render_ray_chunk: 16384 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 40 +ckpt_every: 20 +test_every: 20 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 20 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/conf/experiment/training/shiny_tensorf.yaml b/conf/experiment/training/shiny_tensorf.yaml new file mode 100644 index 0000000..497722a --- /dev/null +++ b/conf/experiment/training/shiny_tensorf.yaml @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 65536 +net_chunk: 65536 +render_ray_chunk: 65536 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 100 +render_every: 1000 +ckpt_every: 100 +test_every: 200 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/conf/experiment/training/shiny_tensorf_small.yaml b/conf/experiment/training/shiny_tensorf_small.yaml new file mode 100644 index 0000000..70c69a6 --- /dev/null +++ b/conf/experiment/training/shiny_tensorf_small.yaml @@ -0,0 +1,127 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 4096 +ray_chunk: 65536 +net_chunk: 65536 +render_ray_chunk: 65536 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 100 +render_every: 1000 +ckpt_every: 100 +test_every: 200 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 + +optimizers: + color: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.0005 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + calibration: + optimizer: adam + lr: 0.0005 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 25 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/conf/experiment/training/spaces_tensorf.yaml b/conf/experiment/training/spaces_tensorf.yaml new file mode 100644 index 0000000..91a5683 --- /dev/null +++ b/conf/experiment/training/spaces_tensorf.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 65536 +net_chunk: 65536 +render_ray_chunk: 65536 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 10 +ckpt_every: 10 +test_every: 10 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 20 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/conf/experiment/training/stanford_tensorf.yaml b/conf/experiment/training/stanford_tensorf.yaml new file mode 100644 index 0000000..18bf3de --- /dev/null +++ b/conf/experiment/training/stanford_tensorf.yaml @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +#ray_chunk: 3145728 +#net_chunk: 3145728 +#render_ray_chunk: 3145728 +ray_chunk: 16384 +net_chunk: 16384 +render_ray_chunk: 16384 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 40 +ckpt_every: 40 +test_every: 20 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 40 + +optimizers: + color: + optimizer: adam + lr: 0.005 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/conf/experiment/training/technicolor_tensorf.yaml b/conf/experiment/training/technicolor_tensorf.yaml new file mode 100644 index 0000000..fcdc66b --- /dev/null +++ b/conf/experiment/training/technicolor_tensorf.yaml @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 50000 +#ray_chunk: 3145728 +#net_chunk: 3145728 +#render_ray_chunk: 3145728 +ray_chunk: 50000 +net_chunk: 50000 +render_ray_chunk: 50000 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 5 +render_every: 20 +ckpt_every: 5 +test_every: 5 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 10000 +num_epochs: 4000 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/conf/experiment/visualizers/all.yaml b/conf/experiment/visualizers/all.yaml new file mode 100644 index 0000000..ea4b51e --- /dev/null +++ b/conf/experiment/visualizers/all.yaml @@ -0,0 +1,8 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ diff --git a/conf/experiment/visualizers/closest_view/default.yaml b/conf/experiment/visualizers/closest_view/default.yaml new file mode 100644 index 0000000..7ef36ac --- /dev/null +++ b/conf/experiment/visualizers/closest_view/default.yaml @@ -0,0 +1,10 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: closest_view diff --git a/conf/experiment/visualizers/embedding/default.yaml b/conf/experiment/visualizers/embedding/default.yaml new file mode 100644 index 0000000..320c2ad --- /dev/null +++ b/conf/experiment/visualizers/embedding/default.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [] + +fields: + distances: + use_abs: False + normalize: True + + point_offset: + use_abs: True + bounds: [0.0, 0.25] + + points: + use_abs: False + bounds: [-2.0, 2.0] diff --git a/conf/experiment/visualizers/embedding/default_cascaded.yaml b/conf/experiment/visualizers/embedding/default_cascaded.yaml new file mode 100644 index 0000000..13adbc4 --- /dev/null +++ b/conf/experiment/visualizers/embedding/default_cascaded.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [raw_distance] + +fields: + distances: + use_abs: False + normalize: True + + raw_distance: + use_abs: False + normalize: True + sort: True + + point_offset: + use_abs: True + bounds: [0.0, 0.25] diff --git a/conf/experiment/visualizers/embedding/default_cascaded_2.yaml b/conf/experiment/visualizers/embedding/default_cascaded_2.yaml new file mode 100644 index 0000000..26a58f1 --- /dev/null +++ b/conf/experiment/visualizers/embedding/default_cascaded_2.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [] + +fields: + distances: + use_abs: False + normalize: True + + raw_distance: + use_abs: False + normalize: True + + point_offset: + use_abs: True + bounds: [0.0, 0.25] diff --git a/conf/experiment/visualizers/embedding/default_reflect.yaml b/conf/experiment/visualizers/embedding/default_reflect.yaml new file mode 100644 index 0000000..2a36f39 --- /dev/null +++ b/conf/experiment/visualizers/embedding/default_reflect.yaml @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [] + +fields: + distances: + use_abs: False + normalize: True + + normal: + use_abs: True + normalize: False + bounds: [0.0, 1.0] + + render_normal: + use_abs: True + normalize: False + bounds: [0.0, 1.0] + + point_offset: + use_abs: True + bounds: [0.0, 0.25] diff --git a/conf/experiment/visualizers/embedding/default_time.yaml b/conf/experiment/visualizers/embedding/default_time.yaml new file mode 100644 index 0000000..1865e08 --- /dev/null +++ b/conf/experiment/visualizers/embedding/default_time.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [raw_distance, raw_flow] + +fields: + distances: + use_abs: False + normalize: True + + point_offset: + use_abs: True + bounds: [0.0, 0.25] + + spatial_flow: + use_abs: True + bounds: [0.0, 1.0] diff --git a/conf/experiment/visualizers/embedding/default_time_cascaded.yaml b/conf/experiment/visualizers/embedding/default_time_cascaded.yaml new file mode 100644 index 0000000..d38e34a --- /dev/null +++ b/conf/experiment/visualizers/embedding/default_time_cascaded.yaml @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [raw_distance, raw_flow] + +fields: + distances: + use_abs: False + normalize: True + + raw_distance: + use_abs: False + normalize: True + sort: True + + raw_flow: + use_abs: True + normalize: True + sort: True + + point_offset: + use_abs: True + bounds: [0.0, 0.25] + + spatial_flow: + use_abs: True + bounds: [0.0, 1.0] diff --git a/conf/experiment/visualizers/embedding/default_time_cascaded_2.yaml b/conf/experiment/visualizers/embedding/default_time_cascaded_2.yaml new file mode 100644 index 0000000..c3e8ad4 --- /dev/null +++ b/conf/experiment/visualizers/embedding/default_time_cascaded_2.yaml @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [] + +fields: + distances: + use_abs: False + normalize: True + + raw_distance: + use_abs: False + normalize: True + + raw_flow: + use_abs: True + normalize: True + + point_offset: + use_abs: True + bounds: [0.0, 0.25] + + spatial_flow: + use_abs: True + bounds: [0.0, 1.0] diff --git a/conf/experiment/visualizers/embedding/points.yaml b/conf/experiment/visualizers/embedding/points.yaml new file mode 100644 index 0000000..74ca0d2 --- /dev/null +++ b/conf/experiment/visualizers/embedding/points.yaml @@ -0,0 +1,20 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [] + +fields: + points: + use_abs: False + bounds: [-2.0, 2.0] diff --git a/conf/experiment/visualizers/epipolar/default.yaml b/conf/experiment/visualizers/epipolar/default.yaml new file mode 100644 index 0000000..c39f50b --- /dev/null +++ b/conf/experiment/visualizers/epipolar/default.yaml @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: epipolar +t: +v: +H: + +st_scale: +uv_scale: + +near: +far: diff --git a/conf/experiment/visualizers/focus/default.yaml b/conf/experiment/visualizers/focus/default.yaml new file mode 100644 index 0000000..d3863d5 --- /dev/null +++ b/conf/experiment/visualizers/focus/default.yaml @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: focus + +s: +t: + +ds: 200.0 +dt: 200.0 + +st_scale: +uv_scale: + +near: -1.0 +far: 0.0 +focal: 0.0 diff --git a/conf/experiment/visualizers/tensor/default.yaml b/conf/experiment/visualizers/tensor/default.yaml new file mode 100644 index 0000000..2732af6 --- /dev/null +++ b/conf/experiment/visualizers/tensor/default.yaml @@ -0,0 +1,10 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensor diff --git a/datasets/__init__.py b/datasets/__init__.py new file mode 100644 index 0000000..fffdc45 --- /dev/null +++ b/datasets/__init__.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from .blender import BlenderDataset, BlenderLightfieldDataset, DenseBlenderDataset +from .catacaustics import CatacausticsDataset +from .donerf import DONeRFDataset +from .eikonal import EikonalDataset +from .fourier import FourierDataset, FourierLightfieldDataset +from .immersive import ImmersiveDataset +from .llff import DenseLLFFDataset, LLFFDataset +from .neural_3d import Neural3DVideoDataset +from .random import RandomPixelDataset, RandomRayDataset, RandomRayLightfieldDataset, RandomViewSubsetDataset +from .shiny import DenseShinyDataset, ShinyDataset +from .spaces import SpacesDataset +from .stanford import StanfordEPIDataset, StanfordLightfieldDataset, StanfordLLFFDataset +from .technicolor import TechnicolorDataset +from .video3d_ground_truth import Video3DTimeGroundTruthDataset +from .video3d_static import Video3DDataset +from .video3d_time import Video3DTimeDataset + +dataset_dict = { + "fourier": FourierDataset, + "fourier_lightfield": FourierLightfieldDataset, + "random_ray": RandomRayDataset, + "random_pixel": RandomPixelDataset, + "random_lightfield": RandomRayLightfieldDataset, + "random_view": RandomViewSubsetDataset, + "donerf": DONeRFDataset, + "blender": BlenderDataset, + "dense_blender": DenseBlenderDataset, + "llff": LLFFDataset, + "eikonal": EikonalDataset, + "dense_llff": DenseLLFFDataset, + "dense_shiny": DenseShinyDataset, + "shiny": ShinyDataset, + "blender_lightfield": BlenderLightfieldDataset, + "stanford": StanfordLightfieldDataset, + "stanford_llff": StanfordLLFFDataset, + "stanford_epi": StanfordEPIDataset, + "video3d": Video3DDataset, + "video3d_time": Video3DTimeDataset, + "video3d_time_ground_truth": Video3DTimeGroundTruthDataset, + "technicolor": TechnicolorDataset, + "neural_3d": Neural3DVideoDataset, + "catacaustics": CatacausticsDataset, + "immersive": ImmersiveDataset, 
+ "spaces": SpacesDataset, +} diff --git a/datasets/base.py b/datasets/base.py new file mode 100644 index 0000000..f514dcc --- /dev/null +++ b/datasets/base.py @@ -0,0 +1,526 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import copy +import os +import pdb + +import numpy as np +import torch +from iopath.common.file_io import NativePathHandler, PathManager +from omegaconf import OmegaConf # @manual //github/third-party/omry/omegaconf:omegaconf +from torch.utils.data import Dataset +from torchvision import transforms as T + +from utils.pose_utils import create_spherical_poses, create_spiral_poses, interpolate_poses +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + + +class BaseDataset(Dataset): + def __init__(self, cfg, split="train", **kwargs): + + ## Settings ## + self.chunks = None + # Path manager + self.pmgr = PathManager() + self.pmgr.register_handler(NativePathHandler()) + + # Copy train dataset config + if "train_dataset" in kwargs: + base_dataset_cfg = copy.deepcopy(kwargs["train_dataset"].cfg.dataset) + OmegaConf.set_struct(base_dataset_cfg, False) + + for key in cfg.dataset.keys(): + base_dataset_cfg.__dict__[key] = cfg.dataset[key] + setattr(base_dataset_cfg, key, cfg.dataset[key]) + + cfg.dataset = base_dataset_cfg + + ## Dataset cfg + self.cfg = cfg + self.split = getattr(cfg.dataset, "split", split) + self.dataset_cfg = getattr(cfg.dataset, self.split, cfg.dataset) + + # Basic dataset params + self.root_dir = os.path.expanduser(self.dataset_cfg.root_dir) + + if "img_wh" in self.dataset_cfg and ( + not isinstance(self.dataset_cfg.img_wh, str) and self.dataset_cfg.img_wh is not None + ): + self._img_wh = tuple(self.dataset_cfg.img_wh) + self.img_wh = self._img_wh + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + self.downsample = 1 + else: + self.img_wh = None + self.downsample = getattr(self.dataset_cfg, "downsample", 1) + + self.centered_pixels = getattr(self.dataset_cfg, "centered_pixels", False) + + # Rendering + self.render_supersample = self.dataset_cfg.render_params.supersample + self.render_crop = self.dataset_cfg.render_params.crop + + # Validation + self.val_num = self.dataset_cfg.val_num + self.val_skip = self.dataset_cfg.val_skip + self.val_set = self.dataset_cfg.val_set if "val_set" in self.dataset_cfg else [] + self.val_crop = self.dataset_cfg.val_crop if "val_crop" in self.dataset_cfg else 1.0 + self.val_all = (self.dataset_cfg.val_all if "val_all" in self.dataset_cfg else False) or ( + kwargs["val_all"] if "val_all" in kwargs else False + ) + + # Crop + self.precrop_iters = self.dataset_cfg.precrop_iters if "precrop_iters" in self.dataset_cfg else 0 + self.use_crop = self.precrop_iters > 0 + self.cur_iter = 0 + self.precrop_frac = self.dataset_cfg.precrop_frac if "precrop_fac" in self.dataset_cfg else 0.5 + + # Patch loading + self.use_patches = self.dataset_cfg.use_patches if "use_patches" in self.dataset_cfg else False + self.use_one_image = self.dataset_cfg.use_one_image if "use_one_image" in self.dataset_cfg else False + self.use_full_image = ( + self.dataset_cfg.use_full_image if "use_full_image" in self.dataset_cfg else self.use_one_image + ) + self.blur_radius = self.dataset_cfg.blur_radius if "blur_radius" in self.dataset_cfg else 0 + + ## Set-up data ## + + self.define_transforms() + self.prepare_data() + + def 
read_meta(self): + pass + + def prepare_train_data(self): + self.num_images = len(self.image_paths) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + + for idx in range(len(self.image_paths)): + # coords + self.all_coords += [self.get_coords(idx)] + + # Color + self.all_rgb += [self.get_rgb(idx)] + + self.update_all_data(torch.cat(self.all_coords, 0), torch.cat(self.all_rgb, 0)) + + def update_all_data(self, coords, rgb): + self.all_coords = coords + self.all_rgb = rgb + self.all_weights = self.get_weights() + + ## Patches + if self.use_patches or self.use_crop: + self._all_coords = torch.clone(self.all_coords) + self._all_rgb = torch.clone(self.all_rgb) + + ## All inputs + self.all_inputs = torch.cat([self.all_coords, self.all_rgb, self.all_weights], -1) + + def prepare_val_data(self): + self.prepare_test_data() + + def prepare_test_data(self): + pass + + def prepare_render_data(self): + pass + + def shift_chunk(self): + return 0 + + def prepare_data(self): + self.read_meta() + + if self.split == "train": + if self.cfg.params.render_only or self.cfg.params.test_only: + self.all_inputs = [0] + self.all_inputs_sam = [0] + else: + self.prepare_train_data() + elif self.split == "val": + self.prepare_val_data() + elif self.split == "test": + self.prepare_test_data() + elif self.split == "render": + self.prepare_render_data() + + def define_transforms(self): + if self.blur_radius > 0: + self.transform = T.Compose( + [ + T.ToTensor(), + T.GaussianBlur((self.blur_radius * 2 + 1, self.blur_radius * 2 + 1), self.blur_radius / 3.0), + ] + ) + else: + self.transform = T.ToTensor() + + def scale(self, scale): + self.img_wh = (self._img_wh[0] // scale, self._img_wh[1] // scale) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + self.define_transforms() + self.prepare_data() + + def get_coords(self, idx): + pass + + def get_weights(self, device="cpu"): + return torch.ones(*self.all_coords[..., 0:1].shape, device=device) + + def get_rgb(self, idx): + pass + + def get_closest_rgb(self, query_pose): + pass + + def shuffle(self): + if not self.use_patches: + # Get permutation + if self.use_full_image: + self.all_coords = self.all_coords.view(-1, self.img_wh[0] * self.img_wh[1], 6) + self.all_rgb = self.all_rgb.view(-1, self.img_wh[0] * self.img_wh[1], 3) + + perm = torch.tensor(np.random.permutation(self.all_coords.shape[0])) + else: + perm = torch.tensor(np.random.permutation(len(self))) + + # Shuffle + self.all_coords = self.all_coords[perm].view(-1, 6) + self.all_rgb = self.all_rgb[perm].view(-1, 3) + else: + self.shuffle_patches() + + # Weights and inputs + self.all_weights = self.get_weights() + self.all_inputs = torch.cat([self.all_coords, self.all_rgb, self.all_weights], -1) + + def __len__(self): + if self.split == "train": + if self.cfg.params.render_only or self.cfg.params.test_only: + return 1 + else: + return len(self.all_coords) + elif self.split == "val": + return min(self.val_num, len(self.poses)) + elif self.split == "render": + if self.render_max_frames > 0: + return min(self.render_max_frames, len(self.poses)) + else: + return len(self.poses) + else: + return len(self.poses) + + def get_one_image_batch(self, idx, batch_size, device="cuda"): + return None + + def __getitem__(self, idx): + if self.split == "render": + batch = {"coords": self.get_coords(idx), "pose": self.poses[idx], "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "val" or self.split == "test": + batch = {"coords": self.get_coords(idx), "rgb": 
self.get_rgb(idx), "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def get_batch(self, batch_idx, batch_size, jitter=None): + batch_start = batch_idx * batch_size + batch_end = (batch_idx + 1) * batch_size + return self[batch_start:batch_end] + + def crop_all_tensors( + self, + t, + W, + H, + dW, + dH, + ): + t = t.view(self.num_images, H, W, -1) + + t = t[ + :, + (H // 2 - dH) : (H // 2 + dH + 1), + (W // 2 - dW) : (W // 2 + dW + 1), + ] + + return t.reshape(-1, t.shape[-1]) + + def crop_one_tensor( + self, + t, + W, + H, + dW, + dH, + ): + t = t.view(1, H, W, -1) + + t = t[ + :, + (H // 2 - dH) : (H // 2 + dH + 1), + (W // 2 - dW) : (W // 2 + dW + 1), + ] + + H, W = t.shape[1], t.shape[2] + + return W, H, t.reshape(-1, t.shape[-1]) + + def crop(self): + if self.use_crop and self.cur_iter < self.precrop_iters: + W = self.img_wh[0] + H = self.img_wh[1] + dW = int(W // 2 * self.precrop_frac) + dH = int(H // 2 * self.precrop_frac) + + self.all_coords = self.crop_all_tensors(self._all_coords, W, H, dW, dH) + self.all_rgb = self.crop_all_tensors(self._all_rgb, W, H, dW, dH) + + def crop_batch(self, batch): + W = self.img_wh[0] + H = self.img_wh[1] + + if self.split == "val" or self.split == "test": + crop = self.val_crop + elif self.split == "render": + crop = self.render_crop + else: + crop = 1.0 + + if crop < 1.0: + dW = int(W // 2 * crop) + dH = int(H // 2 * crop) + + for k in batch.keys(): + if torch.is_tensor(batch[k]): + temp_W, temp_H, batch[k] = self.crop_one_tensor(batch[k], W, H, dW, dH) + + W, H = temp_W, temp_H + + return W, H, batch + + def patchify_tensor( + self, + t, + width, + height, + patch_offset, + patch_width, + ): + c = t.shape[-1] + t = t.view(self.num_images, height, width, c) + + # Remove boundaries + p = self.blur_radius + + if p > 0: + t = t[:, p:-p, p:-p] + + # Patch offset + t = t[:, patch_offset:, patch_offset:] + + # Crop to multiple of patch width + round_height = (t.shape[1] // patch_width) * patch_width + round_width = (t.shape[2] // patch_width) * patch_width + t = t[:, :round_height, :round_width] + + t = t.reshape( + t.shape[0], round_height // patch_width, patch_width, round_width // patch_width, patch_width, c + ).permute(0, 1, 3, 2, 4, 5) + + return t.reshape(-1, patch_width * patch_width, c) + + def shuffle_patches(self): + print("Shuffle patches") + + # Patchify + patch_width = self.dataset_cfg.patch_width + width, height = self.img_wh[0], self.img_wh[1] + patch_offset = int(np.random.uniform() * patch_width) + + self.all_coords = self.patchify_tensor(self._all_coords, width, height, patch_offset, patch_width) + + self.all_rgb = self.patchify_tensor(self._all_rgb, width, height, patch_offset, patch_width) + + # Shuffle + perm = torch.tensor(np.random.permutation(self.all_coords.shape[0])) + + self.all_coords = self.all_coords[perm].reshape(-1, self.all_coords.shape[-1]) + self.all_rgb = self.all_rgb[perm].reshape(-1, self.all_rgb.shape[-1]) + + +class Base5DDataset(BaseDataset): + def __init__(self, cfg, split="train", **kwargs): + + # Rendering + self.render_spherical = 
cfg.dataset.spherical_poses if "spherical_poses" in cfg else False + self.render_interpolate = cfg.dataset.render_params.interpolate + self.render_max_frames = ( + cfg.dataset.render_params.max_frames if "max_frames" in cfg.dataset.render_params else 0 + ) + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + self.include_world = cfg.dataset.include_world if "include_world" in cfg.dataset else False + + super().__init__(cfg, split, **kwargs) + + def prepare_render_data(self): + if self.render_spherical: + self.poses = create_spherical_poses(self.bounds.max()) + + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + radii = np.percentile(np.abs(self.poses[..., 3]), 90, axis=0) + self.poses = create_spiral_poses(self.poses, radii, focus_depth) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + def get_intrinsics_screen_space(self): + K = np.copy(self.get_intrinsics()) + K[0, 2] = K[0, 2] - self.img_wh[0] / 2 + K[1, 2] = K[1, 2] - self.img_wh[1] / 2 + K[0, :] = 2 * K[0, :] / self.img_wh[0] + K[1, :] = -2 * K[1, :] / self.img_wh[1] + return K + + def get_intrinsics(self): + pass + + def get_closest_rgb(self, query_pose): + W = self.img_wh[0] + H = self.img_wh[1] + + images = self.all_rgb.view(self.num_images, H, W, -1) + dists = np.linalg.norm(self.poses[:, :3, -1] - query_pose[None, :3, -1], axis=-1) + return images[list(np.argsort(dists))[0]] + + def get_coords_from_camera(self, pose, time, cam_idx, K, W, H, device="cuda"): + # Directions + directions = get_ray_directions_K(H, W, K, centered_pixels=True, device=device) + + # Pose + c2w = torch.FloatTensor(pose[:3, :4]).to(device) + + # Rays + rays_o, rays_d = get_rays(directions, c2w) + rays = torch.cat([rays_o, rays_d], dim=-1) + + # To NDC + if self.use_ndc: + rays = self.to_ndc(rays) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * cam_idx], dim=-1) + + # Add times + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * time], dim=-1) + + # Add camera idx + return rays + + +class Base6DDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + + self.render_interpolate_time = ( + cfg.dataset.render_params.interpolate_time if "interpolate_time" in cfg.dataset.render_params else False + ) + + super().__init__(cfg, split, **kwargs) + + def prepare_render_data(self): + if self.render_spherical: + self.poses = create_spherical_poses(self.bounds.max()) + + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + radii = np.percentile(np.abs(self.poses[..., 3]), 90, axis=0) + self.poses = create_spiral_poses(self.poses, radii, focus_depth) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + # Get times + if (self.num_frames - 1) > 0: + self.times = np.linspace(0, self.num_frames - 1, len(self.poses)) + + if not self.render_interpolate_time: + self.times = np.round(self.times) + + self.times = self.times / (self.num_frames - 1) + else: + self.times = [0.0 for p in self.poses] + + def get_closest_rgb(self, query_pose, query_time): + W = self.img_wh[0] + H = self.img_wh[1] + + # Reshape + images = self.all_rgb.view(self.num_frames, -1, H, W, self.all_rgb.shape[-1]) + poses = 
self.poses.reshape(self.num_frames, -1, self.poses.shape[-2], self.poses.shape[-1]) + + # Get poses at current frame + frame_idx = int(np.round(query_time * (self.num_frames - 1))) + images = images[frame_idx] + poses = poses[frame_idx] + + # Distances + dists = np.linalg.norm(poses[:, :3, -1] - query_pose[None, :3, -1], axis=-1) + + # Closest rgb + return images[list(np.argsort(dists))[0]] + + def __getitem__(self, idx): + if self.split == "render": + batch = {"coords": self.get_coords(idx), "pose": self.poses[idx], "time": self.times[idx], "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "val" or self.split == "test": + batch = {"coords": self.get_coords(idx), "rgb": self.get_rgb(idx), "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/datasets/blender.py b/datasets/blender.py new file mode 100644 index 0000000..2d15550 --- /dev/null +++ b/datasets/blender.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import json +import os + +import cv2 +import numpy as np +import torch +from PIL import Image + +from utils.ray_utils import get_ray_directions_K, get_rays + +from .base import Base5DDataset +from .lightfield import LightfieldDataset + + +class BlenderLightfieldDataset(LightfieldDataset): + def __init__(self, cfg, split="train", **kwargs): + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + # Read meta + transforms_path = os.path.join(self.root_dir, "transforms.json") + + with self.pmgr.open(transforms_path, "r") as f: + self.meta = json.load(f) + + # Image paths and pose + self.image_paths = [] + self.poses = [] + + for frame in self.meta["frames"]: + # Image path + image_path = frame["file_path"].split("/")[-1] + self.image_paths += [image_path] + + # Pose + pose = np.array(frame["transform_matrix"])[:3, :4] + self.poses += [pose] + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, f"{image_path}.png"), "rb") as im_file: + img = Image.open(im_file).convert("RGBA") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + +class BlenderDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + if self.split == "render": + self.read_meta_for_split("test") + elif self.split == "train": + self.read_meta_for_split("train") + elif self.split == "val": + self.read_meta_for_split("test") + else: + self.read_meta_for_split(self.split) + + def read_meta_for_split(self, split): + with self.pmgr.open(os.path.join(self.root_dir, f"transforms_{split}.json"), "r") as f: + self.meta = json.load(f) + + if split == "val": + self.meta["frames"] = self.meta["frames"][: self.val_num] + + W, H = self.img_wh + + self.focal = 0.5 * 800 / np.tan(0.5 * self.meta["camera_angle_x"]) + self.focal *= self.img_wh[0] / 800 + + self.K = np.eye(3) + self.K[0, 
0] = self.focal + self.K[0, 2] = W / 2.0 + self.K[1, 1] = self.focal + self.K[1, 2] = H / 2.0 + + # Bounds, common for all scenes + self.near = 2.0 + self.far = 6.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near, self.far]) + + # Ray directions for all pixels, same for all images (same H, W, focal) + self.centered_pixels = True + self.directions = get_ray_directions_K(H, W, self.K, centered_pixels=True) + + # Image paths and pose + self.image_paths = [] + self.poses = [] + + for frame in self.meta["frames"]: + # Image path + self.image_paths += [frame["file_path"]] + + # Pose + pose = np.array(frame["transform_matrix"])[:3, :4] + self.poses += [pose] + + self.poses = np.stack(self.poses, axis=0) + + def prepare_render_data(self): + self.prepare_test_data() + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(self.directions, c2w) + return torch.cat([rays_o, rays_d], dim=-1) + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, f"{image_path}.png"), "rb") as im_file: + img = Image.open(im_file).convert("RGBA") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_intrinsics(self): + K = np.eye(3) + K[0, 0] = self.focal + K[0, 2] = self.img_wh[0] / 2 + K[1, 1] = self.focal + K[1, 2] = self.img_wh[1] / 2 + + return K + + +class DenseBlenderDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + if self.split == "render": + self.read_meta_for_split("test") + elif self.split == "train": + self.read_meta_for_split("test") + elif self.split == "val": + self.read_meta_for_split("test") + else: + self.read_meta_for_split(self.split) + + def read_meta_for_split(self, split): + with self.pmgr.open(os.path.join(self.root_dir, f"transforms_{split}.json"), "r") as f: + self.meta = json.load(f) + + W, H = self.img_wh + + self.focal = 0.5 * 800 / np.tan(0.5 * self.meta["camera_angle_x"]) + self.focal *= self.img_wh[0] / 800 + + self.K = np.eye(3) + self.K[0, 0] = self.focal + self.K[0, 2] = W / 2.0 + self.K[1, 1] = self.focal + self.K[1, 2] = H / 2.0 + + # Bounds, common for all scenes + self.near = 2.0 + self.far = 6.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near, self.far]) + + # Ray directions for all pixels, same for all images (same H, W, focal) + self.centered_pixels = True + self.directions = get_ray_directions_K(H, W, self.K, centered_pixels=self.centered_pixels) + + # Image paths and pose + self.image_paths = [] + self.poses = [] + + for frame in self.meta["frames"]: + # Image path + self.image_paths += [frame["file_path"]] + + # Pose + pose = np.array(frame["transform_matrix"])[:3, :4] + self.poses += [pose] + + self.poses = np.stack(self.poses, axis=0) + + ## Holdout validation images + if self.val_set == "lightfield": + step = self.dataset_cfg.lightfield_step + rows = self.dataset_cfg.lightfield_rows + cols = self.dataset_cfg.lightfield_cols + val_indices = [] + + for row in range(0, rows, 1): + for col in range(0, cols, 1): + idx = row * cols + col + + if row % step != 0 or col % step != 0: + val_indices.append(idx) + + elif len(self.val_set) > 0: + 
val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.poses = self.poses[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.poses = self.poses[train_indices] + + def prepare_render_data(self): + self.prepare_test_data() + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(self.directions, c2w) + return torch.cat([rays_o, rays_d], dim=-1) + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, f"{image_path}.png"), "rb") as im_file: + img = Image.open(im_file).convert("RGBA") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_intrinsics(self): + K = np.eye(3) + K[0, 0] = self.focal + K[0, 2] = self.img_wh[0] / 2 + K[1, 1] = self.focal + K[1, 2] = self.img_wh[1] / 2 + + return K diff --git a/datasets/catacaustics.py b/datasets/catacaustics.py new file mode 100644 index 0000000..74708ac --- /dev/null +++ b/datasets/catacaustics.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import os +import sys +from pathlib import Path + +import numpy as np +import torch +from PIL import Image + +from utils.intersect_utils import intersect_axis_plane +from utils.pose_utils import center_poses_with, correct_poses_bounds, create_spiral_poses, interpolate_poses +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + +from .llff import LLFFDataset + +# import open3d as o3d + + +def readBundleFolder(cameras_folder, W, H, extension=".png", name_ints=8): + poses = [] + intrinsics = [] + image_paths = [] + + with open(os.path.join(cameras_folder, "bundle.out")) as bundle_file: + # First line is a comment + _ = bundle_file.readline() + num_cameras, _ = [int(x) for x in bundle_file.readline().split()] + + for idx in range(num_cameras): + cam_name = "{num:0{width}}".format(num=idx, width=name_ints) + extension + focal, dist0, dist1 = [float(x) for x in bundle_file.readline().split()] + + # Rotation + R = [] + + for i in range(3): + R.append([float(x) for x in bundle_file.readline().split()]) + + R = np.array(R).reshape(3, 3) + + # Translation + T = [float(x) for x in bundle_file.readline().split()] + T = np.array(T) + + # Pose + pose = np.eye(4) + pose[:3, :3] = R + pose[:3, -1] = T + # pose[:3, :3] = R.T + # pose[:3, -1] = -R.T @ T.T + pose = np.linalg.inv(pose) + + pose_pre = np.eye(4) + # pose_pre[1, 1] *= -1 + # pose_pre[2, 2] *= -1 + + pose = pose_pre @ pose @ pose_pre + + poses.append(pose[:3]) + + # Intrinsics + image_path = os.path.join(cameras_folder, cam_name) + image_name = Path(cam_name).stem + image = Image.open(image_path) + + K = np.eye(3) + K[0, 0] = focal * W / float(image.size[0]) + K[0, 2] = W / 2.0 + K[1, 1] = focal * H / float(image.size[1]) + K[1, 2] = H / 2.0 + intrinsics.append(K) + + # TODO: + # 1) Poses + # 2) Intrinsics + # 3) Model settings + + # Image + image_path = os.path.join(cameras_folder, cam_name) + image_paths.append(image_path) + + return np.stack(poses, 0), np.stack(intrinsics, 0), image_paths + + +class CatacausticsDataset(LLFFDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + self.train_cameras_folder = os.path.join(self.root_dir, "cropped_train_cameras") + self.validation_cameras_folder = os.path.join(self.root_dir, "validation_cameras") + self.test_cameras_folder = os.path.join(self.root_dir, "test_path_cameras") + + train_poses, train_intrinsics, train_image_paths = readBundleFolder( + self.train_cameras_folder, self.img_wh[0], self.img_wh[1] + ) + validation_poses, validation_intrinsics, validation_image_paths = readBundleFolder( + self.validation_cameras_folder, self.img_wh[0], self.img_wh[1], name_ints=5 + ) + test_poses, test_intrinsics, test_image_paths = readBundleFolder( + self.test_cameras_folder, self.img_wh[0], self.img_wh[1], name_ints=5 + ) + + self.poses_dict = { + "train": train_poses, + "render": test_poses, + "val": validation_poses, + "test": test_poses, + } + self.poses = np.stack(self.poses_dict[self.split], 0) + + self.intrinsics_dict = { + "train": train_intrinsics, + "render": test_intrinsics, + "val": validation_intrinsics, + "test": test_intrinsics, + } + self.intrinsics = np.stack(self.intrinsics_dict[self.split], 0) + 
self.K = self.intrinsics_dict["train"][0] + + self.image_paths_dict = { + "train": train_image_paths, + "render": test_image_paths, + "val": validation_image_paths, + "test": test_image_paths, + } + self.image_paths = self.image_paths_dict[self.split] + + # Geometry + print("Reading Point-Cloud...") + + pcd = o3d.io.read_point_cloud(os.path.join(self.root_dir, "meshes", "dense_point_cloud.ply")) + self.bbox_center = np.array(pcd.get_center()) + points = np.array(pcd.points) + + min_dist = np.linalg.norm(points - self.bbox_center[None], axis=-1).min() + max_dist = np.linalg.norm(points - self.bbox_center[None], axis=-1).max() + fac = 8.0 / (min_dist + max_dist) + + min_dist = min_dist * fac + max_dist = max_dist * fac + self.bbox_center = self.bbox_center * fac + self.bbox_min = np.array(pcd.get_min_bound()) * fac - self.bbox_center + self.bbox_max = np.array(pcd.get_max_bound()) * fac - self.bbox_center + + self.depth_range = [min_dist, max_dist] + + # Change poses + self.poses[..., -1] = self.poses[..., -1] * fac - self.bbox_center + + # Bounds + self.near = min_dist + self.far = max_dist + self.bounds = np.array([self.near, self.far]) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + + ## Correct poses + # poses = np.copy(self.poses) + # train_poses = np.stack(self.poses_dict["train"]) + + # if self.use_ndc or self.correct_poses: + # self.poses, self.poses_avg = center_poses_with( + # poses, np.stack(self.poses_dict["train"][:1]) + # ) + # train_poses, _ = center_poses_with( + # np.copy(train_poses), np.stack(self.poses_dict["train"][:1]) + # ) + # #self.poses, self.poses_avg = center_poses_with( + # # poses, np.stack(self.poses_dict["train"]) + # #) + # #train_poses, _ = center_poses_with( + # # np.copy(train_poses), np.stack(self.poses_dict["train"]) + # #) + + # sc = np.max(np.abs(train_poses[..., -1])) + # self.poses[..., -1] /= sc + + # filter_idx = np.argwhere(self.poses[..., 2, 2] > 0.75).astype(np.int32).reshape(-1).tolist() + # self.image_paths = [self.image_paths[i] for i in filter_idx] + # self.poses = self.poses[filter_idx] + # self.intrinsics = self.intrinsics[filter_idx] + + def prepare_render_data(self): + self.prepare_test_data() + + def prepare_train_data(self): + self.num_images = len(self.image_paths) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + + for idx in range(len(self.image_paths)): + # for idx in range(1): + # coords + self.all_coords += [self.get_coords(idx)] + + # Color + self.all_rgb += [self.get_rgb(idx)] + + # Format / save loaded data + self.update_all_data( + torch.cat(self.all_coords, 0), + torch.cat(self.all_rgb, 0), + ) + + def update_all_data(self, coords, rgb): + self.all_coords = coords + self.all_rgb = rgb + self.all_weights = self.get_weights() + + ## Patches + if self.use_patches or self.use_crop: + self._all_coords = torch.clone(self.all_coords) + self._all_rgb = torch.clone(self.all_rgb) + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_weights, + ], + -1, + ) + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + K = 
torch.FloatTensor(self.intrinsics[idx]) + c2w = torch.FloatTensor(self.poses[idx]) + + directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=True) + rays_o, rays_d = get_rays(directions, c2w) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + return rays + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + print(f"Loading image {idx}") + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file) + img = img.convert("RGBA") + + img = img.resize(self._img_wh) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_intrinsics(self): + return self.intrinsics + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(idx), + "pose": self.poses[idx], + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/datasets/donerf.py b/datasets/donerf.py new file mode 100644 index 0000000..aa64414 --- /dev/null +++ b/datasets/donerf.py @@ -0,0 +1,290 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT + +import json +import os + +import cv2 +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import ( + center_poses_with, + center_poses_with_rotation_only, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + +from .base import Base5DDataset, Base6DDataset +from .lightfield import LightfieldDataset + + +class DONeRFDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + self.center_poses = cfg.dataset.center_poses if "center_poses" in cfg.dataset else False + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + if self.split == "render": + self.read_meta_for_split("test", "cam_path_pan.json") + elif self.split == "test": + self.read_meta_for_split("test", "transforms_test.json") + elif self.split == "train": + self.read_meta_for_split("train", "transforms_train.json") + elif self.split == "val": + self.read_meta_for_split("val", "transforms_val.json") + else: + self.read_meta_for_split(self.split, "transforms_test.json") + + def load_poses_from_meta(self, meta, dataset_meta): + origin = np.array(dataset_meta["view_cell_center"]) + + # Image paths and pose + image_paths = [] + poses = [] + + for frame in meta["frames"]: + # Image path + if "file_path" in frame: + image_paths += [frame["file_path"]] + else: + image_paths += [None] + + # Pose + pose = np.array(frame["transform_matrix"])[:3, :4] + + if self.center_poses: + pose[:3, -1] = pose[:3, -1] - origin + + poses += [pose] + + poses = np.stack(poses, axis=0) + + return poses, image_paths + + def read_meta_for_split(self, split, split_file): + # Load train meta + with self.pmgr.open(os.path.join(self.root_dir, "transforms_train.json"), "r") as f: + self.train_meta = json.load(f) + + # Load meta + with self.pmgr.open(os.path.join(self.root_dir, split_file), "r") as f: + self.meta = json.load(f) + + if split == "val": + self.meta["frames"] = self.meta["frames"][: self.val_num] + + # Load dataset info + with self.pmgr.open(os.path.join(self.root_dir, "dataset_info.json"), "r") as f: + self.dataset_meta = json.load(f) + + W, H = self.img_wh + + self.focal = 0.5 * 800 / np.tan(0.5 * self.dataset_meta["camera_angle_x"]) + self.focal *= self.img_wh[0] / 800 + + self.K = np.eye(3) + self.K[0, 0] = self.focal + self.K[0, 2] = W / 2.0 + self.K[1, 1] = self.focal + self.K[1, 2] = H / 2.0 + + # Bounds, common for all scenes + self.depth_range = self.dataset_meta["depth_range"] + self.near = self.dataset_meta["depth_range"][0] + self.far = self.dataset_meta["depth_range"][1] + # self.depth_range = np.array([self.near * 1.5, self.far]) + + self.view_cell_size = np.max(np.array(self.dataset_meta["view_cell_size"])) + self.bounds = np.array([self.near, self.far]) + + # Image paths and pose + self.train_poses, _ = self.load_poses_from_meta(self.train_meta, self.dataset_meta) + self.poses, self.image_paths = self.load_poses_from_meta(self.meta, self.dataset_meta) + + # Correct + if self.use_ndc or self.correct_poses: + self.poses, _ = center_poses_with_rotation_only(self.poses, self.train_poses) + + if self.dataset_cfg.collection in ["pavillon"] and self.split == "render": + self.poses[..., :3, -1] *= 0.35 + + # Ray directions for all pixels, same for all 
images (same H, W, focal) + self.centered_pixels = True + self.directions = get_ray_directions_K(H, W, self.K, centered_pixels=self.centered_pixels) + + def prepare_train_data(self): + self.num_images = len(self.image_paths) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + self.all_depth = [] + self.all_points = [] + + for idx in range(len(self.image_paths)): + # coords + self.all_coords += [self.get_coords(idx)] + + # Color + self.all_rgb += [self.get_rgb(idx)] + + # Depth + self.all_depth += [self.get_depth(idx)] + + # Points + self.all_points += [self.get_points(idx)] + + self.update_all_data( + torch.cat(self.all_coords, 0), + torch.cat(self.all_rgb, 0), + torch.cat(self.all_depth, 0), + torch.cat(self.all_points, 0), + ) + + # Calculate bounds + mask = self.all_depth != 0.0 + self.bbox_min = self.all_points[mask.repeat(1, 3)].reshape(-1, 3).min(0)[0] + self.bbox_max = self.all_points[mask.repeat(1, 3)].reshape(-1, 3).max(0)[0] + + # self.near = float(self.all_depth[mask].min()) + # self.far = float(self.all_depth[mask].max()) + + def update_all_data(self, coords, rgb, depth, points): + self.all_coords = coords + self.all_rgb = rgb + self.all_depth = depth + self.all_points = points + self.all_weights = self.get_weights() + + ## Patches + if self.use_patches or self.use_crop: + self._all_coords = torch.clone(self.all_coords) + self._all_rgb = torch.clone(self.all_rgb) + self._all_depth = torch.clone(self.all_depth) + + ## All inputs + self.all_inputs = torch.cat([self.all_coords, self.all_rgb, self.all_depth, self.all_weights], -1) + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["depth"] = batch["inputs"][..., self.all_coords.shape[-1] + 3 : self.all_coords.shape[-1] + 4] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def prepare_render_data(self): + self.prepare_test_data() + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(self.directions, c2w) + + if self.use_ndc: + return self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + return torch.cat([rays_o, rays_d], dim=-1) + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, f"{image_path}.png"), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGBA")) + + img = cv2.resize(img, self._img_wh, interpolation=cv2.INTER_AREA) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = cv2.resize(img, self.img_wh, interpolation=cv2.INTER_AREA) + + img = self.transform(img) + img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_depth(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, f"{image_path}_depth.npz"), "rb") as depth_file: + with np.load(depth_file) as depth: + img = depth["arr_0"].reshape(800, 800) + + # Resize + img = cv2.resize(img, self._img_wh, interpolation=cv2.INTER_NEAREST) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = cv2.resize(img, self.img_wh, interpolation=cv2.INTER_NEAREST) + + # Flip + img = np.flip(img, 0) + + # Transform + img = 
self.transform(np.copy(img)) + + # Return + depth = img.view(1, -1).permute(1, 0) + directions = torch.nn.functional.normalize(self.directions, p=2.0, dim=-1).view(-1, 3) + depth = depth / torch.abs(directions[..., 2:3]) + + # depth[depth < self.near] = self.near + # depth[depth > self.far] = self.far + depth[depth < self.near] = 0.0 + depth[depth > self.far] = 0.0 + + return depth + + def get_points(self, idx): + rays = self.all_coords[idx][..., :6].reshape(-1, 6) + depth = self.all_depth[idx].reshape(-1, 1) + return rays[..., :3] + rays[..., 3:6] * depth + + def get_intrinsics(self): + K = np.eye(3) + K[0, 0] = self.focal + K[0, 2] = self.img_wh[0] / 2 + K[1, 1] = self.focal + K[1, 2] = self.img_wh[1] / 2 + + return K + + def __getitem__(self, idx): + if self.split == "render": + batch = {"coords": self.get_coords(idx), "pose": self.poses[idx], "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = {"coords": self.get_coords(idx), "rgb": self.get_rgb(idx), "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = {"coords": self.get_coords(idx), "rgb": self.get_rgb(idx), "depth": self.get_depth(idx), "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/datasets/eikonal.py b/datasets/eikonal.py new file mode 100644 index 0000000..ce4b848 --- /dev/null +++ b/datasets/eikonal.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import os + +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import correct_poses_bounds, create_spiral_poses, interpolate_poses +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + +from .base import Base5DDataset + + +class EikonalDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.num_views = cfg.dataset.num_views if "num_views" in cfg.dataset else -1 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + with self.pmgr.open(os.path.join(self.root_dir, "poses_bounds.npy"), "rb") as f: + poses_bounds = np.load(f) + + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images/"))) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + if self.split in ["train", "val"]: + assert len(poses_bounds) == len( + self.image_paths + ), "Mismatch between number of images and number of poses! Please rerun COLMAP!" 
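+
+        # Layout note (descriptive comment, assuming the standard LLFF-style packing):
+        # each row of poses_bounds.npy holds a flattened 3x5 matrix [R | t | (H, W, focal)]
+        # (15 values) followed by the per-image near/far depth bounds (2 values), which is
+        # why the code below splits the array into the first 15 and the last 2 columns.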
+ + poses = poses_bounds[:, :15].reshape(-1, 3, 5) + self.bounds = poses_bounds[:, -2:] + + if self.num_views > 0: + poses = poses[: self.num_views] + self.image_paths = self.image_paths[: self.num_views] + + # Step 1: rescale focal length according to training resolution + H, W, self.focal = poses[0, :, -1] + self.cx, self.cy = W / 2.0, H / 2.0 + + self.K = np.eye(3) + self.K[0, 0] = self.focal * self.img_wh[0] / W + self.K[0, 2] = self.cx * self.img_wh[0] / W + self.K[1, 1] = self.focal * self.img_wh[1] / H + self.K[1, 2] = self.cy * self.img_wh[1] / H + + # Step 2: correct poses, bounds + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds, center=True) + + if not self.use_ndc: + self.bounds = self.bounds / np.max(np.abs(poses[..., :3, 3])) + self.poses[..., :3, 3] = self.poses[..., :3, 3] / np.max(np.abs(poses[..., :3, 3])) + + self.near = self.bounds.min() + self.far = self.bounds.max() + + # Step 3: Ray directions for all pixels + self.centered_pixels = True + self.directions = get_ray_directions_K( + self.img_wh[1], self.img_wh[0], self.K, centered_pixels=self.centered_pixels + ) + + # Step 4: Holdout validation images + if len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.poses = self.poses[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.poses = self.poses[train_indices] + + def get_intrinsics(self): + return self.K + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(self.directions, c2w) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + + if self.include_world: + rays = torch.cat([rays, rays_o, rays_d], dim=-1) + + return rays + else: + return torch.cat([rays_o, rays_d], dim=-1) + + def get_rgb(self, idx): + # Colors + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGB") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img + + def prepare_render_data(self): + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + radii = np.percentile(np.abs(self.poses[:16, ..., 3]), 50, axis=0) + self.poses = create_spiral_poses(self.poses[:16], radii, focus_depth * 100) + + self.poses = np.stack(self.poses, axis=0) + self.poses[..., :3, 3] = self.poses[..., :3, 3] - 0.1 * close_depth * self.poses[..., :3, 2] + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) diff --git a/datasets/fourier.py b/datasets/fourier.py new file mode 100644 
index 0000000..eea868c --- /dev/null +++ b/datasets/fourier.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import numpy as np +import torch +from torch.utils.data import Dataset + +from utils.ray_utils import get_lightfield_rays + + +def fft_rgb(rgb): + return torch.stack( + [ + torch.fft.fft2(rgb[..., 0], norm="ortho"), + torch.fft.fft2(rgb[..., 1], norm="ortho"), + torch.fft.fft2(rgb[..., 2], norm="ortho"), + ], + dim=-1, + ) + + +class FourierDataset(Dataset): + def __init__(self, cfg, train_dataset=None, **kwargs): + super().__init__() + + self.cfg = cfg + self.img_wh = train_dataset.img_wh + self.width, self.height = self.img_wh[0], self.img_wh[1] + self.aspect = train_dataset.aspect + self.num_images = train_dataset.num_images + self.batch_size = cfg.batch_size + + self.all_rays = torch.clone(train_dataset.all_rays) + self.all_rgb = torch.clone(train_dataset.all_rgb) + + # Prepare + self.compute_stats() + self.prepare_data() + self.shuffle() + + def compute_stats(self): + all_rays = self.all_rays.view(self.num_images, self.img_wh[1] * self.img_wh[0], -1) + ray_dim = all_rays.shape[-1] // 2 + + ## Per view statistics + self.all_means = [] + self.all_stds = [] + + for idx in range(self.num_images): + cur_rays = all_rays[idx] + self.all_means += [cur_rays.mean(0)] + self.all_stds += [cur_rays.std(0)] + + self.all_means = torch.stack(self.all_means, 0) + self.all_stds = torch.stack(self.all_stds, 0) + + ## Full dataset statistics + self.pos_mean = self.all_rays[..., :ray_dim].mean(0) + self.pos_std = self.all_rays[..., :ray_dim].std(0) + + self.dir_mean = self.all_rays[..., ray_dim:].mean(0) + self.dir_std = self.all_rays[..., ray_dim:].std(0) + + def prepare_data(self): + self.all_rays = self.all_rays.view(self.num_images, self.img_wh[1], self.img_wh[0], -1) + self.all_rgb = self.all_rgb.view(self.num_images, self.img_wh[1], self.img_wh[0], -1) + self.all_rgb_fft = fft_rgb(self.all_rgb) + self.rgb_fft_mean = self.all_rgb_fft.mean(0) + + def shuffle(self): + idx = list(np.random.choice(np.arange(0, self.num_images), size=self.num_images, replace=False)) + + self.all_rays = self.all_rays[idx] + self.all_rgb = self.all_rgb[idx] + # self.all_rgb_fft = torch.abs(self.all_rgb_fft[idx]) + self.all_rgb_fft = self.all_rgb_fft[idx] + + def __len__(self): + return len(self.all_rays) + + def __getitem__(self, idx): + return { + "rays": self.all_rays[idx], + "rgb": self.all_rgb[idx], + "mean_fft": self.rgb_fft_mean, + } + + def get_random_rays(self, ray_range): + pos_rand = ( + torch.rand( + ( + 1, + 1, + 3, + ) + ) + * 2 + - 1 + ) * ray_range.pos + pos_rand[..., 2] = 0 + + dir_rand = ( + torch.rand( + ( + self.height, + self.width, + 3, + ) + ) + * 2 + - 1 + ) * ray_range.dir + dir_rand[..., 2] = -1 + dir_rand = torch.nn.functional.normalize(dir_rand, p=2.0, dim=-1) + + pos_rand = pos_rand.repeat(self.height, self.width, 1) + + return torch.cat([pos_rand, dir_rand], -1) + + +class FourierLightfieldDataset(Dataset): + def __init__(self, cfg, train_dataset=None, **kwargs): + super().__init__() + + self.cfg = cfg + self.img_wh = train_dataset.img_wh + self.width, self.height = self.img_wh[0], self.img_wh[1] + self.aspect = train_dataset.aspect + self.num_images = train_dataset.num_images + self.batch_size = cfg.batch_size + + self.all_rays = torch.clone(train_dataset.all_rays) + 
self.all_rgb = torch.clone(train_dataset.all_rgb) + + # Prepare + self.compute_stats() + self.prepare_data() + self.shuffle() + + def compute_stats(self): + all_rays = self.all_rays.view(self.num_images, self.img_wh[1] * self.img_wh[0], -1) + ray_dim = all_rays.shape[-1] // 2 + + ## Per view statistics + self.all_means = [] + self.all_stds = [] + + for idx in range(self.num_images): + cur_rays = all_rays[idx] + self.all_means += [cur_rays.mean(0)] + self.all_stds += [cur_rays.std(0)] + + self.all_means = torch.stack(self.all_means, 0) + self.all_stds = torch.stack(self.all_stds, 0) + + ## Full dataset statistics + self.pos_mean = self.all_rays[..., :ray_dim].mean(0) + self.pos_std = self.all_rays[..., :ray_dim].std(0) + + self.dir_mean = self.all_rays[..., ray_dim:].mean(0) + self.dir_std = self.all_rays[..., ray_dim:].std(0) + + def prepare_data(self): + self.all_rays = self.all_rays.view(self.num_images, self.img_wh[1], self.img_wh[0], -1) + self.all_rgb = self.all_rgb.view(self.num_images, self.img_wh[1], self.img_wh[0], -1) + self.all_rgb_fft = fft_rgb(self.all_rgb) + self.rgb_fft_mean = self.all_rgb_fft.mean(0) + + def shuffle(self): + idx = list(np.random.choice(np.arange(0, self.num_images), size=self.num_images, replace=False)) + + self.all_rays = self.all_rays[idx] + self.all_rgb = self.all_rgb[idx] + # self.all_rgb_fft = torch.abs(self.all_rgb_fft[idx]) + self.all_rgb_fft = self.all_rgb_fft[idx] + + def __len__(self): + return len(self.all_rays) + + def __getitem__(self, idx): + return { + "rays": self.all_rays[idx], + "rgb": self.all_rgb[idx], + "mean_fft": self.rgb_fft_mean, + } + + def get_random_rays(self, ray_range): + pos_rand = (torch.rand((2,)) * 2 - 1) * ray_range.pos + + return get_lightfield_rays(self.width, self.height, pos_rand[0], pos_rand[1], self.aspect) diff --git a/datasets/immersive.py b/datasets/immersive.py new file mode 100644 index 0000000..f251464 --- /dev/null +++ b/datasets/immersive.py @@ -0,0 +1,687 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT + +import csv +import gc +import glob +import json +import os + +os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" +import pdb +import random + +import cv2 +import numpy as np +import torch +import torch.nn.functional as F +import torchvision.transforms.functional as TF +from PIL import Image +from scipy.spatial.transform import Rotation +from segment_anything_hq import SamPredictor, sam_model_registry +from torchvision.utils import save_image + +from utils.pose_utils import ( + average_poses, + center_poses_with, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import ( + get_ndc_rays_fx_fy, + get_pixels_for_image, + get_ray_directions_K, + get_rays, + sample_images_at_xy, +) + +from .base import Base5DDataset, Base6DDataset + + +def random_crop(image, output_size): + """ + Randomly crop an image to the specified output size and return the crop + along with its position. + + Parameters: + - image: A PIL Image or a Tensor image. + - output_size: Tuple or list of (height, width) for the output crop size. + + Returns: + - cropped_image: The cropped image. + - crop_position: The top-left corner position (x, y) of the crop. 
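+
+    Example (illustrative sketch only; the tensor shape below is a placeholder, not a
+    value used elsewhere in this codebase):
+        >>> img = torch.rand(3, 1024, 1024)               # (C, H, W) image tensor
+        >>> patch, (x, y) = random_crop(img, (256, 256))  # square patch and its corner
+        >>> patch.shape
+        torch.Size([3, 256, 256])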
+ """ + image_width, image_height = image.size if isinstance(image, Image.Image) else image.shape[-2:] + crop_height, crop_width = output_size + + if crop_width > image_width or crop_height > image_height: + raise ValueError("Crop size must be smaller than image size.") + + x = random.randint(0, image_width - crop_width) + y = random.randint(0, image_height - crop_height) + + cropped_image = TF.crop(image, x, y, crop_height, crop_width) + crop_position = (x, y) + + return cropped_image, crop_position + + +def perspective_to_fisheye(points, K, radial_distortion): + return cv2.fisheye.undistortPoints( + points[:, None], K, np.array([radial_distortion[0], radial_distortion[1], 0.0, 0.0]).astype(np.float32) + ) + + +class ImmersiveDataset(Base6DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + self.num_frames = cfg.dataset.num_frames if "num_frames" in cfg.dataset else 1 + self.start_frame = cfg.dataset.start_frame if "start_frame" in cfg.dataset else 1 + self.keyframe_step = cfg.dataset.keyframe_step if "keyframe_step" in cfg.dataset else 1 + self.num_keyframes = ( + cfg.dataset.num_keyframes if "num_keyframes" in cfg.dataset else self.num_frames // self.keyframe_step + ) + + self.load_full_step = cfg.dataset.load_full_step if "load_full_step" in cfg.dataset else 1 + self.subsample_keyframe_step = ( + cfg.dataset.subsample_keyframe_step if "subsample_keyframe_step" in cfg.dataset else 1 + ) + self.subsample_keyframe_frac = ( + cfg.dataset.subsample_keyframe_frac if "subsample_keyframe_frac" in cfg.dataset else 1.0 + ) + self.subsample_frac = cfg.dataset.subsample_frac if "subsample_frac" in cfg.dataset else 1.0 + + self.keyframe_offset = 0 + self.frame_offset = 0 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + W, H = self.img_wh + + # Load meta + with self.pmgr.open(os.path.join(self.root_dir, "models.json"), "r") as f: + self.meta = json.load(f) + + # Populate vars + self.video_paths = [] + self.intrinsics = [] + self.distortions = [] + self.poses = [] + + for idx, camera in enumerate(self.meta): + + # DEBUGGING + # if idx >= 2: + # break + + # Path + self.video_paths.append(os.path.join(self.root_dir, camera["name"] + ".mp4")) + + # Intrinsics + width_factor = self.img_wh[0] / 2560.0 + height_factor = self.img_wh[1] / 1920.0 + + K = np.eye(3) + K = np.array( + [ + [camera["focal_length"] * width_factor, 0.0, camera["principal_point"][0] * width_factor], + [0.0, camera["focal_length"] * height_factor, camera["principal_point"][1] * height_factor], + [0.0, 0.0, 1.0], + ] + ) + + self.intrinsics.append(K) + + # Distortion + radial_distortion = np.array(camera["radial_distortion"]) + self.distortions.append(radial_distortion[:2]) + + # Pose + R = Rotation.from_rotvec(camera["orientation"]).as_matrix() + T = np.array(camera["position"]) + + pose = np.eye(4) + pose[:3, :3] = R.T + pose[:3, -1] = T + + pose_pre = np.eye(4) + pose_pre[1, 1] *= -1 + pose_pre[2, 2] *= -1 + pose = pose_pre @ pose @ pose_pre + + if camera["name"] == "camera_0001": + val_idx = idx + center_pose = pose[None, :3, :4] + + self.poses.append(pose[:3, :4]) + + self.images_per_frame = len(self.video_paths) + self.total_num_views = len(self.video_paths) + self.intrinsics = np.stack([self.intrinsics for i in 
range(self.num_frames)]).reshape(-1, 3, 3) + self.distortions = np.stack([self.distortions for i in range(self.num_frames)]).reshape(-1, 2) + self.poses = np.stack([self.poses for i in range(self.num_frames)]).reshape(-1, 3, 4) + self.K = self.intrinsics[0] + + # Times + self.times = np.tile(np.linspace(0, 1, self.num_frames)[..., None], (1, self.images_per_frame)) + self.times = self.times.reshape(-1) + + self.camera_ids = np.tile( + np.linspace(0, self.images_per_frame - 1, self.images_per_frame)[None, :], (self.num_frames, 1) + ) + self.camera_ids = self.camera_ids.reshape(-1) + + ## Bounds, common for all scenes + if self.dataset_cfg.collection in ["01_Welder"]: + self.near = 0.25 + self.far = 6.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + + if self.dataset_cfg.collection in ["02_Flames"]: + self.near = 1.0 + self.far = 10.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + if self.dataset_cfg.collection in ["04_Truck"]: + self.near = 0.5 + self.far = 10.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + elif self.dataset_cfg.collection in ["05_Horse"]: + self.near = 0.5 + self.far = 45.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + elif self.dataset_cfg.collection in ["07_Car"]: + self.near = 0.5 + self.far = 50.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + elif self.dataset_cfg.collection in ["09_Alexa_Meade_Exhibit"]: + self.near = 0.5 + self.far = 30.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + elif self.dataset_cfg.collection in ["10_Alexa_Meade_Face_Paint_1"]: + self.near = 0.25 + self.far = 6.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([0.5, self.far]) + elif self.dataset_cfg.collection in ["11_Alexa_Meade_Face_Paint_2"]: + self.near = 0.25 + self.far = 6.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([0.5, self.far]) + elif self.dataset_cfg.collection in ["12_Cave"]: + self.near = 0.5 + self.far = 20.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + else: + self.near = 0.5 + self.far = 10.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + + ## Correct poses, bounds + poses = np.copy(self.poses) + + if self.use_ndc or self.correct_poses: + self.poses, self.poses_avg = center_poses_with(poses, center_pose) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + + ## Holdout validation images + val_indices = [] + + if len(self.val_set) > 0: + val_indices += [frame * self.images_per_frame + val_idx for frame in range(self.num_frames)] + + train_indices = [i for i in range(len(self.poses)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + if not self.val_all and len(self.val_set) > 0: + self.video_paths = [self.video_paths[val_idx]] + + self.intrinsics = self.intrinsics[val_indices] + self.camera_ids = self.camera_ids[val_indices] + self.distortions = self.distortions[val_indices] + self.poses = self.poses[val_indices] + self.times = self.times[val_indices] + elif self.split == "train": + if not 
self.val_all and len(self.val_set) > 0: + self.video_paths = [self.video_paths[i] for i in range(len(self.video_paths)) if i != val_idx] + + self.intrinsics = self.intrinsics[train_indices] + self.camera_ids = self.camera_ids[train_indices] + self.distortions = self.distortions[train_indices] + self.poses = self.poses[train_indices] + self.times = self.times[train_indices] + + self.num_images = len(self.poses) + self.images_per_frame = len(self.video_paths) + + def random_subsample(self, coords, rgb, last_rgb, frame, fac=1.0): + if (frame % self.load_full_step) == 0: + return coords, rgb + elif (frame % self.subsample_keyframe_step) == 0: + num_take = int(np.round(coords.shape[0] * self.subsample_keyframe_frac * fac)) + perm = torch.tensor(np.random.permutation(coords.shape[0]))[:num_take] + else: + num_take = int(np.round(coords.shape[0] * self.subsample_frac * fac)) + perm = torch.tensor(np.random.permutation(coords.shape[0]))[:num_take] + + return coords[perm].view(-1, coords.shape[-1]), rgb[perm].view(-1, rgb.shape[-1]) + + def regular_subsample(self, coords, rgb, last_rgb, frame, fac=1.0): + if (frame % self.load_full_step) == 0: + return coords, rgb + elif (frame % self.subsample_keyframe_step) == 0: + subsample_every = int(np.round(1.0 / (self.subsample_keyframe_frac * fac))) + offset = self.keyframe_offset + self.keyframe_offset += 1 + else: + subsample_every = int(np.round(1.0 / (self.subsample_frac * fac))) + offset = self.frame_offset + self.frame_offset += 1 + + pixels = get_pixels_for_image(self.img_wh[1], self.img_wh[0]).reshape(-1, 2).long() + mask = ((pixels[..., 0] + pixels[..., 1] + offset) % subsample_every) == 0.0 + + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def test_subsample(self, coords, rgb, last_rgb, frame): + mask = coords[..., 5] < -0.25 + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def importance_subsample(self, coords, rgb, last_rgb, frame, fac=1.0): + if (frame % self.load_full_step) == 0: + return coords, rgb + + diff = torch.abs(rgb - last_rgb).mean(-1) + diff_sorted, _ = torch.sort(diff) + + if (frame % self.subsample_keyframe_step) == 0: + num_take = int(np.round(coords.shape[0] * self.subsample_keyframe_frac * fac)) + else: + num_take = int(np.round(coords.shape[0] * self.subsample_frac * fac)) + + mask = diff > diff_sorted[-num_take] + mask = mask & (coords[..., 5] < -0.05) + + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def subsample(self, coords, rgb, last_rgb, frame): + # coords, rgb = self.regular_subsample(coords, rgb, last_rgb, frame) + # return coords, rgb + + if (frame % self.load_full_step) == 0: + return coords, rgb + else: + coords, rgb = self.importance_subsample(coords, rgb, last_rgb, frame) + + return coords, rgb + + def prepare_train_data(self): + sam_checkpoint = "pre_trained/sam_hq_vit_h.pth" # Update this path to your checkpoint + model_type = "vit_h" + sam = sam_model_registry[model_type](checkpoint=sam_checkpoint) + sam.to("cuda") + predictor = SamPredictor(sam) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + self.all_sam_coords = [] + self.all_sam = [] + num_pixels = 0 + last_rgb_full = None + + for video_idx in range(len(self.video_paths)): + self.keyframe_offset = video_idx + self.frame_offset = video_idx + + # Open video + cam = cv2.VideoCapture(self.video_paths[video_idx]) + # Get coords + video_coords = self.get_coords(video_idx) + + ctr = 0 + frame_idx = 0 + + while ctr < 
self.start_frame + self.num_frames: + _, frame = cam.read() + if ctr < self.start_frame: + ctr += 1 + continue + else: + ctr += 1 + + cur_time = self.times[frame_idx * self.images_per_frame + video_idx] + cur_frame = int( + np.round(self.times[frame_idx * self.images_per_frame + video_idx] * (self.num_frames - 1)) + ) + + # Coords + cur_coords = torch.cat([video_coords[..., :-1], torch.ones_like(video_coords[..., -1:]) * cur_time], -1) + + # Get RGB + crop_size = 960 + cur_rgb_full, img_cropped, features_coords = self.get_rgb_sam(frame, cur_coords, crop_size) + predictor.set_image((img_cropped * 255).permute(1, 2, 0).numpy().astype(np.uint8)) + features = predictor.features + + features = features / 2 + 0.5 + features = features.squeeze() # torch.Size([256, 64, 64]) + # features = F.interpolate(features.unsqueeze(0), size=(crop_size, crop_size), mode='bilinear').squeeze(0) + features_coords = F.interpolate(features_coords.unsqueeze(0), size=(64, 64), mode="bilinear").squeeze(0) + + # Subsample + if frame_idx == 0: + cur_rgb = cur_rgb_full + else: + cur_coords, cur_rgb = self.subsample(cur_coords, cur_rgb_full, last_rgb_full, cur_frame) + + cur_feature_coords = features_coords.permute(1, 2, 0).reshape(64 * 64, -1) + cur_feature = features.permute(1, 2, 0).reshape(64 * 64, -1).cpu() + + # Save for later + last_rgb_full = cur_rgb_full + self.all_coords += [cur_coords] + self.all_rgb += [cur_rgb] + self.all_sam_coords += [cur_feature_coords] + self.all_sam += [cur_feature] + + # Number of pixels + num_pixels += cur_rgb.shape[0] + + print(f"Video {video_idx} frame {frame_idx}") + print("Full res images loaded:", num_pixels / (self.img_wh[0] * self.img_wh[1])) + + # Increment frame idx + frame_idx += 1 + + cam.release() + del sam + del predictor + + # Format / save loaded data + self.all_coords = torch.cat(self.all_coords, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + self.all_sam_coords = torch.cat(self.all_sam_coords, 0) + self.all_sam = torch.cat(self.all_sam, 0) + + self.update_all_data() + + def update_all_data(self): + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + ], + -1, + ) + self.all_inputs_sam = torch.cat( + [ + self.all_sam_coords, + self.all_sam, + ], + -1, + ) + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["coords_sam"] = batch["inputs_sam"][..., : self.all_sam_coords.shape[-1]] + batch["sam"] = batch["inputs_sam"][..., self.all_sam_coords.shape[-1] : self.all_sam_coords.shape[-1] + 256] + del batch["inputs"] + del batch["inputs_sam"] + + return batch + + def prepare_render_data(self): + if os.path.exists(self.cfg.params.input_pose): + with open(self.cfg.params.input_pose, "r") as json_file: + pose_time_list = json.load(json_file) + self.times = [pose_time["time"] for pose_time in pose_time_list] + self.poses = [pose_time["pose"] for pose_time in pose_time_list] + self.num_frames = len(self.times) + + else: + # Get poses + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + poses_per_frame = self.poses.shape[0] // self.num_frames + poses_one_frame = self.poses[ + (self.num_frames // 2) * poses_per_frame : (self.num_frames // 2 + 1) * poses_per_frame + ] + poses_each_frame = 
interpolate_poses(self.poses[::poses_per_frame], self.render_supersample) + radii = np.percentile(np.abs(poses_one_frame[..., 3]), 50, axis=0) + radii[..., :-1] *= 1.0 + radii[..., -1] *= 0.05 + + if self.num_frames > 1: + poses = create_spiral_poses( + poses_one_frame, + radii, + focus_depth, + N=self.num_frames * self.render_supersample, + ) + + reference_pose = np.eye(4) + reference_pose[:3, :4] = self.poses[(self.num_frames // 2) * poses_per_frame] + reference_pose = np.linalg.inv(reference_pose) + + for pose_idx in range(len(poses)): + cur_pose = np.eye(4) + cur_pose[:3, :4] = poses[pose_idx] + poses[pose_idx] = poses_each_frame[pose_idx] @ (reference_pose @ cur_pose) + else: + poses = create_spiral_poses(poses_one_frame, radii, focus_depth * 100, N=120) + + self.poses = np.stack(poses, axis=0) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + # Get times + if (self.num_frames - 1) > 0: + self.times = np.linspace(0, self.num_frames - 1, len(self.poses)) + + if not self.render_interpolate_time: + self.times = np.round(self.times) + + self.times = self.times / (self.num_frames - 1) + else: + self.times = [0.0 for p in self.poses] + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + if self.split != "train" or self.split == "render": + camera_id = 1 + else: + camera_id = self.camera_ids[idx] + + if self.split != "render": + K = torch.FloatTensor(self.intrinsics[idx]) + distortion = self.distortions[idx] + else: + K = torch.FloatTensor(self.intrinsics[0]) + K[0, 0] *= 0.75 + K[1, 1] *= 0.75 + distortion = None + + c2w = torch.FloatTensor(self.poses[idx]) + time = self.times[idx] + + print("Loading time:", np.round(time * (self.num_frames - 1))) + + # Undistort + if distortion is not None: + directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=True).view(-1, 3) + directions = perspective_to_fisheye( + np.array(directions[..., :2]).astype(np.float32), + np.eye(3).astype(np.float32), + distortion.astype(np.float32), + )[:, 0] + directions = np.concatenate( + [directions[..., 0:1], directions[..., 1:2], -np.ones_like(directions[..., -1:])], -1 + ) + + directions = torch.tensor(directions) + directions = torch.nn.functional.normalize(directions, dim=-1) + else: + directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=True).view(-1, 3) + + # Convert to world space + rays_o, rays_d = get_rays(directions, c2w) + + # Convert to NDC + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * camera_id], dim=-1) + + # Add times + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * time], dim=-1) + + # Return + return rays + + def get_rgb(self, img): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + if img.shape[0] != self._img_wh[0] or img.shape[1] != self._img_wh[1]: + img = cv2.resize(img, self._img_wh, cv2.INTER_LANCZOS4) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = cv2.resize(img, self.img_wh, cv2.INTER_AREA) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img + + def get_rgb_sam(self, img, coords, crop_size): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + if img.shape[0] != self._img_wh[0] or img.shape[1] != self._img_wh[1]: + img = cv2.resize(img, self._img_wh, 
cv2.INTER_LANCZOS4) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = cv2.resize(img, self.img_wh, cv2.INTER_AREA) + + img = self.transform(img) + coords = coords.permute(1, 0).view(-1, img.shape[1], img.shape[2]) + + img_coords = torch.cat([img, coords], 0) + + img_coords_cropped, crop_position = random_crop(img_coords, (crop_size, crop_size)) + img_cropped = img_coords_cropped[:3] + coords_cropped = img_coords_cropped[3:] + + img = img.view(3, -1).permute(1, 0) + + return img, img_cropped, coords_cropped + + def get_rgb_one(self, idx): + # Open video + cam = cv2.VideoCapture(self.video_paths[idx % self.images_per_frame]) + + # Get RGB + ctr = 0 + frame_idx = 0 + + while ctr < self.start_frame + self.num_frames: + _, frame = cam.read() + + if ctr < self.start_frame: + ctr += 1 + continue + else: + ctr += 1 + + if frame_idx != (idx // self.images_per_frame): + frame_idx += 1 + continue + else: + rgb = self.get_rgb(frame) + break + + cam.release() + return rgb + + def get_intrinsics(self): + return self.intrinsics + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(idx), + "pose": self.poses[idx], + "time": self.times[idx], + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb_one(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb_one(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + if isinstance(self.all_inputs_sam, list) or isinstance(self.all_inputs_sam, np.ndarray): + length = len(self.all_inputs_sam) + elif isinstance(self.all_inputs_sam, torch.Tensor): + length = self.all_inputs_sam.size(0) + else: + raise TypeError("Unsupported data type for self.all_inputs_sam") + + batch = { + "inputs": self.all_inputs[idx], + "inputs_sam": self.all_inputs_sam[idx % length], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/datasets/lightfield.py b/datasets/lightfield.py new file mode 100644 index 0000000..6bbcdfa --- /dev/null +++ b/datasets/lightfield.py @@ -0,0 +1,393 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import numpy as np +import torch + +from utils.ray_utils import get_lightfield_rays + +from .base import Base5DDataset, BaseDataset + + +class LightfieldDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + + ## Dataset cfg + self.cfg = cfg + self.split = getattr(cfg.dataset, "split", split) + self.dataset_cfg = getattr(cfg.dataset, self.split, cfg.dataset) + + ## Param + + # Lightfield params + self.rows = self.dataset_cfg.lightfield.rows + self.cols = self.dataset_cfg.lightfield.cols + self.step = self.dataset_cfg.lightfield.step + + self.start_row = self.dataset_cfg.lightfield.start_row if "start_row" in self.dataset_cfg.lightfield else 0 + self.end_row = self.dataset_cfg.lightfield.end_row if "end_row" in self.dataset_cfg.lightfield else self.rows + + self.start_col = self.dataset_cfg.lightfield.start_col if "start_col" in self.dataset_cfg.lightfield else 0 + self.end_col = self.dataset_cfg.lightfield.end_col if "end_col" in self.dataset_cfg.lightfield else self.cols + + self.st_scale = self.dataset_cfg.lightfield.st_scale if "st_scale" in self.dataset_cfg.lightfield else 1.0 + self.uv_scale = self.dataset_cfg.lightfield.uv_scale if "uv_scale" in self.dataset_cfg.lightfield else 1.0 + + if self.step > 1: + self.num_rows = (self.end_row - self.start_row) // self.step + 1 + self.num_cols = (self.end_col - self.start_col) // self.step + 1 + else: + self.num_rows = (self.end_row - self.start_row) // self.step + self.num_cols = (self.end_col - self.start_col) // self.step + + self.num_images = self.num_rows * self.num_cols + + self.near = 0 + self.far = 1 + self.near_plane = self.dataset_cfg.lightfield.near if "near" in self.dataset_cfg.lightfield else -1.0 + self.far_plane = self.dataset_cfg.lightfield.far if "far" in self.dataset_cfg.lightfield else 0.0 + + # Validation and testing + self.val_all = (self.dataset_cfg.val_all if "val_all" in self.dataset_cfg else False) or self.step == 1 + self.val_pairs = self.dataset_cfg.val_pairs if "val_pairs" in self.dataset_cfg else [] + + if len(self.val_pairs) > 0: + self.val_pairs = list(zip(self.val_pairs[::2], self.val_pairs[1::2])) + self.num_test_images = len(self.val_pairs) + elif self.val_all: + self.num_test_images = (self.end_row - self.start_row) * (self.end_col - self.start_col) + else: + self.num_test_images = (self.end_row - self.start_row) * (self.end_col - self.start_col) - self.num_images + + # Render params + self.disp_row = self.dataset_cfg.lightfield.disp_row + self.supersample = self.dataset_cfg.lightfield.supersample + self.keyframe_step = ( + self.dataset_cfg.lightfield.keyframe_step if "keyframe_step" in self.dataset_cfg.lightfield else -1 + ) + self.keyframe_subsample = ( + self.dataset_cfg.lightfield.keyframe_subsample if "keyframe_subsample" in self.dataset_cfg.lightfield else 1 + ) + + self.render_spiral = ( + self.dataset_cfg.render_params.spiral if "spiral" in self.dataset_cfg.render_params else False + ) + self.render_far = self.dataset_cfg.render_params.far if "far" in self.dataset_cfg.render_params else False + + self.spiral_rad = ( + self.dataset_cfg.render_params.spiral_rad if "spiral_rad" in self.dataset_cfg.render_params else 0.5 + ) + self.uv_downscale = ( + self.dataset_cfg.render_params.uv_downscale if "uv_downscale" in self.dataset_cfg.render_params else 0.0 + ) + + if "vis_st_scale" in self.dataset_cfg.lightfield: + self.vis_st_scale = ( + self.dataset_cfg.lightfield.vis_st_scale + if self.dataset_cfg.lightfield.vis_st_scale is not None + else 
self.st_scale + ) + else: + self.vis_st_scale = self.st_scale + + if "vis_uv_scale" in self.dataset_cfg.lightfield: + self.vis_uv_scale = ( + self.dataset_cfg.lightfield.vis_uv_scale + if self.dataset_cfg.lightfield.vis_uv_scale is not None + else self.uv_scale + ) + else: + self.vis_uv_scale = self.uv_scale + + super().__init__(cfg, split, val_all=self.val_all, **kwargs) + + self.poses = [self.get_coord(st_idx) for st_idx in self.all_st_idx] + + def read_meta(self): + pass + + def prepare_train_data(self): + self.all_coords = [] + self.all_rgb = [] + self.all_st_idx = [] + + for t_idx in range(self.start_row, self.end_row, self.step): + for s_idx in range(self.start_col, self.end_col, self.step): + if (s_idx, t_idx) in self.val_pairs: + continue + + # Rays + self.all_coords += [self.get_coords(s_idx, t_idx)] + + # Color + self.all_rgb += [self.get_rgb(s_idx, t_idx)] + + # Random subsample for frames that are not keyframes + # TODO: Re-do every N iterations + if self.keyframe_step != -1 and self.keyframe_subsample != 1: + num_take = self.all_coords[-1].shape[0] // self.keyframe_subsample + + if (s_idx % self.keyframe_step != 0) or (t_idx % self.keyframe_step != 0): + perm = torch.tensor(np.random.permutation(self.all_coords[-1].shape[0]))[:num_take] + + self.all_coords[-1] = self.all_coords[-1][perm].view(-1, 6) + self.all_rgb[-1] = self.all_rgb[-1][perm].view(-1, 3) + + # Pose + self.all_st_idx.append((s_idx, t_idx)) + + self.all_coords = torch.cat(self.all_coords, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + self.all_weights = self.get_weights() + self.all_inputs = torch.cat([self.all_coords, self.all_rgb, self.all_weights], -1) + + def prepare_val_data(self): + self.prepare_test_data() + + def prepare_test_data(self): + self.all_st_idx = [] + + for t_idx in range(self.start_row, self.end_row, 1): + for s_idx in range(self.start_col, self.end_col, 1): + if len(self.val_pairs) == 0: + if (t_idx % self.step) == 0 and (s_idx % self.step) == 0 and not self.val_all: + continue + elif (s_idx, t_idx) not in self.val_pairs: + continue + + self.all_st_idx.append((s_idx, t_idx)) + + def prepare_render_data(self): + if not self.render_spiral: + self.all_st_idx = [] + t_idx = self.disp_row + + for s_idx in range(self.cols * self.supersample): + self.all_st_idx.append((s_idx / self.supersample, t_idx)) + else: + N = 120 + rots = 2 + scale = self.spiral_rad + + self.all_st_idx = [] + + for theta in np.linspace(0.0, 2.0 * np.pi * rots, N + 1)[:-1]: + s = (np.cos(theta) * scale + 1) / 2.0 * (self.cols - 1) + t = -np.sin(theta) * scale / 2.0 * (self.rows - 1) + ((self.rows - 1) - self.disp_row) + + self.all_st_idx.append((s, t)) + + def get_coord(self, st_idx): + s = (st_idx[0] / (self.cols - 1)) * 2 - 1 if self.cols > 1 else 0 + t = -(((st_idx[1] / (self.rows - 1)) * 2 - 1) if self.rows > 1 else 0) + + return (s, t) + + def get_coords(self, s_idx, t_idx): + if self.split == "render": + st_scale = self.vis_st_scale + uv_scale = self.vis_uv_scale + else: + st_scale = self.st_scale + uv_scale = self.uv_scale + + s, t = self.get_coord((s_idx, t_idx)) + + if self.render_spiral or self.render_far: + return get_lightfield_rays( + self.img_wh[0], + self.img_wh[1], + s, + t, + self.aspect, + st_scale=st_scale, + uv_scale=uv_scale, + near=self.near_plane, + far=self.far_plane, + use_inf=True, + center_u=-s * self.uv_downscale, + center_v=-t * self.uv_downscale, + ) + else: +
return get_lightfield_rays( + self.img_wh[0], + self.img_wh[1], + s, + t, + self.aspect, + near=self.near_plane, + far=self.far_plane, + st_scale=st_scale, + uv_scale=uv_scale, + ) + + def get_rgb(self, s_idx, t_idx): + pass + + def get_closest_rgb(self, query_st): + W = self.img_wh[0] + H = self.img_wh[1] + + images = self.all_rgb.view(self.num_images, H, W, -1) + dists = np.linalg.norm(np.array(self.poses) - np.array(query_st)[None], axis=-1) + return images[list(np.argsort(dists))[0]] + + def __len__(self): + if self.split == "train": + return len(self.all_coords) + elif self.split == "val": + return min(self.val_num, self.num_test_images) + elif self.split == "render": + if not self.render_spiral: + return self.supersample * self.cols + else: + return 120 + else: + return self.num_test_images + + def __getitem__(self, idx): + if self.split == "render": + s_idx, t_idx = self.all_st_idx[idx] + + batch = { + "coords": LightfieldDataset.get_coords(self, s_idx, t_idx), + "pose": self.poses[idx], + "idx": idx, + "s_idx": s_idx, + "t_idx": t_idx, + } + + elif self.split == "val" or self.split == "test": + s_idx, t_idx = self.all_st_idx[idx] + + batch = { + "coords": self.get_coords(s_idx, t_idx), + "rgb": self.get_rgb(s_idx, t_idx), + "idx": idx, + "s_idx": s_idx, + "t_idx": t_idx, + } + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch + + +class EPIDataset(BaseDataset): + def __init__(self, cfg, split="train", **kwargs): + + ## Dataset cfg + self.cfg = cfg + self.split = getattr(cfg.dataset, "split", split) + self.dataset_cfg = getattr(cfg.dataset, self.split, cfg.dataset) + + # Lightfield params + self.st_scale = self.dataset_cfg.lightfield.st_scale if "st_scale" in self.dataset_cfg.lightfield else 1.0 + self.supersample = self.dataset_cfg.lightfield.supersample if self.split == "render" else 1 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + pass + + def prepare_train_data(self): + self.all_coords = [] + self.all_rgb = [] + + # Rays + self.all_coords += [self.get_coords()] + + # Color + self.all_rgb += [self.get_rgb()] + + # Stack + self.all_coords = torch.cat(self.all_coords, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + self.all_weights = self.get_weights() + + self.all_inputs = torch.cat([self.all_coords, self.all_rgb, self.all_weights], -1) + + def prepare_val_data(self): + self.prepare_test_data() + + def prepare_test_data(self): + self.prepare_train_data() + + def prepare_render_data(self): + self.all_coords = [] + self.all_rgb = [] + + # Rays + self.all_coords += [self.get_coords()] + + # Color + self.all_rgb += [self.get_rgb()] + + # Stack + self.all_coords = torch.cat(self.all_coords, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + self.all_weights = self.get_weights() + + def get_coords(self): + u = torch.linspace(-1, 1, self.img_wh[0], dtype=torch.float32) + s = torch.linspace(-1, 1, self.img_wh[1] * self.supersample, dtype=torch.float32) * self.st_scale + su = list(torch.meshgrid([s, u])) + return torch.stack(su, -1).view(-1, 2) + + def get_rgb(self): + # TODO: return single image + pass + + def get_closest_rgb(self, query_st): + pass + + def __len__(self): + if self.split == "train": + return len(self.all_coords) + elif self.split == "val": + return 1 + elif self.split == "render": + return 1 + else: + return 1 + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(), + } + + elif self.split == "val" or 
self.split == "test": + batch = { + "coords": self.get_coords(), + "rgb": self.get_rgb(), + } + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + if self.split == "render": + batch["H"] *= self.supersample + + return batch diff --git a/datasets/llff.py b/datasets/llff.py new file mode 100644 index 0000000..c6992e1 --- /dev/null +++ b/datasets/llff.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import os + +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import correct_poses_bounds +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + +from .base import Base5DDataset + + +class LLFFDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + with self.pmgr.open(os.path.join(self.root_dir, "poses_bounds.npy"), "rb") as f: + poses_bounds = np.load(f) + + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images/"))) + self.camera_ids = np.linspace(0, len(self.image_paths) - 1, len(self.image_paths)) + self.total_num_views = len(self.image_paths) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + if self.split in ["train", "val"]: + assert len(poses_bounds) == len( + self.image_paths + ), "Mismatch between number of images and number of poses! Please rerun COLMAP!" 
+ + poses = poses_bounds[:, :15].reshape(-1, 3, 5) + self.bounds = poses_bounds[:, -2:] + + # Step 1: rescale focal length according to training resolution + H, W, self.focal = poses[0, :, -1] + self.cx, self.cy = W / 2.0, H / 2.0 + + self.K = np.eye(3) + self.K[0, 0] = self.focal * self.img_wh[0] / W + self.K[0, 2] = self.cx * self.img_wh[0] / W + self.K[1, 1] = self.focal * self.img_wh[1] / H + self.K[1, 2] = self.cy * self.img_wh[1] / H + + # Step 2: correct poses, bounds + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds) + + if not self.use_ndc: + self.bounds = self.bounds / np.max(np.abs(poses[..., :3, 3])) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + self.depth_range = np.array([self.near * 2.0, self.far]) + + # Step 3: Ray directions for all pixels + self.directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], self.K, centered_pixels=True) + + # Step 4: Holdout validation images + if len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.camera_ids = self.camera_ids[val_indices] + self.poses = self.poses[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.camera_ids = self.camera_ids[train_indices] + self.poses = self.poses[train_indices] + + def get_intrinsics(self): + return self.K + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + if self.split != "train" or self.split == "render": + camera_id = 1 + else: + camera_id = self.camera_ids[idx] + + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(self.directions, c2w) + + print(f"Loading image {idx}") + + rays = torch.cat([rays_o, rays_d], dim=-1) + + if self.use_ndc: + rays = self.to_ndc(rays) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * camera_id], dim=-1) + return rays + + def get_rgb(self, idx): + # Colors + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGB") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img + + +class DenseLLFFDataset(LLFFDataset): + def __init__(self, cfg, split="train", **kwargs): + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + ## Bounds + with self.pmgr.open(os.path.join(self.root_dir, "bounds.npy"), "rb") as f: + bounds = np.load(f) + + self.bounds = bounds[:, -2:] + + ## Poses + with self.pmgr.open(os.path.join(self.root_dir, "poses.npy"), "rb") as f: + poses = np.load(f) + + ## Image paths + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images/"))) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), 
"rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + ## Skip + row_skip = self.dataset_cfg.train_row_skip + col_skip = self.dataset_cfg.train_col_skip + + poses_skipped = [] + image_paths_skipped = [] + + for row in range(self.dataset_cfg.num_rows): + for col in range(self.dataset_cfg.num_cols): + idx = row * self.dataset_cfg.num_cols + col + + if self.split == "train" and ( + (row % row_skip) != 0 or (col % col_skip) != 0 or (idx % self.val_skip) == 0 + ): + continue + + if (self.split == "val" or self.split == "test") and ( + ((row % row_skip) == 0 and (col % col_skip) == 0) and (idx % self.val_skip) != 0 + ): + continue + + poses_skipped.append(poses[idx]) + image_paths_skipped.append(self.image_paths[idx]) + + poses = np.stack(poses_skipped, axis=0) + self.poses = poses.reshape(-1, 3, 5) + self.image_paths = image_paths_skipped + + # Step 1: rescale focal length according to training resolution + H, W, self.focal = poses[0, :, -1] + self.cx, self.cy = W / 2.0, H / 2.0 + + self.K = np.eye(3) + self.K[0, 0] = self.focal * self.img_wh[0] / W + self.K[0, 2] = self.cx * self.img_wh[0] / W + self.K[1, 1] = self.focal * self.img_wh[1] / H + self.K[1, 2] = self.cy * self.img_wh[1] / H + + # Step 2: correct poses, bounds + self.near = self.bounds.min() + self.far = self.bounds.max() + + # Step 3: Ray directions for all pixels + self.directions = get_ray_directions_K( + self.img_wh[1], self.img_wh[0], self.K, centered_pixels=self.centered_pixels + ) diff --git a/datasets/neural_3d.py b/datasets/neural_3d.py new file mode 100644 index 0000000..4f6b1ca --- /dev/null +++ b/datasets/neural_3d.py @@ -0,0 +1,470 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT + +import csv +import json +import os + +os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" +import glob + +import cv2 +import numpy as np +import torch +from PIL import Image +from scipy.spatial.transform import Rotation + +from utils.pose_utils import ( + average_poses, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import ( + get_ndc_rays_fx_fy, + get_pixels_for_image, + get_ray_directions_K, + get_rays, + sample_images_at_xy, +) + +from .base import Base5DDataset, Base6DDataset + + +class Neural3DVideoDataset(Base6DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + self.num_frames = cfg.dataset.num_frames if "num_frames" in cfg.dataset else 1 + self.start_frame = cfg.dataset.start_frame if "start_frame" in cfg.dataset else 1 + self.keyframe_step = cfg.dataset.keyframe_step if "keyframe_step" in cfg.dataset else 1 + self.num_keyframes = ( + cfg.dataset.num_keyframes if "num_keyframes" in cfg.dataset else self.num_frames // self.keyframe_step + ) + + self.load_full_step = cfg.dataset.load_full_step if "load_full_step" in cfg.dataset else 1 + self.subsample_keyframe_step = ( + cfg.dataset.subsample_keyframe_step if "subsample_keyframe_step" in cfg.dataset else 1 + ) + self.subsample_keyframe_frac = ( + cfg.dataset.subsample_keyframe_frac if "subsample_keyframe_frac" in cfg.dataset else 1.0 + ) + self.subsample_frac = cfg.dataset.subsample_frac if "subsample_frac" in cfg.dataset else 1.0 + + self.keyframe_offset = 0 + self.frame_offset = 0 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + W, H = self.img_wh + + # Poses, bounds + with self.pmgr.open(os.path.join(self.root_dir, "poses_bounds.npy"), "rb") as f: + poses_bounds = np.load(f) + + # Video paths + self.video_paths = sorted(glob.glob(os.path.join(self.root_dir, "*.mp4"))) + self.images_per_frame = len(self.video_paths) + self.total_images_per_frame = len(self.video_paths) + + # if self.dataset_cfg.collection in ['coffee_martini']: + # self.video_paths = [path for path in self.video_paths if 'cam13' not in path] + + # Get intrinsics & extrinsics + poses = poses_bounds[:, :15].reshape(-1, 3, 5) + self.bounds = poses_bounds[:, -2:] + + # if self.dataset_cfg.collection in ['coffee_martini']: + # poses = np.delete(poses, (12), axis=0) + + H, W, self.focal = poses[0, :, -1] + self.cx, self.cy = W / 2.0, H / 2.0 + + self.K = np.eye(3) + self.K[0, 0] = self.focal * self.img_wh[0] / W + self.K[0, 2] = self.cx * self.img_wh[0] / W + self.K[1, 1] = self.focal * self.img_wh[1] / H + self.K[1, 2] = self.cy * self.img_wh[1] / H + + # Correct poses, bounds + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + self.depth_range = np.array([self.near * 2.0, self.far]) + + # Ray directions for all pixels + self.directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], self.K, centered_pixels=True) + + # Repeat poses, times + self.poses = np.stack([self.poses for i in range(self.num_frames)]).reshape(-1, 3, 4) + self.times = np.tile(np.linspace(0, 1, self.num_frames)[..., None], (1, self.images_per_frame)) + self.times = 
self.times.reshape(-1) + self.camera_ids = np.tile( + np.linspace(0, self.images_per_frame - 1, self.images_per_frame)[None, :], (self.num_frames, 1) + ) + self.camera_ids = self.camera_ids.reshape(-1) + + # Holdout validation images + val_indices = [] + + for idx in self.val_set: + val_indices += [frame * self.images_per_frame + idx for frame in range(self.num_frames)] + + train_indices = [i for i in range(len(self.poses)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + if not self.val_all: + self.video_paths = [self.video_paths[i] for i in self.val_set] + + self.poses = self.poses[val_indices] + self.times = self.times[val_indices] + self.camera_ids = self.camera_ids[val_indices] + elif self.split == "train": + if not self.val_all: + self.video_paths = [self.video_paths[i] for i in range(len(self.video_paths)) if i not in self.val_set] + + self.poses = self.poses[train_indices] + self.times = self.times[train_indices] + self.camera_ids = self.camera_ids[train_indices] + + self.num_images = len(self.poses) + self.images_per_frame = len(self.video_paths) + + def random_subsample(self, coords, rgb, last_rgb, frame, fac=1.0): + if (frame % self.load_full_step) == 0: + return coords, rgb + elif (frame % self.subsample_keyframe_step) == 0: + num_take = int(np.round(coords.shape[0] * self.subsample_keyframe_frac * fac)) + perm = torch.tensor(np.random.permutation(coords.shape[0]))[:num_take] + else: + num_take = int(np.round(coords.shape[0] * self.subsample_frac * fac)) + perm = torch.tensor(np.random.permutation(coords.shape[0]))[:num_take] + + return coords[perm].view(-1, coords.shape[-1]), rgb[perm].view(-1, rgb.shape[-1]) + + def regular_subsample(self, coords, rgb, last_rgb, frame, fac=1.0): + if (frame % self.load_full_step) == 0: + return coords, rgb + elif (frame % self.subsample_keyframe_step) == 0: + subsample_every = int(np.round(1.0 / (self.subsample_keyframe_frac * fac))) + offset = self.keyframe_offset + self.keyframe_offset += 1 + else: + subsample_every = int(np.round(1.0 / (self.subsample_frac * fac))) + offset = self.frame_offset + self.frame_offset += 1 + + pixels = get_pixels_for_image(self.img_wh[1], self.img_wh[0]).reshape(-1, 2).long() + mask = ((pixels[..., 0] + pixels[..., 1] + offset) % subsample_every) == 0.0 + + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def test_subsample(self, coords, rgb, last_rgb, frame): + mask = coords[..., 5] < -0.25 + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def importance_subsample(self, coords, rgb, last_rgb, frame, fac=1.0): + if (frame % self.load_full_step) == 0: + return coords, rgb + + diff = torch.abs(rgb - last_rgb).mean(-1) + diff_sorted, _ = torch.sort(diff) + + if (frame % self.subsample_keyframe_step) == 0: + num_take = int(np.round(coords.shape[0] * self.subsample_keyframe_frac * fac)) + else: + num_take = int(np.round(coords.shape[0] * self.subsample_frac * fac)) + + mask = diff > diff_sorted[-num_take] + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def subsample(self, coords, rgb, last_rgb, frame): + coords, rgb = self.regular_subsample(coords, rgb, last_rgb, frame) + return coords, rgb + + # if (frame % self.load_full_step) == 0: + # return coords, rgb + # else: + # coords, rgb = self.importance_subsample(coords, rgb, last_rgb, frame) + + # return coords, rgb + + def prepare_train_data(self): + ## 
Collect training data + self.all_coords = [] + self.all_rgb = [] + num_pixels = 0 + last_rgb_full = None + + for video_idx in range(len(self.video_paths)): + self.keyframe_offset = video_idx + self.frame_offset = video_idx + + # Open video + cam = cv2.VideoCapture(self.video_paths[video_idx]) + + # Get coords + video_coords = self.get_coords(video_idx) + + ctr = 0 + frame_idx = 0 + + while ctr < self.start_frame + self.num_frames: + _, frame = cam.read() + + if ctr < self.start_frame: + ctr += 1 + continue + else: + ctr += 1 + + cur_time = self.times[frame_idx * self.images_per_frame + video_idx] + cur_frame = int( + np.round(self.times[frame_idx * self.images_per_frame + video_idx] * (self.num_frames - 1)) + ) + + # Coords + cur_coords = torch.cat( + [ + video_coords[..., :-1], + torch.ones_like(video_coords[..., -1:]) * cur_time, + ], + -1, + ) + + # Get RGB + cur_rgb_full = self.get_rgb(frame) + + # Subsample + if frame_idx == 0: + cur_rgb = cur_rgb_full + else: + cur_coords, cur_rgb = self.subsample(cur_coords, cur_rgb_full, last_rgb_full, cur_frame) + + # Save for later + last_rgb_full = cur_rgb_full + + # Coords + self.all_coords += [cur_coords] + + # Color + self.all_rgb += [cur_rgb] + + # Number of pixels + num_pixels += cur_rgb.shape[0] + + print(f"Video {video_idx} frame {frame_idx}") + print("Full res images loaded:", num_pixels / (self.img_wh[0] * self.img_wh[1])) + + # Increment frame idx + frame_idx += 1 + + cam.release() + + # Format / save loaded data + self.all_coords = torch.cat(self.all_coords, 0) + # self.all_coords = self.all_coords.view( + # -1, self.images_per_frame, self.num_frames, self.all_coords.shape[-1] + # ).permute(0, 2, 1, 3).reshape(-1, self.all_coords.shape[-1]) + self.all_rgb = torch.cat(self.all_rgb, 0) + # self.all_rgb = self.all_rgb.view( + # -1, self.images_per_frame, self.num_frames, self.all_rgb.shape[-1] + # ).permute(0, 2, 1, 3).reshape(-1, self.all_rgb.shape[-1]) + self.update_all_data() + + def update_all_data(self): + self.all_weights = self.get_weights() + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_weights, + ], + -1, + ) + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def prepare_render_data(self): + # Get poses + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + poses_per_frame = self.poses.shape[0] // self.num_frames + poses_one_frame = self.poses[ + (self.num_frames // 2) * poses_per_frame : (self.num_frames // 2 + 1) * poses_per_frame + ] + poses_each_frame = interpolate_poses(self.poses[::poses_per_frame], self.render_supersample) + radii = np.percentile(np.abs(poses_one_frame[..., 3]), 50, axis=0) + radii[..., :2] *= 0.5 + + if self.num_frames > 1: + poses = create_spiral_poses( + poses_one_frame, + radii, + focus_depth * 2, + N=self.num_frames * self.render_supersample, + ) + + reference_pose = np.eye(4) + reference_pose[:3, :4] = self.poses[(self.num_frames // 2) * poses_per_frame] + reference_pose = np.linalg.inv(reference_pose) + + for pose_idx in range(len(poses)): + cur_pose = np.eye(4) + cur_pose[:3, :4] = poses[pose_idx] + poses[pose_idx] = poses_each_frame[pose_idx] @ 
(reference_pose @ cur_pose) + else: + poses = create_spiral_poses(poses_one_frame, radii, focus_depth * 100, N=120) + + self.poses = np.stack(poses, axis=0) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + # Get times + if (self.num_frames - 1) > 0: + self.times = np.linspace(0, self.num_frames - 1, len(self.poses)) + + if not self.render_interpolate_time: + self.times = np.round(self.times) + + self.times = self.times / (self.num_frames - 1) + else: + self.times = [0.0 for p in self.poses] + + for i in range(100): + self.poses[i] = self.poses[0] + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + time = self.times[idx] + + if self.split != "train" or self.split == "render": + camera_id = 1 + else: + camera_id = self.camera_ids[idx] + + rays_o, rays_d = get_rays(self.directions, c2w) + + print("Loading time:", np.round(time * (self.num_frames - 1))) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + # Camera ID + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * camera_id], dim=-1) + + # Time stamp + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * time], dim=-1) + return rays + + def get_rgb(self, img): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + if img.shape[0] != self._img_wh[0] or img.shape[1] != self._img_wh[1]: + img = cv2.resize(img, self._img_wh, cv2.INTER_LANCZOS4) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = cv2.resize(img, self.img_wh, cv2.INTER_AREA) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img + + def get_rgb_one(self, idx): + # Open video + cam = cv2.VideoCapture(self.video_paths[idx % self.images_per_frame]) + + # Get RGB + ctr = 0 + frame_idx = 0 + + while ctr < self.start_frame + self.num_frames: + _, frame = cam.read() + + if ctr < self.start_frame: + ctr += 1 + continue + else: + ctr += 1 + + if frame_idx != (idx // self.images_per_frame): + frame_idx += 1 + continue + else: + rgb = self.get_rgb(frame) + break + + cam.release() + return rgb + + def get_intrinsics(self): + return self.K + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(idx), + "pose": self.poses[idx], + "time": self.times[idx], + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb_one(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb_one(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/datasets/random.py b/datasets/random.py new file mode 100644 index 0000000..4833c7b --- /dev/null +++ b/datasets/random.py @@ -0,0 +1,460 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import matplotlib.pyplot as plt +import numpy as np +import torch +from torch.utils.data import Dataset + +from utils.ray_utils import ( + get_pixels_for_image, + get_random_pixels, + get_ray_directions_from_pixels_K, + get_rays, + sample_images_at_xy, +) + + +class RandomRayDataset(Dataset): + def __init__(self, cfg, train_dataset=None, **kwargs): + super().__init__() + + self.cfg = cfg + self.img_wh = train_dataset.img_wh + self.near = train_dataset.near + self.far = train_dataset.far + self.use_ndc = train_dataset.use_ndc + self.num_images = train_dataset.num_images + self.batch_size = cfg.batch_size + + if "save_data" not in kwargs or kwargs["save_data"]: + self.all_rays = torch.clone(train_dataset.all_rays) + self.all_rgb = torch.clone(train_dataset.all_rgb) + + # Current + self.current_rays = self.all_rays + self.current_rgb = self.all_rgb + + # Prepare + self.prepare_data() + + def compute_stats(self): + all_rays = self.all_rays.view(self.num_images, self.img_wh[1] * self.img_wh[0], -1) + + ## Per view statistics + self.all_means = [] + self.all_stds = [] + + for idx in range(self.num_images): + cur_rays = all_rays[idx] + + self.all_means += [cur_rays.mean(0)] + self.all_stds += [cur_rays.std(0)] + + self.all_means = torch.stack(self.all_means, 0) + self.all_stds = torch.stack(self.all_stds, 0) + + ## Full dataset statistics + self.pos_mean = self.all_rays[..., :3].mean(0) + self.pos_std = self.all_rays[..., :3].std(0) + + self.dir_mean = self.all_rays[..., 3:].mean(0) + self.dir_std = self.all_rays[..., 3:].std(0) + + self.rgb_mean = self.all_rgb.mean(0) + self.rgb_std = self.all_rgb.std(0) + + def prepare_data(self): + self.compute_stats() + self.shuffle() + + def shuffle(self): + pass + + def __len__(self): + return len(self.all_rays) + + def jitter(self, rays, jitter=None): + if jitter is not None: + jitter_rays = rays + + if "pos" in jitter: + jitter_rays = self.jitter_ray_origins(jitter_rays, jitter) + + if "dir" in jitter: + jitter_rays = self.jitter_ray_directions(jitter_rays, jitter) + + return jitter_rays + else: + return rays + + def get_batch(self, batch_idx, batch_size, jitter=None): + batch = {} + + ## Get random rays + batch["rays"] = self.get_random_rays(batch_size, self.cfg.range) + + ## Jitter + batch["jitter_rays"] = self.jitter(batch["rays"], jitter) + + return batch + + def get_random_rays(self, num_rays, ray_range): + ray_dim = self.all_rays.shape[-1] // 2 + + pos_rand = torch.randn((num_rays, ray_dim)) * self.pos_std[None] * ray_range.pos + rays_o = self.pos_mean[None] + pos_rand + + dir_rand = torch.randn((num_rays, ray_dim)) * self.dir_std[None] * ray_range.dir + rays_d = self.dir_mean[None] + dir_rand + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + return torch.cat([rays_o, rays_d], -1) + + def jitter_ray_origins(self, rays, jitter): + ray_dim = self.all_rays.shape[-1] // 2 + + pos_rand = ( + torch.randn((rays.shape[0], jitter.bundle_size, ray_dim), device=rays.device) + * self.pos_std[None].type_as(rays) + * jitter.pos + ) + + rays = rays.view(rays.shape[0], -1, rays.shape[-1]) + if rays.shape[1] == 1: + rays = rays.repeat(1, jitter.bundle_size, 1) + + rays_o = rays[..., :ray_dim] + pos_rand.type_as(rays) + + return torch.cat([rays_o, rays[..., ray_dim:]], -1) + + def jitter_ray_directions(self, rays, jitter): + ray_dim = self.all_rays.shape[-1] // 2 + + dir_rand = ( + torch.randn((rays.shape[0], jitter.bundle_size, ray_dim), device=rays.device) + * self.dir_std[None].type_as(rays) + * jitter.dir + ) + + 
rays = rays.view(rays.shape[0], -1, rays.shape[-1]) + if rays.shape[1] == 1: + rays = rays.repeat(1, jitter.bundle_size, 1) + + rays_d = rays[..., ray_dim:] + dir_rand.type_as(rays) + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + return torch.cat([rays[..., :ray_dim], rays_d], -1) + + +class RandomPixelDataset(Dataset): + def __init__(self, cfg, train_dataset=None, **kwargs): + super().__init__() + + self.cfg = cfg + self.pixels_per_image = cfg.batch_size if "pixels_per_image" in cfg else None + self.use_ndc = train_dataset.use_ndc + self.prepare_data(train_dataset) + + def prepare_data(self, train_dataset): + # Create tensors + self.all_rays = [] + self.all_rgb = [] + + if self.use_ndc: + self.all_ndc_rays = [] + + # Random rays for each training image + if self.pixels_per_image is None: + self.pixels_per_image = train_dataset.img_wh[1] * train_dataset.img_wh[0] + + H, W = train_dataset.img_wh[1], train_dataset.img_wh[0] + + for i in range(train_dataset.num_images): + # Get random directions + cur_pixels = get_random_pixels( + self.pixels_per_image, + H, + W, + ) + cur_directions = get_ray_directions_from_pixels_K( + cur_pixels, train_dataset.K, centered_pixels=train_dataset.centered_pixels + ) + + # Sample rays + c2w = torch.FloatTensor(train_dataset.poses[i]) + cur_rays = torch.cat(list(get_rays(cur_directions, c2w)), -1) + + # Sample pixel colors + cur_rgb = train_dataset.all_rgb.view(train_dataset.num_images, H, W, -1)[i].unsqueeze(0) + cur_rgb = sample_images_at_xy(cur_rgb, cur_pixels, H, W) + + # Append + self.all_rays.append(cur_rays.reshape(-1, 6)) + self.all_rgb.append(cur_rgb.reshape(-1, 3)) + + if self.use_ndc: + self.all_ndc_rays.append(train_dataset.to_ndc(self.all_rays[-1])) + + # Concat tensors + self.all_rays = torch.cat(self.all_rays, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + + if self.use_ndc: + self.all_ndc_rays = torch.cat(self.all_ndc_rays, 0) + + def shuffle(self): + perm = torch.tensor(np.random.permutation(len(self))) + self.all_rays = self.all_rays[perm] + self.all_rgb = self.all_rgb[perm] + + if self.use_ndc: + self.all_ndc_rays = self.all_ndc_rays[perm] + + def __len__(self): + return len(self.all_rays) + + def jitter(self, rays, jitter=None): + return rays + + def get_batch(self, batch_idx, batch_size, jitter=None): + batch = {} + batch_start = batch_size * batch_idx + + if self.use_ndc: + batch["rays"] = self.all_ndc_rays[batch_start : batch_start + batch_size] + else: + batch["rays"] = self.all_rays[batch_start : batch_start + batch_size] + + batch["rgb"] = self.all_rgb[batch_start : batch_start + batch_size] + + return batch + + +class RandomViewSubsetDataset(RandomRayDataset): + def __init__(self, cfg, train_dataset=None, **kwargs): + self.num_images = len(train_dataset.image_paths) + self.num_views = train_dataset.num_images if cfg.dataset.num_views == "all" else cfg.dataset.num_views + + self.poses = np.tile(np.eye(4)[None], (self.num_images, 1, 1)) + self.poses[..., :3, :4] = train_dataset.poses[..., :3, :4] + self.poses_inv = np.linalg.inv(self.poses) + self.intrinsics = train_dataset.get_intrinsics_screen_space() + self.current_poses_inv = self.poses_inv + + super().__init__(cfg, train_dataset=train_dataset, **kwargs) + + def shuffle(self): + ## Get random view subset + self.current_views = self.get_random_views(self.num_views) + + self.current_rays = self.all_rays.view(self.num_images, self.img_wh[1] * self.img_wh[0], -1)[self.current_views] + self.current_rgb = self.all_rgb.view(self.num_images, self.img_wh[1] * 
self.img_wh[0], -1)[self.current_views] + self.current_poses = self.poses[self.current_views] + self.current_poses_inv = np.linalg.inv(self.current_poses) + + self.current_means = self.all_means[self.current_views] + self.current_stds = self.all_stds[self.current_views] + + print(self.current_views) + + def __len__(self): + return len(self.all_rays) + + def __getitem__(self, idx): + return { + "rays": self.random_rays[idx], + "jitter_rays": self.jitter_rays[idx], + } + + def get_random_views(self, n_views): + if self.num_views == self.num_images: + return list(range(self.num_images)) + else: + return list(np.random.choice(np.arange(0, self.num_images), size=n_views, replace=False)) + + def get_random_rays_convex_hull(self, num_rays, ray_range): + rays = self.current_rays + rays = rays[:, torch.randperm(rays.shape[1])] + rays = rays[:, :num_rays] + + weights = torch.rand(num_rays, self.num_views).type_as(rays) + weights = weights / (weights.sum(-1).unsqueeze(-1) + 1e-8) + weights = weights.permute(1, 0) + + rays = rays * weights.unsqueeze(-1) + rays = rays.sum(0) + + rays_o = rays[..., 0:3] + rays_d = rays[..., 3:6] + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + return torch.cat([rays_o, rays_d], -1) + + def project_points(self, P, points): + points = torch.cat([points, torch.ones_like(points[..., -1:])], dim=-1) + points = points.unsqueeze(0) + points = (P @ points.permute(0, 2, 1)).permute(0, 2, 1) + pixels = points[..., :2] / (-points[..., -1:]) + + return pixels + + def lookup_points(self, points): + # Projection matrix + poses_inv = torch.Tensor(self.current_poses_inv).type_as(points)[..., :3, :4] + K = torch.Tensor(self.intrinsics).type_as(points).unsqueeze(0) + P = K @ poses_inv + + # Project points + pixels = self.project_points(P, points) + + # Valid mask + valid_mask = (pixels[..., 0] > -1) & (pixels[..., 0] < 1) & (pixels[..., 1] > -1) & (pixels[..., 1] < 1) + valid_mask = valid_mask.type_as(points).detach()[..., None] + + # Weights + camera_centers = ( + torch.Tensor(self.current_poses).type_as(points)[..., None, :3, -1].repeat(1, points.shape[0], 1) + ) + camera_dirs = torch.nn.functional.normalize(points.unsqueeze(0) - camera_centers, p=2.0, dim=-1) + camera_rays = torch.cat([camera_centers, camera_dirs], dim=-1) + + # Lookup + pixels = pixels.view(self.num_views, -1, 1, 2) + rgb = self.current_rgb.permute(0, 2, 1).view(self.num_views, 3, self.img_wh[1], self.img_wh[0]).type_as(points) + values = torch.nn.functional.grid_sample(rgb, pixels) + values = values.permute(0, 2, 3, 1).reshape(self.num_views, -1, 3) + + return values, camera_rays, valid_mask + + def project_points_single(self, P, points): + points = torch.cat([points, torch.ones_like(points[..., -1:])], dim=-1) + points = (P @ points.permute(0, 2, 1)).permute(0, 2, 1) + pixels = points[..., :2] / (-points[..., -1:]) + + return pixels + + def lookup_points_single(self, points, weights=None): + # Projection matrix + poses_inv = torch.Tensor(self.current_poses_inv).type_as(points)[..., :3, :4] + K = torch.Tensor(self.intrinsics).type_as(points).unsqueeze(0) + P = K @ poses_inv + + # Project points + pixels = self.project_points_single(P, points) + + # Valid mask + valid_mask = (pixels[..., 0] > -1) & (pixels[..., 0] < 1) & (pixels[..., 1] > -1) & (pixels[..., 1] < 1) + valid_mask = valid_mask.type_as(points).detach()[..., None] + + # Weights + camera_centers = ( + torch.Tensor(self.current_poses).type_as(points)[..., None, :3, -1].repeat(1, points.shape[1], 1) + ) + camera_dirs = 
torch.nn.functional.normalize(points - camera_centers, p=2.0, dim=-1) + camera_rays = torch.cat([camera_centers, camera_dirs], dim=-1) + + # Lookup + pixels = pixels.view(self.num_views, -1, 1, 2) + rgb = self.current_rgb.permute(0, 2, 1).view(self.num_views, 3, self.img_wh[1], self.img_wh[0]).type_as(points) + values = torch.nn.functional.grid_sample(rgb, pixels) + values = values.permute(0, 2, 3, 1).reshape(self.num_views, -1, 3) + + return values, camera_rays, valid_mask + + +class RandomRayLightfieldDataset(RandomRayDataset): + def __init__(self, cfg, train_dataset=None): + self.num_images = len(train_dataset.image_paths) + self.size = len(train_dataset) + + self.uv_plane = cfg.dataset.uv_plane + self.st_plane = cfg.dataset.st_plane + + if "st_scale" in cfg.dataset and cfg.dataset.st_scale is not None: + self.st_scale = cfg.dataset.st_scale + elif train_dataset is not None and "lightfield" in train_dataset.dataset_cfg: + self.st_scale = train_dataset.st_scale + else: + self.st_scale = 1.0 + + super().__init__(cfg, train_dataset, save_data=False) + + def get_random_rays(self, num_rays, ray_range): + st = (torch.rand((num_rays, 2)) * 2 - 1) * ray_range.pos + + s = st[..., 0] * self.st_scale + t = st[..., 1] * self.st_scale + + uv = (torch.rand((num_rays, 2)) * 2 - 1) * ray_range.dir + + u = uv[..., 0] + v = uv[..., 1] + + rays = torch.stack( + [ + s, + t, + self.st_plane * torch.ones_like(s), + u - s, + v - t, + (self.uv_plane - self.st_plane) * torch.ones_like(s), + ], + -1, + ) + + rays = torch.cat([rays[..., 0:3], torch.nn.functional.normalize(rays[..., 3:6], p=2.0, dim=-1)], -1) + + return rays + + def jitter_ray_directions(self, rays, jitter): + dir_rand = torch.randn((rays.shape[0], jitter.bundle_size, 2), device=rays.device) * jitter.dir + + rays = rays.view(rays.shape[0], -1, rays.shape[-1]) + if rays.shape[1] == 1: + rays = rays.repeat(1, jitter.bundle_size, 1) + + rays_d = torch.cat([rays[..., 3:5] + dir_rand.type_as(rays), rays[..., 5:]], -1) + + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + rays = torch.cat( + [ + rays[..., :3], + rays_d, + ], + -1, + ) + + return rays + + def jitter_ray_origins(self, rays, jitter): + pos_rand = torch.randn((rays.shape[0], jitter.bundle_size, 2), device=rays.device) * jitter.pos * self.st_scale + + rays = rays.view(rays.shape[0], -1, rays.shape[-1]) + if rays.shape[1] == 1: + rays = rays.repeat(1, jitter.bundle_size, 1) + + rays_o = rays[..., :2] + pos_rand.type_as(rays) + + rays = torch.cat( + [ + rays_o, + rays[..., 2:], + ], + -1, + ) + + return rays + + def __len__(self): + return len(self.random_rays) + + def __getitem__(self, idx): + return { + "rays": self.random_rays[idx], + "jitter_rays": self.jitter_rays[idx], + } diff --git a/datasets/shiny.py b/datasets/shiny.py new file mode 100644 index 0000000..71a9776 --- /dev/null +++ b/datasets/shiny.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import os + +import numpy as np +import torch +from PIL import Image + +from utils.intersect_utils import intersect_axis_plane +from utils.pose_utils import correct_poses_bounds, create_spiral_poses, interpolate_poses +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K + +from .llff import LLFFDataset + + +class ShinyDataset(LLFFDataset): + def __init__(self, cfg, split="train", **kwargs): + self.dense = cfg.dataset.collection == "cd" or cfg.dataset.collection == "lab" + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + with self.pmgr.open(os.path.join(self.root_dir, "poses_bounds.npy"), "rb") as f: + poses_bounds = np.load(f) + + with self.pmgr.open(os.path.join(self.root_dir, "hwf_cxcy.npy"), "rb") as f: + hwfc = np.load(f) + + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images/"))) + self.camera_ids = np.linspace(0, len(self.image_paths) - 1, len(self.image_paths)) + self.total_num_views = len(self.image_paths) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + if self.split in ["train", "val"]: + assert len(poses_bounds) == len( + self.image_paths + ), "Mismatch between number of images and number of poses! Please rerun COLMAP!" + + poses = poses_bounds[:, :12].reshape(-1, 3, 4) + self.bounds = poses_bounds[:, -2:] + + # Step 1: rescale focal length according to training resolution + H, W, self.focal = hwfc[:3, 0] + self.cx, self.cy = hwfc[-2:, 0] + + self.K = np.eye(3) + self.K[0, 0] = self.focal * self.img_wh[0] / W + self.K[0, 2] = self.cx * self.img_wh[0] / W + self.K[1, 1] = self.focal * self.img_wh[1] / H + self.K[1, 2] = self.cy * self.img_wh[1] / H + + # Step 2: correct poses, bounds + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds, use_train_pose=True) + + with self.pmgr.open(os.path.join(self.root_dir, "planes.txt"), "r") as f: + planes = [float(i) for i in f.read().strip().split(" ")] + + self.near = planes[0] * 0.95 + self.far = planes[1] * 1.05 + self.depth_range = np.array([self.near * 2.0, self.far]) + + # Step 3: Ray directions for all pixels + self.centered_pixels = True + self.directions = get_ray_directions_K( + self.img_wh[1], self.img_wh[0], self.K, centered_pixels=self.centered_pixels + ) + + # Step 4: Holdout validation images + if len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.camera_ids = self.camera_ids[val_indices] + self.poses = self.poses[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.camera_ids = self.camera_ids[train_indices] + self.poses = self.poses[train_indices] + + def to_ndc(self, rays): + return 
get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def prepare_render_data(self): + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + if self.dense: + radii = np.percentile(np.abs(self.poses[..., 3]), 50, axis=0) + self.poses = create_spiral_poses(self.poses, radii, focus_depth * 100) + else: + radii = np.percentile(np.abs(self.poses[..., 3]), 85, axis=0) + self.poses = create_spiral_poses(self.poses, radii, focus_depth * 2) + + self.poses = np.stack(self.poses, axis=0) + self.poses[..., :3, 3] = self.poses[..., :3, 3] - 0.1 * close_depth * self.poses[..., :3, 2] + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + +class DenseShinyDataset(ShinyDataset): + def __init__(self, cfg, split="train", **kwargs): + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + ## Bounds + with self.pmgr.open(os.path.join(self.root_dir, "bounds.npy"), "rb") as f: + bounds = np.load(f) + + self.bounds = bounds[:, -2:] + + ## Intrinsics + with self.pmgr.open(os.path.join(self.root_dir, "hwf_cxcy.npy"), "rb") as f: + hwfc = np.load(f) + + ## Poses + with self.pmgr.open(os.path.join(self.root_dir, "poses.npy"), "rb") as f: + poses = np.load(f) + + ## Image paths + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images/"))) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + ## Skip + row_skip = self.dataset_cfg.train_row_skip + col_skip = self.dataset_cfg.train_col_skip + + poses_skipped = [] + image_paths_skipped = [] + + for row in range(self.dataset_cfg.num_rows): + for col in range(self.dataset_cfg.num_cols): + idx = row * self.dataset_cfg.num_cols + col + + if self.split == "train" and ( + (row % row_skip) != 0 or (col % col_skip) != 0 or (idx % self.val_skip) == 0 + ): + continue + + if (self.split == "val" or self.split == "test") and ( + ((row % row_skip) == 0 and (col % col_skip) == 0) and (idx % self.val_skip) != 0 + ): + continue + + poses_skipped.append(poses[idx]) + image_paths_skipped.append(self.image_paths[idx]) + + poses = np.stack(poses_skipped, axis=0) + self.poses = poses.reshape(-1, 3, 5) + self.image_paths = image_paths_skipped + + # Step 1: rescale focal length according to training resolution + H, W, self.focal = hwfc[:3, 0] + self.cx, self.cy = hwfc[-2:, 0] + + self.K = np.eye(3) + self.K[0, 0] = self.focal * self.img_wh[0] / W + self.K[0, 2] = self.cx * self.img_wh[0] / W + self.K[1, 1] = self.focal * self.img_wh[1] / H + self.K[1, 2] = self.cy * self.img_wh[1] / H + + # Step 2: correct poses, bounds + self.near = self.bounds.min() + self.far = self.bounds.max() + + # Step 3: Ray directions for all pixels + self.centered_pixels = True + self.directions = get_ray_directions_K( + self.img_wh[1], self.img_wh[0], self.K, centered_pixels=self.centered_pixels + ) diff --git a/datasets/spaces.py b/datasets/spaces.py new file mode 100644 index 0000000..e5d7237 --- /dev/null +++ b/datasets/spaces.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python +# 
Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import json +import os + +import numpy as np +import torch +from PIL import Image +from scipy.spatial.transform import Rotation + +from utils.pose_utils import ( + average_poses, + center_poses_with, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + +from .base import Base5DDataset + + +class SpacesDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + # Load meta + with self.pmgr.open(os.path.join(self.root_dir, "models.json"), "r") as f: + self.meta = json.load(f) + + # Train and test split paths + with self.pmgr.open(os.path.join(self.root_dir, "train_image.txt"), "r") as f: + self.train_images = f.readlines() + self.train_images = [os.path.join(self.root_dir, l.strip()) for l in self.train_images] + + with self.pmgr.open(os.path.join(self.root_dir, "val_image.txt"), "r") as f: + self.val_images = f.readlines() + self.val_images = [os.path.join(self.root_dir, l.strip()) for l in self.val_images] + + with self.pmgr.open(os.path.join(self.root_dir, "ref_image.txt"), "r") as f: + self.ref_image = os.path.join(self.root_dir, f.read().split(" ")[0].strip()) + + # Populate vars + self.image_paths = [] + self.intrinsics = [] + self.poses = [] + + for rig in self.meta: + for camera in rig: + image_path = os.path.join(self.root_dir, camera["relative_path"]) + + if image_path not in self.train_images and image_path not in self.val_images: + continue + + self.image_paths.append(image_path) + + width_factor = self.img_wh[0] / camera["width"] + height_factor = self.img_wh[1] / camera["height"] + + if camera["height"] != self.img_wh[1]: + print(camera["height"], camera["principal_point"][1]) + + pa = camera["pixel_aspect_ratio"] + K = np.eye(3) + K = np.array( + [ + [camera["focal_length"] * width_factor, 0.0, camera["principal_point"][0] * width_factor], + [ + 0.0, + pa * camera["focal_length"] * height_factor, + camera["principal_point"][1] * height_factor, + ], + [0.0, 0.0, 1.0], + ] + ) + + self.intrinsics.append(K) + + # Pose + R = Rotation.from_rotvec(camera["orientation"]).as_matrix() + T = np.array(camera["position"]) + + pose = np.eye(4) + pose[:3, :3] = R.T + pose[:3, -1] = T + + pose_pre = np.eye(4) + pose_pre[1, 1] *= -1 + pose_pre[2, 2] *= -1 + pose = pose_pre @ pose @ pose_pre + + self.poses.append(pose[:3, :4]) + + # Camera IDs & other + self.K = self.intrinsics[0] + self.ref_idx = self.image_paths.index(self.ref_image) + self.intrinsics = np.stack(self.intrinsics) + self.poses = np.stack(self.poses) + + self.camera_ids = np.linspace(0, len(self.image_paths) - 1, len(self.image_paths)) + self.total_num_views = len(self.image_paths) + + # Bounds + with self.pmgr.open(os.path.join(self.root_dir, "planes.txt"), "r") as f: + planes = [float(i) for i in f.read().strip().split(" ")] + + self.bounds = np.array([planes[0], planes[1]]) + + # Correct poses & bounds + poses = np.copy(self.poses) + + self.poses, self.poses_avg = center_poses_with(poses, poses[self.ref_idx : self.ref_idx + 1]) + + if not self.use_ndc: + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds, flip=False, center=False) + + self.near = self.bounds.min() * 0.95 
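+        # Pad the measured scene bounds by ~5% so the near/far planes sit just outside the scene (same heuristic used by the other LLFF-style loaders)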
+ self.far = self.bounds.max() * 1.05 + self.depth_range = np.array([self.near * 2.0, self.far]) + + # Holdout + val_indices = [i for i in range(len(self.image_paths)) if self.image_paths[i] in self.val_images] + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.camera_ids = self.camera_ids[val_indices] + self.poses = self.poses[val_indices] + self.intrinsics = self.intrinsics[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.camera_ids = self.camera_ids[train_indices] + self.poses = self.poses[train_indices] + self.intrinsics = self.intrinsics[train_indices] + + def get_intrinsics(self): + return self.K + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + if self.split != "train" or self.split == "render": + camera_id = 1 + else: + camera_id = self.camera_ids[idx] + + if self.split != "render": + K = torch.FloatTensor(self.intrinsics[idx]) + else: + K = torch.FloatTensor(self.intrinsics[0]) + + print(f"Loading image {idx}") + + # Get rays + directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=True).view(-1, 3) + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(directions, c2w) + rays = torch.cat([rays_o, rays_d], dim=-1) + + # Convert to NDC + if self.use_ndc: + rays = self.to_ndc(rays) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * camera_id], dim=-1) + return rays + + def get_rgb(self, idx): + # Colors + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGB") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img diff --git a/datasets/stanford.py b/datasets/stanford.py new file mode 100644 index 0000000..82bf106 --- /dev/null +++ b/datasets/stanford.py @@ -0,0 +1,464 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import os + +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import correct_poses_bounds, create_spiral_poses, interpolate_poses +from utils.ray_utils import get_lightfield_rays, get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + +from .lightfield import EPIDataset, LightfieldDataset +from .llff import LLFFDataset + + +class StanfordLightfieldDataset(LightfieldDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_file_coords = ( + cfg.dataset.lightfield.use_file_coords if "use_file_coords" in cfg.dataset.lightfield else False + ) + + super().__init__(cfg, split, **kwargs) + + if self.split == "train" and self.use_file_coords: + self.poses = [] + + for (s_idx, t_idx) in self.all_st_idx: + idx = t_idx * self.cols + s_idx + coord = self.normalize_coord(self.camera_coords[idx]) + self.poses.append(coord) + + def read_meta(self): + self.image_paths = sorted(self.pmgr.ls(self.root_dir)) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + self.camera_coords = [] + + if self.use_file_coords: + for image_path in self.image_paths: + if self.dataset_cfg.collection in ["beans", "knights", "tarot", "tarot_small"]: + yx = image_path.split("_")[-2:] + y = -float(yx[0]) + x = float(yx[1].split(".png")[0]) + else: + yx = image_path.split("_")[-3:-1] + y, x = float(yx[0]), float(yx[1]) + + self.camera_coords.append((x, y)) + + def get_camera_range(self): + xs = [coord[0] for coord in self.camera_coords] + ys = [coord[1] for coord in self.camera_coords] + + min_x, max_x = np.min(xs), np.max(xs) + min_y, max_y = np.min(ys), np.max(ys) + + return (min_x, max_x), (min_y, max_y) + + def get_camera_center(self): + idx = (self.rows // 2) * self.cols + self.cols // 2 + return self.camera_coords[idx] + + def normalize_coord(self, coord): + x_range, y_range = self.get_camera_range() + + # x_c, y_c = self.get_camera_center() + # norm_x = 2 * (coord[0] - x_c) / (x_range[1] - x_range[0]) + # norm_y = 2 * (coord[1] - y_c) / (x_range[1] - x_range[0]) + + aspect = (x_range[1] - x_range[0]) / (y_range[1] - y_range[0]) + norm_x = ((coord[0] - x_range[0]) / (x_range[1] - x_range[0])) * 2 - 1 + norm_y = (((coord[1] - y_range[0]) / (y_range[1] - y_range[0])) * 2 - 1) / aspect + + return (norm_x, norm_y) + + def get_coords(self, s_idx, t_idx): + if not self.use_file_coords: + return super().get_coords(s_idx, t_idx) + + idx = t_idx * self.cols + s_idx + coord = self.normalize_coord(self.camera_coords[idx]) + + if self.split == "render": + st_scale = self.vis_st_scale + else: + st_scale = self.st_scale + + return get_lightfield_rays( + self.img_wh[0], + self.img_wh[1], + coord[0], + coord[1], + self.aspect, + near=self.near_plane, + far=self.far_plane, + st_scale=st_scale, + uv_scale=self.uv_scale, + ) + + def get_rgb(self, s_idx, t_idx): + idx = t_idx * self.cols + s_idx + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGB") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = 
img.resize(self.img_wh, Image.BOX)
+
+        img = self.transform(img)
+        img = img.view(3, -1).permute(1, 0)
+
+        return img
+
+
+class StanfordEPIDataset(EPIDataset):
+    def __init__(self, cfg, split="train", **kwargs):
+
+        super().__init__(cfg, split, **kwargs)
+
+    def read_meta(self):
+        self.image_paths = sorted(self.pmgr.ls(self.root_dir))
+
+        if self.img_wh is None:
+            image_path = self.image_paths[0]
+
+            with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file:
+                img = np.array(Image.open(im_file).convert("RGB"))
+
+            self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample)
+            self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample)
+            self.aspect = float(self.img_wh[0]) / self.img_wh[1]
+
+    def get_coords(self):
+        if self.dataset_cfg.collection in ["tarot_small", "tarot", "chess"]:
+            u = torch.linspace(-1, 1, self.img_wh[0], dtype=torch.float32)
+            s = torch.linspace(1, -1, self.img_wh[1] * self.supersample, dtype=torch.float32) * self.st_scale
+        else:
+            u = torch.linspace(-1, 1, self.img_wh[0], dtype=torch.float32)
+            s = torch.linspace(-1, 1, self.img_wh[1] * self.supersample, dtype=torch.float32) * self.st_scale
+
+        su = list(torch.meshgrid([s, u]))
+        return torch.stack(su, -1).view(-1, 2)
+
+    def get_rgb(self):
+        image_path = self.image_paths[0]
+
+        with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file:
+            img = Image.open(im_file).convert("RGB")
+
+        img = img.resize(self._img_wh, Image.LANCZOS)
+
+        if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]:
+            img = img.resize(self.img_wh, Image.BOX)
+
+        img = self.transform(img)
+        img = img.view(3, -1).permute(1, 0)
+
+        return img
+
+
+class StanfordLLFFDataset(LLFFDataset):
+    def __init__(self, cfg, split="train", **kwargs):
+
+        # Scale of ST plane relative to UV plane
+        st_scale_dict = {
+            "tarot": 0.125,
+            "tarot_small": 0.125,
+            "knights": 0.125,
+            "bracelet": 0.125,
+ } + + if "st_scale" in cfg.dataset: + self.st_scale = cfg.dataset.st_scale + else: + self.st_scale = st_scale_dict.get(cfg.dataset.collection, 1.0) + + # Near, far plane locations + self.near_plane = cfg.dataset.near if "near" in cfg.dataset else -1.0 + self.far_plane = cfg.dataset.far if "far" in cfg.dataset else 0.0 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + # Camera coords + self.image_paths = sorted(self.pmgr.ls(self.root_dir)) + + # Get width, height + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + # Get camera coords + self.camera_coords = [] + + for image_path in self.image_paths: + if self.dataset_cfg.collection in ["beans", "knights", "tarot", "tarot_small"]: + yx = image_path.split("_")[-2:] + y = -float(yx[0]) + x = float(yx[1].split(".png")[0]) + else: + yx = image_path.split("_")[-3:-1] + y, x = float(yx[0]), float(yx[1]) + + self.camera_coords.append((x, y)) + + self.camera_coords = np.array(self.camera_coords) + self.camera_min = np.min(self.camera_coords, axis=0) + self.camera_max = np.max(self.camera_coords, axis=0) + + self.camera_coords = (self.camera_coords - self.camera_min) / (self.camera_max - self.camera_min) * 2 - 1 + st_aspect = (self.camera_max[0] - self.camera_min[0]) / (self.camera_max[1] - self.camera_min[1]) + self.camera_coords[:, 1] /= st_aspect + self.camera_coords *= self.st_scale + + # Set up poses + self.poses = np.tile(np.eye(4, 4)[..., None], [1, 1, len(self.image_paths)]) + self.poses[:, 1:3, :] *= -1 + self.poses[:2, 3, :] = self.camera_coords.T + self.poses[2, 3, :] = self.near_plane + self.poses = self.poses.transpose(2, 0, 1) + self.poses = self.poses[:, :3, :4] + + # Set up intrinsics + focal = 1 + pixel_scale = self.img_wh[0] / 2 + + self.intrinsics = np.tile(np.eye(3)[..., None], [1, 1, len(self.image_paths)]) + self.intrinsics[0, 0, :] = focal * pixel_scale + self.intrinsics[1, 1, :] = focal * pixel_scale + self.intrinsics[0, 2, :] = self.camera_coords.T[0] * focal * pixel_scale + self.img_wh[0] / 2 + self.intrinsics[1, 2, :] = -self.camera_coords.T[1] * focal * pixel_scale + self.img_wh[1] / 2 + self.intrinsics = self.intrinsics.transpose(2, 0, 1) + + self.K = np.eye(3) + self.K[0, 0] = focal * pixel_scale + self.K[1, 1] = focal * pixel_scale + self.K[0, 2] = self.img_wh[0] / 2 + self.K[1, 2] = self.img_wh[1] / 2 + + ## Correct poses, bounds + self.bounds = np.array([0.25, 2.0]) + + if self.use_ndc: + self.poses, self.poses_avg, self.bounds = correct_poses_bounds( + np.copy(self.poses), np.copy(self.bounds), flip=False, center=True + ) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + self.depth_range = np.array([self.near * 2.0, self.far]) + + ## Holdout validation images + if self.val_set == "lightfield": + step = self.dataset_cfg.lightfield_step + rows = self.dataset_cfg.lightfield_rows + cols = self.dataset_cfg.lightfield_cols + val_indices = [] + + self.val_pairs = self.dataset_cfg.val_pairs if "val_pairs" in self.dataset_cfg else [] + self.val_all = (step == 1 and len(self.val_pairs) == 0) or self.val_all + + for row in range(rows): + for col in range(cols): + idx = row * rows + col + + if (row % step != 0 
or col % step != 0 or ([row, col] in self.val_pairs)) and not self.val_all: + val_indices += [idx] + + elif len(self.val_set) > 0 or self.val_all: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.intrinsics = self.intrinsics[val_indices] + self.poses = self.poses[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.intrinsics = self.intrinsics[train_indices] + self.poses = self.poses[train_indices] + + def prepare_train_data(self, reset=False): + self.num_images = len(self.image_paths) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + num_pixels = 0 + + for idx in range(len(self.image_paths)): + cur_coords = self.get_coords(idx) + cur_rgb = self.get_rgb(idx) + + # Coords + self.all_coords += [cur_coords] + + # Color + self.all_rgb += [cur_rgb] + + # Number of pixels + num_pixels += cur_rgb.shape[0] + + print("Full res images loaded:", num_pixels / (self.img_wh[0] * self.img_wh[1])) + + # Format / save loaded data + self.all_coords = torch.cat(self.all_coords, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + self.update_all_data() + + def update_all_data(self): + self.all_weights = self.get_weights() + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_weights, + ], + -1, + ) + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def prepare_render_data(self): + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + radii = np.percentile(np.abs(self.poses[..., 3] - np.mean(self.poses[..., 3], axis=0)), 50, axis=0) + self.poses = create_spiral_poses(self.poses, radii, focus_depth * 4) + + self.poses = np.stack(self.poses, axis=0) + self.poses[..., :3, 3] = self.poses[..., :3, 3] - 0.1 * close_depth * self.poses[..., :3, 2] + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + def get_coords(self, idx): + if self.split != "train" and not self.val_all: + cam_idx = 0 + else: + cam_idx = idx + + if self.split != "render": + K = torch.FloatTensor(self.intrinsics[idx]) + else: + K = torch.FloatTensor(np.copy(self.K)) + + c2w = torch.FloatTensor(self.poses[idx]) + + directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=False, flipped=True) + + # Convert to world space / NDC + rays_o, rays_d = get_rays(directions, c2w) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * cam_idx], dim=-1) + + # Return + return rays + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file: + img = Image.open(im_file) + img = img.convert("RGB") + + if img.size[0] != self._img_wh[0] or img.size[1] != 
self._img_wh[1]: + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img diff --git a/datasets/technicolor.py b/datasets/technicolor.py new file mode 100644 index 0000000..6edc461 --- /dev/null +++ b/datasets/technicolor.py @@ -0,0 +1,504 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT + +import csv +import gc +import json +import os + +os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" +import pdb +import random + +import cv2 +import numpy as np +import torch +from PIL import Image +from scipy.spatial.transform import Rotation + +from utils.pose_utils import ( + average_poses, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import ( + get_ndc_rays_fx_fy, + get_pixels_for_image, + get_ray_directions_K, + get_rays, + sample_images_at_xy, +) + +from .base import Base5DDataset, Base6DDataset + + +class TechnicolorDataset(Base6DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + self.num_frames = cfg.dataset.num_frames if "num_frames" in cfg.dataset else 1 + self.start_frame = cfg.dataset.start_frame if "start_frame" in cfg.dataset else 1 + + self.keyframe_step = cfg.dataset.keyframe_step if "keyframe_step" in cfg.dataset else 1 + self.num_keyframes = ( + cfg.dataset.num_keyframes if "num_keyframes" in cfg.dataset else self.num_frames // self.keyframe_step + ) + + self.load_full_step = cfg.dataset.load_full_step if "load_full_step" in cfg.dataset else 1 + self.subsample_keyframe_step = ( + cfg.dataset.subsample_keyframe_step if "subsample_keyframe_step" in cfg.dataset else 1 + ) + self.subsample_keyframe_frac = ( + cfg.dataset.subsample_keyframe_frac if "subsample_keyframe_frac" in cfg.dataset else 1.0 + ) + self.subsample_frac = cfg.dataset.subsample_frac if "subsample_frac" in cfg.dataset else 1.0 + + self.keyframe_offset = 0 + self.frame_offset = 0 + + self.num_chunks = cfg.dataset.num_chunks + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + W, H = self.img_wh + + # Image paths + self.num_rows = self.dataset_cfg.lightfield_rows + self.num_cols = self.dataset_cfg.lightfield_cols + rows = self.num_rows + cols = self.num_cols + self.images_per_frame = rows * cols + + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images/")))[ + self.images_per_frame * self.start_frame : self.images_per_frame * (self.start_frame + self.num_frames) + ] + + self.num_frames = len(self.image_paths) // self.images_per_frame + + # Poses + self.intrinsics = [] + self.poses = [] + + with self.pmgr.open(os.path.join(self.root_dir, "cameras_parameters.txt"), "r") as f: + reader = csv.reader(f, delimiter=" ") + + for idx, row in enumerate(reader): + if idx == 0: + continue + + row = [float(c) for c in row if c.strip() != ""] + + # Intrinsics + K = np.eye(3) + K[0, 0] = row[0] * self.img_wh[0] / 2048 + K[0, 2] = row[1] * self.img_wh[0] / 2048 + K[1, 1] 
= row[3] * row[0] * self.img_wh[1] / 1088 + K[1, 2] = row[2] * self.img_wh[1] / 1088 + self.intrinsics.append(K) + + # Pose + R = Rotation.from_quat([row[6], row[7], row[8], row[5]]).as_matrix() + pose = np.eye(4) + pose[:3, :3] = R.T + pose[:3, -1] = -R.T @ np.array(row[-3:]).T + + pose_pre = np.eye(4) + pose_pre[1, 1] *= -1 + pose_pre[2, 2] *= -1 + + pose = pose_pre @ pose @ pose_pre + self.poses.append(pose[:3, :4]) + + self.intrinsics = np.stack([self.intrinsics for i in range(self.num_frames)]).reshape(-1, 3, 3) + self.poses = np.stack([self.poses for i in range(self.num_frames)]).reshape(-1, 3, 4) + self.K = self.intrinsics[0] + + # Times + self.times = np.tile(np.linspace(0, 1, self.num_frames)[..., None], (1, self.images_per_frame)) + self.times = self.times.reshape(-1) + + ## Bounds, common for all scenes + if self.dataset_cfg.collection in ["painter"]: + self.near = 1.75 + self.far = 10.0 + elif self.dataset_cfg.collection in ["trains"]: + self.near = 0.65 + self.far = 10.0 + elif self.dataset_cfg.collection in ["theater"]: + self.near = 0.65 + self.far = 10.0 + elif self.dataset_cfg.collection in ["fabien"]: + self.near = 0.35 + # self.near = 0.5 + # self.near = 0.45 + # self.near = 0.4 + self.far = 2.0 + elif self.dataset_cfg.collection in ["birthday"]: + self.near = 1.75 + self.far = 10.0 + + # Broken file + if len(self.image_paths) > 377: + self.image_paths[377] = self.image_paths[361] + self.poses[377] = self.poses[361] + self.intrinsics[377] = self.intrinsics[361] + self.times[377] = self.times[361] + else: + self.near = 0.65 + self.far = 10.0 + + self.bounds = np.array([self.near, self.far]) + + ## Correct poses, bounds + poses = np.copy(self.poses) + + if self.use_ndc or self.correct_poses: + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds, flip=False, center=True) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + + ## Holdout validation images + if self.val_set == "lightfield": + step = self.dataset_cfg.lightfield_step + rows = self.dataset_cfg.lightfield_rows + cols = self.dataset_cfg.lightfield_cols + val_indices = [] + + self.val_pairs = self.dataset_cfg.val_pairs if "val_pairs" in self.dataset_cfg else [] + self.val_all = (step == 1 and len(self.val_pairs) == 0) or self.val_all + + for row in range(rows): + for col in range(cols): + idx = row * rows + col + + if (row % step != 0 or col % step != 0 or ([row, col] in self.val_pairs)) and not self.val_all: + val_indices += [frame * self.images_per_frame + idx for frame in range(self.num_frames)] + + elif len(self.val_set) > 0 or self.val_all: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.intrinsics = self.intrinsics[val_indices] + self.poses = self.poses[val_indices] + self.times = self.times[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.intrinsics = self.intrinsics[train_indices] + self.poses = self.poses[train_indices] + self.times = self.times[train_indices] + + def subsample(self, coords, rgb, frame): + if (frame % self.load_full_step) == 0: 
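+            # Every load_full_step-th frame is kept at full resolution and never subsampled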
+ return coords, rgb + elif (frame % self.subsample_keyframe_step) == 0: + subsample_every = int(np.round(1.0 / self.subsample_keyframe_frac)) + offset = self.keyframe_offset + self.keyframe_offset += 1 + # num_take = int(np.round(coords.shape[0] * self.subsample_keyframe_frac)) + # perm = torch.tensor( + # np.random.permutation(coords.shape[0]) + # )[:num_take] + else: + subsample_every = int(np.round(1.0 / self.subsample_frac)) + offset = self.frame_offset + self.frame_offset += 1 + # num_take = int(np.round(coords.shape[0] * self.subsample_frac)) + # perm = torch.tensor( + # np.random.permutation(coords.shape[0]) + # )[:num_take] + + # return coords[perm].view(-1, coords.shape[-1]), rgb[perm].view(-1, rgb.shape[-1]) + pixels = get_pixels_for_image(self.img_wh[1], self.img_wh[0]).reshape(-1, 2).long() + mask = ((pixels[..., 0] + pixels[..., 1] + offset) % subsample_every) == 0.0 + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def prepare_train_data(self, reset=False): + self.num_images = len(self.image_paths) + + # Shuffle the range + shuffled_range = random.sample(range(self.num_images), self.num_images) + + # Chunkify the shuffled range + chunk_size = (self.num_images + self.num_chunks - 1) // self.num_chunks + self.chunks = [shuffled_range[i : i + chunk_size] for i in range(0, self.num_images, chunk_size)] + self.chunk_num_pixels = [] + self.coords_chunk_paths = [] + self.rgb_chunk_paths = [] + cur_coords_chunk = [] + cur_rgb_chunk = [] + num_pixels = 0 + + for chunk_idx in range(len(self.chunks)): + coords_chunk_path = os.path.join(self.root_dir, "rays", f"coords_chunk_{chunk_idx}.pt") + rgb_chunk_path = os.path.join(self.root_dir, "rays", f"rgb_chunk_{chunk_idx}.pt") + if os.path.exists(coords_chunk_path) and os.path.exists(rgb_chunk_path): + self.coords_chunk_paths.append(coords_chunk_path) + self.rgb_chunk_paths.append(rgb_chunk_path) + print("Chunk %d loaded." % chunk_idx) + else: + image_indices = self.chunks[chunk_idx] + for idx in image_indices: + cur_coords = self.get_coords(idx) + cur_rgb = self.get_rgb(idx) + cur_frame = int(np.round(self.times[idx] * (self.num_frames - 1))) + + # Subsample + cur_coords, cur_rgb = self.subsample(cur_coords, cur_rgb, cur_frame) + + # Coords + cur_coords_chunk.append(cur_coords) + + # Color + cur_rgb_chunk.append(cur_rgb) + + # Number of pixels + num_pixels += cur_rgb.shape[0] + + # Format / save loaded data + coords_chunk = torch.cat(cur_coords_chunk, 0) + rgb_chunk = torch.cat(cur_rgb_chunk, 0) + + if not os.path.exists(os.path.dirname(coords_chunk_path)): + os.makedirs(os.path.dirname(coords_chunk_path)) + + torch.save(coords_chunk, coords_chunk_path) + + if not os.path.exists(os.path.dirname(rgb_chunk_path)): + os.makedirs(os.path.dirname(rgb_chunk_path)) + + torch.save(rgb_chunk, rgb_chunk_path) + + self.coords_chunk_paths.append(coords_chunk_path) + self.rgb_chunk_paths.append(rgb_chunk_path) + self.chunk_num_pixels.append(num_pixels) + print("Chunk %d saved: %d pixels." 
% (chunk_idx, num_pixels)) + + # Reset + cur_coords_chunk = [] + cur_rgb_chunk = [] + num_pixels = 0 + + # Format / save loaded data + self.all_coords = torch.load(self.coords_chunk_paths[0]) + self.all_rgb = torch.load(self.rgb_chunk_paths[0]) + self.current_chunk = 0 + self.update_all_data() + + def update_all_data(self): + self.all_weights = self.get_weights() + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_weights, + ], + -1, + ) + + def shift_chunk(self): + + self.current_chunk = (self.current_chunk + 1) % len(self.coords_chunk_paths) + self.all_coords = torch.load(self.coords_chunk_paths[self.current_chunk]) + self.all_rgb = torch.load(self.rgb_chunk_paths[self.current_chunk]) + print("loading", self.coords_chunk_paths[self.current_chunk]) + + self.all_weights = self.get_weights() + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_weights, + ], + -1, + ) + + return self.current_chunk + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def prepare_render_data(self): + # Get poses + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + poses_per_frame = self.poses.shape[0] // self.num_frames + poses_one_frame = self.poses[ + (self.num_frames // 2) * poses_per_frame : (self.num_frames // 2 + 1) * poses_per_frame + ] + poses_each_frame = interpolate_poses(self.poses[::poses_per_frame], self.render_supersample) + radii = np.percentile(np.abs(poses_one_frame[..., 3]), 60, axis=0) + radii[..., :2] *= 0.25 + + if self.num_frames > 1: + poses = create_spiral_poses( + poses_one_frame, + radii, + focus_depth * 100, + N=self.num_frames * self.render_supersample, + ) + + reference_pose = np.eye(4) + reference_pose[:3, :4] = self.poses[(self.num_frames // 2) * poses_per_frame] + reference_pose = np.linalg.inv(reference_pose) + + for pose_idx in range(len(poses)): + cur_pose = np.eye(4) + cur_pose[:3, :4] = poses[pose_idx] + poses[pose_idx] = poses_each_frame[pose_idx] @ (reference_pose @ cur_pose) + else: + poses = create_spiral_poses(poses_one_frame, radii, focus_depth * 100, N=120) + + self.poses = np.stack(poses, axis=0) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + # Get times + if (self.num_frames - 1) > 0: + self.times = np.linspace(0, self.num_frames - 1, len(self.poses)) + + if not self.render_interpolate_time: + self.times = np.round(self.times) + + self.times = self.times / (self.num_frames - 1) + else: + self.times = [0.0 for p in self.poses] + + # for i in range(100): + # self.poses[i] = self.poses[0] + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + if self.split != "train" and not self.val_all: + cam_idx = 3 + else: + cam_idx = idx % self.images_per_frame + + if self.split != "render": + K = torch.FloatTensor(self.intrinsics[idx]) + else: + K = torch.FloatTensor(self.intrinsics[0]) + + c2w = torch.FloatTensor(self.poses[idx]) + + time = self.times[idx] + + print("Loading time:", np.round(time * (self.num_frames - 1))) + + directions = 
get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=True) + + # Convert to world space / NDC + rays_o, rays_d = get_rays(directions, c2w) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * cam_idx], dim=-1) + + # Add times + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * time], dim=-1) + + # Return + return rays + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file) + img = img.convert("RGB") + + if img.size[0] != self._img_wh[0] or img.size[1] != self._img_wh[1]: + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + # img = img.view(4, -1).permute(1, 0) + # img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_intrinsics(self): + return self.intrinsics + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(idx), + "pose": self.poses[idx], + "time": self.times[idx], + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/datasets/video3d_ground_truth.py b/datasets/video3d_ground_truth.py new file mode 100644 index 0000000..31d9b97 --- /dev/null +++ b/datasets/video3d_ground_truth.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT + +import json +import os + +os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" +import cv2 +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import ( + average_poses, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import ( + get_ndc_rays_fx_fy, + get_pixels_for_image, + get_ray_directions_K, + get_rays, + sample_images_at_xy, +) + +from .base import Base6DDataset + + +class Video3DTimeGroundTruthDataset(Base6DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + self.num_keyframes = cfg.dataset.num_keyframes if "num_keyframes" in cfg.dataset else -1 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + W, H = self.img_wh + + self.frame_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir))) + self.num_frames = len(self.frame_paths) + if self.num_keyframes == -1: + self.num_keyframes = self.num_frames + self.keyframe_step = self.num_frames // self.num_keyframes + + ## Image and pose paths + self.image_paths = [] + self.pose_paths = [] + self.depth_paths = [] + + for frame_path in self.frame_paths: + all_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, frame_path))) + image_paths = [p for p in all_paths if p.endswith(".png")] + pose_paths = [p for p in all_paths if p.endswith(".json")] + depth_paths = [p for p in all_paths if p.endswith("_depth")] + + image_paths = [os.path.join(frame_path, p) for p in image_paths] + pose_paths = [os.path.join(frame_path, p) for p in pose_paths] + depth_paths = [os.path.join(frame_path, p) for p in depth_paths] + + self.image_paths += image_paths + self.pose_paths += pose_paths + self.depth_paths += depth_paths + + ## Load poses + poses = [] + self.reference_matrix = [] + self.times = [] + self.frames = [] + + for i, pose_path in enumerate(self.pose_paths): + with self.pmgr.open(os.path.join(self.root_dir, pose_path), "r") as f: + meta = json.load(f) + + if "frame" in meta: + frame = meta["frame"] + else: + frame = int(pose_path.split("/")[-2].split("frame_")[-1]) + + # Intrinsics + if i == 0: + self.meta = meta + self.focal_x = self.meta["normalized_focal_length_x"] + self.focal_y = self.meta["normalized_focal_length_y"] + self.principal_point_x = self.meta["normalized_principal_point_x"] + self.principal_point_y = self.meta["normalized_principal_point_y"] + self.start_frame = frame + self.end_frame = self.start_frame + self.num_frames - 1 + + # Reference matrix + if self.use_reference: + self.reference_matrix.append(np.array(meta["world_to_camera"])[:3, :4]) + else: + self.reference_matrix = np.eye(4) + + # Reference matrix + if self.use_reference: + self.reference_matrix = average_poses(np.stack(self.reference_matrix, 0)) + + # Get all poses + for i, pose_path in enumerate(self.pose_paths): + with self.pmgr.open(os.path.join(self.root_dir, pose_path), "r") as f: + meta = json.load(f) + + if "frame" in meta: + frame = meta["frame"] + else: + frame = int(pose_path.split("/")[-2].split("frame_")[-1]) + + frame_matrix = np.array(meta["camera_to_world"]) + pose = (self.reference_matrix @ frame_matrix)[:3, :4] + poses += [pose] + + # Time + if self.num_frames - 1 > 0: + self.times.append((frame - self.start_frame) / (self.num_frames 
- 1)) + self.frames.append(frame - self.start_frame) + else: + self.times.append(0.0) + self.frames.append(0) + + poses = np.stack(poses, axis=0) + self.times = np.array(self.times) + + ## Intrinsics + self.K = np.eye(3) + self.K[0, 0] = self.focal_x * W + self.K[0, 2] = self.principal_point_x * W + self.K[1, 1] = self.focal_y * H + self.K[1, 2] = self.principal_point_x * H + + ## Bounds, common for all scenes + # self.near = meta['near_clip'] + # self.far = meta['far_clip'] + self.near = 0.25 + self.far = 10.0 + self.bounds = np.array([self.near, self.far]) + + ## Correct poses, bounds + if self.use_ndc or self.correct_poses: + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds, flip=False, center=True) + else: + self.poses = poses + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + + ## Ray directions for all pixels, same for all images (same H, W, focal) + self.centered_pixels = True + self.directions = get_ray_directions_K(H, W, self.K, centered_pixels=self.centered_pixels) + + ## Holdout validation images + if self.val_set == "lightfield": + step = self.dataset_cfg.lightfield_step + rows = self.dataset_cfg.lightfield_rows + cols = self.dataset_cfg.lightfield_cols + val_indices = [] + + self.val_pairs = self.dataset_cfg.val_pairs if "val_pairs" in self.dataset_cfg else [] + self.val_all = (step == 1 and len(self.val_pairs) == 0) or self.val_all + + for idx, path in enumerate(self.image_paths): + n = int(path.split("_")[-1].split(".")[0]) + row = n // cols + col = n % cols + + if row % step != 0 or col % step != 0 or ((row, col) in self.val_pairs): + val_indices.append(idx) + + elif len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.depth_paths = [self.depth_paths[i] for i in val_indices] + self.poses = self.poses[val_indices] + self.times = self.times[val_indices] + self.frames = [self.frames[i] for i in val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.depth_paths = [self.depth_paths[i] for i in train_indices] + self.poses = self.poses[train_indices] + self.times = self.times[train_indices] + self.frames = [self.frames[i] for i in train_indices] + + def prepare_train_data(self): + self.num_images = len(self.image_paths) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + self.all_depth = [] + self.all_pixel_flow = [] + self.all_flow = [] + + for idx in range(len(self.image_paths)): + # for idx in range(75, 76): # TODO: Remove + # coords + self.all_coords += [self.get_coords(idx)] + + # Color + self.all_rgb += [self.get_rgb(idx)] + + # Depth + self.all_depth += [self.get_depth(idx)] + + # Flow + self.all_pixel_flow += [self.get_pixel_flow(idx)] + self.all_flow += [self.get_flow(idx)] + + # Format / save loaded data + self.update_all_data( + torch.cat(self.all_coords, 0), + torch.cat(self.all_rgb, 0), + torch.cat(self.all_depth, 0), + torch.cat(self.all_flow, 0), + ) + + def update_all_data(self, coords, rgb, depth, flow): + self.all_coords = coords + self.all_rgb = rgb + 
self.all_depth = depth + self.all_flow = flow + self.all_weights = self.get_weights() + + ## Patches + if self.use_patches or self.use_crop: + self._all_coords = torch.clone(self.all_coords) + self._all_rgb = torch.clone(self.all_rgb) + self._all_depth = torch.clone(self.all_depth) + self._all_flow = torch.clone(self.all_flow) + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_depth, + self.all_flow, + self.all_weights, + ], + -1, + ) + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["depth"] = batch["inputs"][..., self.all_coords.shape[-1] + 3 : self.all_coords.shape[-1] + 4] + batch["flow"] = batch["inputs"][..., self.all_coords.shape[-1] + 4 : self.all_coords.shape[-1] + 7] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def prepare_render_data(self): + # Get poses + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + poses_per_frame = self.poses.shape[0] // self.num_frames + poses_one_frame = self.poses[ + (self.num_frames // 2) * poses_per_frame : (self.num_frames // 2 + 1) * poses_per_frame + ] + poses_each_frame = interpolate_poses(self.poses[::poses_per_frame], self.render_supersample) + radii = np.percentile(np.abs(poses_one_frame[..., 3]), 80, axis=0) + + if self.num_frames > 1: + poses = create_spiral_poses( + poses_one_frame, + radii, + focus_depth * 100, + N=self.num_frames * self.render_supersample, + ) + + reference_pose = np.eye(4) + reference_pose[:3, :4] = self.poses[(self.num_frames // 2) * poses_per_frame] + reference_pose = np.linalg.inv(reference_pose) + + for pose_idx in range(len(poses)): + cur_pose = np.eye(4) + cur_pose[:3, :4] = poses[pose_idx] + poses[pose_idx] = poses_each_frame[pose_idx] @ (reference_pose @ cur_pose) + else: + poses = create_spiral_poses(poses_one_frame, radii, focus_depth * 100, N=120) + + self.poses = np.stack(poses, axis=0) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + # Get times + if (self.num_frames - 1) > 0: + self.times = np.linspace(0, self.num_frames - 1, len(self.poses)) + + if not self.render_interpolate_time: + self.times = np.round(self.times) + + self.times = self.times / (self.num_frames - 1) + else: + self.times = [0.0 for p in self.poses] + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + time = self.times[idx] + print("Loading time:", np.round(time * (self.num_frames - 1))) + rays_o, rays_d = get_rays(self.directions, c2w) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * time], dim=-1) + return rays + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGBA") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + 
img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def load_geometry(self, idx, prefix="depth", mode="exr"): + gt_path = os.path.join(self.root_dir, self.depth_paths[idx].replace("depth", prefix)) + gt_image_path = [p for p in self.pmgr.ls(gt_path) if p.endswith(mode)][0] + + depth_file = os.path.join(gt_path, gt_image_path) + + if mode == "exr": + img = cv2.imread(depth_file, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) + else: + img = np.load(depth_file) + + # Resize + img = cv2.resize(img, self._img_wh, interpolation=cv2.INTER_NEAREST) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = cv2.resize(img, self.img_wh, interpolation=cv2.INTER_NEAREST) + + # Transform + img = self.transform(np.copy(img)) + return img.view(img.shape[0], -1).permute(1, 0) + + def get_depth(self, idx, return_mask=False): + depth = self.load_geometry(idx, "depth")[..., 0:1] + + directions = torch.nn.functional.normalize(self.directions, p=2.0, dim=-1).view(-1, 3) + depth = depth / torch.abs(directions[..., 2:3]) + + mask = (depth < self.near) | (depth > self.far) + depth[depth < self.near] = self.near + depth[depth > self.far] = self.far + + if return_mask: + return depth, mask + else: + return depth + + def get_pixel_flow(self, idx): + # Pixel flow + pixel_flow = self.load_geometry(idx, "vector")[..., 1:3] + pixel_flow = torch.flip(pixel_flow, [1]) + pixel_flow[..., 1] *= -1 + pixel_flow = pixel_flow * self.img_wh[0] / 800 + + return pixel_flow + + def get_uv(self, idx): + # Pixel flow + uv = self.load_geometry(idx, "uv")[..., 1:3] + uv = torch.flip(uv[1:3], [-1]) + + return uv + + def get_flow(self, idx): + # Flow + return self.load_geometry(idx, "vector", mode="npy") + + def get_intrinsics(self): + K = np.eye(3) + K[0, 0] = self.focal_x * self.img_wh[0] + K[0, 2] = self.principal_point_x * self.img_wh[0] + K[1, 1] = self.focal_y * self.img_wh[1] + K[1, 2] = self.principal_point_x * self.img_wh[1] + + return K + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(idx), + "pose": self.poses[idx], + "time": self.times[idx], + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = { + "coords": self.get_coords(idx + 75), + "rgb": self.get_rgb(idx + 75), + "depth": self.get_depth(idx + 75), + "flow": self.get_flow(idx + 75), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/datasets/video3d_static.py b/datasets/video3d_static.py new file mode 100644 index 0000000..32ce38b --- /dev/null +++ b/datasets/video3d_static.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT + +import json +import os + +os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" +import cv2 +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import ( + average_poses, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import ( + get_ndc_rays_fx_fy, + get_pixels_for_image, + get_ray_directions_K, + get_rays, + sample_images_at_xy, +) + +from .base import Base5DDataset + + +class Video3DDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + W, H = self.img_wh + + ## Image paths + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images"))) + + ## Load poses + self.pose_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "cameras"))) + + poses = [] + + for i, pose_path in enumerate(self.pose_paths): + with self.pmgr.open(os.path.join(self.root_dir, "cameras", pose_path), "r") as f: + meta = json.load(f) + + if i == 0: + self.meta = meta + self.focal_x = self.meta["normalized_focal_length_x"] + self.focal_y = self.meta["normalized_focal_length_y"] + self.principal_point_x = self.meta["normalized_principal_point_x"] + self.principal_point_y = self.meta["normalized_principal_point_y"] + + # Correct pose + if "reference_world_to_camera" in meta and self.use_reference: + self.reference_matrix = np.array(meta["reference_world_to_camera"]) + else: + self.reference_matrix = np.eye(4) + + frame_matrix = np.array(meta["camera_to_world"]) + pose = (self.reference_matrix @ frame_matrix)[:3, :4] + poses += [pose] + + poses = np.stack(poses, axis=0) + + ## Intrinsics + self.K = np.eye(3) + self.K[0, 0] = self.focal_x * W + self.K[0, 2] = self.principal_point_x * W + self.K[1, 1] = self.focal_y * H + self.K[1, 2] = self.principal_point_x * H + + ## Bounds, common for all scenes + self.near = 0.75 + self.far = 4.0 + self.bounds = np.array([self.near, self.far]) + + ## Correct poses, bounds + if self.use_ndc or self.correct_poses: + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds, flip=False, center=True) + else: + self.poses = poses + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + + ## Ray directions for all pixels, same for all images (same H, W, focal) + self.centered_pixels = True + self.directions = get_ray_directions_K(H, W, self.K, centered_pixels=self.centered_pixels) + + ## Holdout validation images + if self.val_set == "lightfield": + step = self.dataset_cfg.lightfield_step + cols = self.dataset_cfg.lightfield_cols + val_indices = [] + + self.val_pairs = self.dataset_cfg.val_pairs if "val_pairs" in self.dataset_cfg else [] + self.val_all = (step == 1 and len(self.val_pairs) == 0) or self.val_all + + for idx, path in enumerate(self.image_paths): + n = int(path.split("_")[-1].split(".")[0]) + row = n // cols + col = n % cols + + if row % step != 0 or col % step != 0 or ((row, col) in self.val_pairs): + val_indices.append(idx) + + elif len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), 
self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.poses = self.poses[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.poses = self.poses[train_indices] + + def prepare_render_data(self): + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + radii = np.percentile(np.abs(self.poses[..., 3]), 50, axis=0) + camera_radius = 0.35 + + # self.poses = create_rotating_spiral_poses( + # [0.0, -0.2, 0.0], + # self.poses, + # camera_radius, + # [0.0, radii[1], camera_radius * 0.25], + # focus_depth * 100, + # [-1.0, 1.0], + # N=360 + # ) + self.poses = create_rotating_spiral_poses( + [0.0, 0.0, 0.0], + self.poses, + camera_radius, + [0.0, radii[1], camera_radius * 0.25], + focus_depth * 100, + [-1.0, 1.0], + N=360, + ) + # self.poses = create_rotating_spiral_poses( + # [0.0, 0.0, 0.35], + # self.poses, + # camera_radius, + # [0.0, radii[1], camera_radius * 0.25], + # focus_depth * 100, + # [-0.2, 0.2] + # ) + + self.poses = np.stack(self.poses, axis=0) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(self.directions, c2w) + + if self.use_ndc: + return self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + return torch.cat([rays_o, rays_d], dim=-1) + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGBA") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_intrinsics(self): + K = np.eye(3) + K[0, 0] = self.focal_x * self.img_wh[0] + K[0, 2] = self.principal_point_x * self.img_wh[0] + K[1, 1] = self.focal_y * self.img_wh[1] + K[1, 2] = self.principal_point_x * self.img_wh[1] + + return K diff --git a/datasets/video3d_time.py b/datasets/video3d_time.py new file mode 100644 index 0000000..c4462c3 --- /dev/null +++ b/datasets/video3d_time.py @@ -0,0 +1,428 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT + +import json +import os + +os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" +import cv2 +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import ( + average_poses, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import ( + get_ndc_rays_fx_fy, + get_pixels_for_image, + get_ray_directions_K, + get_rays, + sample_images_at_xy, +) + +from .base import Base6DDataset + + +class Video3DTimeDataset(Base6DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + self.num_frames = cfg.dataset.num_frames if "num_frames" in cfg.dataset else 1 + self.start_frame = cfg.dataset.start_frame if "start_frame" in cfg.dataset else 1 + self.keyframe_step = cfg.dataset.keyframe_step if "keyframe_step" in cfg.dataset else 1 + self.num_keyframes = ( + cfg.dataset.num_keyframes if "num_keyframes" in cfg.dataset else self.num_frames // self.keyframe_step + ) + + self.load_full_step = cfg.dataset.load_full_step if "load_full_step" in cfg.dataset else 1 + self.subsample_keyframe_step = ( + cfg.dataset.subsample_keyframe_step if "subsample_keyframe_step" in cfg.dataset else 1 + ) + self.subsample_keyframe_frac = ( + cfg.dataset.subsample_keyframe_frac if "subsample_keyframe_frac" in cfg.dataset else 1.0 + ) + self.subsample_frac = cfg.dataset.subsample_frac if "subsample_frac" in cfg.dataset else 1.0 + + self.keyframe_offset = 0 + self.frame_offset = 0 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + W, H = self.img_wh + + # Image paths + self.num_rows = self.dataset_cfg.lightfield_rows + self.num_cols = self.dataset_cfg.lightfield_cols + + rows = self.num_rows + cols = self.num_cols + + self.images_per_frame = rows * cols + self.total_num_views = rows * cols + + # Video paths + self.frame_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir)))[ + self.start_frame : self.start_frame + self.num_frames + ] + + # Image and pose paths + self.image_paths = [] + self.pose_paths = [] + + for frame_path in self.frame_paths: + all_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, frame_path))) + + self.image_paths += [os.path.join(self.root_dir, frame_path, p) for p in all_paths if p.endswith(".png")] + self.pose_paths += [os.path.join(self.root_dir, frame_path, p) for p in all_paths if p.endswith(".json")] + + # Set up poses, times, frames + self.poses = [] + self.times = [] + self.frames = [] + self.intrinsics = [] + + for i, pose_path in enumerate(self.pose_paths): + with self.pmgr.open(pose_path, "r") as f: + meta = json.load(f) + + if "frame" in meta: + frame = meta["frame"] + else: + frame = int(pose_path.split("/")[-2].split("frame_")[-1]) + + # Intrinsics + if i == 0: + self.global_meta = meta + self.start_frame = frame + self.end_frame = self.start_frame + self.num_frames - 1 + + # Intrinsics + focal_x = meta["normalized_focal_length_x"] + focal_y = meta["normalized_focal_length_y"] + principal_point_x = meta["normalized_principal_point_x"] + principal_point_y = meta["normalized_principal_point_y"] + + K = np.eye(3) + K[0, 0] = focal_x * W + K[0, 2] = principal_point_x * W + K[1, 1] = focal_y * H + K[1, 2] = principal_point_y * H + self.intrinsics.append(K) + + # Get current pose + 
frame_matrix = np.array(meta["camera_to_world"]) + pose = frame_matrix[:3, :4] + self.poses += [pose] + + # Time + self.times.append((frame - self.start_frame) / (self.num_frames - 1)) + self.frames.append(frame - self.start_frame) + + self.poses = np.stack(self.poses, axis=0) + self.intrinsics = np.stack(self.intrinsics, axis=0) + self.times = np.array(self.times) + self.frames = np.array(self.frames) + self.K = self.intrinsics[0] + + ## Bounds, common for all scenes + self.near = self.global_meta["near_clip"] + self.far = self.global_meta["far_clip"] + self.bounds = np.array([self.near, self.far]) + + ## Correct poses, bounds + if self.use_ndc or self.correct_poses: + self.poses, self.poses_avg, self.bounds = correct_poses_bounds( + np.copy(self.poses), self.bounds, flip=False, center=True + ) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + self.depth_range = np.array([2 * self.near, self.far]) + + ## Holdout validation images + if self.val_set == "lightfield": + step = self.dataset_cfg.lightfield_step + rows = self.dataset_cfg.lightfield_rows + cols = self.dataset_cfg.lightfield_cols + val_indices = [] + + self.val_pairs = self.dataset_cfg.val_pairs if "val_pairs" in self.dataset_cfg else [] + self.val_all = (step == 1 and len(self.val_pairs) == 0) or self.val_all + + for row in range(rows): + for col in range(cols): + idx = row * rows + col + + if row % step != 0 or col % step != 0 or ([row, col] in self.val_pairs): + val_indices += [frame * self.images_per_frame + idx for frame in range(self.num_frames)] + + if self.val_num > 0: + val_indices = val_indices[: self.val_num] + + elif len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.poses = self.poses[val_indices] + self.intrinsics = self.intrinsics[val_indices] + self.frames = self.frames[val_indices] + self.times = self.times[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.poses = self.poses[train_indices] + self.intrinsics = self.intrinsics[train_indices] + self.frames = self.frames[train_indices] + self.times = self.times[train_indices] + + def subsample(self, coords, rgb, frame): + if (frame % self.load_full_step) == 0: + return coords, rgb + elif (frame % self.subsample_keyframe_step) == 0: + subsample_every = int(np.round(1.0 / self.subsample_keyframe_frac)) + offset = self.keyframe_offset + self.keyframe_offset += 1 + # num_take = int(np.round(coords.shape[0] * self.subsample_keyframe_frac)) + # perm = torch.tensor( + # np.random.permutation(coords.shape[0]) + # )[:num_take] + else: + subsample_every = int(np.round(1.0 / self.subsample_frac)) + offset = self.frame_offset + self.frame_offset += 1 + # num_take = int(np.round(coords.shape[0] * self.subsample_frac)) + # perm = torch.tensor( + # np.random.permutation(coords.shape[0]) + # )[:num_take] + + # return coords[perm].view(-1, coords.shape[-1]), rgb[perm].view(-1, rgb.shape[-1]) + pixels = get_pixels_for_image(self.img_wh[1], self.img_wh[0]).reshape(-1, 2).long() + mask = ((pixels[..., 0] + pixels[..., 1] + offset) % 
subsample_every) == 0.0 + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def prepare_train_data(self, reset=False): + self.num_images = len(self.image_paths) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + num_pixels = 0 + + for idx in range(len(self.image_paths)): + # for idx in range(1): + cur_coords = self.get_coords(idx) + cur_rgb = self.get_rgb(idx) + cur_frame = int(np.round(self.times[idx] * (self.num_frames - 1))) + + # Subsample + cur_coords, cur_rgb = self.subsample(cur_coords, cur_rgb, cur_frame) + + # Coords + self.all_coords += [cur_coords] + + # Color + self.all_rgb += [cur_rgb] + + # Number of pixels + num_pixels += cur_rgb.shape[0] + + print("Full res images loaded:", num_pixels / (self.img_wh[0] * self.img_wh[1])) + + # Format / save loaded data + self.all_coords = torch.cat(self.all_coords, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + self.update_all_data() + + def update_all_data(self): + self.all_weights = self.get_weights() + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_weights, + ], + -1, + ) + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def prepare_render_data(self): + # Get poses + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + poses_per_frame = self.poses.shape[0] // self.num_frames + poses_one_frame = self.poses[ + (self.num_frames // 2) * poses_per_frame : (self.num_frames // 2 + 1) * poses_per_frame + ] + poses_each_frame = interpolate_poses(self.poses[::poses_per_frame], self.render_supersample) + radii = np.percentile(np.abs(poses_one_frame[..., 3]), 60, axis=0) + + if self.num_frames > 1: + poses = create_spiral_poses( + poses_one_frame, + radii, + focus_depth * 100, + N=self.num_frames * self.render_supersample, + ) + + reference_pose = np.eye(4) + reference_pose[:3, :4] = self.poses[(self.num_frames // 2) * poses_per_frame] + reference_pose = np.linalg.inv(reference_pose) + + for pose_idx in range(len(poses)): + cur_pose = np.eye(4) + cur_pose[:3, :4] = poses[pose_idx] + poses[pose_idx] = poses_each_frame[pose_idx] @ (reference_pose @ cur_pose) + else: + poses = create_spiral_poses(poses_one_frame, radii, focus_depth * 100, N=120) + + self.poses = np.stack(poses, axis=0) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + # Get times + if (self.num_frames - 1) > 0: + self.times = np.linspace(0, self.num_frames - 1, len(self.poses)) + + if not self.render_interpolate_time: + self.times = np.round(self.times) + + self.times = self.times / (self.num_frames - 1) + else: + self.times = [0.0 for p in self.poses] + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + if self.split != "train" and not self.val_all: + cam_idx = 3 + else: + cam_idx = idx % self.images_per_frame + + if self.split != "render": + K = torch.FloatTensor(self.intrinsics[idx]) + else: + K = torch.FloatTensor(self.intrinsics[0]) + + c2w = torch.FloatTensor(self.poses[idx]) + time = self.times[idx] + + print("Loading time:", 
np.round(time * (self.num_frames - 1))) + + directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=True) + + # Convert to world space / NDC + rays_o, rays_d = get_rays(directions, c2w) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * cam_idx], dim=-1) + + # Add times + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * time], dim=-1) + + # Return + return rays + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file) + img = img.convert("RGB") + + if img.size[0] != self._img_wh[0] or img.size[1] != self._img_wh[1]: + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + # img = img.view(4, -1).permute(1, 0) + # img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_intrinsics(self): + return self.K + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(idx), + "pose": self.poses[idx], + "time": self.times[idx], + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/demo_tracking.py b/demo_tracking.py new file mode 100644 index 0000000..bc2a9d8 --- /dev/null +++ b/demo_tracking.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python +# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL) +# +# SPDX-License-Identifier: AGPL-3.0-or-later + +import os +import pdb + +import cv2 +import matplotlib.pyplot as plt +import numpy as np +import torch +import torch.nn.functional as F +from segment_anything_hq import SamPredictor, sam_model_registry + + +def calculate_bounding_box(mask): + """ + Calculate bounding box from a binary mask. 
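+ The box is returned wrapped in an outer list, i.e. [[x_min, y_min, x_max, y_max]],
+ so that, after conversion with np.array, it can be used as the box prompt for
+ SamPredictor.predict in the tracking loop below.
+ Illustrative example (hypothetical values): a 4x4 mask with ones at rows/cols 1..2
+ yields [[1, 1, 2, 2]].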
+ + Args: + - mask: Binary mask array + + Returns: + - box: Bounding box coordinates [x_min, y_min, x_max, y_max] + """ + # Find indices of non-zero elements + non_zero_indices = np.argwhere(mask) + + # Extract x and y coordinates + x_coords = non_zero_indices[:, 1] + y_coords = non_zero_indices[:, 0] + + # Calculate bounding box coordinates + x_min = np.min(x_coords) + x_max = np.max(x_coords) + y_min = np.min(y_coords) + y_max = np.max(y_coords) + + return [[x_min, y_min, x_max, y_max]] + + +def show_mask(mask, ax, random_color=False): + if random_color: + color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0) + else: + color = np.array([30 / 255, 144 / 255, 255 / 255, 0.6]) + h, w = mask.shape[-2:] + mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1) + ax.imshow(mask_image) + + +def show_points(coords, labels, ax, marker_size=375): + pos_points = coords[labels == 1] + neg_points = coords[labels == 0] + ax.scatter( + pos_points[:, 0], pos_points[:, 1], color="green", marker="*", s=marker_size, edgecolor="white", linewidth=1.25 + ) + ax.scatter( + neg_points[:, 0], neg_points[:, 1], color="red", marker="*", s=marker_size, edgecolor="white", linewidth=1.25 + ) + + +def show_box(box, ax): + x0, y0 = box[0], box[1] + w, h = box[2] - box[0], box[3] - box[1] + ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor="green", facecolor=(0, 0, 0, 0), lw=2)) + + +def show_res(masks, scores, input_point, input_label, input_box, filename, image): + for i, (mask, score) in enumerate(zip(masks, scores)): + plt.figure(figsize=(10, 10)) + plt.imshow(image) + show_mask(mask, plt.gca()) + if input_box is not None: + box = input_box[i] + show_box(box, plt.gca()) + if (input_point is not None) and (input_label is not None): + show_points(input_point, input_label, plt.gca()) + + print(f"Score: {score:.3f}") + plt.axis("off") + plt.savefig(filename + "_" + str(i) + ".png", bbox_inches="tight", pad_inches=-0.1) + plt.close() + + +def show_res_multi(masks, scores, input_point, input_label, input_box, filename, image): + plt.figure(figsize=(10, 10)) + plt.imshow(image) + for mask in masks: + show_mask(mask, plt.gca(), random_color=True) + for box in input_box: + show_box(box, plt.gca()) + for score in scores: + print(f"Score: {score:.3f}") + plt.axis("off") + plt.savefig(filename + ".png", bbox_inches="tight", pad_inches=-0.1) + plt.close() + + +if __name__ == "__main__": + sam_checkpoint = "pre_trained/sam_hq_vit_h.pth" + model_type = "vit_h" + device = "cuda" + sam = sam_model_registry[model_type](checkpoint=sam_checkpoint) + sam.to(device=device) + predictor = SamPredictor(sam) + + for i in range(60): + print("Frame: ", i) + hq_token_only = False + + image = cv2.imread("logs/horse/val_videos/30/rgb/%04d.png" % i) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image = image[:, 160:-160] + predictor.set_image(image) + predictor.features = np.load("logs/horse/val_videos/30/sam/%04d.npy" % i) + predictor.features = torch.Tensor(predictor.features[None, :, :, 160:-160]).to(device) + predictor.features = F.interpolate(predictor.features, size=(64, 64), mode="bilinear") + + if i == 0: + input_box = None + input_point = np.array([[395, 380]]) # USER INPUT COORDINATE # , [317,340] + input_label = np.ones(input_point.shape[0]) + else: + input_box = np.array(calculate_bounding_box(masks.squeeze())) + input_point = None + input_label = None + + batch_box = False if input_box is None else len(input_box) > 1 + result_path = "logs/horse/val_videos/30/masks/" + os.makedirs(result_path, 
exist_ok=True) + + if not batch_box: + masks, scores, logits = predictor.predict( + point_coords=input_point, + point_labels=input_label, + box=input_box, + multimask_output=False, + hq_token_only=hq_token_only, + ) + show_res(masks, scores, input_point, input_label, input_box, result_path + "example" + str(i), image) + + else: + masks, scores, logits = predictor.predict_torch( + point_coords=input_point, + point_labels=input_label, + boxes=input_box, + multimask_output=False, + hq_token_only=hq_token_only, + ) + masks = masks.squeeze(1).cpu().numpy() + scores = scores.squeeze(1).cpu().numpy() + input_box = input_box.cpu().numpy() + show_res_multi(masks, scores, input_point, input_label, input_box, result_path + "example" + str(i), image) diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..76964d4 --- /dev/null +++ b/environment.yml @@ -0,0 +1,15 @@ +#!/usr/bin/env python +# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL) +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-License-Identifier: MIT +name: gearnerf +channels: + - anaconda +dependencies: + - matplotlib + - pillow + - scikit-image + - scipy + - python=3.8 diff --git a/images/pipeline.png b/images/pipeline.png new file mode 100644 index 0000000..4aad45e Binary files /dev/null and b/images/pipeline.png differ diff --git a/losses.py b/losses.py new file mode 100644 index 0000000..731792d --- /dev/null +++ b/losses.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python +# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL) +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-License-Identifier: MIT + +import torch +from torch import nn + + +class HuberLoss(nn.Module): + def __init__(self, cfg): + super().__init__() + self.loss = nn.HuberLoss(reduction="mean", delta=cfg.delta if "delta" in cfg else 1.0) + + def forward(self, inputs, targets, **kwargs): + loss = self.loss(inputs, targets) + return loss + + +class MSELoss(nn.Module): + def __init__(self, *args): + super().__init__() + self.loss = nn.MSELoss(reduction="mean") + + def forward(self, inputs, targets, **kwargs): + loss = self.loss(inputs, targets) + return loss + + +class HuberLoss(nn.Module): + def __init__(self, cfg): + super().__init__() + + self.loss = nn.HuberLoss(reduction="mean", delta=cfg.delta) + + def forward(self, inputs, targets, **kwargs): + loss = self.loss(inputs, targets) + return loss + + +class WeightedMSELoss(nn.Module): + def __init__(self, *args): + super().__init__() + + def forward(self, inputs, targets, **kwargs): + if "weight" in kwargs: + weight = kwargs["weight"] + else: + weight = 1.0 + + return torch.mean(weight * torch.square(inputs - targets)) + + +class MAELoss(nn.Module): + def __init__(self, *args): + super().__init__() + self.loss = nn.L1Loss(reduction="mean") + + def forward(self, inputs, targets, **kwargs): + loss = self.loss(inputs, targets) + return loss + + +class WeightedMAELoss(nn.Module): + def __init__(self, *args): + super().__init__() + + def forward(self, inputs, targets, **kwargs): + if "weight" in kwargs: + weight = kwargs["weight"] + else: + weight = 1.0 + + return torch.mean(weight * torch.abs(inputs - targets)) + + +class TVLoss(nn.Module): + def __init__(self, *args): + super().__init__() + + def forward(self, inputs, targets): + return torch.sqrt(torch.square(inputs - targets).sum(-1) + 1e-8).mean() + + +class ComplexMSELoss(nn.Module): + def 
__init__(self, *args): + super().__init__() + self.loss = nn.MSELoss(reduction="mean") + + def forward(self, inputs, targets): + loss = self.loss(torch.real(inputs), torch.real(targets)) + loss += self.loss(torch.imag(inputs), torch.imag(targets)) + return loss + + +class ComplexMAELoss(nn.Module): + def __init__(self, *args): + super().__init__() + self.loss = nn.L1Loss(reduction="mean") + + def forward(self, inputs, targets): + loss = self.loss(torch.real(inputs), torch.real(targets)) + loss += self.loss(torch.imag(inputs), torch.imag(targets)) + return loss + + +class MSETopN(nn.Module): + def __init__(self, cfg): + super().__init__() + + self.frac = cfg.frac + self.loss = nn.MSELoss(reduction="mean") + + def forward(self, inputs, targets): + diff = torch.abs(inputs - targets) + n = int(self.frac * targets.shape[0]) + + idx = torch.argsort(diff, dim=0) + + targets_sorted = torch.gather(targets, 0, idx) + targets_sorted = targets_sorted[:n] + + inputs_sorted = torch.gather(inputs, 0, idx) + inputs_sorted = inputs_sorted[:n] + + loss = self.loss(inputs_sorted, targets_sorted) + return loss + + +class MAETopN(nn.Module): + def __init__(self, cfg): + super().__init__() + + self.frac = cfg.frac + self.loss = nn.L1Loss(reduction="mean") + + def forward(self, inputs, targets): + diff = torch.abs(inputs - targets) + n = int(self.frac * targets.shape[0]) + + idx = torch.argsort(diff, dim=0) + + targets_sorted = torch.gather(targets, 0, idx) + targets_sorted = targets_sorted[:n] + + inputs_sorted = torch.gather(inputs, 0, idx) + inputs_sorted = inputs_sorted[:n] + + loss = self.loss(inputs_sorted, targets_sorted) + return loss + + +loss_dict = { + "huber": HuberLoss, + "mse": MSELoss, + "weighted_mse": WeightedMSELoss, + "mae": MAELoss, + "weighted_mae": WeightedMAELoss, + "tv": TVLoss, + "complex_mse": ComplexMSELoss, + "complex_mae": ComplexMAELoss, + "mse_top_n": MSETopN, + "mae_top_n": MAETopN, +} diff --git a/main.py b/main.py new file mode 100644 index 0000000..2f60a71 --- /dev/null +++ b/main.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python +# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL) +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-License-Identifier: MIT + +import os +import random +from typing import Dict +from uuid import uuid4 + +import hydra +import numpy as np +import pytorch_lightning as pl +import torch +from iopath.common.file_io import NativePathHandler, PathManager +from omegaconf import DictConfig, OmegaConf # @manual //github/third-party/omry/omegaconf:omegaconf +from pytorch_lightning.callbacks import TQDMProgressBar +from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint +from pytorch_lightning.loggers import TensorBoardLogger +from pytorch_lightning.profiler import AdvancedProfiler, PassThroughProfiler, PyTorchProfiler, SimpleProfiler +from pytorch_lightning.utilities.seed import seed_everything +from torch.distributed.launcher import LaunchConfig +from torch.distributed.launcher import elastic_launch as launch + +from nlf import INRDataModule, INRSystem, INRTrainer + + +class INRModelCheckpoint(ModelCheckpoint): + """Like pytorch_lightning.callbacks.ModelCheckpoint but allowing saving last top k checkpoints. 
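+ In this variant, the `last` checkpoint is only written on epochs that fall on
+ the `every_n_epochs` cadence (see `_save_last_checkpoint` below).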
+ See https://github.com/PyTorchLightning/pytorch-lightning/discussions/10669 + """ + + def _save_last_checkpoint(self, trainer: "pl.Trainer", monitor_candidates: Dict[str, torch.Tensor]) -> None: + if self._every_n_epochs >= 1 and (trainer.current_epoch + 1) % self._every_n_epochs == 0: + super()._save_last_checkpoint(trainer, monitor_candidates) + + +def run(cfg: DictConfig, log_dir: str, ckpt_dir: str, workflow_id: str) -> None: + # Print + print(OmegaConf.to_yaml(cfg)) + OmegaConf.set_struct(cfg, False) + cfg = cfg.experiment + + # Seed + if "seed" in cfg.params and not isinstance(cfg.params.seed, str) and cfg.params.seed is not None: + + seed_everything(cfg.params.seed, workers=True) + + # CWD paths + dir_path = os.path.normpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../../")) + os.chdir(dir_path) + + # PathManager + pmgr = PathManager() + pmgr.register_handler(NativePathHandler()) + + # Logging and saving + if log_dir is None or log_dir == "": + log_dir = os.path.expanduser(cfg.params.log_dir) + + log_dir = os.path.join(log_dir, cfg.params.name) + + pmgr.mkdirs(log_dir) + + if cfg.params.save_results: + cfg.params.save_video_dir = os.path.join(log_dir, cfg.params.save_video_dir) + cfg.params.save_image_dir = os.path.join(log_dir, cfg.params.save_image_dir) + pmgr.mkdirs(cfg.params.save_video_dir) + pmgr.mkdirs(cfg.params.save_image_dir) + + logger = TensorBoardLogger(save_dir=log_dir, name=cfg.params.name) + + # Setup system and datamodule + dm = INRDataModule(cfg) + dm.prepare_data() + + if "sample_with_replacement" in cfg.training and cfg.training.sample_with_replacement: + cfg.training.iters_per_epoch = cfg.training.num_iters + else: + cfg.training.iters_per_epoch = int(np.ceil(dm.original_dataset_size / cfg.training.batch_size)) + + # Checkpointing + if ckpt_dir is None or ckpt_dir == "": + ckpt_dir = os.path.expanduser(cfg.params.ckpt_dir) + + ckpt_dir = os.path.join(ckpt_dir, cfg.params.name) + + if "ckpt_name" in cfg.params and cfg.params.ckpt_name != "" and cfg.params.ckpt_name is not None: + ckpt_name = cfg.params.ckpt_name + elif cfg.params.load_from_weights: + ckpt_name = "last-weights" + else: + ckpt_name = "last" + + if cfg.params.load_from_weights: + last_ckpt_path = f"{ckpt_dir}/{ckpt_name}.ckpt" + else: + last_ckpt_path = f"{ckpt_dir}/{ckpt_name}.ckpt" + + if not pmgr.exists(last_ckpt_path): + last_ckpt_path = None + + checkpoint_callback = INRModelCheckpoint( + dirpath=ckpt_dir, + filename="{epoch:d}", + monitor="val/loss", + mode="min", + save_top_k=-1, + save_last=True, + every_n_epochs=cfg.training.ckpt_every, + ) + checkpoint_callback.CHECKPOINT_NAME_LAST = "last" + + weights_checkpoint_callback = INRModelCheckpoint( + save_weights_only=True, + dirpath=ckpt_dir, + filename="{epoch:d}-weights", + monitor="val/loss", + mode="min", + save_top_k=-1, + save_last=True, + every_n_epochs=cfg.training.ckpt_every, + ) + weights_checkpoint_callback.CHECKPOINT_NAME_LAST = "last-weights" + + # Other callbacks + callbacks = [] + callbacks.append(TQDMProgressBar(refresh_rate=10)) + + # Load checkpoint + if last_ckpt_path is not None and cfg.params.load_from_weights: + system = INRSystem.load_from_checkpoint(last_ckpt_path, cfg=cfg, dm=dm) + else: + system = INRSystem(cfg, dm=dm) + + # Trainer + if cfg.params.render_only: + cfg.training.render_every = 1 + cfg.training.val_every = 1 + + if cfg.params.test_only: + cfg.training.test_every = 1 + cfg.training.val_every = 1 + + trainer = INRTrainer( + cfg, + callbacks=[checkpoint_callback, 
weights_checkpoint_callback] + callbacks, + resume_from_checkpoint=last_ckpt_path if not cfg.params.load_from_weights else None, + logger=logger if cfg.params.tensorboard else False, + accelerator="gpu", + strategy="ddp" if cfg.training.num_gpus > 1 else None, + check_val_every_n_epoch=cfg.training.val_every, + benchmark=False, + profiler=None, + # profiler=AdvancedProfiler(dirpath='/home/benattal/logs/profiler', filename='logs.txt'), + # profiler=PyTorchProfiler(dirpath='/home/benattal/logs/pytorch_profiler', filename='logs.txt', row_limit=-1), + ) + + # Fit + trainer.fit(system, datamodule=dm) + + +def elastic_run(cfg: DictConfig): + if cfg.experiment.training.num_gpus > 1: + lc = LaunchConfig( + # Assuming devgpu testing, min = max nodes = 1 + min_nodes=1, + max_nodes=1, + nproc_per_node=cfg.experiment.training.num_gpus, + rdzv_backend="zeus", + # run_id just has to be globally unique + run_id=f"your_run_identifier_{uuid4()}", + # for fault tolerance; for testing set it to 0 (no fault tolerance) + max_restarts=0, + start_method="spawn", + ) + # The "run" function is called inside the elastic_launch + ret = launch(lc, run)(cfg, "", "", "") + print(f"Rank 0 results = {ret[0]}") + else: + run(cfg, "", "", "") + + +@hydra.main(config_path="conf", config_name="config") +def main(cfg: DictConfig): + elastic_run(cfg) + + +if __name__ == "__main__": + main() diff --git a/metrics.py b/metrics.py new file mode 100644 index 0000000..98d7816 --- /dev/null +++ b/metrics.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL) +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-License-Identifier: MIT + +import lpips +import numpy as np +import torch +from kornia.losses import ssim as dssim +from skimage.metrics import ( # @manual=fbsource//third-party/pythonlibs/native/scikit-image:scikit-image + peak_signal_noise_ratio, + structural_similarity, +) + + +def mse(image_pred, image_gt, valid_mask=None, reduction="mean"): + value = (image_pred - image_gt) ** 2 + + if valid_mask is not None: + value = value[valid_mask] + if reduction == "mean": + return torch.mean(value) + + return value + + +def psnr(image_pred, image_gt): + # image_gt = np.array(image_gt) + # return peak_signal_noise_ratio(np.array(image_pred), image_gt, data_range=(image_gt.max() - image_gt.min())) + # return peak_signal_noise_ratio(np.round(np.array(image_pred) * 255), np.round(np.array(image_gt) * 255), data_range=255.0) + + return peak_signal_noise_ratio(np.array(image_pred), np.array(image_gt), data_range=1.0) + + # return np.array(-10 * torch.log10(mse(torch.tensor(image_pred), torch.tensor(image_gt), torch.tensor((image_gt != 1.0).any(-1)).unsqueeze(-1).repeat((1, 1, 3)), 'mean'))) + + +def ssim(image0, image1): + return structural_similarity( + np.array(image1), np.array(image0), win_size=11, multichannel=True, gaussian_weights=True, data_range=1.0 + ) + + +def psnr_gpu(image_pred, image_gt, valid_mask=None, reduction="mean"): + # return 10*torch.log10(torch.square(image_gt.max() - image_gt.min()) / mse(image_pred, image_gt, valid_mask, reduction)) + # return -10*torch.log10(mse(image_pred, image_gt, valid_mask, reduction)) + + # image_pred = torch.round(image_pred * 255).float() + # image_gt = torch.round(image_gt * 255).float() + # return -10*torch.log10(mse(image_pred, image_gt, valid_mask, reduction) / (255 * 255)) + + return -10 * torch.log10(mse(image_pred, image_gt, valid_mask, reduction)) + + +def 
ssim_gpu(image_pred, image_gt, reduction="mean"): + """ + image_pred and image_gt: (1, 3, H, W) + """ + dssim_ = dssim(image_pred, image_gt, 11, reduction) # dissimilarity in [0, 1] + return 1 - 2 * dssim_ # in [-1, 1] + + +def compute_lpips(image0, image1, lpips_model): + gt_lpips = torch.tensor(image0).clone().cpu() * 2.0 - 1.0 + predict_image_lpips = torch.tensor(image1).clone().detach().cpu() * 2.0 - 1.0 + lpips_result = lpips_model.forward(predict_image_lpips, gt_lpips).cpu().detach().numpy() + return np.squeeze(lpips_result) + + +def get_mean_outputs(outputs, cpu=False): + # Stack + stacked = {} + + for x in outputs: + for key, val in x.items(): + if key not in stacked: + stacked[key] = [] + + stacked[key].append(val) + + # Mean + mean = {} + + for key in stacked: + if cpu: + mean_val = np.stack(stacked[key]).mean() + else: + mean_val = torch.stack(stacked[key]).mean() + + mean[key] = mean_val + + # if cpu: + # if 'val/loss' in mean: + # mean['val/psnr'] = -10*np.log10(mean['val/loss']) + # elif 'train/loss' in mean: + # mean['train/psnr'] = -10*np.log10(mean['train/loss']) + # else: + # if 'val/loss' in mean: + # mean['val/psnr'] = -10*torch.log10(mean['val/loss']) + # elif 'train/loss' in mean: + # mean['train/psnr'] = -10*torch.log10(mean['train/loss']) + + return mean diff --git a/nlf/__init__.py b/nlf/__init__.py new file mode 100644 index 0000000..a2d3f20 --- /dev/null +++ b/nlf/__init__.py @@ -0,0 +1,1000 @@ +#!/usr/bin/env python +# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL) +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-License-Identifier: MIT + +import copy +import os +import time +from collections import namedtuple + +import imageio # noqa +import lpips +import numpy as np +import torch +from iopath.common.file_io import NativePathHandler, PathManager +from omegaconf import ListConfig +from PIL import Image +from pytorch_lightning import LightningDataModule, LightningModule, Trainer +from torch.utils.data import DataLoader, RandomSampler + +from datasets import dataset_dict +from datasets.base import Base5DDataset, Base6DDataset +from losses import loss_dict +from metrics import compute_lpips, get_mean_outputs, psnr, psnr_gpu, ssim, ssim_gpu # noqa +from utils import get_optimizer, get_scheduler, to8b, weight_init_dict +from utils.config_utils import lambda_config, replace_config +from utils.gui_utils import NeRFGUI +from utils.tensorf_utils import AlphaGridMask + +from .models import model_dict +from .regularizers import regularizer_dict +from .rendering import render_chunked, render_fn_dict +from .subdivision import subdivision_dict +from .visualizers import visualizer_dict + +lpips_dist = lpips.LPIPS(net="alex", version="0.1") + +import pdb + + +class INRTrainer(Trainer): + def __init__( + self, + cfg, + **kwargs, + ): + super().__init__( + gpus=cfg.training.num_gpus, + max_epochs=cfg.training.num_epochs if "num_epochs" in cfg.training else None, + max_steps=-1, + log_every_n_steps=cfg.training.flush_logs, + **kwargs, + ) + + def save_checkpoint(self, *args, **kwargs): + if not self.is_global_zero: + return + + super().save_checkpoint(*args, **kwargs) + + +class INRDataModule(LightningDataModule): + def __init__(self, cfg): + super().__init__() + + self.cfg = cfg + self.current_epoch = 0 + + # Replacement + self.sample_with_replacement = getattr(self.cfg.training, "sample_with_replacement", False) + self.num_iters = getattr(self.cfg.training, "num_iters", -1) + + # Testing + 
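Editorial aside on the metrics defined in `metrics.py` above: PSNR is computed two ways, on CPU via scikit-image with `data_range=1.0` and on GPU as -10·log10(MSE) in `psnr_gpu`. For images normalized to [0, 1] these agree. A minimal, self-contained sanity check (not part of the repository; array shapes and names are illustrative only):

```python
# Standalone check: for images in [0, 1], skimage's PSNR with data_range=1.0
# equals -10 * log10(MSE), which is the formula psnr_gpu uses.
import numpy as np
from skimage.metrics import peak_signal_noise_ratio

rng = np.random.default_rng(0)
gt = rng.random((32, 32, 3)).astype(np.float32)
pred = rng.random((32, 32, 3)).astype(np.float32)

mse = np.mean((pred - gt) ** 2)
psnr_from_mse = -10.0 * np.log10(mse)                             # psnr_gpu's formula
psnr_skimage = peak_signal_noise_ratio(gt, pred, data_range=1.0)  # psnr()'s formula

assert np.isclose(psnr_from_mse, psnr_skimage, atol=1e-4)
```

`ssim_gpu` follows the same pattern, recovering SSIM from kornia's dissimilarity score as `1 - 2 * dssim`, as noted in its in-code comment.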
self.test_every = cfg.training.test_every + self.is_testing = False + self.test_only = getattr(self.cfg.params, "test_only", False) + + # Multiscale + self.multiscale_training = getattr(self.cfg.training, "multiscale", False) + self.scale_epochs = getattr(self.cfg.training, "scale_epochs", []) + self.scales = getattr(self.cfg.training, "scales", []) + self.scale_batch_sizes = getattr(self.cfg.training, "scale_batch_sizes", []) + + # TODO: + # - NOW: Same scale factor for all + # - LATER: Allow for completely different configs for multi-scale training + self.scale_lrs = getattr(self.cfg.training, "scale_lrs", []) + + self.cur_idx = -1 + self.cur_scale = 1.0 + self.cur_batch_size = self.cfg.training.batch_size + self.cur_lr = 1.0 + self.prepared = False + + def get_cur_scale(self, epoch): + if not self.multiscale_training: + return { + "idx": self.cur_idx, + "scale": self.cur_scale, + "batch_size": self.cur_batch_size, + "lr": self.lr, + } + + cur_idx = 0 + cur_scale = 1.0 + cur_batch_size = self.cfg.training.batch_size + cur_lr = 1.0 + + for idx in range(len(self.scales)): + if epoch >= self.scale_epochs[idx]: + cur_idx = idx + cur_scale = self.scales[idx] + cur_lr = self.scale_lrs[idx] + + return { + "idx": cur_idx, + "scale": cur_scale, + "batch_size": cur_batch_size, + "lr": cur_lr, + } + + def prepare_data(self): + if self.prepared: + return + + self.prepared = True + self.reload_data() + + def reload_data(self): + ## Train, val, test datasets + dataset_cl = ( + dataset_dict[self.cfg.dataset.train.name] + if "train" in self.cfg.dataset + else dataset_dict[self.cfg.dataset.name] + ) + self.train_dataset = dataset_cl(self.cfg, split="train") + dataset_cl = ( + dataset_dict[self.cfg.dataset.val.name] + if "val" in self.cfg.dataset + else dataset_dict[self.cfg.dataset.name] + ) + self.val_dataset = dataset_cl(self.cfg, split="val") + dataset_cl = ( + dataset_dict[self.cfg.dataset.test.name] + if "test" in self.cfg.dataset + else dataset_dict[self.cfg.dataset.name] + ) + self.test_dataset = dataset_cl(self.cfg, split="test") + dataset_cl = ( + dataset_dict[self.cfg.dataset.render.name] + if "render" in self.cfg.dataset + else dataset_dict[self.cfg.dataset.name] + ) + self.render_dataset = dataset_cl(self.cfg, split="render") + + ## Stats + self.original_dataset_size = len(self.train_dataset) + + ## Regularizer datasets + self.create_regularizer_datasets() + self.update_data() + + def setup(self, stage): + pass + + def create_regularizer_datasets(self): + self.regularizer_datasets = {} + + for key in self.cfg.regularizers.keys(): + cfg = self.cfg.regularizers[key] + + if cfg is not None and "dataset" in cfg: + dataset_cl = dataset_dict[cfg.dataset.name] + self.regularizer_datasets[cfg.type] = dataset_cl(cfg, train_dataset=self.train_dataset) + + def update_data(self): + # Set iter + self.train_dataset.cur_iter = self.current_epoch + + # Resize + reset_dataloaders = False + + if self.multiscale_training: + scale_params = self.get_cur_scale(self.current_epoch) + + if scale_params["idx"] != self.cur_idx: + print(f"Scaling dataset to scale {scale_params['scale']} batch_size: {scale_params['batch_size']}") + + self.cur_idx = scale_params["idx"] + self.cur_scale = scale_params["scale"] + self.cur_batch_size = scale_params["batch_size"] + self.cur_lr = scale_params["lr"] + + self.train_dataset.scale(self.cur_scale) + self.val_dataset.scale(self.cur_scale) + self.create_regularizer_datasets() + reset_dataloaders = True + + # Crop + self.train_dataset.crop() + + # Shuffle + if 
self.train_dataset.use_full_image: + self.train_dataset.shuffle() + + for dataset in self.regularizer_datasets.values(): + dataset.shuffle() + + return reset_dataloaders + + def train_dataloader(self): + if self.sample_with_replacement: + sampler = RandomSampler( + self.train_dataset, replacement=True, num_samples=self.num_iters * self.cur_batch_size + ) + + return DataLoader( + self.train_dataset, + num_workers=self.cfg.training.num_workers, + persistent_workers=True, + sampler=sampler, + batch_size=self.cur_batch_size, + pin_memory=True, + ) + else: + return DataLoader( + self.train_dataset, + shuffle=(not self.train_dataset.use_full_image), + num_workers=self.cfg.training.num_workers, + persistent_workers=True, + batch_size=self.cur_batch_size, + pin_memory=True, + ) + + def val_dataloader(self): + if ((self.current_epoch + 1) % self.test_every == 0) or self.test_only: + print("Testing") + dataset = self.test_dataset + self.is_testing = True + + if hasattr(self.test_dataset, "video_paths"): + return DataLoader( + dataset, shuffle=False, num_workers=0, persistent_workers=False, batch_size=1, pin_memory=True + ) + else: + print("Validating") + self.is_testing = False + dataset = self.val_dataset + + return DataLoader( + dataset, + shuffle=False, + num_workers=self.cfg.training.num_workers, + persistent_workers=True, + batch_size=1, + pin_memory=True, + ) + + +class INRSystem(LightningModule): + def __init__(self, cfg, dm): + super().__init__() + + self.cfg = cfg + self.dm = dm + + ## Settings ## + + # Path manager + self.pmgr = PathManager() + self.pmgr.register_handler(NativePathHandler()) + + # Training and loss + self.automatic_optimization = False + self.training_started = False + self.loss = loss_dict[self.cfg.training.loss.type](self.cfg.training.loss) + + # Data loading + self.sample_with_replacement = getattr(self.cfg.training, "sample_with_replacement", False) + self.num_iters = getattr(self.cfg.training, "num_iters", -1) + + # Test & render + self.render_only = getattr(self.cfg.params, "render_only", False) + self.test_only = getattr(self.cfg.params, "test_only", False) + self.interact_only = getattr(self.cfg.params, "interact_only", False) + + # Convert epochs -> iterations in config + def set_iter(cfg, key): + if isinstance(cfg[key], ListConfig): + cfg[key.replace("epoch", "iter")] = [ + [lii * self.cfg.training.iters_per_epoch for lii in li] for li in cfg[key] + ] + else: + cfg[key.replace("epoch", "iter")] = cfg[key] * self.cfg.training.iters_per_epoch + + for key in [ + "max_freq", + "wait", + "stop", + "falloff", + "window", + "no_bias", + "window_bias", + "window_bias_start", + "decay", + "warmup", + ]: + lambda_config(self.cfg, f"{key}_epoch", set_iter) + lambda_config(self.cfg, f"{key}_epochs", set_iter) + + ## Set-up rendering pipeline ## + + # Create subdivision (sampling) scheme + self.is_subdivided = ("subdivision" in cfg.model) and (cfg.model.subdivision.type is not None) + + if self.is_subdivided: + self.subdivision = subdivision_dict[self.cfg.model.subdivision.type]( + self, + self.cfg.model.subdivision, + ) + + replace_config(self.cfg, voxel_size=float(self.subdivision.voxel_size.cpu())) + + if "min_point" in self.subdivision.__dict__: + replace_config(self.cfg, min_point=self.subdivision.min_point) + + if "max_point" in self.subdivision.__dict__: + replace_config(self.cfg, max_point=self.subdivision.max_point) + else: + self.subdivision = None + + # Model mapping samples -> color + model = model_dict[self.cfg.model.type](self.cfg.model, system=self) + + # 
Render function that queries model using subdivision scheme + self.rendering = False + self.render_fn = render_fn_dict[self.cfg.model.render.type]( + model, + self.subdivision, + cfg.model.render, + net_chunk=self.cfg.training.net_chunk, + ) + + ## Optimizers ## + + self.optimizer_configs = {} + + for idx, key in enumerate(self.cfg.training.optimizers.keys()): + opt_cfg = copy.deepcopy(self.cfg.training.optimizers[key]) + self.optimizer_configs[key] = opt_cfg + + self.optimizer_groups = {} + + for module in self.render_fn.modules(): + if "opt_group" in module.__dict__: + if isinstance(module.opt_group, str): + if module.opt_group in self.optimizer_groups: + self.optimizer_groups[module.opt_group] += [module] + else: + self.optimizer_groups[module.opt_group] = [module] + else: + for k, v in module.opt_group.items(): + if k in self.optimizer_groups: + self.optimizer_groups[k] += copy.copy(v) + else: + self.optimizer_groups[k] = copy.copy(v) + + self.reset_opt_list = getattr(self.cfg.training, "reset_opt_list", []) + self.skip_opt_list = [] + + ## Additional objects used for training & visualization ## + + # Regularizers for additional losses during training + self.regularizers = [] + self.regularizer_configs = [] + + for key in self.cfg.regularizers.keys(): + cfg = self.cfg.regularizers[key] + reg = regularizer_dict[cfg.type](self, cfg) + + self.regularizer_configs.append(cfg) + self.regularizers.append(reg) + setattr(self, f"reg_{cfg.type}", reg) + + # Number of regulariztion pretraining iterations + self.num_regularizer_pretraining_iters = getattr(self.cfg.training, "num_regularizer_pretraining_iters", 0) + + # Visualizers + self.visualizers = [] + + for key in self.cfg.visualizers.keys(): + cfg = self.cfg.visualizers[key] + vis = visualizer_dict[cfg.type](self, cfg) + + self.visualizers.append(vis) + + ## Network weight initialization ## + + self.apply(weight_init_dict[self.cfg.training.weight_init.type](self.cfg.training.weight_init)) + + def load_state_dict(self, state_dict, strict=False): + new_state_dict = {} + + # For loading subdivision variables (voxel grid, voxel size, etc.) 
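Editorial aside on the optimizer setup in `INRSystem.__init__` above: submodules of the render function opt into training groups through an `opt_group` attribute, either a string (the whole module joins that group) or a dict mapping group names to parameter lists, and `configure_optimizers` later builds one optimizer/scheduler pair per group. A minimal sketch of that collection step, with made-up module names rather than the repository's classes:

```python
# Sketch of the opt_group grouping convention used by INRSystem (names invented).
import torch
from torch import nn


class ColorNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer = nn.Linear(8, 3)
        self.opt_group = "color"                      # string form: whole module in one group


class GeoNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.plane = nn.Parameter(torch.zeros(8, 8))
        self.opt_group = {"geometry": [self.plane]}   # dict form: explicit parameter lists


render_fn = nn.ModuleDict({"color": ColorNet(), "geo": GeoNet()})

optimizer_groups = {}
for module in render_fn.modules():
    if "opt_group" in module.__dict__:
        if isinstance(module.opt_group, str):
            optimizer_groups.setdefault(module.opt_group, []).append(module)
        else:
            for name, params in module.opt_group.items():
                optimizer_groups.setdefault(name, []).extend(params)


def params_of(group):
    out = []
    for item in group:
        out.extend(item.parameters() if isinstance(item, nn.Module) else [item])
    return out


# One optimizer per group, analogous to configure_optimizers() with per-group configs.
optimizers = {name: torch.optim.Adam(params_of(g), lr=1e-3) for name, g in optimizer_groups.items()}
print(sorted(optimizers))  # ['color', 'geometry']
```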
# + alpha_aabb = None + alpha_volume = None + + for key in state_dict.keys(): + new_state_dict[key] = state_dict[key] + + # Update size of tensor components + if "alpha_aabb" in key: + alpha_aabb = state_dict[key] + elif "alpha_volume" in key: + alpha_volume = state_dict[key] + elif "gridSize" in key: + self.render_fn.model.color_model.net.gridSize = state_dict[key] + + self.render_fn.model.color_model.net.init_svd_volume( + self.render_fn.model.color_model.net.gridSize[0], self.render_fn.model.color_model.net.device + ) + + for key in state_dict.keys(): + if ( + "app_plane" in key + or "density_plane" in key + or "gear_plane" in key + or "app_line" in key + or "density_line" in key + or "gear_line" in key + ): + + new_shape = self.state_dict()[key].shape + + if state_dict[key].shape != new_shape: + new_state_dict[key] = state_dict[key].view(*new_shape) + + super().load_state_dict(new_state_dict, strict=False) + + # Update other grid-size-dependent variables + self.render_fn.model.color_model.net.update_stepSize(self.render_fn.model.color_model.net.gridSize) + + # Update alpha mask + if alpha_volume is not None: + device = self.render_fn.model.color_model.net.device + self.render_fn.model.color_model.net.alphaMask = AlphaGridMask( + device, alpha_aabb.to(device), alpha_volume.to(device) + ) + + def render(self, method_name, coords, **render_kwargs): + return self.run_chunked(coords, getattr(self.render_fn, method_name), **render_kwargs) + + def forward(self, coords, **render_kwargs): + return self.run_chunked(coords, self.render_fn, **render_kwargs) + + def run_chunked(self, coords, fn, **render_kwargs): + if self.rendering: + ray_chunk = ( + self.cfg.training.render_ray_chunk + if "render_ray_chunk" in self.cfg.training + else self.cfg.training.ray_chunk + ) + else: + ray_chunk = self.cfg.training.ray_chunk + + return render_chunked(coords, fn, render_kwargs, chunk=ray_chunk) + + def configure_optimizers(self): + print("Configuring optimizers") + + optimizers = [] + schedulers = [] + + # Iterate over groups + for idx, key in enumerate(self.optimizer_groups.keys()): + opt_cfg = copy.deepcopy(self.optimizer_configs[key]) + opt_cfg.lr *= self.trainer.datamodule.cur_lr + + # Optimizer + optimizer = get_optimizer(opt_cfg, self.optimizer_groups[key]) + optimizers.append(optimizer) + + # Scheduler + scheduler = get_scheduler(opt_cfg, optimizer, self.cfg.training.iters_per_epoch) + schedulers.append(scheduler) + + return optimizers, schedulers + + def needs_opt_reset(self, train_iter): + # Check if reset needed + needs_reset = False + + for idx, key in enumerate(self.optimizer_groups.keys()): + opt_cfg = self.optimizer_configs[key] + + if "reset_opt_list" in opt_cfg and (train_iter in opt_cfg.reset_opt_list): + needs_reset = True + + return needs_reset or (train_iter == 0) + + def reset_optimizers(self, train_iter): + # Perform opt reset + optimizers = [] + schedulers = [] + + # Iterate over groups + for idx, key in enumerate(self.optimizer_groups.keys()): + opt_cfg = copy.deepcopy(self.optimizer_configs[key]) + + if "skip_opt_list" in opt_cfg and (train_iter in opt_cfg.skip_opt_list): + self.skip_opt_list.append(key) + elif "remove_skip_opt_list" in opt_cfg and (train_iter in opt_cfg.remove_skip_opt_list): + self.skip_opt_list = [skip_key for skip_key in self.skip_opt_list if skip_key != key] + + if "reset_opt_list" in opt_cfg and (train_iter in opt_cfg.reset_opt_list): + print("Resetting optimizer", opt_cfg) + opt_cfg.lr *= self.trainer.datamodule.cur_lr + + # Optimizer + optimizer = 
get_optimizer(opt_cfg, self.optimizer_groups[key]) + optimizers.append(optimizer) + + # Scheduler + scheduler = get_scheduler(opt_cfg, optimizer, self.cfg.training.iters_per_epoch) + schedulers.append(scheduler) + else: + optimizers.append(self.trainer.optimizers[idx]) + schedulers.append(self.trainer.lr_scheduler_configs[idx].scheduler) + + self.trainer.optimizers = optimizers + self.trainer.strategy.lr_scheduler_configs = [ + namedtuple("scheduler_config", ("scheduler",))(s) for s in schedulers + ] + + def get_train_iter(self, epoch, batch_idx, val=False): + # Get epoch + if self.render_only or self.test_only: + epoch = 10000000 + elif self.cfg.params.load_from_weights: + epoch = self.cfg.params.start_epoch + + # Get number of iterations per epoch + num_iters = self.num_iters + + if not self.sample_with_replacement: + num_iters = len(self.trainer.datamodule.train_dataset) // self.cfg.training.batch_size + + # Get train iteration + train_iter = (num_iters) * epoch + batch_idx * self.cfg.training.num_gpus + + # Multi-GPU + if not val: + train_iter += self.global_rank + + # Regularization + train_iter -= self.num_regularizer_pretraining_iters + + return train_iter + + def set_train_iter(self, train_iter): + # Set model iter + self.render_fn.model.set_iter(train_iter) + + # Set regularizer iter + for reg in self.regularizers: + reg.set_iter(train_iter) + + @property + def regularizer_render_kwargs(self): + render_kwargs = {} + + for reg in self.regularizers: + render_kwargs.update(reg.render_kwargs) + + return render_kwargs + + @property + def visualizer_render_kwargs(self): + render_kwargs = {} + + for vis in self.visualizers: + render_kwargs.update(vis.render_kwargs) + + return render_kwargs + + def training_step(self, batch, batch_idx): + # return {} + if self.render_only or self.test_only or self.interact_only: + return {} + + ## Flag indicating the training has started + self.training_started = True + + ## Tell model what training iter it is + train_iter = self.get_train_iter(self.current_epoch, batch_idx) + self.set_train_iter(train_iter) + + # Reset optimizers if necessary + if self.needs_opt_reset(train_iter): + self.reset_optimizers(train_iter) + + # Input batch + batch = self.trainer.datamodule.train_dataset.format_batch(batch) + # Results + # with torch.autocast("cuda"): + outputs = {} + coords, rgb, coords_sam, sam = batch["coords"], batch["rgb"], batch["coords_sam"], batch["sam"] + + results = self(coords, **self.regularizer_render_kwargs) + results_sam = self(coords_sam, **self.regularizer_render_kwargs) + + # Image loss + loss = 0.0 + + if train_iter >= 0: + # Calculate image loss and PSNR + image_loss = self.loss(results["rgb"][:, :3], rgb, **batch) + feature_loss = self.loss(results_sam["rgb"][:, 3:], sam, **batch) + loss = image_loss + feature_loss + + outputs["train/psnr"] = psnr_gpu(results["rgb"][:, :3], rgb).detach() + outputs["train/psnr_sam"] = psnr_gpu(results["rgb"][:, 3:], sam).detach() + + # Print + if self.cfg.params.print_loss: + print(f"PSNR: {outputs['train/psnr']:.04f}, PSNR_SAM: {outputs['train/psnr_sam']:.04f}") + + # Regularization losses + reg_loss = 0.0 + + for reg, cfg in zip(self.regularizers, self.regularizer_configs): + reg.batch_size = self.trainer.datamodule.cur_batch_size + cur_loss = reg.loss(batch, results, batch_idx) * reg.loss_weight() + reg_loss += cur_loss + + if not reg.warming_up(): + loss += cur_loss + + # Print + if self.cfg.params.print_loss and reg_loss > 0.0: + print(f"Regularization loss: {reg_loss:.04f}, Iter: f{train_iter}") + + # 
Optimizers + optimizers = self.optimizers(use_pl_optimizer=True) + if not isinstance(optimizers, list): + optimizers = [optimizers] + + # Gradient descent step + for opt in optimizers: + opt.zero_grad() + self.manual_backward(loss) + for opt in optimizers: + opt.step() + + ## Return + outputs["train/loss"] = loss.detach() + + return outputs + + def training_epoch_end(self, outputs): + if ((self.current_epoch + 1) % self.cfg.training.log_every) == 0: + # Log + mean = get_mean_outputs(outputs) + + for key, val in mean.items(): + self.log(key, val, on_epoch=True, on_step=False, sync_dist=True) + print(f"{key}: {val}") + + # Scheduler step + if self.training_started: + schedulers = self.lr_schedulers() + + for sched in schedulers: + sched.step() + + # Dataset update & resize + self.trainer.datamodule.current_epoch = self.current_epoch + 1 + + reset_val = ( + (self.current_epoch + 2) % self.trainer.datamodule.test_every == 0 + or (self.current_epoch + 1) % self.trainer.datamodule.test_every == 0 + or (self.current_epoch) % self.trainer.datamodule.test_every == 0 + ) or self.test_only + resized = False + + if ((self.current_epoch + 1) % self.cfg.training.update_data_every) == 0: + print("Updating data") + resized = self.trainer.datamodule.update_data() + + self.trainer.datamodule.update_data() + self.trainer.reset_train_dataloader(self) + + if resized: + print("Resized data") + self.trainer.reset_train_dataloader(self) + + if "reset_after_resize" in self.cfg.training and self.cfg.training.reset_after_resize: + optimizers, schedulers = self.configure_optimizers() + self.trainer.optimizers = optimizers + self.trainer.lr_schedulers = [{"scheduler": s} for s in schedulers] + + if reset_val or resized: + print("Re-setting dataloaders") + self.trainer.reset_val_dataloader(self) + + def interact(self): # noqa + with torch.no_grad(): + with torch.cuda.amp.autocast(enabled=True): + gui = NeRFGUI(self, W=self.cfg.dataset.img_wh[0], H=self.cfg.dataset.img_wh[1]) + gui.render() + + # Record time-to-render + all_times = [] + + # Get rays + coords = self.trainer.datamodule.render_dataset[0]["coords"].cuda() + origins = coords[..., :3] + directions = coords[..., 3:6] + extra = coords[..., 6:] + + # Initial pose + initial_pose = np.eye(4) + initial_pose[:3, :4] = self.trainer.datamodule.render_dataset.poses[0] + + initial_pose_inv = np.linalg.inv(initial_pose) + initial_pose_inv = torch.FloatTensor(initial_pose_inv).cuda() + + # Visualizer kwargs + visualizer_render_kwargs = self.visualizer_render_kwargs + + for idx in range(len(self.trainer.datamodule.render_dataset)): + # Set rendering + self.rendering = True + + # Render + torch.cuda.synchronize() + start_time = time.time() + + cur_pose = torch.FloatTensor(self.trainer.datamodule.render_dataset.poses[idx]).cuda() + pose_offset = cur_pose @ initial_pose_inv + + cur_origins = (pose_offset[:3, :3] @ origins.permute(1, 0)).permute(1, 0) + pose_offset[None, :3, -1] + cur_directions = (pose_offset[:3, :3] @ directions.permute(1, 0)).permute(1, 0) + cur_extra = extra + cur_coords = torch.cat([cur_origins, cur_directions, cur_extra], -1) + cur_results = self(cur_coords) + + torch.cuda.synchronize() + + # Record time + all_times.append(time.time() - start_time) + print(idx, all_times[-1]) + + # Set not rendering + self.rendering = False + + def validation_video(self, batch, batch_idx): # noqa + if not self.trainer.is_global_zero: + return + + # Render outputs + all_videos = {"videos/rgb": [], "videos/sam": []} + + # Function for adding outputs + def 
_add_outputs(outputs): + for key in outputs: + all_videos[key] = np.array(outputs[key]) + + # Loop over all render poses + all_times = [] + + for idx in range(len(self.trainer.datamodule.render_dataset)): + # Convert batch to CUDA + cur_batch = self.trainer.datamodule.render_dataset[idx] + W, H = cur_batch["W"], cur_batch["H"] + self.cur_wh = [int(W), int(H)] + + for k in cur_batch: + if isinstance(cur_batch[k], torch.Tensor): + cur_batch[k] = cur_batch[k].cuda() + + self.rendering = True + + # Render current pose + visualizer_render_kwargs = self.visualizer_render_kwargs + + torch.cuda.synchronize() + start_time = time.time() + + # cur_results = self.render_fn.model.embedding_model(cur_batch['coords'], {}) + # cur_results = self.render_fn.model.embedding_model.embeddings[0]({'rays': cur_batch['coords']}, {}) + + cur_results = self(cur_batch["coords"], rendering=True, **visualizer_render_kwargs) + # cur_results = self.model_script(cur_batch['coords']) + + torch.cuda.synchronize() + + all_times.append(time.time() - start_time) + + self.rendering = False + print(idx, all_times[-1]) + + cur_img = cur_results["rgb"].view(H, W, 3 + 256).cpu().numpy() + + # Format output RGB + cur_sam = cur_img.transpose(2, 0, 1)[3:] + cur_img = cur_img.transpose(2, 0, 1)[:3] + all_videos["videos/rgb"] = cur_img + all_videos["videos/sam"] = cur_sam + + # Visualizer outputs + for vis in self.visualizers: + outputs = vis.validation_video(cur_batch, idx) + _add_outputs(outputs) + + # Save outputs + if self.cfg.params.save_results: + epoch = str(self.current_epoch + 1) + + if self.render_only: + save_video_dir = self.cfg.params.save_video_dir.replace("val_videos", "render") + else: + save_video_dir = os.path.join(self.cfg.params.save_video_dir, epoch) + + for key in all_videos: + cur_im = np.squeeze(all_videos[key]) + vid_suffix = key.split("/")[-1] + + self.pmgr.mkdirs(os.path.join(save_video_dir, vid_suffix)) + + if vid_suffix == "sam": + sam_vis = cur_im[:3] + with self.pmgr.open(os.path.join(save_video_dir, vid_suffix, f"{idx:04d}.png"), "wb") as f: + sam_vis = np.squeeze(sam_vis) + Image.fromarray(to8b(sam_vis.transpose(1, 2, 0))).save(f) + + with self.pmgr.open(os.path.join(save_video_dir, vid_suffix, f"{idx:04d}.npy"), "wb") as f: + cur_im = (np.squeeze(cur_im) - 0.5) * 2 + np.save(f, cur_im) + else: + with self.pmgr.open(os.path.join(save_video_dir, vid_suffix, f"{idx:04d}.png"), "wb") as f: + if len(cur_im.shape) == 3: + Image.fromarray(to8b(cur_im.transpose(1, 2, 0))).save(f) + else: + Image.fromarray(to8b(cur_im)).save(f) + + print("Average time:", np.mean(all_times[1:-1])) + + def validation_image(self, batch, batch_idx): # noqa + batch_idx = batch_idx * self.cfg.training.num_gpus + self.global_rank + + # Forward + coords, rgb, = ( + batch["coords"], + batch["rgb"], + ) + coords = torch.clone(coords.view(-1, coords.shape[-1])) + rgb = rgb.view(-1, 3) + results = self(coords, **self.visualizer_render_kwargs) + + # Setup + W, H = batch["W"], batch["H"] + self.cur_wh = [int(W), int(H)] + all_images = {} + + # Logging + img = results["rgb"].view(H, W, 3 + 256).cpu().numpy() + img = img.transpose(2, 0, 1) + sam = img[3:] + img = img[:3] + img_gt = rgb.view(H, W, 3).cpu().numpy() + img_gt = img_gt.transpose(2, 0, 1) + + all_images["eval/pred"] = img + all_images["eval/gt"] = img_gt + all_images["eval/sam"] = sam + + # Helper for adding outputs + def _add_outputs(outputs): + for key in outputs: + if key not in all_images: + all_images[key] = np.clip(np.array(outputs[key]), 0.0, 1.0) + + # Visualizer images + 
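Editorial aside on the channel layout used by the validation paths above: each rendered ray carries 3 RGB channels plus a 256-dimensional distilled SAM feature, so a full frame reshapes to `(H, W, 3 + 256)` and is split channel-wise before saving, an RGB PNG on one side and a rescaled `.npy` feature map on the other. A small shape-only sketch with dummy tensors (not repository code):

```python
# Dummy buffer standing in for results["rgb"]: one row per ray, 3 RGB + 256 SAM channels.
import torch

H, W, FEAT = 4, 6, 256
buffer = torch.rand(H * W, 3 + FEAT)

frame = buffer.view(H, W, 3 + FEAT).permute(2, 0, 1)  # channel-first, like .transpose(2, 0, 1)
rgb = frame[:3]    # (3, H, W): written out with to8b() as a PNG
sam = frame[3:]    # (256, H, W): rescaled to [-1, 1] and saved as .npy

assert rgb.shape == (3, H, W) and sam.shape == (FEAT, H, W)
```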
for vis in self.visualizers: + if not self.trainer.datamodule.is_testing or vis.run_on_test: + outputs = vis.validation_image(batch, batch_idx) + _add_outputs(outputs) + + # Log all images + for key in all_images: + if "eval/" in key: + continue + + if self.cfg.params.tensorboard and self.cfg.training.num_gpus <= 1 and self.cfg.params.log_images: + self.logger.experiment.add_images( + f"{key}_{batch_idx}", + all_images[key][None], + self.global_step, + ) + + # Save outputs + if self.cfg.params.save_results: + epoch = str(self.current_epoch + 1) + + if self.test_only: + save_image_dir = self.cfg.params.save_image_dir.replace("val_images", "testset") + else: + save_image_dir = os.path.join(self.cfg.params.save_image_dir, epoch) + + for key in all_images: + im_suffix = key.split("/")[0] + im_name = key.split("/")[-1] + + self.pmgr.mkdirs(os.path.join(save_image_dir, im_suffix)) + + if im_suffix == "data": + with self.pmgr.open( + os.path.join(save_image_dir, im_suffix, f"{batch_idx:04d}_{im_name}.npy"), "wb" + ) as f: + all_images[key] = np.squeeze(all_images[key]) + np.save(f, all_images[key]) + else: + if im_name == "sam": + sam_vis = all_images[key][:3] + with self.pmgr.open( + os.path.join(save_image_dir, im_suffix, f"{batch_idx:04d}_{im_name}.png"), "wb" + ) as f: + sam_vis = np.squeeze(sam_vis) + Image.fromarray(to8b(sam_vis.transpose(1, 2, 0))).save(f) + + with self.pmgr.open( + os.path.join(save_image_dir, im_suffix, f"{batch_idx:04d}_{im_name}.npy"), "wb" + ) as f: + all_images[key] = (np.squeeze(all_images[key]) - 0.5) * 2 + np.save(f, all_images[key]) + else: + with self.pmgr.open( + os.path.join(save_image_dir, im_suffix, f"{batch_idx:04d}_{im_name}.png"), "wb" + ) as f: + all_images[key] = np.squeeze(all_images[key]) + + if len(all_images[key].shape) == 3: + Image.fromarray(to8b(all_images[key].transpose(1, 2, 0))).save(f) + else: + Image.fromarray(to8b(all_images[key])).save(f) + + # Output metrics + outputs = {} + outputs["val/loss"] = self.loss(results["rgb"][:, :3], rgb, **batch).detach().cpu().numpy() + outputs["val/psnr"] = psnr(img.transpose(1, 2, 0), img_gt.transpose(1, 2, 0)) + outputs["val/ssim"] = ssim(img.transpose(1, 2, 0), img_gt.transpose(1, 2, 0)) + outputs["val/lpips"] = lpips_dist(torch.tensor(img).unsqueeze(0), torch.tensor(img_gt).unsqueeze(0)) + + return outputs + + def validation_step(self, batch, batch_idx): + # with torch.autocast("cuda"): + self.render_fn.eval() + + with torch.no_grad(): + train_iter = self.get_train_iter(self.current_epoch + 1, 0, True) + self.set_train_iter(max(train_iter, 0)) + + # Interact + if self.interact_only: + self.interact() + exit(0) + + # Render video + if batch_idx == 0 and ((self.current_epoch + 1) % self.cfg.training.render_every == 0 or self.render_only): + self.validation_video(batch, batch_idx) + + # Render image + log = self.validation_image(batch, batch_idx) + + # Do not train + if self.render_only: + exit(0) + + self.render_fn.train() + + # Return + return log + + def validation_epoch_end(self, outputs): + # Log + mean = get_mean_outputs(outputs, cpu=True) + epoch = str(self.current_epoch + 1) + self.pmgr.mkdirs(os.path.join(self.cfg.params.save_image_dir, epoch)) + + with self.pmgr.open(os.path.join(self.cfg.params.save_image_dir, epoch, "metrics.txt"), "w") as f: + for key, val in mean.items(): + self.log(key, val, on_epoch=True, on_step=False, sync_dist=True) + print(f"{key}: {val}") + f.write(f"{key}: {float(val)}\n") + + return {} diff --git a/nlf/activations.py b/nlf/activations.py new file mode 100644 index 
0000000..241e49e --- /dev/null +++ b/nlf/activations.py @@ -0,0 +1,563 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import numpy as np +import torch +from torch import nn + +from utils.rotation_conversions import axis_angle_to_matrix + + +class LeakyReLU(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + if "a" in cfg and not isinstance(cfg, str): + self.a = cfg.a + else: + self.a = 0.01 + + if "inplace" not in kwargs: + kwargs["inplace"] = True + + self.act = nn.LeakyReLU(self.a, **kwargs) + + def forward(self, x): + return self.act(x) + + +class ReLU(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + if "inplace" not in kwargs: + kwargs["inplace"] = False + + self.act = nn.ReLU(**kwargs) + + def forward(self, x): + return self.act(x) + + +class Abs(nn.Module): + def __init__(self, cfg): + super().__init__() + + def forward(self, x): + return torch.abs(x) + + +class Sigmoid(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + self.act = nn.Sigmoid(**kwargs) + self.inner_fac = cfg["inner_fac"] if "inner_fac" in cfg else 1.0 + self.outer_fac = cfg["outer_fac"] if "outer_fac" in cfg else 1.0 + self.shift = cfg.shift if "shift" in cfg else 0.0 + + if "fac" in cfg: + self.outer_fac = cfg["fac"] + + def forward(self, x): + return self.act(x * self.inner_fac + self.shift) * self.outer_fac + + def set_iter(self, i): + self.cur_iter = i + + +class Softplus(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + self.inner_fac = cfg["inner_fac"] if "inner_fac" in cfg else 1.0 + self.outer_fac = cfg["outer_fac"] if "outer_fac" in cfg else 1.0 + self.shift = cfg.shift if "shift" in cfg else 0.0 + + if "fac" in cfg: + self.outer_fac = cfg["fac"] + + def forward(self, x): + return nn.functional.softplus(x * self.inner_fac + self.shift) * self.outer_fac + + def set_iter(self, i): + self.cur_iter = i + + +class Softmax(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + self.act = nn.Softmax(dim=-1, **kwargs) + + def forward(self, x): + return self.act(x) + + +class SparseMagnitude(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + self.act = nn.Softmax(dim=-1, **kwargs) + self.inner_fac = cfg["inner_fac"] if "inner_fac" in cfg else 1.0 + self.outer_fac = cfg["outer_fac"] if "outer_fac" in cfg else 1.0 + + if "param_channels" in cfg and not isinstance(cfg, str): + self.param_channels = cfg.param_channels + else: + self.param_channels = 3 + + def forward(self, x): + x = x.view(x.shape[0], -1, self.param_channels) + mag = torch.linalg.norm(x, dim=-1) + mag = self.act(mag * self.inner_fac) * self.outer_fac + x = torch.nn.functional.normalize(x, dim=-1) * mag[..., None] + return x + + +class Tanh(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + self.act = nn.Tanh(**kwargs) + self.inner_fac = cfg["inner_fac"] if "inner_fac" in cfg else 1.0 + self.outer_fac = cfg["outer_fac"] if "outer_fac" in cfg else 1.0 + self.shift = cfg.shift if "shift" in cfg else 0.0 + + if "fac" in cfg: + self.outer_fac = cfg["fac"] + + def forward(self, x): + return self.act(x * self.inner_fac + self.shift) * self.outer_fac + + def inverse(self, x): + return (torch.atanh(x / self.outer_fac) - self.shift) / self.inner_fac + + +class IdentityTanh(nn.Module): + def __init__(self, cfg, 
**kwargs): + super().__init__() + + self.act = nn.Tanh(**kwargs) + self.fac = cfg["fac"] if "fac" in cfg else 1.0 + + # Mapping from [-inf, +inf] to [-1, 1] that acts as an *almost* identity mapping (for most of the space) + # Derived from an *almost* identity mapping from [-inf, +inf] to [-2, 2] (identity on [-1.9, +1.9]) + + def forward(self, x): + x = x * 2.0 + + return torch.where(torch.abs(x) < 1.91501, x, self.act(x) * 2.0) * self.fac / 2.0 + + def inverse(self, x): + x = (x / self.fac) * 2.0 + + return torch.where(torch.abs(x) < 1.91501, x, torch.atanh(x / 2.0)) / 2.0 + + +class Identity(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + self.shift = cfg.shift if "shift" in cfg else 0.0 + self.inner_fac = cfg["inner_fac"] if "inner_fac" in cfg else 1.0 + self.outer_fac = cfg["outer_fac"] if "outer_fac" in cfg else 1.0 + + if "fac" in cfg: + self.outer_fac = cfg["fac"] + + def forward(self, x): + return (x * self.inner_fac + self.shift) * self.outer_fac + + def inverse(self, x): + return (x / self.outer_fac - self.shift) / self.inner_fac + + +class Power(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + self.power = cfg["power"] if "power" in cfg else 1.0 + + def forward(self, x): + return torch.pow(torch.abs(x) + 1e-8, self.power) * torch.sign(x) + + def inverse(self, x): + return torch.pow(torch.abs(x) + 1e-8, 1.0 / self.power) * torch.sign(x) + + +class L1Norm(nn.Module): + def __init__(self, cfg): + super().__init__() + + def forward(self, x): + return torch.nn.functional.normalize(x, p=1, dim=-1) * x.shape[-1] + + +class Probs(nn.Module): + def __init__(self, cfg): + super().__init__() + + def forward(self, x): + return torch.nn.functional.normalize(torch.abs(x), p=1, dim=-1) + + +class RowL2Norm(nn.Module): + def __init__(self, cfg): + super().__init__() + + if "param_channels" in cfg and not isinstance(cfg, str): + self.param_channels = cfg.param_channels + else: + self.param_channels = 4 + + if "fac" in cfg and not isinstance(cfg, str): + self.fac = cfg.fac + else: + self.fac = 1.0 + + def forward(self, x): + batch_size = x.shape[0] + total_channels = x.shape[-1] + + if total_channels > 0: + x = x.view(-1, total_channels // self.param_channels, self.param_channels) + x = torch.nn.functional.normalize(x, p=2.0, dim=-1) + + return x.view(batch_size, total_channels) * self.fac + + +class RowL2NormZOnly(nn.Module): + def __init__(self, cfg): + super().__init__() + + if "param_channels" in cfg and not isinstance(cfg, str): + self.param_channels = cfg.param_channels + else: + self.param_channels = 4 + + if "fac" in cfg and not isinstance(cfg, str): + self.fac = cfg.fac + else: + self.fac = 1.0 + + def forward(self, x): + batch_size = x.shape[0] + total_channels = x.shape[-1] + + if total_channels > 0: + x = x.view(-1, total_channels // self.param_channels, self.param_channels) + x = torch.nn.functional.normalize(x, p=2.0, dim=-1) + x[..., :-1, :] = torch.eye( + total_channels // self.param_channels - 1, + self.param_channels, + device=x.device, + ) + + return x.view(batch_size, total_channels) * self.fac + + +class RowLInfNorm(nn.Module): + def __init__(self, cfg): + super().__init__() + + if "param_channels" in cfg and not isinstance(cfg, str): + self.param_channels = cfg.param_channels + else: + self.param_channels = 4 + + if "fac" in cfg and not isinstance(cfg, str): + self.fac = cfg.fac + else: + self.fac = 1.0 + + def forward(self, x): + batch_size = x.shape[0] + total_channels = x.shape[-1] + + if total_channels > 0: + x = 
x.view(-1, total_channels // self.param_channels, self.param_channels) + x = torch.nn.functional.normalize(x, p=float("inf"), dim=-1) + + return x.view(batch_size, total_channels) * self.fac + + +class RowL1Norm(nn.Module): + def __init__(self, cfg): + super().__init__() + + if "param_channels" in cfg and not isinstance(cfg, str): + self.param_channels = cfg.param_channels + else: + self.param_channels = 4 + + if "fac" in cfg and not isinstance(cfg, str): + self.fac = cfg.fac + else: + self.fac = 1.0 + + def forward(self, x): + batch_size = x.shape[0] + total_channels = x.shape[-1] + + if total_channels > 0: + x = x.view(-1, total_channels // self.param_channels, self.param_channels) + x = torch.nn.functional.normalize(x, p=1, dim=-1) + + return x.view(batch_size, total_channels) * self.fac + + +class L2Norm(nn.Module): + def __init__(self, cfg): + super().__init__() + + if "param_channels" in cfg and cfg.param_channels is not None: + self.fac = 1.0 / np.sqrt(cfg.param_channels) + else: + self.fac = 1.0 + + def forward(self, x): + return torch.nn.functional.normalize(x, p=2.0, dim=-1) * np.sqrt(x.shape[-1]) * self.fac + + +class Zero(nn.Module): + def __init__(self, cfg): + super().__init__() + + def forward(self, x): + return torch.zeros_like(x) + + +class RGBA(nn.Module): + def __init__(self, cfg): + super().__init__() + + self.voxel_size = cfg.voxel_size if "voxel_size" in cfg else None + self.window_iters = cfg.window_iters if "window_iters" in cfg else 0.0 + self.cur_iter = 0 + + def forward(self, x): + raw_alpha = x[..., -1:] + + if self.voxel_size is not None: + alpha = 1.0 - torch.exp(self.voxel_size * -torch.abs(raw_alpha)) + else: + alpha = torch.sigmoid(raw_alpha) + + return torch.cat([torch.sigmoid(x[..., :-1]), alpha], -1) + + def set_iter(self, i): + self.cur_iter = i + + +class Alpha(nn.Module): + def __init__(self, cfg): + super().__init__() + + def forward(self, x): + return 1.0 - torch.exp(-torch.relu(x)) + + +class Gaussian(nn.Module): + def __init__(self, cfg): + super().__init__() + + if "sigma" in cfg and not isinstance(cfg, str): + self.sigma = cfg.sigma + else: + self.sigma = 0.05 + + def forward(self, x): + return torch.exp(-0.5 * torch.square(x / self.sigma)) + + +def se3_hat(twist): + zero = torch.zeros_like(twist[..., 0]) + + mat = torch.stack( + [ + torch.stack([zero, twist[..., 2], -twist[..., 1], zero], axis=-1), + torch.stack([-twist[..., 2], zero, twist[..., 0], zero], axis=-1), + torch.stack([twist[..., 1], -twist[..., 0], zero, zero], axis=-1), + torch.stack([twist[..., 3], twist[..., 4], twist[..., 5], zero], axis=-1), + ], + axis=-1, + ) + + return torch.linalg.matrix_exp(mat) + + +class TwistToMatrix(nn.Module): + def __init__(self, cfg): + super().__init__() + + if "rot_fac" in cfg and not isinstance(cfg, str): + self.rot_fac = cfg.rot_fac + else: + self.rot_fac = 1.0 + + if "trans_fac" in cfg and not isinstance(cfg, str): + self.trans_fac = cfg.trans_fac + else: + self.trans_fac = 1.0 + + def forward(self, twist): + twist = torch.cat( + [ + twist[..., 0:3] * self.rot_fac, + twist[..., 3:6] * self.trans_fac, + ], + -1, + ) + + return se3_hat(twist).view(twist.shape[0], -1) + + +class AxisAngle(nn.Module): + def __init__(self, cfg): + super().__init__() + + if "fac" in cfg and not isinstance(cfg, str): + self.fac = cfg.fac + else: + self.fac = 1.0 + + def forward(self, twist): + axis_angle = twist[..., 0:3] * self.fac + rot_mat = axis_angle_to_matrix(axis_angle) + return rot_mat + + +class AxisAngleTranslation(nn.Module): + def __init__(self, cfg): + 
super().__init__() + + if "rot_fac" in cfg and not isinstance(cfg, str): + self.rot_fac = cfg.rot_fac + else: + self.rot_fac = 1.0 + + if "trans_fac" in cfg and not isinstance(cfg, str): + self.trans_fac = cfg.trans_fac + else: + self.trans_fac = 1.0 + + def forward(self, twist): + axis_angle = twist[..., 0:3] * self.rot_fac + trans = twist[..., 3:6] * self.trans_fac + rot_mat = axis_angle_to_matrix(axis_angle) + + return torch.cat([rot_mat, trans.unsqueeze(-1)], dim=-1) + + +class EaseValue(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + self.act = get_activation(cfg.activation, **kwargs) + + self.start_value = cfg.start_value if "start_value" in cfg else 0.0 + self.wait_iters = cfg.wait_iters if "wait_iters" in cfg else 0.0 + self.window_iters = cfg.window_iters if "window_iters" in cfg else 0.0 + self.cur_iter = 0 + + def weight(self): + if self.cur_iter >= self.window_iters: + return 1.0 + elif self.window_iters == 0: + return 0.0 + else: + w = min(max(float(self.cur_iter) / self.window_iters, 0.0), 1.0) + return w + + def ease_out(self, out): + if self.cur_iter >= self.window_iters: + return out + elif self.window_iters == 0: + return torch.ones_like(out) * self.start_value + else: + w = min(max(float(self.cur_iter) / self.window_iters, 0.0), 1.0) + return w * out + (1 - w) * self.start_value + + def forward(self, x): + out = self.act(x) + return self.ease_out(out) + + def set_iter(self, i): + self.cur_iter = i - self.wait_iters + + +class InterpValue(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + self.act1 = get_activation(cfg.act1, **kwargs) + self.act2 = get_activation(cfg.act2, **kwargs) + + self.wait_iters = cfg.wait_iters if "wait_iters" in cfg else 0.0 + self.window_iters = cfg.window_iters if "window_iters" in cfg else 0.0 + self.cur_iter = 0 + + def weight(self): + if self.cur_iter >= self.window_iters: + return 1.0 + elif self.window_iters == 0: + return 0.0 + else: + w = min(max(float(self.cur_iter) / self.window_iters, 0.0), 1.0) + return w + + def forward(self, x): + w = self.weight() + + if w <= 0.0: + return self.act1(x) + elif w >= 1.0: + return self.act2(x) + else: + val1 = self.act1(x) + val2 = self.act2(x) + return (1.0 - w) * val1 + w * val2 + + def set_iter(self, i): + self.cur_iter = i - self.wait_iters + + +activation_map = { + "alpha": Alpha, + "rgba": RGBA, + "sigmoid": Sigmoid, + "softplus": Softplus, + "softmax": Softmax, + "sparse_magnitude": SparseMagnitude, + "tanh": Tanh, + "identity_tanh": IdentityTanh, + "identity": Identity, + "power": Power, + "probs": Probs, + "l1_norm": L1Norm, + "l2_norm": L2Norm, + "row_l1_norm": RowL1Norm, + "row_l2_norm": RowL2Norm, + "row_l2_norm_z_only": RowL2NormZOnly, + "row_linf_norm": RowLInfNorm, + "zero": Zero, + "gaussian": Gaussian, + "leaky_relu": LeakyReLU, + "relu": ReLU, + "abs": Abs, + "twist_to_matrix": TwistToMatrix, + "axis_angle_translation": AxisAngleTranslation, + "ease_value": EaseValue, + "interp_value": InterpValue, +} + + +def get_activation(cfg, **kwargs): + if isinstance(cfg, str): + return activation_map[cfg]({}, **kwargs) + else: + return activation_map[cfg.type](cfg, **kwargs) diff --git a/nlf/conf/__init__.py b/nlf/conf/__init__.py new file mode 100644 index 0000000..238fb1f --- /dev/null +++ b/nlf/conf/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
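Editorial aside on `nlf/activations.py` above: it ends with an `activation_map` registry and a `get_activation` factory that accepts either a bare string or a config object carrying a `type` field plus per-activation options. A usage sketch (assumes the full repository and the dependencies from `Install.sh` are importable; the OmegaConf values below are invented for illustration):

```python
import torch
from omegaconf import OmegaConf

from nlf.activations import get_activation

x = torch.linspace(-3.0, 3.0, steps=7)

relu = get_activation("relu")  # string form: default-constructed activation
print(relu(x))

# Config form: 0.5 * sigmoid(2 * x), i.e. inner_fac scales the input, outer_fac the output.
cfg = OmegaConf.create({"type": "sigmoid", "inner_fac": 2.0, "outer_fac": 0.5})
sig = get_activation(cfg)
print(sig(x))
```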
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT diff --git a/nlf/conf/config.yaml b/nlf/conf/config.yaml new file mode 100644 index 0000000..c300ae6 --- /dev/null +++ b/nlf/conf/config.yaml @@ -0,0 +1,9 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +defaults: + - experiment: local diff --git a/nlf/conf/experiment/dataset/blender.yaml b/nlf/conf/experiment/dataset/blender.yaml new file mode 100644 index 0000000..38157e1 --- /dev/null +++ b/nlf/conf/experiment/dataset/blender.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: blender +collection: lego +data_subdir: nerf_synthetic +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [400, 400] +spherical_poses: True +use_ndc: False + +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/blender_large.yaml b/nlf/conf/experiment/dataset/blender_large.yaml new file mode 100644 index 0000000..c8c6155 --- /dev/null +++ b/nlf/conf/experiment/dataset/blender_large.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: blender +collection: lego +data_subdir: nerf_synthetic +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [800, 800] +spherical_poses: True +use_ndc: False + +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/blender_open_movies.yaml b/nlf/conf/experiment/dataset/blender_open_movies.yaml new file mode 100644 index 0000000..cb5c85b --- /dev/null +++ b/nlf/conf/experiment/dataset/blender_open_movies.yaml @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
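Editorial aside on the Hydra tree under `nlf/conf`: `config.yaml` pulls in `experiment: local`, and each file in `experiment/dataset/` is tagged `# @package _group_`, so a selected dataset file lands under `experiment.dataset` in the composed config. A compose-API sketch of how these dataset files are picked up (assumes hydra-core 1.1.x as pinned in `Install.sh` and execution from the repository root; `experiment/local.yaml` is not shown here, so whether the dataset group is already in its defaults is an assumption):

```python
import os

from hydra import compose, initialize_config_dir
from omegaconf import OmegaConf

conf_dir = os.path.abspath("nlf/conf")  # path relative to the repository root

with initialize_config_dir(config_dir=conf_dir, job_name="example"):
    # Equivalent of running the @hydra.main entry point with a CLI override such as
    #   experiment/dataset=blender_large
    # Use "+experiment/dataset=blender_large" instead if local.yaml does not already
    # select a dataset group in its defaults list.
    cfg = compose(config_name="config", overrides=["experiment/dataset=blender_large"])

print(OmegaConf.to_yaml(cfg.experiment.dataset))  # e.g. img_wh: [800, 800], collection: lego, ...
```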
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: video3d_time +collection: 10_03_B-agent_dodges_box +data_subdir: blender_open_movies +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1280, 720] +spherical_poses: False +use_ndc: False +correct_poses: True + +# Validation +val_num: 8 +val_skip: 2 +val_set: 'lightfield' + +val_all: False +val_pairs: [[2,2]] + +lightfield_step: 1 +lightfield_rows: 5 +lightfield_cols: 5 + +#val_all: False +#val_set: [0] + +# Video +start_frame: 0 +num_frames: 50 +keyframe_step: 4 + +#load_full_step: 8 +#subsample_keyframe_step: 4 +#subsample_keyframe_frac: 0.25 +#subsample_frac: 0.125 + +load_full_step: 8 +subsample_keyframe_step: 4 +subsample_keyframe_frac: 0.25 +subsample_frac: 0.125 + +# Render +render_params: + interpolate_time: False + interpolate: False + supersample: 2 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/blender_small.yaml b/nlf/conf/experiment/dataset/blender_small.yaml new file mode 100644 index 0000000..b73978b --- /dev/null +++ b/nlf/conf/experiment/dataset/blender_small.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: blender +collection: lego +data_subdir: nerf_synthetic +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [200, 200] +spherical_poses: True +use_ndc: False + +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/catacaustics.yaml b/nlf/conf/experiment/dataset/catacaustics.yaml new file mode 100644 index 0000000..4e1c888 --- /dev/null +++ b/nlf/conf/experiment/dataset/catacaustics.yaml @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: catacaustics +collection: compost +data_subdir: catacaustics +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1000, 666] +spherical_poses: False +use_ndc: False +correct_poses: True + +val_num: 8 +val_skip: -1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/donerf.yaml b/nlf/conf/experiment/dataset/donerf.yaml new file mode 100644 index 0000000..6171507 --- /dev/null +++ b/nlf/conf/experiment/dataset/donerf.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: donerf +collection: barbershop +data_subdir: donerf +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [400, 400] +spherical_poses: True +use_ndc: False +correct_poses: False +center_poses: True + +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/donerf_large.yaml b/nlf/conf/experiment/dataset/donerf_large.yaml new file mode 100644 index 0000000..229e3b7 --- /dev/null +++ b/nlf/conf/experiment/dataset/donerf_large.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: donerf +collection: barbershop +data_subdir: donerf +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [800, 800] +spherical_poses: True +use_ndc: False +correct_poses: True +center_poses: True + +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/eikonal.yaml b/nlf/conf/experiment/dataset/eikonal.yaml new file mode 100644 index 0000000..38e95eb --- /dev/null +++ b/nlf/conf/experiment/dataset/eikonal.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: eikonal +collection: Pen +data_subdir: eikonal +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [672, 504] +spherical_poses: False +use_ndc: False + +num_views: -1 +val_num: 1000 +val_skip: 10 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/immersive.yaml b/nlf/conf/experiment/dataset/immersive.yaml new file mode 100644 index 0000000..1b30f4e --- /dev/null +++ b/nlf/conf/experiment/dataset/immersive.yaml @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: immersive +collection: 05_Horse +data_subdir: immersive +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1280, 960] +spherical_poses: False +use_ndc: False +use_reference: False +correct_poses: True + +# Validation +val_num: 8 +val_skip: 2 + +val_all: False +val_set: [0] + +# Video +start_frame: 0 +num_frames: 50 +keyframe_step: 4 + +# load_full_step: 8 +# subsample_keyframe_step: 4 +# subsample_keyframe_frac: 0.25 +# subsample_frac: 0.125 + +load_full_step: 4 +subsample_keyframe_step: 2 +subsample_keyframe_frac: 0.25 +subsample_frac: 0.125 + +# Render +render_params: + interpolate_time: False + interpolate: False + supersample: 2 + crop: 1.0 + +static: False diff --git a/nlf/conf/experiment/dataset/llff.yaml b/nlf/conf/experiment/dataset/llff.yaml new file mode 100644 index 0000000..dbefddc --- /dev/null +++ b/nlf/conf/experiment/dataset/llff.yaml @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: llff +collection: fern +data_subdir: nerf_llff_data +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [504, 378] +spherical_poses: False +use_ndc: True + +val_num: 1000 +val_skip: 8 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/llff_360.yaml b/nlf/conf/experiment/dataset/llff_360.yaml new file mode 100644 index 0000000..4a058dd --- /dev/null +++ b/nlf/conf/experiment/dataset/llff_360.yaml @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: llff +collection: vasedeck +data_subdir: nerf_real_360 +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [504, 378] +spherical_poses: True +use_ndc: False +centered_pixels: True + +val_num: 1000 +val_skip: inf +val_all: True + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/llff_large.yaml b/nlf/conf/experiment/dataset/llff_large.yaml new file mode 100644 index 0000000..6f52052 --- /dev/null +++ b/nlf/conf/experiment/dataset/llff_large.yaml @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: llff +collection: fern +data_subdir: nerf_llff_data +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1008, 756] +spherical_poses: False +use_ndc: True + +val_num: 1000 +val_skip: 8 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/llff_undistort.yaml b/nlf/conf/experiment/dataset/llff_undistort.yaml new file mode 100644 index 0000000..dfd44b5 --- /dev/null +++ b/nlf/conf/experiment/dataset/llff_undistort.yaml @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: shiny +collection: fern_undistort +data_subdir: shiny +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [504, 378] +spherical_poses: False +use_ndc: True + +val_num: 1000 +val_skip: 8 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/llff_undistort_large.yaml b/nlf/conf/experiment/dataset/llff_undistort_large.yaml new file mode 100644 index 0000000..aa15cdc --- /dev/null +++ b/nlf/conf/experiment/dataset/llff_undistort_large.yaml @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: shiny +collection: fern_undistort +data_subdir: shiny +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1008, 756] +spherical_poses: False +use_ndc: True + +val_num: 1000 +val_skip: 8 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/neural_3d.yaml b/nlf/conf/experiment/dataset/neural_3d.yaml new file mode 100644 index 0000000..426c7a9 --- /dev/null +++ b/nlf/conf/experiment/dataset/neural_3d.yaml @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: neural_3d +collection: coffee_martini +data_subdir: neural_3d +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +#img_wh: [2704, 2028] +img_wh: [1352, 1014] +spherical_poses: False +use_ndc: True +use_reference: False +correct_poses: False + +# Validation +val_num: 8 +val_skip: 2 + +val_all: False +val_set: [0] + +# Video +start_frame: 0 +num_frames: 50 +keyframe_step: 4 + +#load_full_step: 4 +#subsample_keyframe_step: 2 +#subsample_keyframe_frac: 0.25 +#subsample_frac: 0.125 + +load_full_step: 4 +subsample_keyframe_step: 2 +subsample_keyframe_frac: 0.25 +subsample_frac: 0.125 + +# Rendering +render_params: + interpolate_time: False + interpolate: False + supersample: 2 + crop: 0.85 + +static: False +num_chunks: 1 diff --git a/nlf/conf/experiment/dataset/refnerf_large.yaml b/nlf/conf/experiment/dataset/refnerf_large.yaml new file mode 100644 index 0000000..e07e362 --- /dev/null +++ b/nlf/conf/experiment/dataset/refnerf_large.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: blender +collection: ball +data_subdir: refnerf +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [800, 800] +spherical_poses: True +use_ndc: False + +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/shiny.yaml b/nlf/conf/experiment/dataset/shiny.yaml new file mode 100644 index 0000000..f8971af --- /dev/null +++ b/nlf/conf/experiment/dataset/shiny.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: shiny +collection: food +data_subdir: shiny +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [504, 378] +use_full_image: False +spherical_poses: False +use_ndc: True + +val_num: 1000 +val_skip: 8 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/shiny_dense.yaml b/nlf/conf/experiment/dataset/shiny_dense.yaml new file mode 100644 index 0000000..b5b36ac --- /dev/null +++ b/nlf/conf/experiment/dataset/shiny_dense.yaml @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: shiny +collection: cd +data_subdir: shiny +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [504, 284] +use_ndc: True + +val_num: 1000 +val_skip: 8 + +render_params: + interpolate: False + supersample: 4 + max_frames: 400 + crop: 0.85 diff --git a/nlf/conf/experiment/dataset/shiny_dense_large.yaml b/nlf/conf/experiment/dataset/shiny_dense_large.yaml new file mode 100644 index 0000000..1ddfe35 --- /dev/null +++ b/nlf/conf/experiment/dataset/shiny_dense_large.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: shiny +collection: cd +data_subdir: shiny +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1008, 567] +spherical_poses: False +use_ndc: True + +val_num: 8 +#val_skip: 8 +val_skip: 2 +val_all: False + +render_params: + interpolate: False + supersample: 4 + max_frames: 400 + crop: 0.8 diff --git a/nlf/conf/experiment/dataset/shiny_large.yaml b/nlf/conf/experiment/dataset/shiny_large.yaml new file mode 100644 index 0000000..e3a34d4 --- /dev/null +++ b/nlf/conf/experiment/dataset/shiny_large.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: shiny +collection: food +data_subdir: shiny +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1008, 756] +spherical_poses: False +use_ndc: True + +val_num: 8 +val_skip: 8 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/spaces.yaml b/nlf/conf/experiment/dataset/spaces.yaml new file mode 100644 index 0000000..5c8dcf2 --- /dev/null +++ b/nlf/conf/experiment/dataset/spaces.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: spaces +collection: scene_000 +data_subdir: spaces +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [800, 480] +spherical_poses: False +use_ndc: True + +val_num: 8 +val_skip: 8 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/stanford.yaml b/nlf/conf/experiment/dataset/stanford.yaml new file mode 100644 index 0000000..96d2635 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford.yaml @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: gem +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: +downsample: 1 +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_beans.yaml b/nlf/conf/experiment/dataset/stanford_beans.yaml new file mode 100644 index 0000000..9707487 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_beans.yaml @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: beans +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [512, 256] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_beans_large.yaml b/nlf/conf/experiment/dataset/stanford_beans_large.yaml new file mode 100644 index 0000000..e1d5d0e --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_beans_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: beans +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1024, 512] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_bracelet.yaml b/nlf/conf/experiment/dataset/stanford_bracelet.yaml new file mode 100644 index 0000000..656230e --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_bracelet.yaml @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: bracelet +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [512, 320] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.1 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_bracelet_large.yaml b/nlf/conf/experiment/dataset/stanford_bracelet_large.yaml new file mode 100644 index 0000000..d8966d0 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_bracelet_large.yaml @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: bracelet +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1024, 640] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +#render_params: +# interpolate: False +# supersample: 4 +# crop: 1.0 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_bulldozer.yaml b/nlf/conf/experiment/dataset/stanford_bulldozer.yaml new file mode 100644 index 0000000..a64ddb5 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_bulldozer.yaml @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: bulldozer +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [768, 576] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.35 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_bulldozer_large.yaml b/nlf/conf/experiment/dataset/stanford_bulldozer_large.yaml new file mode 100644 index 0000000..b061757 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_bulldozer_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: bulldozer +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1536, 1152] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_bunny.yaml b/nlf/conf/experiment/dataset/stanford_bunny.yaml new file mode 100644 index 0000000..8dc0b95 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_bunny.yaml @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: bunny +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [512, 512] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_bunny_large.yaml b/nlf/conf/experiment/dataset/stanford_bunny_large.yaml new file mode 100644 index 0000000..ae446ba --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_bunny_large.yaml @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: bunny +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1024, 1024] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + start_col: 0 + end_col: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 4 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_chess.yaml b/nlf/conf/experiment/dataset/stanford_chess.yaml new file mode 100644 index 0000000..74f21f7 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_chess.yaml @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: chess +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [700, 400] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + step: 4 + supersample: 4 + disp_row: 8 + + use_file_coords: True + st_scale: 0.1 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_chess_large.yaml b/nlf/conf/experiment/dataset/stanford_chess_large.yaml new file mode 100644 index 0000000..10b41ae --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_chess_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: chess +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1400, 800] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_epi.yaml b/nlf/conf/experiment/dataset/stanford_epi.yaml new file mode 100644 index 0000000..c0219b8 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_epi.yaml @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford_epi +collection: bunny +data_subdir: stanford_epi +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: +downsample: 1 +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + start_col: 0 + end_col: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 16 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_flowers.yaml b/nlf/conf/experiment/dataset/stanford_flowers.yaml new file mode 100644 index 0000000..30338ed --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_flowers.yaml @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: flowers +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [640, 768] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.25 + +lightfield: + rows: 17 + cols: 17 + step: 4 + supersample: 4 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_flowers_large.yaml b/nlf/conf/experiment/dataset/stanford_flowers_large.yaml new file mode 100644 index 0000000..43884d2 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_flowers_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: flowers +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1280, 1536] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_gem.yaml b/nlf/conf/experiment/dataset/stanford_gem.yaml new file mode 100644 index 0000000..9c55f83 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_gem.yaml @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: gem +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [384, 512] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_gem_large.yaml b/nlf/conf/experiment/dataset/stanford_gem_large.yaml new file mode 100644 index 0000000..3fe3b9c --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_gem_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: gem +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [768, 1024] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_knights.yaml b/nlf/conf/experiment/dataset/stanford_knights.yaml new file mode 100644 index 0000000..1a1d8b4 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_knights.yaml @@ -0,0 +1,45 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: knights +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [512, 512] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + start_col: 0 + end_col: 17 + + step: 4 + supersample: 4 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_knights_large.yaml b/nlf/conf/experiment/dataset/stanford_knights_large.yaml new file mode 100644 index 0000000..d412268 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_knights_large.yaml @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: knights +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1024, 1024] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + start_col: 0 + end_col: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 4 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_large.yaml b/nlf/conf/experiment/dataset/stanford_large.yaml new file mode 100644 index 0000000..72a09bd --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_large.yaml @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: gem +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: +downsample: 1 +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_llff.yaml b/nlf/conf/experiment/dataset/stanford_llff.yaml new file mode 100644 index 0000000..7962b13 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_llff.yaml @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford_llff +collection: tarot +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: +spherical_poses: False +use_ndc: False + +val_num: 8 +val_skip: 1 +val_pairs: [] +val_all: False + +val_set: lightfield +lightfield_step: 1 +lightfield_rows: 17 +lightfield_cols: 17 + +# Rendering +render_params: + interpolate_time: False + interpolate: False + supersample: 4 + crop: 1.0 diff --git a/nlf/conf/experiment/dataset/stanford_tarot.yaml b/nlf/conf/experiment/dataset/stanford_tarot.yaml new file mode 100644 index 0000000..c7bf6e9 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_tarot.yaml @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: tarot +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [512, 512] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +#render_params: +# interpolate: False +# supersample: 4 +# crop: 1.0 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.5 + uv_scale: 1.0 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_tarot_large.yaml b/nlf/conf/experiment/dataset/stanford_tarot_large.yaml new file mode 100644 index 0000000..008c1ed --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_tarot_large.yaml @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: tarot +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1024, 1024] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +#render_params: +# interpolate: False +# supersample: 4 +# crop: 1.0 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.5 + uv_scale: 1.0 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_tarot_small.yaml b/nlf/conf/experiment/dataset/stanford_tarot_small.yaml new file mode 100644 index 0000000..91f8a98 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_tarot_small.yaml @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: tarot_small +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [512, 512] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_tarot_small_large.yaml b/nlf/conf/experiment/dataset/stanford_tarot_small_large.yaml new file mode 100644 index 0000000..1dc03a3 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_tarot_small_large.yaml @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: tarot_small +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1024, 1024] +spherical_poses: False +use_ndc: False + +val_all: False +val_pairs: [] +val_num: 8 +val_skip: 1 + +#render_params: +# interpolate: False +# supersample: 4 +# crop: 1.0 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.5 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_treasure.yaml b/nlf/conf/experiment/dataset/stanford_treasure.yaml new file mode 100644 index 0000000..2d9aad9 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_treasure.yaml @@ -0,0 +1,42 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: treasure +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [768, 640] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + spiral: True + far: False + interpolate: False + supersample: 4 + crop: 1.0 + + spiral_rad: 0.75 + uv_downscale: 0.0 + +lightfield: + rows: 17 + cols: 17 + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.25 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_treasure_large.yaml b/nlf/conf/experiment/dataset/stanford_treasure_large.yaml new file mode 100644 index 0000000..01e07d5 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_treasure_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: treasure +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1536, 1280] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_truck.yaml b/nlf/conf/experiment/dataset/stanford_truck.yaml new file mode 100644 index 0000000..aa89aa8 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_truck.yaml @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: truck +data_subdir: stanford_half +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [640, 480] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.1 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/stanford_truck_large.yaml b/nlf/conf/experiment/dataset/stanford_truck_large.yaml new file mode 100644 index 0000000..eb90e41 --- /dev/null +++ b/nlf/conf/experiment/dataset/stanford_truck_large.yaml @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: stanford +collection: truck +data_subdir: stanford +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [1280, 960] +spherical_poses: False +use_ndc: False + +val_pairs: [] +val_num: 8 +val_skip: 1 +val_all: False + +render_params: + interpolate: False + supersample: 4 + crop: 1.0 + +lightfield: + rows: 17 + cols: 17 + + start_row: 0 + end_row: 17 + + keyframe_step: -1 + #keyframe_subsample: 1 + keyframe_subsample: 1 + + step: 4 + supersample: 2 + disp_row: 8 + + use_file_coords: True + st_scale: 0.125 + vis_st_scale: diff --git a/nlf/conf/experiment/dataset/technicolor.yaml b/nlf/conf/experiment/dataset/technicolor.yaml new file mode 100644 index 0000000..552a001 --- /dev/null +++ b/nlf/conf/experiment/dataset/technicolor.yaml @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +name: technicolor +collection: painter +data_subdir: technicolor +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [2048, 1088] +spherical_poses: False +use_ndc: True +use_reference: False +correct_poses: False + +# Validation +val_num: 8 +val_skip: 2 +val_set: 'lightfield' + +val_all: False +val_pairs: [[2, 2]] + +lightfield_step: 1 +lightfield_rows: 4 +lightfield_cols: 4 + +# Video +start_frame: 0 +num_frames: 50 # 1.3 seconds +keyframe_step: 4 # every 5 frames + +# load_full_step: 8 # every 4 frames +# subsample_keyframe_step: 4 # every 2 frames +# subsample_keyframe_frac: 0.25 # load 1/4 of the pixels from each keyframe +# subsample_frac: 0.125 # load 1/20 of the pixels from every other frame + +load_full_step: 1 # every 4 frames +subsample_keyframe_step: 1 # every 2 frames +subsample_keyframe_frac: 1.0 # load 1/4 of the pixels from each keyframe +subsample_frac: 1.0 # load 1/20 of the pixels from every other frame + +#load_full_step: 4 # every 4 frames +#subsample_keyframe_step: 2 # every 2 frames +#subsample_keyframe_frac: 0.25 # load 1/4 of the pixels from each keyframe +#subsample_frac: 0.25 # load 1/20 of the pixels from every other frame + +# Rendering +render_params: + interpolate_time: False + interpolate: False + supersample: 2 + crop: 1.0 + +num_chunks: 1 diff --git a/nlf/conf/experiment/dataset/technicolor_subsample.yaml b/nlf/conf/experiment/dataset/technicolor_subsample.yaml new file mode 100644 index 0000000..51a815b --- /dev/null +++ b/nlf/conf/experiment/dataset/technicolor_subsample.yaml @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +name: technicolor +collection: painter +data_subdir: technicolor +root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}/" + +img_wh: [2048, 1088] +spherical_poses: False +use_ndc: True +use_reference: False +correct_poses: False + +# Validation +val_num: 8 +val_skip: 2 +val_set: 'lightfield' + +val_all: False +val_pairs: [[2, 2]] + +lightfield_step: 1 +lightfield_rows: 4 +lightfield_cols: 4 + +# Video +start_frame: 0 +num_frames: 50 # 1.3 seconds +keyframe_step: 4 # every 5 frames + +load_full_step: 8 # every 4 frames +subsample_keyframe_step: 4 # every 2 frames +subsample_keyframe_frac: 0.25 # load 1/4 of the pixels from each keyframe +subsample_frac: 0.125 # load 1/20 of the pixels from every other frame + +# load_full_step: 1 # every 4 frames +# subsample_keyframe_step: 1 # every 2 frames +# subsample_keyframe_frac: 1.0 # load 1/4 of the pixels from each keyframe +# subsample_frac: 1.0 # load 1/20 of the pixels from every other frame + +#load_full_step: 4 # every 4 frames +#subsample_keyframe_step: 2 # every 2 frames +#subsample_keyframe_frac: 0.25 # load 1/4 of the pixels from each keyframe +#subsample_frac: 0.25 # load 1/20 of the pixels from every other frame + +# Rendering +render_params: + interpolate_time: False + interpolate: False + supersample: 2 + crop: 1.0 + +num_chunks: 1 diff --git a/nlf/conf/experiment/local.yaml b/nlf/conf/experiment/local.yaml new file mode 100644 index 0000000..ed26e7f --- /dev/null +++ b/nlf/conf/experiment/local.yaml @@ -0,0 +1,15 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +defaults: + - params: local + - dataset: donerf_large + - model: donerf_cylinder + - training: donerf_tensorf + - regularizers: all + - visualizers: all diff --git a/nlf/conf/experiment/model/blender_voxel.yaml b/nlf/conf/experiment/model/blender_voxel.yaml new file mode 100644 index 0000000..86a6fc2 --- /dev/null +++ b/nlf/conf/experiment/model/blender_voxel.yaml @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + # 1) Per ray outputs + emb0: + type: ray_prediction + + # Parameterization + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + # Net + net: + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + #type: zero + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 192 + + outputs: + z_vals: 1 + sigma: 1 + point_offset: 3 + + # 2) Ray density + emb1: + type: point_density + shift: 2.0 + activation: + type: sigmoid + fac: 1.0 + + # 3) Intersection + emb2: + type: ray_intersect + + # Intersect + z_channels: 192 + + intersect: + type: voxel_grid + + sort: True + outward_facing: False + use_disparity: False + use_sigma: True + + origin: [0.0, 0.0, 0.0] + initial: [-2.0, -2.0, -2.0] + end: [2.0, 2.0, 2.0] + + near: 2.0 + far: 6.0 + + activation: + type: identity + fac: 0.5 + + # 5) Add extra outputs + emb3: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + # 7) Ray density + emb5: + type: point_density + shift: 2.0 + activation: + type: sigmoid + fac: 1.0 + + # 8) Add point offset + emb6: + type: point_offset + use_sigma: True + activation: + type: identity + fac: 0.25 + + # 9) Extract + emb7: + type: extract_fields + fields: ['points', 'distances', 'viewdirs'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 1 + ndc_ray: 0 + + # Density activation + fea2denseAct: softplus + distance_scale: 25.0 + density_shift: -10.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 1000000 # 100**3 + N_voxel_final: 27000000 # 300**3 + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 1e-4 + alpha_mask_thre: 1e-4 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + num_frames: 8 + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/bom_cylinder.yaml b/nlf/conf/experiment/model/bom_cylinder.yaml new file mode 100644 index 0000000..99a1550 --- /dev/null +++ b/nlf/conf/experiment/model/bom_cylinder.yaml @@ -0,0 +1,262 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 4 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 # NOTE: Changed from 32 + + intersect: + type: cylinder + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor 
sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + #shadingMode: SH + #data_dim_color: 27 + + shadingMode: RGB + data_dim_color: 3 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/bom_sphere.yaml b/nlf/conf/experiment/model/bom_sphere.yaml new file mode 100644 index 0000000..b36879b --- /dev/null +++ b/nlf/conf/experiment/model/bom_sphere.yaml @@ -0,0 +1,266 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 8 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 # NOTE: Changed from 32 + + intersect: + type: sphere_new + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + resize_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 
'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 + + # shadingMode: RGB + # data_dim_color: 3 + + # shadingMode: MLP_Fea + # data_dim_color: 27 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/bom_z_plane.yaml b/nlf/conf/experiment/model/bom_z_plane.yaml new file mode 100644 index 0000000..238fb1f --- /dev/null +++ b/nlf/conf/experiment/model/bom_z_plane.yaml @@ -0,0 +1,7 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT diff --git a/nlf/conf/experiment/model/catacaustics_cylinder.yaml b/nlf/conf/experiment/model/catacaustics_cylinder.yaml new file mode 100644 index 0000000..57a967f --- /dev/null +++ b/nlf/conf/experiment/model/catacaustics_cylinder.yaml @@ -0,0 +1,196 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 4 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: cylinder + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale_global', 'color_shift_global'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [150, 150, 150] + end: [600, 600, 600] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/catacaustics_distance.yaml b/nlf/conf/experiment/model/catacaustics_distance.yaml new file mode 100644 index 0000000..e7747ff --- /dev/null +++ b/nlf/conf/experiment/model/catacaustics_distance.yaml @@ -0,0 +1,196 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: euclidean_distance_unified + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale_global', 'color_shift_global'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [100, 100, 100] + end: [400, 400, 400] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/catacaustics_sphere.yaml b/nlf/conf/experiment/model/catacaustics_sphere.yaml new file mode 100644 index 0000000..dcef220 --- /dev/null +++ b/nlf/conf/experiment/model/catacaustics_sphere.yaml @@ -0,0 +1,251 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 8 + #channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + #window_epochs: 0 + #wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + #window_epochs: 0 + #wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + #normal: + # channels: 3 + + # activation: + # type: identity + + #ref_viewdirs_offset: + # channels: 3 + + # activation: + # type: identity + + #ref_distance: + # channels: 1 + + # activation: + # type: identity + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: sphere + #type: euclidean_distance_unified + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -2.0 + end: 2.0 + + use_dataset_bounds: True + #origin_scale_factor: 1.0 + #resize_scale_factor: 1.0 + origin_scale_factor: 0.0 + resize_scale_factor: 0.0 + + contract: + type: bbox + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + #reflect_0: + # type: reflect + # direction_init: True + + # out_points_field: points_temp + # out_direction_field: viewdirs + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 1 + black_bg: 0 + + # Density activation + fea2denseAct: relu + #distance_scale: 16.0 + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + #N_voxel_final: 216000000 + N_voxel_final: 64000000 + + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [] + #update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + shadingMode: RGB + data_dim_color: 3 + 
#shadingMode: SH + #data_dim_color: 27 diff --git a/nlf/conf/experiment/model/catacaustics_voxel.yaml b/nlf/conf/experiment/model/catacaustics_voxel.yaml new file mode 100644 index 0000000..99caa04 --- /dev/null +++ b/nlf/conf/experiment/model/catacaustics_voxel.yaml @@ -0,0 +1,196 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 96 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 96 + + intersect: + type: voxel_grid + + sort: True + outward_facing: False + use_disparity: False + use_sigma: False + max_axis: False + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale_global', 'color_shift_global'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [150, 150, 150] + end: [600, 600, 600] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/catacaustics_z_plane.yaml b/nlf/conf/experiment/model/catacaustics_z_plane.yaml new file mode 100644 index 0000000..41f8b4a --- /dev/null +++ b/nlf/conf/experiment/model/catacaustics_z_plane.yaml @@ -0,0 +1,201 @@ 
+#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.0 + contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale_global', 'color_shift_global'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [150, 150, 150] + end: [400, 400, 400] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 + + #shadingMode: MLP_Fea + #data_dim_color: 27 diff --git a/nlf/conf/experiment/model/donerf_cylinder.yaml b/nlf/conf/experiment/model/donerf_cylinder.yaml new file mode 100644 index 0000000..29a1574 --- /dev/null +++ b/nlf/conf/experiment/model/donerf_cylinder.yaml @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 4 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: cylinder + #type: sphere + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 3375000 + N_voxel_final: 216000000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + ## Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/nlf/conf/experiment/model/donerf_cylinder_no_point.yaml b/nlf/conf/experiment/model/donerf_cylinder_no_point.yaml new file mode 100644 index 0000000..a1222f9 --- /dev/null +++ b/nlf/conf/experiment/model/donerf_cylinder_no_point.yaml @@ -0,0 +1,205 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 4 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + #type: cylinder + type: sphere + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 3375000 + N_voxel_final: 216000000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + ## Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/nlf/conf/experiment/model/donerf_cylinder_small.yaml b/nlf/conf/experiment/model/donerf_cylinder_small.yaml new file mode 100644 index 0000000..5b8c59b --- /dev/null +++ b/nlf/conf/experiment/model/donerf_cylinder_small.yaml @@ -0,0 +1,211 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 4 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + #type: cylinder + type: sphere + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + #N_voxel_init: 1000000 + #N_voxel_final: 64000000 + N_voxel_init: 262144000 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + ## Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/nlf/conf/experiment/model/donerf_sphere.yaml b/nlf/conf/experiment/model/donerf_sphere.yaml new file mode 100644 index 0000000..3e17853 --- /dev/null +++ b/nlf/conf/experiment/model/donerf_sphere.yaml @@ -0,0 +1,208 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 4 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: sphere + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 3375000 + N_voxel_final: 216000000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + ## Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/nlf/conf/experiment/model/donerf_voxel.yaml b/nlf/conf/experiment/model/donerf_voxel.yaml new file mode 100644 index 0000000..3ab228c --- /dev/null +++ b/nlf/conf/experiment/model/donerf_voxel.yaml @@ -0,0 +1,233 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + #param: + # fn: contract_points + + # param: + # n_dims: 6 + # fn: pluecker + # direction_multiplier: 1.0 + # moment_multiplier: 1.0 + + # contract: + # type: mipnerf + # contract_samples: True + # use_dataset_bounds: True + # + # contract_start_channel: 3 + # contract_end_channel: 6 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 48 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 48 + + intersect: + type: voxel_grid + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + #N_voxel_init: 2097152 + N_voxel_init: 3375000 # TODO: original + #N_voxel_init: 8000000 + + N_voxel_final: 216000000 # TODO: original + #N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + #update_AlphaMask_list: [] + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + ## Shading + #shadingMode: SH + #data_dim_color: 27 + + shadingMode: RGB + data_dim_color: 3 diff --git a/nlf/conf/experiment/model/donerf_z.yaml b/nlf/conf/experiment/model/donerf_z.yaml new file mode 100644 index 0000000..e2e2600 --- /dev/null +++ 
b/nlf/conf/experiment/model/donerf_z.yaml @@ -0,0 +1,122 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + +embedding: + type: epipolar + + # Parameterization + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + net: + pe: + type: windowed + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Intersection + z_channels: 96 + preds_per_z: 1 + + intersect: + type: z_plane + out_channels_per_z: 3 + extra_outputs: ['distance'] + + stratified: True + sort: True + use_disparity: False + + initial: -1.0 + end: 1.0 + + add_random: False + random_per_sample: 1 + + add_point_offset: True + + add_flow: False + flow_keyframes: 1 + flow_scale: 1.0 + + z_activation: + type: identity + fac: 0.5 + point_activation: + type: tanh + fac: 0.25 + flow_activation: + type: identity + fac: 0.25 + sigma_activation: + type: sigmoid + fac: 1.0 + + sigma_shift: 5.0 + + # Transform + extra_out_channels: 3 + +color: + type: base + + net: + type: tensor_vm_split_no_sample + + white_bg: 0 + ndc_ray: 0 + nSamples: 96 + step_ratio: 0.5 + + fea2denseAct: relu + distance_scale: 4.0 + density_shift: 0.0 + + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + N_voxel_init: 64000000 # 400**3 + N_voxel_final: 512000000 # 800**3 + upsamp_list: [2000,3000,4000,5500,7000] + update_AlphaMask_list: [] + + rm_weight_mask_thre: 1e-4 + alpha_mask_thre: 1e-4 + + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + shadingMode: SH + data_dim_color: 27 + + view_pe: 2 + fea_pe: 2 + featureC: 128 + + lr_upsample_reset: True diff --git a/nlf/conf/experiment/model/immersive_cylinder.yaml b/nlf/conf/experiment/model/immersive_cylinder.yaml new file mode 100644 index 0000000..c5ee7c3 --- /dev/null +++ b/nlf/conf/experiment/model/immersive_cylinder.yaml @@ -0,0 +1,273 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 # NOTE: Changed from 32 + + outputs: + z_vals: + channels: 4 # NOTE: Changed from 4 + + spatial_flow: + channels: 3 + + activation: + type: identity + #outer_fac: 8.0 + #outer_fac: 4.0 + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 64 # NOTE: Changed from 32 + z_channels: 32 # NOTE: Changed from 32 + + intersect: + type: cylinder # NOTE: Changed from cylinder + #type: sphere # NOTE: Changed from cylinder + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + #near: 0.5 # NOTE: Changed from 0.5 + #initial: 0.75 # NOTE: Changed from 0.75 + #end: 64.0 + #origin_scale_factor: 0.0 + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + #contract_start_radius: 1.75 # NOTE: Changed from 1.75 + #contract_end_radius: 64.0 + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density 
activation + fea2denseAct: relu + distance_scale: 16.0 + #distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 + #shadingMode: RGB + #data_dim_color: 3 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/immersive_cylinder_pe.yaml b/nlf/conf/experiment/model/immersive_cylinder_pe.yaml new file mode 100644 index 0000000..2da4ad1 --- /dev/null +++ b/nlf/conf/experiment/model/immersive_cylinder_pe.yaml @@ -0,0 +1,270 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 4 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 # NOTE: Changed from 32 + + outputs: + z_vals: + channels: 4 # NOTE: Changed from 4 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 8.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 64 # NOTE: Changed from 32 + z_channels: 32 # NOTE: Changed from 32 + + intersect: + type: cylinder # NOTE: Changed from cylinder + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + #near: 0.5 # NOTE: Changed from 0.5 + #initial: 0.75 # NOTE: Changed from 0.75 + #end: 64.0 + #origin_scale_factor: 0.0 + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + #contract_start_radius: 1.75 # NOTE: 
Changed from 1.75 + #contract_end_radius: 64.0 + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + #distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 + #shadingMode: RGB + #data_dim_color: 3 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/immersive_sphere.yaml b/nlf/conf/experiment/model/immersive_sphere.yaml new file mode 100644 index 0000000..1498005 --- /dev/null +++ b/nlf/conf/experiment/model/immersive_sphere.yaml @@ -0,0 +1,279 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 # NOTE: Changed from 32 + + outputs: + z_vals: + channels: 4 # NOTE: Changed from 4 + + spatial_flow: + channels: 3 + + activation: + type: identity + #outer_fac: 8.0 + #outer_fac: 4.0 + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 259 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 259 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 # NOTE: Changed from 32 + # z_channels: 32 # NOTE: Changed from 32 + + intersect: + #type: cylinder # NOTE: Changed from cylinder + type: sphere # NOTE: Changed from cylinder + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + #near: 0.5 # NOTE: Changed from 0.5 + #initial: 0.75 # NOTE: Changed from 0.75 + #end: 64.0 + #origin_scale_factor: 0.0 + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + #contract_start_radius: 1.75 # NOTE: Changed from 1.75 + #contract_end_radius: 64.0 + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # 
Density activation + fea2denseAct: relu + distance_scale: 16.0 + #distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + # shadingMode: SH + # data_dim_color: 27 + # #shadingMode: RGB + # #data_dim_color: 3 + shadingMode: MLP_Fea + data_dim_color: 27 + + # Density + densityMode: Density + + gear_num: 3 diff --git a/nlf/conf/experiment/model/immersive_sphere_new.yaml b/nlf/conf/experiment/model/immersive_sphere_new.yaml new file mode 100644 index 0000000..a89c3a4 --- /dev/null +++ b/nlf/conf/experiment/model/immersive_sphere_new.yaml @@ -0,0 +1,274 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 # NOTE: Changed from 32 + + outputs: + z_vals: + channels: 8 + + spatial_flow: + channels: 3 + + activation: + type: identity + #outer_fac: 8.0 + #outer_fac: 4.0 + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 64 # NOTE: Changed from 32 + z_channels: 32 # NOTE: Changed from 32 + + intersect: + #type: cylinder # NOTE: Changed from cylinder + type: sphere_new # NOTE: Changed from cylinder + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + #near: 0.5 # NOTE: Changed from 0.5 + #initial: 0.75 # NOTE: Changed from 0.75 + #end: 64.0 + #origin_scale_factor: 0.0 + + 
use_dataset_bounds: True + resize_scale_factor: 1.0 + origin_scale_factor: 1.0 + + contract: + type: mipnerf + contract_samples: True + #contract_start_radius: 1.75 # NOTE: Changed from 1.75 + #contract_end_radius: 64.0 + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + #distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 + #shadingMode: RGB + #data_dim_color: 3 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/immersive_sphere_test.yaml b/nlf/conf/experiment/model/immersive_sphere_test.yaml new file mode 100644 index 0000000..e387ad6 --- /dev/null +++ b/nlf/conf/experiment/model/immersive_sphere_test.yaml @@ -0,0 +1,241 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 # NOTE: Changed from 32 + + outputs: + z_vals: + channels: 4 # NOTE: Changed from 4 + + spatial_flow: + channels: 3 + + activation: + type: identity + #outer_fac: 8.0 + #outer_fac: 4.0 + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 64 # NOTE: Changed from 32 + z_channels: 32 # NOTE: Changed from 32 + + intersect: + #type: cylinder # NOTE: Changed from cylinder + type: sphere # NOTE: Changed from cylinder + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + #near: 0.5 # NOTE: Changed from 0.5 + #initial: 0.75 # NOTE: Changed from 0.75 + #end: 64.0 + #origin_scale_factor: 0.0 + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + #contract_start_radius: 1.75 # NOTE: Changed from 1.75 + #contract_end_radius: 64.0 + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + #distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + 
alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: RGB + data_dim_color: 3 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/immersive_z_plane.yaml b/nlf/conf/experiment/model/immersive_z_plane.yaml new file mode 100644 index 0000000..c11884d --- /dev/null +++ b/nlf/conf/experiment/model/immersive_z_plane.yaml @@ -0,0 +1,243 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: True + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + near: 0.5 + initial: -1.0 + end: -100.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 8.0 # NOTE: Changed from 2 + contract_end_radius: 100.0 + + activation: + type: identity + fac: 0.5 + + color_transform: + type: color_transform + + transform_activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 0.1 + outer_fac: 1.0 + + shift_activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 0.1 + outer_fac: 1.0 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, 
-2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [192, 192, 192] # NOTE: Changed from 192 + end: [800, 800, 800] # NOTE: Changed from 800 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 + + #shadingMode: SH + #data_dim_color: 27 + + #shadingMode: MLP_Fea + #data_dim_color: 27 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/llff_z_plane.yaml b/nlf/conf/experiment/model/llff_z_plane.yaml new file mode 100644 index 0000000..9af11db --- /dev/null +++ b/nlf/conf/experiment/model/llff_z_plane.yaml @@ -0,0 +1,212 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + #param: + # n_dims: 4 + # fn: two_plane + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.0 + contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + 
density_shift: 0.0 + + # Grid bounds + aabb: [[-1.5, -1.67, -1.0], [1.5, 1.67, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/llff_z_plane_small.yaml b/nlf/conf/experiment/model/llff_z_plane_small.yaml new file mode 100644 index 0000000..33fa7e8 --- /dev/null +++ b/nlf/conf/experiment/model/llff_z_plane_small.yaml @@ -0,0 +1,213 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + #param: + # n_dims: 4 + # fn: two_plane + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.0 + contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-1.5, -1.67, -1.0], [1.5, 1.67, 1.0]] + + # Grid size and upsampling + 
N_voxel_init: 1000000 + #N_voxel_final: 64000000 + N_voxel_final: 125000000 + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/neural_3d_z_plane.yaml b/nlf/conf/experiment/model/neural_3d_z_plane.yaml new file mode 100644 index 0000000..532aaa0 --- /dev/null +++ b/nlf/conf/experiment/model/neural_3d_z_plane.yaml @@ -0,0 +1,268 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + #param: + # n_dims: 4 + # fn: two_plane + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 128 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 4.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + #shift: 4.0 + shift: 1.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 128 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.0 + contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + 
type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + #distance_scale: 8.0 + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -1.5, -1.25], [2.0, 1.5, 1.25]] + + # Grid size and upsampling + N_voxel_init: 2097152 # TODO: original + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: MLP_Fea + data_dim_color: 27 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/neural_3d_z_plane_static.yaml b/nlf/conf/experiment/model/neural_3d_z_plane_static.yaml new file mode 100644 index 0000000..9427765 --- /dev/null +++ b/nlf/conf/experiment/model/neural_3d_z_plane_static.yaml @@ -0,0 +1,238 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + #n_freqs: 6 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 256 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + #window_epochs: 1 + #wait_epochs: 1 + + activation: + type: sigmoid + #shift: 2.0 + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale_global: + channels: 3 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift_global: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 256 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.0 + contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + #color_transform: + # type: color_transform + 
+ # transform_activation: + # type: ease_value + # start_value: 0.0 + # window_epochs: 0 + # wait_epochs: 0 + + # activation: + # type: identity + # shift: 0.0 + # inner_fac: 0.1 + # outer_fac: 1.0 + + # shift_activation: + # type: ease_value + # start_value: 0.0 + # window_epochs: 0 + # wait_epochs: 0 + + # activation: + # type: identity + # shift: 0.0 + # inner_fac: 0.1 + # outer_fac: 1.0 + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', 'color_scale_global', 'color_transform_global', 'color_shift_global'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [100, 100, 100] + end: [300, 300, 300] + #grid_size: + # start: [192, 192, 192] + # end: [800, 800, 800] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [12000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + #shadingMode: RGBtFourier + data_dim_color: 3 + + #shadingMode: SH + #data_dim_color: 27 + + # Density + densityMode: Density + #densityMode: DensityFourier diff --git a/nlf/conf/experiment/model/neural_3d_z_plane_world.yaml b/nlf/conf/experiment/model/neural_3d_z_plane_world.yaml new file mode 100644 index 0000000..98ccb50 --- /dev/null +++ b/nlf/conf/experiment/model/neural_3d_z_plane_world.yaml @@ -0,0 +1,267 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 48 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 48 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + use_dataset_bounds: True + origin_scale_factor: 0.0 + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + #distance_scale: 8.0 + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 # TODO: original + #N_voxel_init: 3375000 + #N_voxel_init: 8000000 + + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + 
#update_AlphaMask_list: [] + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/refnerf_sphere.yaml b/nlf/conf/experiment/model/refnerf_sphere.yaml new file mode 100644 index 0000000..09ea7be --- /dev/null +++ b/nlf/conf/experiment/model/refnerf_sphere.yaml @@ -0,0 +1,246 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: pluecker + direction_multiplier: 1.0 + moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 8 + #channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + #window_epochs: 0 + #wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + #window_epochs: 0 + #wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + #normal: + # channels: 3 + + # activation: + # type: identity + + #ref_viewdirs_offset: + # channels: 3 + + # activation: + # type: identity + + #ref_distance: + # channels: 1 + + # activation: + # type: identity + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: sphere + #type: euclidean_distance_unified + + sort: True + outward_facing: False + use_disparity: False + max_axis: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -2.0 + end: 2.0 + + use_dataset_bounds: True + #origin_scale_factor: 1.0 + #resize_scale_factor: 1.0 + origin_scale_factor: 0.0 + resize_scale_factor: 0.0 + + activation: + type: identity + fac: 0.5 + + #reflect_0: + # type: reflect + # direction_init: True + + # out_points_field: points_temp + # out_direction_field: viewdirs + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + 
net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 1 + black_bg: 0 + + # Density activation + fea2denseAct: relu + #distance_scale: 16.0 + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + #N_voxel_final: 216000000 + N_voxel_final: 64000000 + + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [] + #update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + shadingMode: RGB + data_dim_color: 3 + #shadingMode: SH + #data_dim_color: 27 diff --git a/nlf/conf/experiment/model/shiny_z_deformable.yaml b/nlf/conf/experiment/model/shiny_z_deformable.yaml new file mode 100644 index 0000000..558a0ee --- /dev/null +++ b/nlf/conf/experiment/model/shiny_z_deformable.yaml @@ -0,0 +1,160 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: basic + n_freqs: 2 + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 4 + + sigma: + channels: 1 + + activation: + type: sigmoid + fac: 1.0 + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + fac: 0.25 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: deformable_voxel_grid + + sort: True + outward_facing: False + use_disparity: False + use_sigma: False + max_axis: False + + out_points: raw_points + out_distance: raw_distance + + start_normal: [ + [ 0.0, 0.0, 1.0 ] + ] + normal_scale_factor: 1.0 + + initial: [-1.0] + end: [1.0] + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + #fea2denseAct: softplus + #distance_scale: 25.0 + #density_shift: -10.0 + + fea2denseAct: relu + distance_scale: 4.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [150, 150, 150] + end: [600, 600, 600] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/shiny_z_depth.yaml b/nlf/conf/experiment/model/shiny_z_depth.yaml new file mode 100644 index 0000000..697af6d --- 
/dev/null +++ b/nlf/conf/experiment/model/shiny_z_depth.yaml @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + +embedding: + type: epipolar + + # Parameterization + param: + n_dims: 4 + fn: two_plane + + # MLP + net: + # PE + pe_channels: 4 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_epochs: 0 + max_freq_epoch: 4 + exclude_identity: False + + type: base + depth: 6 + hidden_channels: 256 + skips: [3] + + layer_activation: + type: leaky_relu + sigma: 0.25 + + # Z outputs + z_channels: 1 + preds_per_z: 1 + z_activation: identity + + intersect: + type: z_plane + forward_facing: True + stratified: False + sort: False + out_channels_per_z: 3 + + # Transform + tform_in_channels: 0 + tform_out_channels: 3 + tform_activation: + type: row_l2_norm + param_channels: 3 + bias_activation: zero + activation: identity + +color: + type: base + + net: + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 8 + wait_epochs: 0 + max_freq_epoch: 16 + exclude_identity: False + + type: base + depth: 6 + hidden_channels: 384 + skips: [3] + + layer_activation: + type: leaky_relu + sigma: 0.25 + + activation: sigmoid diff --git a/nlf/conf/experiment/model/shiny_z_plane.yaml b/nlf/conf/experiment/model/shiny_z_plane.yaml new file mode 100644 index 0000000..83a3a2c --- /dev/null +++ b/nlf/conf/experiment/model/shiny_z_plane.yaml @@ -0,0 +1,220 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + #z_channels: 64 + #z_channels: 128 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + #z_channels: 64 + #z_channels: 128 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + num_samples_for_scale: 32 + #num_samples_for_scale: 64 + #num_samples_for_scale: 128 + + activation: + type: identity + fac: 0.5 + + #generate_samples_0: + # type: generate_samples + + # sample_range: [32, 32] + # inference_samples: 32 + # total_samples: 32 + + #select_points_0: + # type: select_points + # fields: ['points', 'distances', 'sigma', 'point_sigma', 'point_offset', 'weights', 'color_scale', 'color_shift'] + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/nlf/conf/experiment/model/shiny_z_plane_cascaded.yaml b/nlf/conf/experiment/model/shiny_z_plane_cascaded.yaml new file mode 100644 index 0000000..a62b31c --- /dev/null +++ b/nlf/conf/experiment/model/shiny_z_plane_cascaded.yaml @@ -0,0 +1,272 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: basic + n_freqs: 0 + + # Net + net: + group: embedding_impl + type: zero + + # Outputs + z_channels: 8 + + outputs: + z_vals: + channels: 1 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 8 + + intersect: + type: z_plane + + mask: + stop_iters: -1 + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: False + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_prediction_0: + type: point_prediction + + # Inputs + in_z_channels: 8 + + inputs: + points: 3 + viewdirs: 3 + times: 1 + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: identity + + pe: + type: basic + n_freqs: 1 + + z: + start: 2 + end: 3 + + param: + n_dims: 1 + fn: identity + + pe: + type: basic + n_freqs: 8 + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + out_z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_1: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + #N_voxel_init: 512000 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: 
[8,4,4] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/nlf/conf/experiment/model/shiny_z_plane_feedback.yaml b/nlf/conf/experiment/model/shiny_z_plane_feedback.yaml new file mode 100644 index 0000000..c155579 --- /dev/null +++ b/nlf/conf/experiment/model/shiny_z_plane_feedback.yaml @@ -0,0 +1,231 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 0 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 8 + + outputs: + z_vals: + channels: 1 + + ray_intersect_0: + wait_iters: 0 + type: ray_intersect + + # Intersect + z_channels: 8 + + intersect: + mask: + stop_iters: -1 + + type: z_plane + + sort: False + outward_facing: False + use_disparity: False + use_sigma: False + + initial: -1.0 + end: 1.0 + + out_points: raw_points + out_distance: raw_distance + + activation: + type: identity + fac: 0.5 + + point_prediction_0: + type: point_prediction + + # Inputs + in_z_channels: 8 + + inputs: + points: 3 + viewdirs: 3 + + # Parameterization + params: + all: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: identity + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + out_z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: sigmoid + fac: 1.0 + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + fac: 0.25 + + ray_intersect_1: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + use_sigma: True + max_axis: False + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + #fea2denseAct: softplus + #distance_scale: 25.0 + #density_shift: -10.0 + + fea2denseAct: relu + distance_scale: 4.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [150, 150, 150] + end: [600, 600, 600] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + #shadingMode: RGB + 
#data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/shiny_z_plane_no_point.yaml b/nlf/conf/experiment/model/shiny_z_plane_no_point.yaml new file mode 100644 index 0000000..f45b44e --- /dev/null +++ b/nlf/conf/experiment/model/shiny_z_plane_no_point.yaml @@ -0,0 +1,213 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + #contract: + # type: mipnerf + # contract_samples: True + # contract_start_radius: 1.0 + # contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + #aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + #grid_size: + # start: [128, 128, 128] + # end: [640, 640, 640] + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + #n_lamb_sigma: [8,0,0] + #n_lamb_sh: [8,0,0] + #n_lamb_sigma: [8,8,8] + #n_lamb_sh: [8,8,8] + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: 
SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/shiny_z_plane_small.yaml b/nlf/conf/experiment/model/shiny_z_plane_small.yaml new file mode 100644 index 0000000..75fa048 --- /dev/null +++ b/nlf/conf/experiment/model/shiny_z_plane_small.yaml @@ -0,0 +1,207 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + #depth: 6 + #hidden_channels: 256 + #skips: [3] + + depth: 4 + hidden_channels: 256 + skips: [2] + + # Outputs + #z_channels: 32 + z_channels: 16 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 32 + z_channels: 16 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/nlf/conf/experiment/model/shiny_z_plane_tiny.yaml b/nlf/conf/experiment/model/shiny_z_plane_tiny.yaml new file mode 100644 index 0000000..2f9ce1c --- /dev/null +++ 
b/nlf/conf/experiment/model/shiny_z_plane_tiny.yaml @@ -0,0 +1,207 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + #depth: 6 + #hidden_channels: 256 + #skips: [3] + + depth: 4 + hidden_channels: 128 + skips: [2] + + # Outputs + #z_channels: 32 + z_channels: 8 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 32 + z_channels: 8 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/nlf/conf/experiment/model/shiny_z_tensorf.yaml b/nlf/conf/experiment/model/shiny_z_tensorf.yaml new file mode 100644 index 0000000..1e58cd2 --- /dev/null +++ b/nlf/conf/experiment/model/shiny_z_tensorf.yaml @@ -0,0 +1,161 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + # 1) Per ray outputs + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: two_plane + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 96 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: sigmoid + fac: 1.0 + shift: 2.0 + + point_offset: + channels: 3 + + activation: + type: tanh + fac: 0.25 + + # 2) Intersection + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 96 + + intersect: + type: z + + sort: True + outward_facing: False + use_disparity: False + use_sigma: False + + initial: -1.0 + end: 1.0 + near: 0.25 + + activation: + type: identity + fac: 0.5 + + # 4) Add point offset + point_offset_0: + type: point_offset + use_sigma: True + + # 5) Add extra outputs + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + # 6) Extract + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + ndc_ray: 0 + + # Density activation + #fea2denseAct: softplus + #distance_scale: 25.0 + #density_shift: -10.0 + fea2denseAct: relu + distance_scale: 4.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + #N_voxel_init: 8000000 # 200**3 + N_voxel_init: 1000000 # 100**3 + N_voxel_final: 64000000 # 400**3 + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + #update_AlphaMask_list: [] + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 1e-4 + alpha_mask_thre: 1e-4 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + num_frames: 8 + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/shiny_z_tensorf_cascaded.yaml b/nlf/conf/experiment/model/shiny_z_tensorf_cascaded.yaml new file mode 100644 index 0000000..e16ea45 --- /dev/null +++ b/nlf/conf/experiment/model/shiny_z_tensorf_cascaded.yaml @@ -0,0 +1,230 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + # 1) Per ray outputs + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + n_freqs: 0 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 24 + + outputs: + z_vals: + channels: 1 + + # 2) Intersection + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 24 + + intersect: + type: z_plane + + mask: + stop_iters: -1 + + sort: True + outward_facing: False + use_disparity: False + use_sigma: False + + initial: -1.0 + end: 1.0 + near: 0.25 + + activation: + type: identity + fac: 0.5 + + point_prediction_0: + type: point_prediction + + # Inputs + in_z_channels: 24 + + inputs: + points: 3 + viewdirs: 3 + + # Parameterization + params: + all: + start: 0 + end: 6 + + param: + n_dims: 6 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + out_z_channels: 96 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: sigmoid + fac: 1.0 + shift: 2.0 + + point_offset: + channels: 3 + + activation: + type: tanh + fac: 0.25 + + # 2) Intersection + ray_intersect_1: + type: ray_intersect + + # Intersect + z_channels: 96 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + use_sigma: False + + initial: -1.0 + end: 1.0 + near: 0.25 + + activation: + type: identity + fac: 0.5 + + # 4) Add point offset + point_offset_0: + type: point_offset + use_sigma: True + + # 5) Add extra outputs + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + # 6) Extract + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + ndc_ray: 0 + + # Density activation + #fea2denseAct: softplus + #distance_scale: 25.0 + #density_shift: -10.0 + fea2denseAct: relu + distance_scale: 4.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + #N_voxel_init: 8000000 # 200**3 + N_voxel_init: 1000000 # 100**3 + N_voxel_final: 64000000 # 400**3 + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + #update_AlphaMask_list: [] + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 1e-4 + alpha_mask_thre: 1e-4 + + # Tensor sizes + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + num_frames: 8 + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + shadingMode: SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/spaces_z_plane.yaml b/nlf/conf/experiment/model/spaces_z_plane.yaml new file mode 100644 index 0000000..11b64ca --- /dev/null +++ b/nlf/conf/experiment/model/spaces_z_plane.yaml @@ -0,0 +1,206 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.0 + contract_end_radius: 8.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 262144000 + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,4,4] + n_lamb_sh: [8,4,4] + + # Shading + shadingMode: SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/spaces_z_plane_world.yaml b/nlf/conf/experiment/model/spaces_z_plane_world.yaml new file mode 100644 index 0000000..b17cb65 --- /dev/null +++ b/nlf/conf/experiment/model/spaces_z_plane_world.yaml @@ -0,0 +1,253 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + near: 0.35 + use_dataset_bounds: True + + contract: + type: mipnerf + contract_samples: True + use_dataset_bounds: True + + activation: + type: identity + fac: 0.5 + + #dropout: + # frequency: 2 + # stop_epoch: 10 + + #color_transform: + # type: color_transform + + # transform_activation: + # type: ease_value + # start_value: 0.0 + # window_epochs: 0 + # wait_epochs: 0 + + # activation: + # type: identity + # shift: 0.0 + # inner_fac: 0.1 + # outer_fac: 1.0 + + # shift_activation: + # type: ease_value + # start_value: 0.0 + # window_epochs: 0 + # wait_epochs: 0 + + # activation: + # type: identity + # shift: 0.0 + # inner_fac: 0.1 + # outer_fac: 1.0 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + #dropout: + # frequency: 2 + # stop_epoch: 10 + + #random_offset_0: + # type: random_offset + + # frequency: 2 + # random_per_sample: 4 + # stop_epoch: 10 + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -2.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + grid_size: + start: [100, 100, 100] + end: [600, 600, 600] + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + #n_lamb_sigma: [8,0,0] + #n_lamb_sh: [8,0,0] + n_lamb_sigma: [8,8,8] + n_lamb_sh: [8,8,8] + + # Shading + #shadingMode: RGB + #data_dim_color: 3 + + 
shadingMode: SH + data_dim_color: 27 diff --git a/nlf/conf/experiment/model/stanford_llff_z_plane.yaml b/nlf/conf/experiment/model/stanford_llff_z_plane.yaml new file mode 100644 index 0000000..a3b7387 --- /dev/null +++ b/nlf/conf/experiment/model/stanford_llff_z_plane.yaml @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + near: -1.0 + far: 0.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + mask: + stop_iters: -1 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 512000 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + #upsamp_list: [8000, 12000, 16000, 20000, 24000] + lr_upsample_reset: True + + # Thresholding + #update_AlphaMask_list: [] + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/nlf/conf/experiment/model/stanford_z_plane.yaml b/nlf/conf/experiment/model/stanford_z_plane.yaml new file mode 100644 index 
0000000..1ceab3d --- /dev/null +++ b/nlf/conf/experiment/model/stanford_z_plane.yaml @@ -0,0 +1,201 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -0.65 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/nlf/conf/experiment/model/stanford_z_plane_mem.yaml b/nlf/conf/experiment/model/stanford_z_plane_mem.yaml new file mode 100644 index 0000000..cb0fff5 --- /dev/null +++ b/nlf/conf/experiment/model/stanford_z_plane_mem.yaml @@ -0,0 +1,205 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
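Most of these configs encode rays with `pe: type: windowed`. The annotations below give one common reading of the fields, in the spirit of coarse-to-fine frequency annealing (as in Nerfies/BARF); the behavior described is an assumption about the implementation, not documentation of it.

pe:
  type: windowed
  # Assumed: a sin/cos positional encoding whose higher-frequency bands are faded in
  # over training (coarse-to-fine).
  n_freqs: 1               # number of frequency bands (0 would leave only the raw inputs)
  freq_multiplier: 2.0     # spacing between successive frequency bands
  wait_iters: 0            # iterations to wait before the window starts opening
  max_freq_epoch: 0        # epoch by which all bands are fully weighted (0 = from the start)
  exclude_identity: False  # keep the unencoded inputs alongside the encoding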
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 6 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -0.65 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + #distance_scale: 16.0 + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 + + #shadingMode: SH + #data_dim_color: 27 diff --git a/nlf/conf/experiment/model/stanford_z_plane_small.yaml b/nlf/conf/experiment/model/stanford_z_plane_small.yaml new file mode 100644 index 0000000..dee0e4f --- /dev/null +++ b/nlf/conf/experiment/model/stanford_z_plane_small.yaml @@ -0,0 +1,207 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
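For orientation, the voxel budgets above are perfect cubes, matching the `# 100**3`-style comments used in the first config of this group. The intermediate resolutions listed below additionally assume the usual TensoRF behavior of interpolating the budget log-linearly across the `upsamp_list` steps; that schedule is an assumption about the underlying implementation.

# N_voxel_init:  2097152   = 128**3
# N_voxel_final: 512000000 = 800**3
# Under a log-linear schedule over upsamp_list: [4000, 6000, 8000, 10000, 12000],
# the grid would pass through roughly 185**3, 266**3, 384**3 and 555**3
# before reaching 800**3.
N_voxel_init: 2097152
N_voxel_final: 512000000
upsamp_list: [4000,6000,8000,10000,12000]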
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 1 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + #depth: 6 + #hidden_channels: 256 + #skips: [3] + + depth: 4 + hidden_channels: 256 + skips: [2] + + # Outputs + #z_channels: 32 + z_channels: 16 + + outputs: + z_vals: + channels: 1 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + #z_channels: 32 + z_channels: 16 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -0.65 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs'] + + extract_fields: + type: extract_fields + fields: ['points', 'distances', 'viewdirs', 'weights', 'color_scale', 'color_shift'] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_no_sample + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 8.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 2.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 diff --git a/nlf/conf/experiment/model/technicolor_cascaded.yaml b/nlf/conf/experiment/model/technicolor_cascaded.yaml new file mode 100644 index 0000000..5fb562e --- /dev/null +++ b/nlf/conf/experiment/model/technicolor_cascaded.yaml @@ -0,0 +1,325 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 0 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 8 + + outputs: + z_vals: + channels: 1 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 8 + + intersect: + type: z_plane + + mask: + stop_iters: -1 + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + point_prediction_0: + type: point_prediction + + # Inputs + in_z_channels: 8 + + inputs: + points: 3 + viewdirs: 3 + times: 1 + + # Parameterization + params: + ray: + start: 0 + end: 3 + + param: + n_dims: 3 + fn: identity + + pe: + type: basic + n_freqs: 2 + + time: + start: 3 + end: 4 + + param: + n_dims: 1 + fn: identity + + pe: + type: basic + n_freqs: 4 + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + out_z_channels: 32 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_1: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_1: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 
'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: SH + data_dim_color: 27 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/technicolor_z_plane.yaml b/nlf/conf/experiment/model/technicolor_z_plane.yaml new file mode 100644 index 0000000..3aab19e --- /dev/null +++ b/nlf/conf/experiment/model/technicolor_z_plane.yaml @@ -0,0 +1,268 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + #param: + # n_dims: 6 + # fn: pluecker + # direction_multiplier: 1.0 + # moment_multiplier: 1.0 + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 0 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 64 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 0.25 # TODO: Original + #outer_fac: 1.0 # TODO: New + #outer_fac: 4.0 # TODO: New + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 # TODO: original + #outer_fac: 0.125 # TODO: New (but not used) + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 64 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: 
False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 # TODO: Original + #distance_scale: 8.0 # TODO: New + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + #shadingMode: RGB # TODO: Changed + #data_dim_color: 3 + + # shadingMode: SH + # data_dim_color: 27 + + + shadingMode: MLP_Fea + data_dim_color: 27 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/technicolor_z_plane_ff.yaml b/nlf/conf/experiment/model/technicolor_z_plane_ff.yaml new file mode 100644 index 0000000..b53884e --- /dev/null +++ b/nlf/conf/experiment/model/technicolor_z_plane_ff.yaml @@ -0,0 +1,251 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
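The recurring `shadingMode` / `data_dim_color` pairs in these color sections follow a simple dimensional pattern. The arithmetic below is exact; reading `SH` as degree-2 spherical harmonics is an assumption based on the 27-dimensional feature size and the TensoRF-style backbone named in `net: type`.

# data_dim_color: 3   -> shadingMode: RGB (one value per color channel)
# data_dim_color: 27  -> shadingMode: SH or MLP_Fea
#   27 = 3 color channels x 9 coefficients, i.e. spherical harmonics up to
#   degree 2 ((2 + 1)**2 = 9 basis functions per channel).
shadingMode: SH
data_dim_color: 27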
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 0 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 1.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: SH + data_dim_color: 27 + + # Density + densityMode: Density diff --git 
a/nlf/conf/experiment/model/technicolor_z_plane_large.yaml b/nlf/conf/experiment/model/technicolor_z_plane_large.yaml new file mode 100644 index 0000000..080af0d --- /dev/null +++ b/nlf/conf/experiment/model/technicolor_z_plane_large.yaml @@ -0,0 +1,214 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: basic + n_freqs: 1 + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: basic + n_freqs: 2 + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 4.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/technicolor_z_plane_mem.yaml 
b/nlf/conf/experiment/model/technicolor_z_plane_mem.yaml new file mode 100644 index 0000000..e862403 --- /dev/null +++ b/nlf/conf/experiment/model/technicolor_z_plane_mem.yaml @@ -0,0 +1,251 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 4 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 # NOTE: Changed from 32 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 2.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 2.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 # NOTE: Changed from 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + 
N_voxel_init: 7077888 + N_voxel_final: 1728000000 + #grid_size: + # start: [192, 192, 192] + # end: [1200, 1200, 1200] + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB # NOTE: Changed from RGB + data_dim_color: 3 + + # Density + densityMode: Density # NOTE: CHanged from Density + #densityMode: DensityFourier diff --git a/nlf/conf/experiment/model/technicolor_z_plane_no_sample.yaml b/nlf/conf/experiment/model/technicolor_z_plane_no_sample.yaml new file mode 100644 index 0000000..5d50060 --- /dev/null +++ b/nlf/conf/experiment/model/technicolor_z_plane_no_sample.yaml @@ -0,0 +1,222 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 0 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: zero + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 128 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 0.25 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 128 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: False + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params 
+ white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + #N_voxel_init: 2097152 + N_voxel_init: 512000000 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: SH + data_dim_color: 27 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/technicolor_z_plane_small.yaml b/nlf/conf/experiment/model/technicolor_z_plane_small.yaml new file mode 100644 index 0000000..dec7066 --- /dev/null +++ b/nlf/conf/experiment/model/technicolor_z_plane_small.yaml @@ -0,0 +1,220 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + #param: + # n_dims: 6 + # fn: pluecker + # direction_multiplier: 1.0 + # moment_multiplier: 1.0 + + pe: + type: basic + n_freqs: 1 + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: basic + n_freqs: 2 + + # Net + net: + type: base + group: embedding_impl + + depth: 4 + hidden_channels: 256 + skips: [2] + + # Outputs + z_channels: 16 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 4.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 16 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # 
Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/technicolor_z_plane_tiny.yaml b/nlf/conf/experiment/model/technicolor_z_plane_tiny.yaml new file mode 100644 index 0000000..bd78aff --- /dev/null +++ b/nlf/conf/experiment/model/technicolor_z_plane_tiny.yaml @@ -0,0 +1,220 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + #param: + # n_dims: 6 + # fn: pluecker + # direction_multiplier: 1.0 + # moment_multiplier: 1.0 + + pe: + type: basic + n_freqs: 1 + + time: + start: 7 + end: 8 + + param: + n_dims: 1 + fn: identity + + pe: + type: basic + n_freqs: 2 + + # Net + net: + type: base + group: embedding_impl + + depth: 4 + hidden_channels: 128 + skips: [2] + + # Outputs + z_channels: 8 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + activation: + type: identity + outer_fac: 4.0 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 1 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.125 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 8 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + initial: -1.0 + end: 1.0 + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 0.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + in_density_field: point_sigma + use_sigma: True + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene 
hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + N_voxel_init: 2097152 + N_voxel_final: 512000000 + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + data_dim_color: 3 + + # Density + densityMode: Density diff --git a/nlf/conf/experiment/model/technicolor_z_plane_world.yaml b/nlf/conf/experiment/model/technicolor_z_plane_world.yaml new file mode 100644 index 0000000..a4005dc --- /dev/null +++ b/nlf/conf/experiment/model/technicolor_z_plane_world.yaml @@ -0,0 +1,284 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: lightfield + +render: + type: lightfield + +param: + n_dims: 6 + fn: identity + + +### EMBEDDING ### +embedding: + type: ray_point + + embeddings: + ray_prediction_0: + type: ray_prediction + + # Parameterization + params: + ray: + start: 0 + end: 6 + + param: + n_dims: 4 + fn: two_plane + + pe: + type: windowed + freq_multiplier: 2.0 + n_freqs: 0 + #n_freqs: 4 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + time: + start: 6 + end: 7 + + param: + n_dims: 1 + fn: identity + + pe: + type: windowed + n_freqs: 2 + #n_freqs: 4 + wait_iters: 0 + max_freq_epoch: 0 + exclude_identity: False + + # Net + net: + type: base + group: embedding_impl + + depth: 6 + hidden_channels: 256 + skips: [3] + + # Outputs + z_channels: 32 + + outputs: + z_vals: + channels: 1 + + spatial_flow: + channels: 3 + + sigma: + channels: 1 + + activation: + type: ease_value + start_value: 1.0 + window_epochs: 3 + wait_epochs: 0 + + activation: + type: sigmoid + shift: 4.0 + + point_offset: + channels: 3 + + activation: + type: tanh + outer_fac: 0.25 + + color_scale: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + color_shift: + channels: 3 + + activation: + type: ease_value + start_value: 0.0 + window_epochs: 0 + wait_epochs: 0 + + activation: + type: identity + shift: 0.0 + inner_fac: 1.0 + outer_fac: 1.0 + + ray_intersect_0: + type: ray_intersect + + # Intersect + z_channels: 32 + + intersect: + type: z_plane + + sort: True + outward_facing: False + use_disparity: False + + use_sigma: True + + out_points: raw_points + out_distance: raw_distance + + near: 0.5 + + initial: -1.25 + end: -5.0 + + contract: + type: bbox + contract_samples: True + bbox_min: [-2.0, -2.0, 0.0] + bbox_max: [2.0, 2.0, -5.0] + + activation: + type: identity + fac: 0.5 + + flow_0: + type: advect_points + use_spatial_flow: True + use_angular_flow: False + + out_flow_field: raw_flow + flow_scale: 2.0 + + spatial_flow_activation: + type: identity + fac: 0.25 + + angular_flow_rotation_activation: + type: identity + fac: 0.25 + + angular_flow_anchor_activation: + type: identity + fac: 0.25 + + point_offset_0: + type: point_offset + use_sigma: True + + #color_transform: + # type: color_transform + + # transform_activation: + # type: 
ease_value + # start_value: 0.0 + # window_epochs: 0 + # wait_epochs: 0 + + # activation: + # type: identity + # shift: 0.0 + # inner_fac: 0.1 + # outer_fac: 1.0 + + # shift_activation: + # type: ease_value + # start_value: 0.0 + # window_epochs: 0 + # wait_epochs: 0 + + # activation: + # type: identity + # shift: 0.0 + # inner_fac: 0.1 + # outer_fac: 1.0 + + add_point_outputs_0: + type: add_point_outputs + extra_outputs: ['viewdirs', 'times'] + + extract_fields: + type: extract_fields + fields: [ + 'points', 'distances', 'base_times', 'time_offset', 'times', 'viewdirs', 'weights', + 'color_transform_global', 'color_scale_global', 'color_shift_global', + 'color_transform', 'color_scale', 'color_shift' + ] + + +### COLOR ### +color: + type: base + + net: + type: tensor_vm_split_time + + # Scene hyper-params + white_bg: 0 + black_bg: 0 + + # Density activation + fea2denseAct: relu + distance_scale: 16.0 + density_shift: 0.0 + + # Grid bounds + aabb: [[-2.0, -2.0, -1.0], [2.0, 2.0, 1.0]] + + # Grid size and upsampling + #grid_size: + # start: [150, 150, 150] + # end: [600, 600, 600] + grid_size: + start: [192, 192, 192] + end: [800, 800, 800] + + upsamp_list: [4000,6000,8000,10000,12000] + + lr_upsample_reset: True + + # Thresholding + update_AlphaMask_list: [4000,8000] + rm_weight_mask_thre: 0 + alpha_mask_thre: 1e-3 + + # Tensor sizes + n_lamb_sigma: [8,0,0] + n_lamb_sh: [8,0,0] + + # Shading + shadingMode: RGB + #shadingMode: RGBtFourier + data_dim_color: 3 + + #shadingMode: SH + #data_dim_color: 27 + + # Density + densityMode: Density + #densityMode: DensityFourier + + #filter: + # weight_thresh: 1e-3 + # max_samples: 16 + # wait_epochs: 3 diff --git a/nlf/conf/experiment/params/local.yaml b/nlf/conf/experiment/params/local.yaml new file mode 100644 index 0000000..23dc774 --- /dev/null +++ b/nlf/conf/experiment/params/local.yaml @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +tensor: True + +ckpt_dir: checkpoints +log_dir: logs +data_dir: /data/xliufe/Gear-NeRF/data +name: "${experiment.dataset.name}_${experiment.dataset.collection}" + +print_loss: False +save_results: True + +tensorboard: False +log_images: False +log_videos: False + +show_embedding: False + +test_only: False +render_only: False +load_from_weights: False +start_epoch: 0 + +seed: 1 + +save_video_dir: "val_videos" +save_image_dir: "val_images" + +input_pose: "" diff --git a/nlf/conf/experiment/regularizers/all.yaml b/nlf/conf/experiment/regularizers/all.yaml new file mode 100644 index 0000000..ea4b51e --- /dev/null +++ b/nlf/conf/experiment/regularizers/all.yaml @@ -0,0 +1,8 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ diff --git a/nlf/conf/experiment/regularizers/blurry_teacher/default.yaml b/nlf/conf/experiment/regularizers/blurry_teacher/default.yaml new file mode 100644 index 0000000..6511870 --- /dev/null +++ b/nlf/conf/experiment/regularizers/blurry_teacher/default.yaml @@ -0,0 +1,39 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates.
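`params/local.yaml` above leans on OmegaConf/Hydra interpolation: `${...}` references are resolved against the rest of the composed config. A small illustration with hypothetical dataset values (the example names are not taken from this repository):

# If a dataset config were to provide, say,
#   experiment.dataset.name: technicolor
#   experiment.dataset.collection: painter
# then the interpolated experiment name would resolve as:
name: "${experiment.dataset.name}_${experiment.dataset.collection}"   # -> "technicolor_painter"
# and paths built from ${experiment.params.data_dir} expand the same way at compose time.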
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: blurry_teacher +batch_size: 4096 + +dataset: + name: "dense_${experiment.dataset.name}" + collection: "${experiment.dataset.collection}_dense_blurry" + split: train + root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}_dense_blurry/" + + num_rows: 10 + num_cols: 10 + train_row_skip: 1 + train_col_skip: 1 + size: 1000 + + use_patches: True + blur_radius: 0 + patch_width: 64 + +blur_radius: 0 + +weight: + type: exponential_decay + start: 0.5 + decay: 0.1 + stop_weight: 0.025 + num_epochs: 250 + +loss: + type: mse diff --git a/nlf/conf/experiment/regularizers/coarse/default.yaml b/nlf/conf/experiment/regularizers/coarse/default.yaml new file mode 100644 index 0000000..88e8e47 --- /dev/null +++ b/nlf/conf/experiment/regularizers/coarse/default.yaml @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: coarse +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.1 + stop_weight: 0.01 + + stop_epochs: 100 + num_epochs: 50 + +loss: + type: mse diff --git a/nlf/conf/experiment/regularizers/depth_classification/default.yaml b/nlf/conf/experiment/regularizers/depth_classification/default.yaml new file mode 100644 index 0000000..c7c9e2d --- /dev/null +++ b/nlf/conf/experiment/regularizers/depth_classification/default.yaml @@ -0,0 +1,85 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + + +type: depth_classification +ray_chunk: 32768 +batch_size: 4096 +weight: 0.5 +use_color_embedding: True + +use_disparity: True +offset: 0.0 +near: 1.0 +far: 16.0 +num_slices: 128 + +dataset: + name: random_view + num_views: all + +lookup_loss: + type: mae + weight: 0.5 + warmup_iters: 1000 + + angle_std: 10.0 + dist_std: 0.5 + +color_loss: + type: mae + weight: 0.25 + + angle_std: 5.0 + dist_std: 0.25 + +depth_loss: + type: mae + weight: 0.25 + + angle_std: 5.0 + dist_std: 0.25 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.25 + dir: 0.25 + +param: + n_dims: 6 + fn: pluecker + +pe: + n_freqs: 10 + warmup_iters: 0 + max_freq_iter: 0 + exclude_identity: False + +net: + depth: 3 + hidden_channels: 128 + skips: [] + activation: 'identity' + +optimizer: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + lr_scheduler: steplr + warmup_multipler: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.1 diff --git a/nlf/conf/experiment/regularizers/feedback/donerf.yaml b/nlf/conf/experiment/regularizers/feedback/donerf.yaml new file mode 100644 index 0000000..300b541 --- /dev/null +++ b/nlf/conf/experiment/regularizers/feedback/donerf.yaml @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
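The regularizer configs above all schedule their loss weight with a `type: exponential_decay` block. One plausible reading of the fields, stated as an assumption rather than as documentation of the implementation:

weight:
  type: exponential_decay
  start: 0.5          # initial loss weight
  decay: 0.1          # decay factor applied over each num_epochs span
  num_epochs: 250     # epochs over which one full decay factor is applied
  stop_weight: 0.025  # floor the weight is clamped to, when present
# i.e. roughly weight(epoch) = max(start * decay**(epoch / num_epochs), stop_weight),
# an assumed formula, not taken from the code.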
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry_feedback +ray_chunk: 32768 +batch_size: 8192 +wait_epochs: 1 + +student_fields: ['raw_points'] +teacher_fields: ['points'] +num_points: -1 + +weight: + type: exponential_decay + start: 0.1 + decay: 0.25 + num_epochs: 50 diff --git a/nlf/conf/experiment/regularizers/feedback/reflect.yaml b/nlf/conf/experiment/regularizers/feedback/reflect.yaml new file mode 100644 index 0000000..7190cd9 --- /dev/null +++ b/nlf/conf/experiment/regularizers/feedback/reflect.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry_feedback +ray_chunk: 32768 +batch_size: 8192 +wait_epochs: 0 + +student_fields: ['normal'] +teacher_fields: ['render_normal'] +sizes: [3] + +weights: [[0.001, 1.0]] +num_points: -1 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 diff --git a/nlf/conf/experiment/regularizers/feedback/stanford.yaml b/nlf/conf/experiment/regularizers/feedback/stanford.yaml new file mode 100644 index 0000000..5245604 --- /dev/null +++ b/nlf/conf/experiment/regularizers/feedback/stanford.yaml @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry_feedback +ray_chunk: 32768 +batch_size: 8192 +wait_epochs: 1 + +student_fields: ['raw_points', 'raw_distance'] +teacher_fields: ['points'] +num_points: -1 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 diff --git a/nlf/conf/experiment/regularizers/flow/video3d.yaml b/nlf/conf/experiment/regularizers/flow/video3d.yaml new file mode 100644 index 0000000..305439d --- /dev/null +++ b/nlf/conf/experiment/regularizers/flow/video3d.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: flow +ray_chunk: 32768 +batch_size: 8192 + +fields: ['raw_points_start', 'raw_points_end', 'raw_distance'] +origin: [0.0, 0.0, 0.0] +num_points: -1 + +contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.5 + contract_end_radius: 6.0 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 diff --git a/nlf/conf/experiment/regularizers/fourier/default.yaml b/nlf/conf/experiment/regularizers/fourier/default.yaml new file mode 100644 index 0000000..13f43ed --- /dev/null +++ b/nlf/conf/experiment/regularizers/fourier/default.yaml @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
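The flow and geometry regularizers pass sampled points through a `contract` block (`type: mipnerf`, `contract_start_radius`, `contract_end_radius`) before computing losses. The sketch below applies the standard mip-NeRF-360 contraction rescaled by the two radii; this is an assumption about how the parameters are used, not code from the repository:

```python
import torch

def contract_points(points, start_radius=1.5, end_radius=6.0):
    """Keep points inside `start_radius` (after normalization) unchanged and squash
    everything else into a ball of radius `end_radius` (assumed mip-NeRF-360 form)."""
    x = points / start_radius                        # unit ball = linear region
    norm = x.norm(dim=-1, keepdim=True).clamp(min=1e-8)
    contracted = torch.where(norm <= 1.0, x, (2.0 - 1.0 / norm) * (x / norm))
    return contracted * (end_radius / 2.0)           # result lies within `end_radius`
```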
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: fourier +ray_chunk: 131072 +net_chunk: 131072 +batch_size: 1 +use_inp_freq: inf +wait_iters: 0 + +weight: + type: exponential_decay + start: 1.92 + decay: 0.1 + num_epochs: 500 + +dataset: + name: fourier_lightfield + num_views: all + +fourier_loss: + type: mse + weight: 1 + wait_iters: 0 + +range: + pos: 1.0 + dir: 1.0 diff --git a/nlf/conf/experiment/regularizers/geometry/donerf_barbershop.yaml b/nlf/conf/experiment/regularizers/geometry/donerf_barbershop.yaml new file mode 100644 index 0000000..dc300fb --- /dev/null +++ b/nlf/conf/experiment/regularizers/geometry/donerf_barbershop.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry +ray_chunk: 32768 +batch_size: 8192 +#wait_iters: -16000 +#stop_iters: 16000 + +fields: ['raw_points', 'raw_distance'] +origin: [2.25, 7.75, 1.5] +num_points: -1 + +contract: + type: mipnerf + contract_samples: True + contract_start_radius: 2.0 + contract_end_radius: 16.0 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 diff --git a/nlf/conf/experiment/regularizers/geometry/donerf_classroom.yaml b/nlf/conf/experiment/regularizers/geometry/donerf_classroom.yaml new file mode 100644 index 0000000..bdcfc9c --- /dev/null +++ b/nlf/conf/experiment/regularizers/geometry/donerf_classroom.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry +ray_chunk: 32768 +batch_size: 8192 +#wait_iters: -16000 +#stop_iters: 16000 + +fields: ['raw_points', 'raw_distance'] +origin: [0.783, -3.19, 1.39] +num_points: -1 + +contract: + type: mipnerf + contract_samples: True + contract_start_radius: 2.0 + contract_end_radius: 48.0 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 diff --git a/nlf/conf/experiment/regularizers/geometry/donerf_pavillon.yaml b/nlf/conf/experiment/regularizers/geometry/donerf_pavillon.yaml new file mode 100644 index 0000000..5e5ceb6 --- /dev/null +++ b/nlf/conf/experiment/regularizers/geometry/donerf_pavillon.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry +ray_chunk: 32768 +batch_size: 8192 +#wait_iters: -16000 +#stop_iters: 16000 + +fields: ['raw_points', 'raw_distance'] +origin: [-17.5, -9.5, 2.4] +num_points: 24 + +contract: + type: mipnerf + contract_samples: True + contract_start_radius: 2.0 + contract_end_radius: 120.0 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.125 + num_epochs: 50 diff --git a/nlf/conf/experiment/regularizers/geometry/lf.yaml b/nlf/conf/experiment/regularizers/geometry/lf.yaml new file mode 100644 index 0000000..49b51fe --- /dev/null +++ b/nlf/conf/experiment/regularizers/geometry/lf.yaml @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry +ray_chunk: 32768 +batch_size: 32768 +use_inp_freq: 0 +wait_epochs: 0 + +z_channels: 12 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 500 + +color_loss: + type: mae + weight: 0.0 + wait_epochs: 0 + +geometry_loss: + type: mae + weight: 0.1 + wait_epochs: 0 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.05 + dir: 0.05 + bundle_size: 1 diff --git a/nlf/conf/experiment/regularizers/geometry/video3d.yaml b/nlf/conf/experiment/regularizers/geometry/video3d.yaml new file mode 100644 index 0000000..eac90fe --- /dev/null +++ b/nlf/conf/experiment/regularizers/geometry/video3d.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry +ray_chunk: 32768 +batch_size: 8192 + +fields: ['raw_points', 'raw_distance'] +origin: [0.0, 0.0, 0.0] +num_points: -1 + +contract: + type: mipnerf + contract_samples: True + contract_start_radius: 1.5 + contract_end_radius: 6.0 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 diff --git a/nlf/conf/experiment/regularizers/geometry_tv/lf.yaml b/nlf/conf/experiment/regularizers/geometry_tv/lf.yaml new file mode 100644 index 0000000..eea4931 --- /dev/null +++ b/nlf/conf/experiment/regularizers/geometry_tv/lf.yaml @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: geometry_tv +ray_chunk: 32768 +batch_size: 32768 +use_inp_freq: 0 +wait_epochs: 0 + +z_channels: 12 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 500 + +color_loss: + type: mae + weight: 10000.0 + wait_epochs: 0 + +geometry_loss: + type: mae + weight: 0.0 + wait_epochs: 0 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.1 + dir: 0.1 + bundle_size: 1 diff --git a/nlf/conf/experiment/regularizers/inverse_ray_depth/default.yaml b/nlf/conf/experiment/regularizers/inverse_ray_depth/default.yaml new file mode 100644 index 0000000..74ab365 --- /dev/null +++ b/nlf/conf/experiment/regularizers/inverse_ray_depth/default.yaml @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
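Each of these files is tagged `# @package _group_`, so it is a Hydra config-group entry chosen by its path under `nlf/conf/experiment`. A sketch of composing the config and swapping the geometry regularizer variant with Hydra's compose API; the primary config name (`config`), the config root, and the override key path are assumptions based on the directory layout above, not verified against the repository's entry point:

```python
from hydra import compose, initialize

# Assumed layout: primary config `config.yaml` under nlf/conf, groups under experiment/.
with initialize(config_path="nlf/conf"):
    cfg = compose(
        config_name="config",
        overrides=["experiment/regularizers/geometry=video3d"],
    )
    # If the group resolves as assumed, this prints 6.0 (from geometry/video3d.yaml).
    print(cfg.experiment.regularizers.geometry.contract.contract_end_radius)
```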
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: inverse_ray_depth +ray_chunk: 32768 +net_chunk: 32768 +batch_size: 4096 + +wait_iters: 0 +warmup_iters: 0 +use_inp_freq: 2 + +dataset: + name: random_view + num_views: all + +weight: + type: exponential_decay + start: 0.5 + decay: 0.5 + num_epochs: 500 + +use_disparity: False +num_samples: 8 + +range: + pos: 1.0 + dir: 1.0 + +lookup_weight_map: + angle_std: 5.0 + dist_std: 0.125 + rgb_std: 0.125 + +embedding_lookup_loss: + type: mae + weight: 1.0 + wait_iters: 0 diff --git a/nlf/conf/experiment/regularizers/multiple_ray_depth/default.yaml b/nlf/conf/experiment/regularizers/multiple_ray_depth/default.yaml new file mode 100644 index 0000000..7bf1edb --- /dev/null +++ b/nlf/conf/experiment/regularizers/multiple_ray_depth/default.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: multiple_ray_depth +ray_chunk: 131072 +net_chunk: 131072 +batch_size: 32768 + +wait_iters: 1000 +warmup_iters: 10000 +use_inp_freq: 2 + +use_disparity: False +occlusion_aware: False +num_slices: 8 +num_filler: 0 + +dataset: + name: random_view + num_views: all + +weight: + type: exponential_decay + start: 0.5 + decay: 0.1 + num_epochs: 500 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.25 + dir: 0.25 + bundle_size: 1 + +lookup_weight_map: + angle_std: 10.0 + dist_std: 0.1 + + rgb_std: + type: linear_decay + num_epochs: 100 + start: 0.1 + end: 0.01 + +color_weight_map: + angle_std: 5.0 + dist_std: 0.05 + + rgb_std: + type: linear_decay + num_epochs: 100 + start: 0.1 + end: 0.01 + +depth_weight_map: + angle_std: 10.0 + dist_std: 0.25 + +color_lookup_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +color_loss: + type: mae + weight: 0.5 + wait_iters: 0 + +depth_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +param: + n_dims: 4 + fn: two_plane + +pe: + n_freqs: 8 + wait_iters: 0 + max_freq_iter: 50000 + exclude_identity: False + +net: + depth: 8 + hidden_channels: 256 + skips: [4] + activation: identity + +optimizer: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.1 diff --git a/nlf/conf/experiment/regularizers/point/default.yaml b/nlf/conf/experiment/regularizers/point/default.yaml new file mode 100644 index 0000000..d7491bf --- /dev/null +++ b/nlf/conf/experiment/regularizers/point/default.yaml @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
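The `pe` blocks here (e.g. `n_freqs: 8`, `max_freq_iter: 50000`, `exclude_identity: False`) suggest a positional encoding whose higher frequency bands are faded in over training, as in coarse-to-fine schemes. A hedged sketch; only the config keys come from the files above, the windowing form is assumed:

```python
import math
import torch

def windowed_positional_encoding(x, n_freqs, cur_iter, max_freq_iter, include_identity=True):
    """Sin/cos encoding with frequency bands faded in linearly over `max_freq_iter` steps."""
    alpha = n_freqs if max_freq_iter <= 0 else n_freqs * min(cur_iter / max_freq_iter, 1.0)
    feats = [x] if include_identity else []
    for k in range(n_freqs):
        # Cosine-ramp window for band k (Nerfies/BARF-style annealing, assumed here).
        w = 0.5 * (1.0 - math.cos(math.pi * min(max(alpha - k, 0.0), 1.0)))
        for fn in (torch.sin, torch.cos):
            feats.append(w * fn((2.0 ** k) * math.pi * x))
    return torch.cat(feats, dim=-1)
```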
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: point +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 0.2 + decay: 0.1 + num_epochs: 100 + +loss: + type: mae + weight: 1.0 + wait_iters: 0 diff --git a/nlf/conf/experiment/regularizers/random_pixel/default.yaml b/nlf/conf/experiment/regularizers/random_pixel/default.yaml new file mode 100644 index 0000000..a8e0eaf --- /dev/null +++ b/nlf/conf/experiment/regularizers/random_pixel/default.yaml @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: teacher +batch_size: 4096 + +dataset: + name: random_pixel + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + stop_epochs: 100 + num_epochs: 100 + +loss: + type: mse diff --git a/nlf/conf/experiment/regularizers/ray_bundle/default.yaml b/nlf/conf/experiment/regularizers/ray_bundle/default.yaml new file mode 100644 index 0000000..c1dbe47 --- /dev/null +++ b/nlf/conf/experiment/regularizers/ray_bundle/default.yaml @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: ray_bundle +ray_chunk: 131072 +net_chunk: 131072 +batch_size: 32768 +use_inp_freq: inf +wait_iters: 0 + +weight: + type: exponential_decay + start: 0.5 + decay: 0.1 + num_epochs: 500 + +dataset: + name: random_lightfield + st_plane: -1.0 + uv_plane: 0.0 + num_views: all + +color_weight_map: + angle_std: 5.0 + dist_std: 0.1 + +color_loss: + type: mse + wait_iters: 0 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + dir: 0.25 + bundle_size: 8 diff --git a/nlf/conf/experiment/regularizers/ray_bundle/embedding.yaml b/nlf/conf/experiment/regularizers/ray_bundle/embedding.yaml new file mode 100644 index 0000000..484de6c --- /dev/null +++ b/nlf/conf/experiment/regularizers/ray_bundle/embedding.yaml @@ -0,0 +1,56 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: ray_bundle +ray_chunk: 131072 +net_chunk: 131072 +batch_size: 8192 +use_inp_freq: inf + +wait_epochs: 0 + +weight: + type: exponential_decay + start: 10.0 + decay: 0.1 + stop_weight: 0.01 + num_epochs: 100 + +dataset: + name: random_lightfield + st_plane: -1.0 + uv_plane: 0.0 + num_views: all + +color_weight_map: + angle_std: 5.0 + dist_std: 0.1 + +color_loss: + type: mse + wait_iters: inf + weight: 0.0 + +embed_weight_map: + angle_std: 20.0 + dist_std: 0.25 + +embed_loss: + type: mse + wait_iters: 0 + weight: 1.0 + +range: + pos: 1.5 + dir: 1.0 + +jitter: + pos: 0.1 + dir: 0.1 + bundle_size: 4 diff --git a/nlf/conf/experiment/regularizers/ray_density/default.yaml b/nlf/conf/experiment/regularizers/ray_density/default.yaml new file mode 100644 index 0000000..fb314b5 --- /dev/null +++ b/nlf/conf/experiment/regularizers/ray_density/default.yaml @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
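The recurring `*_weight_map` blocks (`angle_std`, `dist_std`, sometimes `rgb_std`) read like standard deviations of Gaussian weights that down-weight supervision from reference rays that are far, in angle or distance, from the query ray. A sketch under that assumption; the exact weighting used by the repository may differ:

```python
import torch

def lookup_weights(angle_deg, dist, rgb_diff=None,
                   angle_std=5.0, dist_std=0.1, rgb_std=None):
    """Per-ray confidence in [0, 1] with Gaussian fall-off in angle, distance, and color."""
    w = torch.exp(-0.5 * (angle_deg / angle_std) ** 2)
    w = w * torch.exp(-0.5 * (dist / dist_std) ** 2)
    if rgb_diff is not None and rgb_std is not None:
        w = w * torch.exp(-0.5 * (rgb_diff / rgb_std) ** 2)
    return w
```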
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: ray_density + +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 0.125 + decay: 0.25 + stop_weight: 0.00001 + + stop_epochs: 100 + num_epochs: 50 + +loss: + type: huber + delta: 0.25 + +num_views_for_random: 16 +num_views_for_ray: 16 + +extrapolate_freq: 2 +extrapolate_scale: 2.0 + +use_jitter: True +jitter: + dir_std: 0.1 + pos_std: 0.1 + +use_ndc: True +angle_std: 120.0 +dot_std: 120.0 diff --git a/nlf/conf/experiment/regularizers/ray_density/simple.yaml b/nlf/conf/experiment/regularizers/ray_density/simple.yaml new file mode 100644 index 0000000..3a1b96b --- /dev/null +++ b/nlf/conf/experiment/regularizers/ray_density/simple.yaml @@ -0,0 +1,41 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: simple_ray_density + +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 0.125 + decay: 0.25 + stop_weight: 0.00001 + + stop_epochs: 100 + num_epochs: 50 + +loss: + type: mae + delta: 0.25 + +num_views_for_random: 16 +num_views_for_ray: 16 + +extrapolate_freq: 1 +extrapolate_scale: 2.0 + +use_jitter: True +jitter: + dir_std: 0.1 + pos_std: 0.1 + +use_ndc: True +angle_std: 10.0 +dot_std: 10.0 diff --git a/nlf/conf/experiment/regularizers/ray_depth/default.yaml b/nlf/conf/experiment/regularizers/ray_depth/default.yaml new file mode 100644 index 0000000..b7ddfdc --- /dev/null +++ b/nlf/conf/experiment/regularizers/ray_depth/default.yaml @@ -0,0 +1,109 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: ray_depth +ray_chunk: 131072 +net_chunk: 131072 +batch_size: 4096 + +wait_iters: 1000 +warmup_iters: 1000 +use_inp_freq: 2 + +occlusion_aware: False + +dataset: + name: random_view + num_views: all + +weight: + type: exponential_decay + start: 0.5 + decay: 0.1 + num_epochs: 500 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.1 + dir: 0.1 + bundle_size: 1 + +lookup_weight_map: + angle_std: 10.0 + dist_std: 0.25 + + rgb_std: + type: linear_decay + num_epochs: 100 + start: 0.1 + end: 0.01 + +color_weight_map: + angle_std: 10.0 + dist_std: 0.25 + + rgb_std: + type: linear_decay + num_epochs: 100 + start: 0.1 + end: 0.01 + +depth_weight_map: + angle_std: 10.0 + dist_std: 0.25 + depth_std: 0.5 + +color_lookup_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +color_loss: + type: mae + weight: 0.5 + wait_iters: 10000 + +depth_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +param: + n_dims: 6 + fn: pluecker + +pe: + n_freqs: 8 + wait_iters: 0 + max_freq_iter: 50000 + exclude_identity: False + +net: + depth: 8 + hidden_channels: 256 + skips: [4] + activation: identity + +optimizer: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.1 diff --git a/nlf/conf/experiment/regularizers/ray_depth_blending/default.yaml b/nlf/conf/experiment/regularizers/ray_depth_blending/default.yaml new file mode 100644 index 0000000..bac64e2 --- /dev/null +++ b/nlf/conf/experiment/regularizers/ray_depth_blending/default.yaml @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + + +type: ray_depth_blending +ray_chunk: 32768 +batch_size: 4096 +use_inp_freq: 0 + +use_depth_embedding: False +use_color_embedding: True + +dataset: + name: random_view + num_views: 8 + +warmup_iters: 1000 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.5 + num_epochs: 500 + +lookup: + angle_std: 5.0 + dist_std: 0.1 + +color_loss: + type: mae + weight: 0.5 + + angle_std: 5.0 + dist_std: 0.05 + +depth_loss: + type: mae + weight: 0.5 + + angle_std: 5.0 + dist_std: 0.05 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.1 + dir: 0.1 + +param: + n_dims: 6 + fn: pluecker + +pe: + n_freqs: 10 + warmup_iters: 0 + max_freq_iter: 120000 + exclude_identity: False + +net: + depth: 8 + hidden_channels: 256 + skips: [4] + activation: 'identity' + +optimizer: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multipler: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.5 diff --git a/nlf/conf/experiment/regularizers/ray_depth_occ_dir/default.yaml b/nlf/conf/experiment/regularizers/ray_depth_occ_dir/default.yaml new file mode 100644 index 0000000..880a123 --- /dev/null +++ b/nlf/conf/experiment/regularizers/ray_depth_occ_dir/default.yaml @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
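Several of these regularizers parameterize rays with `param: {n_dims: 6, fn: pluecker}`, i.e. Plücker coordinates, which represent a ray by its direction and moment. A minimal sketch; the normalization convention used by the repository is assumed:

```python
import torch
import torch.nn.functional as F

def pluecker_from_ray(origins, directions):
    """Return 6-D Plücker coordinates (d, o x d) for rays (o, d)."""
    d = F.normalize(directions, dim=-1)
    m = torch.cross(origins, d, dim=-1)  # the moment is invariant to sliding o along the ray
    return torch.cat([d, m], dim=-1)
```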
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: ray_depth_occ_dir +ray_chunk: 131072 +net_chunk: 131072 +batch_size: 4096 + +wait_iters: 1000 +warmup_iters: 1000 +use_inp_freq: 2 + +dataset: + name: random_view + num_views: all + +weight: + type: exponential_decay + start: 0.25 + decay: 0.1 + num_epochs: 500 + +use_disparity: False +num_features: 128 +num_samples: 2 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.25 + dir: 0.25 + bundle_size: 1 + +lookup_weight_map: + angle_std: 10.0 + dist_std: 0.25 + +color_weight_map: + angle_std: 5.0 + dist_std: 0.1 + +depth_weight_map: + angle_std: 5.0 + dist_std: 0.1 + +color_lookup_loss: + type: mae + weight: 0.1 + wait_iters: 0 + +color_loss: + type: mae + weight: 0.5 + wait_iters: 0 + +depth_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +occ_loss: + type: mae + weight: 0.05 + wait_iters: 0 + +param: + n_dims: 6 + fn: pluecker + +pe: + n_freqs: 4 + wait_iters: 0 + max_freq_iter: 0 + exclude_identity: False + +net: + depth: 8 + hidden_channels: 256 + skips: [4] + activation: identity + +optimizer: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.1 diff --git a/nlf/conf/experiment/regularizers/ray_interpolation/default.yaml b/nlf/conf/experiment/regularizers/ray_interpolation/default.yaml new file mode 100644 index 0000000..58d6775 --- /dev/null +++ b/nlf/conf/experiment/regularizers/ray_interpolation/default.yaml @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: ray_interpolation +ray_chunk: 32768 +net_chunk: 32768 +batch_size: 4096 + +warmup_iters: 0 +use_inp_freq: 0 + +dataset: + name: random_view + num_views: 8 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 500 + +use_disparity: True +points_per_view: 8 + +range: + pos: 1.0 + dir: 1.0 + +color_loss: + type: mse + weight: 1.0 + warmup_iters: 0 + + angle_std: 25.0 + dist_std: 0.5 + +blending_net: + type: base + depth: 8 + hidden_channels: 256 + skips: [4] + activation: sigmoid + #activation: softmax + #activation: identity + +optimizer: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multipler: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.5 diff --git a/nlf/conf/experiment/regularizers/render_weight/default.yaml b/nlf/conf/experiment/regularizers/render_weight/default.yaml new file mode 100644 index 0000000..46b6b42 --- /dev/null +++ b/nlf/conf/experiment/regularizers/render_weight/default.yaml @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: render_weight +ray_chunk: 32768 +batch_size: 8192 + +wait_epochs: 0 +window_epochs: 1.5 + +num_points: -1 + +weight: + type: exponential_decay + start: 0.1 + decay: 0.125 + num_epochs: 100 diff --git a/nlf/conf/experiment/regularizers/render_weight/entropy.yaml b/nlf/conf/experiment/regularizers/render_weight/entropy.yaml new file mode 100644 index 0000000..23db389 --- /dev/null +++ b/nlf/conf/experiment/regularizers/render_weight/entropy.yaml @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: render_weight +ray_chunk: 32768 +batch_size: 8192 + +num_points: -1 +window_epochs: 5 + +weight: + type: exponential_decay + start: 0.001 + decay: 0.125 + num_epochs: 20 diff --git a/nlf/conf/experiment/regularizers/teacher/default.yaml b/nlf/conf/experiment/regularizers/teacher/default.yaml new file mode 100644 index 0000000..e7070fb --- /dev/null +++ b/nlf/conf/experiment/regularizers/teacher/default.yaml @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: teacher +batch_size: 4096 + +dataset: + name: "dense_${experiment.dataset.name}" + collection: "${experiment.dataset.collection}_teacher" + split: train + root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}_teacher/" + + num_rows: 10 + num_cols: 10 + train_row_skip: 1 + train_col_skip: 1 + size: 1000 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + stop_weight: 0.025 + num_epochs: 500 + +loss: + type: mse diff --git a/nlf/conf/experiment/regularizers/teacher_model/default.yaml b/nlf/conf/experiment/regularizers/teacher_model/default.yaml new file mode 100644 index 0000000..8bd6ce9 --- /dev/null +++ b/nlf/conf/experiment/regularizers/teacher_model/default.yaml @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: teacher_model +ray_chunk: 32768 +batch_size: 16384 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + num_epochs: 50 + +model_ckpt_path: shiny_lab_temp/last.ckpt +model_start_epoch: 100 + +origin_range: [[-2.0, -2.0, -1.0], [2.0, 2.0, -1.0]] +#origin_range: [[-1.0, -1.0, -1.0], [1.0, 1.0, -1.0]] +direction_range: [[-0.5, -0.5, 2.0], [0.5, 0.5, 2.0]] + +use_ndc: True +convert_ndc: False + +defaults: + - model: ../../../model/shiny_z_plane diff --git a/nlf/conf/experiment/regularizers/tensor_tv/l1.yaml b/nlf/conf/experiment/regularizers/tensor_tv/l1.yaml new file mode 100644 index 0000000..786be90 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensor_tv/l1.yaml @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
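The `teacher_model` regularizer above distills from a pre-trained checkpoint (`model_ckpt_path`) queried on rays drawn from the boxes given by `origin_range` and `direction_range`. A sketch of such ray sampling under that reading; uniform sampling and the helper name are assumptions, not the repository's actual distillation code:

```python
import torch
import torch.nn.functional as F

def sample_teacher_rays(n, origin_range, direction_range, device="cpu"):
    """Uniformly sample ray origins/directions inside the configured boxes (assumed scheme)."""
    o_lo, o_hi = (torch.tensor(r, dtype=torch.float32, device=device) for r in origin_range)
    d_lo, d_hi = (torch.tensor(r, dtype=torch.float32, device=device) for r in direction_range)
    origins = o_lo + torch.rand(n, 3, device=device) * (o_hi - o_lo)
    dirs = F.normalize(d_lo + torch.rand(n, 3, device=device) * (d_hi - d_lo), dim=-1)
    return origins, dirs

# e.g. origin_range=[[-2,-2,-1],[2,2,-1]], direction_range=[[-0.5,-0.5,2],[0.5,0.5,2]]
```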
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensor_tv +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 0.00005 + decay: 0.25 + num_epochs: 100 + +use_tv: False +opacity_weight: 1.0 +color_weight: 0.1 + +skip_row: -1 +skip_col: -1 diff --git a/nlf/conf/experiment/regularizers/tensor_tv/tv.yaml b/nlf/conf/experiment/regularizers/tensor_tv/tv.yaml new file mode 100644 index 0000000..ada2edc --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensor_tv/tv.yaml @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensor_tv +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 0.05 + decay: 0.25 + num_epochs: 100 + +use_tv: True +opacity_weight: 1.0 +color_weight: 0.1 diff --git a/nlf/conf/experiment/regularizers/tensor_tv/tv_subdivided.yaml b/nlf/conf/experiment/regularizers/tensor_tv/tv_subdivided.yaml new file mode 100644 index 0000000..c16a147 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensor_tv/tv_subdivided.yaml @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensor_tv +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 0.05 + decay: 0.25 + num_epochs: 100 + +use_tv: True +opacity_weight: 1.0 +color_weight: 0.1 + + +skip_row: -1 +skip_col: -1 diff --git a/nlf/conf/experiment/regularizers/tensorf/l1_2000.yaml b/nlf/conf/experiment/regularizers/tensorf/l1_2000.yaml new file mode 100644 index 0000000..64c820a --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/l1_2000.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [2000,4000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +lr_upsample_reset: True + +TV_weight_density: 0.0 +TV_weight_app: 0.0 diff --git a/nlf/conf/experiment/regularizers/tensorf/l1_4000.yaml b/nlf/conf/experiment/regularizers/tensorf/l1_4000.yaml new file mode 100644 index 0000000..b52e887 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/l1_4000.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
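The `tensor_tv` and `tensorf` regularizers combine an L1 sparsity term on the density components (`L1_weight_initial` / `L1_weight_rest`) with a total-variation term on the feature planes (`TV_weight_density`, `TV_weight_app`). A compact sketch of the two penalties on a 2-D feature plane; the exact reduction used by the repository is assumed:

```python
import torch

def l1_sparsity(plane):
    """Mean absolute value of the density plane (encourages empty space)."""
    return plane.abs().mean()

def total_variation(plane):
    """Mean squared difference between neighbouring texels along both spatial axes."""
    tv_h = (plane[..., 1:, :] - plane[..., :-1, :]).pow(2).mean()
    tv_w = (plane[..., :, 1:] - plane[..., :, :-1]).pow(2).mean()
    return tv_h + tv_w

# loss += L1_weight_initial * l1_sparsity(density_plane) \
#       + TV_weight_density * total_variation(density_plane)
```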
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +lr_upsample_reset: True + +TV_weight_density: 0.0 +TV_weight_app: 0.0 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv.yaml b/nlf/conf/experiment/regularizers/tensorf/tv.yaml new file mode 100644 index 0000000..25a8d69 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv.yaml @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [2000,4000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 + +#L1_weight_initial: 0 +#L1_weight_rest: 0 +#TV_weight_density: 0.1 +#TV_weight_app: 0.1 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_2000.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_2000.yaml new file mode 100644 index 0000000..3b996ce --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_2000.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [2000,4000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_4000.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_4000.yaml new file mode 100644 index 0000000..ee6f296 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_4000.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 +#total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_4000_donerf.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_4000_donerf.yaml new file mode 100644 index 0000000..ee6f296 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_4000_donerf.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 +#total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_4000_immersive.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_4000_immersive.yaml new file mode 100644 index 0000000..ee6f296 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_4000_immersive.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 +#total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_4000_large.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_4000_large.yaml new file mode 100644 index 0000000..6f812af --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_4000_large.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [] +lr_decay_target_ratio: 0.1 +n_iters: 20000 +total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 0.0 +L1_weight_rest: 0.0 +TV_weight_density: 0.25 +TV_weight_app: 0.25 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_4000_large_small.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_4000_large_small.yaml new file mode 100644 index 0000000..6f812af --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_4000_large_small.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [] +lr_decay_target_ratio: 0.1 +n_iters: 20000 +total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 0.0 +L1_weight_rest: 0.0 +TV_weight_density: 0.25 +TV_weight_app: 0.25 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_4000_llff.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_4000_llff.yaml new file mode 100644 index 0000000..1b967a4 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_4000_llff.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [] +lr_decay_target_ratio: 0.1 +n_iters: 20000 +total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 0.0 +L1_weight_rest: 0.0 +TV_weight_density: 1.0 +TV_weight_app: 1.0 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_4000_many.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_4000_many.yaml new file mode 100644 index 0000000..238fb1f --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_4000_many.yaml @@ -0,0 +1,7 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_4000_no_app.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_4000_no_app.yaml new file mode 100644 index 0000000..c0b14a7 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_4000_no_app.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.0 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_4000_small.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_4000_small.yaml new file mode 100644 index 0000000..ee6f296 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_4000_small.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 +#total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_4000_stanford_llff.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_4000_stanford_llff.yaml new file mode 100644 index 0000000..8010378 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_4000_stanford_llff.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [20000,24000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_4000_technicolor.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_4000_technicolor.yaml new file mode 100644 index 0000000..d2d6d44 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_4000_technicolor.yaml @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 +total_num_tv_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.0125 +TV_weight_app: 0.0125 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_8000.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_8000.yaml new file mode 100644 index 0000000..9834112 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_8000.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [8000,16000] +lr_decay_target_ratio: 0.1 +n_iters: 60000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_donerf.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_donerf.yaml new file mode 100644 index 0000000..b8c4611 --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_donerf.yaml @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [4000,8000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 + +#L1_weight_initial: 0 +#L1_weight_rest: 0 +#TV_weight_density: 0.1 +#TV_weight_app: 0.1 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_shiny.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_shiny.yaml new file mode 100644 index 0000000..07064dd --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_shiny.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [2000,4000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +TV_weight_density: 0.25 +TV_weight_app: 0.05 diff --git a/nlf/conf/experiment/regularizers/tensorf/tv_video3d.yaml b/nlf/conf/experiment/regularizers/tensorf/tv_video3d.yaml new file mode 100644 index 0000000..bb5008c --- /dev/null +++ b/nlf/conf/experiment/regularizers/tensorf/tv_video3d.yaml @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensorf +ray_chunk: 32768 +batch_size: 8192 + +weight: + type: exponential_decay + start: 1.0 + decay: 1.0 + num_epochs: 100 + +update_AlphaMask_list: [2000,4000] +lr_decay_target_ratio: 0.1 +n_iters: 30000 + +lr_upsample_reset: True + +L1_weight_initial: 8e-5 +L1_weight_rest: 4e-5 +#L1_weight_initial: 4e-5 +#L1_weight_rest: 2e-5 +TV_weight_density: 0.05 +TV_weight_app: 0.05 + +#L1_weight_initial: 0 +#L1_weight_rest: 0 +#TV_weight_density: 0.1 +#TV_weight_app: 0.1 diff --git a/nlf/conf/experiment/regularizers/voxel_sparsity/default.yaml b/nlf/conf/experiment/regularizers/voxel_sparsity/default.yaml new file mode 100644 index 0000000..e6a0b31 --- /dev/null +++ b/nlf/conf/experiment/regularizers/voxel_sparsity/default.yaml @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: voxel_sparsity +ray_chunk: 32768 +batch_size: 32768 +use_inp_freq: inf + +weight: + type: exponential_decay + start: 0.01 + decay: 0.5 + num_epochs: 500 + +loss: + type: mse diff --git a/nlf/conf/experiment/regularizers/warp_level/lf.yaml b/nlf/conf/experiment/regularizers/warp_level/lf.yaml new file mode 100644 index 0000000..b19aa89 --- /dev/null +++ b/nlf/conf/experiment/regularizers/warp_level/lf.yaml @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: warp_level +ray_chunk: 32768 +batch_size: 8192 +use_inp_freq: inf +wait_iters: 10000 + +weight: + type: exponential_decay + start: 0.1 + decay: 1.0 + num_epochs: 500 + +dataset: + name: random_lightfield + st_plane: -1.0 + uv_plane: 0.0 + st_scale: + +color_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +svd_loss: + type: mae + weight: 0.0 + wait_iters: 0 + +level_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.1 + dir: 0.1 + bundle_size: 1 + +param: + n_dims: 4 + fn: two_plane diff --git a/nlf/conf/experiment/regularizers/warp_level/subdivided.yaml b/nlf/conf/experiment/regularizers/warp_level/subdivided.yaml new file mode 100644 index 0000000..cb40bb2 --- /dev/null +++ b/nlf/conf/experiment/regularizers/warp_level/subdivided.yaml @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: warp_level +ray_chunk: 32768 +batch_size: 4096 +use_inp_freq: inf +wait_iters: 10000 + +weight: + type: exponential_decay + start: 0.1 + decay: 1.0 + num_epochs: 500 + +dataset: + name: random_lightfield + st_plane: -1.0 + uv_plane: 0.0 + st_scale: + +color_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +svd_loss: + type: mae + weight: 0.0 + wait_iters: 0 + +level_loss: + type: mae + weight: 0.25 + wait_iters: 0 + +range: + pos: 1.0 + dir: 1.0 + +jitter: + pos: 0.1 + dir: 0.1 + bundle_size: 1 + +param: + n_dims: 4 + fn: two_plane diff --git a/nlf/conf/experiment/regularizers/weak_teacher/lf.yaml b/nlf/conf/experiment/regularizers/weak_teacher/lf.yaml new file mode 100644 index 0000000..d597b4a --- /dev/null +++ b/nlf/conf/experiment/regularizers/weak_teacher/lf.yaml @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
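The `warp_level` regularizers use `param: {n_dims: 4, fn: two_plane}` with `st_plane: -1.0` and `uv_plane: 0.0` in their datasets, i.e. the classic light-slab parameterization that records where a ray crosses two parallel planes. A sketch, assuming the planes are z = st_plane and z = uv_plane:

```python
import torch

def two_plane_from_ray(origins, directions, st_plane=-1.0, uv_plane=0.0):
    """Return (s, t, u, v): the x/y intersections of each ray with the two z-planes.
    The plane orientation along z is an assumption consistent with the config keys."""
    t_st = (st_plane - origins[..., 2]) / directions[..., 2]
    t_uv = (uv_plane - origins[..., 2]) / directions[..., 2]
    st = origins[..., :2] + t_st[..., None] * directions[..., :2]
    uv = origins[..., :2] + t_uv[..., None] * directions[..., :2]
    return torch.cat([st, uv], dim=-1)
```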
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: teacher +batch_size: 8192 + +dataset: + name: "dense_${experiment.dataset.name}" + collection: "${experiment.dataset.collection}_dense" + split: train + root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}_teacher/" + + num_rows: 10 + num_cols: 10 + train_row_skip: 1 + train_col_skip: 1 + size: 1000 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + stop_weight: 0.025 + num_epochs: 500 + +loss: + type: mse diff --git a/nlf/conf/experiment/regularizers/weak_teacher/subdivided.yaml b/nlf/conf/experiment/regularizers/weak_teacher/subdivided.yaml new file mode 100644 index 0000000..9d335c4 --- /dev/null +++ b/nlf/conf/experiment/regularizers/weak_teacher/subdivided.yaml @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: teacher +batch_size: 4096 + +dataset: + name: "dense_${experiment.dataset.name}" + collection: "${experiment.dataset.collection}_dense" + split: train + root_dir: "${experiment.params.data_dir}/${experiment.dataset.data_subdir}/${experiment.dataset.collection}_teacher/" + + num_rows: 10 + num_cols: 10 + train_row_skip: 1 + train_col_skip: 1 + size: 1000 + +weight: + type: exponential_decay + start: 1.0 + decay: 0.25 + stop_weight: 0.025 + num_epochs: 500 + +loss: + type: mse diff --git a/nlf/conf/experiment/training/bom_tensorf.yaml b/nlf/conf/experiment/training/bom_tensorf.yaml new file mode 100644 index 0000000..afbaeff --- /dev/null +++ b/nlf/conf/experiment/training/bom_tensorf.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 16384 +net_chunk: 16384 +render_ray_chunk: 16384 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 40 +ckpt_every: 20 +test_every: 20 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 20 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/nlf/conf/experiment/training/catacaustics_tensorf.yaml b/nlf/conf/experiment/training/catacaustics_tensorf.yaml new file mode 100644 index 0000000..497722a --- /dev/null +++ b/nlf/conf/experiment/training/catacaustics_tensorf.yaml @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
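The training configs define one optimizer block per parameter group (`color`, `color_impl`, `embedding`, `embedding_impl`), each with an `exp` scheduler driven by `decay_epoch` / `decay_gamma` and periodic resets via `reset_opt_list`. A sketch of how one such block could map onto a PyTorch optimizer and scheduler; the mapping and the `color_parameters()` helper are assumptions, only the hyperparameter values come from `bom_tensorf.yaml` above:

```python
import torch

def build_optimizer(params, cfg):
    """Adam + exponential decay: lr(epoch) = lr * decay_gamma ** (epoch / decay_epoch)."""
    opt = torch.optim.Adam(params, lr=cfg["lr"], weight_decay=cfg["weight_decay"])
    sched = torch.optim.lr_scheduler.LambdaLR(
        opt, lambda epoch: cfg["decay_gamma"] ** (epoch / cfg["decay_epoch"])
    )
    return opt, sched

# e.g. the `color` group of bom_tensorf.yaml (model.color_parameters() is hypothetical):
# opt, sched = build_optimizer(model.color_parameters(),
#     {"lr": 0.02, "weight_decay": 0, "decay_gamma": 0.125, "decay_epoch": 100})
```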
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 65536 +net_chunk: 65536 +render_ray_chunk: 65536 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 100 +render_every: 1000 +ckpt_every: 100 +test_every: 200 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/nlf/conf/experiment/training/default.yaml b/nlf/conf/experiment/training/default.yaml new file mode 100644 index 0000000..37e0933 --- /dev/null +++ b/nlf/conf/experiment/training/default.yaml @@ -0,0 +1,62 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 4096 +ray_chunk: 131072 +net_chunk: 131072 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 25 +render_every: 100 +ckpt_every: 100 +test_every: 200 +flush_logs: 1000 +num_epochs: 5000 + +num_workers: 16 +num_gpus: 1 + +weight_init: + type: none + +loss: + type: mse + +color: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: steplr + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 2000 + decay_gamma: 0.5 + +embedding: + optimizer: adam + lr: 0.0005 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 500 + decay_gamma: 0.1 diff --git a/nlf/conf/experiment/training/donerf_tensorf.yaml b/nlf/conf/experiment/training/donerf_tensorf.yaml new file mode 100644 index 0000000..e321697 --- /dev/null +++ b/nlf/conf/experiment/training/donerf_tensorf.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 16384 +net_chunk: 16384 +render_ray_chunk: 16384 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 50 +ckpt_every: 20 +test_every: 50 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 80 + +optimizers: + color: + optimizer: adam + lr: 0.025 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/nlf/conf/experiment/training/eikonal_tensorf.yaml b/nlf/conf/experiment/training/eikonal_tensorf.yaml new file mode 100644 index 0000000..81676a9 --- /dev/null +++ b/nlf/conf/experiment/training/eikonal_tensorf.yaml @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 4096 +ray_chunk: 16384 +net_chunk: 16384 +render_ray_chunk: 16384 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 100 +render_every: 1000 +ckpt_every: 100 +test_every: 200 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.25 + + reset_opt_list: [2000, 3000, 4000, 5500, 7000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.25 + + reset_opt_list: [2000, 3000, 4000, 5500, 7000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + #reset_opt_list: [2000, 3000, 4000, 5500, 7000] + + embedding_impl: + optimizer: adam + lr: 0.0005 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + #reset_opt_list: [2000, 3000, 4000, 5500, 7000] diff --git a/nlf/conf/experiment/training/immersive_tensorf.yaml b/nlf/conf/experiment/training/immersive_tensorf.yaml new file mode 100644 index 0000000..e42a9cf --- /dev/null +++ b/nlf/conf/experiment/training/immersive_tensorf.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 5000 +ray_chunk: 5000 +net_chunk: 5000 +render_ray_chunk: 5000 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 40 +ckpt_every: 20 +test_every: 20 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 20 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/nlf/conf/experiment/training/llff_tensorf.yaml b/nlf/conf/experiment/training/llff_tensorf.yaml new file mode 100644 index 0000000..91a5683 --- /dev/null +++ b/nlf/conf/experiment/training/llff_tensorf.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 65536 +net_chunk: 65536 +render_ray_chunk: 65536 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 10 +ckpt_every: 10 +test_every: 10 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 20 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/nlf/conf/experiment/training/neural_3d_tensorf.yaml b/nlf/conf/experiment/training/neural_3d_tensorf.yaml new file mode 100644 index 0000000..afbaeff --- /dev/null +++ b/nlf/conf/experiment/training/neural_3d_tensorf.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 16384 +net_chunk: 16384 +render_ray_chunk: 16384 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 40 +ckpt_every: 20 +test_every: 20 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 20 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/nlf/conf/experiment/training/shiny_tensorf.yaml b/nlf/conf/experiment/training/shiny_tensorf.yaml new file mode 100644 index 0000000..497722a --- /dev/null +++ b/nlf/conf/experiment/training/shiny_tensorf.yaml @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 65536 +net_chunk: 65536 +render_ray_chunk: 65536 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 100 +render_every: 1000 +ckpt_every: 100 +test_every: 200 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/nlf/conf/experiment/training/shiny_tensorf_small.yaml b/nlf/conf/experiment/training/shiny_tensorf_small.yaml new file mode 100644 index 0000000..70c69a6 --- /dev/null +++ b/nlf/conf/experiment/training/shiny_tensorf_small.yaml @@ -0,0 +1,127 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 4096 +ray_chunk: 65536 +net_chunk: 65536 +render_ray_chunk: 65536 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 100 +render_every: 1000 +ckpt_every: 100 +test_every: 200 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 + +optimizers: + color: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.0005 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + calibration: + optimizer: adam + lr: 0.0005 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 25 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/nlf/conf/experiment/training/spaces_tensorf.yaml b/nlf/conf/experiment/training/spaces_tensorf.yaml new file mode 100644 index 0000000..91a5683 --- /dev/null +++ b/nlf/conf/experiment/training/spaces_tensorf.yaml @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +ray_chunk: 65536 +net_chunk: 65536 +render_ray_chunk: 65536 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 10 +ckpt_every: 10 +test_every: 10 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 20 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 50 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/nlf/conf/experiment/training/stanford_tensorf.yaml b/nlf/conf/experiment/training/stanford_tensorf.yaml new file mode 100644 index 0000000..18bf3de --- /dev/null +++ b/nlf/conf/experiment/training/stanford_tensorf.yaml @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 16384 +#ray_chunk: 3145728 +#net_chunk: 3145728 +#render_ray_chunk: 3145728 +ray_chunk: 16384 +net_chunk: 16384 +render_ray_chunk: 16384 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 10 +render_every: 40 +ckpt_every: 40 +test_every: 20 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 4000 +num_epochs: 40 + +optimizers: + color: + optimizer: adam + lr: 0.005 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/nlf/conf/experiment/training/technicolor_tensorf.yaml b/nlf/conf/experiment/training/technicolor_tensorf.yaml new file mode 100644 index 0000000..fcdc66b --- /dev/null +++ b/nlf/conf/experiment/training/technicolor_tensorf.yaml @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +batch_size: 50000 +#ray_chunk: 3145728 +#net_chunk: 3145728 +#render_ray_chunk: 3145728 +ray_chunk: 50000 +net_chunk: 50000 +render_ray_chunk: 50000 + +update_data_every: 1 +reload_data_every: 100 + +log_every: 1 +val_every: 5 +render_every: 20 +ckpt_every: 5 +test_every: 5 +flush_logs: 1000 + +num_workers: 16 +num_gpus: 1 + +multiscale: False +scales: [2, 1] +scale_epochs: [0, 50] +scale_batch_sizes: [4096, 32768] + +weight_init: + type: none + +loss: + type: mse + +# TensoRF +sample_with_replacement: True +num_iters: 10000 +num_epochs: 4000 + +optimizers: + color: + optimizer: adam + lr: 0.02 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + color_impl: + optimizer: adam + lr: 0.001 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding: + optimizer: adam + lr: 0.01 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] + + embedding_impl: + optimizer: adam + lr: 0.00075 + clip: False + clip_amount: 1.0 + momentum: 0.9 + weight_decay: 0 + + lr_scheduler: exp + warmup_multiplier: 1.0 + warmup_epochs: 0 + + decay_epoch: 100 + decay_gamma: 0.125 + + reset_opt_list: [4000, 6000, 8000, 10000, 12000] diff --git a/nlf/conf/experiment/visualizers/all.yaml b/nlf/conf/experiment/visualizers/all.yaml new file mode 100644 index 0000000..ea4b51e --- /dev/null +++ b/nlf/conf/experiment/visualizers/all.yaml @@ -0,0 +1,8 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ diff --git a/nlf/conf/experiment/visualizers/closest_view/default.yaml b/nlf/conf/experiment/visualizers/closest_view/default.yaml new file mode 100644 index 0000000..7ef36ac --- /dev/null +++ b/nlf/conf/experiment/visualizers/closest_view/default.yaml @@ -0,0 +1,10 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: closest_view diff --git a/nlf/conf/experiment/visualizers/embedding/default.yaml b/nlf/conf/experiment/visualizers/embedding/default.yaml new file mode 100644 index 0000000..320c2ad --- /dev/null +++ b/nlf/conf/experiment/visualizers/embedding/default.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [] + +fields: + distances: + use_abs: False + normalize: True + + point_offset: + use_abs: True + bounds: [0.0, 0.25] + + points: + use_abs: False + bounds: [-2.0, 2.0] diff --git a/nlf/conf/experiment/visualizers/embedding/default_cascaded.yaml b/nlf/conf/experiment/visualizers/embedding/default_cascaded.yaml new file mode 100644 index 0000000..13adbc4 --- /dev/null +++ b/nlf/conf/experiment/visualizers/embedding/default_cascaded.yaml @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [raw_distance] + +fields: + distances: + use_abs: False + normalize: True + + raw_distance: + use_abs: False + normalize: True + sort: True + + point_offset: + use_abs: True + bounds: [0.0, 0.25] diff --git a/nlf/conf/experiment/visualizers/embedding/default_cascaded_2.yaml b/nlf/conf/experiment/visualizers/embedding/default_cascaded_2.yaml new file mode 100644 index 0000000..26a58f1 --- /dev/null +++ b/nlf/conf/experiment/visualizers/embedding/default_cascaded_2.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [] + +fields: + distances: + use_abs: False + normalize: True + + raw_distance: + use_abs: False + normalize: True + + point_offset: + use_abs: True + bounds: [0.0, 0.25] diff --git a/nlf/conf/experiment/visualizers/embedding/default_reflect.yaml b/nlf/conf/experiment/visualizers/embedding/default_reflect.yaml new file mode 100644 index 0000000..2a36f39 --- /dev/null +++ b/nlf/conf/experiment/visualizers/embedding/default_reflect.yaml @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [] + +fields: + distances: + use_abs: False + normalize: True + + normal: + use_abs: True + normalize: False + bounds: [0.0, 1.0] + + render_normal: + use_abs: True + normalize: False + bounds: [0.0, 1.0] + + point_offset: + use_abs: True + bounds: [0.0, 0.25] diff --git a/nlf/conf/experiment/visualizers/embedding/default_time.yaml b/nlf/conf/experiment/visualizers/embedding/default_time.yaml new file mode 100644 index 0000000..1865e08 --- /dev/null +++ b/nlf/conf/experiment/visualizers/embedding/default_time.yaml @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [raw_distance, raw_flow] + +fields: + distances: + use_abs: False + normalize: True + + point_offset: + use_abs: True + bounds: [0.0, 0.25] + + spatial_flow: + use_abs: True + bounds: [0.0, 1.0] diff --git a/nlf/conf/experiment/visualizers/embedding/default_time_cascaded.yaml b/nlf/conf/experiment/visualizers/embedding/default_time_cascaded.yaml new file mode 100644 index 0000000..d38e34a --- /dev/null +++ b/nlf/conf/experiment/visualizers/embedding/default_time_cascaded.yaml @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [raw_distance, raw_flow] + +fields: + distances: + use_abs: False + normalize: True + + raw_distance: + use_abs: False + normalize: True + sort: True + + raw_flow: + use_abs: True + normalize: True + sort: True + + point_offset: + use_abs: True + bounds: [0.0, 0.25] + + spatial_flow: + use_abs: True + bounds: [0.0, 1.0] diff --git a/nlf/conf/experiment/visualizers/embedding/default_time_cascaded_2.yaml b/nlf/conf/experiment/visualizers/embedding/default_time_cascaded_2.yaml new file mode 100644 index 0000000..c3e8ad4 --- /dev/null +++ b/nlf/conf/experiment/visualizers/embedding/default_time_cascaded_2.yaml @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [] + +fields: + distances: + use_abs: False + normalize: True + + raw_distance: + use_abs: False + normalize: True + + raw_flow: + use_abs: True + normalize: True + + point_offset: + use_abs: True + bounds: [0.0, 0.25] + + spatial_flow: + use_abs: True + bounds: [0.0, 1.0] diff --git a/nlf/conf/experiment/visualizers/embedding/points.yaml b/nlf/conf/experiment/visualizers/embedding/points.yaml new file mode 100644 index 0000000..74ca0d2 --- /dev/null +++ b/nlf/conf/experiment/visualizers/embedding/points.yaml @@ -0,0 +1,20 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: embedding + +run_on_test: False +save_data: False + +no_over_fields: [] + +fields: + points: + use_abs: False + bounds: [-2.0, 2.0] diff --git a/nlf/conf/experiment/visualizers/epipolar/default.yaml b/nlf/conf/experiment/visualizers/epipolar/default.yaml new file mode 100644 index 0000000..c39f50b --- /dev/null +++ b/nlf/conf/experiment/visualizers/epipolar/default.yaml @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +# @package _group_ + +type: epipolar +t: +v: +H: + +st_scale: +uv_scale: + +near: +far: diff --git a/nlf/conf/experiment/visualizers/focus/default.yaml b/nlf/conf/experiment/visualizers/focus/default.yaml new file mode 100644 index 0000000..d3863d5 --- /dev/null +++ b/nlf/conf/experiment/visualizers/focus/default.yaml @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: focus + +s: +t: + +ds: 200.0 +dt: 200.0 + +st_scale: +uv_scale: + +near: -1.0 +far: 0.0 +focal: 0.0 diff --git a/nlf/conf/experiment/visualizers/tensor/default.yaml b/nlf/conf/experiment/visualizers/tensor/default.yaml new file mode 100644 index 0000000..2732af6 --- /dev/null +++ b/nlf/conf/experiment/visualizers/tensor/default.yaml @@ -0,0 +1,10 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2020 Quei-An Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @package _group_ + +type: tensor diff --git a/nlf/contract.py b/nlf/contract.py new file mode 100644 index 0000000..9d2c460 --- /dev/null +++ b/nlf/contract.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import numpy as np +import torch +from torch import nn + +from nlf.activations import IdentityTanh, Power, Tanh, get_activation + + +class BaseContract(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + self.use_dataset_bounds = cfg.use_dataset_bounds if "use_dataset_bounds" in cfg else False + self.contract_samples = cfg.contract_samples if "contract_samples" in cfg else False + + def inverse_contract_distance(self, distance): + return distance + + def contract_distance(self, distance): + return distance + + def contract_points(self, points): + return points + + def inverse_contract_points(self, contract_points): + contract_distance = torch.norm(contract_points, dim=-1, keepdim=True) + distance = self.inverse_contract_distance(contract_distance) + return (contract_points / contract_distance) * distance + + def contract_points_and_distance(self, rays_o, points, distance): + # Contract + rays_o = self.contract_points(rays_o) + points = self.contract_points(points) + distance = torch.norm(points - rays_o[..., None, :], dim=-1, keepdim=True) + + # Return + return points, distance + + +class IdentityContract(BaseContract): + def __init__(self, cfg, **kwargs): + super().__init__(cfg) + + def contract_points_and_distance(self, rays_o, points, distance): + return points, distance + + +class BBoxContract(BaseContract): + def __init__(self, cfg, **kwargs): + super().__init__(cfg) + + self.bbox_min = torch.tensor(list(cfg.bbox_min) if "bbox_min" in cfg else [-1.0, -1.0, -1.0]).cuda() + self.bbox_max = torch.tensor(list(cfg.bbox_max) if "bbox_max" in cfg else [1.0, 1.0, 1.0]).cuda() + self.fac = torch.mean(torch.abs(self.bbox_max - self.bbox_min)) + + def inverse_contract_distance(self, distance): + return distance * self.fac + + def contract_distance(self, distance): + return distance 
/ self.fac + + def contract_points(self, points): + return (points - self.bbox_min.view(1, 1, 3)) / (self.bbox_max.view(1, 1, 3) - self.bbox_min.view(1, 1, 3)) + + +class ZDepthContract(BaseContract): + def __init__(self, cfg, **kwargs): + super().__init__(cfg) + + if self.use_dataset_bounds: + self.contract_end_radius = ( + cfg.contract_end_radius + if "contract_end_radius" in cfg + else kwargs["system"].dm.train_dataset.depth_range[1] + ) + else: + self.contract_end_radius = cfg.contract_end_radius if "contract_end_radius" in cfg else float("inf") + + self.fac = self.contract_end_radius / 2.0 + + def inverse_contract_distance(self, distance): + return distance * self.fac + + def contract_distance(self, distance): + return distance / self.fac + + def contract_points(self, points): + return points / self.fac + + +class MIPNeRFContract(BaseContract): + def __init__(self, cfg, **kwargs): + super().__init__(cfg) + + if self.use_dataset_bounds: + self.contract_start_radius = ( + cfg.contract_start_radius + if "contract_start_radius" in cfg + else max(kwargs["system"].dm.train_dataset.depth_range[0] * 1.5, 1.0) + ) + self.contract_end_radius = ( + cfg.contract_end_radius + if "contract_end_radius" in cfg + else kwargs["system"].dm.train_dataset.depth_range[1] * 1.5 + ) + else: + self.contract_start_radius = cfg.contract_start_radius if "contract_start_radius" in cfg else 1.0 + self.contract_end_radius = cfg.contract_end_radius if "contract_end_radius" in cfg else float("inf") + + self.contract_start_distance = ( + cfg.contract_start_distance if "contract_start_distance" in cfg else self.contract_start_radius + ) + self.contract_end_distance = ( + cfg.contract_end_distance if "contract_end_distance" in cfg else self.contract_end_radius + ) + + if "distance_activation" in cfg: + self.distance_activation = get_activation(cfg.distance_activation) + else: + # self.distance_activation = IdentityTanh({}) + # self.distance_activation = Tanh({'fac': 1.0}) + # self.distance_activation = Tanh({'fac': 2.0}) + self.distance_activation = get_activation("identity") + + def inverse_contract_distance(self, distance): + # t varies linearly in disparity + inverse_contract_end_distance = self.contract_start_distance / self.contract_end_distance + scale_factor = 1.0 / (1.0 - inverse_contract_end_distance) + + # Inverse distance + distance = self.distance_activation(distance / 2.0) * 2.0 + distance = distance.clamp(-2.0, 2.0) + t = 2.0 - torch.abs(distance) + inverse_distance = t / scale_factor + inverse_contract_end_distance + + return ( + torch.where(torch.abs(distance) < 1, distance, torch.sign(distance) * (1.0 / inverse_distance)) + * self.contract_start_distance + ) + + def contract_distance(self, distance): + # Re-scale distance + distance = distance / self.contract_start_distance + inverse_distance = 1.0 / torch.abs(distance) + + # t varies linearly in disparity + inverse_contract_end_distance = self.contract_start_distance / self.contract_end_distance + scale_factor = 1.0 / (1.0 - inverse_contract_end_distance) + t = (inverse_distance - inverse_contract_end_distance) * scale_factor + + distance = torch.where( + torch.abs(distance) < 1.0, + distance / 1.0, + torch.sign(distance) * (2.0 - t), + ) + + return self.distance_activation.inverse(distance / 2.0) * 2.0 + + def contract_points(self, points): + points = points / self.contract_start_radius + distance = torch.norm(points, dim=-1, keepdim=True) + + # t varies linearly in disparity + inverse_distance = 1.0 / torch.abs(distance) + inverse_contract_end_radius 
= self.contract_start_radius / self.contract_end_radius + scale_factor = 1.0 / (1.0 - inverse_contract_end_radius) + t = (inverse_distance - inverse_contract_end_radius) * scale_factor + + return torch.where(distance < 1, points, (points / distance) * (2.0 - t)) + + +class DoNeRFContract(BaseContract): + def __init__(self, cfg, **kwargs): + super().__init__(cfg) + + if self.use_dataset_bounds: + self.contract_start_radius = ( + cfg.contract_start_radius + if "contract_start_radius" in cfg + else max(kwargs["system"].dm.train_dataset.depth_range[0] * 1.75, 1.0) + ) + self.contract_end_radius = ( + cfg.contract_end_radius + if "contract_end_radius" in cfg + else kwargs["system"].dm.train_dataset.depth_range[1] * 1.5 + ) + else: + self.contract_start_radius = cfg.contract_start_radius if "contract_start_radius" in cfg else None + self.contract_end_radius = cfg.contract_end_radius if "contract_end_radius" in cfg else 10000.0 + + if self.contract_start_radius is None: + self.power = cfg.power if "power" in cfg else 2.0 + self.fac = np.power(2.0, self.power) / self.contract_end_radius + else: + self.fac = 1.0 / self.contract_start_radius + self.power = np.log(self.contract_end_radius / self.contract_start_radius) / np.log(2.0) + + if "distance_activation" in cfg: + self.distance_activation = get_activation(cfg.distance_activation) + else: + self.distance_activation = get_activation("identity") + + def inverse_contract_distance(self, distance): + distance = self.distance_activation(distance / 2.0) * 2.0 + distance = distance.clamp(-2.0, 2.0) + + return torch.pow(torch.abs(distance) + 1e-8, self.power) * torch.sign(distance) / self.fac + + def contract_distance(self, distance): + distance = distance * self.fac + distance = torch.pow(torch.abs(distance) + 1e-8, 1.0 / self.power) * torch.sign(distance) + + return self.distance_activation.inverse(distance / 2.0) * 2.0 + + def contract_points(self, points): + dists = torch.norm(points, dim=-1, keepdim=True) + return (points / dists) * torch.pow(dists * self.fac + 1e-8, 1.0 / self.power) + + +contract_dict = { + "identity": IdentityContract, + "bbox": BBoxContract, + "z_depth": ZDepthContract, + "mipnerf": MIPNeRFContract, + "donerf": DoNeRFContract, +} diff --git a/nlf/datasets/__init__.py b/nlf/datasets/__init__.py new file mode 100644 index 0000000..fffdc45 --- /dev/null +++ b/nlf/datasets/__init__.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +from .blender import BlenderDataset, BlenderLightfieldDataset, DenseBlenderDataset +from .catacaustics import CatacausticsDataset +from .donerf import DONeRFDataset +from .eikonal import EikonalDataset +from .fourier import FourierDataset, FourierLightfieldDataset +from .immersive import ImmersiveDataset +from .llff import DenseLLFFDataset, LLFFDataset +from .neural_3d import Neural3DVideoDataset +from .random import RandomPixelDataset, RandomRayDataset, RandomRayLightfieldDataset, RandomViewSubsetDataset +from .shiny import DenseShinyDataset, ShinyDataset +from .spaces import SpacesDataset +from .stanford import StanfordEPIDataset, StanfordLightfieldDataset, StanfordLLFFDataset +from .technicolor import TechnicolorDataset +from .video3d_ground_truth import Video3DTimeGroundTruthDataset +from .video3d_static import Video3DDataset +from .video3d_time import Video3DTimeDataset + +dataset_dict = { + "fourier": FourierDataset, + "fourier_lightfield": FourierLightfieldDataset, + "random_ray": RandomRayDataset, + "random_pixel": RandomPixelDataset, + "random_lightfield": RandomRayLightfieldDataset, + "random_view": RandomViewSubsetDataset, + "donerf": DONeRFDataset, + "blender": BlenderDataset, + "dense_blender": DenseBlenderDataset, + "llff": LLFFDataset, + "eikonal": EikonalDataset, + "dense_llff": DenseLLFFDataset, + "dense_shiny": DenseShinyDataset, + "shiny": ShinyDataset, + "blender_lightfield": BlenderLightfieldDataset, + "stanford": StanfordLightfieldDataset, + "stanford_llff": StanfordLLFFDataset, + "stanford_epi": StanfordEPIDataset, + "video3d": Video3DDataset, + "video3d_time": Video3DTimeDataset, + "video3d_time_ground_truth": Video3DTimeGroundTruthDataset, + "technicolor": TechnicolorDataset, + "neural_3d": Neural3DVideoDataset, + "catacaustics": CatacausticsDataset, + "immersive": ImmersiveDataset, + "spaces": SpacesDataset, +} diff --git a/nlf/datasets/base.py b/nlf/datasets/base.py new file mode 100644 index 0000000..f514dcc --- /dev/null +++ b/nlf/datasets/base.py @@ -0,0 +1,526 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import copy +import os +import pdb + +import numpy as np +import torch +from iopath.common.file_io import NativePathHandler, PathManager +from omegaconf import OmegaConf # @manual //github/third-party/omry/omegaconf:omegaconf +from torch.utils.data import Dataset +from torchvision import transforms as T + +from utils.pose_utils import create_spherical_poses, create_spiral_poses, interpolate_poses +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + + +class BaseDataset(Dataset): + def __init__(self, cfg, split="train", **kwargs): + + ## Settings ## + self.chunks = None + # Path manager + self.pmgr = PathManager() + self.pmgr.register_handler(NativePathHandler()) + + # Copy train dataset config + if "train_dataset" in kwargs: + base_dataset_cfg = copy.deepcopy(kwargs["train_dataset"].cfg.dataset) + OmegaConf.set_struct(base_dataset_cfg, False) + + for key in cfg.dataset.keys(): + base_dataset_cfg.__dict__[key] = cfg.dataset[key] + setattr(base_dataset_cfg, key, cfg.dataset[key]) + + cfg.dataset = base_dataset_cfg + + ## Dataset cfg + self.cfg = cfg + self.split = getattr(cfg.dataset, "split", split) + self.dataset_cfg = getattr(cfg.dataset, self.split, cfg.dataset) + + # Basic dataset params + self.root_dir = os.path.expanduser(self.dataset_cfg.root_dir) + + if "img_wh" in self.dataset_cfg and ( + not isinstance(self.dataset_cfg.img_wh, str) and self.dataset_cfg.img_wh is not None + ): + self._img_wh = tuple(self.dataset_cfg.img_wh) + self.img_wh = self._img_wh + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + self.downsample = 1 + else: + self.img_wh = None + self.downsample = getattr(self.dataset_cfg, "downsample", 1) + + self.centered_pixels = getattr(self.dataset_cfg, "centered_pixels", False) + + # Rendering + self.render_supersample = self.dataset_cfg.render_params.supersample + self.render_crop = self.dataset_cfg.render_params.crop + + # Validation + self.val_num = self.dataset_cfg.val_num + self.val_skip = self.dataset_cfg.val_skip + self.val_set = self.dataset_cfg.val_set if "val_set" in self.dataset_cfg else [] + self.val_crop = self.dataset_cfg.val_crop if "val_crop" in self.dataset_cfg else 1.0 + self.val_all = (self.dataset_cfg.val_all if "val_all" in self.dataset_cfg else False) or ( + kwargs["val_all"] if "val_all" in kwargs else False + ) + + # Crop + self.precrop_iters = self.dataset_cfg.precrop_iters if "precrop_iters" in self.dataset_cfg else 0 + self.use_crop = self.precrop_iters > 0 + self.cur_iter = 0 + self.precrop_frac = self.dataset_cfg.precrop_frac if "precrop_fac" in self.dataset_cfg else 0.5 + + # Patch loading + self.use_patches = self.dataset_cfg.use_patches if "use_patches" in self.dataset_cfg else False + self.use_one_image = self.dataset_cfg.use_one_image if "use_one_image" in self.dataset_cfg else False + self.use_full_image = ( + self.dataset_cfg.use_full_image if "use_full_image" in self.dataset_cfg else self.use_one_image + ) + self.blur_radius = self.dataset_cfg.blur_radius if "blur_radius" in self.dataset_cfg else 0 + + ## Set-up data ## + + self.define_transforms() + self.prepare_data() + + def read_meta(self): + pass + + def prepare_train_data(self): + self.num_images = len(self.image_paths) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + + for idx in range(len(self.image_paths)): + # coords + self.all_coords += [self.get_coords(idx)] + + # Color + self.all_rgb += [self.get_rgb(idx)] + + self.update_all_data(torch.cat(self.all_coords, 0), 
torch.cat(self.all_rgb, 0)) + + def update_all_data(self, coords, rgb): + self.all_coords = coords + self.all_rgb = rgb + self.all_weights = self.get_weights() + + ## Patches + if self.use_patches or self.use_crop: + self._all_coords = torch.clone(self.all_coords) + self._all_rgb = torch.clone(self.all_rgb) + + ## All inputs + self.all_inputs = torch.cat([self.all_coords, self.all_rgb, self.all_weights], -1) + + def prepare_val_data(self): + self.prepare_test_data() + + def prepare_test_data(self): + pass + + def prepare_render_data(self): + pass + + def shift_chunk(self): + return 0 + + def prepare_data(self): + self.read_meta() + + if self.split == "train": + if self.cfg.params.render_only or self.cfg.params.test_only: + self.all_inputs = [0] + self.all_inputs_sam = [0] + else: + self.prepare_train_data() + elif self.split == "val": + self.prepare_val_data() + elif self.split == "test": + self.prepare_test_data() + elif self.split == "render": + self.prepare_render_data() + + def define_transforms(self): + if self.blur_radius > 0: + self.transform = T.Compose( + [ + T.ToTensor(), + T.GaussianBlur((self.blur_radius * 2 + 1, self.blur_radius * 2 + 1), self.blur_radius / 3.0), + ] + ) + else: + self.transform = T.ToTensor() + + def scale(self, scale): + self.img_wh = (self._img_wh[0] // scale, self._img_wh[1] // scale) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + self.define_transforms() + self.prepare_data() + + def get_coords(self, idx): + pass + + def get_weights(self, device="cpu"): + return torch.ones(*self.all_coords[..., 0:1].shape, device=device) + + def get_rgb(self, idx): + pass + + def get_closest_rgb(self, query_pose): + pass + + def shuffle(self): + if not self.use_patches: + # Get permutation + if self.use_full_image: + self.all_coords = self.all_coords.view(-1, self.img_wh[0] * self.img_wh[1], 6) + self.all_rgb = self.all_rgb.view(-1, self.img_wh[0] * self.img_wh[1], 3) + + perm = torch.tensor(np.random.permutation(self.all_coords.shape[0])) + else: + perm = torch.tensor(np.random.permutation(len(self))) + + # Shuffle + self.all_coords = self.all_coords[perm].view(-1, 6) + self.all_rgb = self.all_rgb[perm].view(-1, 3) + else: + self.shuffle_patches() + + # Weights and inputs + self.all_weights = self.get_weights() + self.all_inputs = torch.cat([self.all_coords, self.all_rgb, self.all_weights], -1) + + def __len__(self): + if self.split == "train": + if self.cfg.params.render_only or self.cfg.params.test_only: + return 1 + else: + return len(self.all_coords) + elif self.split == "val": + return min(self.val_num, len(self.poses)) + elif self.split == "render": + if self.render_max_frames > 0: + return min(self.render_max_frames, len(self.poses)) + else: + return len(self.poses) + else: + return len(self.poses) + + def get_one_image_batch(self, idx, batch_size, device="cuda"): + return None + + def __getitem__(self, idx): + if self.split == "render": + batch = {"coords": self.get_coords(idx), "pose": self.poses[idx], "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "val" or self.split == "test": + batch = {"coords": self.get_coords(idx), "rgb": self.get_rgb(idx), "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = 
batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def get_batch(self, batch_idx, batch_size, jitter=None): + batch_start = batch_idx * batch_size + batch_end = (batch_idx + 1) * batch_size + return self[batch_start:batch_end] + + def crop_all_tensors( + self, + t, + W, + H, + dW, + dH, + ): + t = t.view(self.num_images, H, W, -1) + + t = t[ + :, + (H // 2 - dH) : (H // 2 + dH + 1), + (W // 2 - dW) : (W // 2 + dW + 1), + ] + + return t.reshape(-1, t.shape[-1]) + + def crop_one_tensor( + self, + t, + W, + H, + dW, + dH, + ): + t = t.view(1, H, W, -1) + + t = t[ + :, + (H // 2 - dH) : (H // 2 + dH + 1), + (W // 2 - dW) : (W // 2 + dW + 1), + ] + + H, W = t.shape[1], t.shape[2] + + return W, H, t.reshape(-1, t.shape[-1]) + + def crop(self): + if self.use_crop and self.cur_iter < self.precrop_iters: + W = self.img_wh[0] + H = self.img_wh[1] + dW = int(W // 2 * self.precrop_frac) + dH = int(H // 2 * self.precrop_frac) + + self.all_coords = self.crop_all_tensors(self._all_coords, W, H, dW, dH) + self.all_rgb = self.crop_all_tensors(self._all_rgb, W, H, dW, dH) + + def crop_batch(self, batch): + W = self.img_wh[0] + H = self.img_wh[1] + + if self.split == "val" or self.split == "test": + crop = self.val_crop + elif self.split == "render": + crop = self.render_crop + else: + crop = 1.0 + + if crop < 1.0: + dW = int(W // 2 * crop) + dH = int(H // 2 * crop) + + for k in batch.keys(): + if torch.is_tensor(batch[k]): + temp_W, temp_H, batch[k] = self.crop_one_tensor(batch[k], W, H, dW, dH) + + W, H = temp_W, temp_H + + return W, H, batch + + def patchify_tensor( + self, + t, + width, + height, + patch_offset, + patch_width, + ): + c = t.shape[-1] + t = t.view(self.num_images, height, width, c) + + # Remove boundaries + p = self.blur_radius + + if p > 0: + t = t[:, p:-p, p:-p] + + # Patch offset + t = t[:, patch_offset:, patch_offset:] + + # Crop to multiple of patch width + round_height = (t.shape[1] // patch_width) * patch_width + round_width = (t.shape[2] // patch_width) * patch_width + t = t[:, :round_height, :round_width] + + t = t.reshape( + t.shape[0], round_height // patch_width, patch_width, round_width // patch_width, patch_width, c + ).permute(0, 1, 3, 2, 4, 5) + + return t.reshape(-1, patch_width * patch_width, c) + + def shuffle_patches(self): + print("Shuffle patches") + + # Patchify + patch_width = self.dataset_cfg.patch_width + width, height = self.img_wh[0], self.img_wh[1] + patch_offset = int(np.random.uniform() * patch_width) + + self.all_coords = self.patchify_tensor(self._all_coords, width, height, patch_offset, patch_width) + + self.all_rgb = self.patchify_tensor(self._all_rgb, width, height, patch_offset, patch_width) + + # Shuffle + perm = torch.tensor(np.random.permutation(self.all_coords.shape[0])) + + self.all_coords = self.all_coords[perm].reshape(-1, self.all_coords.shape[-1]) + self.all_rgb = self.all_rgb[perm].reshape(-1, self.all_rgb.shape[-1]) + + +class Base5DDataset(BaseDataset): + def __init__(self, cfg, split="train", **kwargs): + + # Rendering + self.render_spherical = cfg.dataset.spherical_poses if "spherical_poses" in cfg else False + self.render_interpolate = cfg.dataset.render_params.interpolate + self.render_max_frames = ( + cfg.dataset.render_params.max_frames if "max_frames" in cfg.dataset.render_params else 0 + ) + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + self.include_world = cfg.dataset.include_world if 
"include_world" in cfg.dataset else False + + super().__init__(cfg, split, **kwargs) + + def prepare_render_data(self): + if self.render_spherical: + self.poses = create_spherical_poses(self.bounds.max()) + + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + radii = np.percentile(np.abs(self.poses[..., 3]), 90, axis=0) + self.poses = create_spiral_poses(self.poses, radii, focus_depth) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + def get_intrinsics_screen_space(self): + K = np.copy(self.get_intrinsics()) + K[0, 2] = K[0, 2] - self.img_wh[0] / 2 + K[1, 2] = K[1, 2] - self.img_wh[1] / 2 + K[0, :] = 2 * K[0, :] / self.img_wh[0] + K[1, :] = -2 * K[1, :] / self.img_wh[1] + return K + + def get_intrinsics(self): + pass + + def get_closest_rgb(self, query_pose): + W = self.img_wh[0] + H = self.img_wh[1] + + images = self.all_rgb.view(self.num_images, H, W, -1) + dists = np.linalg.norm(self.poses[:, :3, -1] - query_pose[None, :3, -1], axis=-1) + return images[list(np.argsort(dists))[0]] + + def get_coords_from_camera(self, pose, time, cam_idx, K, W, H, device="cuda"): + # Directions + directions = get_ray_directions_K(H, W, K, centered_pixels=True, device=device) + + # Pose + c2w = torch.FloatTensor(pose[:3, :4]).to(device) + + # Rays + rays_o, rays_d = get_rays(directions, c2w) + rays = torch.cat([rays_o, rays_d], dim=-1) + + # To NDC + if self.use_ndc: + rays = self.to_ndc(rays) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * cam_idx], dim=-1) + + # Add times + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * time], dim=-1) + + # Add camera idx + return rays + + +class Base6DDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + + self.render_interpolate_time = ( + cfg.dataset.render_params.interpolate_time if "interpolate_time" in cfg.dataset.render_params else False + ) + + super().__init__(cfg, split, **kwargs) + + def prepare_render_data(self): + if self.render_spherical: + self.poses = create_spherical_poses(self.bounds.max()) + + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + radii = np.percentile(np.abs(self.poses[..., 3]), 90, axis=0) + self.poses = create_spiral_poses(self.poses, radii, focus_depth) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + # Get times + if (self.num_frames - 1) > 0: + self.times = np.linspace(0, self.num_frames - 1, len(self.poses)) + + if not self.render_interpolate_time: + self.times = np.round(self.times) + + self.times = self.times / (self.num_frames - 1) + else: + self.times = [0.0 for p in self.poses] + + def get_closest_rgb(self, query_pose, query_time): + W = self.img_wh[0] + H = self.img_wh[1] + + # Reshape + images = self.all_rgb.view(self.num_frames, -1, H, W, self.all_rgb.shape[-1]) + poses = self.poses.reshape(self.num_frames, -1, self.poses.shape[-2], self.poses.shape[-1]) + + # Get poses at current frame + frame_idx = int(np.round(query_time * (self.num_frames - 1))) + images = images[frame_idx] + poses = poses[frame_idx] + + # Distances + dists = np.linalg.norm(poses[:, :3, -1] - query_pose[None, :3, -1], axis=-1) + + # Closest rgb + return images[list(np.argsort(dists))[0]] + + def 
__getitem__(self, idx): + if self.split == "render": + batch = {"coords": self.get_coords(idx), "pose": self.poses[idx], "time": self.times[idx], "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "val" or self.split == "test": + batch = {"coords": self.get_coords(idx), "rgb": self.get_rgb(idx), "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/nlf/datasets/blender.py b/nlf/datasets/blender.py new file mode 100644 index 0000000..2d15550 --- /dev/null +++ b/nlf/datasets/blender.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import json +import os + +import cv2 +import numpy as np +import torch +from PIL import Image + +from utils.ray_utils import get_ray_directions_K, get_rays + +from .base import Base5DDataset +from .lightfield import LightfieldDataset + + +class BlenderLightfieldDataset(LightfieldDataset): + def __init__(self, cfg, split="train", **kwargs): + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + # Read meta + transforms_path = os.path.join(self.root_dir, "transforms.json") + + with self.pmgr.open(transforms_path, "r") as f: + self.meta = json.load(f) + + # Image paths and pose + self.image_paths = [] + self.poses = [] + + for frame in self.meta["frames"]: + # Image path + image_path = frame["file_path"].split("/")[-1] + self.image_paths += [image_path] + + # Pose + pose = np.array(frame["transform_matrix"])[:3, :4] + self.poses += [pose] + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, f"{image_path}.png"), "rb") as im_file: + img = Image.open(im_file).convert("RGBA") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + +class BlenderDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + if self.split == "render": + self.read_meta_for_split("test") + elif self.split == "train": + self.read_meta_for_split("train") + elif self.split == "val": + self.read_meta_for_split("test") + else: + self.read_meta_for_split(self.split) + + def read_meta_for_split(self, split): + with self.pmgr.open(os.path.join(self.root_dir, f"transforms_{split}.json"), "r") as f: + self.meta = json.load(f) + + if split == "val": + self.meta["frames"] = self.meta["frames"][: self.val_num] + + W, H = self.img_wh + + self.focal = 0.5 * 800 / np.tan(0.5 * self.meta["camera_angle_x"]) + self.focal *= self.img_wh[0] / 800 + + self.K = np.eye(3) + self.K[0, 0] = self.focal + self.K[0, 2] = W / 2.0 + self.K[1, 1] = self.focal + self.K[1, 2] = H / 2.0 + + # Bounds, common for all scenes + self.near = 2.0 + self.far = 6.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near, self.far]) + + # Ray directions for all pixels, same for all images (same H, W, focal) + self.centered_pixels = True + self.directions 
= get_ray_directions_K(H, W, self.K, centered_pixels=True) + + # Image paths and pose + self.image_paths = [] + self.poses = [] + + for frame in self.meta["frames"]: + # Image path + self.image_paths += [frame["file_path"]] + + # Pose + pose = np.array(frame["transform_matrix"])[:3, :4] + self.poses += [pose] + + self.poses = np.stack(self.poses, axis=0) + + def prepare_render_data(self): + self.prepare_test_data() + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(self.directions, c2w) + return torch.cat([rays_o, rays_d], dim=-1) + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, f"{image_path}.png"), "rb") as im_file: + img = Image.open(im_file).convert("RGBA") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_intrinsics(self): + K = np.eye(3) + K[0, 0] = self.focal + K[0, 2] = self.img_wh[0] / 2 + K[1, 1] = self.focal + K[1, 2] = self.img_wh[1] / 2 + + return K + + +class DenseBlenderDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + if self.split == "render": + self.read_meta_for_split("test") + elif self.split == "train": + self.read_meta_for_split("test") + elif self.split == "val": + self.read_meta_for_split("test") + else: + self.read_meta_for_split(self.split) + + def read_meta_for_split(self, split): + with self.pmgr.open(os.path.join(self.root_dir, f"transforms_{split}.json"), "r") as f: + self.meta = json.load(f) + + W, H = self.img_wh + + self.focal = 0.5 * 800 / np.tan(0.5 * self.meta["camera_angle_x"]) + self.focal *= self.img_wh[0] / 800 + + self.K = np.eye(3) + self.K[0, 0] = self.focal + self.K[0, 2] = W / 2.0 + self.K[1, 1] = self.focal + self.K[1, 2] = H / 2.0 + + # Bounds, common for all scenes + self.near = 2.0 + self.far = 6.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near, self.far]) + + # Ray directions for all pixels, same for all images (same H, W, focal) + self.centered_pixels = True + self.directions = get_ray_directions_K(H, W, self.K, centered_pixels=self.centered_pixels) + + # Image paths and pose + self.image_paths = [] + self.poses = [] + + for frame in self.meta["frames"]: + # Image path + self.image_paths += [frame["file_path"]] + + # Pose + pose = np.array(frame["transform_matrix"])[:3, :4] + self.poses += [pose] + + self.poses = np.stack(self.poses, axis=0) + + ## Holdout validation images + if self.val_set == "lightfield": + step = self.dataset_cfg.lightfield_step + rows = self.dataset_cfg.lightfield_rows + cols = self.dataset_cfg.lightfield_cols + val_indices = [] + + for row in range(0, rows, 1): + for col in range(0, cols, 1): + idx = row * cols + col + + if row % step != 0 or col % step != 0: + val_indices.append(idx) + + elif len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == 
"val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.poses = self.poses[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.poses = self.poses[train_indices] + + def prepare_render_data(self): + self.prepare_test_data() + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(self.directions, c2w) + return torch.cat([rays_o, rays_d], dim=-1) + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, f"{image_path}.png"), "rb") as im_file: + img = Image.open(im_file).convert("RGBA") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_intrinsics(self): + K = np.eye(3) + K[0, 0] = self.focal + K[0, 2] = self.img_wh[0] / 2 + K[1, 1] = self.focal + K[1, 2] = self.img_wh[1] / 2 + + return K diff --git a/nlf/datasets/catacaustics.py b/nlf/datasets/catacaustics.py new file mode 100644 index 0000000..74708ac --- /dev/null +++ b/nlf/datasets/catacaustics.py @@ -0,0 +1,321 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import os +import sys +from pathlib import Path + +import numpy as np +import torch +from PIL import Image + +from utils.intersect_utils import intersect_axis_plane +from utils.pose_utils import center_poses_with, correct_poses_bounds, create_spiral_poses, interpolate_poses +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + +from .llff import LLFFDataset + +# import open3d as o3d + + +def readBundleFolder(cameras_folder, W, H, extension=".png", name_ints=8): + poses = [] + intrinsics = [] + image_paths = [] + + with open(os.path.join(cameras_folder, "bundle.out")) as bundle_file: + # First line is a comment + _ = bundle_file.readline() + num_cameras, _ = [int(x) for x in bundle_file.readline().split()] + + for idx in range(num_cameras): + cam_name = "{num:0{width}}".format(num=idx, width=name_ints) + extension + focal, dist0, dist1 = [float(x) for x in bundle_file.readline().split()] + + # Rotation + R = [] + + for i in range(3): + R.append([float(x) for x in bundle_file.readline().split()]) + + R = np.array(R).reshape(3, 3) + + # Translation + T = [float(x) for x in bundle_file.readline().split()] + T = np.array(T) + + # Pose + pose = np.eye(4) + pose[:3, :3] = R + pose[:3, -1] = T + # pose[:3, :3] = R.T + # pose[:3, -1] = -R.T @ T.T + pose = np.linalg.inv(pose) + + pose_pre = np.eye(4) + # pose_pre[1, 1] *= -1 + # pose_pre[2, 2] *= -1 + + pose = pose_pre @ pose @ pose_pre + + poses.append(pose[:3]) + + # Intrinsics + image_path = os.path.join(cameras_folder, cam_name) + image_name = Path(cam_name).stem + image = Image.open(image_path) + + K = np.eye(3) + K[0, 0] = focal * W / float(image.size[0]) + K[0, 2] = W / 2.0 + K[1, 1] = focal * H / float(image.size[1]) + K[1, 2] = H / 2.0 + intrinsics.append(K) + + # TODO: + # 1) Poses + # 2) Intrinsics + # 3) Model settings + + # Image + image_path = os.path.join(cameras_folder, cam_name) + image_paths.append(image_path) + + return 
np.stack(poses, 0), np.stack(intrinsics, 0), image_paths + + +class CatacausticsDataset(LLFFDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + self.train_cameras_folder = os.path.join(self.root_dir, "cropped_train_cameras") + self.validation_cameras_folder = os.path.join(self.root_dir, "validation_cameras") + self.test_cameras_folder = os.path.join(self.root_dir, "test_path_cameras") + + train_poses, train_intrinsics, train_image_paths = readBundleFolder( + self.train_cameras_folder, self.img_wh[0], self.img_wh[1] + ) + validation_poses, validation_intrinsics, validation_image_paths = readBundleFolder( + self.validation_cameras_folder, self.img_wh[0], self.img_wh[1], name_ints=5 + ) + test_poses, test_intrinsics, test_image_paths = readBundleFolder( + self.test_cameras_folder, self.img_wh[0], self.img_wh[1], name_ints=5 + ) + + self.poses_dict = { + "train": train_poses, + "render": test_poses, + "val": validation_poses, + "test": test_poses, + } + self.poses = np.stack(self.poses_dict[self.split], 0) + + self.intrinsics_dict = { + "train": train_intrinsics, + "render": test_intrinsics, + "val": validation_intrinsics, + "test": test_intrinsics, + } + self.intrinsics = np.stack(self.intrinsics_dict[self.split], 0) + self.K = self.intrinsics_dict["train"][0] + + self.image_paths_dict = { + "train": train_image_paths, + "render": test_image_paths, + "val": validation_image_paths, + "test": test_image_paths, + } + self.image_paths = self.image_paths_dict[self.split] + + # Geometry + print("Reading Point-Cloud...") + + pcd = o3d.io.read_point_cloud(os.path.join(self.root_dir, "meshes", "dense_point_cloud.ply")) + self.bbox_center = np.array(pcd.get_center()) + points = np.array(pcd.points) + + min_dist = np.linalg.norm(points - self.bbox_center[None], axis=-1).min() + max_dist = np.linalg.norm(points - self.bbox_center[None], axis=-1).max() + fac = 8.0 / (min_dist + max_dist) + + min_dist = min_dist * fac + max_dist = max_dist * fac + self.bbox_center = self.bbox_center * fac + self.bbox_min = np.array(pcd.get_min_bound()) * fac - self.bbox_center + self.bbox_max = np.array(pcd.get_max_bound()) * fac - self.bbox_center + + self.depth_range = [min_dist, max_dist] + + # Change poses + self.poses[..., -1] = self.poses[..., -1] * fac - self.bbox_center + + # Bounds + self.near = min_dist + self.far = max_dist + self.bounds = np.array([self.near, self.far]) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + + ## Correct poses + # poses = np.copy(self.poses) + # train_poses = np.stack(self.poses_dict["train"]) + + # if self.use_ndc or self.correct_poses: + # self.poses, self.poses_avg = center_poses_with( + # poses, np.stack(self.poses_dict["train"][:1]) + # ) + # train_poses, _ = center_poses_with( + # np.copy(train_poses), np.stack(self.poses_dict["train"][:1]) + # ) + # #self.poses, self.poses_avg = center_poses_with( + # # poses, np.stack(self.poses_dict["train"]) + # #) + # #train_poses, _ = center_poses_with( + # # np.copy(train_poses), np.stack(self.poses_dict["train"]) + # #) + + # sc = np.max(np.abs(train_poses[..., -1])) + # self.poses[..., -1] /= sc + + # filter_idx = np.argwhere(self.poses[..., 2, 2] > 
0.75).astype(np.int32).reshape(-1).tolist() + # self.image_paths = [self.image_paths[i] for i in filter_idx] + # self.poses = self.poses[filter_idx] + # self.intrinsics = self.intrinsics[filter_idx] + + def prepare_render_data(self): + self.prepare_test_data() + + def prepare_train_data(self): + self.num_images = len(self.image_paths) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + + for idx in range(len(self.image_paths)): + # for idx in range(1): + # coords + self.all_coords += [self.get_coords(idx)] + + # Color + self.all_rgb += [self.get_rgb(idx)] + + # Format / save loaded data + self.update_all_data( + torch.cat(self.all_coords, 0), + torch.cat(self.all_rgb, 0), + ) + + def update_all_data(self, coords, rgb): + self.all_coords = coords + self.all_rgb = rgb + self.all_weights = self.get_weights() + + ## Patches + if self.use_patches or self.use_crop: + self._all_coords = torch.clone(self.all_coords) + self._all_rgb = torch.clone(self.all_rgb) + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_weights, + ], + -1, + ) + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + K = torch.FloatTensor(self.intrinsics[idx]) + c2w = torch.FloatTensor(self.poses[idx]) + + directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=True) + rays_o, rays_d = get_rays(directions, c2w) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + return rays + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + print(f"Loading image {idx}") + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file) + img = img.convert("RGBA") + + img = img.resize(self._img_wh) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_intrinsics(self): + return self.intrinsics + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(idx), + "pose": self.poses[idx], + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/nlf/datasets/donerf.py b/nlf/datasets/donerf.py new file mode 100644 index 0000000..aa64414 --- /dev/null +++ b/nlf/datasets/donerf.py @@ -0,0 +1,290 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT + +import json +import os + +import cv2 +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import ( + center_poses_with, + center_poses_with_rotation_only, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + +from .base import Base5DDataset, Base6DDataset +from .lightfield import LightfieldDataset + + +class DONeRFDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + self.center_poses = cfg.dataset.center_poses if "center_poses" in cfg.dataset else False + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + if self.split == "render": + self.read_meta_for_split("test", "cam_path_pan.json") + elif self.split == "test": + self.read_meta_for_split("test", "transforms_test.json") + elif self.split == "train": + self.read_meta_for_split("train", "transforms_train.json") + elif self.split == "val": + self.read_meta_for_split("val", "transforms_val.json") + else: + self.read_meta_for_split(self.split, "transforms_test.json") + + def load_poses_from_meta(self, meta, dataset_meta): + origin = np.array(dataset_meta["view_cell_center"]) + + # Image paths and pose + image_paths = [] + poses = [] + + for frame in meta["frames"]: + # Image path + if "file_path" in frame: + image_paths += [frame["file_path"]] + else: + image_paths += [None] + + # Pose + pose = np.array(frame["transform_matrix"])[:3, :4] + + if self.center_poses: + pose[:3, -1] = pose[:3, -1] - origin + + poses += [pose] + + poses = np.stack(poses, axis=0) + + return poses, image_paths + + def read_meta_for_split(self, split, split_file): + # Load train meta + with self.pmgr.open(os.path.join(self.root_dir, "transforms_train.json"), "r") as f: + self.train_meta = json.load(f) + + # Load meta + with self.pmgr.open(os.path.join(self.root_dir, split_file), "r") as f: + self.meta = json.load(f) + + if split == "val": + self.meta["frames"] = self.meta["frames"][: self.val_num] + + # Load dataset info + with self.pmgr.open(os.path.join(self.root_dir, "dataset_info.json"), "r") as f: + self.dataset_meta = json.load(f) + + W, H = self.img_wh + + self.focal = 0.5 * 800 / np.tan(0.5 * self.dataset_meta["camera_angle_x"]) + self.focal *= self.img_wh[0] / 800 + + self.K = np.eye(3) + self.K[0, 0] = self.focal + self.K[0, 2] = W / 2.0 + self.K[1, 1] = self.focal + self.K[1, 2] = H / 2.0 + + # Bounds, common for all scenes + self.depth_range = self.dataset_meta["depth_range"] + self.near = self.dataset_meta["depth_range"][0] + self.far = self.dataset_meta["depth_range"][1] + # self.depth_range = np.array([self.near * 1.5, self.far]) + + self.view_cell_size = np.max(np.array(self.dataset_meta["view_cell_size"])) + self.bounds = np.array([self.near, self.far]) + + # Image paths and pose + self.train_poses, _ = self.load_poses_from_meta(self.train_meta, self.dataset_meta) + self.poses, self.image_paths = self.load_poses_from_meta(self.meta, self.dataset_meta) + + # Correct + if self.use_ndc or self.correct_poses: + self.poses, _ = center_poses_with_rotation_only(self.poses, self.train_poses) + + if self.dataset_cfg.collection 
in ["pavillon"] and self.split == "render": + self.poses[..., :3, -1] *= 0.35 + + # Ray directions for all pixels, same for all images (same H, W, focal) + self.centered_pixels = True + self.directions = get_ray_directions_K(H, W, self.K, centered_pixels=self.centered_pixels) + + def prepare_train_data(self): + self.num_images = len(self.image_paths) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + self.all_depth = [] + self.all_points = [] + + for idx in range(len(self.image_paths)): + # coords + self.all_coords += [self.get_coords(idx)] + + # Color + self.all_rgb += [self.get_rgb(idx)] + + # Depth + self.all_depth += [self.get_depth(idx)] + + # Points + self.all_points += [self.get_points(idx)] + + self.update_all_data( + torch.cat(self.all_coords, 0), + torch.cat(self.all_rgb, 0), + torch.cat(self.all_depth, 0), + torch.cat(self.all_points, 0), + ) + + # Calculate bounds + mask = self.all_depth != 0.0 + self.bbox_min = self.all_points[mask.repeat(1, 3)].reshape(-1, 3).min(0)[0] + self.bbox_max = self.all_points[mask.repeat(1, 3)].reshape(-1, 3).max(0)[0] + + # self.near = float(self.all_depth[mask].min()) + # self.far = float(self.all_depth[mask].max()) + + def update_all_data(self, coords, rgb, depth, points): + self.all_coords = coords + self.all_rgb = rgb + self.all_depth = depth + self.all_points = points + self.all_weights = self.get_weights() + + ## Patches + if self.use_patches or self.use_crop: + self._all_coords = torch.clone(self.all_coords) + self._all_rgb = torch.clone(self.all_rgb) + self._all_depth = torch.clone(self.all_depth) + + ## All inputs + self.all_inputs = torch.cat([self.all_coords, self.all_rgb, self.all_depth, self.all_weights], -1) + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["depth"] = batch["inputs"][..., self.all_coords.shape[-1] + 3 : self.all_coords.shape[-1] + 4] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def prepare_render_data(self): + self.prepare_test_data() + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(self.directions, c2w) + + if self.use_ndc: + return self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + return torch.cat([rays_o, rays_d], dim=-1) + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, f"{image_path}.png"), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGBA")) + + img = cv2.resize(img, self._img_wh, interpolation=cv2.INTER_AREA) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = cv2.resize(img, self.img_wh, interpolation=cv2.INTER_AREA) + + img = self.transform(img) + img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_depth(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, f"{image_path}_depth.npz"), "rb") as depth_file: + with np.load(depth_file) as depth: + img = depth["arr_0"].reshape(800, 800) + + # Resize + img = cv2.resize(img, self._img_wh, interpolation=cv2.INTER_NEAREST) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = cv2.resize(img, 
self.img_wh, interpolation=cv2.INTER_NEAREST) + + # Flip + img = np.flip(img, 0) + + # Transform + img = self.transform(np.copy(img)) + + # Return + depth = img.view(1, -1).permute(1, 0) + directions = torch.nn.functional.normalize(self.directions, p=2.0, dim=-1).view(-1, 3) + depth = depth / torch.abs(directions[..., 2:3]) + + # depth[depth < self.near] = self.near + # depth[depth > self.far] = self.far + depth[depth < self.near] = 0.0 + depth[depth > self.far] = 0.0 + + return depth + + def get_points(self, idx): + rays = self.all_coords[idx][..., :6].reshape(-1, 6) + depth = self.all_depth[idx].reshape(-1, 1) + return rays[..., :3] + rays[..., 3:6] * depth + + def get_intrinsics(self): + K = np.eye(3) + K[0, 0] = self.focal + K[0, 2] = self.img_wh[0] / 2 + K[1, 1] = self.focal + K[1, 2] = self.img_wh[1] / 2 + + return K + + def __getitem__(self, idx): + if self.split == "render": + batch = {"coords": self.get_coords(idx), "pose": self.poses[idx], "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = {"coords": self.get_coords(idx), "rgb": self.get_rgb(idx), "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = {"coords": self.get_coords(idx), "rgb": self.get_rgb(idx), "depth": self.get_depth(idx), "idx": idx} + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/nlf/datasets/eikonal.py b/nlf/datasets/eikonal.py new file mode 100644 index 0000000..ce4b848 --- /dev/null +++ b/nlf/datasets/eikonal.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import os + +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import correct_poses_bounds, create_spiral_poses, interpolate_poses +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + +from .base import Base5DDataset + + +class EikonalDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.num_views = cfg.dataset.num_views if "num_views" in cfg.dataset else -1 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + with self.pmgr.open(os.path.join(self.root_dir, "poses_bounds.npy"), "rb") as f: + poses_bounds = np.load(f) + + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images/"))) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + if self.split in ["train", "val"]: + assert len(poses_bounds) == len( + self.image_paths + ), "Mismatch between number of images and number of poses! Please rerun COLMAP!" 
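+
+ # Note (descriptive, not part of the original patch): poses_bounds.npy here is
+ # assumed to follow the usual LLFF convention, i.e. one row of 17 values per image:
+ # a flattened 3x5 block (the 3x4 camera-to-world pose plus an [H, W, focal] column)
+ # followed by the near/far scene bounds. The unpacking below relies on that layout:
+ #   poses_bounds[:, :15].reshape(-1, 3, 5)   # per-image pose + [H, W, focal]
+ #   poses_bounds[:, -2:]                     # per-image [near, far] bounds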
+ + poses = poses_bounds[:, :15].reshape(-1, 3, 5) + self.bounds = poses_bounds[:, -2:] + + if self.num_views > 0: + poses = poses[: self.num_views] + self.image_paths = self.image_paths[: self.num_views] + + # Step 1: rescale focal length according to training resolution + H, W, self.focal = poses[0, :, -1] + self.cx, self.cy = W / 2.0, H / 2.0 + + self.K = np.eye(3) + self.K[0, 0] = self.focal * self.img_wh[0] / W + self.K[0, 2] = self.cx * self.img_wh[0] / W + self.K[1, 1] = self.focal * self.img_wh[1] / H + self.K[1, 2] = self.cy * self.img_wh[1] / H + + # Step 2: correct poses, bounds + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds, center=True) + + if not self.use_ndc: + self.bounds = self.bounds / np.max(np.abs(poses[..., :3, 3])) + self.poses[..., :3, 3] = self.poses[..., :3, 3] / np.max(np.abs(poses[..., :3, 3])) + + self.near = self.bounds.min() + self.far = self.bounds.max() + + # Step 3: Ray directions for all pixels + self.centered_pixels = True + self.directions = get_ray_directions_K( + self.img_wh[1], self.img_wh[0], self.K, centered_pixels=self.centered_pixels + ) + + # Step 4: Holdout validation images + if len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.poses = self.poses[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.poses = self.poses[train_indices] + + def get_intrinsics(self): + return self.K + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(self.directions, c2w) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + + if self.include_world: + rays = torch.cat([rays, rays_o, rays_d], dim=-1) + + return rays + else: + return torch.cat([rays_o, rays_d], dim=-1) + + def get_rgb(self, idx): + # Colors + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGB") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img + + def prepare_render_data(self): + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + radii = np.percentile(np.abs(self.poses[:16, ..., 3]), 50, axis=0) + self.poses = create_spiral_poses(self.poses[:16], radii, focus_depth * 100) + + self.poses = np.stack(self.poses, axis=0) + self.poses[..., :3, 3] = self.poses[..., :3, 3] - 0.1 * close_depth * self.poses[..., :3, 2] + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) diff --git a/nlf/datasets/fourier.py b/nlf/datasets/fourier.py new file mode 
100644 index 0000000..eea868c --- /dev/null +++ b/nlf/datasets/fourier.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import numpy as np +import torch +from torch.utils.data import Dataset + +from utils.ray_utils import get_lightfield_rays + + +def fft_rgb(rgb): + return torch.stack( + [ + torch.fft.fft2(rgb[..., 0], norm="ortho"), + torch.fft.fft2(rgb[..., 1], norm="ortho"), + torch.fft.fft2(rgb[..., 2], norm="ortho"), + ], + dim=-1, + ) + + +class FourierDataset(Dataset): + def __init__(self, cfg, train_dataset=None, **kwargs): + super().__init__() + + self.cfg = cfg + self.img_wh = train_dataset.img_wh + self.width, self.height = self.img_wh[0], self.img_wh[1] + self.aspect = train_dataset.aspect + self.num_images = train_dataset.num_images + self.batch_size = cfg.batch_size + + self.all_rays = torch.clone(train_dataset.all_rays) + self.all_rgb = torch.clone(train_dataset.all_rgb) + + # Prepare + self.compute_stats() + self.prepare_data() + self.shuffle() + + def compute_stats(self): + all_rays = self.all_rays.view(self.num_images, self.img_wh[1] * self.img_wh[0], -1) + ray_dim = all_rays.shape[-1] // 2 + + ## Per view statistics + self.all_means = [] + self.all_stds = [] + + for idx in range(self.num_images): + cur_rays = all_rays[idx] + self.all_means += [cur_rays.mean(0)] + self.all_stds += [cur_rays.std(0)] + + self.all_means = torch.stack(self.all_means, 0) + self.all_stds = torch.stack(self.all_stds, 0) + + ## Full dataset statistics + self.pos_mean = self.all_rays[..., :ray_dim].mean(0) + self.pos_std = self.all_rays[..., :ray_dim].std(0) + + self.dir_mean = self.all_rays[..., ray_dim:].mean(0) + self.dir_std = self.all_rays[..., ray_dim:].std(0) + + def prepare_data(self): + self.all_rays = self.all_rays.view(self.num_images, self.img_wh[1], self.img_wh[0], -1) + self.all_rgb = self.all_rgb.view(self.num_images, self.img_wh[1], self.img_wh[0], -1) + self.all_rgb_fft = fft_rgb(self.all_rgb) + self.rgb_fft_mean = self.all_rgb_fft.mean(0) + + def shuffle(self): + idx = list(np.random.choice(np.arange(0, self.num_images), size=self.num_images, replace=False)) + + self.all_rays = self.all_rays[idx] + self.all_rgb = self.all_rgb[idx] + # self.all_rgb_fft = torch.abs(self.all_rgb_fft[idx]) + self.all_rgb_fft = self.all_rgb_fft[idx] + + def __len__(self): + return len(self.all_rays) + + def __getitem__(self, idx): + return { + "rays": self.all_rays[idx], + "rgb": self.all_rgb[idx], + "mean_fft": self.rgb_fft_mean, + } + + def get_random_rays(self, ray_range): + pos_rand = ( + torch.rand( + ( + 1, + 1, + 3, + ) + ) + * 2 + - 1 + ) * ray_range.pos + pos_rand[..., 2] = 0 + + dir_rand = ( + torch.rand( + ( + self.height, + self.width, + 3, + ) + ) + * 2 + - 1 + ) * ray_range.dir + dir_rand[..., 2] = -1 + dir_rand = torch.nn.functional.normalize(dir_rand, p=2.0, dim=-1) + + pos_rand = pos_rand.repeat(self.height, self.width, 1) + + return torch.cat([pos_rand, dir_rand], -1) + + +class FourierLightfieldDataset(Dataset): + def __init__(self, cfg, train_dataset=None, **kwargs): + super().__init__() + + self.cfg = cfg + self.img_wh = train_dataset.img_wh + self.width, self.height = self.img_wh[0], self.img_wh[1] + self.aspect = train_dataset.aspect + self.num_images = train_dataset.num_images + self.batch_size = cfg.batch_size + + self.all_rays = torch.clone(train_dataset.all_rays) + 
self.all_rgb = torch.clone(train_dataset.all_rgb) + + # Prepare + self.compute_stats() + self.prepare_data() + self.shuffle() + + def compute_stats(self): + all_rays = self.all_rays.view(self.num_images, self.img_wh[1] * self.img_wh[0], -1) + ray_dim = all_rays.shape[-1] // 2 + + ## Per view statistics + self.all_means = [] + self.all_stds = [] + + for idx in range(self.num_images): + cur_rays = all_rays[idx] + self.all_means += [cur_rays.mean(0)] + self.all_stds += [cur_rays.std(0)] + + self.all_means = torch.stack(self.all_means, 0) + self.all_stds = torch.stack(self.all_stds, 0) + + ## Full dataset statistics + self.pos_mean = self.all_rays[..., :ray_dim].mean(0) + self.pos_std = self.all_rays[..., :ray_dim].std(0) + + self.dir_mean = self.all_rays[..., ray_dim:].mean(0) + self.dir_std = self.all_rays[..., ray_dim:].std(0) + + def prepare_data(self): + self.all_rays = self.all_rays.view(self.num_images, self.img_wh[1], self.img_wh[0], -1) + self.all_rgb = self.all_rgb.view(self.num_images, self.img_wh[1], self.img_wh[0], -1) + self.all_rgb_fft = fft_rgb(self.all_rgb) + self.rgb_fft_mean = self.all_rgb_fft.mean(0) + + def shuffle(self): + idx = list(np.random.choice(np.arange(0, self.num_images), size=self.num_images, replace=False)) + + self.all_rays = self.all_rays[idx] + self.all_rgb = self.all_rgb[idx] + # self.all_rgb_fft = torch.abs(self.all_rgb_fft[idx]) + self.all_rgb_fft = self.all_rgb_fft[idx] + + def __len__(self): + return len(self.all_rays) + + def __getitem__(self, idx): + return { + "rays": self.all_rays[idx], + "rgb": self.all_rgb[idx], + "mean_fft": self.rgb_fft_mean, + } + + def get_random_rays(self, ray_range): + pos_rand = (torch.rand((2,)) * 2 - 1) * ray_range.pos + + return get_lightfield_rays(self.width, self.height, pos_rand[0], pos_rand[1], self.aspect) diff --git a/nlf/datasets/immersive.py b/nlf/datasets/immersive.py new file mode 100644 index 0000000..f251464 --- /dev/null +++ b/nlf/datasets/immersive.py @@ -0,0 +1,687 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT + +import csv +import gc +import glob +import json +import os + +os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" +import pdb +import random + +import cv2 +import numpy as np +import torch +import torch.nn.functional as F +import torchvision.transforms.functional as TF +from PIL import Image +from scipy.spatial.transform import Rotation +from segment_anything_hq import SamPredictor, sam_model_registry +from torchvision.utils import save_image + +from utils.pose_utils import ( + average_poses, + center_poses_with, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import ( + get_ndc_rays_fx_fy, + get_pixels_for_image, + get_ray_directions_K, + get_rays, + sample_images_at_xy, +) + +from .base import Base5DDataset, Base6DDataset + + +def random_crop(image, output_size): + """ + Randomly crop an image to the specified output size and return the crop + along with its position. + + Parameters: + - image: A PIL Image or a Tensor image. + - output_size: Tuple or list of (height, width) for the output crop size. + + Returns: + - cropped_image: The cropped image. + - crop_position: The top-left corner position (x, y) of the crop. 
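+
+ Example (illustrative only; assumes a CHW tensor or PIL image larger than the crop,
+ e.g. the 960x960 crop used later in prepare_train_data):
+ crop, (x, y) = random_crop(frame, (960, 960))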
+ """ + image_width, image_height = image.size if isinstance(image, Image.Image) else image.shape[-2:] + crop_height, crop_width = output_size + + if crop_width > image_width or crop_height > image_height: + raise ValueError("Crop size must be smaller than image size.") + + x = random.randint(0, image_width - crop_width) + y = random.randint(0, image_height - crop_height) + + cropped_image = TF.crop(image, x, y, crop_height, crop_width) + crop_position = (x, y) + + return cropped_image, crop_position + + +def perspective_to_fisheye(points, K, radial_distortion): + return cv2.fisheye.undistortPoints( + points[:, None], K, np.array([radial_distortion[0], radial_distortion[1], 0.0, 0.0]).astype(np.float32) + ) + + +class ImmersiveDataset(Base6DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + self.num_frames = cfg.dataset.num_frames if "num_frames" in cfg.dataset else 1 + self.start_frame = cfg.dataset.start_frame if "start_frame" in cfg.dataset else 1 + self.keyframe_step = cfg.dataset.keyframe_step if "keyframe_step" in cfg.dataset else 1 + self.num_keyframes = ( + cfg.dataset.num_keyframes if "num_keyframes" in cfg.dataset else self.num_frames // self.keyframe_step + ) + + self.load_full_step = cfg.dataset.load_full_step if "load_full_step" in cfg.dataset else 1 + self.subsample_keyframe_step = ( + cfg.dataset.subsample_keyframe_step if "subsample_keyframe_step" in cfg.dataset else 1 + ) + self.subsample_keyframe_frac = ( + cfg.dataset.subsample_keyframe_frac if "subsample_keyframe_frac" in cfg.dataset else 1.0 + ) + self.subsample_frac = cfg.dataset.subsample_frac if "subsample_frac" in cfg.dataset else 1.0 + + self.keyframe_offset = 0 + self.frame_offset = 0 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + W, H = self.img_wh + + # Load meta + with self.pmgr.open(os.path.join(self.root_dir, "models.json"), "r") as f: + self.meta = json.load(f) + + # Populate vars + self.video_paths = [] + self.intrinsics = [] + self.distortions = [] + self.poses = [] + + for idx, camera in enumerate(self.meta): + + # DEBUGGING + # if idx >= 2: + # break + + # Path + self.video_paths.append(os.path.join(self.root_dir, camera["name"] + ".mp4")) + + # Intrinsics + width_factor = self.img_wh[0] / 2560.0 + height_factor = self.img_wh[1] / 1920.0 + + K = np.eye(3) + K = np.array( + [ + [camera["focal_length"] * width_factor, 0.0, camera["principal_point"][0] * width_factor], + [0.0, camera["focal_length"] * height_factor, camera["principal_point"][1] * height_factor], + [0.0, 0.0, 1.0], + ] + ) + + self.intrinsics.append(K) + + # Distortion + radial_distortion = np.array(camera["radial_distortion"]) + self.distortions.append(radial_distortion[:2]) + + # Pose + R = Rotation.from_rotvec(camera["orientation"]).as_matrix() + T = np.array(camera["position"]) + + pose = np.eye(4) + pose[:3, :3] = R.T + pose[:3, -1] = T + + pose_pre = np.eye(4) + pose_pre[1, 1] *= -1 + pose_pre[2, 2] *= -1 + pose = pose_pre @ pose @ pose_pre + + if camera["name"] == "camera_0001": + val_idx = idx + center_pose = pose[None, :3, :4] + + self.poses.append(pose[:3, :4]) + + self.images_per_frame = len(self.video_paths) + self.total_num_views = len(self.video_paths) + self.intrinsics = np.stack([self.intrinsics for i in 
range(self.num_frames)]).reshape(-1, 3, 3) + self.distortions = np.stack([self.distortions for i in range(self.num_frames)]).reshape(-1, 2) + self.poses = np.stack([self.poses for i in range(self.num_frames)]).reshape(-1, 3, 4) + self.K = self.intrinsics[0] + + # Times + self.times = np.tile(np.linspace(0, 1, self.num_frames)[..., None], (1, self.images_per_frame)) + self.times = self.times.reshape(-1) + + self.camera_ids = np.tile( + np.linspace(0, self.images_per_frame - 1, self.images_per_frame)[None, :], (self.num_frames, 1) + ) + self.camera_ids = self.camera_ids.reshape(-1) + + ## Bounds, common for all scenes + if self.dataset_cfg.collection in ["01_Welder"]: + self.near = 0.25 + self.far = 6.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + + if self.dataset_cfg.collection in ["02_Flames"]: + self.near = 1.0 + self.far = 10.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + if self.dataset_cfg.collection in ["04_Truck"]: + self.near = 0.5 + self.far = 10.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + elif self.dataset_cfg.collection in ["05_Horse"]: + self.near = 0.5 + self.far = 45.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + elif self.dataset_cfg.collection in ["07_Car"]: + self.near = 0.5 + self.far = 50.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + elif self.dataset_cfg.collection in ["09_Alexa_Meade_Exhibit"]: + self.near = 0.5 + self.far = 30.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + elif self.dataset_cfg.collection in ["10_Alexa_Meade_Face_Paint_1"]: + self.near = 0.25 + self.far = 6.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([0.5, self.far]) + elif self.dataset_cfg.collection in ["11_Alexa_Meade_Face_Paint_2"]: + self.near = 0.25 + self.far = 6.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([0.5, self.far]) + elif self.dataset_cfg.collection in ["12_Cave"]: + self.near = 0.5 + self.far = 20.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + else: + self.near = 0.5 + self.far = 10.0 + self.bounds = np.array([self.near, self.far]) + self.depth_range = np.array([self.near * 2.0, self.far]) + + ## Correct poses, bounds + poses = np.copy(self.poses) + + if self.use_ndc or self.correct_poses: + self.poses, self.poses_avg = center_poses_with(poses, center_pose) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + + ## Holdout validation images + val_indices = [] + + if len(self.val_set) > 0: + val_indices += [frame * self.images_per_frame + val_idx for frame in range(self.num_frames)] + + train_indices = [i for i in range(len(self.poses)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + if not self.val_all and len(self.val_set) > 0: + self.video_paths = [self.video_paths[val_idx]] + + self.intrinsics = self.intrinsics[val_indices] + self.camera_ids = self.camera_ids[val_indices] + self.distortions = self.distortions[val_indices] + self.poses = self.poses[val_indices] + self.times = self.times[val_indices] + elif self.split == "train": + if not 
self.val_all and len(self.val_set) > 0: + self.video_paths = [self.video_paths[i] for i in range(len(self.video_paths)) if i != val_idx] + + self.intrinsics = self.intrinsics[train_indices] + self.camera_ids = self.camera_ids[train_indices] + self.distortions = self.distortions[train_indices] + self.poses = self.poses[train_indices] + self.times = self.times[train_indices] + + self.num_images = len(self.poses) + self.images_per_frame = len(self.video_paths) + + def random_subsample(self, coords, rgb, last_rgb, frame, fac=1.0): + if (frame % self.load_full_step) == 0: + return coords, rgb + elif (frame % self.subsample_keyframe_step) == 0: + num_take = int(np.round(coords.shape[0] * self.subsample_keyframe_frac * fac)) + perm = torch.tensor(np.random.permutation(coords.shape[0]))[:num_take] + else: + num_take = int(np.round(coords.shape[0] * self.subsample_frac * fac)) + perm = torch.tensor(np.random.permutation(coords.shape[0]))[:num_take] + + return coords[perm].view(-1, coords.shape[-1]), rgb[perm].view(-1, rgb.shape[-1]) + + def regular_subsample(self, coords, rgb, last_rgb, frame, fac=1.0): + if (frame % self.load_full_step) == 0: + return coords, rgb + elif (frame % self.subsample_keyframe_step) == 0: + subsample_every = int(np.round(1.0 / (self.subsample_keyframe_frac * fac))) + offset = self.keyframe_offset + self.keyframe_offset += 1 + else: + subsample_every = int(np.round(1.0 / (self.subsample_frac * fac))) + offset = self.frame_offset + self.frame_offset += 1 + + pixels = get_pixels_for_image(self.img_wh[1], self.img_wh[0]).reshape(-1, 2).long() + mask = ((pixels[..., 0] + pixels[..., 1] + offset) % subsample_every) == 0.0 + + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def test_subsample(self, coords, rgb, last_rgb, frame): + mask = coords[..., 5] < -0.25 + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def importance_subsample(self, coords, rgb, last_rgb, frame, fac=1.0): + if (frame % self.load_full_step) == 0: + return coords, rgb + + diff = torch.abs(rgb - last_rgb).mean(-1) + diff_sorted, _ = torch.sort(diff) + + if (frame % self.subsample_keyframe_step) == 0: + num_take = int(np.round(coords.shape[0] * self.subsample_keyframe_frac * fac)) + else: + num_take = int(np.round(coords.shape[0] * self.subsample_frac * fac)) + + mask = diff > diff_sorted[-num_take] + mask = mask & (coords[..., 5] < -0.05) + + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def subsample(self, coords, rgb, last_rgb, frame): + # coords, rgb = self.regular_subsample(coords, rgb, last_rgb, frame) + # return coords, rgb + + if (frame % self.load_full_step) == 0: + return coords, rgb + else: + coords, rgb = self.importance_subsample(coords, rgb, last_rgb, frame) + + return coords, rgb + + def prepare_train_data(self): + sam_checkpoint = "pre_trained/sam_hq_vit_h.pth" # Update this path to your checkpoint + model_type = "vit_h" + sam = sam_model_registry[model_type](checkpoint=sam_checkpoint) + sam.to("cuda") + predictor = SamPredictor(sam) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + self.all_sam_coords = [] + self.all_sam = [] + num_pixels = 0 + last_rgb_full = None + + for video_idx in range(len(self.video_paths)): + self.keyframe_offset = video_idx + self.frame_offset = video_idx + + # Open video + cam = cv2.VideoCapture(self.video_paths[video_idx]) + # Get coords + video_coords = self.get_coords(video_idx) + + ctr = 0 + frame_idx = 0 + + while ctr < 
self.start_frame + self.num_frames: + _, frame = cam.read() + if ctr < self.start_frame: + ctr += 1 + continue + else: + ctr += 1 + + cur_time = self.times[frame_idx * self.images_per_frame + video_idx] + cur_frame = int( + np.round(self.times[frame_idx * self.images_per_frame + video_idx] * (self.num_frames - 1)) + ) + + # Coords + cur_coords = torch.cat([video_coords[..., :-1], torch.ones_like(video_coords[..., -1:]) * cur_time], -1) + + # Get RGB + crop_size = 960 + cur_rgb_full, img_cropped, features_coords = self.get_rgb_sam(frame, cur_coords, crop_size) + predictor.set_image((img_cropped * 255).permute(1, 2, 0).numpy().astype(np.uint8)) + features = predictor.features + + features = features / 2 + 0.5 + features = features.squeeze() # torch.Size([256, 64, 64]) + # features = F.interpolate(features.unsqueeze(0), size=(crop_size, crop_size), mode='bilinear').squeeze(0) + features_coords = F.interpolate(features_coords.unsqueeze(0), size=(64, 64), mode="bilinear").squeeze(0) + + # Subsample + if frame_idx == 0: + cur_rgb = cur_rgb_full + else: + cur_coords, cur_rgb = self.subsample(cur_coords, cur_rgb_full, last_rgb_full, cur_frame) + + cur_feature_coords = features_coords.permute(1, 2, 0).reshape(64 * 64, -1) + cur_feature = features.permute(1, 2, 0).reshape(64 * 64, -1).cpu() + + # Save for later + last_rgb_full = cur_rgb_full + self.all_coords += [cur_coords] + self.all_rgb += [cur_rgb] + self.all_sam_coords += [cur_feature_coords] + self.all_sam += [cur_feature] + + # Number of pixels + num_pixels += cur_rgb.shape[0] + + print(f"Video {video_idx} frame {frame_idx}") + print("Full res images loaded:", num_pixels / (self.img_wh[0] * self.img_wh[1])) + + # Increment frame idx + frame_idx += 1 + + cam.release() + del sam + del predictor + + # Format / save loaded data + self.all_coords = torch.cat(self.all_coords, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + self.all_sam_coords = torch.cat(self.all_sam_coords, 0) + self.all_sam = torch.cat(self.all_sam, 0) + + self.update_all_data() + + def update_all_data(self): + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + ], + -1, + ) + self.all_inputs_sam = torch.cat( + [ + self.all_sam_coords, + self.all_sam, + ], + -1, + ) + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["coords_sam"] = batch["inputs_sam"][..., : self.all_sam_coords.shape[-1]] + batch["sam"] = batch["inputs_sam"][..., self.all_sam_coords.shape[-1] : self.all_sam_coords.shape[-1] + 256] + del batch["inputs"] + del batch["inputs_sam"] + + return batch + + def prepare_render_data(self): + if os.path.exists(self.cfg.params.input_pose): + with open(self.cfg.params.input_pose, "r") as json_file: + pose_time_list = json.load(json_file) + self.times = [pose_time["time"] for pose_time in pose_time_list] + self.poses = [pose_time["pose"] for pose_time in pose_time_list] + self.num_frames = len(self.times) + + else: + # Get poses + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + poses_per_frame = self.poses.shape[0] // self.num_frames + poses_one_frame = self.poses[ + (self.num_frames // 2) * poses_per_frame : (self.num_frames // 2 + 1) * poses_per_frame + ] + poses_each_frame = 
interpolate_poses(self.poses[::poses_per_frame], self.render_supersample) + radii = np.percentile(np.abs(poses_one_frame[..., 3]), 50, axis=0) + radii[..., :-1] *= 1.0 + radii[..., -1] *= 0.05 + + if self.num_frames > 1: + poses = create_spiral_poses( + poses_one_frame, + radii, + focus_depth, + N=self.num_frames * self.render_supersample, + ) + + reference_pose = np.eye(4) + reference_pose[:3, :4] = self.poses[(self.num_frames // 2) * poses_per_frame] + reference_pose = np.linalg.inv(reference_pose) + + for pose_idx in range(len(poses)): + cur_pose = np.eye(4) + cur_pose[:3, :4] = poses[pose_idx] + poses[pose_idx] = poses_each_frame[pose_idx] @ (reference_pose @ cur_pose) + else: + poses = create_spiral_poses(poses_one_frame, radii, focus_depth * 100, N=120) + + self.poses = np.stack(poses, axis=0) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + # Get times + if (self.num_frames - 1) > 0: + self.times = np.linspace(0, self.num_frames - 1, len(self.poses)) + + if not self.render_interpolate_time: + self.times = np.round(self.times) + + self.times = self.times / (self.num_frames - 1) + else: + self.times = [0.0 for p in self.poses] + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + if self.split != "train" or self.split == "render": + camera_id = 1 + else: + camera_id = self.camera_ids[idx] + + if self.split != "render": + K = torch.FloatTensor(self.intrinsics[idx]) + distortion = self.distortions[idx] + else: + K = torch.FloatTensor(self.intrinsics[0]) + K[0, 0] *= 0.75 + K[1, 1] *= 0.75 + distortion = None + + c2w = torch.FloatTensor(self.poses[idx]) + time = self.times[idx] + + print("Loading time:", np.round(time * (self.num_frames - 1))) + + # Undistort + if distortion is not None: + directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=True).view(-1, 3) + directions = perspective_to_fisheye( + np.array(directions[..., :2]).astype(np.float32), + np.eye(3).astype(np.float32), + distortion.astype(np.float32), + )[:, 0] + directions = np.concatenate( + [directions[..., 0:1], directions[..., 1:2], -np.ones_like(directions[..., -1:])], -1 + ) + + directions = torch.tensor(directions) + directions = torch.nn.functional.normalize(directions, dim=-1) + else: + directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=True).view(-1, 3) + + # Convert to world space + rays_o, rays_d = get_rays(directions, c2w) + + # Convert to NDC + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * camera_id], dim=-1) + + # Add times + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * time], dim=-1) + + # Return + return rays + + def get_rgb(self, img): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + if img.shape[0] != self._img_wh[0] or img.shape[1] != self._img_wh[1]: + img = cv2.resize(img, self._img_wh, cv2.INTER_LANCZOS4) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = cv2.resize(img, self.img_wh, cv2.INTER_AREA) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img + + def get_rgb_sam(self, img, coords, crop_size): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + if img.shape[0] != self._img_wh[0] or img.shape[1] != self._img_wh[1]: + img = cv2.resize(img, self._img_wh, 
cv2.INTER_LANCZOS4) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = cv2.resize(img, self.img_wh, cv2.INTER_AREA) + + img = self.transform(img) + coords = coords.permute(1, 0).view(-1, img.shape[1], img.shape[2]) + + img_coords = torch.cat([img, coords], 0) + + img_coords_cropped, crop_position = random_crop(img_coords, (crop_size, crop_size)) + img_cropped = img_coords_cropped[:3] + coords_cropped = img_coords_cropped[3:] + + img = img.view(3, -1).permute(1, 0) + + return img, img_cropped, coords_cropped + + def get_rgb_one(self, idx): + # Open video + cam = cv2.VideoCapture(self.video_paths[idx % self.images_per_frame]) + + # Get RGB + ctr = 0 + frame_idx = 0 + + while ctr < self.start_frame + self.num_frames: + _, frame = cam.read() + + if ctr < self.start_frame: + ctr += 1 + continue + else: + ctr += 1 + + if frame_idx != (idx // self.images_per_frame): + frame_idx += 1 + continue + else: + rgb = self.get_rgb(frame) + break + + cam.release() + return rgb + + def get_intrinsics(self): + return self.intrinsics + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(idx), + "pose": self.poses[idx], + "time": self.times[idx], + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb_one(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb_one(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + if isinstance(self.all_inputs_sam, list) or isinstance(self.all_inputs_sam, np.ndarray): + length = len(self.all_inputs_sam) + elif isinstance(self.all_inputs_sam, torch.Tensor): + length = self.all_inputs_sam.size(0) + else: + raise TypeError("Unsupported data type for self.all_inputs_sam") + + batch = { + "inputs": self.all_inputs[idx], + "inputs_sam": self.all_inputs_sam[idx % length], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/nlf/datasets/lightfield.py b/nlf/datasets/lightfield.py new file mode 100644 index 0000000..6bbcdfa --- /dev/null +++ b/nlf/datasets/lightfield.py @@ -0,0 +1,393 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import numpy as np +import torch + +from utils.ray_utils import get_lightfield_rays + +from .base import Base5DDataset, BaseDataset + + +class LightfieldDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + + ## Dataset cfg + self.cfg = cfg + self.split = getattr(cfg.dataset, "split", split) + self.dataset_cfg = getattr(cfg.dataset, self.split, cfg.dataset) + + ## Param + + # Lightfield params + self.rows = self.dataset_cfg.lightfield.rows + self.cols = self.dataset_cfg.lightfield.cols + self.step = self.dataset_cfg.lightfield.step + + self.start_row = self.dataset_cfg.lightfield.start_row if "start_row" in self.dataset_cfg.lightfield else 0 + self.end_row = self.dataset_cfg.lightfield.end_row if "end_row" in self.dataset_cfg.lightfield else self.rows + + self.start_col = self.dataset_cfg.lightfield.start_col if "start_col" in self.dataset_cfg.lightfield else 0 + self.end_col = self.dataset_cfg.lightfield.end_col if "end_col" in self.dataset_cfg.lightfield else self.cols + + self.st_scale = self.dataset_cfg.lightfield.st_scale if "st_scale" in self.dataset_cfg.lightfield else 1.0 + self.uv_scale = self.dataset_cfg.lightfield.uv_scale if "uv_scale" in self.dataset_cfg.lightfield else 1.0 + + if self.step > 1: + self.num_rows = (self.end_row - self.start_row) // self.step + 1 + self.num_cols = (self.end_col - self.start_col) // self.step + 1 + else: + self.num_rows = (self.end_row - self.start_row) // self.step + self.num_cols = (self.end_col - self.start_col) // self.step + + self.num_images = self.num_rows * self.num_cols + + self.near = 0 + self.far = 1 + self.near_plane = self.dataset_cfg.lightfield.near if "near" in self.dataset_cfg.lightfield else -1.0 + self.far_plane = self.dataset_cfg.lightfield.far if "far" in self.dataset_cfg.lightfield else 0.0 + + # Validation and testing + self.val_all = (self.dataset_cfg.val_all if "val_all" in self.dataset_cfg else False) or self.step == 1 + self.val_pairs = self.dataset_cfg.val_pairs if "val_pairs" in self.dataset_cfg else [] + + if len(self.val_pairs) > 0: + self.val_pairs = list(zip(self.val_pairs[::2], self.val_pairs[1::2])) + self.num_test_images = len(self.val_pairs) + elif self.val_all: + self.num_test_images = (self.end_row - self.start_row) * (self.end_col - self.start_col) + else: + self.num_test_images = (self.end_row - self.start_row) * (self.end_col - self.start_col) - self.num_images + + # Render params + self.disp_row = self.dataset_cfg.lightfield.disp_row + self.supersample = self.dataset_cfg.lightfield.supersample + self.keyframe_step = ( + self.dataset_cfg.lightfield.keyframe_step if "keyframe_step" in self.dataset_cfg.lightfield else -1 + ) + self.keyframe_subsample = ( + self.dataset_cfg.lightfield.keyframe_subsample if "keyframe_subsample" in self.dataset_cfg.lightfield else 1 + ) + + self.render_spiral = ( + self.dataset_cfg.render_params.spiral if "spiral" in self.dataset_cfg.render_params else False + ) + self.render_far = self.dataset_cfg.render_params.far if "far" in self.dataset_cfg.render_params else False + + self.spiral_rad = ( + self.dataset_cfg.render_params.spiral_rad if "spiral_rad" in self.dataset_cfg.render_params else 0.5 + ) + self.uv_downscale = ( + self.dataset_cfg.render_params.uv_downscale if "uv_downscale" in self.dataset_cfg.render_params else 0.0 + ) + + if "vis_st_scale" in self.dataset_cfg.lightfield: + self.vis_st_scale = ( + self.dataset_cfg.lightfield.vis_st_scale + if self.dataset_cfg.lightfield.vis_st_scale is not None + else 
self.st_scale + ) + else: + self.vis_st_scale = self.st_scale + + if "vis_uv_scale" in self.dataset_cfg.lightfield: + self.vis_uv_scale = ( + self.dataset_cfg.lightfield.vis_uv_scale + if self.dataset_cfg.lightfield.vis_uv_scale is not None + else self.uv_scale + ) + else: + self.vis_uv_scale = self.uv_scale + + super().__init__(cfg, split, val_all=self.val_all, **kwargs) + + self.poses = [self.get_coord(st_idx) for st_idx in self.all_st_idx] + + def read_meta(self): + pass + + def prepare_train_data(self): + self.all_coords = [] + self.all_rgb = [] + self.all_st_idx = [] + + for t_idx in range(self.start_row, self.end_row, self.step): + for s_idx in range(self.start_col, self.end_col, self.step): + if (s_idx, t_idx) in self.val_pairs: + continue + + # Rays + self.all_coords += [self.get_coords(s_idx, t_idx)] + + idx = t_idx * self.cols + s_idx + image_path = self.image_paths[idx] + + print(image_path) + print(self.all_coords[0][0]) + exit() + + # Color + self.all_rgb += [self.get_rgb(s_idx, t_idx)] + + # Random subsample for frames that are not keyframes + # TODO: Re-do every N iterations + if self.keyframe_step != -1 and self.keyframe_subsample != 1: + num_take = self.all_coords[-1].shape[0] // self.keyframe_subsample + + if (s_idx % self.keyframe_step != 0) or (t_idx % self.keyframe_step != 0): + perm = torch.tensor(np.random.permutation(self.all_coords[-1].shape[0]))[:num_take] + + self.all_coords[-1] = self.all_coords[-1][perm].view(-1, 6) + self.all_rgb[-1] = self.all_rgb[-1][perm].view(-1, 3) + + # Pose + self.all_st_idx.append((s_idx, t_idx)) + + self.all_coords = torch.cat(self.all_coords, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + self.all_weights = self.get_weights() + self.all_inputs = torch.cat([self.all_coords, self.all_rgb, self.all_weights], -1) + + def prepare_val_data(self): + self.prepare_test_data() + + def prepare_test_data(self): + self.all_st_idx = [] + + for t_idx in range(self.start_row, self.end_row, 1): + for s_idx in range(self.start_col, self.end_col, 1): + if len(self.val_pairs) == 0: + if (t_idx % self.step) == 0 and (s_idx % self.step) == 0 and not self.val_all: + continue + elif (s_idx, t_idx) not in self.val_pairs: + continue + + self.all_st_idx.append((s_idx, t_idx)) + + def prepare_render_data(self): + if not self.render_spiral: + self.all_st_idx = [] + t_idx = self.disp_row + + for s_idx in range(self.cols * self.supersample): + self.all_st_idx.append((s_idx / self.supersample, t_idx)) + else: + N = 120 + rots = 2 + scale = self.spiral_rad + + self.all_st_idx = [] + + for theta in np.linspace(0.0, 2.0 * np.pi * rots, N + 1)[:-1]: + s = (np.cos(theta) * scale + 1) / 2.0 * (self.cols - 1) + t = -np.sin(theta) * scale / 2.0 * (self.rows - 1) + ((self.rows - 1) - self.disp_row) + + self.all_st_idx.append((s, t)) + + def get_coord(self, st_idx): + s = (st_idx[0] / (self.cols - 1)) * 2 - 1 if self.cols > 1 else 0 + t = -(((st_idx[1] / (self.rows - 1)) * 2 - 1) if self.rows > 1 else 0) + + return (s, t) + + def get_coords(self, s_idx, t_idx): + if self.split == "render": + st_scale = self.vis_st_scale + uv_scale = self.vis_uv_scale + else: + st_scale = self.st_scale + uv_scale = self.uv_scale + + s, t = self.get_coord((s_idx, t_idx)) + + if self.render_spiral or self.render_far: + return get_lightfield_rays( + self.img_wh[0], + self.img_wh[1], + s, + t, + self.aspect, + st_scale=st_scale, + uv_scale=uv_scale, + near=self.near_plane, + far=self.far_plane, + use_inf=True, + center_u=-s * self.uv_downscale, + center_v=-t * self.uv_downscale, + ) + else: + 
return get_lightfield_rays( + self.img_wh[0], + self.img_wh[1], + s, + t, + self.aspect, + near=self.near_plane, + far=self.far_plane, + st_scale=st_scale, + uv_scale=uv_scale, + ) + + def get_rgb(self, s_idx, t_idx): + pass + + def get_closest_rgb(self, query_st): + W = self.img_wh[0] + H = self.img_wh[1] + + images = self.all_rgb.view(self.num_images, H, W, -1) + dists = np.linalg.norm(np.array(self.poses) - np.array(query_st)[None], axis=-1) + return images[list(np.argsort(dists))[0]] + + def __len__(self): + if self.split == "train": + return len(self.all_coords) + elif self.split == "val": + return min(self.val_num, self.num_test_images) + elif self.split == "render": + if not self.render_spiral: + return self.supersample * self.cols + else: + return 120 + else: + return self.num_test_images + + def __getitem__(self, idx): + if self.split == "render": + s_idx, t_idx = self.all_st_idx[idx] + + batch = { + "coords": LightfieldDataset.get_coords(self, s_idx, t_idx), + "pose": self.poses[idx], + "idx": idx, + "s_idx": s_idx, + "t_idx": t_idx, + } + + elif self.split == "val" or self.split == "test": + s_idx, t_idx = self.all_st_idx[idx] + + batch = { + "coords": self.get_coords(s_idx, t_idx), + "rgb": self.get_rgb(s_idx, t_idx), + "idx": idx, + "s_idx": s_idx, + "t_idx": t_idx, + } + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch + + +class EPIDataset(BaseDataset): + def __init__(self, cfg, split="train", **kwargs): + + ## Dataset cfg + self.cfg = cfg + self.split = getattr(cfg.dataset, "split", split) + self.dataset_cfg = getattr(cfg.dataset, self.split, cfg.dataset) + + # Lightfield params + self.st_scale = self.dataset_cfg.lightfield.st_scale if "st_scale" in self.dataset_cfg.lightfield else 1.0 + self.supersample = self.dataset_cfg.lightfield.supersample if self.split == "render" else 1 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + pass + + def prepare_train_data(self): + self.all_coords = [] + self.all_rgb = [] + + # Rays + self.all_coords += [self.get_coords()] + + # Color + self.all_rgb += [self.get_rgb()] + + # Stack + self.all_coords = torch.cat(self.all_coords, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + self.all_weights = self.get_weights() + + self.all_inputs = torch.cat([self.all_coords, self.all_rgb, self.all_weights], -1) + + def prepare_val_data(self): + self.prepare_test_data() + + def prepare_test_data(self): + self.prepare_train_data() + + def prepare_render_data(self): + self.all_coords = [] + self.all_rgb = [] + + # Rays + self.all_coords += [self.get_coords()] + + # Color + self.all_rgb += [self.get_rgb()] + + # Stack + self.all_coords = torch.cat(self.all_coords, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + self.all_weights = self.get_weights() + + def get_coords(self): + u = torch.linspace(-1, 1, self.img_wh[0], dtype=torch.float32) + s = torch.linspace(-1, 1, self.img_wh[1] * self.supersample, dtype=torch.float32) * self.st_scale + su = list(torch.meshgrid([s, u])) + return torch.stack(su, -1).view(-1, 2) + + def get_rgb(self): + # TODO: return single image + pass + + def get_closest_rgb(self, query_st): + pass + + def __len__(self): + if self.split == "train": + return len(self.all_coords) + elif self.split == "val": + return 1 + elif self.split == "render": + return 1 + else: + return 1 + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(), + } + + elif self.split == "val" or 
self.split == "test": + batch = { + "coords": self.get_coords(), + "rgb": self.get_rgb(), + } + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + if self.split == "render": + batch["H"] *= self.supersample + + return batch diff --git a/nlf/datasets/llff.py b/nlf/datasets/llff.py new file mode 100644 index 0000000..c6992e1 --- /dev/null +++ b/nlf/datasets/llff.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import os + +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import correct_poses_bounds +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + +from .base import Base5DDataset + + +class LLFFDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + with self.pmgr.open(os.path.join(self.root_dir, "poses_bounds.npy"), "rb") as f: + poses_bounds = np.load(f) + + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images/"))) + self.camera_ids = np.linspace(0, len(self.image_paths) - 1, len(self.image_paths)) + self.total_num_views = len(self.image_paths) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + if self.split in ["train", "val"]: + assert len(poses_bounds) == len( + self.image_paths + ), "Mismatch between number of images and number of poses! Please rerun COLMAP!" 
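Note on the layout consumed here (the standard LLFF convention, stated for context rather than taken from this patch): each row of poses_bounds.npy packs a flattened 3x5 camera-to-world matrix whose fifth column stores [H, W, focal], followed by the scene's near/far bounds, which is exactly what the reshape and slicing below unpack before the intrinsics are rescaled to the training resolution. The per-pixel ray directions themselves come from get_ray_directions_K in utils.ray_utils; a minimal standalone sketch of what such a helper is assumed to compute (camera looking down -z, y up, one direction per pixel from the rescaled K) is:

import numpy as np

def ray_directions_from_K(H, W, K, centered_pixels=True):
    # Hedged sketch only -- not the repository's get_ray_directions_K.
    fx, fy = K[0, 0], K[1, 1]
    cx, cy = K[0, 2], K[1, 2]
    i, j = np.meshgrid(np.arange(W, dtype=np.float32),
                       np.arange(H, dtype=np.float32), indexing="xy")
    if centered_pixels:
        i, j = i + 0.5, j + 0.5
    # One direction per pixel in the camera frame; callers normalize if needed.
    return np.stack([(i - cx) / fx, -(j - cy) / fy, -np.ones_like(i)], axis=-1)

These directions are computed once per dataset and then rotated into world space with each image's camera-to-world pose via get_rays when get_coords assembles the per-pixel rays.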
+ + poses = poses_bounds[:, :15].reshape(-1, 3, 5) + self.bounds = poses_bounds[:, -2:] + + # Step 1: rescale focal length according to training resolution + H, W, self.focal = poses[0, :, -1] + self.cx, self.cy = W / 2.0, H / 2.0 + + self.K = np.eye(3) + self.K[0, 0] = self.focal * self.img_wh[0] / W + self.K[0, 2] = self.cx * self.img_wh[0] / W + self.K[1, 1] = self.focal * self.img_wh[1] / H + self.K[1, 2] = self.cy * self.img_wh[1] / H + + # Step 2: correct poses, bounds + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds) + + if not self.use_ndc: + self.bounds = self.bounds / np.max(np.abs(poses[..., :3, 3])) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + self.depth_range = np.array([self.near * 2.0, self.far]) + + # Step 3: Ray directions for all pixels + self.directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], self.K, centered_pixels=True) + + # Step 4: Holdout validation images + if len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.camera_ids = self.camera_ids[val_indices] + self.poses = self.poses[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.camera_ids = self.camera_ids[train_indices] + self.poses = self.poses[train_indices] + + def get_intrinsics(self): + return self.K + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + if self.split != "train" or self.split == "render": + camera_id = 1 + else: + camera_id = self.camera_ids[idx] + + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(self.directions, c2w) + + print(f"Loading image {idx}") + + rays = torch.cat([rays_o, rays_d], dim=-1) + + if self.use_ndc: + rays = self.to_ndc(rays) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * camera_id], dim=-1) + return rays + + def get_rgb(self, idx): + # Colors + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGB") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img + + +class DenseLLFFDataset(LLFFDataset): + def __init__(self, cfg, split="train", **kwargs): + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + ## Bounds + with self.pmgr.open(os.path.join(self.root_dir, "bounds.npy"), "rb") as f: + bounds = np.load(f) + + self.bounds = bounds[:, -2:] + + ## Poses + with self.pmgr.open(os.path.join(self.root_dir, "poses.npy"), "rb") as f: + poses = np.load(f) + + ## Image paths + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images/"))) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), 
"rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + ## Skip + row_skip = self.dataset_cfg.train_row_skip + col_skip = self.dataset_cfg.train_col_skip + + poses_skipped = [] + image_paths_skipped = [] + + for row in range(self.dataset_cfg.num_rows): + for col in range(self.dataset_cfg.num_cols): + idx = row * self.dataset_cfg.num_cols + col + + if self.split == "train" and ( + (row % row_skip) != 0 or (col % col_skip) != 0 or (idx % self.val_skip) == 0 + ): + continue + + if (self.split == "val" or self.split == "test") and ( + ((row % row_skip) == 0 and (col % col_skip) == 0) and (idx % self.val_skip) != 0 + ): + continue + + poses_skipped.append(poses[idx]) + image_paths_skipped.append(self.image_paths[idx]) + + poses = np.stack(poses_skipped, axis=0) + self.poses = poses.reshape(-1, 3, 5) + self.image_paths = image_paths_skipped + + # Step 1: rescale focal length according to training resolution + H, W, self.focal = poses[0, :, -1] + self.cx, self.cy = W / 2.0, H / 2.0 + + self.K = np.eye(3) + self.K[0, 0] = self.focal * self.img_wh[0] / W + self.K[0, 2] = self.cx * self.img_wh[0] / W + self.K[1, 1] = self.focal * self.img_wh[1] / H + self.K[1, 2] = self.cy * self.img_wh[1] / H + + # Step 2: correct poses, bounds + self.near = self.bounds.min() + self.far = self.bounds.max() + + # Step 3: Ray directions for all pixels + self.directions = get_ray_directions_K( + self.img_wh[1], self.img_wh[0], self.K, centered_pixels=self.centered_pixels + ) diff --git a/nlf/datasets/neural_3d.py b/nlf/datasets/neural_3d.py new file mode 100644 index 0000000..4f6b1ca --- /dev/null +++ b/nlf/datasets/neural_3d.py @@ -0,0 +1,470 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT + +import csv +import json +import os + +os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" +import glob + +import cv2 +import numpy as np +import torch +from PIL import Image +from scipy.spatial.transform import Rotation + +from utils.pose_utils import ( + average_poses, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import ( + get_ndc_rays_fx_fy, + get_pixels_for_image, + get_ray_directions_K, + get_rays, + sample_images_at_xy, +) + +from .base import Base5DDataset, Base6DDataset + + +class Neural3DVideoDataset(Base6DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + self.num_frames = cfg.dataset.num_frames if "num_frames" in cfg.dataset else 1 + self.start_frame = cfg.dataset.start_frame if "start_frame" in cfg.dataset else 1 + self.keyframe_step = cfg.dataset.keyframe_step if "keyframe_step" in cfg.dataset else 1 + self.num_keyframes = ( + cfg.dataset.num_keyframes if "num_keyframes" in cfg.dataset else self.num_frames // self.keyframe_step + ) + + self.load_full_step = cfg.dataset.load_full_step if "load_full_step" in cfg.dataset else 1 + self.subsample_keyframe_step = ( + cfg.dataset.subsample_keyframe_step if "subsample_keyframe_step" in cfg.dataset else 1 + ) + self.subsample_keyframe_frac = ( + cfg.dataset.subsample_keyframe_frac if "subsample_keyframe_frac" in cfg.dataset else 1.0 + ) + self.subsample_frac = cfg.dataset.subsample_frac if "subsample_frac" in cfg.dataset else 1.0 + + self.keyframe_offset = 0 + self.frame_offset = 0 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + W, H = self.img_wh + + # Poses, bounds + with self.pmgr.open(os.path.join(self.root_dir, "poses_bounds.npy"), "rb") as f: + poses_bounds = np.load(f) + + # Video paths + self.video_paths = sorted(glob.glob(os.path.join(self.root_dir, "*.mp4"))) + self.images_per_frame = len(self.video_paths) + self.total_images_per_frame = len(self.video_paths) + + # if self.dataset_cfg.collection in ['coffee_martini']: + # self.video_paths = [path for path in self.video_paths if 'cam13' not in path] + + # Get intrinsics & extrinsics + poses = poses_bounds[:, :15].reshape(-1, 3, 5) + self.bounds = poses_bounds[:, -2:] + + # if self.dataset_cfg.collection in ['coffee_martini']: + # poses = np.delete(poses, (12), axis=0) + + H, W, self.focal = poses[0, :, -1] + self.cx, self.cy = W / 2.0, H / 2.0 + + self.K = np.eye(3) + self.K[0, 0] = self.focal * self.img_wh[0] / W + self.K[0, 2] = self.cx * self.img_wh[0] / W + self.K[1, 1] = self.focal * self.img_wh[1] / H + self.K[1, 2] = self.cy * self.img_wh[1] / H + + # Correct poses, bounds + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + self.depth_range = np.array([self.near * 2.0, self.far]) + + # Ray directions for all pixels + self.directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], self.K, centered_pixels=True) + + # Repeat poses, times + self.poses = np.stack([self.poses for i in range(self.num_frames)]).reshape(-1, 3, 4) + self.times = np.tile(np.linspace(0, 1, self.num_frames)[..., None], (1, self.images_per_frame)) + self.times = 
self.times.reshape(-1) + self.camera_ids = np.tile( + np.linspace(0, self.images_per_frame - 1, self.images_per_frame)[None, :], (self.num_frames, 1) + ) + self.camera_ids = self.camera_ids.reshape(-1) + + # Holdout validation images + val_indices = [] + + for idx in self.val_set: + val_indices += [frame * self.images_per_frame + idx for frame in range(self.num_frames)] + + train_indices = [i for i in range(len(self.poses)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + if not self.val_all: + self.video_paths = [self.video_paths[i] for i in self.val_set] + + self.poses = self.poses[val_indices] + self.times = self.times[val_indices] + self.camera_ids = self.camera_ids[val_indices] + elif self.split == "train": + if not self.val_all: + self.video_paths = [self.video_paths[i] for i in range(len(self.video_paths)) if i not in self.val_set] + + self.poses = self.poses[train_indices] + self.times = self.times[train_indices] + self.camera_ids = self.camera_ids[train_indices] + + self.num_images = len(self.poses) + self.images_per_frame = len(self.video_paths) + + def random_subsample(self, coords, rgb, last_rgb, frame, fac=1.0): + if (frame % self.load_full_step) == 0: + return coords, rgb + elif (frame % self.subsample_keyframe_step) == 0: + num_take = int(np.round(coords.shape[0] * self.subsample_keyframe_frac * fac)) + perm = torch.tensor(np.random.permutation(coords.shape[0]))[:num_take] + else: + num_take = int(np.round(coords.shape[0] * self.subsample_frac * fac)) + perm = torch.tensor(np.random.permutation(coords.shape[0]))[:num_take] + + return coords[perm].view(-1, coords.shape[-1]), rgb[perm].view(-1, rgb.shape[-1]) + + def regular_subsample(self, coords, rgb, last_rgb, frame, fac=1.0): + if (frame % self.load_full_step) == 0: + return coords, rgb + elif (frame % self.subsample_keyframe_step) == 0: + subsample_every = int(np.round(1.0 / (self.subsample_keyframe_frac * fac))) + offset = self.keyframe_offset + self.keyframe_offset += 1 + else: + subsample_every = int(np.round(1.0 / (self.subsample_frac * fac))) + offset = self.frame_offset + self.frame_offset += 1 + + pixels = get_pixels_for_image(self.img_wh[1], self.img_wh[0]).reshape(-1, 2).long() + mask = ((pixels[..., 0] + pixels[..., 1] + offset) % subsample_every) == 0.0 + + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def test_subsample(self, coords, rgb, last_rgb, frame): + mask = coords[..., 5] < -0.25 + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def importance_subsample(self, coords, rgb, last_rgb, frame, fac=1.0): + if (frame % self.load_full_step) == 0: + return coords, rgb + + diff = torch.abs(rgb - last_rgb).mean(-1) + diff_sorted, _ = torch.sort(diff) + + if (frame % self.subsample_keyframe_step) == 0: + num_take = int(np.round(coords.shape[0] * self.subsample_keyframe_frac * fac)) + else: + num_take = int(np.round(coords.shape[0] * self.subsample_frac * fac)) + + mask = diff > diff_sorted[-num_take] + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def subsample(self, coords, rgb, last_rgb, frame): + coords, rgb = self.regular_subsample(coords, rgb, last_rgb, frame) + return coords, rgb + + # if (frame % self.load_full_step) == 0: + # return coords, rgb + # else: + # coords, rgb = self.importance_subsample(coords, rgb, last_rgb, frame) + + # return coords, rgb + + def prepare_train_data(self): + ## 
Collect training data + self.all_coords = [] + self.all_rgb = [] + num_pixels = 0 + last_rgb_full = None + + for video_idx in range(len(self.video_paths)): + self.keyframe_offset = video_idx + self.frame_offset = video_idx + + # Open video + cam = cv2.VideoCapture(self.video_paths[video_idx]) + + # Get coords + video_coords = self.get_coords(video_idx) + + ctr = 0 + frame_idx = 0 + + while ctr < self.start_frame + self.num_frames: + _, frame = cam.read() + + if ctr < self.start_frame: + ctr += 1 + continue + else: + ctr += 1 + + cur_time = self.times[frame_idx * self.images_per_frame + video_idx] + cur_frame = int( + np.round(self.times[frame_idx * self.images_per_frame + video_idx] * (self.num_frames - 1)) + ) + + # Coords + cur_coords = torch.cat( + [ + video_coords[..., :-1], + torch.ones_like(video_coords[..., -1:]) * cur_time, + ], + -1, + ) + + # Get RGB + cur_rgb_full = self.get_rgb(frame) + + # Subsample + if frame_idx == 0: + cur_rgb = cur_rgb_full + else: + cur_coords, cur_rgb = self.subsample(cur_coords, cur_rgb_full, last_rgb_full, cur_frame) + + # Save for later + last_rgb_full = cur_rgb_full + + # Coords + self.all_coords += [cur_coords] + + # Color + self.all_rgb += [cur_rgb] + + # Number of pixels + num_pixels += cur_rgb.shape[0] + + print(f"Video {video_idx} frame {frame_idx}") + print("Full res images loaded:", num_pixels / (self.img_wh[0] * self.img_wh[1])) + + # Increment frame idx + frame_idx += 1 + + cam.release() + + # Format / save loaded data + self.all_coords = torch.cat(self.all_coords, 0) + # self.all_coords = self.all_coords.view( + # -1, self.images_per_frame, self.num_frames, self.all_coords.shape[-1] + # ).permute(0, 2, 1, 3).reshape(-1, self.all_coords.shape[-1]) + self.all_rgb = torch.cat(self.all_rgb, 0) + # self.all_rgb = self.all_rgb.view( + # -1, self.images_per_frame, self.num_frames, self.all_rgb.shape[-1] + # ).permute(0, 2, 1, 3).reshape(-1, self.all_rgb.shape[-1]) + self.update_all_data() + + def update_all_data(self): + self.all_weights = self.get_weights() + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_weights, + ], + -1, + ) + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def prepare_render_data(self): + # Get poses + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + poses_per_frame = self.poses.shape[0] // self.num_frames + poses_one_frame = self.poses[ + (self.num_frames // 2) * poses_per_frame : (self.num_frames // 2 + 1) * poses_per_frame + ] + poses_each_frame = interpolate_poses(self.poses[::poses_per_frame], self.render_supersample) + radii = np.percentile(np.abs(poses_one_frame[..., 3]), 50, axis=0) + radii[..., :2] *= 0.5 + + if self.num_frames > 1: + poses = create_spiral_poses( + poses_one_frame, + radii, + focus_depth * 2, + N=self.num_frames * self.render_supersample, + ) + + reference_pose = np.eye(4) + reference_pose[:3, :4] = self.poses[(self.num_frames // 2) * poses_per_frame] + reference_pose = np.linalg.inv(reference_pose) + + for pose_idx in range(len(poses)): + cur_pose = np.eye(4) + cur_pose[:3, :4] = poses[pose_idx] + poses[pose_idx] = poses_each_frame[pose_idx] @ 
(reference_pose @ cur_pose) + else: + poses = create_spiral_poses(poses_one_frame, radii, focus_depth * 100, N=120) + + self.poses = np.stack(poses, axis=0) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + # Get times + if (self.num_frames - 1) > 0: + self.times = np.linspace(0, self.num_frames - 1, len(self.poses)) + + if not self.render_interpolate_time: + self.times = np.round(self.times) + + self.times = self.times / (self.num_frames - 1) + else: + self.times = [0.0 for p in self.poses] + + for i in range(100): + self.poses[i] = self.poses[0] + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + time = self.times[idx] + + if self.split != "train" or self.split == "render": + camera_id = 1 + else: + camera_id = self.camera_ids[idx] + + rays_o, rays_d = get_rays(self.directions, c2w) + + print("Loading time:", np.round(time * (self.num_frames - 1))) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + # Camera ID + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * camera_id], dim=-1) + + # Time stamp + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * time], dim=-1) + return rays + + def get_rgb(self, img): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + if img.shape[0] != self._img_wh[0] or img.shape[1] != self._img_wh[1]: + img = cv2.resize(img, self._img_wh, cv2.INTER_LANCZOS4) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = cv2.resize(img, self.img_wh, cv2.INTER_AREA) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img + + def get_rgb_one(self, idx): + # Open video + cam = cv2.VideoCapture(self.video_paths[idx % self.images_per_frame]) + + # Get RGB + ctr = 0 + frame_idx = 0 + + while ctr < self.start_frame + self.num_frames: + _, frame = cam.read() + + if ctr < self.start_frame: + ctr += 1 + continue + else: + ctr += 1 + + if frame_idx != (idx // self.images_per_frame): + frame_idx += 1 + continue + else: + rgb = self.get_rgb(frame) + break + + cam.release() + return rgb + + def get_intrinsics(self): + return self.K + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(idx), + "pose": self.poses[idx], + "time": self.times[idx], + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb_one(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb_one(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/nlf/datasets/random.py b/nlf/datasets/random.py new file mode 100644 index 0000000..4833c7b --- /dev/null +++ b/nlf/datasets/random.py @@ -0,0 +1,460 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import matplotlib.pyplot as plt +import numpy as np +import torch +from torch.utils.data import Dataset + +from utils.ray_utils import ( + get_pixels_for_image, + get_random_pixels, + get_ray_directions_from_pixels_K, + get_rays, + sample_images_at_xy, +) + + +class RandomRayDataset(Dataset): + def __init__(self, cfg, train_dataset=None, **kwargs): + super().__init__() + + self.cfg = cfg + self.img_wh = train_dataset.img_wh + self.near = train_dataset.near + self.far = train_dataset.far + self.use_ndc = train_dataset.use_ndc + self.num_images = train_dataset.num_images + self.batch_size = cfg.batch_size + + if "save_data" not in kwargs or kwargs["save_data"]: + self.all_rays = torch.clone(train_dataset.all_rays) + self.all_rgb = torch.clone(train_dataset.all_rgb) + + # Current + self.current_rays = self.all_rays + self.current_rgb = self.all_rgb + + # Prepare + self.prepare_data() + + def compute_stats(self): + all_rays = self.all_rays.view(self.num_images, self.img_wh[1] * self.img_wh[0], -1) + + ## Per view statistics + self.all_means = [] + self.all_stds = [] + + for idx in range(self.num_images): + cur_rays = all_rays[idx] + + self.all_means += [cur_rays.mean(0)] + self.all_stds += [cur_rays.std(0)] + + self.all_means = torch.stack(self.all_means, 0) + self.all_stds = torch.stack(self.all_stds, 0) + + ## Full dataset statistics + self.pos_mean = self.all_rays[..., :3].mean(0) + self.pos_std = self.all_rays[..., :3].std(0) + + self.dir_mean = self.all_rays[..., 3:].mean(0) + self.dir_std = self.all_rays[..., 3:].std(0) + + self.rgb_mean = self.all_rgb.mean(0) + self.rgb_std = self.all_rgb.std(0) + + def prepare_data(self): + self.compute_stats() + self.shuffle() + + def shuffle(self): + pass + + def __len__(self): + return len(self.all_rays) + + def jitter(self, rays, jitter=None): + if jitter is not None: + jitter_rays = rays + + if "pos" in jitter: + jitter_rays = self.jitter_ray_origins(jitter_rays, jitter) + + if "dir" in jitter: + jitter_rays = self.jitter_ray_directions(jitter_rays, jitter) + + return jitter_rays + else: + return rays + + def get_batch(self, batch_idx, batch_size, jitter=None): + batch = {} + + ## Get random rays + batch["rays"] = self.get_random_rays(batch_size, self.cfg.range) + + ## Jitter + batch["jitter_rays"] = self.jitter(batch["rays"], jitter) + + return batch + + def get_random_rays(self, num_rays, ray_range): + ray_dim = self.all_rays.shape[-1] // 2 + + pos_rand = torch.randn((num_rays, ray_dim)) * self.pos_std[None] * ray_range.pos + rays_o = self.pos_mean[None] + pos_rand + + dir_rand = torch.randn((num_rays, ray_dim)) * self.dir_std[None] * ray_range.dir + rays_d = self.dir_mean[None] + dir_rand + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + return torch.cat([rays_o, rays_d], -1) + + def jitter_ray_origins(self, rays, jitter): + ray_dim = self.all_rays.shape[-1] // 2 + + pos_rand = ( + torch.randn((rays.shape[0], jitter.bundle_size, ray_dim), device=rays.device) + * self.pos_std[None].type_as(rays) + * jitter.pos + ) + + rays = rays.view(rays.shape[0], -1, rays.shape[-1]) + if rays.shape[1] == 1: + rays = rays.repeat(1, jitter.bundle_size, 1) + + rays_o = rays[..., :ray_dim] + pos_rand.type_as(rays) + + return torch.cat([rays_o, rays[..., ray_dim:]], -1) + + def jitter_ray_directions(self, rays, jitter): + ray_dim = self.all_rays.shape[-1] // 2 + + dir_rand = ( + torch.randn((rays.shape[0], jitter.bundle_size, ray_dim), device=rays.device) + * self.dir_std[None].type_as(rays) + * jitter.dir + ) + + 
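For context, a hedged sketch of how this regularization dataset is typically driven (the jitter values and the surrounding loop are assumptions, not part of this patch): get_batch draws Gaussian rays around the training set's ray statistics and pairs each one with a jittered bundle produced by the jitter helpers in this class, which a consistency loss can then compare.

from omegaconf import OmegaConf

# Hypothetical jitter settings; the field names mirror the checks made by
# jitter(), jitter_ray_origins() and jitter_ray_directions() in this class.
jitter_cfg = OmegaConf.create({"pos": 0.05, "dir": 0.05, "bundle_size": 4})

# random_dataset is assumed to be a RandomRayDataset wrapping the train set,
# with cfg.range.pos / cfg.range.dir set in its own config.
batch = random_dataset.get_batch(batch_idx=0, batch_size=4096, jitter=jitter_cfg)
rays = batch["rays"]            # (4096, 6): origins + unit directions
bundles = batch["jitter_rays"]  # (4096, 4, 6): per-ray perturbed bundles
# A regularizer would render both and penalize disagreement between them.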
rays = rays.view(rays.shape[0], -1, rays.shape[-1]) + if rays.shape[1] == 1: + rays = rays.repeat(1, jitter.bundle_size, 1) + + rays_d = rays[..., ray_dim:] + dir_rand.type_as(rays) + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + return torch.cat([rays[..., :ray_dim], rays_d], -1) + + +class RandomPixelDataset(Dataset): + def __init__(self, cfg, train_dataset=None, **kwargs): + super().__init__() + + self.cfg = cfg + self.pixels_per_image = cfg.batch_size if "pixels_per_image" in cfg else None + self.use_ndc = train_dataset.use_ndc + self.prepare_data(train_dataset) + + def prepare_data(self, train_dataset): + # Create tensors + self.all_rays = [] + self.all_rgb = [] + + if self.use_ndc: + self.all_ndc_rays = [] + + # Random rays for each training image + if self.pixels_per_image is None: + self.pixels_per_image = train_dataset.img_wh[1] * train_dataset.img_wh[0] + + H, W = train_dataset.img_wh[1], train_dataset.img_wh[0] + + for i in range(train_dataset.num_images): + # Get random directions + cur_pixels = get_random_pixels( + self.pixels_per_image, + H, + W, + ) + cur_directions = get_ray_directions_from_pixels_K( + cur_pixels, train_dataset.K, centered_pixels=train_dataset.centered_pixels + ) + + # Sample rays + c2w = torch.FloatTensor(train_dataset.poses[i]) + cur_rays = torch.cat(list(get_rays(cur_directions, c2w)), -1) + + # Sample pixel colors + cur_rgb = train_dataset.all_rgb.view(train_dataset.num_images, H, W, -1)[i].unsqueeze(0) + cur_rgb = sample_images_at_xy(cur_rgb, cur_pixels, H, W) + + # Append + self.all_rays.append(cur_rays.reshape(-1, 6)) + self.all_rgb.append(cur_rgb.reshape(-1, 3)) + + if self.use_ndc: + self.all_ndc_rays.append(train_dataset.to_ndc(self.all_rays[-1])) + + # Concat tensors + self.all_rays = torch.cat(self.all_rays, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + + if self.use_ndc: + self.all_ndc_rays = torch.cat(self.all_ndc_rays, 0) + + def shuffle(self): + perm = torch.tensor(np.random.permutation(len(self))) + self.all_rays = self.all_rays[perm] + self.all_rgb = self.all_rgb[perm] + + if self.use_ndc: + self.all_ndc_rays = self.all_ndc_rays[perm] + + def __len__(self): + return len(self.all_rays) + + def jitter(self, rays, jitter=None): + return rays + + def get_batch(self, batch_idx, batch_size, jitter=None): + batch = {} + batch_start = batch_size * batch_idx + + if self.use_ndc: + batch["rays"] = self.all_ndc_rays[batch_start : batch_start + batch_size] + else: + batch["rays"] = self.all_rays[batch_start : batch_start + batch_size] + + batch["rgb"] = self.all_rgb[batch_start : batch_start + batch_size] + + return batch + + +class RandomViewSubsetDataset(RandomRayDataset): + def __init__(self, cfg, train_dataset=None, **kwargs): + self.num_images = len(train_dataset.image_paths) + self.num_views = train_dataset.num_images if cfg.dataset.num_views == "all" else cfg.dataset.num_views + + self.poses = np.tile(np.eye(4)[None], (self.num_images, 1, 1)) + self.poses[..., :3, :4] = train_dataset.poses[..., :3, :4] + self.poses_inv = np.linalg.inv(self.poses) + self.intrinsics = train_dataset.get_intrinsics_screen_space() + self.current_poses_inv = self.poses_inv + + super().__init__(cfg, train_dataset=train_dataset, **kwargs) + + def shuffle(self): + ## Get random view subset + self.current_views = self.get_random_views(self.num_views) + + self.current_rays = self.all_rays.view(self.num_images, self.img_wh[1] * self.img_wh[0], -1)[self.current_views] + self.current_rgb = self.all_rgb.view(self.num_images, self.img_wh[1] * 
self.img_wh[0], -1)[self.current_views] + self.current_poses = self.poses[self.current_views] + self.current_poses_inv = np.linalg.inv(self.current_poses) + + self.current_means = self.all_means[self.current_views] + self.current_stds = self.all_stds[self.current_views] + + print(self.current_views) + + def __len__(self): + return len(self.all_rays) + + def __getitem__(self, idx): + return { + "rays": self.random_rays[idx], + "jitter_rays": self.jitter_rays[idx], + } + + def get_random_views(self, n_views): + if self.num_views == self.num_images: + return list(range(self.num_images)) + else: + return list(np.random.choice(np.arange(0, self.num_images), size=n_views, replace=False)) + + def get_random_rays_convex_hull(self, num_rays, ray_range): + rays = self.current_rays + rays = rays[:, torch.randperm(rays.shape[1])] + rays = rays[:, :num_rays] + + weights = torch.rand(num_rays, self.num_views).type_as(rays) + weights = weights / (weights.sum(-1).unsqueeze(-1) + 1e-8) + weights = weights.permute(1, 0) + + rays = rays * weights.unsqueeze(-1) + rays = rays.sum(0) + + rays_o = rays[..., 0:3] + rays_d = rays[..., 3:6] + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + return torch.cat([rays_o, rays_d], -1) + + def project_points(self, P, points): + points = torch.cat([points, torch.ones_like(points[..., -1:])], dim=-1) + points = points.unsqueeze(0) + points = (P @ points.permute(0, 2, 1)).permute(0, 2, 1) + pixels = points[..., :2] / (-points[..., -1:]) + + return pixels + + def lookup_points(self, points): + # Projection matrix + poses_inv = torch.Tensor(self.current_poses_inv).type_as(points)[..., :3, :4] + K = torch.Tensor(self.intrinsics).type_as(points).unsqueeze(0) + P = K @ poses_inv + + # Project points + pixels = self.project_points(P, points) + + # Valid mask + valid_mask = (pixels[..., 0] > -1) & (pixels[..., 0] < 1) & (pixels[..., 1] > -1) & (pixels[..., 1] < 1) + valid_mask = valid_mask.type_as(points).detach()[..., None] + + # Weights + camera_centers = ( + torch.Tensor(self.current_poses).type_as(points)[..., None, :3, -1].repeat(1, points.shape[0], 1) + ) + camera_dirs = torch.nn.functional.normalize(points.unsqueeze(0) - camera_centers, p=2.0, dim=-1) + camera_rays = torch.cat([camera_centers, camera_dirs], dim=-1) + + # Lookup + pixels = pixels.view(self.num_views, -1, 1, 2) + rgb = self.current_rgb.permute(0, 2, 1).view(self.num_views, 3, self.img_wh[1], self.img_wh[0]).type_as(points) + values = torch.nn.functional.grid_sample(rgb, pixels) + values = values.permute(0, 2, 3, 1).reshape(self.num_views, -1, 3) + + return values, camera_rays, valid_mask + + def project_points_single(self, P, points): + points = torch.cat([points, torch.ones_like(points[..., -1:])], dim=-1) + points = (P @ points.permute(0, 2, 1)).permute(0, 2, 1) + pixels = points[..., :2] / (-points[..., -1:]) + + return pixels + + def lookup_points_single(self, points, weights=None): + # Projection matrix + poses_inv = torch.Tensor(self.current_poses_inv).type_as(points)[..., :3, :4] + K = torch.Tensor(self.intrinsics).type_as(points).unsqueeze(0) + P = K @ poses_inv + + # Project points + pixels = self.project_points_single(P, points) + + # Valid mask + valid_mask = (pixels[..., 0] > -1) & (pixels[..., 0] < 1) & (pixels[..., 1] > -1) & (pixels[..., 1] < 1) + valid_mask = valid_mask.type_as(points).detach()[..., None] + + # Weights + camera_centers = ( + torch.Tensor(self.current_poses).type_as(points)[..., None, :3, -1].repeat(1, points.shape[1], 1) + ) + camera_dirs = 
torch.nn.functional.normalize(points - camera_centers, p=2.0, dim=-1) + camera_rays = torch.cat([camera_centers, camera_dirs], dim=-1) + + # Lookup + pixels = pixels.view(self.num_views, -1, 1, 2) + rgb = self.current_rgb.permute(0, 2, 1).view(self.num_views, 3, self.img_wh[1], self.img_wh[0]).type_as(points) + values = torch.nn.functional.grid_sample(rgb, pixels) + values = values.permute(0, 2, 3, 1).reshape(self.num_views, -1, 3) + + return values, camera_rays, valid_mask + + +class RandomRayLightfieldDataset(RandomRayDataset): + def __init__(self, cfg, train_dataset=None): + self.num_images = len(train_dataset.image_paths) + self.size = len(train_dataset) + + self.uv_plane = cfg.dataset.uv_plane + self.st_plane = cfg.dataset.st_plane + + if "st_scale" in cfg.dataset and cfg.dataset.st_scale is not None: + self.st_scale = cfg.dataset.st_scale + elif train_dataset is not None and "lightfield" in train_dataset.dataset_cfg: + self.st_scale = train_dataset.st_scale + else: + self.st_scale = 1.0 + + super().__init__(cfg, train_dataset, save_data=False) + + def get_random_rays(self, num_rays, ray_range): + st = (torch.rand((num_rays, 2)) * 2 - 1) * ray_range.pos + + s = st[..., 0] * self.st_scale + t = st[..., 1] * self.st_scale + + uv = (torch.rand((num_rays, 2)) * 2 - 1) * ray_range.dir + + u = uv[..., 0] + v = uv[..., 1] + + rays = torch.stack( + [ + s, + t, + self.st_plane * torch.ones_like(s), + u - s, + v - t, + (self.uv_plane - self.st_plane) * torch.ones_like(s), + ], + -1, + ) + + rays = torch.cat([rays[..., 0:3], torch.nn.functional.normalize(rays[..., 3:6], p=2.0, dim=-1)], -1) + + return rays + + def jitter_ray_directions(self, rays, jitter): + dir_rand = torch.randn((rays.shape[0], jitter.bundle_size, 2), device=rays.device) * jitter.dir + + rays = rays.view(rays.shape[0], -1, rays.shape[-1]) + if rays.shape[1] == 1: + rays = rays.repeat(1, jitter.bundle_size, 1) + + rays_d = torch.cat([rays[..., 3:5] + dir_rand.type_as(rays), rays[..., 5:]], -1) + + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + rays = torch.cat( + [ + rays[..., :3], + rays_d, + ], + -1, + ) + + return rays + + def jitter_ray_origins(self, rays, jitter): + pos_rand = torch.randn((rays.shape[0], jitter.bundle_size, 2), device=rays.device) * jitter.pos * self.st_scale + + rays = rays.view(rays.shape[0], -1, rays.shape[-1]) + if rays.shape[1] == 1: + rays = rays.repeat(1, jitter.bundle_size, 1) + + rays_o = rays[..., :2] + pos_rand.type_as(rays) + + rays = torch.cat( + [ + rays_o, + rays[..., 2:], + ], + -1, + ) + + return rays + + def __len__(self): + return len(self.random_rays) + + def __getitem__(self, idx): + return { + "rays": self.random_rays[idx], + "jitter_rays": self.jitter_rays[idx], + } diff --git a/nlf/datasets/shiny.py b/nlf/datasets/shiny.py new file mode 100644 index 0000000..71a9776 --- /dev/null +++ b/nlf/datasets/shiny.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import os + +import numpy as np +import torch +from PIL import Image + +from utils.intersect_utils import intersect_axis_plane +from utils.pose_utils import correct_poses_bounds, create_spiral_poses, interpolate_poses +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K + +from .llff import LLFFDataset + + +class ShinyDataset(LLFFDataset): + def __init__(self, cfg, split="train", **kwargs): + self.dense = cfg.dataset.collection == "cd" or cfg.dataset.collection == "lab" + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + with self.pmgr.open(os.path.join(self.root_dir, "poses_bounds.npy"), "rb") as f: + poses_bounds = np.load(f) + + with self.pmgr.open(os.path.join(self.root_dir, "hwf_cxcy.npy"), "rb") as f: + hwfc = np.load(f) + + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images/"))) + self.camera_ids = np.linspace(0, len(self.image_paths) - 1, len(self.image_paths)) + self.total_num_views = len(self.image_paths) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + if self.split in ["train", "val"]: + assert len(poses_bounds) == len( + self.image_paths + ), "Mismatch between number of images and number of poses! Please rerun COLMAP!" + + poses = poses_bounds[:, :12].reshape(-1, 3, 4) + self.bounds = poses_bounds[:, -2:] + + # Step 1: rescale focal length according to training resolution + H, W, self.focal = hwfc[:3, 0] + self.cx, self.cy = hwfc[-2:, 0] + + self.K = np.eye(3) + self.K[0, 0] = self.focal * self.img_wh[0] / W + self.K[0, 2] = self.cx * self.img_wh[0] / W + self.K[1, 1] = self.focal * self.img_wh[1] / H + self.K[1, 2] = self.cy * self.img_wh[1] / H + + # Step 2: correct poses, bounds + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds, use_train_pose=True) + + with self.pmgr.open(os.path.join(self.root_dir, "planes.txt"), "r") as f: + planes = [float(i) for i in f.read().strip().split(" ")] + + self.near = planes[0] * 0.95 + self.far = planes[1] * 1.05 + self.depth_range = np.array([self.near * 2.0, self.far]) + + # Step 3: Ray directions for all pixels + self.centered_pixels = True + self.directions = get_ray_directions_K( + self.img_wh[1], self.img_wh[0], self.K, centered_pixels=self.centered_pixels + ) + + # Step 4: Holdout validation images + if len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.camera_ids = self.camera_ids[val_indices] + self.poses = self.poses[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.camera_ids = self.camera_ids[train_indices] + self.poses = self.poses[train_indices] + + def to_ndc(self, rays): + return 
get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def prepare_render_data(self): + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + if self.dense: + radii = np.percentile(np.abs(self.poses[..., 3]), 50, axis=0) + self.poses = create_spiral_poses(self.poses, radii, focus_depth * 100) + else: + radii = np.percentile(np.abs(self.poses[..., 3]), 85, axis=0) + self.poses = create_spiral_poses(self.poses, radii, focus_depth * 2) + + self.poses = np.stack(self.poses, axis=0) + self.poses[..., :3, 3] = self.poses[..., :3, 3] - 0.1 * close_depth * self.poses[..., :3, 2] + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + +class DenseShinyDataset(ShinyDataset): + def __init__(self, cfg, split="train", **kwargs): + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + ## Bounds + with self.pmgr.open(os.path.join(self.root_dir, "bounds.npy"), "rb") as f: + bounds = np.load(f) + + self.bounds = bounds[:, -2:] + + ## Intrinsics + with self.pmgr.open(os.path.join(self.root_dir, "hwf_cxcy.npy"), "rb") as f: + hwfc = np.load(f) + + ## Poses + with self.pmgr.open(os.path.join(self.root_dir, "poses.npy"), "rb") as f: + poses = np.load(f) + + ## Image paths + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images/"))) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + ## Skip + row_skip = self.dataset_cfg.train_row_skip + col_skip = self.dataset_cfg.train_col_skip + + poses_skipped = [] + image_paths_skipped = [] + + for row in range(self.dataset_cfg.num_rows): + for col in range(self.dataset_cfg.num_cols): + idx = row * self.dataset_cfg.num_cols + col + + if self.split == "train" and ( + (row % row_skip) != 0 or (col % col_skip) != 0 or (idx % self.val_skip) == 0 + ): + continue + + if (self.split == "val" or self.split == "test") and ( + ((row % row_skip) == 0 and (col % col_skip) == 0) and (idx % self.val_skip) != 0 + ): + continue + + poses_skipped.append(poses[idx]) + image_paths_skipped.append(self.image_paths[idx]) + + poses = np.stack(poses_skipped, axis=0) + self.poses = poses.reshape(-1, 3, 5) + self.image_paths = image_paths_skipped + + # Step 1: rescale focal length according to training resolution + H, W, self.focal = hwfc[:3, 0] + self.cx, self.cy = hwfc[-2:, 0] + + self.K = np.eye(3) + self.K[0, 0] = self.focal * self.img_wh[0] / W + self.K[0, 2] = self.cx * self.img_wh[0] / W + self.K[1, 1] = self.focal * self.img_wh[1] / H + self.K[1, 2] = self.cy * self.img_wh[1] / H + + # Step 2: correct poses, bounds + self.near = self.bounds.min() + self.far = self.bounds.max() + + # Step 3: Ray directions for all pixels + self.centered_pixels = True + self.directions = get_ray_directions_K( + self.img_wh[1], self.img_wh[0], self.K, centered_pixels=self.centered_pixels + ) diff --git a/nlf/datasets/spaces.py b/nlf/datasets/spaces.py new file mode 100644 index 0000000..e5d7237 --- /dev/null +++ b/nlf/datasets/spaces.py @@ -0,0 +1,193 @@ +#!/usr/bin/env 
python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import json +import os + +import numpy as np +import torch +from PIL import Image +from scipy.spatial.transform import Rotation + +from utils.pose_utils import ( + average_poses, + center_poses_with, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + +from .base import Base5DDataset + + +class SpacesDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + # Load meta + with self.pmgr.open(os.path.join(self.root_dir, "models.json"), "r") as f: + self.meta = json.load(f) + + # Train and test split paths + with self.pmgr.open(os.path.join(self.root_dir, "train_image.txt"), "r") as f: + self.train_images = f.readlines() + self.train_images = [os.path.join(self.root_dir, l.strip()) for l in self.train_images] + + with self.pmgr.open(os.path.join(self.root_dir, "val_image.txt"), "r") as f: + self.val_images = f.readlines() + self.val_images = [os.path.join(self.root_dir, l.strip()) for l in self.val_images] + + with self.pmgr.open(os.path.join(self.root_dir, "ref_image.txt"), "r") as f: + self.ref_image = os.path.join(self.root_dir, f.read().split(" ")[0].strip()) + + # Populate vars + self.image_paths = [] + self.intrinsics = [] + self.poses = [] + + for rig in self.meta: + for camera in rig: + image_path = os.path.join(self.root_dir, camera["relative_path"]) + + if image_path not in self.train_images and image_path not in self.val_images: + continue + + self.image_paths.append(image_path) + + width_factor = self.img_wh[0] / camera["width"] + height_factor = self.img_wh[1] / camera["height"] + + if camera["height"] != self.img_wh[1]: + print(camera["height"], camera["principal_point"][1]) + + pa = camera["pixel_aspect_ratio"] + K = np.eye(3) + K = np.array( + [ + [camera["focal_length"] * width_factor, 0.0, camera["principal_point"][0] * width_factor], + [ + 0.0, + pa * camera["focal_length"] * height_factor, + camera["principal_point"][1] * height_factor, + ], + [0.0, 0.0, 1.0], + ] + ) + + self.intrinsics.append(K) + + # Pose + R = Rotation.from_rotvec(camera["orientation"]).as_matrix() + T = np.array(camera["position"]) + + pose = np.eye(4) + pose[:3, :3] = R.T + pose[:3, -1] = T + + pose_pre = np.eye(4) + pose_pre[1, 1] *= -1 + pose_pre[2, 2] *= -1 + pose = pose_pre @ pose @ pose_pre + + self.poses.append(pose[:3, :4]) + + # Camera IDs & other + self.K = self.intrinsics[0] + self.ref_idx = self.image_paths.index(self.ref_image) + self.intrinsics = np.stack(self.intrinsics) + self.poses = np.stack(self.poses) + + self.camera_ids = np.linspace(0, len(self.image_paths) - 1, len(self.image_paths)) + self.total_num_views = len(self.image_paths) + + # Bounds + with self.pmgr.open(os.path.join(self.root_dir, "planes.txt"), "r") as f: + planes = [float(i) for i in f.read().strip().split(" ")] + + self.bounds = np.array([planes[0], planes[1]]) + + # Correct poses & bounds + poses = np.copy(self.poses) + + self.poses, self.poses_avg = center_poses_with(poses, poses[self.ref_idx : self.ref_idx + 1]) + + if not self.use_ndc: + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds, flip=False, center=False) + + self.near = 
self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + self.depth_range = np.array([self.near * 2.0, self.far]) + + # Holdout + val_indices = [i for i in range(len(self.image_paths)) if self.image_paths[i] in self.val_images] + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.camera_ids = self.camera_ids[val_indices] + self.poses = self.poses[val_indices] + self.intrinsics = self.intrinsics[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.camera_ids = self.camera_ids[train_indices] + self.poses = self.poses[train_indices] + self.intrinsics = self.intrinsics[train_indices] + + def get_intrinsics(self): + return self.K + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + if self.split != "train" or self.split == "render": + camera_id = 1 + else: + camera_id = self.camera_ids[idx] + + if self.split != "render": + K = torch.FloatTensor(self.intrinsics[idx]) + else: + K = torch.FloatTensor(self.intrinsics[0]) + + print(f"Loading image {idx}") + + # Get rays + directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=True).view(-1, 3) + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(directions, c2w) + rays = torch.cat([rays_o, rays_d], dim=-1) + + # Convert to NDC + if self.use_ndc: + rays = self.to_ndc(rays) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * camera_id], dim=-1) + return rays + + def get_rgb(self, idx): + # Colors + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGB") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img diff --git a/nlf/datasets/stanford.py b/nlf/datasets/stanford.py new file mode 100644 index 0000000..82bf106 --- /dev/null +++ b/nlf/datasets/stanford.py @@ -0,0 +1,464 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import os + +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import correct_poses_bounds, create_spiral_poses, interpolate_poses +from utils.ray_utils import get_lightfield_rays, get_ndc_rays_fx_fy, get_ray_directions_K, get_rays + +from .lightfield import EPIDataset, LightfieldDataset +from .llff import LLFFDataset + + +class StanfordLightfieldDataset(LightfieldDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_file_coords = ( + cfg.dataset.lightfield.use_file_coords if "use_file_coords" in cfg.dataset.lightfield else False + ) + + super().__init__(cfg, split, **kwargs) + + if self.split == "train" and self.use_file_coords: + self.poses = [] + + for (s_idx, t_idx) in self.all_st_idx: + idx = t_idx * self.cols + s_idx + coord = self.normalize_coord(self.camera_coords[idx]) + self.poses.append(coord) + + def read_meta(self): + self.image_paths = sorted(self.pmgr.ls(self.root_dir)) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + self.camera_coords = [] + + if self.use_file_coords: + for image_path in self.image_paths: + if self.dataset_cfg.collection in ["beans", "knights", "tarot", "tarot_small"]: + yx = image_path.split("_")[-2:] + y = -float(yx[0]) + x = float(yx[1].split(".png")[0]) + else: + yx = image_path.split("_")[-3:-1] + y, x = float(yx[0]), float(yx[1]) + + self.camera_coords.append((x, y)) + + def get_camera_range(self): + xs = [coord[0] for coord in self.camera_coords] + ys = [coord[1] for coord in self.camera_coords] + + min_x, max_x = np.min(xs), np.max(xs) + min_y, max_y = np.min(ys), np.max(ys) + + return (min_x, max_x), (min_y, max_y) + + def get_camera_center(self): + idx = (self.rows // 2) * self.cols + self.cols // 2 + return self.camera_coords[idx] + + def normalize_coord(self, coord): + x_range, y_range = self.get_camera_range() + + # x_c, y_c = self.get_camera_center() + # norm_x = 2 * (coord[0] - x_c) / (x_range[1] - x_range[0]) + # norm_y = 2 * (coord[1] - y_c) / (x_range[1] - x_range[0]) + + aspect = (x_range[1] - x_range[0]) / (y_range[1] - y_range[0]) + norm_x = ((coord[0] - x_range[0]) / (x_range[1] - x_range[0])) * 2 - 1 + norm_y = (((coord[1] - y_range[0]) / (y_range[1] - y_range[0])) * 2 - 1) / aspect + + return (norm_x, norm_y) + + def get_coords(self, s_idx, t_idx): + if not self.use_file_coords: + return super().get_coords(s_idx, t_idx) + + idx = t_idx * self.cols + s_idx + coord = self.normalize_coord(self.camera_coords[idx]) + + if self.split == "render": + st_scale = self.vis_st_scale + else: + st_scale = self.st_scale + + return get_lightfield_rays( + self.img_wh[0], + self.img_wh[1], + coord[0], + coord[1], + self.aspect, + near=self.near_plane, + far=self.far_plane, + st_scale=st_scale, + uv_scale=self.uv_scale, + ) + + def get_rgb(self, s_idx, t_idx): + idx = t_idx * self.cols + s_idx + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGB") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = 
img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img + + +class StanfordEPIDataset(EPIDataset): + def __init__(self, cfg, split="train", **kwargs): + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + self.image_paths = sorted(self.pmgr.ls(self.root_dir)) + + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + def get_coords(self): + if self.dataset_cfg.collection in ["tarot_small", "tarot", "chess"]: + u = torch.linspace(-1, 1, self.img_wh[0], dtype=torch.float32) + s = torch.linspace(1, -1, self.img_wh[1] * self.supersample, dtype=torch.float32) * self.st_scale + else: + u = torch.linspace(-1, 1, self.img_wh[0], dtype=torch.float32) + s = torch.linspace(-1, 1, self.img_wh[1] * self.supersample, dtype=torch.float32) * self.st_scale + + su = list(torch.meshgrid([s, u])) + return torch.stack(su, -1).view(-1, 2) + + def get_rgb(self): + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGB") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img + + +class StanfordLLFFDataset(LLFFDataset): + def __init__(self, cfg, split="train", **kwargs): + + # Scale of ST plane relative to UV plane + st_scale_dict = { + "tarot": 0.125, + "tarot_small": 0.125, + "knights": 0.125, + "bracelet": 0.125,
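+ # Collections not listed above fall back to an ST-plane scale of 1.0 via the .get(..., 1.0) lookup below.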
+ } + + if "st_scale" in cfg.dataset: + self.st_scale = cfg.dataset.st_scale + else: + self.st_scale = st_scale_dict.get(cfg.dataset.collection, 1.0) + + # Near, far plane locations + self.near_plane = cfg.dataset.near if "near" in cfg.dataset else -1.0 + self.far_plane = cfg.dataset.far if "far" in cfg.dataset else 0.0 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + # Camera coords + self.image_paths = sorted(self.pmgr.ls(self.root_dir)) + + # Get width, height + if self.img_wh is None: + image_path = self.image_paths[0] + + with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file: + img = np.array(Image.open(im_file).convert("RGB")) + + self._img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.img_wh = (img.shape[1] // self.downsample, img.shape[0] // self.downsample) + self.aspect = float(self.img_wh[0]) / self.img_wh[1] + + # Get camera coords + self.camera_coords = [] + + for image_path in self.image_paths: + if self.dataset_cfg.collection in ["beans", "knights", "tarot", "tarot_small"]: + yx = image_path.split("_")[-2:] + y = -float(yx[0]) + x = float(yx[1].split(".png")[0]) + else: + yx = image_path.split("_")[-3:-1] + y, x = float(yx[0]), float(yx[1]) + + self.camera_coords.append((x, y)) + + self.camera_coords = np.array(self.camera_coords) + self.camera_min = np.min(self.camera_coords, axis=0) + self.camera_max = np.max(self.camera_coords, axis=0) + + self.camera_coords = (self.camera_coords - self.camera_min) / (self.camera_max - self.camera_min) * 2 - 1 + st_aspect = (self.camera_max[0] - self.camera_min[0]) / (self.camera_max[1] - self.camera_min[1]) + self.camera_coords[:, 1] /= st_aspect + self.camera_coords *= self.st_scale + + # Set up poses + self.poses = np.tile(np.eye(4, 4)[..., None], [1, 1, len(self.image_paths)]) + self.poses[:, 1:3, :] *= -1 + self.poses[:2, 3, :] = self.camera_coords.T + self.poses[2, 3, :] = self.near_plane + self.poses = self.poses.transpose(2, 0, 1) + self.poses = self.poses[:, :3, :4] + + # Set up intrinsics + focal = 1 + pixel_scale = self.img_wh[0] / 2 + + self.intrinsics = np.tile(np.eye(3)[..., None], [1, 1, len(self.image_paths)]) + self.intrinsics[0, 0, :] = focal * pixel_scale + self.intrinsics[1, 1, :] = focal * pixel_scale + self.intrinsics[0, 2, :] = self.camera_coords.T[0] * focal * pixel_scale + self.img_wh[0] / 2 + self.intrinsics[1, 2, :] = -self.camera_coords.T[1] * focal * pixel_scale + self.img_wh[1] / 2 + self.intrinsics = self.intrinsics.transpose(2, 0, 1) + + self.K = np.eye(3) + self.K[0, 0] = focal * pixel_scale + self.K[1, 1] = focal * pixel_scale + self.K[0, 2] = self.img_wh[0] / 2 + self.K[1, 2] = self.img_wh[1] / 2 + + ## Correct poses, bounds + self.bounds = np.array([0.25, 2.0]) + + if self.use_ndc: + self.poses, self.poses_avg, self.bounds = correct_poses_bounds( + np.copy(self.poses), np.copy(self.bounds), flip=False, center=True + ) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + self.depth_range = np.array([self.near * 2.0, self.far]) + + ## Holdout validation images + if self.val_set == "lightfield": + step = self.dataset_cfg.lightfield_step + rows = self.dataset_cfg.lightfield_rows + cols = self.dataset_cfg.lightfield_cols + val_indices = [] + + self.val_pairs = self.dataset_cfg.val_pairs if "val_pairs" in self.dataset_cfg else [] + self.val_all = (step == 1 and len(self.val_pairs) == 0) or self.val_all + + for row in range(rows): + for col in range(cols): + idx = row * rows + col + + if (row % step != 0 
or col % step != 0 or ([row, col] in self.val_pairs)) and not self.val_all: + val_indices += [idx] + + elif len(self.val_set) > 0 or self.val_all: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.intrinsics = self.intrinsics[val_indices] + self.poses = self.poses[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.intrinsics = self.intrinsics[train_indices] + self.poses = self.poses[train_indices] + + def prepare_train_data(self, reset=False): + self.num_images = len(self.image_paths) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + num_pixels = 0 + + for idx in range(len(self.image_paths)): + cur_coords = self.get_coords(idx) + cur_rgb = self.get_rgb(idx) + + # Coords + self.all_coords += [cur_coords] + + # Color + self.all_rgb += [cur_rgb] + + # Number of pixels + num_pixels += cur_rgb.shape[0] + + print("Full res images loaded:", num_pixels / (self.img_wh[0] * self.img_wh[1])) + + # Format / save loaded data + self.all_coords = torch.cat(self.all_coords, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + self.update_all_data() + + def update_all_data(self): + self.all_weights = self.get_weights() + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_weights, + ], + -1, + ) + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def prepare_render_data(self): + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + radii = np.percentile(np.abs(self.poses[..., 3] - np.mean(self.poses[..., 3], axis=0)), 50, axis=0) + self.poses = create_spiral_poses(self.poses, radii, focus_depth * 4) + + self.poses = np.stack(self.poses, axis=0) + self.poses[..., :3, 3] = self.poses[..., :3, 3] - 0.1 * close_depth * self.poses[..., :3, 2] + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + def get_coords(self, idx): + if self.split != "train" and not self.val_all: + cam_idx = 0 + else: + cam_idx = idx + + if self.split != "render": + K = torch.FloatTensor(self.intrinsics[idx]) + else: + K = torch.FloatTensor(np.copy(self.K)) + + c2w = torch.FloatTensor(self.poses[idx]) + + directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=False, flipped=True) + + # Convert to world space / NDC + rays_o, rays_d = get_rays(directions, c2w) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * cam_idx], dim=-1) + + # Return + return rays + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file: + img = Image.open(im_file) + img = img.convert("RGB") + + if img.size[0] != self._img_wh[0] or img.size[1] != 
self._img_wh[1]: + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + return img diff --git a/nlf/datasets/technicolor.py b/nlf/datasets/technicolor.py new file mode 100644 index 0000000..6edc461 --- /dev/null +++ b/nlf/datasets/technicolor.py @@ -0,0 +1,504 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT + +import csv +import gc +import json +import os + +os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" +import pdb +import random + +import cv2 +import numpy as np +import torch +from PIL import Image +from scipy.spatial.transform import Rotation + +from utils.pose_utils import ( + average_poses, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import ( + get_ndc_rays_fx_fy, + get_pixels_for_image, + get_ray_directions_K, + get_rays, + sample_images_at_xy, +) + +from .base import Base5DDataset, Base6DDataset + + +class TechnicolorDataset(Base6DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + self.num_frames = cfg.dataset.num_frames if "num_frames" in cfg.dataset else 1 + self.start_frame = cfg.dataset.start_frame if "start_frame" in cfg.dataset else 1 + + self.keyframe_step = cfg.dataset.keyframe_step if "keyframe_step" in cfg.dataset else 1 + self.num_keyframes = ( + cfg.dataset.num_keyframes if "num_keyframes" in cfg.dataset else self.num_frames // self.keyframe_step + ) + + self.load_full_step = cfg.dataset.load_full_step if "load_full_step" in cfg.dataset else 1 + self.subsample_keyframe_step = ( + cfg.dataset.subsample_keyframe_step if "subsample_keyframe_step" in cfg.dataset else 1 + ) + self.subsample_keyframe_frac = ( + cfg.dataset.subsample_keyframe_frac if "subsample_keyframe_frac" in cfg.dataset else 1.0 + ) + self.subsample_frac = cfg.dataset.subsample_frac if "subsample_frac" in cfg.dataset else 1.0 + + self.keyframe_offset = 0 + self.frame_offset = 0 + + self.num_chunks = cfg.dataset.num_chunks + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + W, H = self.img_wh + + # Image paths + self.num_rows = self.dataset_cfg.lightfield_rows + self.num_cols = self.dataset_cfg.lightfield_cols + rows = self.num_rows + cols = self.num_cols + self.images_per_frame = rows * cols + + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images/")))[ + self.images_per_frame * self.start_frame : self.images_per_frame * (self.start_frame + self.num_frames) + ] + + self.num_frames = len(self.image_paths) // self.images_per_frame + + # Poses + self.intrinsics = [] + self.poses = [] + + with self.pmgr.open(os.path.join(self.root_dir, "cameras_parameters.txt"), "r") as f: + reader = csv.reader(f, delimiter=" ") + + for idx, row in enumerate(reader): + if idx == 0: + continue + + row = [float(c) for c in row if c.strip() != ""] + + # Intrinsics + K = np.eye(3) + K[0, 0] = row[0] * self.img_wh[0] / 2048 + K[0, 2] = row[1] * self.img_wh[0] / 
2048 + K[1, 1] = row[3] * row[0] * self.img_wh[1] / 1088 + K[1, 2] = row[2] * self.img_wh[1] / 1088 + self.intrinsics.append(K) + + # Pose + R = Rotation.from_quat([row[6], row[7], row[8], row[5]]).as_matrix() + pose = np.eye(4) + pose[:3, :3] = R.T + pose[:3, -1] = -R.T @ np.array(row[-3:]).T + + pose_pre = np.eye(4) + pose_pre[1, 1] *= -1 + pose_pre[2, 2] *= -1 + + pose = pose_pre @ pose @ pose_pre + self.poses.append(pose[:3, :4]) + + self.intrinsics = np.stack([self.intrinsics for i in range(self.num_frames)]).reshape(-1, 3, 3) + self.poses = np.stack([self.poses for i in range(self.num_frames)]).reshape(-1, 3, 4) + self.K = self.intrinsics[0] + + # Times + self.times = np.tile(np.linspace(0, 1, self.num_frames)[..., None], (1, self.images_per_frame)) + self.times = self.times.reshape(-1) + + ## Bounds, common for all scenes + if self.dataset_cfg.collection in ["painter"]: + self.near = 1.75 + self.far = 10.0 + elif self.dataset_cfg.collection in ["trains"]: + self.near = 0.65 + self.far = 10.0 + elif self.dataset_cfg.collection in ["theater"]: + self.near = 0.65 + self.far = 10.0 + elif self.dataset_cfg.collection in ["fabien"]: + self.near = 0.35 + # self.near = 0.5 + # self.near = 0.45 + # self.near = 0.4 + self.far = 2.0 + elif self.dataset_cfg.collection in ["birthday"]: + self.near = 1.75 + self.far = 10.0 + + # Broken file + if len(self.image_paths) > 377: + self.image_paths[377] = self.image_paths[361] + self.poses[377] = self.poses[361] + self.intrinsics[377] = self.intrinsics[361] + self.times[377] = self.times[361] + else: + self.near = 0.65 + self.far = 10.0 + + self.bounds = np.array([self.near, self.far]) + + ## Correct poses, bounds + poses = np.copy(self.poses) + + if self.use_ndc or self.correct_poses: + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds, flip=False, center=True) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + + ## Holdout validation images + if self.val_set == "lightfield": + step = self.dataset_cfg.lightfield_step + rows = self.dataset_cfg.lightfield_rows + cols = self.dataset_cfg.lightfield_cols + val_indices = [] + + self.val_pairs = self.dataset_cfg.val_pairs if "val_pairs" in self.dataset_cfg else [] + self.val_all = (step == 1 and len(self.val_pairs) == 0) or self.val_all + + for row in range(rows): + for col in range(cols): + idx = row * rows + col + + if (row % step != 0 or col % step != 0 or ([row, col] in self.val_pairs)) and not self.val_all: + val_indices += [frame * self.images_per_frame + idx for frame in range(self.num_frames)] + + elif len(self.val_set) > 0 or self.val_all: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.intrinsics = self.intrinsics[val_indices] + self.poses = self.poses[val_indices] + self.times = self.times[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.intrinsics = self.intrinsics[train_indices] + self.poses = self.poses[train_indices] + self.times = self.times[train_indices] + + def subsample(self, coords, rgb, frame): + if (frame % 
self.load_full_step) == 0: + return coords, rgb + elif (frame % self.subsample_keyframe_step) == 0: + subsample_every = int(np.round(1.0 / self.subsample_keyframe_frac)) + offset = self.keyframe_offset + self.keyframe_offset += 1 + # num_take = int(np.round(coords.shape[0] * self.subsample_keyframe_frac)) + # perm = torch.tensor( + # np.random.permutation(coords.shape[0]) + # )[:num_take] + else: + subsample_every = int(np.round(1.0 / self.subsample_frac)) + offset = self.frame_offset + self.frame_offset += 1 + # num_take = int(np.round(coords.shape[0] * self.subsample_frac)) + # perm = torch.tensor( + # np.random.permutation(coords.shape[0]) + # )[:num_take] + + # return coords[perm].view(-1, coords.shape[-1]), rgb[perm].view(-1, rgb.shape[-1]) + pixels = get_pixels_for_image(self.img_wh[1], self.img_wh[0]).reshape(-1, 2).long() + mask = ((pixels[..., 0] + pixels[..., 1] + offset) % subsample_every) == 0.0 + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def prepare_train_data(self, reset=False): + self.num_images = len(self.image_paths) + + # Shuffle the range + shuffled_range = random.sample(range(self.num_images), self.num_images) + + # Chunkify the shuffled range + chunk_size = (self.num_images + self.num_chunks - 1) // self.num_chunks + self.chunks = [shuffled_range[i : i + chunk_size] for i in range(0, self.num_images, chunk_size)] + self.chunk_num_pixels = [] + self.coords_chunk_paths = [] + self.rgb_chunk_paths = [] + cur_coords_chunk = [] + cur_rgb_chunk = [] + num_pixels = 0 + + for chunk_idx in range(len(self.chunks)): + coords_chunk_path = os.path.join(self.root_dir, "rays", f"coords_chunk_{chunk_idx}.pt") + rgb_chunk_path = os.path.join(self.root_dir, "rays", f"rgb_chunk_{chunk_idx}.pt") + if os.path.exists(coords_chunk_path) and os.path.exists(rgb_chunk_path): + self.coords_chunk_paths.append(coords_chunk_path) + self.rgb_chunk_paths.append(rgb_chunk_path) + print("Chunk %d loaded." % chunk_idx) + else: + image_indices = self.chunks[chunk_idx] + for idx in image_indices: + cur_coords = self.get_coords(idx) + cur_rgb = self.get_rgb(idx) + cur_frame = int(np.round(self.times[idx] * (self.num_frames - 1))) + + # Subsample + cur_coords, cur_rgb = self.subsample(cur_coords, cur_rgb, cur_frame) + + # Coords + cur_coords_chunk.append(cur_coords) + + # Color + cur_rgb_chunk.append(cur_rgb) + + # Number of pixels + num_pixels += cur_rgb.shape[0] + + # Format / save loaded data + coords_chunk = torch.cat(cur_coords_chunk, 0) + rgb_chunk = torch.cat(cur_rgb_chunk, 0) + + if not os.path.exists(os.path.dirname(coords_chunk_path)): + os.makedirs(os.path.dirname(coords_chunk_path)) + + torch.save(coords_chunk, coords_chunk_path) + + if not os.path.exists(os.path.dirname(rgb_chunk_path)): + os.makedirs(os.path.dirname(rgb_chunk_path)) + + torch.save(rgb_chunk, rgb_chunk_path) + + self.coords_chunk_paths.append(coords_chunk_path) + self.rgb_chunk_paths.append(rgb_chunk_path) + self.chunk_num_pixels.append(num_pixels) + print("Chunk %d saved: %d pixels." 
% (chunk_idx, num_pixels)) + + # Reset + cur_coords_chunk = [] + cur_rgb_chunk = [] + num_pixels = 0 + + # Format / save loaded data + self.all_coords = torch.load(self.coords_chunk_paths[0]) + self.all_rgb = torch.load(self.rgb_chunk_paths[0]) + self.current_chunk = 0 + self.update_all_data() + + def update_all_data(self): + self.all_weights = self.get_weights() + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_weights, + ], + -1, + ) + + def shift_chunk(self): + + self.current_chunk = (self.current_chunk + 1) % len(self.coords_chunk_paths) + self.all_coords = torch.load(self.coords_chunk_paths[self.current_chunk]) + self.all_rgb = torch.load(self.rgb_chunk_paths[self.current_chunk]) + print("loading", self.coords_chunk_paths[self.current_chunk]) + + self.all_weights = self.get_weights() + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_weights, + ], + -1, + ) + + return self.current_chunk + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def prepare_render_data(self): + # Get poses + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + poses_per_frame = self.poses.shape[0] // self.num_frames + poses_one_frame = self.poses[ + (self.num_frames // 2) * poses_per_frame : (self.num_frames // 2 + 1) * poses_per_frame + ] + poses_each_frame = interpolate_poses(self.poses[::poses_per_frame], self.render_supersample) + radii = np.percentile(np.abs(poses_one_frame[..., 3]), 60, axis=0) + radii[..., :2] *= 0.25 + + if self.num_frames > 1: + poses = create_spiral_poses( + poses_one_frame, + radii, + focus_depth * 100, + N=self.num_frames * self.render_supersample, + ) + + reference_pose = np.eye(4) + reference_pose[:3, :4] = self.poses[(self.num_frames // 2) * poses_per_frame] + reference_pose = np.linalg.inv(reference_pose) + + for pose_idx in range(len(poses)): + cur_pose = np.eye(4) + cur_pose[:3, :4] = poses[pose_idx] + poses[pose_idx] = poses_each_frame[pose_idx] @ (reference_pose @ cur_pose) + else: + poses = create_spiral_poses(poses_one_frame, radii, focus_depth * 100, N=120) + + self.poses = np.stack(poses, axis=0) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + # Get times + if (self.num_frames - 1) > 0: + self.times = np.linspace(0, self.num_frames - 1, len(self.poses)) + + if not self.render_interpolate_time: + self.times = np.round(self.times) + + self.times = self.times / (self.num_frames - 1) + else: + self.times = [0.0 for p in self.poses] + + # for i in range(100): + # self.poses[i] = self.poses[0] + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + if self.split != "train" and not self.val_all: + cam_idx = 3 + else: + cam_idx = idx % self.images_per_frame + + if self.split != "render": + K = torch.FloatTensor(self.intrinsics[idx]) + else: + K = torch.FloatTensor(self.intrinsics[0]) + + c2w = torch.FloatTensor(self.poses[idx]) + + time = self.times[idx] + + print("Loading time:", np.round(time * (self.num_frames - 1))) + + directions = 
get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=True) + + # Convert to world space / NDC + rays_o, rays_d = get_rays(directions, c2w) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * cam_idx], dim=-1) + + # Add times + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * time], dim=-1) + + # Return + return rays + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file) + img = img.convert("RGB") + + if img.size[0] != self._img_wh[0] or img.size[1] != self._img_wh[1]: + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + # img = img.view(4, -1).permute(1, 0) + # img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_intrinsics(self): + return self.intrinsics + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(idx), + "pose": self.poses[idx], + "time": self.times[idx], + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/nlf/datasets/video3d_ground_truth.py b/nlf/datasets/video3d_ground_truth.py new file mode 100644 index 0000000..31d9b97 --- /dev/null +++ b/nlf/datasets/video3d_ground_truth.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT + +import json +import os + +os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" +import cv2 +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import ( + average_poses, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import ( + get_ndc_rays_fx_fy, + get_pixels_for_image, + get_ray_directions_K, + get_rays, + sample_images_at_xy, +) + +from .base import Base6DDataset + + +class Video3DTimeGroundTruthDataset(Base6DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + self.num_keyframes = cfg.dataset.num_keyframes if "num_keyframes" in cfg.dataset else -1 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + W, H = self.img_wh + + self.frame_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir))) + self.num_frames = len(self.frame_paths) + if self.num_keyframes == -1: + self.num_keyframes = self.num_frames + self.keyframe_step = self.num_frames // self.num_keyframes + + ## Image and pose paths + self.image_paths = [] + self.pose_paths = [] + self.depth_paths = [] + + for frame_path in self.frame_paths: + all_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, frame_path))) + image_paths = [p for p in all_paths if p.endswith(".png")] + pose_paths = [p for p in all_paths if p.endswith(".json")] + depth_paths = [p for p in all_paths if p.endswith("_depth")] + + image_paths = [os.path.join(frame_path, p) for p in image_paths] + pose_paths = [os.path.join(frame_path, p) for p in pose_paths] + depth_paths = [os.path.join(frame_path, p) for p in depth_paths] + + self.image_paths += image_paths + self.pose_paths += pose_paths + self.depth_paths += depth_paths + + ## Load poses + poses = [] + self.reference_matrix = [] + self.times = [] + self.frames = [] + + for i, pose_path in enumerate(self.pose_paths): + with self.pmgr.open(os.path.join(self.root_dir, pose_path), "r") as f: + meta = json.load(f) + + if "frame" in meta: + frame = meta["frame"] + else: + frame = int(pose_path.split("/")[-2].split("frame_")[-1]) + + # Intrinsics + if i == 0: + self.meta = meta + self.focal_x = self.meta["normalized_focal_length_x"] + self.focal_y = self.meta["normalized_focal_length_y"] + self.principal_point_x = self.meta["normalized_principal_point_x"] + self.principal_point_y = self.meta["normalized_principal_point_y"] + self.start_frame = frame + self.end_frame = self.start_frame + self.num_frames - 1 + + # Reference matrix + if self.use_reference: + self.reference_matrix.append(np.array(meta["world_to_camera"])[:3, :4]) + else: + self.reference_matrix = np.eye(4) + + # Reference matrix + if self.use_reference: + self.reference_matrix = average_poses(np.stack(self.reference_matrix, 0)) + + # Get all poses + for i, pose_path in enumerate(self.pose_paths): + with self.pmgr.open(os.path.join(self.root_dir, pose_path), "r") as f: + meta = json.load(f) + + if "frame" in meta: + frame = meta["frame"] + else: + frame = int(pose_path.split("/")[-2].split("frame_")[-1]) + + frame_matrix = np.array(meta["camera_to_world"]) + pose = (self.reference_matrix @ frame_matrix)[:3, :4] + poses += [pose] + + # Time + if self.num_frames - 1 > 0: + self.times.append((frame - self.start_frame) / (self.num_frames 
- 1)) + self.frames.append(frame - self.start_frame) + else: + self.times.append(0.0) + self.frames.append(0) + + poses = np.stack(poses, axis=0) + self.times = np.array(self.times) + + ## Intrinsics + self.K = np.eye(3) + self.K[0, 0] = self.focal_x * W + self.K[0, 2] = self.principal_point_x * W + self.K[1, 1] = self.focal_y * H + self.K[1, 2] = self.principal_point_x * H + + ## Bounds, common for all scenes + # self.near = meta['near_clip'] + # self.far = meta['far_clip'] + self.near = 0.25 + self.far = 10.0 + self.bounds = np.array([self.near, self.far]) + + ## Correct poses, bounds + if self.use_ndc or self.correct_poses: + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds, flip=False, center=True) + else: + self.poses = poses + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + + ## Ray directions for all pixels, same for all images (same H, W, focal) + self.centered_pixels = True + self.directions = get_ray_directions_K(H, W, self.K, centered_pixels=self.centered_pixels) + + ## Holdout validation images + if self.val_set == "lightfield": + step = self.dataset_cfg.lightfield_step + rows = self.dataset_cfg.lightfield_rows + cols = self.dataset_cfg.lightfield_cols + val_indices = [] + + self.val_pairs = self.dataset_cfg.val_pairs if "val_pairs" in self.dataset_cfg else [] + self.val_all = (step == 1 and len(self.val_pairs) == 0) or self.val_all + + for idx, path in enumerate(self.image_paths): + n = int(path.split("_")[-1].split(".")[0]) + row = n // cols + col = n % cols + + if row % step != 0 or col % step != 0 or ((row, col) in self.val_pairs): + val_indices.append(idx) + + elif len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.depth_paths = [self.depth_paths[i] for i in val_indices] + self.poses = self.poses[val_indices] + self.times = self.times[val_indices] + self.frames = [self.frames[i] for i in val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.depth_paths = [self.depth_paths[i] for i in train_indices] + self.poses = self.poses[train_indices] + self.times = self.times[train_indices] + self.frames = [self.frames[i] for i in train_indices] + + def prepare_train_data(self): + self.num_images = len(self.image_paths) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + self.all_depth = [] + self.all_pixel_flow = [] + self.all_flow = [] + + for idx in range(len(self.image_paths)): + # for idx in range(75, 76): # TODO: Remove + # coords + self.all_coords += [self.get_coords(idx)] + + # Color + self.all_rgb += [self.get_rgb(idx)] + + # Depth + self.all_depth += [self.get_depth(idx)] + + # Flow + self.all_pixel_flow += [self.get_pixel_flow(idx)] + self.all_flow += [self.get_flow(idx)] + + # Format / save loaded data + self.update_all_data( + torch.cat(self.all_coords, 0), + torch.cat(self.all_rgb, 0), + torch.cat(self.all_depth, 0), + torch.cat(self.all_flow, 0), + ) + + def update_all_data(self, coords, rgb, depth, flow): + self.all_coords = coords + self.all_rgb = rgb + 
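# Ground-truth buffers; they are concatenated into all_inputs as [coords | rgb | depth | flow | weights] below, and format_batch() slices them back out in the same order. +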
self.all_depth = depth + self.all_flow = flow + self.all_weights = self.get_weights() + + ## Patches + if self.use_patches or self.use_crop: + self._all_coords = torch.clone(self.all_coords) + self._all_rgb = torch.clone(self.all_rgb) + self._all_depth = torch.clone(self.all_depth) + self._all_flow = torch.clone(self.all_flow) + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_depth, + self.all_flow, + self.all_weights, + ], + -1, + ) + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["depth"] = batch["inputs"][..., self.all_coords.shape[-1] + 3 : self.all_coords.shape[-1] + 4] + batch["flow"] = batch["inputs"][..., self.all_coords.shape[-1] + 4 : self.all_coords.shape[-1] + 7] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def prepare_render_data(self): + # Get poses + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + poses_per_frame = self.poses.shape[0] // self.num_frames + poses_one_frame = self.poses[ + (self.num_frames // 2) * poses_per_frame : (self.num_frames // 2 + 1) * poses_per_frame + ] + poses_each_frame = interpolate_poses(self.poses[::poses_per_frame], self.render_supersample) + radii = np.percentile(np.abs(poses_one_frame[..., 3]), 80, axis=0) + + if self.num_frames > 1: + poses = create_spiral_poses( + poses_one_frame, + radii, + focus_depth * 100, + N=self.num_frames * self.render_supersample, + ) + + reference_pose = np.eye(4) + reference_pose[:3, :4] = self.poses[(self.num_frames // 2) * poses_per_frame] + reference_pose = np.linalg.inv(reference_pose) + + for pose_idx in range(len(poses)): + cur_pose = np.eye(4) + cur_pose[:3, :4] = poses[pose_idx] + poses[pose_idx] = poses_each_frame[pose_idx] @ (reference_pose @ cur_pose) + else: + poses = create_spiral_poses(poses_one_frame, radii, focus_depth * 100, N=120) + + self.poses = np.stack(poses, axis=0) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + # Get times + if (self.num_frames - 1) > 0: + self.times = np.linspace(0, self.num_frames - 1, len(self.poses)) + + if not self.render_interpolate_time: + self.times = np.round(self.times) + + self.times = self.times / (self.num_frames - 1) + else: + self.times = [0.0 for p in self.poses] + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + time = self.times[idx] + print("Loading time:", np.round(time * (self.num_frames - 1))) + rays_o, rays_d = get_rays(self.directions, c2w) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * time], dim=-1) + return rays + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGBA") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + 
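# RGBA image: flatten to (num_pixels, 4), then composite RGB over a white background using the alpha channel. +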
img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def load_geometry(self, idx, prefix="depth", mode="exr"): + gt_path = os.path.join(self.root_dir, self.depth_paths[idx].replace("depth", prefix)) + gt_image_path = [p for p in self.pmgr.ls(gt_path) if p.endswith(mode)][0] + + depth_file = os.path.join(gt_path, gt_image_path) + + if mode == "exr": + img = cv2.imread(depth_file, cv2.IMREAD_ANYCOLOR | cv2.IMREAD_ANYDEPTH) + else: + img = np.load(depth_file) + + # Resize + img = cv2.resize(img, self._img_wh, interpolation=cv2.INTER_NEAREST) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = cv2.resize(img, self.img_wh, interpolation=cv2.INTER_NEAREST) + + # Transform + img = self.transform(np.copy(img)) + return img.view(img.shape[0], -1).permute(1, 0) + + def get_depth(self, idx, return_mask=False): + depth = self.load_geometry(idx, "depth")[..., 0:1] + + directions = torch.nn.functional.normalize(self.directions, p=2.0, dim=-1).view(-1, 3) + depth = depth / torch.abs(directions[..., 2:3]) + + mask = (depth < self.near) | (depth > self.far) + depth[depth < self.near] = self.near + depth[depth > self.far] = self.far + + if return_mask: + return depth, mask + else: + return depth + + def get_pixel_flow(self, idx): + # Pixel flow + pixel_flow = self.load_geometry(idx, "vector")[..., 1:3] + pixel_flow = torch.flip(pixel_flow, [1]) + pixel_flow[..., 1] *= -1 + pixel_flow = pixel_flow * self.img_wh[0] / 800 + + return pixel_flow + + def get_uv(self, idx): + # Pixel flow + uv = self.load_geometry(idx, "uv")[..., 1:3] + uv = torch.flip(uv[1:3], [-1]) + + return uv + + def get_flow(self, idx): + # Flow + return self.load_geometry(idx, "vector", mode="npy") + + def get_intrinsics(self): + K = np.eye(3) + K[0, 0] = self.focal_x * self.img_wh[0] + K[0, 2] = self.principal_point_x * self.img_wh[0] + K[1, 1] = self.focal_y * self.img_wh[1] + K[1, 2] = self.principal_point_x * self.img_wh[1] + + return K + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(idx), + "pose": self.poses[idx], + "time": self.times[idx], + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = { + "coords": self.get_coords(idx + 75), + "rgb": self.get_rgb(idx + 75), + "depth": self.get_depth(idx + 75), + "flow": self.get_flow(idx + 75), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/nlf/datasets/video3d_static.py b/nlf/datasets/video3d_static.py new file mode 100644 index 0000000..32ce38b --- /dev/null +++ b/nlf/datasets/video3d_static.py @@ -0,0 +1,217 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT + +import json +import os + +os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" +import cv2 +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import ( + average_poses, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import ( + get_ndc_rays_fx_fy, + get_pixels_for_image, + get_ray_directions_K, + get_rays, + sample_images_at_xy, +) + +from .base import Base5DDataset + + +class Video3DDataset(Base5DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + W, H = self.img_wh + + ## Image paths + self.image_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "images"))) + + ## Load poses + self.pose_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, "cameras"))) + + poses = [] + + for i, pose_path in enumerate(self.pose_paths): + with self.pmgr.open(os.path.join(self.root_dir, "cameras", pose_path), "r") as f: + meta = json.load(f) + + if i == 0: + self.meta = meta + self.focal_x = self.meta["normalized_focal_length_x"] + self.focal_y = self.meta["normalized_focal_length_y"] + self.principal_point_x = self.meta["normalized_principal_point_x"] + self.principal_point_y = self.meta["normalized_principal_point_y"] + + # Correct pose + if "reference_world_to_camera" in meta and self.use_reference: + self.reference_matrix = np.array(meta["reference_world_to_camera"]) + else: + self.reference_matrix = np.eye(4) + + frame_matrix = np.array(meta["camera_to_world"]) + pose = (self.reference_matrix @ frame_matrix)[:3, :4] + poses += [pose] + + poses = np.stack(poses, axis=0) + + ## Intrinsics + self.K = np.eye(3) + self.K[0, 0] = self.focal_x * W + self.K[0, 2] = self.principal_point_x * W + self.K[1, 1] = self.focal_y * H + self.K[1, 2] = self.principal_point_x * H + + ## Bounds, common for all scenes + self.near = 0.75 + self.far = 4.0 + self.bounds = np.array([self.near, self.far]) + + ## Correct poses, bounds + if self.use_ndc or self.correct_poses: + self.poses, self.poses_avg, self.bounds = correct_poses_bounds(poses, self.bounds, flip=False, center=True) + else: + self.poses = poses + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + + ## Ray directions for all pixels, same for all images (same H, W, focal) + self.centered_pixels = True + self.directions = get_ray_directions_K(H, W, self.K, centered_pixels=self.centered_pixels) + + ## Holdout validation images + if self.val_set == "lightfield": + step = self.dataset_cfg.lightfield_step + cols = self.dataset_cfg.lightfield_cols + val_indices = [] + + self.val_pairs = self.dataset_cfg.val_pairs if "val_pairs" in self.dataset_cfg else [] + self.val_all = (step == 1 and len(self.val_pairs) == 0) or self.val_all + + for idx, path in enumerate(self.image_paths): + n = int(path.split("_")[-1].split(".")[0]) + row = n // cols + col = n % cols + + if row % step != 0 or col % step != 0 or ((row, col) in self.val_pairs): + val_indices.append(idx) + + elif len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), 
self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.poses = self.poses[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.poses = self.poses[train_indices] + + def prepare_render_data(self): + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + radii = np.percentile(np.abs(self.poses[..., 3]), 50, axis=0) + camera_radius = 0.35 + + # self.poses = create_rotating_spiral_poses( + # [0.0, -0.2, 0.0], + # self.poses, + # camera_radius, + # [0.0, radii[1], camera_radius * 0.25], + # focus_depth * 100, + # [-1.0, 1.0], + # N=360 + # ) + self.poses = create_rotating_spiral_poses( + [0.0, 0.0, 0.0], + self.poses, + camera_radius, + [0.0, radii[1], camera_radius * 0.25], + focus_depth * 100, + [-1.0, 1.0], + N=360, + ) + # self.poses = create_rotating_spiral_poses( + # [0.0, 0.0, 0.35], + # self.poses, + # camera_radius, + # [0.0, radii[1], camera_radius * 0.25], + # focus_depth * 100, + # [-0.2, 0.2] + # ) + + self.poses = np.stack(self.poses, axis=0) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + c2w = torch.FloatTensor(self.poses[idx]) + rays_o, rays_d = get_rays(self.directions, c2w) + + if self.use_ndc: + return self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + return torch.cat([rays_o, rays_d], dim=-1) + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file).convert("RGBA") + + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(4, -1).permute(1, 0) + img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_intrinsics(self): + K = np.eye(3) + K[0, 0] = self.focal_x * self.img_wh[0] + K[0, 2] = self.principal_point_x * self.img_wh[0] + K[1, 1] = self.focal_y * self.img_wh[1] + K[1, 2] = self.principal_point_x * self.img_wh[1] + + return K diff --git a/nlf/datasets/video3d_time.py b/nlf/datasets/video3d_time.py new file mode 100644 index 0000000..c4462c3 --- /dev/null +++ b/nlf/datasets/video3d_time.py @@ -0,0 +1,428 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT + +import json +import os + +os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1" +import cv2 +import numpy as np +import torch +from PIL import Image + +from utils.pose_utils import ( + average_poses, + correct_poses_bounds, + create_rotating_spiral_poses, + create_spiral_poses, + interpolate_poses, +) +from utils.ray_utils import ( + get_ndc_rays_fx_fy, + get_pixels_for_image, + get_ray_directions_K, + get_rays, + sample_images_at_xy, +) + +from .base import Base6DDataset + + +class Video3DTimeDataset(Base6DDataset): + def __init__(self, cfg, split="train", **kwargs): + self.use_reference = cfg.dataset.use_reference if "use_reference" in cfg.dataset else False + self.correct_poses = cfg.dataset.correct_poses if "correct_poses" in cfg.dataset else False + + self.use_ndc = cfg.dataset.use_ndc if "use_ndc" in cfg.dataset else False + + self.num_frames = cfg.dataset.num_frames if "num_frames" in cfg.dataset else 1 + self.start_frame = cfg.dataset.start_frame if "start_frame" in cfg.dataset else 1 + self.keyframe_step = cfg.dataset.keyframe_step if "keyframe_step" in cfg.dataset else 1 + self.num_keyframes = ( + cfg.dataset.num_keyframes if "num_keyframes" in cfg.dataset else self.num_frames // self.keyframe_step + ) + + self.load_full_step = cfg.dataset.load_full_step if "load_full_step" in cfg.dataset else 1 + self.subsample_keyframe_step = ( + cfg.dataset.subsample_keyframe_step if "subsample_keyframe_step" in cfg.dataset else 1 + ) + self.subsample_keyframe_frac = ( + cfg.dataset.subsample_keyframe_frac if "subsample_keyframe_frac" in cfg.dataset else 1.0 + ) + self.subsample_frac = cfg.dataset.subsample_frac if "subsample_frac" in cfg.dataset else 1.0 + + self.keyframe_offset = 0 + self.frame_offset = 0 + + super().__init__(cfg, split, **kwargs) + + def read_meta(self): + W, H = self.img_wh + + # Image paths + self.num_rows = self.dataset_cfg.lightfield_rows + self.num_cols = self.dataset_cfg.lightfield_cols + + rows = self.num_rows + cols = self.num_cols + + self.images_per_frame = rows * cols + self.total_num_views = rows * cols + + # Video paths + self.frame_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir)))[ + self.start_frame : self.start_frame + self.num_frames + ] + + # Image and pose paths + self.image_paths = [] + self.pose_paths = [] + + for frame_path in self.frame_paths: + all_paths = sorted(self.pmgr.ls(os.path.join(self.root_dir, frame_path))) + + self.image_paths += [os.path.join(self.root_dir, frame_path, p) for p in all_paths if p.endswith(".png")] + self.pose_paths += [os.path.join(self.root_dir, frame_path, p) for p in all_paths if p.endswith(".json")] + + # Set up poses, times, frames + self.poses = [] + self.times = [] + self.frames = [] + self.intrinsics = [] + + for i, pose_path in enumerate(self.pose_paths): + with self.pmgr.open(pose_path, "r") as f: + meta = json.load(f) + + if "frame" in meta: + frame = meta["frame"] + else: + frame = int(pose_path.split("/")[-2].split("frame_")[-1]) + + # Intrinsics + if i == 0: + self.global_meta = meta + self.start_frame = frame + self.end_frame = self.start_frame + self.num_frames - 1 + + # Intrinsics + focal_x = meta["normalized_focal_length_x"] + focal_y = meta["normalized_focal_length_y"] + principal_point_x = meta["normalized_principal_point_x"] + principal_point_y = meta["normalized_principal_point_y"] + + K = np.eye(3) + K[0, 0] = focal_x * W + K[0, 2] = principal_point_x * W + K[1, 1] = focal_y * H + K[1, 2] = principal_point_y * H + self.intrinsics.append(K) + + # Get current pose + 
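# (camera_to_world is read from the per-frame JSON; only its top 3x4 rotation/translation block is kept as the pose.) +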
frame_matrix = np.array(meta["camera_to_world"]) + pose = frame_matrix[:3, :4] + self.poses += [pose] + + # Time + self.times.append((frame - self.start_frame) / (self.num_frames - 1)) + self.frames.append(frame - self.start_frame) + + self.poses = np.stack(self.poses, axis=0) + self.intrinsics = np.stack(self.intrinsics, axis=0) + self.times = np.array(self.times) + self.frames = np.array(self.frames) + self.K = self.intrinsics[0] + + ## Bounds, common for all scenes + self.near = self.global_meta["near_clip"] + self.far = self.global_meta["far_clip"] + self.bounds = np.array([self.near, self.far]) + + ## Correct poses, bounds + if self.use_ndc or self.correct_poses: + self.poses, self.poses_avg, self.bounds = correct_poses_bounds( + np.copy(self.poses), self.bounds, flip=False, center=True + ) + + self.near = self.bounds.min() * 0.95 + self.far = self.bounds.max() * 1.05 + self.depth_range = np.array([2 * self.near, self.far]) + + ## Holdout validation images + if self.val_set == "lightfield": + step = self.dataset_cfg.lightfield_step + rows = self.dataset_cfg.lightfield_rows + cols = self.dataset_cfg.lightfield_cols + val_indices = [] + + self.val_pairs = self.dataset_cfg.val_pairs if "val_pairs" in self.dataset_cfg else [] + self.val_all = (step == 1 and len(self.val_pairs) == 0) or self.val_all + + for row in range(rows): + for col in range(cols): + idx = row * rows + col + + if row % step != 0 or col % step != 0 or ([row, col] in self.val_pairs): + val_indices += [frame * self.images_per_frame + idx for frame in range(self.num_frames)] + + if self.val_num > 0: + val_indices = val_indices[: self.val_num] + + elif len(self.val_set) > 0: + val_indices = self.val_set + elif self.val_skip != "inf": + self.val_skip = min(len(self.image_paths), self.val_skip) + val_indices = list(range(0, len(self.image_paths), self.val_skip)) + else: + val_indices = [] + + train_indices = [i for i in range(len(self.image_paths)) if i not in val_indices] + + if self.val_all: + val_indices = [i for i in train_indices] # noqa + + if self.split == "val" or self.split == "test": + self.image_paths = [self.image_paths[i] for i in val_indices] + self.poses = self.poses[val_indices] + self.intrinsics = self.intrinsics[val_indices] + self.frames = self.frames[val_indices] + self.times = self.times[val_indices] + elif self.split == "train": + self.image_paths = [self.image_paths[i] for i in train_indices] + self.poses = self.poses[train_indices] + self.intrinsics = self.intrinsics[train_indices] + self.frames = self.frames[train_indices] + self.times = self.times[train_indices] + + def subsample(self, coords, rgb, frame): + if (frame % self.load_full_step) == 0: + return coords, rgb + elif (frame % self.subsample_keyframe_step) == 0: + subsample_every = int(np.round(1.0 / self.subsample_keyframe_frac)) + offset = self.keyframe_offset + self.keyframe_offset += 1 + # num_take = int(np.round(coords.shape[0] * self.subsample_keyframe_frac)) + # perm = torch.tensor( + # np.random.permutation(coords.shape[0]) + # )[:num_take] + else: + subsample_every = int(np.round(1.0 / self.subsample_frac)) + offset = self.frame_offset + self.frame_offset += 1 + # num_take = int(np.round(coords.shape[0] * self.subsample_frac)) + # perm = torch.tensor( + # np.random.permutation(coords.shape[0]) + # )[:num_take] + + # return coords[perm].view(-1, coords.shape[-1]), rgb[perm].view(-1, rgb.shape[-1]) + pixels = get_pixels_for_image(self.img_wh[1], self.img_wh[0]).reshape(-1, 2).long() + mask = ((pixels[..., 0] + pixels[..., 1] + offset) % 
subsample_every) == 0.0 + return coords[mask].view(-1, coords.shape[-1]), rgb[mask].view(-1, rgb.shape[-1]) + + def prepare_train_data(self, reset=False): + self.num_images = len(self.image_paths) + + ## Collect training data + self.all_coords = [] + self.all_rgb = [] + num_pixels = 0 + + for idx in range(len(self.image_paths)): + # for idx in range(1): + cur_coords = self.get_coords(idx) + cur_rgb = self.get_rgb(idx) + cur_frame = int(np.round(self.times[idx] * (self.num_frames - 1))) + + # Subsample + cur_coords, cur_rgb = self.subsample(cur_coords, cur_rgb, cur_frame) + + # Coords + self.all_coords += [cur_coords] + + # Color + self.all_rgb += [cur_rgb] + + # Number of pixels + num_pixels += cur_rgb.shape[0] + + print("Full res images loaded:", num_pixels / (self.img_wh[0] * self.img_wh[1])) + + # Format / save loaded data + self.all_coords = torch.cat(self.all_coords, 0) + self.all_rgb = torch.cat(self.all_rgb, 0) + self.update_all_data() + + def update_all_data(self): + self.all_weights = self.get_weights() + + ## All inputs + self.all_inputs = torch.cat( + [ + self.all_coords, + self.all_rgb, + self.all_weights, + ], + -1, + ) + + def format_batch(self, batch): + batch["coords"] = batch["inputs"][..., : self.all_coords.shape[-1]] + batch["rgb"] = batch["inputs"][..., self.all_coords.shape[-1] : self.all_coords.shape[-1] + 3] + batch["weight"] = batch["inputs"][..., -1:] + del batch["inputs"] + + return batch + + def prepare_render_data(self): + # Get poses + if not self.render_interpolate: + close_depth, inf_depth = self.bounds.min() * 0.9, self.bounds.max() * 5.0 + + dt = 0.75 + mean_dz = 1.0 / (((1.0 - dt) / close_depth + dt / inf_depth)) + focus_depth = mean_dz + + poses_per_frame = self.poses.shape[0] // self.num_frames + poses_one_frame = self.poses[ + (self.num_frames // 2) * poses_per_frame : (self.num_frames // 2 + 1) * poses_per_frame + ] + poses_each_frame = interpolate_poses(self.poses[::poses_per_frame], self.render_supersample) + radii = np.percentile(np.abs(poses_one_frame[..., 3]), 60, axis=0) + + if self.num_frames > 1: + poses = create_spiral_poses( + poses_one_frame, + radii, + focus_depth * 100, + N=self.num_frames * self.render_supersample, + ) + + reference_pose = np.eye(4) + reference_pose[:3, :4] = self.poses[(self.num_frames // 2) * poses_per_frame] + reference_pose = np.linalg.inv(reference_pose) + + for pose_idx in range(len(poses)): + cur_pose = np.eye(4) + cur_pose[:3, :4] = poses[pose_idx] + poses[pose_idx] = poses_each_frame[pose_idx] @ (reference_pose @ cur_pose) + else: + poses = create_spiral_poses(poses_one_frame, radii, focus_depth * 100, N=120) + + self.poses = np.stack(poses, axis=0) + else: + self.poses = interpolate_poses(self.poses, self.render_supersample) + + # Get times + if (self.num_frames - 1) > 0: + self.times = np.linspace(0, self.num_frames - 1, len(self.poses)) + + if not self.render_interpolate_time: + self.times = np.round(self.times) + + self.times = self.times / (self.num_frames - 1) + else: + self.times = [0.0 for p in self.poses] + + def to_ndc(self, rays): + return get_ndc_rays_fx_fy(self.img_wh[1], self.img_wh[0], self.K[0, 0], self.K[1, 1], self.near, rays) + + def get_coords(self, idx): + if self.split != "train" and not self.val_all: + cam_idx = 3 + else: + cam_idx = idx % self.images_per_frame + + if self.split != "render": + K = torch.FloatTensor(self.intrinsics[idx]) + else: + K = torch.FloatTensor(self.intrinsics[0]) + + c2w = torch.FloatTensor(self.poses[idx]) + time = self.times[idx] + + print("Loading time:", 
np.round(time * (self.num_frames - 1))) + + directions = get_ray_directions_K(self.img_wh[1], self.img_wh[0], K, centered_pixels=True) + + # Convert to world space / NDC + rays_o, rays_d = get_rays(directions, c2w) + + if self.use_ndc: + rays = self.to_ndc(torch.cat([rays_o, rays_d], dim=-1)) + else: + rays = torch.cat([rays_o, rays_d], dim=-1) + + # Add camera idx + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * cam_idx], dim=-1) + + # Add times + rays = torch.cat([rays, torch.ones_like(rays[..., :1]) * time], dim=-1) + + # Return + return rays + + def get_rgb(self, idx): + image_path = self.image_paths[idx] + + with self.pmgr.open(os.path.join(self.root_dir, "images", image_path), "rb") as im_file: + img = Image.open(im_file) + img = img.convert("RGB") + + if img.size[0] != self._img_wh[0] or img.size[1] != self._img_wh[1]: + img = img.resize(self._img_wh, Image.LANCZOS) + + if self.img_wh[0] != self._img_wh[0] or self.img_wh[1] != self._img_wh[1]: + img = img.resize(self.img_wh, Image.BOX) + + img = self.transform(img) + img = img.view(3, -1).permute(1, 0) + + # img = img.view(4, -1).permute(1, 0) + # img = img[:, :3] * img[:, -1:] + (1 - img[:, -1:]) + + return img + + def get_intrinsics(self): + return self.K + + def __getitem__(self, idx): + if self.split == "render": + batch = { + "coords": self.get_coords(idx), + "pose": self.poses[idx], + "time": self.times[idx], + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + + elif self.split == "test": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + elif self.split == "val": + batch = { + "coords": self.get_coords(idx), + "rgb": self.get_rgb(idx), + "idx": idx, + } + + batch["weight"] = torch.ones_like(batch["coords"][..., -1:]) + else: + batch = { + "inputs": self.all_inputs[idx], + } + + W, H, batch = self.crop_batch(batch) + batch["W"] = W + batch["H"] = H + + return batch diff --git a/nlf/embedding/__init__.py b/nlf/embedding/__init__.py new file mode 100644 index 0000000..b049677 --- /dev/null +++ b/nlf/embedding/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from .embedding import embedding_dict diff --git a/nlf/embedding/affine.py b/nlf/embedding/affine.py new file mode 100644 index 0000000..28d77f3 --- /dev/null +++ b/nlf/embedding/affine.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import torch +from torch import nn + +from nlf.activations import get_activation +from nlf.nets import net_dict + + +class AffineEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + + self.in_channels = in_channels + self.out_channels = cfg.out_channels if cfg.out_channels is not None else in_channels + self.homogenous_layer = nn.Linear(self.in_channels, self.out_channels) + + def forward(self, x): + return self.homogenous_layer(x) + + def set_iter(self, i): + self.cur_iter = i + + +class LocalAffineEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + + self.cfg = cfg + self.dummy_layer = nn.Linear(1, 1) + + # Param + self.param_channels = in_channels if cfg.param_channels == "all" else cfg.param_channels + self.in_channels = in_channels + self.latent_dim = kwargs["latent_dim"] if "latent_dim" in kwargs else 0 + + # Extra + self.extra_in_channels = cfg.extra_in_channels if "extra_in_channels" in cfg else 0 + self.extra_out_channels = cfg.extra_out_channels if "extra_out_channels" in cfg else self.extra_in_channels + self.extra_tform_size = self.extra_in_channels * self.extra_out_channels + + self.extra_tform_activation = cfg.extra_tform_activation if "extra_tform_activation" in cfg else "identity" + self.extra_bias_activation = cfg.extra_bias_activation if "extra_bias_activation" in cfg else "zero" + self.extra_activation = cfg.extra_activation if "extra_activation" in cfg else "identity" + + # Tform + self.tform_in_channels = self.param_channels + self.tform_out_channels = cfg.tform_out_channels + self.tform_size = self.tform_in_channels * self.tform_out_channels + + self.tform_scale = cfg.tform_scale if "tform_scale" in cfg else 1.0 + self.add_identity = cfg.add_identity if "add_identity" in cfg else False + + self.tform_activation = cfg.tform_activation if "tform_activation" in cfg else "identity" + self.bias_activation = cfg.bias_activation if "bias_activation" in cfg else "zero" + self.activation = cfg.activation if "activation" in cfg else "identity" + + # Tform outputs + self.total_pred_channels = self.tform_size + self.extra_tform_size + self.out_channels_after_tform = cfg.tform_out_channels + + if self.bias_activation != "zero": + self.total_pred_channels += self.tform_out_channels + + if self.extra_bias_activation != "zero": + self.total_pred_channels += self.extra_out_channels + + # Outputs + self.out_channels = self.extra_out_channels + self.out_channels_after_tform + + # Net + if "depth" in cfg.net: + cfg.net["depth"] -= 2 + cfg.net["linear_last"] = False + + self.net = net_dict[cfg.net.type]( + self.in_channels, self.total_pred_channels, cfg.net, latent_dim=self.latent_dim, group=self.group + ) + + # Out + self.out_extra_tform_layer = get_activation(self.extra_tform_activation) + self.out_extra_bias_layer = get_activation(self.extra_bias_activation) + + self.out_tform_layer = get_activation(self.tform_activation) + self.out_bias_layer = get_activation(self.bias_activation) + + self.out_extra_layer = get_activation(self.extra_activation) + self.out_layer = get_activation(self.activation) + + def embed_params(self, x, **render_kwargs): + if "input_x" not in render_kwargs or render_kwargs["input_x"] is None: + input_x = x + else: + input_x = 
render_kwargs["input_x"] + + _, _, tform_flat, _ = self._embed_params(input_x) + + return tform_flat + + def _embed_params(self, x): + # MLP + x = self.net(x) + + # Outputs + extra_tform_flat = self.out_extra_tform_layer(x[..., : self.extra_tform_size]) + x = x[..., self.extra_tform_size :] + + if self.extra_bias_activation != "zero": + extra_bias = self.out_extra_bias_layer(x[..., : self.extra_out_channels]) + x = x[..., self.extra_out_channels :] + else: + extra_bias = None + + if self.bias_activation == "zero": + tform_flat = self.out_tform_layer(x) + bias = None + else: + tform_flat = self.out_tform_layer(x[..., : -self.out_channels_after_tform]) + bias = self.out_bias_layer(x[..., -self.out_channels_after_tform :]) + + return extra_tform_flat, extra_bias, tform_flat, bias + + def forward(self, x, **render_kwargs): + batch_size = x.shape[0] + + if "input_x" not in render_kwargs or render_kwargs["input_x"] is None: + input_x = x + else: + input_x = render_kwargs["input_x"] + + extra_tform, extra_bias, tform, bias = self._embed_params(input_x) + + # Extra channel transform + extra_x = x[..., : self.extra_in_channels] + + if self.extra_tform_size > 0: + extra_tform = extra_tform.view(-1, self.extra_out_channels, self.extra_in_channels) + extra_x = (extra_tform @ extra_x.unsqueeze(-1)).squeeze(-1) + + if extra_bias is not None: + extra_x = extra_x + extra_bias + + if self.extra_in_channels > 0: + extra_x = extra_x.view(batch_size, -1) + extra_x = self.out_extra_layer(extra_x) + + # Get transform + if self.add_identity: + tform = tform.reshape(-1, self.out_channels_after_tform, self.param_channels) + tform = tform * self.tform_scale + torch.eye( + self.out_channels_after_tform, self.param_channels, device=tform.device + ) + + tform = tform.view(-1, self.out_channels_after_tform, self.param_channels) + + # Apply transform + x = x[..., : self.param_channels] + x = self.out_layer((tform @ x.unsqueeze(-1)).squeeze(-1)) + + # Add bias + if bias is not None: + x = x + bias + + # Return + x = torch.cat([extra_x, x], -1) + + if "embed_params" in render_kwargs and render_kwargs["embed_params"]: + if bias is not None: + return torch.cat([tform.view(tform.shape[0], -1), bias], -1), x + else: + return tform.view(tform.shape[0], -1), x + else: + return x + + def set_iter(self, i): + self.cur_iter = i + self.net.set_iter(i) + + +affine_embedding_dict = { + "affine": AffineEmbedding, + "local_affine": LocalAffineEmbedding, +} diff --git a/nlf/embedding/embedding.py b/nlf/embedding/embedding.py new file mode 100644 index 0000000..e150111 --- /dev/null +++ b/nlf/embedding/embedding.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import pdb +from typing import Dict + +import torch +from torch import nn + + +# Identity embedding +class IdentityEmbedding(nn.Module): + def __init__(self, in_channels, cfg, *args, **kwargs): + + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + + self.in_channels = in_channels + self.out_channels = in_channels + self.layer = nn.Linear(1, 1) + + def forward(self, x: torch.Tensor, kwargs: Dict[str, str]): + return x + + def set_iter(self, i): + self.cur_iter = i + + +embedding_dict = { + "identity": IdentityEmbedding, +} + +# Add feature embeddings +from .feature import feature_embedding_dict + +for k, v in feature_embedding_dict.items(): + embedding_dict[k] = v + +# Add affine embeddings +from .affine import affine_embedding_dict + +for k, v in affine_embedding_dict.items(): + embedding_dict[k] = v + +# Add ray embeddings +from .ray import ray_embedding_dict + +for k, v in ray_embedding_dict.items(): + embedding_dict[k] = v + +# Add point embeddings +from .point import point_embedding_dict + +for k, v in point_embedding_dict.items(): + embedding_dict[k] = v + + +# Ray point embedding +class RayPointEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # In, out channels + self.in_channels = in_channels + + # Track iterations + self.cur_iter = 0 + self.wait_iters = [] + self.stop_iters = [] + + # Create ray and point embeddings + self.embedding_keys = list(cfg.embeddings.keys()) + self.embeddings = nn.ModuleList() + in_channels = self.in_channels + + for embedding_key in cfg.embeddings.keys(): + embedding_cfg = cfg.embeddings[embedding_key] + self.wait_iters.append(embedding_cfg.wait_iters if "wait_iters" in embedding_cfg else 0) + self.stop_iters.append(embedding_cfg.stop_iters if "stop_iters" in embedding_cfg else float("inf")) + + # Create net + embedding = embedding_dict[embedding_cfg.type](in_channels, embedding_cfg, **kwargs) + self.embeddings.append(embedding) + + # Out channels + self.out_channels = in_channels + + def forward(self, rays: torch.Tensor, render_kwargs: Dict[str, str]): + # Forward + x = { + "rays": rays, + } + + # print("embedding:", rays[:,-1].mean()) + + for idx, embedding in enumerate(self.embeddings): + if self.cur_iter >= self.wait_iters[idx] and self.cur_iter < self.stop_iters[idx]: + x = embedding(x, render_kwargs) + + # Flatten + for key in x.keys(): + x[key] = x[key].view(rays.shape[0], -1) + + # Return + return x + + def set_iter(self, i): + self.cur_iter = i + + for idx in range(len(self.embeddings)): + self.embeddings[idx].set_iter(i - self.wait_iters[idx]) + + +embedding_dict["ray_point"] = RayPointEmbedding diff --git a/nlf/embedding/feature.py b/nlf/embedding/feature.py new file mode 100644 index 0000000..1eff458 --- /dev/null +++ b/nlf/embedding/feature.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +from torch import nn + +from nlf.nets import net_dict + + +class FeatureEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + + self.cfg = cfg + self.in_channels = in_channels + + # Outputs + if self.cfg.net.depth == 0: + self.out_channels = in_channels + else: + self.out_channels = cfg.out_channels + + # Net + if "depth" in cfg.net: + cfg.net["depth"] -= 2 + cfg.net["linear_last"] = False + + self.net = net_dict[cfg.net.type](self.in_channels, self.out_channels, cfg.net) + + def forward(self, x, **render_kwargs): + if self.cfg.net.depth == 0: + return x + else: + return self.net(x) + + def set_iter(self, i): + self.cur_iter = i + + +feature_embedding_dict = {"feature": FeatureEmbedding} diff --git a/nlf/embedding/point.py b/nlf/embedding/point.py new file mode 100644 index 0000000..fa5a96e --- /dev/null +++ b/nlf/embedding/point.py @@ -0,0 +1,774 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import pdb +from typing import Dict, List + +import numpy as np +import torch +from torch import nn + +from nlf.activations import get_activation +from nlf.contract import contract_dict +from nlf.nets import net_dict +from nlf.param import RayParam +from nlf.pe import IdentityPE, pe_dict +from utils.flow_utils import get_base_time +from utils.intersect_utils import sort_z +from utils.ray_utils import dot, from_ndc, get_ray_density, reflect +from utils.rotation_conversions import axis_angle_to_matrix + + +class PointPredictionEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # Filtering + self.filter = cfg.filter if "filter" in cfg else False + + # Rays & points + self.rays_name = cfg.rays_name if "rays_name" in cfg else "rays" + self.points_name = cfg.points_name if "points_name" in cfg else "points" + + # Inputs + self.in_z_channels = cfg.in_z_channels if "in_z_channels" in cfg else 1 + self.inputs = cfg.inputs + self.input_names = list(self.inputs.keys()) + self.input_shapes = [self.inputs[k] for k in self.inputs.keys()] + + # Ray parameterization and positional encoding + self.params = nn.ModuleList() + self.pes = nn.ModuleList() + self.in_channels = 0 + self.param_channels = [] + + for param_key in cfg.params.keys(): + param_cfg = cfg.params[param_key] + + # Start, end channels + self.param_channels.append((param_cfg.start, param_cfg.end)) + in_channels = param_cfg.end - param_cfg.start + + # Create param + if "in_channels" not in param_cfg.param: + param_cfg.param.in_channels = in_channels + + param = RayParam(param_cfg.param) + self.params.append(param) + + # Create PE + if "pe" in param_cfg: + pe = pe_dict[param_cfg.pe.type](param.out_channels, param_cfg.pe) + else: + pe = IdentityPE(param.out_channels) + + self.pes.append(pe) + + # Update in channels + self.in_channels += pe.out_channels + + # self.total_in_channels = self.in_channels * self.in_z_channels + self.total_in_channels = self.in_channels + + # Outputs + self.out_z_channels = cfg.out_z_channels if "out_z_channels" in cfg else 1 + self.outputs = cfg.outputs + self.output_names = list(self.outputs.keys()) + self.output_shapes = 
[self.outputs[k].channels for k in self.outputs.keys()] + self.output_residual = [ + self.outputs[k].residual if "residual" in self.outputs[k] else False for k in self.outputs.keys() + ] + self.out_channels = sum(self.output_shapes) + self.total_out_channels = self.out_channels * self.out_z_channels + self.out_z_per_in_z = self.out_z_channels // self.in_z_channels + + # Net + if "depth" in cfg.net: + cfg.net["depth"] -= 2 + cfg.net["linear_last"] = False + + self.net = net_dict[cfg.net.type]( + self.total_in_channels, + # self.in_channels, + # self.total_out_channels, + self.out_channels * self.out_z_per_in_z, + cfg.net, + group=self.group, + ) + + # Activations + self.activations = nn.ModuleList() + + for output_key in self.outputs.keys(): + output_cfg = self.outputs[output_key] + + if "activation" in output_cfg: + self.activations.append(get_activation(output_cfg.activation)) + else: + self.activations.append(get_activation("identity")) + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + rays = x[self.rays_name] + points = x[self.points_name] + + # Get inputs + inputs = [] + + for inp_idx, inp_name in enumerate(self.input_names): + if inp_name == "viewdirs": + inputs.append(rays[..., None, 3:6].repeat(1, points.shape[1], 1)) + elif inp_name == "origins": + inputs.append(rays[..., None, 0:3].repeat(1, points.shape[1], 1)) + elif inp_name == "times": + inputs.append(rays[..., None, -1:].repeat(1, points.shape[1], 1)) + else: + inputs.append(x[inp_name][..., : self.input_shapes[inp_idx]]) + + inputs = torch.cat(inputs, -1) + + # Apply parameterization + param_inputs = [] + + for idx in range(len(self.params)): + cur_input = inputs[..., self.param_channels[idx][0] : self.param_channels[idx][1]] + param_inputs.append(self.pes[idx](self.params[idx](cur_input))) + + inputs = torch.cat(param_inputs, -1).view(-1, self.total_in_channels) + + if self.filter: + # Run on valid + valid_mask = x["distances"].view(-1) > 0.0 + outputs_flat = inputs.new_zeros(inputs.shape[0], self.total_out_channels) + outputs_flat_valid = self.net(inputs[valid_mask]) + outputs_flat[valid_mask] = outputs_flat_valid + else: + # Run on all + outputs_flat = self.net(inputs) + + # Get outputs + outputs_flat = outputs_flat.view(points.shape[0], -1, self.out_channels) + outputs_flat = torch.split(outputs_flat, self.output_shapes, -1) + + for i in range(len(self.output_shapes)): + cur_output = self.activations[i](outputs_flat[i]) + + if self.output_residual[i]: + last_output = x[self.output_names[i]].view(cur_output.shape[0], -1, 1, cur_output.shape[-1]) + cur_output_shape = cur_output.shape + cur_output = ( + cur_output.view(cur_output.shape[0], last_output.shape[1], -1, cur_output.shape[-1]) + last_output + ) + # ) + torch.mean(last_output, dim=-2, keepdim=True) + cur_output = cur_output.view(*cur_output_shape) + + x[self.output_names[i]] = cur_output + + return x + + def set_iter(self, i): + self.cur_iter = i + self.net.set_iter(i) + + for act in self.activations: + if getattr(act, "set_iter", None) is not None: + act.set_iter(i) + + for pe in self.pes: + if getattr(pe, "set_iter", None) is not None: + pe.set_iter(i) + + +class ExtractFieldsEmbedding(nn.Module): + fields: List[str] + + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + self.fields = list(cfg.fields) + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + 
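# Merge the statically configured fields with any extra fields requested at render time + # (render_kwargs["fields"]), returning only the entries that are actually present in x. + # Illustrative example: with cfg.fields = ["points", "weights"] and a render-time request for + # "spatial_flow", whichever of those keys exist in the intermediate dict are passed through. +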
fields = self.fields + list(render_kwargs.get("fields", [])) + outputs = {} + + for field in fields: + if field in x: + outputs[field] = x[field] + + return outputs + + def set_iter(self, i): + self.cur_iter = i + + +class CreatePointsEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # Rays & points + self.rays_name = cfg.rays_name if "rays_name" in cfg else "rays" + self.out_points_field = cfg.out_points_field if "out_points_field" in cfg else "points" + + # Activation + self.activation = get_activation(cfg.activation if "activation" in cfg else "sigmoid") + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + rays = x[self.rays_name] + dists = x["distances"] + + points = rays[..., None, 0:3] + rays[..., None, 3:6] * dists.unsqueeze(-1) + x[self.out_points_field] = points + + return x + + def set_iter(self, i): + self.cur_iter = i + + +class PointDensityEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # Rays + self.rays_name = cfg.rays_name if "rays_name" in cfg else "rays" + + # In and out field + self.in_field = cfg.in_field if "in_field" in cfg else "sigma" + self.out_field = cfg.out_field if "out_field" in cfg else "sigma" + + # Activation + self.activation = get_activation(cfg.activation if "activation" in cfg else "sigmoid") + + self.shift = cfg.shift if "shift" in cfg else 0 + self.shift_range = cfg.shift_range if "shift_range" in cfg else 0 + + self.window_start_iters = cfg.window_start_iters if "window_start_iters" in cfg else 0 + self.window_iters = cfg.window_iters if "window_iters" in cfg else 0 + + def window(self): + cur_iter = self.cur_iter - self.window_start_iters + + if cur_iter < 0: + return 0.0 + elif cur_iter >= self.window_iters: + return 1.0 + else: + return cur_iter / self.window_iters + + def get_sigma(self, z_vals, rays): + sigma = self.activation(z_vals[..., -1:] + self.shift) + return sigma + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + rays = x[self.rays_name] + window_fac = self.window() + x[self.out_field] = self.get_sigma(x[self.in_field], rays) * window_fac + (1 - window_fac) + return x + + def set_iter(self, i): + self.cur_iter = i + + +class PointOffsetEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # In & out fields + self.in_density_field = cfg.in_density_field if "in_density_field" in cfg else "sigma" + + self.in_offset_field = cfg.in_offset_field if "in_offset_field" in cfg else "point_offset" + self.out_offset_field = cfg.out_offset_field if "out_offset_field" in cfg else "offset" + + self.in_points_field = cfg.in_points_field if "in_points_field" in cfg else "points" + self.out_points_field = cfg.out_points_field if "out_points_field" in cfg else "points" + self.save_points_field = cfg.save_points_field if "save_points_field" in cfg else None + + # Point offset + self.use_sigma = cfg.use_sigma if "use_sigma" in cfg else True + self.activation = get_activation(cfg.activation if "activation" in cfg else "identity") + + # Dropout params + self.use_dropout = "dropout" in cfg + 
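# When a dropout block is configured, forward() zeroes the predicted point offsets during + # training on every iteration where cur_iter % dropout.frequency == 0, until dropout.stop_iter + # is reached, presumably so the model cannot rely too heavily on the offsets early on. +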
self.dropout_frequency = cfg.dropout.frequency if "dropout" in cfg else 2 + self.dropout_stop_iter = cfg.dropout.stop_iter if "dropout" in cfg else float("inf") + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + in_points = x[self.in_points_field] + + if self.save_points_field is not None: + x[self.save_points_field] = in_points + + # Get point offset + if self.use_sigma and self.in_density_field in x: + sigma = x[self.in_density_field] + else: + sigma = torch.zeros(in_points.shape[0], in_points.shape[1], 1, device=in_points.device) + + point_offset = self.activation(x[self.in_offset_field]) * (1 - sigma) + + # Dropout + if ( + self.use_dropout + and ((self.cur_iter % self.dropout_frequency) == 0) + and self.cur_iter < self.dropout_stop_iter + and self.training + ): + point_offset = torch.zeros_like(point_offset) + + # Apply offset + x[self.in_offset_field] = point_offset + x[self.out_points_field] = x[self.in_points_field] + point_offset + + if self.out_offset_field is not None: + x[self.out_offset_field] = point_offset + + return x + + def set_iter(self, i): + self.cur_iter = i + + +class GenerateNumSamplesEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + self.sample_range = cfg.sample_range + self.inference_samples = cfg.inference_samples + self.total_samples = cfg.total_samples + + self.num_samples_field = cfg.num_samples_field if "num_samples_field" in cfg else "num_samples" + self.total_samples_field = cfg.total_samples_field if "total_samples_field" in cfg else "total_samples" + + self.rays_name = cfg.rays_name if "rays_name" in cfg else "rays" + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + # Get num samples + if self.training: + num_samples = np.random.rand() * (self.sample_range[1] - self.sample_range[0]) + self.sample_range[0] + num_samples = int(np.round(num_samples)) + else: + num_samples = self.inference_samples + + x[self.num_samples_field] = num_samples + x[self.total_samples_field] = self.total_samples + + # Add num samples to rays + rays = x[self.rays_name] + + x[self.rays_name] = torch.cat([rays, torch.ones_like(rays[..., :1]) * num_samples], dim=-1) + + return x + + def set_iter(self, i): + self.cur_iter = i + + +class SelectPointsEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + self.fields = cfg.fields + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + total_num_samples = x["total_samples"] + num_samples = x["num_samples"] + + # Get samples + samples = np.arange(0, total_num_samples, int(total_num_samples / num_samples)) + + # Select samples + for key in x.keys(): + if key in self.fields: + x[key] = x[key][:, samples].contiguous() + + return x + + def set_iter(self, i): + self.cur_iter = i + + +class RandomOffsetEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # Rays + self.rays_name = cfg.rays_name if "rays_name" in cfg else "rays" + self.in_points_field = cfg.in_points_field if "in_points_field" in cfg else "points" + self.in_distances_field = cfg.in_distances_field 
if "in_distances_field" in cfg else "distances" + + self.out_points_field = cfg.out_points_field if "out_points_field" in cfg else "points" + self.out_distances_field = cfg.out_distances_field if "out_distances_field" in cfg else "distances" + + # Random config + self.random_per_sample = cfg.random_per_sample if "random_per_sample" in cfg else 1 + self.frequency = cfg.frequency if "frequency" in cfg else 2 + self.stop_iter = cfg.stop_iter if "stop_iter" in cfg else float("inf") + + # NOTE: If used with contract, should use contract point embedding + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + if not self.training or not ((self.cur_iter % self.frequency) == 0) or self.cur_iter >= self.stop_iter: + return x + + rays = x[self.rays_name] + + points = x[self.in_points_field] + points = points.view(rays.shape[0], -1, points.shape[-1]) + + dists = x[self.in_distances_field] + dists = dists.view(rays.shape[0], -1, dists.shape[-1]) + + # Get offset + diffs = points[..., 1:, :] - points[..., :-1, :] + + offset = diffs.new_zeros(diffs.shape[0], diffs.shape[1], self.random_per_sample) + torch.linspace( + 0.0, 1.0 - 1.0 / self.random_per_sample, self.random_per_sample, device=diffs.device + ).view(1, 1, -1) + offset = offset + torch.rand_like(offset) / self.random_per_sample + + # Add offset + points = torch.cat( + [ + points[:, :-1, None, :] + offset.unsqueeze(-1) * diffs.unsqueeze(-2), + points[:, -1:, None, :].repeat(1, 1, self.random_per_sample, 1), + ], + dim=1, + ).view(points.shape[0], -1, 3) + dists = torch.linalg.norm(points - rays[..., None, :3], dim=-1) + + x[self.out_points_field] = points + x[self.out_distances_field] = dists + + # Update outputs + for key in x.keys(): + if key not in ["points", "distances", "rays"]: + x[key] = x[key].view(points.shape[0], -1, 1, x[key].shape[-1]).repeat(1, 1, self.random_per_sample, 1) + x[key] = x[key].view(points.shape[0], -1, x[key].shape[-1]) + + return x + + def set_iter(self, i): + self.cur_iter = i + + +class ColorTransformEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.opt_group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # Out fields + self.out_transform_field = cfg.out_transform_field if "out_transform_field" in cfg else "color_transform_global" + self.out_shift_field = cfg.out_shift_field if "out_shift_field" in cfg else "color_shift_global" + + # Transform + self.num_views = kwargs["system"].dm.train_dataset.total_images_per_frame + self.val_all = kwargs["system"].dm.train_dataset.val_all + self.color_embedding = nn.Parameter(torch.zeros((self.num_views, 12), device="cuda"), requires_grad=True) + self.transform_activation = get_activation( + cfg.transform_activation if "transform_activation" in cfg else "identity" + ) + self.shift_activation = get_activation(cfg.shift_activation if "shift_activation" in cfg else "identity") + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + if not self.val_all: + return x + + camera_ids = torch.round(x["rays"][..., -2]).long() + color_transforms = self.color_embedding[camera_ids] + + x[self.out_transform_field] = self.transform_activation(color_transforms[..., :9]) + x[self.out_shift_field] = self.shift_activation(color_transforms[..., -3:]) + + return x + + def set_iter(self, i): + self.cur_iter = i + self.transform_activation.set_iter(i) + self.shift_activation.set_iter(i) + + +class ContractEmbedding(nn.Module): + def 
__init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # Origin + self.origin = torch.tensor(cfg.origin if "origin" in cfg else [0.0, 0.0, 0.0], device="cuda") + + # Contract function + self.contract_fn = contract_dict[cfg.contract.type](cfg.contract, system=kwargs["system"]) + + # In & out fields + self.in_points_field = cfg.in_points_field if "in_points_field" in cfg else "points" + self.in_distance_field = cfg.in_distance_field if "in_distance_field" in cfg else "distance" + self.in_direction_field = cfg.in_direction_field if "in_direction_field" in cfg else "viewdirs" + + self.out_points_field = cfg.out_points_field if "out_points_field" in cfg else "points" + self.out_direction_field = cfg.out_direction_field if "out_direction_field" in cfg else "viewdirs" + self.out_distance_field = cfg.out_distance_field if "out_distance_field" in cfg else "distances" + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + # Get rays + rays = x["rays"] + rays = torch.cat( + [ + rays[..., :3] - self.origin[None], + rays[..., 3:6], + ], + dim=-1, + ) + + # Get points + points = x[self.in_points_field] + dists = x[self.in_distance_field] + + points, dists = self.contract_fn.contract_points_and_distance(rays[..., :3], points, dists) + + # Get viewing directions + viewdirs = torch.cat([points[..., 1:, :] - points[..., :-1, :], torch.ones_like(points[..., :1, :])], dim=1) + + # Output + x[self.out_points_field] = points + x[self.out_direction_field] = viewdirs + x[self.out_distance_field] = dists + + return x + + def set_iter(self, i): + self.cur_iter = i + + +class ReflectEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # In & out fields + self.in_points_field = cfg.in_points_field if "in_points_field" in cfg else "points" + self.in_direction_field = cfg.in_direction_field if "in_direction_field" in cfg else "viewdirs" + self.in_normal_field = cfg.in_normal_field if "in_normal_field" in cfg else "normal" + self.in_distance_field = cfg.in_distance_field if "in_distance_field" in cfg else "ref_distance" + + self.direction_offset_field = ( + cfg.direction_offset_field if "direction_offset_field" in cfg else "ref_viewdirs_offset" + ) + + self.out_points_field = cfg.out_points_field if "out_points_field" in cfg else "ref_points" + self.out_direction_field = cfg.out_direction_field if "out_direction_field" in cfg else "ref_viewdirs" + self.out_normal_field = cfg.out_normal_field if "out_normal_field" in cfg else "normal" + + # Forward facing + self.forward_facing = cfg.forward_facing if "forward_facing" in cfg else False + self.direction_init = cfg.direction_init if "direction_init" in cfg else False + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + rays = x["rays"] + + # Get points, viewdirs & normal + points = x[self.in_points_field] + + if self.in_direction_field not in x: + dirs = rays[..., None, 3:6].repeat(1, points.shape[1], 1) + else: + dirs = x[self.in_direction_field] + + normal = x[self.in_normal_field] + + if self.forward_facing: + normal[..., -1] = normal[..., -1] - 1 + elif self.direction_init: + normal = normal - dirs + + normal = nn.functional.normalize(normal, dim=-1) + x[self.out_normal_field] = normal + + # Get reflected 
directions & points + ref_dirs = reflect(dirs, normal) + ref_distance = x[self.in_distance_field] + points = points + torch.abs(ref_distance) * ref_dirs + + if self.direction_offset_field in x: + ref_dirs = ref_dirs + x[self.direction_offset_field].view(*points.shape) + ref_dirs = nn.functional.normalize(ref_dirs, dim=-1) + + # Outputs + x[self.out_points_field] = points + x[self.out_direction_field] = ref_dirs + + return x + + def set_iter(self, i): + self.cur_iter = i + + +class AdvectPointsEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # Rays + self.rays_name = cfg.rays_name if "rays_name" in cfg else "rays" + + self.in_points_field = cfg.in_points_field if "in_points_field" in cfg else "points" + self.out_points_field = cfg.out_points_field if "out_points_field" in cfg else "points" + self.save_points_field = cfg.save_points_field if "save_points_field" in cfg else None + + self.out_offset_field = cfg.out_offset_field if "out_offset_field" in cfg else "offset" + + # Flow params + self.use_spatial_flow = cfg.use_spatial_flow if "use_spatial_flow" in cfg else False + self.use_angular_flow = cfg.use_angular_flow if "use_angular_flow" in cfg else False + + self.flow_keyframes = kwargs["system"].dm.train_dataset.num_keyframes + self.total_frames = kwargs["system"].dm.train_dataset.num_frames + self.flow_scale = cfg.flow_scale if "flow_scale" in cfg else 0.0 + + self.spatial_flow_activation = get_activation( + cfg.spatial_flow_activation if "spatial_flow_activation" in cfg else "identity" + ) + self.angular_flow_rotation_activation = get_activation( + cfg.angular_flow_rotation_activation if "angular_flow_rotation_activation" in cfg else "identity" + ) + self.angular_flow_anchor_activation = get_activation( + cfg.angular_flow_anchor_activation if "angular_flow_anchor_activation" in cfg else "identity" + ) + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + rays = x[self.rays_name] + points = x[self.in_points_field] + t = rays[..., -1:] + # print("point:", t.mean()) + + if self.save_points_field is not None: + x[self.save_points_field] = points + + # Get base time and time offset + base_t = get_base_time( + t, + self.flow_keyframes, + self.total_frames, + self.flow_scale, + self.training and (not "no_flow_jitter" in render_kwargs), + ) + # print("point:", base_t.mean()) + + time_offset = (t - base_t)[..., None, :] + # time_offset = (t - base_t)[..., None, :] * (self.flow_keyframes) + + # Apply angular flow + if self.use_angular_flow: + angular_flow_rot = self.angular_flow_rotation_activation(x["angular_flow"][..., :3]) + angular_flow_anchor = self.angular_flow_anchor_activation(x["angular_flow"][..., 3:6]) + x["angular_flow_rot"] = angular_flow_rot + x["angular_flow_anchor"] = angular_flow_anchor + + angular_flow_rot = axis_angle_to_matrix(angular_flow_rot * time_offset) + + points_shape = points.shape + points = points - angular_flow_anchor + points = (angular_flow_rot.view(-1, 3, 3) @ points.view(-1, 3, 1)).squeeze(-1) + points = points.view(*points_shape) + points = points + angular_flow_anchor + + # Apply spatial flow + if self.use_spatial_flow: + spatial_flow = self.spatial_flow_activation(x["spatial_flow"]) + x["spatial_flow"] = spatial_flow + + points = points + spatial_flow * time_offset + + # Update outputs + x[self.out_points_field] = points + x["base_times"] = base_t[..., None, 
:].repeat(1, points.shape[1], 1) + x["time_offset"] = time_offset.repeat(1, points.shape[1], 1) + + if self.out_offset_field is not None: + x[self.out_offset_field] = x[self.in_points_field] - points + + # Return + return x + + def set_iter(self, i): + self.cur_iter = i + + +class AddPointOutputsEmbedding(nn.Module): + extra_outputs: List[str] + + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # Rays + self.rays_name = cfg.rays_name if "rays_name" in cfg else "rays" + + # Extra outputs + self.extra_outputs = list(cfg.extra_outputs) + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + rays = x[self.rays_name] + + if "times" in self.extra_outputs and "times" not in x: + x["times"] = rays[..., None, -1:].repeat(1, x["points"].shape[1], 1) + + if "base_times" in self.extra_outputs and "base_times" not in x: + x["base_times"] = rays[..., None, -1:].repeat(1, x["points"].shape[1], 1) + + if "viewdirs" in self.extra_outputs and "viewdirs" not in x: + x["viewdirs"] = rays[..., None, 3:6].repeat(1, x["points"].shape[1], 1) + + return x + + def set_iter(self, i): + self.cur_iter = i + + +point_embedding_dict = { + "point_prediction": PointPredictionEmbedding, + "extract_fields": ExtractFieldsEmbedding, + "create_points": CreatePointsEmbedding, + "point_density": PointDensityEmbedding, + "point_offset": PointOffsetEmbedding, + "generate_samples": GenerateNumSamplesEmbedding, + "select_points": SelectPointsEmbedding, + "random_offset": RandomOffsetEmbedding, + "color_transform": ColorTransformEmbedding, + "contract": ContractEmbedding, + "reflect": ReflectEmbedding, + "advect_points": AdvectPointsEmbedding, + "add_point_outputs": AddPointOutputsEmbedding, +} diff --git a/nlf/embedding/ray.py b/nlf/embedding/ray.py new file mode 100644 index 0000000..53f95d8 --- /dev/null +++ b/nlf/embedding/ray.py @@ -0,0 +1,384 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +from typing import Dict + +import pytorch3d.transforms as transforms +import torch +from torch import nn + +from nlf.activations import get_activation +from nlf.intersect import intersect_dict +from nlf.nets import net_dict +from nlf.param import RayParam +from nlf.pe import IdentityPE, pe_dict +from utils.intersect_utils import intersect_axis_plane + + +class CalibratePlanarEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.cfg = cfg + self.opt_group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "calibration") + + # Rays + self.rays_name = cfg.rays_name if "rays_name" in cfg else "rays" + + # Offset + self.offset = torch.nn.Parameter(torch.tensor([[0.0, 0.0]]).float().cuda(), requires_grad=True) + + self.activation = get_activation(cfg.activation if "activation" in cfg else "identity") + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + # Get rays + rays = x[self.rays_name] + rays_o = rays[..., 0:3] + rays_d = rays[..., 3:6] + + # Intersect + t = intersect_axis_plane(rays, 0.0, -1) + rays_o2 = rays_o + t.unsqueeze(-1) * rays_d + + # Add offset + offset = self.activation(self.offset) + + rays_o = torch.cat( + [ + rays_o[..., :2] + offset, + rays_o[..., 2:], + ], + dim=-1, + ) + + rays_d = torch.nn.functional.normalize(rays_o2 - rays_o, dim=-1) + + rays = torch.cat([rays_o, rays_d], dim=-1) + print("Offset:", offset) + + # Return + x[self.rays_name] = rays + return x + + def set_iter(self, i): + self.cur_iter = i + + +class CalibrateEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.cfg = cfg + self.opt_group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "calibration") + + # Rays + self.rays_name = cfg.rays_name if "rays_name" in cfg else "rays" + + # Pose parameters + self.use_pose = cfg.use_pose if "use_pose" in cfg else False + + if self.use_pose: + self.num_views = kwargs["system"].dm.train_dataset.total_num_views + self.datasets = [kwargs["system"].dm.val_dataset] + self.constant_id = cfg.constant_id if "constant_id" in cfg else 0 + + self.base_quaternions = torch.zeros((self.num_views, 4), dtype=torch.float32, device="cuda") + self.base_quaternions[:, 0] = 1.0 + + self.quaternions = torch.nn.Parameter( + torch.zeros((self.num_views, 4), dtype=torch.float32, device="cuda"), requires_grad=True + ) + self.translations = torch.nn.Parameter( + torch.zeros((self.num_views, 3), dtype=torch.float32, device="cuda"), requires_grad=True + ) + + self.quaternion_activation = get_activation( + cfg.quaternion_activation if "quaternion_activation" in cfg else "identity" + ) + self.translation_activation = get_activation( + cfg.translation_activation if "translation_activation" in cfg else "identity" + ) + + # Time + self.use_time = cfg.use_time if "use_time" in cfg else False + + if self.use_time: + self.num_frames = kwargs["system"].dm.train_dataset.num_frames + + self.time_offsets = torch.nn.Parameter( + torch.zeros((self.num_views, 1), dtype=torch.float32, device="cuda"), requires_grad=True + ) + self.time_activation = get_activation(cfg.time_activation if "time_activation" in cfg else "identity") + + # NDC + self.use_ndc = cfg.use_ndc if "use_ndc" in cfg else False + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + # Get rays + rays = x[self.rays_name] + rays_o = rays[..., 0:3] + rays_d = rays[..., 3:6] + + # Get camera IDs + if 
rays.shape[-1] > 7: + camera_ids = torch.round(rays[..., -2]).long() + else: + camera_ids = torch.round(rays[..., -1]).long() + + if self.use_pose: + quaternion_offsets = self.quaternion_activation(self.quaternions) + quaternion_offsets[self.constant_id] = 0 + quaternions = self.base_quaternions + quaternion_offsets + # quaternions = torch.nn.functional.normalize(quaternions[camera_ids].view(-1, 4), -1) + quaternions = quaternions[camera_ids].view(-1, 4) + + translation_offsets = self.translation_activation(self.translations) + translation_offsets[self.constant_id] = 0 + translations = translation_offsets[camera_ids].view(-1, 3) + + rays_d = transforms.quaternion_apply(quaternions, rays_d) + rays_o = translations + rays_o + + if self.use_time: + time_offsets = self.time_activation(self.time_offsets) + time_offsets[self.constant_id] = 0.0 + time_offsets = time_offsets[camera_ids].view(-1, 1) + + rays_t = rays[..., -1:] + print(self.time_offsets[12], self.time_offsets[1]) + rays_t = rays_t + time_offsets + + # Update rays + if self.use_pose: + updated_rays = torch.cat([rays_o, rays_d], dim=-1) + else: + updated_rays = rays[..., :6] + + # Apply NDC + if self.use_ndc: + updated_rays = self.datasets[0].to_ndc(updated_rays) + + # Update times + if self.use_time: + rays = torch.cat([updated_rays, rays[..., 6:-1], rays_t], dim=-1) + else: + rays = torch.cat([updated_rays, rays[..., 6:]], dim=-1) + + x[self.rays_name] = rays + return x + + def set_iter(self, i): + self.cur_iter = i + + if self.use_pose: + self.quaternion_activation.set_iter(i) + self.translation_activation.set_iter(i) + + if self.use_time: + self.time_activation.set_iter(i) + + +class RayPredictionEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # Rays + self.rays_name = cfg.rays_name if "rays_name" in cfg else "rays" + + # Ray parameterization and positional encoding + self.param_names = list(cfg.params.keys()) + self.params = nn.ModuleList() + self.pes = nn.ModuleList() + self.in_channels = 0 + self.param_channels = [] + + for param_key in cfg.params.keys(): + param_cfg = cfg.params[param_key] + + # Start, end channels + self.param_channels.append((param_cfg.start, param_cfg.end)) + in_channels = param_cfg.end - param_cfg.start + + # Create param + if "in_channels" not in param_cfg.param: + param_cfg.param.in_channels = in_channels + + param = RayParam(param_cfg.param, system=kwargs["system"]) + self.params.append(param) + + # Create PE + if "pe" in param_cfg: + pe = pe_dict[param_cfg.pe.type](param.out_channels, param_cfg.pe) + else: + pe = IdentityPE(param.out_channels) + + self.pes.append(pe) + + # Update in channels + self.in_channels += pe.out_channels + + # Intersect + self.z_channels = cfg.z_channels + + # Outputs + self.outputs = cfg.outputs + self.output_names = list(self.outputs.keys()) + self.output_shapes = [self.outputs[k].channels for k in self.outputs.keys()] + self.preds_per_z = sum(self.output_shapes) + + self.ray_outputs = cfg.ray_outputs if "ray_outputs" in cfg else {} + self.ray_output_names = list(self.ray_outputs.keys()) + self.ray_output_shapes = [self.ray_outputs[k].channels for k in self.ray_outputs.keys()] + + self.total_ray_out_channels = sum(self.ray_output_shapes) + self.total_point_out_channels = self.z_channels * self.preds_per_z + self.total_out_channels = self.total_point_out_channels + self.total_ray_out_channels + + # Net + if 
"depth" in cfg.net: + cfg.net["depth"] -= 2 + cfg.net["linear_last"] = False + + self.net = net_dict[cfg.net.type](self.in_channels, self.total_out_channels, cfg.net, group=self.group) + + # Activations + self.activations = nn.ModuleList() + + for output_key in self.outputs.keys(): + output_cfg = self.outputs[output_key] + + if "activation" in output_cfg: + self.activations.append(get_activation(output_cfg.activation)) + else: + self.activations.append(get_activation("identity")) + + # Ray activations + self.ray_activations = nn.ModuleList() + + for output_key in self.ray_outputs.keys(): + output_cfg = self.ray_outputs[output_key] + + if "activation" in output_cfg: + self.ray_activations.append(get_activation(output_cfg.activation)) + else: + self.ray_activations.append(get_activation("identity")) + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + rays = x[self.rays_name] + + # Apply parameterization + param_x = [] + + for idx, (param, pe) in enumerate(zip(self.params, self.pes)): + cur_x = rays[:, self.param_channels[idx][0] : self.param_channels[idx][1]] + param_x.append(pe(param(cur_x))) + + param_x = torch.cat(param_x, -1) + + # Get outputs + outputs_flat = self.net(param_x) + + # Get point outputs + if self.total_point_out_channels > 0: + point_outputs = outputs_flat[..., : self.total_point_out_channels].reshape( + rays.shape[0], self.z_channels, -1 + ) + point_outputs = torch.split(point_outputs, self.output_shapes, -1) + + for idx, activation in enumerate(self.activations): + x[self.output_names[idx]] = activation(point_outputs[idx]) + + # Get ray outputs + if self.total_ray_out_channels > 0: + ray_outputs = outputs_flat[..., self.total_point_out_channels :] + ray_outputs = torch.split(ray_outputs, self.ray_output_shapes, -1) + + for idx, activation in enumerate(self.ray_activations): + x[self.ray_output_names[idx]] = activation(ray_outputs[idx]) + + return x + + def set_iter(self, i): + self.cur_iter = i + self.net.set_iter(i) + + for act in self.activations: + if getattr(act, "set_iter", None) is not None: + act.set_iter(i) + + for act in self.ray_activations: + if getattr(act, "set_iter", None) is not None: + act.set_iter(i) + + for pe in self.pes: + if getattr(pe, "set_iter", None) is not None: + pe.set_iter(i) + + +class RayIntersectEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # Rays + self.rays_name = cfg.rays_name if "rays_name" in cfg else "rays" + + # Intersect + self.z_channels = cfg.z_channels + self.intersect_fn = intersect_dict[cfg.intersect.type](self.z_channels, cfg.intersect, **kwargs) + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + rays = x[self.rays_name] + return self.intersect_fn(rays, x, render_kwargs) + + def set_iter(self, i): + self.cur_iter = i + self.intersect_fn.set_iter(i) + + +class CreateRaysEmbedding(nn.Module): + def __init__(self, in_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.cfg = cfg + + # Rays + self.in_rays_name = cfg.in_rays_name if "in_rays_name" in cfg else "rays" + self.in_points_name = cfg.in_points_name if "in_points_name" in cfg else "points" + self.out_rays_name = cfg.out_rays_name if "out_rays_name" in cfg else "rays" + + # Extra outputs + self.extra_outputs = cfg.extra_outputs + + 
def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + rays = x[self.in_rays_name] + points = x[self.in_points_name] + x[self.out_rays_name] = torch.cat( + [ + points, + rays[..., None, 3:6].repeat(1, points.shape[1], 1), + ], + dim=-1, + ) + return x + + def set_iter(self, i): + self.cur_iter = i + + +ray_embedding_dict = { + "calibrate_planar": CalibratePlanarEmbedding, + "calibrate": CalibrateEmbedding, + "ray_prediction": RayPredictionEmbedding, + "ray_intersect": RayIntersectEmbedding, + "create_rays": CreateRaysEmbedding, +} diff --git a/nlf/intersect/__init__.py b/nlf/intersect/__init__.py new file mode 100644 index 0000000..4eeab7e --- /dev/null +++ b/nlf/intersect/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from .intersect import intersect_dict diff --git a/nlf/intersect/base.py b/nlf/intersect/base.py new file mode 100644 index 0000000..4af8a76 --- /dev/null +++ b/nlf/intersect/base.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from typing import Dict, List + +import numpy as np +import torch +from torch import nn + +from nlf.activations import get_activation +from utils.intersect_utils import sort_with, sort_z +from utils.ray_utils import get_ray_density + +from ..contract import contract_dict + + +def uniform_weight(cfg): + def weight_fn(rays, dists): + return torch.ones_like(dists) + + return weight_fn + + +def ease_max_weight(cfg): + weight_start = cfg.weight_start if "weight_start" in cfg else 1.0 + weight_end = cfg.weight_end if "weight_end" in cfg else 0.95 + + def weight_fn(rays, dists): + rays_norm = torch.abs(nn.functional.normalize(rays[..., 3:6], p=float("inf"), dim=-1)) + weights = ((rays_norm - weight_end) / (weight_start - weight_end)).clamp(0, 1) + return weights.unsqueeze(1).repeat(1, dists.shape[1] // 3, 1).view(weights.shape[0], -1) + + return weight_fn + + +weight_fn_dict = { + "uniform": uniform_weight, + "ease_max": ease_max_weight, +} + + +class Intersect(nn.Module): + sort_outputs: List[str] + + def __init__(self, z_channels, cfg, **kwargs): + super().__init__() + + self.cur_iter = 0 + self.cfg = cfg + + # Input/output size + self.z_channels = z_channels + self.in_density_field = cfg.in_density_field if "in_density_field" in cfg else "sigma" + self.out_points = cfg.out_points if "out_points" in cfg else None + self.out_distance = cfg.out_distance if "out_distance" in cfg else None + + # Other common parameters + self.forward_facing = cfg.forward_facing if "forward_facing" in cfg else False + self.normalize = cfg.normalize if "normalize" in cfg else False + self.residual_z = cfg.residual_z if "residual_z" in cfg else False + self.residual_distance = cfg.residual_distance if "residual_distance" in cfg else False + self.sort = cfg.sort if "sort" in cfg else False + self.clamp = cfg.clamp if "clamp" in cfg else False + + self.use_dataset_bounds = cfg.use_dataset_bounds if "use_dataset_bounds" in cfg else False + self.use_disparity = cfg.use_disparity if "use_disparity" in cfg else False + self.use_sigma = cfg.use_sigma if "use_sigma" in cfg else False + + # Origin + self.origin = 
torch.tensor(cfg.origin if "origin" in cfg else [0.0, 0.0, 0.0], device="cuda") + + # Minimum intersect distance + if self.use_dataset_bounds: + self.near = cfg.near if "near" in cfg else kwargs["system"].dm.train_dataset.near + else: + self.near = cfg.near if "near" in cfg else 0.0 + + # self.near = cfg.near if 'near' in cfg else 0.0 + self.far = cfg.far if "far" in cfg else float("inf") + + # Sorting + self.weight_fn = weight_fn_dict[cfg.weight_fn.type](cfg.weight_fn) if "weight_fn" in cfg else None + self.sort_outputs = list(cfg.sort_outputs) if "sort_outputs" in cfg else [] + + if self.weight_fn is not None: + self.sort_outputs.append("weights") + + # Mask + if "mask" in cfg: + self.mask_stop_iters = cfg.mask.stop_iters if "stop_iters" in cfg.mask else float("inf") + else: + self.mask_stop_iters = float("inf") + + # Contract function + if "contract" in cfg: + self.contract_fn = contract_dict[cfg.contract.type](cfg.contract, **kwargs) + self.contract_stop_iters = cfg.contract.stop_iters if "stop_iters" in cfg.contract else float("inf") + else: + self.contract_fn = contract_dict["identity"]({}) + self.contract_stop_iters = float("inf") + + # Activation + self.activation = get_activation(cfg.activation if "activation" in cfg else "identity") + + # Dropout params + self.use_dropout = "dropout" in cfg + self.dropout_frequency = cfg.dropout.frequency if "dropout" in cfg else 2 + self.dropout_stop_iter = cfg.dropout.stop_iter if "dropout" in cfg else float("inf") + + def process_z_vals(self, z_vals): + z_vals = z_vals.view(z_vals.shape[0], -1, self.z_scale.shape[-1]) * self.z_scale[None] + self.samples[None] + z_vals = z_vals.view(z_vals.shape[0], -1) + + if self.contract_fn.contract_samples: + z_vals = self.contract_fn.inverse_contract_distance(z_vals) + elif self.use_disparity: + z_vals = torch.where(torch.abs(z_vals) < 1e-8, 1e8 * torch.ones_like(z_vals), z_vals) + z_vals = 1.0 / z_vals + + return z_vals + + def forward(self, rays: torch.Tensor, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + rays = torch.cat( + [ + rays[..., :3] - self.origin[None], + rays[..., 3:6], + ], + dim=-1, + ) + + ## Z value processing + z_vals = x["z_vals"].view(rays.shape[0], -1) + + # Z activation and sigma + if self.use_sigma and self.in_density_field in x: + sigma = x[self.in_density_field].view(z_vals.shape[0], -1) + else: + sigma = torch.zeros(z_vals.shape[0], z_vals.shape[1], device=z_vals.device) + + z_vals = self.activation(z_vals.view(z_vals.shape[0], sigma.shape[1], -1)) * (1 - sigma.unsqueeze(-1)) + z_vals = z_vals.view(z_vals.shape[0], -1) + + # Apply offset + if ( + self.use_dropout + and ((self.cur_iter % self.dropout_frequency) == 0) + and self.cur_iter < self.dropout_stop_iter + and self.training + ): + z_vals = torch.zeros_like(z_vals) + + # Add samples and contract + z_vals = self.process_z_vals(z_vals) + + # Residual distances + if self.residual_z and "last_z" in x: + last_z = x["last_z"] + last_z = last_z.view(last_z.shape[0], -1, 1) + z_vals = z_vals.view(z_vals.shape[0], last_z.shape[1], -1) + z_vals = (z_vals + last_z).view(z_vals.shape[0], -1) + else: + x["last_z"] = z_vals + + # Get distances + dists = self.intersect(rays, z_vals) + + # Calculate weights + if self.weight_fn is not None: + weights = self.weight_fn(rays, dists) + else: + weights = torch.ones_like(dists) + + if "weights" not in x or x["weights"].shape[1] != weights.shape[1]: + x["weights"] = weights.unsqueeze(-1) + else: + x["weights"] = x["weights"] * weights.unsqueeze(-1) + + # Mask + mask = (dists <= 
self.near) | (dists >= self.far) | (weights == 0.0) + + if self.cur_iter > self.mask_stop_iters: + mask = torch.zeros_like(mask) + + dists = torch.where(mask, torch.zeros_like(dists), dists) + + # Sort + if self.sort: + dists, sort_idx = sort_z(dists, 1, False) + + for output_key in self.sort_outputs: + x[output_key] = sort_with(sort_idx, x[output_key]) + + # Mask again + dists = dists.unsqueeze(-1) + mask = dists == 0.0 + + # Residual distances + if self.residual_distance and "last_distance" in x: + last_dists = x["last_distance"] + last_dists = last_dists.view(last_dists.shape[0], -1, 1, 1) + dists = dists.view(dists.shape[0], last_dists.shape[1], -1, 1) + dists = (dists + last_dists).view(dists.shape[0], -1, 1) + else: + x["last_distance"] = dists + + # Get points + points = rays[..., None, :3] + rays[..., None, 3:6] * dists + + # Normalize output + if self.normalize: + r = z_vals[..., None] + 1 + fac = 1.0 / torch.sqrt(((-r + 1) * (-r + 1) + r * r) + 1e-8) + + points = torch.cat([points[..., :2] * fac, points[..., 2:3]], -1) + + # Contract + if not (self.cur_iter > self.contract_stop_iters): + points, dists = self.contract_fn.contract_points_and_distance(rays[..., :3], points, dists) + dists = torch.where(mask, torch.zeros_like(dists), dists) + + if self.out_points is not None: + x[self.out_points] = points + + if self.out_distance is not None: + x[self.out_distance] = dists + + # Return + x["points"] = points + x["distances"] = dists + x["z_vals"] = z_vals + + return x + + def intersect(self, rays, z_vals): + pass + + def set_iter(self, i): + self.cur_iter = i diff --git a/nlf/intersect/intersect.py b/nlf/intersect/intersect.py new file mode 100644 index 0000000..7bde300 --- /dev/null +++ b/nlf/intersect/intersect.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +intersect_dict = {} + +# Add primitives +from .primitive import primitive_intersect_dict + +for k, v in primitive_intersect_dict.items(): + intersect_dict[k] = v + +# Add voxel +from .voxel import voxel_intersect_dict + +for k, v in voxel_intersect_dict.items(): + intersect_dict[k] = v + +# Add z +from .z import z_intersect_dict + +for k, v in z_intersect_dict.items(): + intersect_dict[k] = v diff --git a/nlf/intersect/primitive.py b/nlf/intersect/primitive.py new file mode 100644 index 0000000..238116f --- /dev/null +++ b/nlf/intersect/primitive.py @@ -0,0 +1,511 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import numpy as np +import torch +import torch.nn.functional as F + +from nlf.activations import get_activation +from nlf.param import pluecker_pos, pluecker_pos_cylinder +from utils.intersect_utils import ( + dot, + intersect_cylinder, + intersect_plane, + intersect_sphere, + min_cylinder_radius, + min_sphere_radius, +) + +from .base import Intersect + + +class IntersectPlane(Intersect): + def __init__(self, z_channels, cfg, **kwargs): + super().__init__(z_channels, cfg, **kwargs) + + self.global_near = cfg.global_near if "global_near" in cfg else -1.0 + + self.initial = torch.tensor(cfg.initial if "initial" in cfg else -1.0) + self.end = torch.tensor(cfg.end if "end" in cfg else 1.0) + + if self.contract_fn.contract_samples: + self.initial = self.contract_fn.contract_distance(self.initial) + self.end = self.contract_fn.contract_distance(self.end) + + if self.use_disparity: + disparities = torch.linspace(1.0 / self.end, 1.0 / self.initial, z_channels, device="cuda") + self.samples = torch.flip(disparities, [0]) + else: + self.samples = torch.linspace(self.initial, self.end, z_channels, device="cuda") + + if z_channels > 1: + self.z_scale = cfg.z_scale if "z_scale" in cfg else torch.abs(self.samples[1] - self.samples[0]) + else: + self.z_scale = cfg.z_scale if "z_scale" in cfg else 1.0 + + if self.z_scale == 0.0: + self.z_scale = 1.0 + + def process_z_vals(self, normal): + normal = normal.view(normal.shape[0], -1, 4) + d = super().process_z_vals(normal[..., -1]) + return torch.cat([normal[..., :3], d[..., None]], -1).view(normal.shape[0], -1) + + def intersect(self, rays, normal): + normal = normal.view(normal.shape[0], -1, 4) + + return intersect_plane(rays[..., None, :], normal[..., :3], normal[..., -1]) + + +class IntersectEuclideanDistance(Intersect): + def __init__(self, z_channels, cfg, **kwargs): + super().__init__(z_channels, cfg, **kwargs) + + # Intersect params + self.global_near = cfg.global_near if "global_near" in cfg else -1.0 + self.min_radius = cfg.min_radius if "min_radius" in cfg else 0.0 + + self.initial = torch.tensor(cfg.initial if "initial" in cfg else 0.0) + self.end = torch.tensor(cfg.end if "end" in cfg else 2.0) + + # Contract + if self.contract_fn.contract_samples: + self.initial = self.contract_fn.contract_distance(self.initial) + self.end = self.contract_fn.contract_distance(self.end) + + if self.use_disparity: + disparities = torch.linspace(1.0 / self.end, 1.0 / self.initial, z_channels, device="cuda") + self.samples = torch.flip(disparities, [0]) + else: + self.samples = torch.linspace(self.initial, self.end, z_channels, device="cuda") + + if z_channels > 1: + self.z_scale = cfg.z_scale if "z_scale" in cfg else torch.abs(self.samples[1] - self.samples[0]) + else: + self.z_scale = cfg.z_scale if "z_scale" in cfg else 1.0 + + if self.z_scale == 0.0: + self.z_scale = 1.0 + + def intersect(self, rays, distance): + distance = distance.view(distance.shape[0], -1) + + # Min radius + if self.min_radius > 0: + d_from_o = torch.linalg.norm(rays[..., :3], dim=-1) + distance_offset = distance - d_from_o.unsqueeze(1) + else: + distance_offset = 0 + + # Add distance offset + distance = distance + distance_offset + + return distance + + +class IntersectEuclideanDistanceUnified(Intersect): + def __init__(self, z_channels, cfg, **kwargs): + super().__init__(z_channels, cfg, **kwargs) + + # Intersect params + self.global_near = cfg.global_near if "global_near" in cfg else -1.0 + self.min_radius = cfg.min_radius if "min_radius" in cfg else 0.0 + 
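+ # Sample bounds: with use_dataset_bounds, the samples span [-far, far] taken from the training dataset; otherwise they fall back to cfg.initial / cfg.end (defaults 0.0 and 1.0).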
+ if self.use_dataset_bounds: + self.initial = torch.tensor(cfg.initial if "initial" in cfg else -kwargs["system"].dm.train_dataset.far) + self.end = torch.tensor(cfg.end if "end" in cfg else kwargs["system"].dm.train_dataset.far) + else: + self.initial = torch.tensor(cfg.initial if "initial" in cfg else 0.0) + self.end = torch.tensor(cfg.end if "end" in cfg else 1.0) + + # Contract + if self.contract_fn.contract_samples: + self.initial = self.contract_fn.contract_distance(self.initial) + self.end = self.contract_fn.contract_distance(self.end) + + self.samples = torch.linspace(self.initial, self.end, z_channels, device="cuda").view(-1, 1) + + # Calculate z scale + if z_channels > 1: + self.z_scale = cfg.z_scale if "z_scale" in cfg else torch.abs(self.samples[1] - self.samples[0]) + else: + self.z_scale = cfg.z_scale if "z_scale" in cfg else 1.0 + + self.z_scale = torch.tensor(self.z_scale).view(-1, 1).cuda() + + # Unify positions + self.unify_fn = pluecker_pos(None) + + def intersect(self, rays, distance): + distance = distance.view(distance.shape[0], -1) + + # Base distance + rays_o, rays_d = rays[..., :3], rays[..., 3:6] + base_pos = self.unify_fn(rays) + diff = base_pos - rays_o + + # Add distance offset + distance = distance + (torch.sign(dot(rays_d, diff)) * torch.norm(diff, dim=-1)).unsqueeze(1) + + return distance + + +class IntersectCylinderOld(Intersect): + def __init__(self, z_channels, cfg, **kwargs): + super().__init__(z_channels, cfg, **kwargs) + + # Intersect hyper-params + if self.use_dataset_bounds: + self.initial = torch.tensor( + cfg.initial if "initial" in cfg else kwargs["system"].dm.train_dataset.near * 1.5 + ) + self.end = torch.tensor(cfg.end if "end" in cfg else kwargs["system"].dm.train_dataset.far * 1.5) + else: + self.initial = torch.tensor(cfg.initial if "initial" in cfg else 0.0) + self.end = torch.tensor(cfg.end if "end" in cfg else 1.0) + + # Contract + if self.contract_fn.contract_samples: + self.initial = self.contract_fn.contract_distance(self.initial) + self.end = self.contract_fn.contract_distance(self.end) + + # Origin scale + self.origin_scale_factor = cfg.origin_scale_factor if "origin_scale_factor" in cfg else 0.0 + self.origin_initial = torch.tensor(cfg.origin_initial if "origin_initial" in cfg else [1.0, 1.0, 1.0]).cuda() + + # Flip axes + self.flip_axes = cfg.flip_axes if "flip_axes" in cfg else False + + # Calculate samples + self.samples = torch.linspace(self.initial, self.end, z_channels, device="cuda").view(-1, 1) + + # Calculate z scale + if z_channels > 1: + self.z_scale = cfg.z_scale if "z_scale" in cfg else torch.abs(self.samples[1] - self.samples[0]) + else: + self.z_scale = cfg.z_scale if "z_scale" in cfg else 1.0 + + self.z_scale = torch.tensor(self.z_scale).view(-1, 1).cuda() + + # Unify positions + self.unify_fn = pluecker_pos(None) + + def process_origins(self, origins): + origins = origins * self.origin_scale_factor + self.origin_initial.view(1, 1, 3) + return origins + + def process_z_vals(self, z_vals): + z_vals = z_vals.view(z_vals.shape[0], -1, 4) + origins = self.process_origins(z_vals[..., :3]) + radii = super().process_z_vals(z_vals[..., -1]) + return torch.cat([origins, radii[..., None]], -1).view(z_vals.shape[0], -1) + + def intersect(self, rays, z_vals): + z_vals = z_vals.view(z_vals.shape[0], self.z_channels, 4) + origins = z_vals[..., :3] + radii = z_vals[..., -1] + + rays = torch.cat( + [ + rays[..., None, 0:3] * origins, + rays[..., None, 3:6] * origins, + ], + -1, + ) + + # Calculate intersection + return 
intersect_cylinder( + rays, + torch.zeros_like(origins), + radii, + ) + + +class IntersectCylinderNew(Intersect): + def __init__(self, z_channels, cfg, **kwargs): + super().__init__(z_channels, cfg, **kwargs) + + # Intersect hyper-params + if self.use_dataset_bounds: + if cfg.outward_facing: + self.initial = torch.tensor( + cfg.initial if "initial" in cfg else kwargs["system"].dm.train_dataset.near * 1.5 + ) + else: + self.initial = torch.tensor( + cfg.initial if "initial" in cfg else -kwargs["system"].dm.train_dataset.far * 1.5 + ) + + self.end = torch.tensor(cfg.end if "end" in cfg else kwargs["system"].dm.train_dataset.far * 1.5) + else: + self.initial = torch.tensor(cfg.initial if "initial" in cfg else 0.0) + self.end = torch.tensor(cfg.end if "end" in cfg else 1.0) + + # Contract + if self.contract_fn.contract_samples: + self.initial = self.contract_fn.contract_distance(self.initial) + self.end = self.contract_fn.contract_distance(self.end) + + # Origin scale + self.resize_scale_factor = cfg.resize_scale_factor if "resize_scale_factor" in cfg else 0.0 + self.resize_initial = torch.tensor(cfg.resize_initial if "resize_initial" in cfg else [1.0, 1.0, 1.0]).cuda() + self.origin_scale_factor = cfg.origin_scale_factor if "origin_scale_factor" in cfg else 0.0 + + # Flip axes + self.flip_axes = cfg.flip_axes if "flip_axes" in cfg else False + + # Calculate samples + self.samples = torch.linspace(self.initial, self.end, z_channels, device="cuda").view(-1, 1) + + # Calculate z scale + if z_channels > 1: + self.z_scale = cfg.z_scale if "z_scale" in cfg else torch.abs(self.samples[1] - self.samples[0]) + else: + self.z_scale = cfg.z_scale if "z_scale" in cfg else 1.0 + + self.z_scale = torch.tensor(self.z_scale).view(-1, 1).cuda() + + # Unify positions + self.unify_fn = pluecker_pos_cylinder(None) + + def process_origins(self, origins): + origins = origins * self.origin_scale_factor + return origins + + def process_resize(self, resize): + resize = resize * self.resize_scale_factor + self.resize_initial.view(1, 1, 3) + return resize + + def process_z_vals(self, z_vals): + z_vals = z_vals.view(z_vals.shape[0], -1, 8) + origins = self.process_origins(z_vals[..., :3]) + resize = self.process_resize(z_vals[..., 3:6]) + raw_offsets = super().process_z_vals(z_vals[..., -2]) + radii = super().process_z_vals(z_vals[..., -1]) + return torch.cat([origins, resize, raw_offsets[..., None], radii[..., None]], -1).view(z_vals.shape[0], -1) + + def intersect(self, rays, z_vals): + z_vals = z_vals.view(z_vals.shape[0], self.z_channels, 8) + origins = z_vals[..., :3] + resize = z_vals[..., 3:6] + raw_offsets = z_vals[..., -2] + radii = z_vals[..., -1] + + # Transform the space + rays_o = (rays[..., None, 0:3] - origins) * resize + rays_d = rays[..., None, 3:6] * resize + rays = torch.cat( + [ + rays_o, + torch.nn.functional.normalize(rays_d, p=2.0, dim=-1), + ], + -1, + ) + + # Calculate intersection distances (in transformed space) + t = intersect_cylinder( + rays, + torch.zeros_like(origins), + radii, + ) + + # Recycle samples for not-hit cylinders + min_radius = min_cylinder_radius(rays, torch.zeros_like(rays[..., :3])) + + base_pos = self.unify_fn(rays) + rays_o_cyl = torch.cat([rays[..., 0:1], torch.zeros_like(rays[..., 1:2]), rays[..., 2:3]], -1) + rays_d_cyl = torch.cat([rays[..., 3:4], torch.zeros_like(rays[..., 4:5]), rays[..., 5:6]], -1) + diff = base_pos - rays_o_cyl + + base_distance = torch.sign(dot(rays_d_cyl, diff)) * torch.norm(diff, dim=-1) / torch.norm(rays_d_cyl, dim=-1) + + t = 
torch.where(torch.abs(radii) < min_radius + 4 * self.z_scale, raw_offsets + base_distance, t) + + # Transform distances + return t / (torch.norm(rays_d, dim=-1) + 1e-5) + + +class IntersectSphereOld(Intersect): + def __init__(self, z_channels, cfg, **kwargs): + super().__init__(z_channels, cfg, **kwargs) + + # Intersect hyper-params + if self.use_dataset_bounds: + self.initial = torch.tensor( + cfg.initial if "initial" in cfg else kwargs["system"].dm.train_dataset.near * 1.5 + ) + self.end = torch.tensor(cfg.end if "end" in cfg else kwargs["system"].dm.train_dataset.far * 1.5) + else: + self.initial = torch.tensor(cfg.initial if "initial" in cfg else 0.0) + self.end = torch.tensor(cfg.end if "end" in cfg else 1.0) + + # Contract + if self.contract_fn.contract_samples: + self.initial = self.contract_fn.contract_distance(self.initial) + self.end = self.contract_fn.contract_distance(self.end) + + # Origin scale + self.origin_scale_factor = cfg.origin_scale_factor if "origin_scale_factor" in cfg else 0.0 + self.origin_initial = torch.tensor(cfg.origin_initial if "origin_initial" in cfg else [1.0, 1.0, 1.0]).cuda() + + # Flip axes + self.flip_axes = cfg.flip_axes if "flip_axes" in cfg else False + + # Calculate samples + self.samples = torch.linspace(self.initial, self.end, z_channels, device="cuda").view(-1, 1) + + # Calculate z scale + if z_channels > 1: + self.z_scale = cfg.z_scale if "z_scale" in cfg else torch.abs(self.samples[1] - self.samples[0]) + else: + self.z_scale = cfg.z_scale if "z_scale" in cfg else 1.0 + + self.z_scale = torch.tensor(self.z_scale).view(-1, 1).cuda() + + # Unify positions + self.unify_fn = pluecker_pos(None) + + def process_origins(self, origins): + origins = origins * self.origin_scale_factor + self.origin_initial.view(1, 1, 3) + return origins + + def process_z_vals(self, z_vals): + z_vals = z_vals.view(z_vals.shape[0], -1, 4) + origins = self.process_origins(z_vals[..., :3]) + radii = super().process_z_vals(z_vals[..., -1]) + return torch.cat([origins, radii[..., None]], -1).view(z_vals.shape[0], -1) + + def intersect(self, rays, z_vals): + z_vals = z_vals.view(z_vals.shape[0], self.z_channels, 4) + origins = z_vals[..., :3] + radii = z_vals[..., -1] + + rays = torch.cat( + [ + rays[..., None, 0:3] * origins, + rays[..., None, 3:6] * origins, + ], + -1, + ) + + # Calculate intersection + return intersect_sphere( + rays, + torch.zeros_like(origins), + radii, + ) + + +class IntersectSphereNew(Intersect): + def __init__(self, z_channels, cfg, **kwargs): + super().__init__(z_channels, cfg, **kwargs) + + # Intersect hyper-params + if self.use_dataset_bounds: + if cfg.outward_facing: + self.initial = torch.tensor( + cfg.initial if "initial" in cfg else kwargs["system"].dm.train_dataset.near * 1.5 + ) + else: + self.initial = torch.tensor( + cfg.initial if "initial" in cfg else -kwargs["system"].dm.train_dataset.far * 1.5 + ) + + self.end = torch.tensor(cfg.end if "end" in cfg else kwargs["system"].dm.train_dataset.far * 1.5) + else: + self.initial = torch.tensor(cfg.initial if "initial" in cfg else 0.0) + self.end = torch.tensor(cfg.end if "end" in cfg else 1.0) + + # Contract + if self.contract_fn.contract_samples: + self.initial = self.contract_fn.contract_distance(self.initial) + self.end = self.contract_fn.contract_distance(self.end) + + # Origin scale + self.resize_scale_factor = cfg.resize_scale_factor if "resize_scale_factor" in cfg else 0.0 + self.resize_initial = torch.tensor(cfg.resize_initial if "resize_initial" in cfg else [1.0, 1.0, 1.0]).cuda() + 
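# Predicted per-axis resize values are offsets from resize_initial (see process_resize below); a resize_scale_factor of 0.0 keeps them fixed at resize_initial.
+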
self.origin_scale_factor = cfg.origin_scale_factor if "origin_scale_factor" in cfg else 0.0 + + # Flip axes + self.flip_axes = cfg.flip_axes if "flip_axes" in cfg else False + + # Calculate samples + self.samples = torch.linspace(self.initial, self.end, z_channels, device="cuda").view(-1, 1) + + # Calculate z scale + if z_channels > 1: + self.z_scale = cfg.z_scale if "z_scale" in cfg else torch.abs(self.samples[1] - self.samples[0]) + else: + self.z_scale = cfg.z_scale if "z_scale" in cfg else 1.0 + + self.z_scale = torch.tensor(self.z_scale).view(-1, 1).cuda() + + # Unify positions + self.unify_fn = pluecker_pos(None) + + def process_origins(self, origins): + origins = origins * self.origin_scale_factor + return origins + + def process_resize(self, resize): + resize = resize * self.resize_scale_factor + self.resize_initial.view(1, 1, 3) + return resize + + def process_z_vals(self, z_vals): + z_vals = z_vals.view(z_vals.shape[0], -1, 8) + origins = self.process_origins(z_vals[..., :3]) + resize = self.process_resize(z_vals[..., 3:6]) + raw_offsets = super().process_z_vals(z_vals[..., -2]) + radii = super().process_z_vals(z_vals[..., -1]) + return torch.cat([origins, resize, raw_offsets[..., None], radii[..., None]], -1).view(z_vals.shape[0], -1) + + def intersect(self, rays, z_vals): + z_vals = z_vals.view(z_vals.shape[0], self.z_channels, 8) + origins = z_vals[..., :3] + resize = z_vals[..., 3:6] + raw_offsets = z_vals[..., -2] + radii = z_vals[..., -1] + + # Transform the space + rays_o = (rays[..., None, 0:3] - origins) * resize + rays_d = rays[..., None, 3:6] * resize + rays = torch.cat( + [ + rays_o, + torch.nn.functional.normalize(rays_d, p=2.0, dim=-1), + ], + -1, + ) + + # Calculate intersection distances (in transformed space) + t = intersect_sphere( + rays, + torch.zeros_like(origins), + radii, + ) + + # Recycle samples for not-hit spheres + min_radius = min_sphere_radius(rays, torch.zeros_like(rays[..., :3])) + + base_pos = self.unify_fn(rays) + diff = base_pos - rays[..., :3] + base_distance = torch.sign(dot(rays[..., 3:6], diff)) * torch.norm(diff, dim=-1) + + t = torch.where(torch.abs(radii) < min_radius + 4 * self.z_scale, raw_offsets + base_distance, t) + + # Transform distances + return t / (torch.norm(rays_d, dim=-1) + 1e-5) + + +primitive_intersect_dict = { + "euclidean_distance": IntersectEuclideanDistance, + "euclidean_distance_unified": IntersectEuclideanDistanceUnified, + "plane": IntersectPlane, + "cylinder": IntersectCylinderOld, + "cylinder_new": IntersectCylinderNew, + "sphere": IntersectSphereOld, + "sphere_new": IntersectSphereNew, +} diff --git a/nlf/intersect/voxel.py b/nlf/intersect/voxel.py new file mode 100644 index 0000000..ed684c6 --- /dev/null +++ b/nlf/intersect/voxel.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import torch + +from utils.intersect_utils import dot, intersect_axis_plane, intersect_plane, intersect_voxel_grid + +from .base import Intersect + + +class IntersectVoxelGrid(Intersect): + def __init__(self, z_channels, cfg, **kwargs): + super().__init__(z_channels, cfg, **kwargs) + + # Intersect params + self.outward_facing = cfg.outward_facing if "outward_facing" in cfg else False + self.fac = cfg.fac if "fac" in cfg else 1.0 + + if self.use_dataset_bounds: + self.initial = torch.tensor( + list(cfg.initial) if "initial" in cfg else kwargs["system"].dm.train_dataset.bbox_min * self.fac + ) + self.end = torch.tensor( + list(cfg.end) if "end" in cfg else kwargs["system"].dm.train_dataset.bbox_max * self.fac + ) + else: + self.initial = torch.tensor(list(cfg.initial) if "initial" in cfg else [0.0, 0.0, 0.0]) + self.end = torch.tensor(list(cfg.end) if "end" in cfg else [1.0, 1.0, 1.0]) + + # Contract + if self.contract_fn.contract_samples: + self.initial = self.contract_fn.contract_distance(self.initial) + self.end = self.contract_fn.contract_distance(self.end) + + # Max axis + self.max_axis = cfg.max_axis if "max_axis" in cfg else False + + # Calculate samples + z_channels = z_channels // 3 + self.samples = [] + + for dim in range(3): + samples = torch.linspace(self.initial[dim], self.end[dim], z_channels, device="cuda") + + self.samples.append(samples) + + self.samples = torch.stack(self.samples, -1) + + if "z_scale" in cfg: + self.z_scale = torch.tensor(list(cfg.z_scale)).cuda() + elif z_channels > 1: + self.z_scale = torch.abs(self.samples[1] - self.samples[0]) + else: + self.z_scale = torch.tensor([1.0, 1.0, 1.0]).cuda() + + self.z_scale[self.z_scale == 0.0] = 1.0 + + # print("Initial, end in contracted space:", self.initial, self.end) + # print("Samples in contracted space:", self.samples) + + # Local prediction + self.use_local_prediction = cfg.use_local_prediction if "use_local_prediction" in cfg else False + self.voxel_size = torch.tensor(cfg.voxel_size if "voxel_size" in cfg else [1.0, 1.0, 1.0]).cuda() + + def intersect(self, rays, z_vals): + z_vals = z_vals.reshape(z_vals.shape[0], self.z_channels // 3, 3) + + # Outward facing + if self.outward_facing: + dir_sign = torch.sign(rays[..., 3:6]) + z_vals = z_vals * dir_sign[..., None, :] + + # Local prediction + if self.use_local_prediction: + rays_o = rays[..., 0:3] + origin = torch.round(rays_o / self.voxel_size.unsqueeze(0)) * self.voxel_size.unsqueeze(0) + z_vals = z_vals + origin.unsqueeze(1) + + # Calculate intersection distance + dists = intersect_voxel_grid(rays[..., None, :], torch.zeros_like(self.origin[None, None]), z_vals) + + # Max axis + if self.max_axis: + max_mask = torch.abs(rays[..., 3:6]) < ( + torch.max(torch.abs(rays[..., 3:6]), dim=-1, keepdim=True)[0] - 1e-8 + ) + max_mask = max_mask[..., None, :].repeat(1, self.z_channels // 3, 1) + + dists = dists.view(dists.shape[0], self.z_channels // 3, -1) + dists = torch.where( + max_mask, + torch.zeros_like(dists), + dists, + ).view(dists.shape[0], self.z_channels) + + return dists + + +class IntersectDeformableVoxelGrid(Intersect): + def __init__(self, z_channels, cfg, **kwargs): + super().__init__(z_channels, cfg, **kwargs) + + # Starting normals + self.num_axes = len(list(cfg.start_normal)) if "start_normal" in cfg else 3 + self.start_normal = torch.tensor( + list(cfg.start_normal) + if "start_normal" in cfg + else [ + [ + 1.0, + 0.0, + 0.0, + ], + [ + 0.0, + 1.0, + 0.0, + ], + [ + 0.0, + 0.0, + 1.0, + ], + ] + ) + 
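# Predicted plane normals are offsets from these axis-aligned start normals, scaled by normal_scale_factor and re-normalized in intersect() below.
+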
self.normal_scale_factor = cfg.normal_scale_factor if "normal_scale_factor" in cfg else 0.1 + + # Intersect params + self.outward_facing = cfg.outward_facing if "outward_facing" in cfg else False + self.fac = cfg.fac if "fac" in cfg else 1.0 + + if self.use_dataset_bounds: + points = kwargs["system"].dm.train_dataset.all_points + mask = kwargs["system"].dm.train_dataset.all_depth != 0.0 + + valid_points = points[mask.repeat(1, 3)].reshape(-1, 3) + self.initial = dot(self.start_normal.unsqueeze(0), valid_points.unsqueeze(1)).min(0)[0].cuda() + self.end = dot(self.start_normal.unsqueeze(0), valid_points.unsqueeze(1)).max(0)[0].cuda() + + # print(self.initial, self.end) + # exit() + else: + self.initial = torch.tensor(list(cfg.initial) if "initial" in cfg else [0.0, 0.0, 0.0]) + self.end = torch.tensor(list(cfg.end) if "end" in cfg else [1.0, 1.0, 1.0]) + + self.start_normal = self.start_normal.cuda() + + # Contract + if self.contract_fn.contract_samples: + self.initial = self.contract_fn.contract_distance(self.initial) + self.end = self.contract_fn.contract_distance(self.end) + + # Max axis + self.max_axis = cfg.max_axis if "max_axis" in cfg else False + + # Calculate samples + z_channels = z_channels // self.num_axes + self.samples = [] + + for dim in range(self.num_axes): + samples = torch.linspace(self.initial[dim], self.end[dim], z_channels, device="cuda") + + self.samples.append(samples) + + self.samples = torch.stack(self.samples, -1).view(-1, 1) + + # Calculate z scale + if "z_scale" in cfg: + self.z_scale = torch.tensor(list(cfg.z_scale)).cuda() + elif z_channels > 1: + self.z_scale = torch.abs(self.samples[1] - self.samples[0]) + else: + self.z_scale = torch.tensor([1.0 for i in range(self.num_axes)]).cuda() + + self.z_scale[self.z_scale == 0.0] = 1.0 + + def process_z_vals(self, z_vals): + z_vals = z_vals.view(z_vals.shape[0], -1, 4) + d = super().process_z_vals(z_vals[..., -1]) + return torch.cat([z_vals[..., :3], d[..., None]], -1).view(z_vals.shape[0], -1) + + def intersect(self, rays, z_vals): + z_vals = z_vals.reshape( + z_vals.shape[0], + -1, + 4, + ) + + normal = z_vals[..., :3] + distance = z_vals[..., -1] + + # Correct normals + normal = normal.view(-1, self.num_axes, 3) * self.normal_scale_factor + self.start_normal.unsqueeze(0) + normal = normal.view(z_vals.shape[0], -1, 3) + normal = torch.nn.functional.normalize(normal, dim=-1) + + # Calculate intersection distance + dists = intersect_plane(rays[..., None, :], normal, distance) + + return dists + + +voxel_intersect_dict = { + "voxel_grid": IntersectVoxelGrid, + "deformable_voxel_grid": IntersectDeformableVoxelGrid, +} diff --git a/nlf/intersect/z.py b/nlf/intersect/z.py new file mode 100644 index 0000000..ff5bda2 --- /dev/null +++ b/nlf/intersect/z.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import torch + +from utils.intersect_utils import intersect_axis_plane + +from .base import Intersect + + +class IntersectZPlane(Intersect): + def __init__(self, z_channels, cfg, **kwargs): + super().__init__(z_channels, cfg, **kwargs) + + # Intersect hyper-params + if self.use_dataset_bounds: + self.initial = torch.tensor(-kwargs["system"].dm.train_dataset.near) + self.end = torch.tensor(-kwargs["system"].dm.train_dataset.far) + else: + self.initial = torch.tensor(cfg.initial if "initial" in cfg else 0.0) + self.end = torch.tensor(cfg.end if "end" in cfg else 1.0) + + self.num_repeat = cfg.num_repeat if "num_repeat" in cfg else 1 + + # Contract + if self.contract_fn.contract_samples: + self.initial = self.contract_fn.contract_distance(self.initial) + self.end = self.contract_fn.contract_distance(self.end) + + if self.use_disparity: + self.samples = torch.linspace( + 1.0 / self.end, 1.0 / self.initial, z_channels // self.num_repeat, device="cuda" + ) + + self.samples = torch.flip(self.samples, [0]) + else: + self.samples = torch.linspace(self.initial, self.end, z_channels // self.num_repeat, device="cuda") + + self.samples = self.samples.repeat(self.num_repeat).view(-1, 1) + + # Calculate z scale + if z_channels > 1: + if "z_scale" in cfg: + self.z_scale = cfg.z_scale + elif "num_samples_for_scale" in cfg: + self.z_scale = torch.abs(self.samples[1] - self.samples[0]) * ( + z_channels / float(cfg.num_samples_for_scale) + ) + else: + self.z_scale = torch.abs(self.samples[1] - self.samples[0]) + else: + self.z_scale = cfg.z_scale if "z_scale" in cfg else 1.0 + + self.z_scale = torch.tensor(self.z_scale).view(-1, 1).cuda() + + # Local prediction + self.use_local_prediction = cfg.use_local_prediction if "use_local_prediction" in cfg else False + self.voxel_size = cfg.voxel_size if "voxel_size" in cfg else 1.0 + + def intersect(self, rays, z_vals): + z_vals = z_vals.view(z_vals.shape[0], -1) + + if self.clamp: + z_vals = z_vals.clamp(self.initial, self.end) + + # Local prediction + if self.use_local_prediction: + rays_z = rays[..., 2:3] + origin = torch.round(rays_z / self.voxel_size) * self.voxel_size + z_vals = z_vals + origin + + # Calculate intersection + dists = intersect_axis_plane(rays[..., None, :], z_vals, 2, exclude=False) + + return dists + + +z_intersect_dict = { + "z_plane": IntersectZPlane, +} diff --git a/nlf/models/__init__.py b/nlf/models/__init__.py new file mode 100644 index 0000000..3ade89f --- /dev/null +++ b/nlf/models/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from .models import model_dict diff --git a/nlf/models/models.py b/nlf/models/models.py new file mode 100644 index 0000000..eea4821 --- /dev/null +++ b/nlf/models/models.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +from typing import Dict, List + +import numpy as np +import torch +from torch import nn + +from nlf.activations import get_activation +from nlf.embedding import embedding_dict +from nlf.nets import net_dict +from nlf.param import RayParam + + +class BaseColorModel(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + self.in_channels = in_channels + self.color_channels = out_channels + + # Total out channels + self.out_channels = self.color_channels + + # MLP + self.net = net_dict[cfg.net.type]( + self.in_channels, self.out_channels, cfg.net, group=self.group, system=kwargs["system"] + ) + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + return self.net(x, render_kwargs) + + def set_iter(self, i): + self.cur_iter = i + self.net.set_iter(i) + + +color_model_dict = { + "base": BaseColorModel, +} + + +ray_model_dict = {} +pos_model_dict = {} +model_dict = {} + + +class BaseLightfieldModel(nn.Module): + def __init__(self, cfg, **kwargs): + super().__init__() + + self.cfg = cfg + self.embeddings = [] + self.models = [] + + if "is_subdivided" in kwargs: + self.is_subdivided = kwargs["is_subdivided"] + else: + self.is_subdivided = False + + if "num_outputs" in kwargs: + self.num_outputs = kwargs["num_outputs"] + else: + self.num_outputs = cfg.num_outputs if "num_outputs" in cfg else 3 + + # Ray parameterization + self.param = RayParam(cfg.param) + + def set_iter(self, i): + self.cur_iter = i + + for emb in self.embeddings: + emb.set_iter(i) + + for model in self.models: + model.set_iter(i) + + +class LightfieldModel(BaseLightfieldModel): + def __init__(self, cfg, **kwargs): + super().__init__(cfg, **kwargs) + + # Embedding + self.embedding_model = embedding_dict[cfg.embedding.type]( + self.param.out_channels, cfg.embedding, system=kwargs["system"] + ) + + self.embeddings += [self.embedding_model] + + # Color + self.color_model = color_model_dict[cfg.color.type]( + self.embedding_model.out_channels, self.num_outputs, cfg.color, system=kwargs["system"] + ) + + self.models += [self.color_model] + + def embed(self, rays: torch.Tensor, render_kwargs: Dict[str, str]): + param_rays = self.param(rays) + return self.embedding_model(param_rays, render_kwargs) + + def forward(self, rays, render_kwargs: Dict[str, str]): + embed_rays = self.embed(rays, render_kwargs) + outputs = self.color_model(embed_rays, render_kwargs) + return outputs + + +ray_model_dict["lightfield"] = LightfieldModel +pos_model_dict["lightfield"] = LightfieldModel +model_dict["lightfield"] = LightfieldModel diff --git a/nlf/nets/__init__.py b/nlf/nets/__init__.py new file mode 100644 index 0000000..51ee601 --- /dev/null +++ b/nlf/nets/__init__.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 Anpei Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from .nets import net_dict diff --git a/nlf/nets/array_nd.py b/nlf/nets/array_nd.py new file mode 100644 index 0000000..04c74ed --- /dev/null +++ b/nlf/nets/array_nd.py @@ -0,0 +1,381 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2022 Anpei Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import numpy as np +import torch +from torch import nn + +from nlf.activations import get_activation + + +def my_index(x, input_channels): + cur_x = [] + + for ch in input_channels: + cur_x.append(x[..., ch]) + + cur_x = torch.stack(cur_x, -1) + return cur_x + + +class ArrayND(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + self.opt_group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + # Number of input, output channels + self.in_channels = in_channels + self.out_channels = out_channels + + # Input channels for each tensor + self.input_channels = list(cfg.input_channels) + + # Size and range + self.size = cfg.size[::-1] + + if "range" in cfg: + self.range = cfg.range + else: + self.range = [[-1, 1] for s in self.size] + + if len(self.size) == 1: + self.size = [1] + self.size + + self.min_range = [r[0] for r in self.range] + self.max_range = [r[1] for r in self.range] + self.mode = cfg.mode if "mode" in cfg else "bilinear" + self.padding_mode = cfg.padding_mode if "padding_mode" in cfg else "zeros" + + # Activation + self.activation = cfg.activation if "activation" in cfg else "identity" + self.out_layer = get_activation(self.activation) + + # Tensor setup + cur_size = (self.out_channels,) + tuple(self.size) + + self.tensor = nn.Parameter(torch.Tensor(*cur_size)) + if self.opt_group == "color": + self.tensor.data.uniform_(-1.0, 1.0) + else: + self.tensor.data.uniform_(-0.01, 0.01) + + # self.tensor = nn.Parameter(1.0 * torch.randn(cur_size, device='cuda')) + + def forward(self, x, **render_kwargs): + # Index + x = my_index(x, self.input_channels) + + # Ranges + range_shape = tuple(1 for s in x.shape[:-1]) + x.shape[-1:] + min_range = torch.tensor(self.min_range, device=x.device).float().view(range_shape) + max_range = torch.tensor(self.max_range, device=x.device).float().view(range_shape) + + # Normalize + x = ((x - min_range) / (max_range - min_range)) * 2 - 1 + + # Get mask + mask = ~torch.any((x < -1) | (x > 1), dim=-1, keepdim=True) + + # Append extra dimension + if x.shape[-1] == 1: + x = torch.cat([x, -torch.ones_like(x)], -1) + + # Mask + # all_feature = x.new_zeros(x.shape[0], self.out_channels) + # x = x[mask.repeat(1, x.shape[-1])].view(-1, x.shape[-1]) + x = torch.where(mask, x, 1e8 * torch.ones_like(x)) + + # Reshape + input_shape = (1,) + x.shape[:1] + tuple(1 for s in self.size[:-1]) + x.shape[-1:] + x = x.view(input_shape) + + # Sample feature + feature = nn.functional.grid_sample( + self.tensor.unsqueeze(0), x, mode=self.mode, padding_mode=self.padding_mode, align_corners=False + ) + + # Reshape + feature = feature.reshape(self.out_channels, feature.shape[2]).permute(1, 0) + + ## All feature + # all_feature[mask.repeat(1, self.out_channels)] = feature.reshape(-1) + # feature = all_feature.view(-1, self.out_channels) + return feature + + def set_iter(self, i): + pass + + +class ArrayNDMultiple(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + self.opt_group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + # Number of input, output channels + self.in_channels = in_channels + self.out_channels = out_channels + self.num_factors = cfg.num_factors if "num_factors" in cfg else 1 + + # Input channels 
for each tensor + self.input_channels = list(cfg.input_channels) + self.dims_per_factor = len(cfg.size) + + # Size and range + self.size = cfg.size[::-1] + self.size[0] = self.size[0] * self.num_factors + + self.range = cfg.range + self.range = np.array(self.range).reshape(self.num_factors, self.dims_per_factor, 2) + + if self.dims_per_factor == 1: + self.size = [1] + self.size + self.range = np.concatenate( + [ + -np.ones_like(self.range[:, :1, :]), + self.range, + ], + axis=1, + ) + + self.width = self.size[1] + self.height = self.size[0] // self.num_factors + self.height_factor = (self.height - 1) / (self.size[0] - 1) + + self.min_range = self.range[..., 0] + self.max_range = self.range[..., 1] + + # Product mode + self.product_mode = cfg.product_mode if "product_mode" in cfg else "product" + + # Grid sample mode + self.mode = cfg.mode if "mode" in cfg else "bilinear" + self.padding_mode = cfg.padding_mode if "padding_mode" in cfg else "zeros" + + # Activation + self.activation = cfg.activation if "activation" in cfg else "identity" + self.out_layer = get_activation(self.activation) + + # Create tensors + cur_size = (self.out_channels,) + tuple(self.size) + + self.tensor = nn.Parameter(torch.Tensor(*cur_size)) + if self.opt_group == "color": + self.tensor.data.uniform_(-1.0, 1.0) + else: + self.tensor.data.uniform_(-0.01, 0.01) + + # self.tensor = nn.Parameter(0.1 * torch.randn(cur_size, device='cuda')) + + def forward(self, x, **render_kwargs): + batch_size = x.shape[0] + + # Index + x = my_index(x, self.input_channels) + + # Ranges + range_shape = tuple(1 for s in x.shape[:-1]) + x.shape[-1:] + min_range = torch.tensor(self.min_range, device=x.device).float().view(range_shape) + max_range = torch.tensor(self.max_range, device=x.device).float().view(range_shape) + + # Normalize + x = ((x - min_range) / (max_range - min_range)) * 2 - 1 + + # Get mask + mask = ~torch.any((x < -1) | (x > 1), dim=-1, keepdim=True) + + # Append extra dimension if necessary + x = x.view(batch_size, self.num_factors, self.dims_per_factor) + + if self.dims_per_factor == 1: + x = torch.cat([x, torch.zeros_like(x)], -1) + + # Offset + offset = ( + torch.linspace(0.0, (self.num_factors - 1) * self.height, self.num_factors, device=x.device)[None] + / (self.size[0] - 1) + ) * 2 - 1 + x = torch.stack( + [ + x[..., 0], + (x[..., 1] + 1) * self.height_factor + offset, + ], + -1, + ) + + # Mask + # all_feature = x.new_zeros(x.shape[0], self.out_channels) + # x = x[mask.repeat(1, x.shape[-1])].view(-1, x.shape[-1]) + x = torch.where(mask, x, 1e8 * torch.ones_like(x)) + + # Reshape + x = x.view(-1, self.dims_per_factor) + + # REshape again + input_shape = (1,) + x.shape[:1] + tuple(1 for s in self.size[:-1]) + x.shape[-1:] + x = x.view(input_shape) + + # Sample feature + feature = nn.functional.grid_sample( + self.tensor.unsqueeze(0), x, mode=self.mode, padding_mode=self.padding_mode, align_corners=False + ) + + # Reshape feature + feature = feature.reshape(self.out_channels, feature.shape[2]).permute(1, 0) + + ## All feature + # all_feature[mask.repeat(1, self.out_channels)] = feature.reshape(-1) + # feature = all_feature.view(-1, self.out_channels) + + # Product + feature = feature.reshape(batch_size, self.num_factors, self.out_channels) + + if self.product_mode == "product": + feature = torch.prod(feature, dim=1) + elif self.product_mode == "concat": + feature = feature.view(batch_size, -1) + + return feature + + def set_iter(self, i): + pass + + +class ArrayNDSubdivided(nn.Module): + def __init__(self, in_channels, 
out_channels, cfg, **kwargs): + super().__init__() + + self.opt_group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + # Number of input, output channels + self.in_channels = in_channels + self.out_channels = out_channels + + # Input channels for each tensor + self.input_channels = list(cfg.input_channels) + + # Size and range + self.size = list(cfg.size) + self.range = list(cfg.range) + + if len(self.size) == 1: + self.size = self.size + [1] + + self.min_range = [r[0] for r in self.range] + self.max_range = [r[1] for r in self.range] + + # Subdivisions + self.grid_size = list(cfg.grid_size) + self.tensor_size = [self.size[idx] // self.grid_size[self.input_channels[idx]] for idx in range(2)] + self.full_size = self.tensor_size[0:1] + [ + self.grid_size[0] * self.grid_size[1] * self.grid_size[2] * self.tensor_size[1] + ] + + # Grid sample options + self.mode = cfg.mode if "mode" in cfg else "bilinear" + self.padding_mode = cfg.padding_mode if "padding_mode" in cfg else "zeros" + + # Activation + self.activation = cfg.activation if "activation" in cfg else "identity" + self.out_layer = get_activation(self.activation) + + # Tensor setup + cur_size = (self.out_channels,) + tuple(self.full_size[::-1]) + + self.tensor = nn.Parameter(torch.Tensor(*cur_size)) + if self.opt_group == "color": + self.tensor.data.uniform_(-1.0, 1.0) + else: + self.tensor.data.uniform_(-0.01, 0.01) + + # self.tensor = nn.Parameter(0.1 * torch.randn(cur_size, device='cuda')) + + def forward(self, x, **render_kwargs): + # Bounds + range_shape = tuple(1 for s in x.shape[:-1]) + (3,) + min_range = torch.tensor(self.min_range, device=x.device).float().view(range_shape) + max_range = torch.tensor(self.max_range, device=x.device).float().view(range_shape) + + # Voxel index + voxel_idx = torch.clip(x[..., :3], min_range, max_range) + grid_size = torch.tensor(self.grid_size, device=x.device).float().view(range_shape) + voxel_idx = torch.floor(((voxel_idx - min_range) / (max_range - min_range)) * grid_size) + voxel_idx = torch.clip(voxel_idx, torch.zeros_like(grid_size), grid_size - 1) + + voxel_idx = ( + voxel_idx[..., 2] * np.prod(self.grid_size[0:2]) + + voxel_idx[..., 1] * np.prod(self.grid_size[0:1]) + + voxel_idx[..., 0] + ) + + # Get relevant coordinates + x = my_index(x, self.input_channels) + min_range = min_range[..., self.input_channels] + max_range = max_range[..., self.input_channels] + + # Normalize + x = (x - min_range) / (max_range - min_range) + + # Get mask + mask = ~torch.any((x < 0) | (x > 1), dim=-1, keepdim=True) + + # Append extra dimension if necessary + if x.shape[-1] == 1: + x = torch.cat([x, torch.zeros_like(x)], -1) + + # Get tensor look-up coordinates + size_shape = tuple(1 for s in x.shape[:-1]) + x.shape[-1:] + size = torch.tensor(self.size, device=x.device).float().view(size_shape) + tensor_size = torch.tensor(self.tensor_size, device=x.device).float().view(size_shape) + full_size = torch.tensor(self.full_size, device=x.device).float().view(size_shape) + + x = torch.remainder(x * size, tensor_size) + x = ( + torch.stack( + [ + x[..., 0], + x[..., 1] + voxel_idx * tensor_size[..., 1], + ], + dim=-1, + ) + / full_size + ) * 2 - 1 + + # Only evalaute valid outputs + # all_feature = x.new_zeros(x.shape[0], self.out_channels) + # x = x[mask.repeat(1, x.shape[-1])].view(-1, x.shape[-1]) # (Option 1) tensor containing only valid + x = torch.where(mask, x, 1e8 * torch.ones_like(x)) # (Option 2) push invalid out of bounds + + # Reshape + input_shape = (1,) + 
x.shape[:1] + tuple(1 for s in self.size[:-1]) + x.shape[-1:] + x = x.view(input_shape) + + # Sample feature + feature = nn.functional.grid_sample( + self.tensor.unsqueeze(0), x, mode=self.mode, padding_mode=self.padding_mode, align_corners=False + ) + + # Reshape + feature = feature.reshape(self.out_channels, feature.shape[2]).permute(1, 0) + + ## All feature + # all_feature[mask.repeat(1, self.out_channels)] = feature.reshape(-1) + # feature = all_feature.view(-1, self.out_channels) + + # Return + return feature + + def set_iter(self, i): + pass + + +array_dict = { + "array_nd": ArrayND, + "array_nd_multiple": ArrayNDMultiple, + "array_nd_subdivided": ArrayNDSubdivided, +} diff --git a/nlf/nets/mlp.py b/nlf/nets/mlp.py new file mode 100644 index 0000000..eab666e --- /dev/null +++ b/nlf/nets/mlp.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 Anpei Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import torch +from torch import nn + +from nlf.activations import get_activation +from nlf.pe import IdentityPE, pe_dict + + +class ZeroMLP(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + self.opt_group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + self.out_channels = out_channels + self.layer = nn.Linear(1, 1) + + def forward(self, x): + return x.new_zeros(x.shape[0], self.out_channels) + + def set_iter(self, i): + self.cur_iter = i + + +class ConstantMLP(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + self.opt_group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + self.out_channels = out_channels + self.layer = nn.Linear(1, out_channels) + self.out_layer = get_activation(cfg.activation if "activation" in cfg else "identity") + + def forward(self, x): + out = self.out_layer(self.layer.bias).unsqueeze(0) + return out.expand(x.shape[0], out.shape[-1]) + + def set_iter(self, i): + self.cur_iter = i + + +class BaseMLP(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + self.opt_group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + self.start_channel = kwargs["start_channel"] if "start_channel" in kwargs else 0 + self.in_channels = in_channels - self.start_channel + self.out_channels = out_channels if cfg.depth != 0 else self.in_channels + self.latent_dim = kwargs["latent_dim"] if "latent_dim" in kwargs else 0 + self.pe_channels = cfg.pe_channels if "pe_channels" in cfg else self.in_channels + self.zero_before_channel = cfg.zero_before_channel if "zero_before_channel" in cfg else None + self.linear_last = cfg.linear_last if "linear_last" in cfg else True + self.bias = cfg.bias if "bias" in cfg else True + self.pad_to = cfg.pad_to if "pad_to" in cfg else None + + if "latent_dim" in cfg: + self.latent_dim = cfg.latent_dim + + if self.pe_channels == "all": + self.pe_channels = self.in_channels + self.latent_dim + self.in_channels = self.pe_channels + self.latent_dim = 0 + + # Global + self.is_constant = cfg.is_constant if "is_constant" in cfg else False + + if self.is_constant: + if "pe_channels" in cfg and "latent_dim" not in cfg: + self.latent_dim = self.in_channels - self.pe_channels + else: + self.pe_channels = self.latent_dim + + 
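# In the constant case the network input is just the latent code; no positional encoding of the ray coordinates is constructed.
+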
net_in_channels = self.latent_dim + else: + # PE + if "pe" in cfg: + self.pe = pe_dict[cfg.pe.type](self.pe_channels, cfg.pe) + self.pe_out_channels = self.pe.out_channels + else: + self.pe = IdentityPE(self.pe_channels) + self.pe_out_channels = self.pe_channels + + net_in_channels = self.pe_out_channels + (self.in_channels - self.pe_channels) + self.latent_dim + + # Padding + if self.pad_to is not None: + net_in_channels = self.pad_to + + # MLP + self.D = cfg.depth + self.W = cfg.hidden_channels + + self.skips = list(cfg.skips) if "skips" in cfg else [] + self.activation = cfg.activation if "activation" in cfg else "identity" + self.layer_activation = cfg.layer_activation if "layer_activation" in cfg else "leaky_relu" + self.layers = nn.ModuleList() + + for i in range(self.D + 2): + if i == 0: + layer = nn.Linear(net_in_channels, self.W, bias=self.bias) + + if self.zero_before_channel is not None: + zero_before_channel = ( + self.zero_before_channel + * self.pe_channels + * (2 * cfg.pe.n_freqs + (0 if cfg.pe.exclude_identity else 1)) + ) + + with torch.no_grad(): + if self.latent_dim > 0: + layer.weight[..., zero_before_channel : -self.latent_dim] = 0.0 + else: + layer.weight[..., zero_before_channel:] = 0.0 + + elif i == self.D + 1: + layer = nn.Linear(self.W, self.out_channels, bias=self.bias) + elif i in self.skips: + layer = nn.Linear(self.W + net_in_channels, self.W, bias=self.bias) + else: + layer = nn.Linear(self.W, self.W, bias=self.bias) + + if self.linear_last: + if i < self.D: + layer = nn.Sequential(layer, get_activation(self.layer_activation)) + else: + if i < self.D + 1: + layer = nn.Sequential(layer, get_activation(self.layer_activation)) + + self.layers.append(layer) + + # Output + self.out_layer = get_activation(self.activation) + + def forward(self, x): + if self.pad_to is not None: + x = torch.cat([x, x.new_ones(x.shape[0], self.pad_to - x.shape[-1])], -1) + + # Run forward + input_x = x + + for i, layer in enumerate(self.layers): + if i in self.skips: + x = torch.cat([input_x, x], -1) + + x = layer(x) + + return self.out_layer(x) + + def set_iter(self, i): + self.cur_iter = i + + if not self.is_constant: + self.pe.set_iter(i) + + +class PartitionedConstantMLP(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + self.opt_group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + self.out_channels = out_channels + self.num_partitions = cfg.num_partitions + self.layer = nn.Linear(1, self.out_channels * self.num_partitions) + self.out_layer = get_activation(cfg.activation if "activation" in cfg else "identity") + + def forward(self, x): + return self.out_layer(self.layer.bias) + + def set_iter(self, i): + self.cur_iter = i + + +class PartitionedMLP(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + self.opt_group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + self.start_channel = kwargs["start_channel"] if "start_channel" in kwargs else 0 + self.in_channels = in_channels - self.start_channel + self.out_channels = out_channels if cfg.depth != 0 else self.in_channels + self.pe_channels = cfg.pe_channels if "pe_channels" in cfg else self.in_channels + self.latent_dim = kwargs["latent_dim"] if "latent_dim" in kwargs else 0 + self.use_latent = cfg.use_latent if "use_latent" in cfg else False + self.zero_before_channel = cfg.zero_before_channel if "zero_before_channel" in cfg else None + 
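# When zero_before_channel is set, the first-layer weights for all positionally-encoded channels past that index are zeroed at initialization (see the first-layer setup below).
+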
self.linear_last = cfg.linear_last if "linear_last" in cfg else True + + if "latent_dim" in cfg: + self.latent_dim = cfg.latent_dim + + # Global + self.is_constant = cfg.is_constant if "is_constant" in cfg else False + + if self.is_constant: + if "pe_channels" in cfg and "latent_dim" not in cfg: + self.latent_dim = self.in_channels - self.pe_channels + else: + self.pe_channels = self.latent_dim + + net_in_channels = self.latent_dim + else: + # PE + if "pe" in cfg: + self.pe = pe_dict[cfg.pe.type](self.pe_channels, cfg.pe) + self.pe_out_channels = self.pe.out_channels + else: + self.pe = IdentityPE(self.pe_channels) + self.pe_out_channels = self.pe_channels + + if self.use_latent: + net_in_channels = self.pe_out_channels + (self.in_channels - self.pe_channels) + self.latent_dim + else: + net_in_channels = self.pe_out_channels + (self.in_channels - self.pe_channels) + + # MLP + self.num_partitions = cfg.num_partitions + self.D = cfg.depth + self.W = cfg.hidden_channels + + self.skips = list(cfg.skips) if "skips" in cfg else [] + self.activation = cfg.activation if "activation" in cfg else "identity" + self.layer_activation = cfg.layer_activation if "layer_activation" in cfg else "leaky_relu" + + # Partitioned layers + for i in range(self.D + 2): + if i == 0: + layer = nn.Linear(net_in_channels, self.W * self.num_partitions) + + if self.zero_before_channel is not None: + zero_before_channel = ( + self.zero_before_channel + * self.pe_channels + * (2 * cfg.pe.n_freqs + (0 if cfg.pe.exclude_identity else 1)) + ) + + with torch.no_grad(): + if self.latent_dim > 0: + layer.weight[..., zero_before_channel : -self.latent_dim] = 0.0 + else: + layer.weight[..., zero_before_channel:] = 0.0 + + elif i == (self.D + 1): + layer = nn.Linear(self.W, self.out_channels * self.num_partitions) + elif i in self.skips: + layer = nn.Linear(self.W + net_in_channels, self.W * self.num_partitions) + else: + layer = nn.Linear(self.W, self.W * self.num_partitions) + + if self.linear_last: + if i < self.D: + layer = nn.Sequential(layer, get_activation(self.layer_activation)) + else: + layer = nn.Sequential(layer, get_activation("identity")) + else: + if i < self.D + 1: + layer = nn.Sequential(layer, get_activation(self.layer_activation)) + else: + layer = nn.Sequential(layer, get_activation("identity")) + + setattr(self, f"encoding{i+1}", layer) + + # Output + self.out_layer = get_activation(self.activation) + + def forward(self, x): + # Apply PE + if self.is_constant: + x = x[..., -self.latent_dim :] + else: + if self.latent_dim > 0 and self.use_latent: + x = torch.cat( + [x[..., self.start_channel : self.start_channel + self.in_channels], x[..., -self.latent_dim :]], + dim=-1, + ) + else: + x = x[..., self.start_channel : self.start_channel + self.in_channels] + + x = torch.cat([self.pe(x[..., : self.pe_channels]), x[..., self.pe_channels :]], dim=-1) + + # Run forward + batch_size = x.shape[0] // self.num_partitions + input_x = x + + for i in range(self.D + 2): + if i in self.skips: + x = torch.cat([input_x, x], -1) + + # Batch matmul + layer = getattr(self, f"encoding{i+1}") + weight = layer[0].weight + + weight = weight.view(self.num_partitions, weight.shape[0] // self.num_partitions, weight.shape[1]).permute( + 0, 2, 1 + ) + x = x.view(batch_size, self.num_partitions, x.shape[-1]).permute(1, 0, 2) + x = torch.bmm(x, weight).permute(1, 0, 2) + + # Bias + x = x + layer[0].bias.view(1, self.num_partitions, -1) + + # Apply non-linearity + x = layer[1](x) + x = x.reshape(batch_size * self.num_partitions, -1) + + return 
self.out_layer(x) + + def _forward(self, x): + # Apply PE + if self.is_constant: + x = x[..., -self.latent_dim :] + else: + if self.latent_dim > 0: + x = torch.cat( + [x[..., self.start_channel : self.start_channel + self.in_channels], x[..., -self.latent_dim :]], + dim=-1, + ) + else: + x = x[..., self.start_channel : self.start_channel + self.in_channels] + + x = torch.cat([self.pe(x[..., : self.pe_channels]), x[..., self.pe_channels :]], dim=-1) + + # Run forward + input_x = x + + for i in range(self.D + 2): + if i in self.skips: + x = torch.cat([input_x, x], -1) + + layer = getattr(self, f"encoding{i+1}") + x = x @ layer[0].weight.permute(1, 0) + layer[0].bias + x = layer[1](x) + + return self.out_layer(x) + + def set_iter(self, i): + self.cur_iter = i + + if not self.is_constant: + self.pe.set_iter(i) + + +from .siren import Siren + +mlp_dict = { + "zero": ZeroMLP, + "constant": ConstantMLP, + "partitioned_constant": PartitionedConstantMLP, + "base": BaseMLP, + "partitioned": PartitionedMLP, + "siren": Siren, +} diff --git a/nlf/nets/nets.py b/nlf/nets/nets.py new file mode 100644 index 0000000..a4fdaeb --- /dev/null +++ b/nlf/nets/nets.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 Anpei Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import torch +from torch import nn + +from nlf.activations import get_activation + +net_dict = {} + +# Add MLPs +from .mlp import mlp_dict + +for k, v in mlp_dict.items(): + net_dict[k] = v + +# Add tensors +from .tensor import tensor_dict + +for k, v in tensor_dict.items(): + net_dict[k] = v + +# Add tensoRFs +from .tensorf_base import tensorf_base_dict + +for k, v in tensorf_base_dict.items(): + net_dict[k] = v + +from .tensorf_no_sample import tensorf_no_sample_dict + +for k, v in tensorf_no_sample_dict.items(): + net_dict[k] = v + +from .tensorf_reflect import tensorf_reflect_dict + +for k, v in tensorf_reflect_dict.items(): + net_dict[k] = v + +from .tensorf_dynamic import tensorf_dynamic_dict + +for k, v in tensorf_dynamic_dict.items(): + net_dict[k] = v + +# Multiple net +class MultipleNet(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + self.cfg = cfg + self.cur_iter = 0 + self.in_channels = in_channels + self.out_channels = out_channels + self.latent_dim = kwargs["latent_dim"] if "latent_dim" in kwargs else 0 + + self.use_feature_net = "feature_net" in cfg + self.out_feature_dim = cfg.out_feature_dim if "feature_net" in cfg else out_channels + + if "latent_dim" in cfg: + self.latent_dim = cfg.latent_dim + + # Create nets + self.nets = nn.ModuleList() + self.wait_iters = [] + self.stop_iters = [] + self.feature_dims = [] + self.scales = [] + + for idx, net_key in enumerate(cfg.nets.keys()): + # Current config + net_cfg = cfg.nets[net_key] + + # Wait + self.wait_iters.append(net_cfg.wait_iters) + self.stop_iters.append(net_cfg.stop_iters) + self.feature_dims.append(net_cfg.feature_dim if "feature_dim" in net_cfg else 0) + self.scales.append(net_cfg.scale if "scale" in net_cfg else 1.0) + + # Create current net + net = net_dict[net_cfg.type]( + in_channels, + self.out_feature_dim + self.feature_dims[-1], + net_cfg, + latent_dim=self.latent_dim + (self.feature_dims[-2] if idx > 0 else 0), + 
group=self.group, + ) + + self.nets.append(net) + + # Feature net + if self.use_feature_net: + net_cfg = cfg.nets[net_key] + + self.feature_net = net_dict[net_cfg.type]( + self.out_feature_dim, self.out_channels, net_cfg, group=self.group + ) + + # Activation + self.activation = cfg.activation if "activation" in cfg else "identity" + self.out_layer = get_activation(self.activation) + + def forward(self, x, **render_kwargs): + total_output = 0.0 + feature_vector = x.new_zeros(x.shape[0], 0) + + for idx, net in enumerate(self.nets): + if self.cur_iter < self.wait_iters[idx] or self.cur_iter >= self.stop_iters[idx]: + continue + + # Run current net + cur_output = net(torch.cat([x, feature_vector], -1), **render_kwargs) + + if self.feature_dims[idx] > 0: + feature_vector = cur_output[..., -self.feature_dims[idx] :] + cur_output = cur_output[..., : -self.feature_dims[idx]] + + # Apply feature transform + if self.use_feature_net: + cur_output = self.feature_net(cur_output) + + # Add + total_output += cur_output * self.scales[idx] + + # Final non-linearity + return self.out_layer(total_output) + + def set_iter(self, i): + self.cur_iter = i + + for idx in range(len(self.nets)): + self.nets[idx].set_iter(i - self.wait_iters[idx]) + + +net_dict["multiple"] = MultipleNet diff --git a/nlf/nets/siren.py b/nlf/nets/siren.py new file mode 100644 index 0000000..53ec420 --- /dev/null +++ b/nlf/nets/siren.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 Anpei Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import numpy as np +import torch +from torch import nn + +from nlf.activations import get_activation + + +class SineLayer(nn.Module): + # See paper sec. 3.2, final paragraph, and supplement Sec. 1.5 for discussion of omega_0. + + # If is_first=True, omega_0 is a frequency factor which simply multiplies the activations before the + # nonlinearity. Different signals may require different omega_0 in the first layer - this is a + # hyperparameter. + + # If is_first=False, then the weights will be divided by omega_0 so as to keep the magnitude of + # activations constant, but boost gradients to the weight matrix (see supplement Sec. 
1.5) + + def __init__(self, in_features, out_features, bias=True, is_first=False, omega_0=30, **kwargs): + + super().__init__() + + self.omega_0 = omega_0 + self.is_first = is_first + + self.in_features = in_features + self.linear = nn.Linear(in_features, out_features, bias=bias) + + self.init_weights() + + def init_weights(self): + with torch.no_grad(): + if self.is_first: + self.linear.weight.uniform_(-1 / self.in_features, 1 / self.in_features) + else: + self.linear.weight.uniform_( + -np.sqrt(6 / self.in_features) / self.omega_0, np.sqrt(6 / self.in_features) / self.omega_0 + ) + + def forward(self, input): + return torch.sin(self.omega_0 * self.linear(input)) + + def forward_with_intermediate(self, input): + # For visualization of activation distributions + intermediate = self.omega_0 * self.linear(input) + return torch.sin(intermediate), intermediate + + +class Siren(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + + super().__init__() + + self.opt_group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + self.depth = cfg.depth + self.skips = cfg.skips if "skips" in cfg else [] + self.with_norm = cfg.with_norm if "with_norm" in cfg else False + + self.start_channel = kwargs["start_channel"] if "start_channel" in kwargs else 0 + self.in_channels = kwargs["num_channels"] if "num_channels" in kwargs else (in_channels - self.start_channel) + self.out_channels = out_channels if self.depth != 0 else self.in_channels + self.latent_dim = kwargs["latent_dim"] if "latent_dim" in kwargs else 0 + + # Siren config + self.first_omega_0 = cfg.first_omega_0 if "first_omega_0" in cfg else 30.0 + self.hidden_omega_0 = cfg.hidden_omega_0 if "hidden_omega_0" in cfg else 30.0 + self.outermost_linear = cfg.outermost_linear if "outermost_linear" in cfg else True + + # Net + for i in range(self.depth + 2): + if i == 0: + layer = SineLayer( + self.in_channels + self.latent_dim, cfg.hidden_channels, is_first=True, omega_0=self.first_omega_0 + ) + elif i in self.skips: + layer = SineLayer( + cfg.hidden_channels + self.in_channels + self.latent_dim, + cfg.hidden_channels, + is_first=False, + omega_0=self.hidden_omega_0, + ) + else: + layer = SineLayer(cfg.hidden_channels, cfg.hidden_channels, is_first=False, omega_0=self.hidden_omega_0) + + if self.with_norm: + layer = nn.Sequential(layer, nn.LayerNorm(cfg.hidden_channels, elementwise_affine=True)) + + setattr(self, f"encoding{i+1}", layer) + + if self.outermost_linear: + self.final_layer = nn.Linear(cfg.hidden_channels, self.out_channels) + + with torch.no_grad(): + self.final_layer.weight.uniform_( + -np.sqrt(6 / cfg.hidden_channels) / self.hidden_omega_0, + np.sqrt(6 / cfg.hidden_channels) / self.hidden_omega_0, + ) + else: + self.final_layer = SineLayer( + cfg.hidden_channels, self.out_channels, is_first=False, omega_0=self.hidden_omega_0 + ) + + # Final activation + self.activation = cfg.activation if "activation" in cfg else "identity" + self.out_layer = get_activation(self.activation) + + def forward(self, x): + if self.latent_dim > 0: + x = torch.cat( + [x[..., self.start_channel : self.start_channel + self.in_channels], x[..., -self.latent_dim :]], -1 + ) + else: + x = x[..., self.start_channel : self.start_channel + self.in_channels] + + # Run forward + input_x = x + + for i in range(self.depth): + if i in self.skips: + x = torch.cat([input_x, x], -1) + + x = getattr(self, f"encoding{i+1}")(x) + + return self.out_layer(self.final_layer(x)) + + def set_iter(self, i): + self.cur_iter = 
i diff --git a/nlf/nets/tensor.py b/nlf/nets/tensor.py new file mode 100644 index 0000000..e84ab09 --- /dev/null +++ b/nlf/nets/tensor.py @@ -0,0 +1,502 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 Anpei Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import numpy as np +import torch +from torch import nn + +from nlf.activations import get_activation +from nlf.param import RayParam + +from .array_nd import array_dict +from .mlp import mlp_dict + +tensor_dict = {} + + +class TensorProduct(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + self.in_channels = in_channels + self.out_channels = out_channels + self.latent_dim = kwargs["latent_dim"] if "latent_dim" in kwargs else 0 + self.num_tensors = len(cfg.tensors.keys()) + self.num_basis = cfg.num_basis + self.num_opacity_basis = cfg.num_opacity_basis if "num_opacity_basis" in cfg else self.num_basis + self.use_opacity = "num_opacity_basis" in cfg + + if "latent_dim" in cfg: + self.latent_dim = cfg.latent_dim + + # Activation + self.activation = cfg.activation if "activation" in cfg else "identity" + self.out_layer = get_activation(self.activation) + + # Basis + self.has_basis = "basis" in cfg + self.separate_basis = cfg.separate_basis if "separate_basis" in cfg else False + + if "basis" in cfg: + self.basis = mlp_dict[cfg.basis.type]( + self.in_channels, + self.num_basis * (self.out_channels - 1) + self.num_opacity_basis, + cfg.basis, + group=self.group, + ) + + if self.use_opacity: + tensor_out_channels = self.num_basis + self.num_opacity_basis + else: + tensor_out_channels = self.num_basis + else: + tensor_out_channels = self.num_basis * (self.out_channels - 1) + self.num_opacity_basis + + # Tensors + self.tensors = [] + + for idx, tensor_key in enumerate(cfg.tensors.keys()): + tensor_cfg = cfg.tensors[tensor_key] + cur_tensor = array_dict[tensor_cfg.type]( + self.in_channels, tensor_out_channels, tensor_cfg, group=self.group + ) + self.tensors.append(cur_tensor) + + self.tensors = nn.ModuleList(self.tensors) + + # TODO: Add VBNF basis model (with options for discretized / non-discretized look-up) + # TODO: Option for separate basis for each tensor + + def forward(self, x, render_kwargs): + # Get coefficients + outputs = [] + + for idx, tensor in enumerate(self.tensors): + outputs.append(tensor(x)) + + coeffs = torch.stack(outputs, -1).prod(-1)[..., None] + + # Get basis + if self.has_basis: + basis = self.basis(x) + + # Separate into color, opacity + if self.use_opacity: + color_basis = basis[..., : -self.num_opacity_basis].view( + x.shape[0], self.num_basis, self.out_channels - 1 + ) + # opacity_basis = basis[..., -self.num_opacity_basis:].view( + opacity_basis = basis.new_ones(x.shape[0], self.num_opacity_basis, 1) + + color_coeffs = coeffs[..., : -self.num_opacity_basis, :] + opacity_coeffs = coeffs[..., -self.num_opacity_basis :, :] + else: + basis = basis.view(x.shape[0], self.num_basis, self.out_channels) + else: + # Separate into color, opacity + if self.use_opacity: + color_coeffs = coeffs[..., : -self.num_opacity_basis, :].view( + x.shape[0], self.num_basis, self.out_channels - 1 + ) + opacity_coeffs = coeffs[..., -self.num_opacity_basis :, :].view(x.shape[0], self.num_opacity_basis, 1) + + color_basis = 
torch.ones_like(color_coeffs) + opacity_basis = torch.ones_like(opacity_coeffs) + else: + coeffs = coeffs.view(x.shape[0], self.num_basis, self.out_channels) + basis = torch.ones_like(coeffs) + + # Return + if self.use_opacity: + color = self.out_layer((color_coeffs * color_basis).mean(1)) + opacity = self.out_layer((opacity_coeffs * opacity_basis).mean(1)) + return torch.cat([color, opacity], -1) + else: + return self.out_layer((coeffs * basis).mean(1)) + + def set_iter(self, i): + for tensor in self.tensors: + tensor.set_iter(i) + + +tensor_dict["tensor_product"] = TensorProduct + + +class TensorConcat(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + # In channels, out channels + self.latent_dim = kwargs["latent_dim"] if "latent_dim" in kwargs else 0 + self.in_channels = in_channels + self.out_channels = self.out_channels + + if "latent_dim" in cfg: + self.latent_dim = cfg.latent_dim + + # Num basis, num features + self.num_tensors = len(cfg.tensors.keys()) + self.num_basis = cfg.num_basis + self.num_features = self.out_channels // self.num_tensors + self.num_extra = self.out_channels - self.num_features * self.num_tensors + + # Activation + self.activation = cfg.activation if "activation" in cfg else "identity" + self.out_layer = get_activation(self.activation) + + # Tensors + self.tensors = [] + + for idx, tensor_key in enumerate(cfg.tensors.keys()): + tensor_cfg = cfg.tensors[tensor_key] + + cur_tensor = array_dict[tensor_cfg.type]( + self.input_channels, self.num_basis * (self.num_features + self.num_extra), tensor_cfg, group=self.group + ) + self.tensors.append(cur_tensor) + + self.tensors = nn.ModuleList(self.tensors) + + def forward(self, x, **kwargs): + outputs = [] + extras = [] + + for idx, tensor in enumerate(self.tensors): + cur_output = tensor(x) + cur_output.view(x.shape[0], self.num_basis, self.num_features + self.num_extra) + + outputs.append(cur_output[..., : self.num_features]) + extras.append(cur_output[..., self.num_features :]) + + extras = torch.stack(extras, -1).prod(-1)[..., None].mean(1) + outputs = torch.cat(outputs, -1).mean(1) + return self.out_layer(torch.cat([outputs, extras]), -1) + + def set_iter(self, i): + for tensor in self.tensors: + tensor.set_iter(i) + + +tensor_dict["tensor_concat"] = TensorConcat + + +class TensorPassthrough(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + # Tensors + self.tensors = [] + + for idx, tensor_key in enumerate(cfg.tensors.keys()): + tensor_cfg = cfg.tensors[tensor_key] + + cur_tensor = array_dict[tensor_cfg.type](in_channels, out_channels, tensor_cfg, **kwargs) + self.tensors.append(cur_tensor) + + self.tensors = nn.ModuleList(self.tensors) + + def forward(self, x, **kwargs): + return self.tensors[0](x) + + def set_iter(self, i): + for tensor in self.tensors: + tensor.set_iter(i) + + +tensor_dict["tensor_passthrough"] = TensorPassthrough + + +def mean(tensors, *args, **kwargs): + return torch.mean(tensors, -2) + + +def over_composite_one(rgb, alphas, **kwargs): + alphas_shifted = torch.cat([torch.ones_like(alphas[:, :1]), 1 - alphas + 1e-8], -1) + weights = alphas * torch.cumprod(alphas_shifted, -1)[:, :-1] + accum = weights.sum(-1) + + rgb_final = torch.sum(weights.unsqueeze(-1) * rgb, -2) + + if "white_background" in kwargs and kwargs["white_background"]: + rgb_final = rgb_final + (1.0 - accum.unsqueeze(-1)) + + 
return rgb_final, accum, weights + + +def over(rgba, *args, **kwargs): + rgb = rgba[..., :-1] + alpha = rgba[..., -1] + + return over_composite_one(rgb, alpha)[0] + + +def _over_opacity(rgba, deltas): + rgb = torch.sigmoid(rgba[..., :-1]) + density = torch.relu(rgba[..., -1]) + alpha = 1 - torch.exp(-deltas * density) + + return over_composite_one(rgb, alpha)[0] + + +def over_opacity(rgba, *args, **kwargs): + rgb = torch.sigmoid(rgba[..., :-1]) + density = rgba[..., -1] + density = torch.relu(density) + alpha = 1 - torch.exp(-(4.0 / rgb.shape[1]) * density) + + return over_composite_one(rgb, alpha, **kwargs)[0] + + +def over_opacity_extra(rgba, *args, **kwargs): + # RGB + rgb = torch.sigmoid(rgba[..., :-1]) + + # Density + density = rgba[..., -1] + + # Remove samples with distance 0 + density = torch.where((kwargs["distance"] * torch.ones_like(density)) < 1e-5, torch.zeros_like(density), density) + + # density = nn.functional.softplus(density - 10.0) + density = torch.relu(density) + + # Density factor + density = density * kwargs["density_factor"] + + # Calculate alpha + # alpha = 1 - torch.exp(-(4.0 / rgb.shape[1]) * density) + + deltas = torch.cat( + [ + torch.abs(kwargs["distance"][:, 1:] - kwargs["distance"][:, :-1]), + 10000 * torch.ones_like(kwargs["distance"][:, :1]), + ], + dim=1, + ) + alpha = 1 - torch.exp(-deltas * density) + + return over_composite_one(rgb, alpha, **kwargs)[0] + + +def concat(outputs, *args, **kwargs): + return outputs.view(*(outputs.shape[0:-2] + (-1,))) + + +reduce_dict = { + "mean": mean, + "over": over, + "over_opacity": over_opacity, + "over_opacity_extra": over_opacity_extra, + "concat": concat, +} + + +class TensorReduce(nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "color") + + self.in_channels = in_channels + self.out_channels = out_channels + self.latent_dim = kwargs["latent_dim"] if "latent_dim" in kwargs else 0 + + if "latent_dim" in cfg: + self.latent_dim = cfg.latent_dim + + self.num_partitions = cfg.num_partitions if "num_partitions" in cfg else -1 + self.num_tensors = len(cfg.tensors.keys()) + + self.out_feature_dim = cfg.out_feature_dim if "out_feature_dim" in cfg else out_channels + self.use_feature_net = "feature_net" in cfg + self.white_background = False if "white_background" not in cfg else cfg.white_background + self.density_factor = 1.0 if "density_factor" not in cfg else cfg.density_factor + + # Reduce fns + if "reduce" not in cfg: + cfg.reduce = "mean" + + if "reduce_partitions" not in cfg: + cfg.reduce_partitions = "over_opacity_extra" + + self.reduce_fn = reduce_dict[cfg.reduce] + self.reduce_partition_fn = reduce_dict[cfg.reduce_partitions] + + # Combined opacity and color network / tensors + self.use_opacity = ("over" in cfg.reduce_partitions) and (self.num_partitions > 0) + + if self.use_feature_net or not self.use_opacity: + tensor_out_channels = self.out_feature_dim + else: + tensor_out_channels = self.out_feature_dim + 1 + + # Feature net + if self.use_feature_net: + # Create + net_cfg = cfg.feature_net + + # Input channels + self.feature_ray_channels = net_cfg.ray_channels if "ray_channels" in net_cfg else 0 + self.remove_rays = net_cfg.remove_rays if "remove_rays" in net_cfg else True + self.in_channels -= self.feature_ray_channels + + # Ray param + if self.feature_ray_channels > 0: + self.feature_param = RayParam(net_cfg.param) + self.feature_ray_in_channels = 
self.feature_param.out_channels + else: + self.feature_param = None + self.feature_ray_in_channels = 0 + + if self.num_partitions > 0 and not self.use_opacity: + self.feature_net = mlp_dict[net_cfg.type]( + self.out_feature_dim * self.num_partitions + self.feature_ray_in_channels, + self.out_channels + (1 if self.use_opacity else 0), + net_cfg, + group=self.group, + ) + else: + self.feature_net = mlp_dict[net_cfg.type]( + self.out_feature_dim * self.num_tensors + self.feature_ray_in_channels, + self.out_channels + 1, + net_cfg, + group=self.group, + ) + else: + self.feature_param = None + self.feature_ray_in_channels = 0 + + # Activation + self.activation = cfg.activation if "activation" in cfg else "identity" + self.out_layer = get_activation(self.activation) + + # Tensors + self.tensors = [] + + for _, tensor_key in enumerate(cfg.tensors.keys()): + tensor_cfg = cfg.tensors[tensor_key] + cur_tensor = tensor_dict[tensor_cfg.type]( + self.in_channels, tensor_out_channels, tensor_cfg, latent_dim=0, group=self.group + ) + self.tensors.append(cur_tensor) + + self.tensors = nn.ModuleList(self.tensors) + + def forward(self, x, render_kwargs): + points = x["points"] + distances = x["distances"] + + batch_size = points.shape[0] + + x = torch.cat( + [ + points.view(batch_size, -1, 3), + distances.view(batch_size, -1, 1), + ], + -1, + ).view(batch_size, -1) + + # Reshape + if self.feature_param is not None: + param_rays = self.feature_param(x[..., : self.feature_ray_channels]) + + if self.remove_rays: + x = x[..., self.feature_ray_channels :] + else: + param_rays = x[..., 0:0] + + if self.latent_dim > 0: + x = x[..., : -self.latent_dim] + + if self.num_partitions != -1: + x = x.reshape(-1, x.shape[-1] // self.num_partitions) + + # Get colors and opacities + outputs = [] + opacities = [] + + for idx, tensor in enumerate(self.tensors): + cur_output = tensor(x, render_kwargs) + outputs.append(cur_output[..., :-1]) + opacities.append(cur_output[..., -1:]) + + outputs = torch.stack(outputs, 1) + opacities = torch.stack(opacities, 1) + + if self.use_feature_net or not self.use_opacity: + outputs = torch.cat([outputs, opacities], -1) + + # Partitioned forward + if self.num_partitions > 0: + # Reduce + outputs = outputs.view(batch_size, self.num_partitions, -1, outputs.shape[-1]) + outputs = self.reduce_fn(outputs) + + # Feature net + if self.use_feature_net and self.use_opacity: + outputs = outputs.view(-1, outputs.shape[-1]) + param_rays = ( + param_rays.unsqueeze(1) + .repeat(1, self.num_partitions, 1) + .view(outputs.shape[0], param_rays.shape[-1]) + ) + outputs = self.feature_net(torch.cat([param_rays, outputs], -1)) + outputs = outputs.view(batch_size, self.num_partitions, outputs.shape[-1]) + # Combine color and opacity + elif self.use_opacity: + opacities = opacities.view(batch_size, self.num_partitions, -1, opacities.shape[-1]) + opacities = self.reduce_fn(opacities) + outputs = torch.cat([outputs, opacities], -1) + + # For visualization + if "keep_tensor_partitions" in render_kwargs: + outputs = outputs[:, render_kwargs["keep_tensor_partitions"], :].view( + batch_size, len(render_kwargs["keep_tensor_partitions"]), outputs.shape[-1] + ) + + # Reduce partitions + x = x.view(batch_size, self.num_partitions, -1) + + outputs = self.reduce_partition_fn( + outputs, + inputs=x[..., :3], + distance=x[..., -1], + density_factor=self.density_factor, + white_background=self.white_background, + ) + + # Feature net + if self.use_feature_net and not self.use_opacity: + param_rays = 
param_rays.view(outputs.shape[0], param_rays.shape[-1]) + outputs = self.feature_net(torch.cat([param_rays, outputs], -1)) + else: + # Reduce + outputs = outputs.view(batch_size, -1, outputs.shape[-1]) + outputs = self.reduce_fn(outputs) + + # Feature net + if self.use_feature_net: + param_rays = param_rays.view(outputs.shape[0], param_rays.shape[-1]) + outputs = self.feature_net(torch.cat([param_rays, outputs], -1)) + + rgb_map = self.out_layer(outputs) + + if "fields" in render_kwargs: + return {} + else: + return rgb_map + + def set_iter(self, i): + for tensor in self.tensors: + tensor.set_iter(i) + + +tensor_dict["tensor_sum"] = TensorReduce diff --git a/nlf/nets/tensorf_base.py b/nlf/nets/tensorf_base.py new file mode 100644 index 0000000..7d1aafc --- /dev/null +++ b/nlf/nets/tensorf_base.py @@ -0,0 +1,1178 @@ +#!/usr/bin/env python +# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL) +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 Anpei Chen +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-License-Identifier: MIT + +import pdb +import time +from typing import Dict + +import numpy as np +import torch +import torch.nn +import torch.nn.functional as F +from torch import autograd + +from utils.sh_utils import eval_sh_bases +from utils.tensorf_utils import ( + AlphaGridMask, + DensityFourierRender, + DensityLinearRender, + DensityRender, + N_to_reso, + RGBIdentityRender, + RGBRender, + RGBtFourierRender, + RGBtLinearRender, + SHRender, + alpha2weights, + cal_n_samples, + positional_encoding, + raw2alpha, +) + + +class MLPRender_Fea(torch.nn.Module): + def __init__(self, inChanel, viewpe=6, feape=6, featureC=128, out_dim=3): + super().__init__() + self.opt_group = "color_impl" + + self.in_mlpC = 2 * viewpe * 3 + 2 * feape * inChanel + 3 + inChanel + self.viewpe = viewpe + self.feape = feape + layer1 = torch.nn.Linear(self.in_mlpC, featureC) + layer2 = torch.nn.Linear(featureC, featureC) + layer3 = torch.nn.Linear(featureC, out_dim) # RGB + gear level + + self.mlp = torch.nn.Sequential( + layer1, + torch.nn.ReLU(inplace=True), + layer2, + torch.nn.ReLU(inplace=True), + layer3, + ) + torch.nn.init.constant_(self.mlp[-1].bias, 0) + + def forward(self, pts, viewdirs, features, kwargs): + indata = [features, viewdirs] + if self.feape > 0: + indata += [positional_encoding(features, self.feape)] + if self.viewpe > 0: + indata += [positional_encoding(viewdirs, self.viewpe)] + mlp_in = torch.cat(indata, dim=-1) + rgb = self.mlp(mlp_in) + rgb = torch.sigmoid(rgb) + return rgb + + +class TensorBase(torch.nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super(TensorBase, self).__init__() + + self.cfg = cfg + self.device = "cuda" + + self.white_bg = cfg.white_bg if "white_bg" in cfg else 0 + self.black_bg = cfg.black_bg if "black_bg" in cfg else 0 + self.ndc_ray = cfg.ndc_ray if "ndc_ray" in cfg else 0 + + self.gear_num = cfg.gear_num + + self.register_buffer("aabb", torch.tensor(cfg.aabb).to(self.device)) + + if "grid_size" in cfg: + self.use_grid_size_upsample = True + self.register_buffer("gridSize", torch.tensor(list(cfg.grid_size.start))) + self.gridSizeStart = list(cfg.grid_size.start) + self.gridSizeEnd = list(cfg.grid_size.end) + else: + self.use_grid_size_upsample = False + gridSize = N_to_reso(cfg.N_voxel_init, self.aabb) + self.register_buffer("gridSize", torch.tensor(gridSize)) + self.gridSizeStart = gridSize + + self.max_n_samples = cfg.nSamples if "nSamples" in cfg else 32 + self.step_ratio = cfg.step_ratio 
if "step_ratio" in cfg else 0.5 + self.nSamples = min(self.max_n_samples, cal_n_samples(self.gridSize, self.step_ratio)) + + self.update_AlphaMask_list = cfg.update_AlphaMask_list + self.upsamp_list = cfg.upsamp_list + + if self.use_grid_size_upsample: + self.N_voxel_list = [] + + for i in range(3): + cur_voxel_list = ( + torch.round( + torch.exp( + torch.linspace( + np.log(self.gridSizeStart[i]), + np.log(self.gridSizeEnd[i]), + len(self.upsamp_list) + 1, + ) + ) + ).long() + ).tolist()[1:] + self.N_voxel_list.append(cur_voxel_list) + else: + self.N_voxel_list = ( + torch.round( + torch.exp( + torch.linspace( + np.log(cfg.N_voxel_init), + np.log(cfg.N_voxel_final), + len(self.upsamp_list) + 1, + ) + ) + ).long() + ).tolist()[1:] + + self.density_n_comp = cfg.n_lamb_sigma if "n_lamb_sigma" in cfg else 8 + self.app_n_comp = cfg.n_lamb_sh if "n_lamb_sh" in cfg else 24 + self.app_dim = cfg.data_dim_color if "data_dim_color" in cfg else 27 + self.alphaMask = cfg.alphaMask if "alphaMask" in cfg else None + + self.density_shift = cfg.density_shift if "density_shift" in cfg else -10.0 + self.alphaMask_thres = cfg.alpha_mask_thre if "alpha_mask_thre" in cfg else 0.001 + self.rayMarch_weight_thres = cfg.rm_weight_mask_thre if "rm_weight_mask_thre" in cfg else 0.0001 + self.distance_scale = cfg.distance_scale if "distance_scale" in cfg else 25 + self.fea2denseAct = cfg.fea2denseAct if "fea2denseAct" in cfg else "softplus" + + self.near_far = cfg.near_far if "near_far" in cfg else [2.0, 6.0] + + ### Filtering parameters + if "filter" not in cfg: + cfg.filter = {} + self.apply_filter_weights = False + else: + self.apply_filter_weights = True + + self.filter_weight_thresh = getattr(cfg.filter, "weight_thresh", 1e-3) + self.filter_max_samples = getattr(cfg.filter, "max_samples", 32) + self.filter_wait_iters = getattr(cfg.filter, "wait_iters", 12000) + ### Filtering parameters + + self.update_stepSize(self.gridSize) + + self.matMode = [[0, 1], [0, 2], [1, 2]] + self.vecMode = [2, 1, 0] + self.comp_w = [1, 1, 1] + + self.init_svd_volume(self.gridSize[0], self.device) + + self.shadingMode = cfg.shadingMode if "shadingMode" in cfg else "MLP_PE" + self.pos_pe = cfg.pos_pe if "pos_pe" in cfg else 6 + self.view_pe = cfg.view_pe if "view_pe" in cfg else 6 + self.fea_pe = cfg.fea_pe if "fea_pe" in cfg else 6 + self.featureC = cfg.featureC if "featureC" in cfg else 128 + + self.init_render_func( + self.shadingMode, + self.pos_pe, + self.view_pe, + self.fea_pe, + self.featureC, + self.device, + ) + + def init_render_func(self, shadingMode, pos_pe, view_pe, fea_pe, featureC, device): + self.renderModule = MLPRender_Fea(self.app_dim, view_pe, fea_pe, featureC, 3).to(device) + print("pos_pe", pos_pe, "view_pe", view_pe, "fea_pe", fea_pe) + print(self.renderModule) + self.renderModule_sam = MLPRender_Fea(self.app_dim, view_pe, fea_pe, featureC, 256).to(device) + print("pos_pe", pos_pe, "view_pe", view_pe, "fea_pe", fea_pe) + print(self.renderModule_sam) + + def update_stepSize(self, gridSize): + print("aabb", self.aabb.view(-1)) + print("grid size", gridSize) + self.aabbSize = self.aabb[1] - self.aabb[0] + self.invaabbSize = 2.0 / self.aabbSize + self.register_buffer("gridSize", torch.LongTensor(gridSize).to(self.device)) + self.units = self.aabbSize / (self.gridSize - 1) + self.stepSize = torch.mean(self.units) * self.step_ratio + self.aabbDiag = torch.sqrt(torch.sum(torch.square(self.aabbSize))) + self.nSamples = min(self.max_n_samples, int((self.aabbDiag / self.stepSize).item()) + 1) + print("sampling step size: ", 
self.stepSize) + print("sampling number: ", self.nSamples) + + def init_svd_volume(self, res, device): + pass + + def compute_features(self, xyz_sampled): + pass + + def compute_densityfeature(self, xyz_sampled): + pass + + def compute_appfeature(self, xyz_sampled): + pass + + def normalize_coord(self, xyz_sampled): + return (xyz_sampled - self.aabb[0]) * self.invaabbSize - 1 + + def get_optparam_groups(self, lr_init_spatial=0.02, lr_init_network=0.001): + pass + + def get_kwargs(self): + return { + "aabb": self.aabb, + "gridSize": self.gridSize.tolist(), + "density_n_comp": self.density_n_comp, + "appearance_n_comp": self.app_n_comp, + "app_dim": self.app_dim, + "density_shift": self.density_shift, + "alphaMask_thres": self.alphaMask_thres, + "distance_scale": self.distance_scale, + "rayMarch_weight_thres": self.rayMarch_weight_thres, + "fea2denseAct": self.fea2denseAct, + "near_far": self.near_far, + "step_ratio": self.step_ratio, + "shadingMode": self.shadingMode, + "pos_pe": self.pos_pe, + "view_pe": self.view_pe, + "fea_pe": self.fea_pe, + "featureC": self.featureC, + } + + def sample_ray_ndc(self, rays_o, rays_d, N_samples=-1): + N_samples = N_samples if N_samples > 0 else self.nSamples + near, far = self.near_far + interpx = torch.linspace(near, far, N_samples).unsqueeze(0).to(rays_o) + + if self.training: + interpx += torch.rand_like(interpx).to(rays_o) * ((far - near) / N_samples) + + rays_pts = rays_o[..., None, :] + rays_d[..., None, :] * interpx[..., None] + mask_outbbox = ((self.aabb[0] > rays_pts) | (rays_pts > self.aabb[1])).any(dim=-1) + return rays_pts, interpx, ~mask_outbbox + + def valid_mask(self, rays_pts): + mask_outbbox = ((self.aabb[0] > rays_pts) | (rays_pts > self.aabb[1])).any(dim=-1) + return ~mask_outbbox + + def sample_ray(self, rays_o, rays_d, N_samples=-1): + N_samples = N_samples if N_samples > 0 else self.nSamples + stepsize = self.stepSize + near, far = self.near_far + vec = torch.where(rays_d == 0, torch.full_like(rays_d, 1e-6), rays_d) + rate_a = (self.aabb[1] - rays_o) / vec + rate_b = (self.aabb[0] - rays_o) / vec + t_min = torch.minimum(rate_a, rate_b).amax(-1).clamp(min=near, max=far) + + rng = torch.arange(N_samples)[None].float() + if self.training: + rng = rng.repeat(rays_d.shape[-2], 1) + rng += torch.rand_like(rng[:, [0]]) + step = stepsize * rng.to(rays_o.device) + interpx = t_min[..., None] + step + + rays_pts = rays_o[..., None, :] + rays_d[..., None, :] * interpx[..., None] + mask_outbbox = ((self.aabb[0] > rays_pts) | (rays_pts > self.aabb[1])).any(dim=-1) + + return rays_pts, interpx, ~mask_outbbox + + def shrink(self, new_aabb, voxel_size): + pass + + @torch.no_grad() + def getDenseAlpha(self, gridSize=None): + samples = torch.stack( + torch.meshgrid( + torch.linspace(0, 1, gridSize[0]), + torch.linspace(0, 1, gridSize[1]), + torch.linspace(0, 1, gridSize[2]), + ), + -1, + ).to(self.device) + dense_xyz = self.aabb[0] * (1 - samples) + self.aabb[1] * samples + + # dense_xyz = dense_xyz + # print(self.stepSize, self.distance_scale*self.aabbDiag) + alpha = torch.zeros_like(dense_xyz[..., 0]) + for i in range(gridSize[0]): + alpha[i] = self.compute_alpha(dense_xyz[i].view(-1, 3), 0.01).view((gridSize[1], gridSize[2])) + return alpha, dense_xyz + + @torch.no_grad() + def updateAlphaMask(self, gridSize=(200, 200, 200)): + + alpha, dense_xyz = self.getDenseAlpha(gridSize) + dense_xyz = dense_xyz.transpose(0, 2).contiguous() + alpha = alpha.clamp(0, 1).transpose(0, 2).contiguous()[None, None] + total_voxels = gridSize[0] * gridSize[1] * 
gridSize[2] + + ks = 3 + alpha = F.max_pool3d(alpha, kernel_size=ks, padding=ks // 2, stride=1).view(gridSize[::-1]) + alpha[alpha >= self.alphaMask_thres] = 1 + alpha[alpha < self.alphaMask_thres] = 0 + + self.alphaMask = AlphaGridMask(self.device, self.aabb, alpha) + + valid_xyz = dense_xyz[alpha > 0.5] + xyz_min = valid_xyz.amin(0) + xyz_max = valid_xyz.amax(0) + + new_aabb = torch.stack((xyz_min, xyz_max)) + + total = torch.sum(alpha) + print(f"bbox: {xyz_min, xyz_max} alpha rest %%%f" % (total / total_voxels * 100)) + return new_aabb + + @torch.no_grad() + def filtering_rays(self, all_rays, all_rgbs, N_samples=256, chunk=10240 * 5, bbox_only=False): + print("========> filtering rays ...") + tt = time.time() + + N = torch.tensor(all_rays.shape[:-1]).prod() + + mask_filtered = [] + idx_chunks = torch.split(torch.arange(N), chunk) + for idx_chunk in idx_chunks: + rays_chunk = all_rays[idx_chunk].to(self.device) + + rays_o, rays_d = rays_chunk[..., :3], rays_chunk[..., 3:6] + if bbox_only: + vec = torch.where(rays_d == 0, torch.full_like(rays_d, 1e-6), rays_d) + rate_a = (self.aabb[1] - rays_o) / vec + rate_b = (self.aabb[0] - rays_o) / vec + t_min = torch.minimum(rate_a, rate_b).amax(-1) # .clamp(min=near, max=far) + t_max = torch.maximum(rate_a, rate_b).amin(-1) # .clamp(min=near, max=far) + mask_inbbox = t_max > t_min + + else: + xyz_sampled, _, _ = self.sample_ray(rays_o, rays_d, N_samples=N_samples) + mask_inbbox = (self.alphaMask.sample_alpha(xyz_sampled).view(xyz_sampled.shape[:-1]) > 0).any(-1) + + mask_filtered.append(mask_inbbox.cpu()) + + mask_filtered = torch.cat(mask_filtered).view(all_rgbs.shape[:-1]) + + print(f"Ray filtering done! takes {time.time()-tt} s. ray mask ratio: {torch.sum(mask_filtered) / N}") + return all_rays[mask_filtered], all_rgbs[mask_filtered] + + def feature2density(self, density_features, **kwargs): + if "weights" in kwargs: + density_features = density_features * kwargs["weights"].view(*density_features.shape) + + if self.fea2denseAct == "softplus": + return F.softplus(density_features + self.density_shift) + elif self.fea2denseAct == "relu": + return F.relu(density_features) + elif self.fea2denseAct == "relu_abs": + return F.relu(torch.abs(density_features)) + + def compute_alpha(self, xyz_locs, length=0.01): + + if self.alphaMask is not None: + alphas = self.alphaMask.sample_alpha(xyz_locs) + alpha_mask = alphas > 0 + else: + alpha_mask = torch.ones_like(xyz_locs[:, 0], dtype=bool) + + sigma = torch.zeros(xyz_locs.shape[:-1], device=xyz_locs.device) + + if alpha_mask.any(): + xyz_sampled = self.normalize_coord(xyz_locs[alpha_mask]) + sigma_feature = self.compute_densityfeature(xyz_sampled) + valid_sigma = self.feature2density(sigma_feature) + sigma[alpha_mask] = valid_sigma + + alpha = 1 - torch.exp(-sigma * length).view(xyz_locs.shape[:-1]) + + return alpha + + def set_iter(self, iteration): + self.cur_iter = iteration + + if not self.training: + return + + self.needs_opt_reset = False + + # Pruning + if iteration in self.update_AlphaMask_list: + # Update BBOX + reso_mask = tuple(self.gridSize) + + if reso_mask[0] > 200: + reso_mask = (200, 200, 200) + + new_aabb = self.updateAlphaMask(reso_mask) + + # Update regularization weights + if iteration == self.update_AlphaMask_list[0]: + self.shrink(new_aabb) + + # Upsampling + if iteration in self.upsamp_list: + if self.use_grid_size_upsample: + print("Before:", self.N_voxel_list, iteration) + + reso_cur = [] + for i in range(3): + reso_cur.append(self.N_voxel_list[i].pop(0)) + + print("After:", 
self.N_voxel_list, iteration) + else: + print("Before:", self.N_voxel_list, iteration) + n_voxels = self.N_voxel_list.pop(0) + print("After:", self.N_voxel_list, iteration) + reso_cur = N_to_reso(n_voxels, self.aabb) + + self.nSamples = min(self.max_n_samples, cal_n_samples(reso_cur, self.step_ratio)) + self.upsample_volume_grid(reso_cur) + + if self.cfg.lr_upsample_reset: + self.needs_opt_reset = True + + def forward(self, rays_chunk): + # Sample points + viewdirs = rays_chunk[:, 3:6] + + if self.ndc_ray: + xyz_sampled, z_vals, ray_valid = self.sample_ray_ndc( + rays_chunk[:, :3], rays_chunk[:, 3:6], N_samples=self.nSamples + ) + dists = torch.cat( + (z_vals[:, 1:] - z_vals[:, :-1], torch.zeros_like(z_vals[:, :1])), + dim=-1, + ) + dists = dists * torch.norm(rays_chunk[:, 3:6], dim=-1, keepdim=True) + viewdirs = viewdirs / torch.norm(viewdirs, dim=-1, keepdim=True) + else: + xyz_sampled, z_vals, ray_valid = self.sample_ray( + rays_chunk[:, :3], rays_chunk[:, 3:6], N_samples=self.nSamples + ) + dists = torch.cat( + (z_vals[:, 1:] - z_vals[:, :-1], torch.zeros_like(z_vals[:, :1])), + dim=-1, + ) + + viewdirs = viewdirs.view(-1, 1, 3).expand(xyz_sampled.shape) + + if self.alphaMask is not None: + alphas = self.alphaMask.sample_alpha(xyz_sampled[ray_valid]) + alpha_mask = alphas > 0 + ray_invalid = ~ray_valid + ray_invalid[ray_valid] |= ~alpha_mask + ray_valid = ~ray_invalid + + sigma = torch.zeros(xyz_sampled.shape[:-1], device=xyz_sampled.device) + rgb = torch.zeros((*xyz_sampled.shape[:2], 3), device=xyz_sampled.device) + + if ray_valid.any(): + xyz_sampled = self.normalize_coord(xyz_sampled) + sigma_feature = self.compute_densityfeature(xyz_sampled[ray_valid]) + + valid_sigma = self.feature2density(sigma_feature) + sigma[ray_valid] = valid_sigma + + alpha, weight, bg_weight = raw2alpha(sigma, dists * self.distance_scale) + app_mask = weight > self.rayMarch_weight_thres + + if app_mask.any(): + app_features = self.compute_appfeature(xyz_sampled[app_mask]) + valid_rgbs = self.renderModule(xyz_sampled[app_mask], viewdirs[app_mask], app_features, {}) + rgb[app_mask] = valid_rgbs + + acc_map = torch.sum(weight, -1) + rgb_map = torch.sum(weight[..., None] * rgb, -2) + + if self.white_bg or (self.training and torch.rand((1,)) < 0.5): + rgb_map = rgb_map + (1.0 - acc_map[..., None]) + + rgb_map = rgb_map.clamp(0, 1) + + with torch.no_grad(): + depth_map = torch.sum(weight * z_vals, -1) + depth_map = depth_map + (1.0 - acc_map) * rays_chunk[..., -1] + + # return rgb_map, depth_map # rgb, sigma, alpha, weight, bg_weight + return rgb_map + + +class TensorVM(TensorBase): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__(in_channels, out_channels, cfg, **kwargs) + + self.opt_group = { + "color": [self.line_coef, self.plane_coef], + "color_impl": [self.basis_mat], + } + + if isinstance(self.renderModule, torch.nn.Module): + self.opt_group["color_impl"] += [self.renderModule] + + def init_svd_volume(self, res, device): + self.plane_coef = torch.nn.Parameter( + 0.1 * torch.randn((3, self.app_n_comp + self.density_n_comp, res, res), device=device) + ) + self.line_coef = torch.nn.Parameter( + 0.1 * torch.randn((3, self.app_n_comp + self.density_n_comp, res, 1), device=device) + ) + self.basis_mat = torch.nn.Linear(self.app_n_comp * 3, self.app_dim, bias=False, device=device) + + def get_optparam_groups(self, lr_init_spatialxyz=0.02, lr_init_network=0.001): + grad_vars = [ + {"params": self.line_coef, "lr": lr_init_spatialxyz}, + {"params": self.plane_coef, "lr": 
lr_init_spatialxyz}, + {"params": self.basis_mat.parameters(), "lr": lr_init_network}, + ] + if isinstance(self.renderModule, torch.nn.Module): + grad_vars += [{"params": self.renderModule.parameters(), "lr": lr_init_network}] + return grad_vars + + def compute_features(self, xyz_sampled): + + coordinate_plane = torch.stack( + ( + xyz_sampled[..., self.matMode[0]], + xyz_sampled[..., self.matMode[1]], + xyz_sampled[..., self.matMode[2]], + ) + ) + coordinate_line = torch.stack( + ( + xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], + xyz_sampled[..., self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1) + + plane_feats = F.grid_sample( + self.plane_coef[:, -self.density_n_comp :], + coordinate_plane, + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + line_feats = F.grid_sample( + self.line_coef[:, -self.density_n_comp :], + coordinate_line, + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + + sigma_feature = torch.sum(plane_feats * line_feats, dim=0) + + plane_feats = F.grid_sample(self.plane_coef[:, : self.app_n_comp], coordinate_plane, align_corners=True).view( + 3 * self.app_n_comp, -1 + ) + line_feats = F.grid_sample(self.line_coef[:, : self.app_n_comp], coordinate_line, align_corners=True).view( + 3 * self.app_n_comp, -1 + ) + + app_features = self.basis_mat((plane_feats * line_feats).T) + + return sigma_feature, app_features + + def compute_densityfeature(self, xyz_sampled): + coordinate_plane = torch.stack( + ( + xyz_sampled[..., self.matMode[0]], + xyz_sampled[..., self.matMode[1]], + xyz_sampled[..., self.matMode[2]], + ) + ).view(3, -1, 1, 2) + coordinate_line = torch.stack( + ( + xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], + xyz_sampled[..., self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + plane_feats = F.grid_sample( + self.plane_coef[:, -self.density_n_comp :], + coordinate_plane, + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + line_feats = F.grid_sample( + self.line_coef[:, -self.density_n_comp :], + coordinate_line, + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + + sigma_feature = torch.sum(plane_feats * line_feats, dim=0) + + return sigma_feature + + def compute_appfeature(self, xyz_sampled): + coordinate_plane = torch.stack( + ( + xyz_sampled[..., self.matMode[0]], + xyz_sampled[..., self.matMode[1]], + xyz_sampled[..., self.matMode[2]], + ) + ).view(3, -1, 1, 2) + coordinate_line = torch.stack( + ( + xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], + xyz_sampled[..., self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + plane_feats = F.grid_sample(self.plane_coef[:, : self.app_n_comp], coordinate_plane, align_corners=True).view( + 3 * self.app_n_comp, -1 + ) + line_feats = F.grid_sample(self.line_coef[:, : self.app_n_comp], coordinate_line, align_corners=True).view( + 3 * self.app_n_comp, -1 + ) + + app_features = self.basis_mat((plane_feats * line_feats).T) + + return app_features + + def vectorDiffs(self, vector_comps): + total = 0 + + for idx in range(len(vector_comps)): + # print(self.line_coef.shape, vector_comps[idx].shape) + n_comp, n_size = vector_comps[idx].shape[:-1] + + dotp = torch.matmul( + vector_comps[idx].view(n_comp, n_size), + vector_comps[idx].view(n_comp, n_size).transpose(-1, -2), + ) + # 
print(vector_comps[idx].shape, vector_comps[idx].view(n_comp,n_size).transpose(-1,-2).shape, dotp.shape) + non_diagonal = dotp.view(-1)[1:].view(n_comp - 1, n_comp + 1)[..., :-1] + # print(vector_comps[idx].shape, vector_comps[idx].view(n_comp,n_size).transpose(-1,-2).shape, dotp.shape,non_diagonal.shape) + total = total + torch.mean(torch.abs(non_diagonal)) + return total + + def vector_comp_diffs(self): + + return self.vectorDiffs(self.line_coef[:, -self.density_n_comp :]) + self.vectorDiffs( + self.line_coef[:, : self.app_n_comp] + ) + + # @torch.no_grad() + def up_sampling_VM(self, plane_coef, line_coef, res_target): + + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + mat_id_0, mat_id_1 = self.matMode[i] + + plane_coef[i] = torch.nn.Parameter( + F.interpolate( + plane_coef[i].data, + size=(res_target[mat_id_1], res_target[mat_id_0]), + mode="bilinear", + align_corners=True, + ) + ) + line_coef[i] = torch.nn.Parameter( + F.interpolate( + line_coef[i].data, + size=(res_target[vec_id], 1), + mode="bilinear", + align_corners=True, + ) + ) + + # plane_coef[0] = torch.nn.Parameter( + # F.interpolate(plane_coef[0].data, size=(res_target[1], res_target[0]), mode='bilinear', + # align_corners=True)) + # line_coef[0] = torch.nn.Parameter( + # F.interpolate(line_coef[0].data, size=(res_target[2], 1), mode='bilinear', align_corners=True)) + # plane_coef[1] = torch.nn.Parameter( + # F.interpolate(plane_coef[1].data, size=(res_target[2], res_target[0]), mode='bilinear', + # align_corners=True)) + # line_coef[1] = torch.nn.Parameter( + # F.interpolate(line_coef[1].data, size=(res_target[1], 1), mode='bilinear', align_corners=True)) + # plane_coef[2] = torch.nn.Parameter( + # F.interpolate(plane_coef[2].data, size=(res_target[2], res_target[1]), mode='bilinear', + # align_corners=True)) + # line_coef[2] = torch.nn.Parameter( + # F.interpolate(line_coef[2].data, size=(res_target[0], 1), mode='bilinear', align_corners=True)) + + return plane_coef, line_coef + + # @torch.no_grad() + def upsample_volume_grid(self, res_target): + # self.app_plane, self.app_line = self.up_sampling_VM(self.app_plane, self.app_line, res_target) + # self.density_plane, self.density_line = self.up_sampling_VM(self.density_plane, self.density_line, res_target) + + scale = res_target[0] / self.line_coef.shape[2] # assuming xyz have the same scale + plane_coef = F.interpolate( + self.plane_coef.data, + scale_factor=scale, + mode="bilinear", + align_corners=True, + ) + line_coef = F.interpolate( + self.line_coef.data, + size=(res_target[0], 1), + mode="bilinear", + align_corners=True, + ) + self.plane_coef, self.line_coef = torch.nn.Parameter(plane_coef), torch.nn.Parameter(line_coef) + self.compute_stepSize(res_target) + print(f"upsamping to {res_target}") + + +class TensorVMSplit(TensorBase): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__(in_channels, out_channels, cfg, **kwargs) + + if "MLP" in self.shadingMode: + self.opt_group = { + "color": [ + self.density_line, + self.density_plane, + self.app_line, + self.app_plane, + ], + "color_impl": [self.basis_mat], + } + else: + self.opt_group = { + "color": [ + self.density_line, + self.density_plane, + self.app_line, + self.app_plane, + self.basis_mat, + ], + } + + if isinstance(self.renderModule, torch.nn.Module): + if "color_impl" not in self.opt_group: + self.opt_group["color_impl"] = [self.renderModule] + else: + self.opt_group["color_impl"] += [self.renderModule] + + def init_svd_volume(self, res, device): + if 
self.fea2denseAct == "softplus": + self.density_plane, self.density_line = self.init_one_svd_density( + self.density_n_comp, self.gridSize, 0.1, device + ) + else: + self.density_plane, self.density_line = self.init_one_svd_density( + self.density_n_comp, self.gridSize, 1e-2, device + ) + self.app_plane, self.app_line = self.init_one_svd(self.app_n_comp, self.gridSize, 0.1, device) + self.basis_mat = torch.nn.Linear(sum(self.app_n_comp), self.app_dim, bias=False).to(device) + + def init_one_svd(self, n_component, gridSize, scale, device): + plane_coef, line_coef = [], [] + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + mat_id_0, mat_id_1 = self.matMode[i] + + if self.cfg.shadingMode == "RGBIdentity": + plane_coef.append( + torch.nn.Parameter(scale * torch.randn((1, n_component[i], gridSize[mat_id_1], gridSize[mat_id_0]))) + ) # + line_coef.append(torch.nn.Parameter(scale * torch.randn((1, n_component[i], gridSize[vec_id], 1)))) + else: + plane_coef.append( + torch.nn.Parameter(scale * torch.randn((1, n_component[i], gridSize[mat_id_1], gridSize[mat_id_0]))) + ) # + line_coef.append(torch.nn.Parameter(scale * torch.randn((1, n_component[i], gridSize[vec_id], 1)))) + + return torch.nn.ParameterList(plane_coef).to(device), torch.nn.ParameterList(line_coef).to(device) + + def init_one_svd_density(self, n_component, gridSize, scale, device): + plane_coef, line_coef = [], [] + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + mat_id_0, mat_id_1 = self.matMode[i] + + if self.fea2denseAct == "softplus": + plane_coef.append( + torch.nn.Parameter(scale * torch.randn((1, n_component[i], gridSize[mat_id_1], gridSize[mat_id_0]))) + ) # + line_coef.append(torch.nn.Parameter(scale * torch.randn((1, n_component[i], gridSize[vec_id], 1)))) + elif self.fea2denseAct == "relu": + plane_coef.append( + torch.nn.Parameter( + scale * torch.rand((1, n_component[i], gridSize[mat_id_1], gridSize[mat_id_0])).clamp(1e-2, 1e8) + ) + ) + line_coef.append( + torch.nn.Parameter(scale * torch.rand((1, n_component[i], gridSize[vec_id], 1)).clamp(1e-2, 1e8)) + ) + + return torch.nn.ParameterList(plane_coef).to(device), torch.nn.ParameterList(line_coef).to(device) + + def get_optparam_groups(self, lr_init_spatialxyz=0.02, lr_init_network=0.001): + grad_vars = [ + {"params": self.density_line, "lr": lr_init_spatialxyz}, + {"params": self.density_plane, "lr": lr_init_spatialxyz}, + {"params": self.app_line, "lr": lr_init_spatialxyz}, + {"params": self.app_plane, "lr": lr_init_spatialxyz}, + {"params": self.basis_mat.parameters(), "lr": lr_init_network}, + ] + if isinstance(self.renderModule, torch.nn.Module): + grad_vars += [{"params": self.renderModule.parameters(), "lr": lr_init_network}] + return grad_vars + + def vectorDiffs(self, vector_comps): + total = 0 + + for idx in range(len(vector_comps)): + n_comp, n_size = vector_comps[idx].shape[1:-1] + + dotp = torch.matmul( + vector_comps[idx].view(n_comp, n_size), + vector_comps[idx].view(n_comp, n_size).transpose(-1, -2), + ) + non_diagonal = dotp.view(-1)[1:].view(n_comp - 1, n_comp + 1)[..., :-1] + total = total + torch.mean(torch.abs(non_diagonal)) + return total + + def vector_comp_diffs(self): + return self.vectorDiffs(self.density_line) + self.vectorDiffs(self.app_line) + + def density_L1(self): + total = 0 + for idx in range(len(self.density_plane)): + if self.density_plane[idx].shape[1] == 0: + continue + + total = ( + total + torch.mean(torch.abs(self.density_plane[idx])) + torch.mean(torch.abs(self.density_line[idx])) + ) # + 
torch.mean(torch.abs(self.app_plane[idx])) + torch.mean(torch.abs(self.density_plane[idx])) + return total + + def TV_loss_density(self, reg): + total = 0 + for idx in range(len(self.density_plane)): + if self.density_plane[idx].shape[1] == 0: + continue + + total = total + reg(self.density_plane[idx]) * 1e-2 # + reg(self.density_line[idx]) * 1e-3 + return total + + def TV_loss_app(self, reg): + total = 0 + for idx in range(len(self.app_plane)): + if self.app_plane[idx].shape[1] == 0: + continue + + total = total + reg(self.app_plane[idx]) * 1e-2 # + reg(self.app_line[idx]) * 1e-3 + return total + + def compute_densityfeature(self, xyz_sampled): + + # plane + line basis + coordinate_plane = torch.stack( + ( + xyz_sampled[:, self.matMode[0]], + xyz_sampled[:, self.matMode[1]], + xyz_sampled[:, self.matMode[2]], + ) + ).view(3, -1, 1, 2) + coordinate_line = torch.stack( + ( + xyz_sampled[:, self.vecMode[0]], + xyz_sampled[:, self.vecMode[1]], + xyz_sampled[:, self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + sigma_feature = torch.zeros((xyz_sampled.shape[0],), device=xyz_sampled.device) + for idx_plane in range(len(self.density_plane)): + if self.density_plane[idx_plane].shape[1] == 0: + continue + + plane_coef_point = F.grid_sample( + self.density_plane[idx_plane], + coordinate_plane[[idx_plane]], + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + line_coef_point = F.grid_sample( + self.density_line[idx_plane], + coordinate_line[[idx_plane]], + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + sigma_feature = sigma_feature + torch.sum(plane_coef_point * line_coef_point, dim=0) + # sigma_feature = sigma_feature + torch.mean(plane_coef_point * line_coef_point, dim=0) + + return sigma_feature + # return sigma_feature / len(self.density_plane) + + def compute_appfeature(self, xyz_sampled): + # return xyz_sampled.new_zeros(xyz_sampled.shape[0], self.app_dim) + + # plane + line basis + coordinate_plane = torch.stack( + ( + xyz_sampled[..., self.matMode[0]], + xyz_sampled[..., self.matMode[1]], + xyz_sampled[..., self.matMode[2]], + ) + ).view(3, -1, 1, 2) + coordinate_line = torch.stack( + ( + xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], + xyz_sampled[..., self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + plane_coef_point, line_coef_point = [], [] + for idx_plane in range(len(self.app_plane)): + if self.app_plane[idx_plane].shape[1] == 0: + continue + + plane_coef_point.append( + F.grid_sample( + self.app_plane[idx_plane], + coordinate_plane[[idx_plane]], + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + ) + line_coef_point.append( + F.grid_sample( + self.app_line[idx_plane], + coordinate_line[[idx_plane]], + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + ) + plane_coef_point, line_coef_point = torch.cat(plane_coef_point), torch.cat(line_coef_point) + + return self.basis_mat((plane_coef_point * line_coef_point).T) + # return self.basis_mat((plane_coef_point * line_coef_point).T) / plane_coef_point.shape + + @torch.no_grad() + def up_sampling_VM(self, plane_coef, line_coef, res_target): + + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + mat_id_0, mat_id_1 = self.matMode[i] + if plane_coef[i].shape[1] > 0: + plane_coef[i] = torch.nn.Parameter( + F.interpolate( + plane_coef[i].data, + size=(res_target[mat_id_1], res_target[mat_id_0]), 
+ mode="bilinear", + align_corners=True, + ) + ) + if line_coef[i].shape[1] > 0: + line_coef[i] = torch.nn.Parameter( + F.interpolate( + line_coef[i].data, + size=(res_target[vec_id], 1), + mode="bilinear", + align_corners=True, + ) + ) + + return plane_coef, line_coef + + @torch.no_grad() + def upsample_volume_grid(self, res_target): + self.app_plane, self.app_line = self.up_sampling_VM(self.app_plane, self.app_line, res_target) + self.density_plane, self.density_line = self.up_sampling_VM(self.density_plane, self.density_line, res_target) + + self.update_stepSize(res_target) + print(f"upsamping to {res_target}") + + @torch.no_grad() + def shrink(self, new_aabb): + print("====> shrinking ...") + xyz_min, xyz_max = new_aabb + t_l, b_r = (xyz_min - self.aabb[0]) / self.units, (xyz_max - self.aabb[0]) / self.units + # print(new_aabb, self.aabb) + # print(t_l, b_r,self.alphaMask.alpha_volume.shape) + t_l, b_r = torch.round(torch.round(t_l)).long(), torch.round(b_r).long() + 1 + b_r = torch.stack([b_r, self.gridSize]).amin(0) + + for i in range(len(self.vecMode)): + mode0 = self.vecMode[i] + self.density_line[i] = torch.nn.Parameter(self.density_line[i].data[..., t_l[mode0] : b_r[mode0], :]) + self.app_line[i] = torch.nn.Parameter(self.app_line[i].data[..., t_l[mode0] : b_r[mode0], :]) + mode0, mode1 = self.matMode[i] + self.density_plane[i] = torch.nn.Parameter( + self.density_plane[i].data[..., t_l[mode1] : b_r[mode1], t_l[mode0] : b_r[mode0]] + ) + self.app_plane[i] = torch.nn.Parameter( + self.app_plane[i].data[..., t_l[mode1] : b_r[mode1], t_l[mode0] : b_r[mode0]] + ) + + if not torch.all(self.alphaMask.gridSize == self.gridSize): + t_l_r, b_r_r = t_l / (self.gridSize - 1), (b_r - 1) / (self.gridSize - 1) + correct_aabb = torch.zeros_like(new_aabb) + correct_aabb[0] = (1 - t_l_r) * self.aabb[0] + t_l_r * self.aabb[1] + correct_aabb[1] = (1 - b_r_r) * self.aabb[0] + b_r_r * self.aabb[1] + print("aabb", new_aabb, "\ncorrect aabb", correct_aabb) + new_aabb = correct_aabb + + newSize = b_r - t_l + self.register_buffer("aabb", new_aabb) + self.update_stepSize((newSize[0], newSize[1], newSize[2])) + + +class TensorCP(TensorBase): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__(in_channels, out_channels, cfg, **kwargs) + + self.opt_group = { + "color": [self.density_line, self.app_line], + "color_impl": [self.basis_mat], + } + + if isinstance(self.renderModule, torch.nn.Module): + self.opt_group["color_impl"] += [self.renderModule] + + def init_svd_volume(self, res, device): + self.density_line = self.init_one_svd(self.density_n_comp[0], self.gridSize, 0.2, device) + self.app_line = self.init_one_svd(self.app_n_comp[0], self.gridSize, 0.2, device) + self.basis_mat = torch.nn.Linear(self.app_n_comp[0], self.app_dim, bias=False).to(device) + + def init_one_svd(self, n_component, gridSize, scale, device): + line_coef = [] + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + line_coef.append(torch.nn.Parameter(scale * torch.randn((1, n_component, gridSize[vec_id], 1)))) + return torch.nn.ParameterList(line_coef).to(device) + + def get_optparam_groups(self, lr_init_spatialxyz=0.02, lr_init_network=0.001): + grad_vars = [ + {"params": self.density_line, "lr": lr_init_spatialxyz}, + {"params": self.app_line, "lr": lr_init_spatialxyz}, + {"params": self.basis_mat.parameters(), "lr": lr_init_network}, + ] + if isinstance(self.renderModule, torch.nn.Module): + grad_vars += [{"params": self.renderModule.parameters(), "lr": lr_init_network}] + return grad_vars 
+ + def compute_densityfeature(self, xyz_sampled): + + coordinate_line = torch.stack( + ( + xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], + xyz_sampled[..., self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + line_coef_point = F.grid_sample(self.density_line[0], coordinate_line[[0]], align_corners=True).view( + -1, *xyz_sampled.shape[:1] + ) + line_coef_point = line_coef_point * F.grid_sample( + self.density_line[1], coordinate_line[[1]], align_corners=True + ).view(-1, *xyz_sampled.shape[:1]) + line_coef_point = line_coef_point * F.grid_sample( + self.density_line[2], coordinate_line[[2]], align_corners=True + ).view(-1, *xyz_sampled.shape[:1]) + sigma_feature = torch.sum(line_coef_point, dim=0) + + return sigma_feature + + def compute_appfeature(self, xyz_sampled): + + coordinate_line = torch.stack( + ( + xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], + xyz_sampled[..., self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + line_coef_point = F.grid_sample(self.app_line[0], coordinate_line[[0]], align_corners=True).view( + -1, *xyz_sampled.shape[:1] + ) + line_coef_point = line_coef_point * F.grid_sample( + self.app_line[1], coordinate_line[[1]], align_corners=True + ).view(-1, *xyz_sampled.shape[:1]) + line_coef_point = line_coef_point * F.grid_sample( + self.app_line[2], coordinate_line[[2]], align_corners=True + ).view(-1, *xyz_sampled.shape[:1]) + + return self.basis_mat(line_coef_point.T) + + @torch.no_grad() + def up_sampling_Vector(self, density_line_coef, app_line_coef, res_target): + + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + density_line_coef[i] = torch.nn.Parameter( + F.interpolate( + density_line_coef[i].data, + size=(res_target[vec_id], 1), + mode="bilinear", + align_corners=True, + ) + ) + app_line_coef[i] = torch.nn.Parameter( + F.interpolate( + app_line_coef[i].data, + size=(res_target[vec_id], 1), + mode="bilinear", + align_corners=True, + ) + ) + + return density_line_coef, app_line_coef + + @torch.no_grad() + def upsample_volume_grid(self, res_target): + self.density_line, self.app_line = self.up_sampling_Vector(self.density_line, self.app_line, res_target) + + self.update_stepSize(res_target) + print(f"upsamping to {res_target}") + + @torch.no_grad() + def shrink(self, new_aabb): + print("====> shrinking ...") + xyz_min, xyz_max = new_aabb + t_l, b_r = (xyz_min - self.aabb[0]) / self.units, (xyz_max - self.aabb[0]) / self.units + + t_l, b_r = torch.round(torch.round(t_l)).long(), torch.round(b_r).long() + 1 + b_r = torch.stack([b_r, self.gridSize]).amin(0) + + for i in range(len(self.vecMode)): + mode0 = self.vecMode[i] + self.density_line[i] = torch.nn.Parameter(self.density_line[i].data[..., t_l[mode0] : b_r[mode0], :]) + self.app_line[i] = torch.nn.Parameter(self.app_line[i].data[..., t_l[mode0] : b_r[mode0], :]) + + if not torch.all(self.alphaMask.gridSize == self.gridSize): + t_l_r, b_r_r = t_l / (self.gridSize - 1), (b_r - 1) / (self.gridSize - 1) + correct_aabb = torch.zeros_like(new_aabb) + correct_aabb[0] = (1 - t_l_r) * self.aabb[0] + t_l_r * self.aabb[1] + correct_aabb[1] = (1 - b_r_r) * self.aabb[0] + b_r_r * self.aabb[1] + print("aabb", new_aabb, "\ncorrect aabb", correct_aabb) + new_aabb = correct_aabb + + newSize = b_r - t_l + self.register_buffer("aabb", new_aabb) + self.update_stepSize((newSize[0], 
newSize[1], newSize[2])) + + def density_L1(self): + total = 0 + for idx in range(len(self.density_line)): + total = total + torch.mean(torch.abs(self.density_line[idx])) + return total + + def TV_loss_density(self, reg): + total = 0 + for idx in range(len(self.density_line)): + total = total + reg(self.density_line[idx]) * 1e-3 + return total + + def TV_loss_app(self, reg): + total = 0 + + for idx in range(len(self.app_line)): + total = total + reg(self.app_line[idx]) * 1e-3 + + return total + + +tensorf_base_dict = { + "tensor_vm": TensorVM, + "tensor_vm_split": TensorVMSplit, +} diff --git a/nlf/nets/tensorf_density.py b/nlf/nets/tensorf_density.py new file mode 100644 index 0000000..a847d95 --- /dev/null +++ b/nlf/nets/tensorf_density.py @@ -0,0 +1,1318 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 Anpei Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import time +from typing import Dict + +import numpy as np +import torch +import torch.nn +import torch.nn.functional as F +from torch import autograd + +from utils.sh_utils import eval_sh_bases +from utils.tensorf_utils import ( + DensityFourierRender, + DensityLinearRender, + DensityRender, + N_to_reso, + RGBIdentityRender, + RGBRender, + RGBtFourierRender, + RGBtLinearRender, + SHRender, + alpha2weights, + cal_n_samples, + positional_encoding, + raw2alpha, +) + + +class AlphaGridMask(torch.nn.Module): + def __init__(self, device, aabb, alpha_volume): + super().__init__() + self.opt_group = "color" + + self.device = device + + self.aabb = aabb.to(self.device) + self.aabbSize = self.aabb[1] - self.aabb[0] + self.invgridSize = 1.0 / self.aabbSize * 2 + self.alpha_volume = alpha_volume.view(1, 1, *alpha_volume.shape[-3:]) + self.gridSize = torch.LongTensor([alpha_volume.shape[-1], alpha_volume.shape[-2], alpha_volume.shape[-3]]).to( + self.device + ) + + def sample_alpha(self, xyz_sampled): + xyz_sampled = self.normalize_coord(xyz_sampled) + alpha_vals = F.grid_sample(self.alpha_volume, xyz_sampled.view(1, -1, 1, 1, 3), align_corners=True).view(-1) + + return alpha_vals + + def normalize_coord(self, xyz_sampled): + return (xyz_sampled - self.aabb[0]) * self.invgridSize - 1 + + +class MLPRender_Fea(torch.nn.Module): + def __init__(self, inChanel, viewpe=6, feape=6, featureC=128): + super().__init__() + self.opt_group = "color_impl" + + self.in_mlpC = 2 * viewpe * 3 + 2 * feape * inChanel + 3 + inChanel + self.viewpe = viewpe + self.feape = feape + layer1 = torch.nn.Linear(self.in_mlpC, featureC) + layer2 = torch.nn.Linear(featureC, featureC) + layer3 = torch.nn.Linear(featureC, 3) + + self.mlp = torch.nn.Sequential( + layer1, + torch.nn.ReLU(inplace=True), + layer2, + torch.nn.ReLU(inplace=True), + layer3, + ) + torch.nn.init.constant_(self.mlp[-1].bias, 0) + + def forward(self, pts, viewdirs, features, kwargs): + indata = [features, viewdirs] + if self.feape > 0: + indata += [positional_encoding(features, self.feape)] + if self.viewpe > 0: + indata += [positional_encoding(viewdirs, self.viewpe)] + mlp_in = torch.cat(indata, dim=-1) + rgb = self.mlp(mlp_in) + rgb = torch.sigmoid(rgb) + + return rgb + + +class MLPRender_PE(torch.nn.Module): + def __init__(self, inChanel, viewpe=6, pospe=6, featureC=128): + super().__init__() + self.opt_group = "color_impl" + + self.in_mlpC = (3 + 2 * viewpe * 3) + (3 + 2 * pospe * 3) + inChanel # + self.viewpe = viewpe + self.pospe = 
pospe + layer1 = torch.nn.Linear(self.in_mlpC, featureC) + layer2 = torch.nn.Linear(featureC, featureC) + layer3 = torch.nn.Linear(featureC, 3) + + self.mlp = torch.nn.Sequential( + layer1, + torch.nn.ReLU(inplace=True), + layer2, + torch.nn.ReLU(inplace=True), + layer3, + ) + torch.nn.init.constant_(self.mlp[-1].bias, 0) + + def forward(self, pts, viewdirs, features, kwargs): + indata = [features, viewdirs] + if self.pospe > 0: + indata += [positional_encoding(pts, self.pospe)] + if self.viewpe > 0: + indata += [positional_encoding(viewdirs, self.viewpe)] + mlp_in = torch.cat(indata, dim=-1) + rgb = self.mlp(mlp_in) + rgb = torch.sigmoid(rgb) + + return rgb + + +class MLPRender(torch.nn.Module): + def __init__(self, inChanel, viewpe=6, featureC=128): + super().__init__() + self.opt_group = "color_impl" + + self.in_mlpC = (3 + 2 * viewpe * 3) + inChanel + self.viewpe = viewpe + + layer1 = torch.nn.Linear(self.in_mlpC, featureC) + layer2 = torch.nn.Linear(featureC, featureC) + layer3 = torch.nn.Linear(featureC, 3) + + self.mlp = torch.nn.Sequential( + layer1, + torch.nn.ReLU(inplace=True), + layer2, + torch.nn.ReLU(inplace=True), + layer3, + ) + torch.nn.init.constant_(self.mlp[-1].bias, 0) + + def forward(self, pts, viewdirs, features, kwargs): + indata = [features, viewdirs] + if self.viewpe > 0: + indata += [positional_encoding(viewdirs, self.viewpe)] + mlp_in = torch.cat(indata, dim=-1) + rgb = self.mlp(mlp_in) + rgb = torch.sigmoid(rgb) + + return rgb + + +class TensorBase(torch.nn.Module): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super(TensorBase, self).__init__() + + self.cfg = cfg + self.device = "cuda" + + self.white_bg = cfg.white_bg if "white_bg" in cfg else 0 + self.black_bg = cfg.black_bg if "black_bg" in cfg else 0 + self.ndc_ray = cfg.ndc_ray if "ndc_ray" in cfg else 0 + + self.register_buffer("aabb", torch.tensor(cfg.aabb).to(self.device)) + + if "grid_size" in cfg: + self.use_grid_size_upsample = True + self.register_buffer("gridSize", torch.tensor(list(cfg.grid_size.start))) + self.gridSizeStart = list(cfg.grid_size.start) + self.gridSizeEnd = list(cfg.grid_size.end) + else: + self.use_grid_size_upsample = False + self.gridSize = N_to_reso(cfg.N_voxel_init, self.aabb) + + if "grid_size_alpha" in cfg: + self.register_buffer("gridSizeAlpha", torch.tensor(list(cfg.grid_size_alpha.start))) + self.gridSizeAlphaStart = list(cfg.grid_size_alpha.start) + self.gridSizeAlphaEnd = list(cfg.grid_size_alpha.end) + else: + self.gridSizeAlphaStart = self.gridSizeStart + self.gridSizeAlphaEnd = self.gridSizeEnd + self.gridSizeAlpha = self.gridSize + + self.max_n_samples = cfg.nSamples if "nSamples" in cfg else 32 + self.step_ratio = cfg.step_ratio if "step_ratio" in cfg else 0.5 + self.nSamples = min(self.max_n_samples, cal_n_samples(self.gridSize, self.step_ratio)) + + self.update_AlphaMask_list = cfg.update_AlphaMask_list + self.upsamp_list = cfg.upsamp_list + + if self.use_grid_size_upsample: + # Color + self.N_voxel_list = [] + + for i in range(3): + cur_voxel_list = ( + torch.round( + torch.exp( + torch.linspace( + np.log(self.gridSizeStart[i]), + np.log(self.gridSizeEnd[i]), + len(self.upsamp_list) + 1, + ) + ) + ).long() + ).tolist()[1:] + self.N_voxel_list.append(cur_voxel_list) + + # Alpha + self.N_voxel_list_alpha = [] + + for i in range(3): + cur_voxel_list = ( + torch.round( + torch.exp( + torch.linspace( + np.log(self.gridSizeAlphaStart[i]), + np.log(self.gridSizeAlphaEnd[i]), + len(self.upsamp_list) + 1, + ) + ) + ).long() + ).tolist()[1:] + 
self.N_voxel_list_alpha.append(cur_voxel_list) + else: + self.N_voxel_list = ( + torch.round( + torch.exp( + torch.linspace( + np.log(cfg.N_voxel_init), + np.log(cfg.N_voxel_final), + len(self.upsamp_list) + 1, + ) + ) + ).long() + ).tolist()[1:] + + self.density_n_comp = cfg.n_lamb_sigma if "n_lamb_sigma" in cfg else 8 + self.app_n_comp = cfg.n_lamb_sh if "n_lamb_sh" in cfg else 24 + self.app_dim = cfg.data_dim_color if "data_dim_color" in cfg else 27 + self.alphaMask = cfg.alphaMask if "alphaMask" in cfg else None + + self.density_shift = cfg.density_shift if "density_shift" in cfg else -10.0 + self.alphaMask_thres = cfg.alpha_mask_thre if "alpha_mask_thre" in cfg else 0.001 + self.rayMarch_weight_thres = cfg.rm_weight_mask_thre if "rm_weight_mask_thre" in cfg else 0.0001 + self.distance_scale = cfg.distance_scale if "distance_scale" in cfg else 25 + self.fea2denseAct = cfg.fea2denseAct if "fea2denseAct" in cfg else "softplus" + + self.near_far = cfg.near_far if "near_far" in cfg else [2.0, 6.0] + + ### Filtering parameters + if "filter" not in cfg: + cfg.filter = {} + self.apply_filter_weights = False + else: + self.apply_filter_weights = True + + self.filter_weight_thresh = getattr(cfg.filter, "weight_thresh", 1e-3) + self.filter_max_samples = getattr(cfg.filter, "max_samples", 32) + self.filter_wait_iters = getattr(cfg.filter, "wait_iters", 12000) + ### Filtering parameters + + self.update_stepSize(self.gridSize, self.gridSizeAlpha) + + self.matMode = [[0, 1], [0, 2], [1, 2]] + self.vecMode = [2, 1, 0] + self.comp_w = [1, 1, 1] + + self.init_svd_volume(self.gridSize[0], self.device) + + self.shadingMode = cfg.shadingMode if "shadingMode" in cfg else "MLP_PE" + self.pos_pe = cfg.pos_pe if "pos_pe" in cfg else 6 + self.view_pe = cfg.view_pe if "view_pe" in cfg else 6 + self.fea_pe = cfg.fea_pe if "fea_pe" in cfg else 6 + self.featureC = cfg.featureC if "featureC" in cfg else 128 + + self.init_render_func( + self.shadingMode, + self.pos_pe, + self.view_pe, + self.fea_pe, + self.featureC, + self.device, + ) + + def init_render_func(self, shadingMode, pos_pe, view_pe, fea_pe, featureC, device): + if shadingMode == "MLP_PE": + self.renderModule = MLPRender_PE(self.app_dim, view_pe, pos_pe, featureC).to(device) + elif shadingMode == "MLP_Fea": + self.renderModule = MLPRender_Fea(self.app_dim, view_pe, fea_pe, featureC).to(device) + elif shadingMode == "MLP": + self.renderModule = MLPRender(self.app_dim, view_pe, featureC).to(device) + elif shadingMode == "SH": + self.renderModule = SHRender + elif shadingMode == "RGB": + assert self.app_dim == 3 + self.renderModule = RGBRender + elif shadingMode == "RGBIdentity": + assert self.app_dim == 3 + self.renderModule = RGBIdentityRender + elif shadingMode == "RGBtLinear": + self.renderModule = RGBtLinearRender + elif shadingMode == "RGBtFourier": + self.renderModule = RGBtFourierRender + else: + print("Unrecognized shading module") + exit() + print("pos_pe", pos_pe, "view_pe", view_pe, "fea_pe", fea_pe) + print(self.renderModule) + + def update_stepSize(self, gridSize, gridSizeAlpha): + print("aabb", self.aabb.view(-1)) + print("grid size", gridSize) + self.aabbSize = self.aabb[1] - self.aabb[0] + self.invaabbSize = 2.0 / self.aabbSize + self.gridSize = torch.LongTensor(gridSize).to(self.device) + self.units = self.aabbSize / (self.gridSize - 1) + self.gridSizeAlpha = torch.LongTensor(gridSizeAlpha).to(self.device) + self.unitsAlpha = self.aabbSize / (self.gridSizeAlpha - 1) + self.stepSize = torch.mean(self.units) * self.step_ratio + self.aabbDiag 
= torch.sqrt(torch.sum(torch.square(self.aabbSize))) + self.nSamples = min(self.max_n_samples, int((self.aabbDiag / self.stepSize).item()) + 1) + print("sampling step size: ", self.stepSize) + print("sampling number: ", self.nSamples) + + def init_svd_volume(self, res, device): + pass + + def compute_features(self, xyz_sampled): + pass + + def compute_densityfeature(self, xyz_sampled): + pass + + def compute_appfeature(self, xyz_sampled): + pass + + def normalize_coord(self, xyz_sampled): + return (xyz_sampled - self.aabb[0]) * self.invaabbSize - 1 + + def get_optparam_groups(self, lr_init_spatial=0.02, lr_init_network=0.001): + pass + + def get_kwargs(self): + return { + "aabb": self.aabb, + "gridSize": self.gridSize.tolist(), + "density_n_comp": self.density_n_comp, + "appearance_n_comp": self.app_n_comp, + "app_dim": self.app_dim, + "density_shift": self.density_shift, + "alphaMask_thres": self.alphaMask_thres, + "distance_scale": self.distance_scale, + "rayMarch_weight_thres": self.rayMarch_weight_thres, + "fea2denseAct": self.fea2denseAct, + "near_far": self.near_far, + "step_ratio": self.step_ratio, + "shadingMode": self.shadingMode, + "pos_pe": self.pos_pe, + "view_pe": self.view_pe, + "fea_pe": self.fea_pe, + "featureC": self.featureC, + } + + def sample_ray_ndc(self, rays_o, rays_d, N_samples=-1): + N_samples = N_samples if N_samples > 0 else self.nSamples + near, far = self.near_far + interpx = torch.linspace(near, far, N_samples).unsqueeze(0).to(rays_o) + + if self.training: + interpx += torch.rand_like(interpx).to(rays_o) * ((far - near) / N_samples) + + rays_pts = rays_o[..., None, :] + rays_d[..., None, :] * interpx[..., None] + mask_outbbox = ((self.aabb[0] > rays_pts) | (rays_pts > self.aabb[1])).any(dim=-1) + return rays_pts, interpx, ~mask_outbbox + + def valid_mask(self, rays_pts): + mask_outbbox = ((self.aabb[0] > rays_pts) | (rays_pts > self.aabb[1])).any(dim=-1) + return ~mask_outbbox + + def sample_ray(self, rays_o, rays_d, N_samples=-1): + N_samples = N_samples if N_samples > 0 else self.nSamples + stepsize = self.stepSize + near, far = self.near_far + vec = torch.where(rays_d == 0, torch.full_like(rays_d, 1e-6), rays_d) + rate_a = (self.aabb[1] - rays_o) / vec + rate_b = (self.aabb[0] - rays_o) / vec + t_min = torch.minimum(rate_a, rate_b).amax(-1).clamp(min=near, max=far) + + rng = torch.arange(N_samples)[None].float() + if self.training: + rng = rng.repeat(rays_d.shape[-2], 1) + rng += torch.rand_like(rng[:, [0]]) + step = stepsize * rng.to(rays_o.device) + interpx = t_min[..., None] + step + + rays_pts = rays_o[..., None, :] + rays_d[..., None, :] * interpx[..., None] + mask_outbbox = ((self.aabb[0] > rays_pts) | (rays_pts > self.aabb[1])).any(dim=-1) + + return rays_pts, interpx, ~mask_outbbox + + def shrink(self, new_aabb, voxel_size): + pass + + @torch.no_grad() + def getDenseAlpha(self, gridSize=None): + samples = torch.stack( + torch.meshgrid( + torch.linspace(0, 1, gridSize[0]), + torch.linspace(0, 1, gridSize[1]), + torch.linspace(0, 1, gridSize[2]), + ), + -1, + ).to(self.device) + dense_xyz = self.aabb[0] * (1 - samples) + self.aabb[1] * samples + + # dense_xyz = dense_xyz + # print(self.stepSize, self.distance_scale*self.aabbDiag) + alpha = torch.zeros_like(dense_xyz[..., 0]) + for i in range(gridSize[0]): + alpha[i] = self.compute_alpha(dense_xyz[i].view(-1, 3), 0.01).view((gridSize[1], gridSize[2])) + return alpha, dense_xyz + + @torch.no_grad() + def updateAlphaMask(self, gridSize=(200, 200, 200)): + + alpha, dense_xyz = self.getDenseAlpha(gridSize) 
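+ # Max-pool and threshold the dense alpha grid into a binary occupancy mask,
+ # wrap it in an AlphaGridMask, and return the tight bounding box of the
+ # occupied voxels as the new AABB.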
+ dense_xyz = dense_xyz.transpose(0, 2).contiguous() + alpha = alpha.clamp(0, 1).transpose(0, 2).contiguous()[None, None] + total_voxels = gridSize[0] * gridSize[1] * gridSize[2] + + ks = 3 + alpha = F.max_pool3d(alpha, kernel_size=ks, padding=ks // 2, stride=1).view(gridSize[::-1]) + alpha[alpha >= self.alphaMask_thres] = 1 + alpha[alpha < self.alphaMask_thres] = 0 + + self.alphaMask = AlphaGridMask(self.device, self.aabb, alpha) + + valid_xyz = dense_xyz[alpha > 0.5] + xyz_min = valid_xyz.amin(0) + xyz_max = valid_xyz.amax(0) + + new_aabb = torch.stack((xyz_min, xyz_max)) + + total = torch.sum(alpha) + print(f"bbox: {xyz_min, xyz_max} alpha rest %%%f" % (total / total_voxels * 100)) + return new_aabb + + @torch.no_grad() + def filtering_rays(self, all_rays, all_rgbs, N_samples=256, chunk=10240 * 5, bbox_only=False): + print("========> filtering rays ...") + tt = time.time() + + N = torch.tensor(all_rays.shape[:-1]).prod() + + mask_filtered = [] + idx_chunks = torch.split(torch.arange(N), chunk) + for idx_chunk in idx_chunks: + rays_chunk = all_rays[idx_chunk].to(self.device) + + rays_o, rays_d = rays_chunk[..., :3], rays_chunk[..., 3:6] + if bbox_only: + vec = torch.where(rays_d == 0, torch.full_like(rays_d, 1e-6), rays_d) + rate_a = (self.aabb[1] - rays_o) / vec + rate_b = (self.aabb[0] - rays_o) / vec + t_min = torch.minimum(rate_a, rate_b).amax(-1) # .clamp(min=near, max=far) + t_max = torch.maximum(rate_a, rate_b).amin(-1) # .clamp(min=near, max=far) + mask_inbbox = t_max > t_min + + else: + xyz_sampled, _, _ = self.sample_ray(rays_o, rays_d, N_samples=N_samples) + mask_inbbox = (self.alphaMask.sample_alpha(xyz_sampled).view(xyz_sampled.shape[:-1]) > 0).any(-1) + + mask_filtered.append(mask_inbbox.cpu()) + + mask_filtered = torch.cat(mask_filtered).view(all_rgbs.shape[:-1]) + + print(f"Ray filtering done! takes {time.time()-tt} s. 
ray mask ratio: {torch.sum(mask_filtered) / N}") + return all_rays[mask_filtered], all_rgbs[mask_filtered] + + def feature2density(self, density_features, **kwargs): + if "weights" in kwargs: + density_features = density_features * kwargs["weights"].view(*density_features.shape) + + if self.fea2denseAct == "softplus": + return F.softplus(density_features + self.density_shift) + elif self.fea2denseAct == "relu": + return F.relu(density_features) + elif self.fea2denseAct == "relu_abs": + return F.relu(torch.abs(density_features)) + + def compute_alpha(self, xyz_locs, length=0.01): + + if self.alphaMask is not None: + alphas = self.alphaMask.sample_alpha(xyz_locs) + alpha_mask = alphas > 0 + else: + alpha_mask = torch.ones_like(xyz_locs[:, 0], dtype=bool) + + sigma = torch.zeros(xyz_locs.shape[:-1], device=xyz_locs.device) + + if alpha_mask.any(): + xyz_sampled = self.normalize_coord(xyz_locs[alpha_mask]) + sigma_feature = self.compute_densityfeature(xyz_sampled) + valid_sigma = self.feature2density(sigma_feature) + sigma[alpha_mask] = valid_sigma + + alpha = 1 - torch.exp(-sigma * length).view(xyz_locs.shape[:-1]) + + return alpha + + def set_iter(self, iteration): + self.cur_iter = iteration + + if not self.training: + return + + self.needs_opt_reset = False + + # Pruning + if iteration in self.update_AlphaMask_list: + # Update BBOX + reso_mask = tuple(self.gridSizeAlpha) + + if reso_mask[0] > 200: + reso_mask = (200, 200, 200) + + new_aabb = self.updateAlphaMask(reso_mask) + + # Update regularization weights + if iteration == self.update_AlphaMask_list[0]: + self.shrink(new_aabb) + + # Upsampling + if iteration in self.upsamp_list: + print("Before:", self.N_voxel_list, iteration) + + reso_cur = [] + for i in range(3): + reso_cur.append(self.N_voxel_list[i].pop(0)) + + reso_cur_alpha = [] + for i in range(3): + reso_cur_alpha.append(self.N_voxel_list_alpha[i].pop(0)) + + self.nSamples = min(self.max_n_samples, cal_n_samples(reso_cur, self.step_ratio)) + self.upsample_volume_grid(reso_cur, reso_cur_alpha) + + if self.cfg.lr_upsample_reset: + self.needs_opt_reset = True + + def forward(self, rays_chunk): + # Sample points + viewdirs = rays_chunk[:, 3:6] + + if self.ndc_ray: + xyz_sampled, z_vals, ray_valid = self.sample_ray_ndc( + rays_chunk[:, :3], rays_chunk[:, 3:6], N_samples=self.nSamples + ) + dists = torch.cat( + (z_vals[:, 1:] - z_vals[:, :-1], torch.zeros_like(z_vals[:, :1])), + dim=-1, + ) + dists = dists * torch.norm(rays_chunk[:, 3:6], dim=-1, keepdim=True) + viewdirs = viewdirs / torch.norm(viewdirs, dim=-1, keepdim=True) + else: + xyz_sampled, z_vals, ray_valid = self.sample_ray( + rays_chunk[:, :3], rays_chunk[:, 3:6], N_samples=self.nSamples + ) + dists = torch.cat( + (z_vals[:, 1:] - z_vals[:, :-1], torch.zeros_like(z_vals[:, :1])), + dim=-1, + ) + + viewdirs = viewdirs.view(-1, 1, 3).expand(xyz_sampled.shape) + + if self.alphaMask is not None: + alphas = self.alphaMask.sample_alpha(xyz_sampled[ray_valid]) + alpha_mask = alphas > 0 + ray_invalid = ~ray_valid + ray_invalid[ray_valid] |= ~alpha_mask + ray_valid = ~ray_invalid + + sigma = torch.zeros(xyz_sampled.shape[:-1], device=xyz_sampled.device) + rgb = torch.zeros((*xyz_sampled.shape[:2], 3), device=xyz_sampled.device) + + if ray_valid.any(): + xyz_sampled = self.normalize_coord(xyz_sampled) + sigma_feature = self.compute_densityfeature(xyz_sampled[ray_valid]) + + valid_sigma = self.feature2density(sigma_feature) + sigma[ray_valid] = valid_sigma + + alpha, weight, bg_weight = raw2alpha(sigma, dists * self.distance_scale) + 
app_mask = weight > self.rayMarch_weight_thres + + if app_mask.any(): + app_features = self.compute_appfeature(xyz_sampled[app_mask]) + valid_rgbs = self.renderModule(xyz_sampled[app_mask], viewdirs[app_mask], app_features, {}) + rgb[app_mask] = valid_rgbs + + acc_map = torch.sum(weight, -1) + rgb_map = torch.sum(weight[..., None] * rgb, -2) + + if self.white_bg or (self.training and torch.rand((1,)) < 0.5): + rgb_map = rgb_map + (1.0 - acc_map[..., None]) + + rgb_map = rgb_map.clamp(0, 1) + + with torch.no_grad(): + depth_map = torch.sum(weight * z_vals, -1) + depth_map = depth_map + (1.0 - acc_map) * rays_chunk[..., -1] + + # return rgb_map, depth_map # rgb, sigma, alpha, weight, bg_weight + return rgb_map + + +class TensorVM(TensorBase): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__(in_channels, out_channels, cfg, **kwargs) + + self.opt_group = { + "color": [self.line_coef, self.plane_coef], + "color_impl": [self.basis_mat], + } + + if isinstance(self.renderModule, torch.nn.Module): + self.opt_group["color_impl"] += [self.renderModule] + + def init_svd_volume(self, res, device): + self.plane_coef = torch.nn.Parameter( + 0.1 * torch.randn((3, self.app_n_comp + self.density_n_comp, res, res), device=device) + ) + self.line_coef = torch.nn.Parameter( + 0.1 * torch.randn((3, self.app_n_comp + self.density_n_comp, res, 1), device=device) + ) + self.basis_mat = torch.nn.Linear(self.app_n_comp * 3, self.app_dim, bias=False, device=device) + + def get_optparam_groups(self, lr_init_spatialxyz=0.02, lr_init_network=0.001): + grad_vars = [ + {"params": self.line_coef, "lr": lr_init_spatialxyz}, + {"params": self.plane_coef, "lr": lr_init_spatialxyz}, + {"params": self.basis_mat.parameters(), "lr": lr_init_network}, + ] + if isinstance(self.renderModule, torch.nn.Module): + grad_vars += [{"params": self.renderModule.parameters(), "lr": lr_init_network}] + return grad_vars + + def compute_features(self, xyz_sampled): + + coordinate_plane = torch.stack( + ( + xyz_sampled[..., self.matMode[0]], + xyz_sampled[..., self.matMode[1]], + xyz_sampled[..., self.matMode[2]], + ) + ) + coordinate_line = torch.stack( + ( + xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], + xyz_sampled[..., self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1) + + plane_feats = F.grid_sample( + self.plane_coef[:, -self.density_n_comp :], + coordinate_plane, + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + line_feats = F.grid_sample( + self.line_coef[:, -self.density_n_comp :], + coordinate_line, + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + + sigma_feature = torch.sum(plane_feats * line_feats, dim=0) + + plane_feats = F.grid_sample(self.plane_coef[:, : self.app_n_comp], coordinate_plane, align_corners=True).view( + 3 * self.app_n_comp, -1 + ) + line_feats = F.grid_sample(self.line_coef[:, : self.app_n_comp], coordinate_line, align_corners=True).view( + 3 * self.app_n_comp, -1 + ) + + app_features = self.basis_mat((plane_feats * line_feats).T) + + return sigma_feature, app_features + + def compute_densityfeature(self, xyz_sampled): + coordinate_plane = torch.stack( + ( + xyz_sampled[..., self.matMode[0]], + xyz_sampled[..., self.matMode[1]], + xyz_sampled[..., self.matMode[2]], + ) + ).view(3, -1, 1, 2) + coordinate_line = torch.stack( + ( + xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], + xyz_sampled[..., self.vecMode[2]], + ) + ) + coordinate_line = 
torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + plane_feats = F.grid_sample( + self.plane_coef[:, -self.density_n_comp :], + coordinate_plane, + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + line_feats = F.grid_sample( + self.line_coef[:, -self.density_n_comp :], + coordinate_line, + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + + sigma_feature = torch.sum(plane_feats * line_feats, dim=0) + + return sigma_feature + + def compute_appfeature(self, xyz_sampled): + coordinate_plane = torch.stack( + ( + xyz_sampled[..., self.matMode[0]], + xyz_sampled[..., self.matMode[1]], + xyz_sampled[..., self.matMode[2]], + ) + ).view(3, -1, 1, 2) + coordinate_line = torch.stack( + ( + xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], + xyz_sampled[..., self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + plane_feats = F.grid_sample(self.plane_coef[:, : self.app_n_comp], coordinate_plane, align_corners=True).view( + 3 * self.app_n_comp, -1 + ) + line_feats = F.grid_sample(self.line_coef[:, : self.app_n_comp], coordinate_line, align_corners=True).view( + 3 * self.app_n_comp, -1 + ) + + app_features = self.basis_mat((plane_feats * line_feats).T) + + return app_features + + def vectorDiffs(self, vector_comps): + total = 0 + + for idx in range(len(vector_comps)): + # print(self.line_coef.shape, vector_comps[idx].shape) + n_comp, n_size = vector_comps[idx].shape[:-1] + + dotp = torch.matmul( + vector_comps[idx].view(n_comp, n_size), + vector_comps[idx].view(n_comp, n_size).transpose(-1, -2), + ) + # print(vector_comps[idx].shape, vector_comps[idx].view(n_comp,n_size).transpose(-1,-2).shape, dotp.shape) + non_diagonal = dotp.view(-1)[1:].view(n_comp - 1, n_comp + 1)[..., :-1] + # print(vector_comps[idx].shape, vector_comps[idx].view(n_comp,n_size).transpose(-1,-2).shape, dotp.shape,non_diagonal.shape) + total = total + torch.mean(torch.abs(non_diagonal)) + return total + + def vector_comp_diffs(self): + + return self.vectorDiffs(self.line_coef[:, -self.density_n_comp :]) + self.vectorDiffs( + self.line_coef[:, : self.app_n_comp] + ) + + # @torch.no_grad() + def up_sampling_VM(self, plane_coef, line_coef, res_target): + + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + mat_id_0, mat_id_1 = self.matMode[i] + + plane_coef[i] = torch.nn.Parameter( + F.interpolate( + plane_coef[i].data, + size=(res_target[mat_id_1], res_target[mat_id_0]), + mode="bilinear", + align_corners=True, + ) + ) + line_coef[i] = torch.nn.Parameter( + F.interpolate( + line_coef[i].data, + size=(res_target[vec_id], 1), + mode="bilinear", + align_corners=True, + ) + ) + + # plane_coef[0] = torch.nn.Parameter( + # F.interpolate(plane_coef[0].data, size=(res_target[1], res_target[0]), mode='bilinear', + # align_corners=True)) + # line_coef[0] = torch.nn.Parameter( + # F.interpolate(line_coef[0].data, size=(res_target[2], 1), mode='bilinear', align_corners=True)) + # plane_coef[1] = torch.nn.Parameter( + # F.interpolate(plane_coef[1].data, size=(res_target[2], res_target[0]), mode='bilinear', + # align_corners=True)) + # line_coef[1] = torch.nn.Parameter( + # F.interpolate(line_coef[1].data, size=(res_target[1], 1), mode='bilinear', align_corners=True)) + # plane_coef[2] = torch.nn.Parameter( + # F.interpolate(plane_coef[2].data, size=(res_target[2], res_target[1]), mode='bilinear', + # align_corners=True)) + # line_coef[2] = torch.nn.Parameter( + # 
F.interpolate(line_coef[2].data, size=(res_target[0], 1), mode='bilinear', align_corners=True)) + + return plane_coef, line_coef + + # @torch.no_grad() + def upsample_volume_grid(self, res_target, res_target_alpha): + # self.app_plane, self.app_line = self.up_sampling_VM(self.app_plane, self.app_line, res_target) + # self.density_plane, self.density_line = self.up_sampling_VM(self.density_plane, self.density_line, res_target) + + scale = res_target[0] / self.line_coef.shape[2] # assuming xyz have the same scale + plane_coef = F.interpolate( + self.plane_coef.data, + scale_factor=scale, + mode="bilinear", + align_corners=True, + ) + line_coef = F.interpolate( + self.line_coef.data, + size=(res_target[0], 1), + mode="bilinear", + align_corners=True, + ) + self.plane_coef, self.line_coef = torch.nn.Parameter(plane_coef), torch.nn.Parameter(line_coef) + self.compute_stepSize(res_target) + print(f"upsamping to {res_target}") + + +class TensorVMSplit(TensorBase): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__(in_channels, out_channels, cfg, **kwargs) + + if "MLP" in self.shadingMode: + self.opt_group = { + "color": [ + self.density_line, + self.density_plane, + self.app_line, + self.app_plane, + ], + "color_impl": [self.basis_mat], + } + else: + self.opt_group = { + "color": [ + self.density_line, + self.density_plane, + self.app_line, + self.app_plane, + self.basis_mat, + ], + } + + if isinstance(self.renderModule, torch.nn.Module): + if "color_impl" not in self.opt_group: + self.opt_group["color_impl"] = [self.renderModule] + else: + self.opt_group["color_impl"] += [self.renderModule] + + def init_svd_volume(self, res, device): + if self.fea2denseAct == "softplus": + self.density_plane, self.density_line = self.init_one_svd_density( + self.density_n_comp, self.gridSizeAlpha, 0.1, device + ) + else: + self.density_plane, self.density_line = self.init_one_svd_density( + self.density_n_comp, self.gridSizeAlpha, 1e-2, device + ) + self.app_plane, self.app_line = self.init_one_svd(self.app_n_comp, self.gridSize, 0.1, device) + self.basis_mat = torch.nn.Linear(sum(self.app_n_comp), self.app_dim, bias=False).to(device) + + def init_one_svd(self, n_component, gridSize, scale, device): + plane_coef, line_coef = [], [] + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + mat_id_0, mat_id_1 = self.matMode[i] + + if self.cfg.shadingMode == "RGBIdentity": + plane_coef.append( + torch.nn.Parameter(scale * torch.randn((1, n_component[i], gridSize[mat_id_1], gridSize[mat_id_0]))) + ) # + line_coef.append(torch.nn.Parameter(scale * torch.randn((1, n_component[i], gridSize[vec_id], 1)))) + else: + plane_coef.append( + torch.nn.Parameter(scale * torch.randn((1, n_component[i], gridSize[mat_id_1], gridSize[mat_id_0]))) + ) # + line_coef.append(torch.nn.Parameter(scale * torch.randn((1, n_component[i], gridSize[vec_id], 1)))) + + return torch.nn.ParameterList(plane_coef).to(device), torch.nn.ParameterList(line_coef).to(device) + + def init_one_svd_density(self, n_component, gridSize, scale, device): + plane_coef, line_coef = [], [] + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + mat_id_0, mat_id_1 = self.matMode[i] + + if self.fea2denseAct == "softplus": + plane_coef.append( + torch.nn.Parameter(scale * torch.randn((1, n_component[i], gridSize[mat_id_1], gridSize[mat_id_0]))) + ) # + line_coef.append(torch.nn.Parameter(scale * torch.randn((1, n_component[i], gridSize[vec_id], 1)))) + elif self.fea2denseAct == "relu": + plane_coef.append( + 
torch.nn.Parameter( + scale * torch.rand((1, n_component[i], gridSize[mat_id_1], gridSize[mat_id_0])).clamp(1e-2, 1e8) + ) + ) + line_coef.append( + torch.nn.Parameter(scale * torch.rand((1, n_component[i], gridSize[vec_id], 1)).clamp(1e-2, 1e8)) + ) + + return torch.nn.ParameterList(plane_coef).to(device), torch.nn.ParameterList(line_coef).to(device) + + def get_optparam_groups(self, lr_init_spatialxyz=0.02, lr_init_network=0.001): + grad_vars = [ + {"params": self.density_line, "lr": lr_init_spatialxyz}, + {"params": self.density_plane, "lr": lr_init_spatialxyz}, + {"params": self.app_line, "lr": lr_init_spatialxyz}, + {"params": self.app_plane, "lr": lr_init_spatialxyz}, + {"params": self.basis_mat.parameters(), "lr": lr_init_network}, + ] + if isinstance(self.renderModule, torch.nn.Module): + grad_vars += [{"params": self.renderModule.parameters(), "lr": lr_init_network}] + return grad_vars + + def vectorDiffs(self, vector_comps): + total = 0 + + for idx in range(len(vector_comps)): + n_comp, n_size = vector_comps[idx].shape[1:-1] + + dotp = torch.matmul( + vector_comps[idx].view(n_comp, n_size), + vector_comps[idx].view(n_comp, n_size).transpose(-1, -2), + ) + non_diagonal = dotp.view(-1)[1:].view(n_comp - 1, n_comp + 1)[..., :-1] + total = total + torch.mean(torch.abs(non_diagonal)) + return total + + def vector_comp_diffs(self): + return self.vectorDiffs(self.density_line) + self.vectorDiffs(self.app_line) + + def density_L1(self): + total = 0 + for idx in range(len(self.density_plane)): + if self.density_plane[idx].shape[1] == 0: + continue + + total = ( + total + torch.mean(torch.abs(self.density_plane[idx])) + torch.mean(torch.abs(self.density_line[idx])) + ) # + torch.mean(torch.abs(self.app_plane[idx])) + torch.mean(torch.abs(self.density_plane[idx])) + return total + + def TV_loss_density(self, reg): + total = 0 + for idx in range(len(self.density_plane)): + if self.density_plane[idx].shape[1] == 0: + continue + + total = total + reg(self.density_plane[idx]) * 1e-2 # + reg(self.density_line[idx]) * 1e-3 + return total + + def TV_loss_app(self, reg): + total = 0 + for idx in range(len(self.app_plane)): + if self.app_plane[idx].shape[1] == 0: + continue + + total = total + reg(self.app_plane[idx]) * 1e-2 # + reg(self.app_line[idx]) * 1e-3 + return total + + def compute_densityfeature(self, xyz_sampled): + + # plane + line basis + coordinate_plane = torch.stack( + ( + xyz_sampled[:, self.matMode[0]], + xyz_sampled[:, self.matMode[1]], + xyz_sampled[:, self.matMode[2]], + ) + ).view(3, -1, 1, 2) + coordinate_line = torch.stack( + ( + xyz_sampled[:, self.vecMode[0]], + xyz_sampled[:, self.vecMode[1]], + xyz_sampled[:, self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + sigma_feature = torch.zeros((xyz_sampled.shape[0],), device=xyz_sampled.device) + for idx_plane in range(len(self.density_plane)): + if self.density_plane[idx_plane].shape[1] == 0: + continue + + plane_coef_point = F.grid_sample( + self.density_plane[idx_plane], + coordinate_plane[[idx_plane]], + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + line_coef_point = F.grid_sample( + self.density_line[idx_plane], + coordinate_line[[idx_plane]], + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + sigma_feature = sigma_feature + torch.sum(plane_coef_point * line_coef_point, dim=0) + # sigma_feature = sigma_feature + torch.mean(plane_coef_point * line_coef_point, dim=0) + + return sigma_feature + # return 
sigma_feature / len(self.density_plane) + + def compute_appfeature(self, xyz_sampled): + # return xyz_sampled.new_zeros(xyz_sampled.shape[0], self.app_dim) + + # plane + line basis + coordinate_plane = torch.stack( + ( + xyz_sampled[..., self.matMode[0]], + xyz_sampled[..., self.matMode[1]], + xyz_sampled[..., self.matMode[2]], + ) + ).view(3, -1, 1, 2) + coordinate_line = torch.stack( + ( + xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], + xyz_sampled[..., self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + plane_coef_point, line_coef_point = [], [] + for idx_plane in range(len(self.app_plane)): + if self.app_plane[idx_plane].shape[1] == 0: + continue + + plane_coef_point.append( + F.grid_sample( + self.app_plane[idx_plane], + coordinate_plane[[idx_plane]], + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + ) + line_coef_point.append( + F.grid_sample( + self.app_line[idx_plane], + coordinate_line[[idx_plane]], + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + ) + plane_coef_point, line_coef_point = torch.cat(plane_coef_point), torch.cat(line_coef_point) + + return self.basis_mat((plane_coef_point * line_coef_point).T) + # return self.basis_mat((plane_coef_point * line_coef_point).T) / plane_coef_point.shape + + @torch.no_grad() + def up_sampling_VM(self, plane_coef, line_coef, res_target): + + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + mat_id_0, mat_id_1 = self.matMode[i] + if plane_coef[i].shape[1] > 0: + plane_coef[i] = torch.nn.Parameter( + F.interpolate( + plane_coef[i].data, + size=(res_target[mat_id_1], res_target[mat_id_0]), + mode="bilinear", + align_corners=True, + ) + ) + if line_coef[i].shape[1] > 0: + line_coef[i] = torch.nn.Parameter( + F.interpolate( + line_coef[i].data, + size=(res_target[vec_id], 1), + mode="bilinear", + align_corners=True, + ) + ) + + return plane_coef, line_coef + + @torch.no_grad() + def upsample_volume_grid(self, res_target, res_target_alpha): + self.app_plane, self.app_line = self.up_sampling_VM(self.app_plane, self.app_line, res_target) + self.density_plane, self.density_line = self.up_sampling_VM( + self.density_plane, self.density_line, res_target_alpha + ) + + self.update_stepSize(res_target, res_target_alpha) + print(f"upsamping to {res_target}") + + @torch.no_grad() + def shrink(self, new_aabb): + print("====> shrinking ...") + xyz_min, xyz_max = new_aabb + t_l_o, b_r_o = (xyz_min - self.aabb[0]) / self.units, (xyz_max - self.aabb[0]) / self.units + + # print(new_aabb, self.aabb) + # print(t_l, b_r,self.alphaMask.alpha_volume.shape) + + t_l, b_r = torch.round(torch.round(t_l_o)).long(), torch.round(b_r_o).long() + 1 + b_r = torch.stack([b_r, self.gridSize]).amin(0) + + fac = self.units / self.unitsAlpha + t_l_alpha, b_r_alpha = torch.round(torch.round(t_l_o * fac)).long(), torch.round(b_r_o * fac).long() + 1 + b_r_alpha = torch.stack([b_r_alpha, self.gridSizeAlpha]).amin(0) + + for i in range(len(self.vecMode)): + mode0 = self.vecMode[i] + self.density_line[i] = torch.nn.Parameter( + self.density_line[i].data[..., t_l_alpha[mode0] : b_r_alpha[mode0], :] + ) + self.app_line[i] = torch.nn.Parameter(self.app_line[i].data[..., t_l[mode0] : b_r[mode0], :]) + mode0, mode1 = self.matMode[i] + self.density_plane[i] = torch.nn.Parameter( + self.density_plane[i].data[ + ..., t_l_alpha[mode1] : b_r_alpha[mode1], t_l_alpha[mode0] : b_r_alpha[mode0] + ] + ) + self.app_plane[i] = torch.nn.Parameter( + 
self.app_plane[i].data[..., t_l[mode1] : b_r[mode1], t_l[mode0] : b_r[mode0]] + ) + + if not torch.all(self.alphaMask.gridSize == self.gridSize): + t_l_r, b_r_r = t_l / (self.gridSize - 1), (b_r - 1) / (self.gridSize - 1) + correct_aabb = torch.zeros_like(new_aabb) + correct_aabb[0] = (1 - t_l_r) * self.aabb[0] + t_l_r * self.aabb[1] + correct_aabb[1] = (1 - b_r_r) * self.aabb[0] + b_r_r * self.aabb[1] + print("aabb", new_aabb, "\ncorrect aabb", correct_aabb) + new_aabb = correct_aabb + + newSize = b_r - t_l + newSizeAlpha = b_r_alpha - t_l_alpha + self.aabb = new_aabb + self.update_stepSize((newSize[0], newSize[1], newSize[2]), (newSizeAlpha[0], newSizeAlpha[1], newSizeAlpha[2])) + + +class TensorCP(TensorBase): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__(in_channels, out_channels, cfg, **kwargs) + + self.opt_group = { + "color": [self.density_line, self.app_line], + "color_impl": [self.basis_mat], + } + + if isinstance(self.renderModule, torch.nn.Module): + self.opt_group["color_impl"] += [self.renderModule] + + def init_svd_volume(self, res, device): + self.density_line = self.init_one_svd(self.density_n_comp[0], self.gridSize, 0.2, device) + self.app_line = self.init_one_svd(self.app_n_comp[0], self.gridSize, 0.2, device) + self.basis_mat = torch.nn.Linear(self.app_n_comp[0], self.app_dim, bias=False).to(device) + + def init_one_svd(self, n_component, gridSize, scale, device): + line_coef = [] + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + line_coef.append(torch.nn.Parameter(scale * torch.randn((1, n_component, gridSize[vec_id], 1)))) + return torch.nn.ParameterList(line_coef).to(device) + + def get_optparam_groups(self, lr_init_spatialxyz=0.02, lr_init_network=0.001): + grad_vars = [ + {"params": self.density_line, "lr": lr_init_spatialxyz}, + {"params": self.app_line, "lr": lr_init_spatialxyz}, + {"params": self.basis_mat.parameters(), "lr": lr_init_network}, + ] + if isinstance(self.renderModule, torch.nn.Module): + grad_vars += [{"params": self.renderModule.parameters(), "lr": lr_init_network}] + return grad_vars + + def compute_densityfeature(self, xyz_sampled): + + coordinate_line = torch.stack( + ( + xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], + xyz_sampled[..., self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + line_coef_point = F.grid_sample(self.density_line[0], coordinate_line[[0]], align_corners=True).view( + -1, *xyz_sampled.shape[:1] + ) + line_coef_point = line_coef_point * F.grid_sample( + self.density_line[1], coordinate_line[[1]], align_corners=True + ).view(-1, *xyz_sampled.shape[:1]) + line_coef_point = line_coef_point * F.grid_sample( + self.density_line[2], coordinate_line[[2]], align_corners=True + ).view(-1, *xyz_sampled.shape[:1]) + sigma_feature = torch.sum(line_coef_point, dim=0) + + return sigma_feature + + def compute_appfeature(self, xyz_sampled): + + coordinate_line = torch.stack( + ( + xyz_sampled[..., self.vecMode[0]], + xyz_sampled[..., self.vecMode[1]], + xyz_sampled[..., self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + line_coef_point = F.grid_sample(self.app_line[0], coordinate_line[[0]], align_corners=True).view( + -1, *xyz_sampled.shape[:1] + ) + line_coef_point = line_coef_point * F.grid_sample( + self.app_line[1], coordinate_line[[1]], align_corners=True + ).view(-1, 
*xyz_sampled.shape[:1]) + line_coef_point = line_coef_point * F.grid_sample( + self.app_line[2], coordinate_line[[2]], align_corners=True + ).view(-1, *xyz_sampled.shape[:1]) + + return self.basis_mat(line_coef_point.T) + + @torch.no_grad() + def up_sampling_Vector(self, density_line_coef, app_line_coef, res_target): + + for i in range(len(self.vecMode)): + vec_id = self.vecMode[i] + density_line_coef[i] = torch.nn.Parameter( + F.interpolate( + density_line_coef[i].data, + size=(res_target[vec_id], 1), + mode="bilinear", + align_corners=True, + ) + ) + app_line_coef[i] = torch.nn.Parameter( + F.interpolate( + app_line_coef[i].data, + size=(res_target[vec_id], 1), + mode="bilinear", + align_corners=True, + ) + ) + + return density_line_coef, app_line_coef + + @torch.no_grad() + def upsample_volume_grid(self, res_target, res_target_alpha): + self.density_line, self.app_line = self.up_sampling_Vector(self.density_line, self.app_line, res_target) + + self.update_stepSize(res_target, res_target_alpha) + print(f"upsamping to {res_target}") + + @torch.no_grad() + def shrink(self, new_aabb): + print("====> shrinking ...") + xyz_min, xyz_max = new_aabb + t_l, b_r = (xyz_min - self.aabb[0]) / self.units, (xyz_max - self.aabb[0]) / self.units + + t_l, b_r = torch.round(torch.round(t_l)).long(), torch.round(b_r).long() + 1 + b_r = torch.stack([b_r, self.gridSize]).amin(0) + + for i in range(len(self.vecMode)): + mode0 = self.vecMode[i] + self.density_line[i] = torch.nn.Parameter(self.density_line[i].data[..., t_l[mode0] : b_r[mode0], :]) + self.app_line[i] = torch.nn.Parameter(self.app_line[i].data[..., t_l[mode0] : b_r[mode0], :]) + + if not torch.all(self.alphaMask.gridSize == self.gridSize): + t_l_r, b_r_r = t_l / (self.gridSize - 1), (b_r - 1) / (self.gridSize - 1) + correct_aabb = torch.zeros_like(new_aabb) + correct_aabb[0] = (1 - t_l_r) * self.aabb[0] + t_l_r * self.aabb[1] + correct_aabb[1] = (1 - b_r_r) * self.aabb[0] + b_r_r * self.aabb[1] + print("aabb", new_aabb, "\ncorrect aabb", correct_aabb) + new_aabb = correct_aabb + + newSize = b_r - t_l + self.aabb = new_aabb + self.update_stepSize((newSize[0], newSize[1], newSize[2])) + + def density_L1(self): + total = 0 + for idx in range(len(self.density_line)): + total = total + torch.mean(torch.abs(self.density_line[idx])) + return total + + def TV_loss_density(self, reg): + total = 0 + for idx in range(len(self.density_line)): + total = total + reg(self.density_line[idx]) * 1e-3 + return total + + def TV_loss_app(self, reg): + total = 0 + + for idx in range(len(self.app_line)): + total = total + reg(self.app_line[idx]) * 1e-3 + + return total + + +tensorf_base_dict = { + "tensor_vm": TensorVM, + "tensor_vm_split": TensorVMSplit, +} diff --git a/nlf/nets/tensorf_dynamic.py b/nlf/nets/tensorf_dynamic.py new file mode 100644 index 0000000..272c522 --- /dev/null +++ b/nlf/nets/tensorf_dynamic.py @@ -0,0 +1,892 @@ +#!/usr/bin/env python +# Copyright (c) 2024 Mitsubishi Electric Research Laboratories (MERL) +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2022 Anpei Chen +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# SPDX-License-Identifier: MIT + +import pdb +import time +from typing import Dict + +import cv2 +import numpy as np +import torch +import torch.nn +import torch.nn.functional as F +from torch import autograd + +from utils.intersect_utils import sort_with, sort_z +from utils.sh_utils import eval_sh_bases +from utils.tensorf_utils import ( + DensityFourierRender, + DensityLinearRender, + DensityRender, + N_to_reso, + RGBIdentityRender, + RGBtFourierRender, + RGBtLinearRender, + alpha2weights, + cal_n_samples, + raw2alpha, + scale_shift_color_all, + scale_shift_color_one, + transform_color_all, + transform_color_one, +) + +from .tensorf_base import TensorBase + + +class TensorVMKeyframeTime(TensorBase): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + self.matModeSpace = [[0, 1], [0, 2], [1, 2]] + self.matModeTime = [[2, 3], [1, 3], [0, 3]] + self.num_keyframes = kwargs["system"].dm.train_dataset.num_keyframes + self.total_num_frames = kwargs["system"].dm.train_dataset.num_frames + self.frames_per_keyframe = ( + cfg.frames_per_keyframe if "frames_per_keyframe" in cfg else self.total_num_frames // self.num_keyframes + ) + + self.time_scale_factor = (self.total_num_frames - 1) / self.total_num_frames + self.time_pixel_offset = 0.5 / self.num_keyframes + + self.densityMode = cfg.densityMode + + if self.densityMode == "Density": + self.data_dim_density = 1 + elif self.densityMode == "DensityLinear": + self.data_dim_density = 2 + elif self.densityMode == "DensityFourier": + self.data_dim_density = self.frames_per_keyframe * 2 + 1 + + super().__init__(in_channels, out_channels, cfg, **kwargs) + + self.opt_group = { + "color": [ + self.density_plane_space_list, + self.density_plane_time_list, + self.app_plane_space_list, + self.app_plane_time_list, + self.sam_plane_space_list, + self.sam_plane_time_list, + self.gear_plane_space, + self.gear_plane_time, + ], + "color_impl": [self.basis_mat, self.basis_mat_density, self.basis_mat_sam, self.basis_mat_gear], + } + + self.opt_group["color_impl"] += [self.renderModule, self.renderModule_sam] + + def init_svd_volume(self, res, device): + self.density_plane_space_list = torch.nn.ModuleList() + self.density_plane_time_list = torch.nn.ModuleList() + self.app_plane_space_list = torch.nn.ModuleList() + self.app_plane_time_list = torch.nn.ModuleList() + self.sam_plane_space_list = torch.nn.ModuleList() + self.sam_plane_time_list = torch.nn.ModuleList() + + for i in range(self.gear_num): + density_plane_space, density_plane_time = self.init_one_svd_density( + self.density_n_comp, self.gridSize, self.num_keyframes, 1e-2, device + ) + app_plane_space, app_plane_time = self.init_one_svd( + self.app_n_comp, self.gridSize, self.num_keyframes, 0.1, device + ) + sam_plane_space, sam_plane_time = self.init_one_svd( + self.app_n_comp, self.gridSize, self.num_keyframes, 0.1, device + ) + self.density_plane_space_list.append(density_plane_space) + self.density_plane_time_list.append(density_plane_time) + self.app_plane_space_list.append(app_plane_space) + self.app_plane_time_list.append(app_plane_time) + self.sam_plane_space_list.append(sam_plane_space) + self.sam_plane_time_list.append(sam_plane_time) + + self.gear_plane_space, self.gear_plane_time = self.init_one_svd_density( + self.density_n_comp, self.gridSize, self.num_keyframes, 1e-2, device + ) + + self.basis_mat = torch.nn.Linear(sum(self.app_n_comp), self.app_dim, bias=False).to(device) + self.basis_mat_sam = 
torch.nn.Linear(sum(self.app_n_comp), self.app_dim, bias=False).to(device) + self.basis_mat_density = torch.nn.Linear(sum(self.density_n_comp), self.data_dim_density, bias=False).to(device) + self.basis_mat_gear = torch.nn.Linear(sum(self.density_n_comp), self.gear_num, bias=False).to(device) + + def init_one_svd(self, n_component, gridSize, numFrames, scale, device): + plane_coef_space, plane_coef_time = [], [] + + for i in range(len(self.matModeSpace)): + mat_id_space_0, mat_id_space_1 = self.matModeSpace[i] + mat_id_time_0, mat_id_time_1 = self.matModeTime[i] + + if n_component[i] == 0 and False: + plane_coef_space.append( + torch.nn.Parameter( + torch.zeros( + (1, n_component[i], self.gridSizeStart[mat_id_space_1], self.gridSizeStart[mat_id_space_0]) + ) + ) + ) + plane_coef_time.append( + torch.nn.Parameter(torch.zeros((1, n_component[i], numFrames, self.gridSizeStart[mat_id_time_0]))) + ) + else: + plane_coef_space.append( + torch.nn.Parameter( + scale + * torch.randn( + ( + 1, + n_component[i], + gridSize[mat_id_space_1], + gridSize[mat_id_space_0], + ) + ) + ) + ) + plane_coef_time.append( + torch.nn.Parameter(scale * torch.randn((1, n_component[i], numFrames, gridSize[mat_id_time_0]))) + ) + + return torch.nn.ParameterList(plane_coef_space).to(device), torch.nn.ParameterList(plane_coef_time).to(device) + + def init_one_svd_density(self, n_component, gridSize, numFrames, scale, device): + plane_coef_space, plane_coef_time = [], [] + + for i in range(len(self.matModeSpace)): + mat_id_space_0, mat_id_space_1 = self.matModeSpace[i] + mat_id_time_0, mat_id_time_1 = self.matModeTime[i] + + if n_component[i] == 0 and False: + plane_coef_space.append( + torch.nn.Parameter( + torch.zeros( + (1, n_component[i], self.gridSizeStart[mat_id_space_1], self.gridSizeStart[mat_id_space_0]) + ) + ) + ) + plane_coef_time.append( + torch.nn.Parameter(torch.zeros((1, n_component[i], numFrames, self.gridSizeStart[mat_id_time_0]))) + ) + elif self.fea2denseAct == "softplus": + plane_coef_space.append( + torch.nn.Parameter( + scale + * torch.randn( + ( + 1, + n_component[i], + gridSize[mat_id_space_1], + gridSize[mat_id_space_0], + ) + ) + ) + ) + plane_coef_time.append( + torch.nn.Parameter(scale * torch.randn((1, n_component[i], numFrames, gridSize[mat_id_time_0]))) + ) + else: + plane_coef_space.append( + torch.nn.Parameter( + scale + * torch.rand( + ( + 1, + n_component[i], + gridSize[mat_id_space_1], + gridSize[mat_id_space_0], + ) + ).clamp(1e-2, 1e8) + ) + ) + plane_coef_time.append( + torch.nn.Parameter( + scale * torch.rand((1, n_component[i], numFrames, gridSize[mat_id_time_0])).clamp(1e-2, 1e8) + ) + ) + + return torch.nn.ParameterList(plane_coef_space).to(device), torch.nn.ParameterList(plane_coef_time).to(device) + + def density_L1(self): + total = 0 + + for plane_idx in range(len(self.density_plane_space_list)): + for idx in range(len(self.density_plane_space_list[plane_idx])): + if self.density_plane_space_list[plane_idx][idx].shape[1] == 0: + continue + + total = ( + total + + torch.mean(torch.abs(self.density_plane_space_list[plane_idx][idx])) + + torch.mean(torch.abs(self.density_plane_time_list[plane_idx][idx])) + ) + + return total + + def TV_loss_density(self, reg): + total = 0 + + for plane_idx in range(len(self.density_plane_space_list)): + for idx in range(len(self.density_plane_space_list[plane_idx])): + if ( + self.density_plane_space_list[plane_idx][idx].shape[1] == 0 + or self.density_plane_time_list[plane_idx][idx].shape[1] == 0 + ): + continue + + total = ( + total + 
reg(self.density_plane_space_list[plane_idx][idx]) * 1e-2 + ) # + reg(self.density_plane_time[idx]) * 1e-2 + + return total + + def TV_loss_app(self, reg): + total = 0 + + for plane_idx in range(len(self.app_plane_space_list)): + for idx in range(len(self.app_plane_space_list[plane_idx])): + if ( + self.density_plane_space_list[plane_idx][idx].shape[1] == 0 + or self.density_plane_time_list[plane_idx][idx].shape[1] == 0 + ): + continue + + total = ( + total + reg(self.app_plane_space_list[plane_idx][idx]) * 1e-2 + ) # + reg(self.app_plane_time[idx]) * 1e-2 + + return total + + def compute_gear(self, xyz_sampled): + + coordinate_plane_space = torch.stack( + ( + xyz_sampled[:, self.matModeSpace[0]], + xyz_sampled[:, self.matModeSpace[1]], + xyz_sampled[:, self.matModeSpace[2]], + ) + ).view(3, -1, 1, 2) + + coordinate_plane_time = torch.stack( + ( + xyz_sampled[:, self.matModeTime[0]], + xyz_sampled[:, self.matModeTime[1]], + xyz_sampled[:, self.matModeTime[2]], + ) + ).view(3, -1, 1, 2) + + plane_coef_space, plane_coef_time = [], [] + for idx_plane, (plane_space, plane_time) in enumerate(zip(self.gear_plane_space, self.gear_plane_time)): + if self.gear_plane_space[idx_plane].shape[1] == 0: + continue + + cur_plane = F.grid_sample(plane_space, coordinate_plane_space[[idx_plane]], align_corners=True).view( + -1, xyz_sampled.shape[0] + ) + cur_time = F.grid_sample(plane_time, coordinate_plane_time[[idx_plane]], align_corners=True).view( + -1, xyz_sampled.shape[0] + ) + + plane_coef_space.append(cur_plane) + plane_coef_time.append(cur_time) + + plane_coef_space, plane_coef_time = torch.cat(plane_coef_space), torch.cat(plane_coef_time) + + gear_map = self.basis_mat_gear((plane_coef_space * plane_coef_time).T) + gear_map = torch.softmax(gear_map, 1) + return gear_map + + def compute_densityfeature(self, xyz_sampled, gear_map): + + coordinate_plane_space = torch.stack( + ( + xyz_sampled[:, self.matModeSpace[0]], + xyz_sampled[:, self.matModeSpace[1]], + xyz_sampled[:, self.matModeSpace[2]], + ) + ).view(3, -1, 1, 2) + + coordinate_plane_time = torch.stack( + ( + xyz_sampled[:, self.matModeTime[0]], + xyz_sampled[:, self.matModeTime[1]], + xyz_sampled[:, self.matModeTime[2]], + ) + ).view(3, -1, 1, 2) + + plane_coef_space_gear = [] + plane_coef_time_gear = [] + for gear_id in range(self.gear_num): + plane_coef_space, plane_coef_time = [], [] + for idx_plane, (plane_space, plane_time) in enumerate( + zip(self.density_plane_space_list[gear_id], self.density_plane_time_list[gear_id]) + ): + if self.density_plane_space_list[gear_id][idx_plane].shape[1] == 0: + continue + + cur_plane = F.grid_sample(plane_space, coordinate_plane_space[[idx_plane]], align_corners=True).view( + -1, xyz_sampled.shape[0] + ) + cur_time = F.grid_sample(plane_time, coordinate_plane_time[[idx_plane]], align_corners=True).view( + -1, xyz_sampled.shape[0] + ) + + plane_coef_space.append(cur_plane) + plane_coef_time.append(cur_time) + + plane_coef_space, plane_coef_time = torch.cat(plane_coef_space), torch.cat(plane_coef_time) + plane_coef_space_gear.append(plane_coef_space) + plane_coef_time_gear.append(plane_coef_time) + + plane_coef_space_gear = torch.stack(plane_coef_space_gear) + plane_coef_time_gear = torch.stack(plane_coef_time_gear) + plane_coef_space_gear = plane_coef_space_gear.permute(2, 1, 0) + plane_coef_time_gear = plane_coef_time_gear.permute(2, 1, 0) + plane_coef_space = torch.bmm(plane_coef_space_gear, gear_map).squeeze().permute(1, 0) + plane_coef_time = torch.bmm(plane_coef_time_gear, 
gear_map).squeeze().permute(1, 0) + + if self.densityMode != "Density": + return self.basis_mat_density((plane_coef_space * plane_coef_time).T) + else: + return torch.sum((plane_coef_space * plane_coef_time), dim=0).unsqueeze(-1) + + def compute_appfeature(self, xyz_sampled, gear_map): + coordinate_plane_space = torch.stack( + ( + xyz_sampled[:, self.matModeSpace[0]], + xyz_sampled[:, self.matModeSpace[1]], + xyz_sampled[:, self.matModeSpace[2]], + ) + ).view(3, -1, 1, 2) + + coordinate_plane_time = torch.stack( + ( + xyz_sampled[:, self.matModeTime[0]], + xyz_sampled[:, self.matModeTime[1]], + xyz_sampled[:, self.matModeTime[2]], + ) + ).view(3, -1, 1, 2) + + plane_coef_space_gear = [] + plane_coef_time_gear = [] + for gear_id in range(self.gear_num): + plane_coef_space, plane_coef_time = [], [] + + for idx_plane, (plane_space, plane_time) in enumerate( + zip(self.app_plane_space_list[gear_id], self.app_plane_time_list[gear_id]) + ): + if self.density_plane_space_list[gear_id][idx_plane].shape[1] == 0: + continue + + cur_plane = F.grid_sample(plane_space, coordinate_plane_space[[idx_plane]], align_corners=True).view( + -1, *xyz_sampled.shape[:1] + ) + cur_time = F.grid_sample(plane_time, coordinate_plane_time[[idx_plane]], align_corners=True).view( + -1, *xyz_sampled.shape[:1] + ) + + plane_coef_space.append(cur_plane) + plane_coef_time.append(cur_time) + + plane_coef_space, plane_coef_time = torch.cat(plane_coef_space), torch.cat(plane_coef_time) + plane_coef_space_gear.append(plane_coef_space) + plane_coef_time_gear.append(plane_coef_time) + + plane_coef_space_gear = torch.stack(plane_coef_space_gear) + plane_coef_time_gear = torch.stack(plane_coef_time_gear) + plane_coef_space_gear = plane_coef_space_gear.permute(2, 1, 0) + plane_coef_time_gear = plane_coef_time_gear.permute(2, 1, 0) + plane_coef_space = torch.bmm(plane_coef_space_gear, gear_map).squeeze().permute(1, 0) + plane_coef_time = torch.bmm(plane_coef_time_gear, gear_map).squeeze().permute(1, 0) + + return self.basis_mat((plane_coef_space * plane_coef_time).T) + + def compute_samfeature(self, xyz_sampled, gear_map): + coordinate_plane_space = torch.stack( + ( + xyz_sampled[:, self.matModeSpace[0]], + xyz_sampled[:, self.matModeSpace[1]], + xyz_sampled[:, self.matModeSpace[2]], + ) + ).view(3, -1, 1, 2) + + coordinate_plane_time = torch.stack( + ( + xyz_sampled[:, self.matModeTime[0]], + xyz_sampled[:, self.matModeTime[1]], + xyz_sampled[:, self.matModeTime[2]], + ) + ).view(3, -1, 1, 2) + + plane_coef_space_gear = [] + plane_coef_time_gear = [] + for gear_id in range(self.gear_num): + plane_coef_space, plane_coef_time = [], [] + + for idx_plane, (plane_space, plane_time) in enumerate( + zip(self.sam_plane_space_list[gear_id], self.sam_plane_time_list[gear_id]) + ): + if self.density_plane_space_list[gear_id][idx_plane].shape[1] == 0: + continue + + cur_plane = F.grid_sample(plane_space, coordinate_plane_space[[idx_plane]], align_corners=True).view( + -1, *xyz_sampled.shape[:1] + ) + cur_time = F.grid_sample(plane_time, coordinate_plane_time[[idx_plane]], align_corners=True).view( + -1, *xyz_sampled.shape[:1] + ) + + plane_coef_space.append(cur_plane) + plane_coef_time.append(cur_time) + + plane_coef_space, plane_coef_time = torch.cat(plane_coef_space), torch.cat(plane_coef_time) + plane_coef_space_gear.append(plane_coef_space) + plane_coef_time_gear.append(plane_coef_time) + + plane_coef_space_gear = torch.stack(plane_coef_space_gear) + plane_coef_time_gear = torch.stack(plane_coef_time_gear) + plane_coef_space_gear = 
plane_coef_space_gear.permute(2, 1, 0) + plane_coef_time_gear = plane_coef_time_gear.permute(2, 1, 0) + plane_coef_space = torch.bmm(plane_coef_space_gear, gear_map).squeeze().permute(1, 0) + plane_coef_time = torch.bmm(plane_coef_time_gear, gear_map).squeeze().permute(1, 0) + + return self.basis_mat_sam((plane_coef_space * plane_coef_time).T) + + def feature2density(self, density_features: torch.Tensor, x: Dict[str, torch.Tensor]): + if self.densityMode == "Density": + density_features = DensityRender(density_features, x) + elif self.densityMode == "DensityLinear": + density_features = DensityLinearRender(density_features, x) + elif self.densityMode == "DensityFourier": + density_features = DensityFourierRender(density_features, x) + + density_features = density_features * x["weights"].view(density_features.shape[0]) + + if self.fea2denseAct == "softplus": + return F.softplus(density_features + self.density_shift) + elif self.fea2denseAct == "relu": + return F.relu(density_features) + elif self.fea2denseAct == "relu_abs": + return F.relu(torch.abs(density_features)) + + @torch.no_grad() + def up_sampling_VM(self, n_component, plane_coef_space, plane_coef_time, res_target, numFrames): + + for i in range(len(self.matModeSpace)): + mat_id_space_0, mat_id_space_1 = self.matModeSpace[i] + mat_id_time_0, mat_id_time_1 = self.matModeTime[i] + + if self.density_plane_space_list[0][i].shape[1] == 0: + plane_coef_space[i] = torch.nn.Parameter( + plane_coef_space[i].data.new_zeros( + 1, n_component[i], res_target[mat_id_space_1], res_target[mat_id_space_0] + ), + ) + plane_coef_time[i] = torch.nn.Parameter( + plane_coef_time[i].data.new_zeros(1, n_component[i], numFrames, res_target[mat_id_time_0]) + ) + else: + plane_coef_space[i] = torch.nn.Parameter( + F.interpolate( + plane_coef_space[i].data, + size=(res_target[mat_id_space_1], res_target[mat_id_space_0]), + mode="bilinear", + align_corners=True, + ) + ) + plane_coef_time[i] = torch.nn.Parameter( + F.interpolate( + plane_coef_time[i].data, + size=(numFrames, res_target[mat_id_time_0]), + mode="bilinear", + align_corners=True, + ) + ) + + return plane_coef_space, plane_coef_time + + @torch.no_grad() + def upsample_volume_grid(self, res_target): + + for i in range(len(self.app_plane_space_list)): + + self.app_plane_space_list[i], self.app_plane_time_list[i] = self.up_sampling_VM( + self.app_n_comp, + self.app_plane_space_list[i], + self.app_plane_time_list[i], + res_target, + self.num_keyframes, + ) + self.sam_plane_space_list[i], self.sam_plane_time_list[i] = self.up_sampling_VM( + self.app_n_comp, + self.sam_plane_space_list[i], + self.sam_plane_time_list[i], + res_target, + self.num_keyframes, + ) + self.density_plane_space_list[i], self.density_plane_time_list[i] = self.up_sampling_VM( + self.density_n_comp, + self.density_plane_space_list[i], + self.density_plane_time_list[i], + res_target, + self.num_keyframes, + ) + self.gear_plane_space, self.gear_plane_time = self.up_sampling_VM( + self.density_n_comp, + self.gear_plane_space, + self.gear_plane_time, + res_target, + self.num_keyframes, + ) + + self.update_stepSize(res_target) + print(f"upsamping to {res_target}") + + @torch.no_grad() + def shrink(self, new_aabb): + print("====> shrinking ...") + + xyz_min, xyz_max = new_aabb + t_l, b_r = (xyz_min - self.aabb[0]) / self.units, (xyz_max - self.aabb[0]) / self.units + + t_l, b_r = torch.round(torch.round(t_l)).long(), torch.round(b_r).long() + 1 + b_r = torch.stack([b_r, self.gridSize]).amin(0) + + for j in 
range(len(self.density_plane_space_list)): + for i in range(len(self.matModeSpace)): + mat_id_space_0, mat_id_space_1 = self.matModeSpace[i] + mat_id_time_0, mat_id_time_1 = self.matModeTime[i] + + self.density_plane_space_list[j][i] = torch.nn.Parameter( + self.density_plane_space_list[j][i].data[ + ..., + t_l[mat_id_space_1] : b_r[mat_id_space_1], + t_l[mat_id_space_0] : b_r[mat_id_space_0], + ] + ) + self.density_plane_time_list[j][i] = torch.nn.Parameter( + self.density_plane_time_list[j][i].data[..., :, t_l[mat_id_time_0] : b_r[mat_id_time_0]] + ) + self.app_plane_space_list[j][i] = torch.nn.Parameter( + self.app_plane_space_list[j][i].data[ + ..., + t_l[mat_id_space_1] : b_r[mat_id_space_1], + t_l[mat_id_space_0] : b_r[mat_id_space_0], + ] + ) + self.app_plane_time_list[j][i] = torch.nn.Parameter( + self.app_plane_time_list[j][i].data[..., :, t_l[mat_id_time_0] : b_r[mat_id_time_0]] + ) + self.sam_plane_space_list[j][i] = torch.nn.Parameter( + self.sam_plane_space_list[j][i].data[ + ..., + t_l[mat_id_space_1] : b_r[mat_id_space_1], + t_l[mat_id_space_0] : b_r[mat_id_space_0], + ] + ) + self.sam_plane_time_list[j][i] = torch.nn.Parameter( + self.sam_plane_time_list[j][i].data[..., :, t_l[mat_id_time_0] : b_r[mat_id_time_0]] + ) + for i in range(len(self.matModeSpace)): + mat_id_space_0, mat_id_space_1 = self.matModeSpace[i] + mat_id_time_0, mat_id_time_1 = self.matModeTime[i] + + self.gear_plane_space[i] = torch.nn.Parameter( + self.gear_plane_space[i].data[ + ..., + t_l[mat_id_space_1] : b_r[mat_id_space_1], + t_l[mat_id_space_0] : b_r[mat_id_space_0], + ] + ) + self.gear_plane_time[i] = torch.nn.Parameter( + self.gear_plane_time[i].data[..., :, t_l[mat_id_time_0] : b_r[mat_id_time_0]] + ) + + if not torch.all(self.alphaMask.gridSize == self.gridSize): + t_l_r, b_r_r = t_l / (self.gridSize - 1), (b_r - 1) / (self.gridSize - 1) + correct_aabb = torch.zeros_like(new_aabb) + correct_aabb[0] = (1 - t_l_r) * self.aabb[0] + t_l_r * self.aabb[1] + correct_aabb[1] = (1 - b_r_r) * self.aabb[0] + b_r_r * self.aabb[1] + print("aabb", new_aabb, "\ncorrect aabb", correct_aabb) + new_aabb = correct_aabb + + newSize = b_r - t_l + self.aabb = new_aabb + self.update_stepSize((newSize[0], newSize[1], newSize[2])) + + @torch.no_grad() + def getDenseAlpha(self, gridSize=None): + samples = torch.stack( + torch.meshgrid( + torch.linspace(0, 1, gridSize[0]), + torch.linspace(0, 1, gridSize[1]), + torch.linspace(0, 1, gridSize[2]), + ), + -1, + ).to(self.device) + dense_xyz = self.aabb[0] * (1 - samples) + self.aabb[1] * samples + + # dense_xyz = dense_xyz + # print(self.stepSize, self.distance_scale*self.aabbDiag) + alpha = torch.zeros_like(dense_xyz[..., 0]) + time_scale_factor = (self.total_num_frames - 1) / self.total_num_frames + + for t in np.linspace(0, 1, self.total_num_frames): + cur_alpha = torch.zeros_like(alpha) + times = torch.ones_like(dense_xyz[..., -1:]) * t + base_times = torch.round((times * time_scale_factor).clamp(0.0, self.num_keyframes - 1)) * ( + 1.0 / time_scale_factor + ) + time_offset = times - base_times + + for i in range(gridSize[0]): + cur_xyz = dense_xyz[i].view(-1, 3) + cur_base_times = base_times[i].view(-1, 1) + cur_times = times[i].view(-1, 1) + cur_time_offset = time_offset[i].view(-1, 1) + + cur_xyzt = torch.cat([cur_xyz, cur_base_times], -1) + cur_alpha[i] = self.compute_alpha(cur_xyzt, 0.01, times=cur_times, time_offset=cur_time_offset).view( + (gridSize[1], gridSize[2]) + ) + + alpha = torch.maximum(alpha, cur_alpha) + + return alpha, dense_xyz + + def 
normalize_time_coord(self, time): + return (time * self.time_scale_factor + self.time_pixel_offset) * 2 - 1 + + def compute_alpha(self, xyzt_locs, length=0.01, times=None, time_offset=None): + sigma = torch.zeros(xyzt_locs.shape[:-1], device=xyzt_locs.device) + + xyzt_sampled = torch.cat( + [ + self.normalize_coord(xyzt_locs[..., :3]), + self.normalize_time_coord(xyzt_locs[..., -1:]), + ], + dim=-1, + ) + gear_map = self.compute_gear(xyzt_sampled)[:, :, None] + sigma_feature = self.compute_densityfeature(xyzt_sampled, gear_map) + sigma = self.feature2density( + sigma_feature, + { + "frames_per_keyframe": self.frames_per_keyframe, + "num_keyframes": self.num_keyframes, + "total_num_frames": self.total_num_frames, + "times": times, + "time_offset": time_offset, + "weights": torch.ones_like(times), + }, + ) + + alpha = 1 - torch.exp(-sigma * length).view(xyzt_locs.shape[:-1]) + + return alpha + + def forward(self, x, render_kwargs): + batch_size = x["viewdirs"].shape[0] + + nSamples = x["points"].shape[-1] // 3 + xyzt_sampled = torch.cat( + [ + x["points"].view(batch_size, -1, 3), + x["base_times"].view(batch_size, -1, 1), + ], + -1, + ) + + # Distances + distances = x["distances"].view(batch_size, -1) + deltas = torch.cat( + [ + distances[..., 1:] - distances[..., :-1], + 1e10 * torch.ones_like(distances[:, :1]), + ], + dim=1, + ) + + # Times & viewdirs + times = x["times"].view(batch_size, -1, 1) + time_offset = x["time_offset"].view(batch_size, -1, 1) + viewdirs = x["viewdirs"].view(batch_size, nSamples, 3) + + # Weights + weights = x["weights"].view(batch_size, -1, 1) + + # Mask out + ray_valid = self.valid_mask(xyzt_sampled[..., :3]) & (distances > 0) + + # Filter + if self.apply_filter_weights and self.cur_iter >= self.filter_wait_iters: + weights = weights.view(batch_size, -1) + min_weight = torch.topk(weights, self.filter_max_samples, dim=-1, sorted=False)[0].min(-1)[0].unsqueeze(-1) + + ray_valid = ray_valid & (weights >= (min_weight - 1e-8)) & (weights > self.filter_weight_thresh) + + weights = weights.view(batch_size, -1, 1) + weights = torch.ones_like(weights) # TODO: maybe remove + else: + weights = torch.ones_like(weights) # TODO: maybe remove + pass + + if self.alphaMask is not None and False: + # if self.alphaMask is not None: + alphas = self.alphaMask.sample_alpha(xyzt_sampled[..., :3][ray_valid]) + alpha_mask = alphas > 0 + ray_invalid = ~ray_valid + ray_invalid[ray_valid] |= ~alpha_mask + ray_valid = ~ray_invalid + + # Get densities + xyzt_sampled = torch.cat( + [ + self.normalize_coord(xyzt_sampled[..., :3]), + self.normalize_time_coord(xyzt_sampled[..., -1:]), + ], + dim=-1, + ) + sigma = xyzt_sampled.new_zeros(xyzt_sampled.shape[:-1], device=xyzt_sampled.device) + + if ray_valid.any(): + gear_map = self.compute_gear(xyzt_sampled[ray_valid])[:, :, None] + sigma_feature = self.compute_densityfeature(xyzt_sampled[ray_valid], gear_map) + + # Convert to density + valid_sigma = self.feature2density( + sigma_feature, + { + "frames_per_keyframe": self.frames_per_keyframe, + "num_keyframes": self.num_keyframes, + "total_num_frames": self.total_num_frames, + "times": times[ray_valid], + "time_offset": time_offset[ray_valid], + "weights": weights[ray_valid], + }, + ) + + # Update valid + assert valid_sigma is not None + assert ray_valid is not None + + sigma[ray_valid] = valid_sigma + + alpha, weight, bg_weight = raw2alpha(sigma, deltas * self.distance_scale) + app_mask = weight > self.rayMarch_weight_thres + + # Get colors + rgb = xyzt_sampled.new_zeros( + (xyzt_sampled.shape[0], 
xyzt_sampled.shape[1], 3 + 256), + device=xyzt_sampled.device, + ) + + if app_mask.any(): + gear_map = self.compute_gear(xyzt_sampled[app_mask])[:, :, None] + app_features = self.compute_appfeature(xyzt_sampled[app_mask], gear_map) + sam_features = self.compute_samfeature(xyzt_sampled[app_mask], gear_map) + + valid_rgbs = self.renderModule( + xyzt_sampled[app_mask], + viewdirs[app_mask], + app_features, + { + "frames_per_keyframe": self.frames_per_keyframe, + "num_keyframes": self.num_keyframes, + "total_num_frames": self.total_num_frames, + "times": times[app_mask], + "time_offset": time_offset[app_mask], + }, + ) + + valid_sam = self.renderModule_sam( + xyzt_sampled[app_mask], + viewdirs[app_mask], + sam_features, + { + "frames_per_keyframe": self.frames_per_keyframe, + "num_keyframes": self.num_keyframes, + "total_num_frames": self.total_num_frames, + "times": times[app_mask], + "time_offset": time_offset[app_mask], + }, + ) + + rgb = valid_rgbs.new_zeros((xyzt_sampled.shape[0], xyzt_sampled.shape[1], 3), device=xyzt_sampled.device) + + sam = valid_rgbs.new_zeros((xyzt_sampled.shape[0], xyzt_sampled.shape[1], 256), device=xyzt_sampled.device) + + assert valid_rgbs is not None + assert app_mask is not None + rgb[app_mask] = valid_rgbs + sam[app_mask] = valid_sam + + # pdb.set_trace() + # Transform colors + if "color_scale" in x: + color_scale = x["color_scale"].view(rgb.shape[0], rgb.shape[1], 259) + color_shift = x["color_shift"].view(rgb.shape[0], rgb.shape[1], 259) + rgb = scale_shift_color_all(rgb, color_scale[:, :, :3], color_shift[:, :, :3]) + sam = scale_shift_color_all(sam, color_scale[:, :, 3:], color_shift[:, :, 3:]) + elif "color_transform" in x: + color_transform = x["color_transform"].view(rgb.shape[0], rgb.shape[1], 9) + color_shift = x["color_shift"].view(rgb.shape[0], rgb.shape[1], 3) + rgb = transform_color_all(rgb, color_transform, color_shift) + + # Over composite + acc_map = torch.sum(weight, -1) + rgb_map = torch.sum(weight[:, :, None] * rgb, -2) + sam_map = torch.sum(weight[:, :, None].detach() * sam, -2) + + rgb_map = torch.cat([rgb_map, sam_map], 1) + + # White background + if (self.white_bg or (self.training and torch.rand((1,)) < 0.5)) and not self.black_bg: + rgb_map = rgb_map + (1.0 - acc_map[:, None]) + + # Transform colors + if "color_scale_global" in x: + rgb_map = scale_shift_color_one(rgb, rgb_map, x) + elif "color_transform_global" in x: + rgb_map = transform_color_one(rgb, rgb_map, x) + + # Clamp and return + if not self.training: + rgb_map = rgb_map.clamp(0, 1) + + # Other fields + outputs = {"rgb": rgb_map} + + fields = render_kwargs.get("fields", []) + no_over_fields = render_kwargs.get("no_over_fields", []) + pred_weights_fields = render_kwargs.get("pred_weights_fields", []) + + if len(fields) == 0: + return outputs + + if len(pred_weights_fields) > 0: + pred_weights = alpha2weights(weights[..., 0]) + + for key in fields: + if key == "render_weights": + outputs[key] = weight + elif key in no_over_fields: + outputs[key] = x[key].view(batch_size, -1) + elif key in pred_weights_fields: + outputs[key] = torch.sum( + pred_weights[..., None] * x[key].view(batch_size, nSamples, -1), + -2, + ) + else: + outputs[key] = torch.sum( + weight[..., None] * x[key].view(batch_size, nSamples, -1), + -2, + ) + + return outputs + + +tensorf_dynamic_dict = { + "tensor_vm_split_time": TensorVMKeyframeTime, +} diff --git a/nlf/nets/tensorf_hybrid.py b/nlf/nets/tensorf_hybrid.py new file mode 100644 index 0000000..c945744 --- /dev/null +++ b/nlf/nets/tensorf_hybrid.py @@ 
-0,0 +1,782 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 Anpei Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import time +from typing import Dict + +import numpy as np +import torch +import torch.nn +import torch.nn.functional as F +from torch import autograd + +from utils.intersect_utils import sort_with, sort_z +from utils.sh_utils import eval_sh_bases +from utils.tensorf_utils import ( + DensityFourierRender, + DensityLinearRender, + DensityRender, + N_to_reso, + RGBIdentityRender, + RGBtFourierRender, + RGBtLinearRender, + alpha2weights, + cal_n_samples, + raw2alpha, + scale_shift_color_all, + scale_shift_color_one, + transform_color_all, + transform_color_one, +) + +from .tensorf_base import TensorBase + + +class TensorVMKeyframeHybrid(TensorBase): + def __init__(self, in_channels, out_channels, cfg, **kwargs): + self.matModeSpace = [[0, 1], [0, 2], [1, 2]] + self.matModeTime = [[2, 3], [1, 3], [0, 3]] + self.static_only_components = cfg.static_only_components if "static_only_components" in cfg else 0 + self.num_keyframes = kwargs["system"].dm.train_dataset.num_keyframes + self.total_num_frames = kwargs["system"].dm.train_dataset.num_frames + self.frames_per_keyframe = ( + cfg.frames_per_keyframe if "frames_per_keyframe" in cfg else self.total_num_frames // self.num_keyframes + ) + # self.time_scale_factor = ((self.num_keyframes) / (self.num_keyframes - 1)) * (self.total_num_frames - 1) / self.total_num_frames + # self.time_pixel_offset = 0.0 / self.num_keyframes + self.time_scale_factor = (self.total_num_frames - 1) / self.total_num_frames + self.time_pixel_offset = 0.5 / self.num_keyframes + + # Number of outputs for color and density + if cfg.shadingMode == "RGBtLinear": + cfg.data_dim_color = (2) * 3 + elif cfg.shadingMode == "RGBtFourier": + cfg.data_dim_color = (self.frames_per_keyframe * 2 + 1) * 3 + + self.densityMode = cfg.densityMode + + if self.densityMode == "Density": + self.data_dim_density = 1 + elif self.densityMode == "DensityLinear": + self.data_dim_density = 2 + elif self.densityMode == "DensityFourier": + self.data_dim_density = self.frames_per_keyframe * 2 + 1 + + super().__init__(in_channels, out_channels, cfg, **kwargs) + + if "MLP" in self.shadingMode: + self.opt_group = { + "color": [ + self.density_plane_space, + self.density_plane_time, + self.app_plane_space, + self.app_plane_time, + ], + "color_impl": [self.basis_mat, self.basis_mat_density], + } + else: + self.opt_group = { + "color": [ + self.density_plane_space, + self.density_plane_time, + self.app_plane_space, + self.app_plane_time, + self.basis_mat, + self.basis_mat_density, + ], + } + + if isinstance(self.renderModule, torch.nn.Module): + if "MLP" in self.shadingMode: + self.opt_group["color_impl"] += [self.renderModule] + else: + self.opt_group["color_impl"] = [self.renderModule] + + def init_svd_volume(self, res, device): + if self.fea2denseAct == "softplus": + self.density_plane_space, self.density_plane_time, self.density_line = self.init_one_svd_density( + self.density_n_comp, self.gridSize, self.num_keyframes, 0.1, device + ) + else: + self.density_plane_space, self.density_plane_time, self.density_line = self.init_one_svd_density( + self.density_n_comp, self.gridSize, self.num_keyframes, 1e-2, device + ) + + self.app_plane_space, self.app_plane_time, self.app_line = self.init_one_svd( + self.app_n_comp, 
self.gridSize, self.num_keyframes, 0.1, device + ) + self.basis_mat = torch.nn.Linear(sum(self.app_n_comp), self.app_dim, bias=False).to(device) + self.basis_mat_density = torch.nn.Linear(sum(self.density_n_comp), self.data_dim_density, bias=False).to(device) + + def init_one_svd(self, n_component, gridSize, numFrames, scale, device): + plane_coef_space, plane_coef_time, line_coef = [], [], [] + + for i in range(len(self.matModeSpace)): + mat_id_space_0, mat_id_space_1 = self.matModeSpace[i] + mat_id_time_0, mat_id_time_1 = self.matModeTime[i] + + plane_coef_space.append( + torch.nn.Parameter( + scale + * torch.randn( + ( + 1, + n_component[i], + gridSize[mat_id_space_1], + gridSize[mat_id_space_0], + ) + ) + ) + ) + + plane_coef_time.append( + torch.nn.Parameter( + scale + * torch.randn((1, n_component[i] - self.static_only_components, numFrames, gridSize[mat_id_time_0])) + ) + ) + + line_coef.append( + torch.nn.Parameter(scale * torch.randn((1, self.static_only_components, 1, gridSize[mat_id_time_0]))) + ) + + return ( + torch.nn.ParameterList(plane_coef_space).to(device), + torch.nn.ParameterList(plane_coef_time).to(device), + torch.nn.ParameterList(line_coef).to(device), + ) + + def init_one_svd_density(self, n_component, gridSize, numFrames, scale, device): + plane_coef_space, plane_coef_time, line_coef = [], [], [] + + for i in range(len(self.matModeSpace)): + mat_id_space_0, mat_id_space_1 = self.matModeSpace[i] + mat_id_time_0, mat_id_time_1 = self.matModeTime[i] + + if self.fea2denseAct == "softplus": + plane_coef_space.append( + torch.nn.Parameter( + scale + * torch.randn( + ( + 1, + n_component[i], + gridSize[mat_id_space_1], + gridSize[mat_id_space_0], + ) + ) + ) + ) + plane_coef_time.append( + torch.nn.Parameter( + scale + * torch.randn( + (1, n_component[i] - self.static_only_components, numFrames, gridSize[mat_id_time_0]) + ) + ) + ) + line_coef.append( + torch.nn.Parameter( + scale * torch.randn((1, self.static_only_components, 1, gridSize[mat_id_time_0])) + ) + ) + else: + plane_coef_space.append( + torch.nn.Parameter( + scale + * torch.rand( + ( + 1, + n_component[i], + gridSize[mat_id_space_1], + gridSize[mat_id_space_0], + ) + ).clamp(1e-2, 1e8) + ) + ) + plane_coef_time.append( + torch.nn.Parameter( + scale + * torch.rand( + (1, n_component[i] - self.static_only_components, numFrames, gridSize[mat_id_time_0]) + ).clamp(1e-2, 1e8) + ) + ) + line_coef.append( + torch.nn.Parameter( + scale + * torch.rand((1, self.static_only_components, 1, gridSize[mat_id_time_0])).clamp(1e-2, 1e8) + ) + ) + + return ( + torch.nn.ParameterList(plane_coef_space).to(device), + torch.nn.ParameterList(plane_coef_time).to(device), + torch.nn.ParameterList(line_coef).to(device), + ) + + def density_L1(self): + total = 0 + + for idx in range(len(self.density_plane_space)): + if self.density_plane_space[idx].shape[1] == 0: + continue + + total = ( + total + + torch.mean(torch.abs(self.density_plane_space[idx])) + + torch.mean(torch.abs(self.density_plane_time[idx])) + + torch.mean(torch.abs(self.density_line[idx])) + ) + + return total + + def TV_loss_density(self, reg): + total = 0 + + for idx in range(len(self.density_plane_space)): + if self.density_plane_space[idx].shape[1] == 0: + continue + + # total = ( + # total + reg(self.density_plane_space[idx]) * 1e-2 + # ) + reg(self.density_plane_time[idx]) * 1e-2 + total = total + reg(self.density_plane_space[idx]) * 1e-2 + + return total + + def TV_loss_app(self, reg): + total = 0 + + for idx in range(len(self.app_plane_space)): + if 
self.density_plane_space[idx].shape[1] == 0: + continue + + # total = ( + # total + reg(self.app_plane_space[idx]) * 1e-2 + # ) + reg(self.app_plane_time[idx]) * 1e-2 + total = total + reg(self.app_plane_space[idx]) * 1e-2 + + return total + + def compute_densityfeature(self, xyz_sampled): + # plane + line basis + coordinate_plane_space = torch.stack( + ( + xyz_sampled[:, self.matModeSpace[0]], + xyz_sampled[:, self.matModeSpace[1]], + xyz_sampled[:, self.matModeSpace[2]], + ) + ).view(3, -1, 1, 2) + + coordinate_plane_time = torch.stack( + ( + xyz_sampled[:, self.matModeTime[0]], + xyz_sampled[:, self.matModeTime[1]], + xyz_sampled[:, self.matModeTime[2]], + ) + ).view(3, -1, 1, 2) + + if self.static_only_components > 0: + coordinate_line = torch.stack( + ( + xyz_sampled[:, self.matModeTime[0][0]], + xyz_sampled[:, self.matModeTime[1][0]], + xyz_sampled[:, self.matModeTime[2][0]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view( + 3, -1, 1, 2 + ) + + plane_coef_space, plane_coef_time = [], [] + + for idx_plane, (plane_space, plane_time) in enumerate(zip(self.density_plane_space, self.density_plane_time)): + if self.density_plane_space[idx_plane].shape[1] == 0: + continue + + cur_plane = F.grid_sample(plane_space, coordinate_plane_space[[idx_plane]], align_corners=True).view( + -1, xyz_sampled.shape[0] + ) + cur_time = F.grid_sample(plane_time, coordinate_plane_time[[idx_plane]], align_corners=True).view( + -1, xyz_sampled.shape[0] + ) + + plane_coef_space.append(cur_plane) + plane_coef_time.append(cur_time) + + if self.static_only_components > 0: + cur_line = F.grid_sample( + self.density_line[idx_plane], + coordinate_line[[idx_plane]], + align_corners=True, + ).view(-1, *xyz_sampled.shape[:1]) + plane_coef_time.append(cur_line) + + plane_coef_space, plane_coef_time = torch.cat(plane_coef_space), torch.cat(plane_coef_time) + + if self.densityMode != "Density": + return self.basis_mat_density((plane_coef_space * plane_coef_time).T) + else: + return torch.sum((plane_coef_space * plane_coef_time), dim=0).unsqueeze(-1) + + def compute_appfeature(self, xyz_sampled): + # plane + line basis + coordinate_plane_space = torch.stack( + ( + xyz_sampled[:, self.matModeSpace[0]], + xyz_sampled[:, self.matModeSpace[1]], + xyz_sampled[:, self.matModeSpace[2]], + ) + ).view(3, -1, 1, 2) + + coordinate_plane_time = torch.stack( + ( + xyz_sampled[:, self.matModeTime[0]], + xyz_sampled[:, self.matModeTime[1]], + xyz_sampled[:, self.matModeTime[2]], + ) + ).view(3, -1, 1, 2) + + if self.static_only_components > 0: + coordinate_line = torch.stack( + ( + xyz_sampled[:, self.matModeTime[0][0]], + xyz_sampled[:, self.matModeTime[1][0]], + xyz_sampled[:, self.matModeTime[2][0]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view( + 3, -1, 1, 2 + ) + + plane_coef_space, plane_coef_time = [], [] + + for idx_plane, (plane_space, plane_time) in enumerate(zip(self.app_plane_space, self.app_plane_time)): + if self.density_plane_space[idx_plane].shape[1] == 0: + continue + + cur_plane = F.grid_sample(plane_space, coordinate_plane_space[[idx_plane]], align_corners=True).view( + -1, *xyz_sampled.shape[:1] + ) + cur_time = F.grid_sample(plane_time, coordinate_plane_time[[idx_plane]], align_corners=True).view( + -1, *xyz_sampled.shape[:1] + ) + + if self.static_only_components > 0: + cur_line = F.grid_sample( + self.density_line[idx_plane], + coordinate_line[[idx_plane]], + align_corners=True, + ).view(-1, 
*xyz_sampled.shape[:1])
+                plane_coef_time.append(cur_line)
+
+            plane_coef_space.append(cur_plane)
+            plane_coef_time.append(cur_time)
+
+        plane_coef_space, plane_coef_time = torch.cat(plane_coef_space), torch.cat(plane_coef_time)
+        return self.basis_mat((plane_coef_space * plane_coef_time).T)
+
+    def feature2density(self, density_features: torch.Tensor, x: Dict[str, torch.Tensor]):
+        if self.densityMode == "Density":
+            density_features = DensityRender(density_features, x)
+        elif self.densityMode == "DensityLinear":
+            density_features = DensityLinearRender(density_features, x)
+        elif self.densityMode == "DensityFourier":
+            density_features = DensityFourierRender(density_features, x)
+
+        density_features = density_features * x["weights"].view(density_features.shape[0])
+
+        if self.fea2denseAct == "softplus":
+            return F.softplus(density_features + self.density_shift)
+        elif self.fea2denseAct == "relu":
+            return F.relu(density_features)
+        elif self.fea2denseAct == "relu_abs":
+            return F.relu(torch.abs(density_features))
+
+    @torch.no_grad()
+    def up_sampling_VM(self, plane_coef_space, plane_coef_time, line_coef, res_target, numFrames):
+
+        for i in range(len(self.matModeSpace)):
+            mat_id_space_0, mat_id_space_1 = self.matModeSpace[i]
+            mat_id_time_0, mat_id_time_1 = self.matModeTime[i]
+
+            if self.density_plane_space[i].shape[1] == 0:
+                continue
+
+            plane_coef_space[i] = torch.nn.Parameter(
+                F.interpolate(
+                    plane_coef_space[i].data,
+                    size=(res_target[mat_id_space_1], res_target[mat_id_space_0]),
+                    mode="bilinear",
+                    align_corners=True,
+                )
+            )
+
+            plane_coef_time[i] = torch.nn.Parameter(
+                F.interpolate(
+                    plane_coef_time[i].data,
+                    size=(numFrames, res_target[mat_id_time_0]),
+                    mode="bilinear",
+                    align_corners=True,
+                )
+            )
+
+            if self.static_only_components > 0:
+                line_coef[i] = torch.nn.Parameter(
+                    F.interpolate(
+                        line_coef[i].data,
+                        size=(1, res_target[mat_id_time_0]),
+                        mode="bilinear",
+                        align_corners=True,
+                    )
+                )
+
+        return plane_coef_space, plane_coef_time, line_coef
+
+    @torch.no_grad()
+    def upsample_volume_grid(self, res_target):
+        self.app_plane_space, self.app_plane_time, self.app_line = self.up_sampling_VM(
+            self.app_plane_space, self.app_plane_time, self.app_line, res_target, self.num_keyframes
+        )
+        self.density_plane_space, self.density_plane_time, self.density_line = self.up_sampling_VM(
+            self.density_plane_space,
+            self.density_plane_time,
+            self.density_line,
+            res_target,
+            self.num_keyframes,
+        )
+        self.update_stepSize(res_target)
+        print(f"upsampling to {res_target}")
+
+    @torch.no_grad()
+    def shrink(self, new_aabb):
+        print("====> shrinking ...")
+
+        xyz_min, xyz_max = new_aabb
+        t_l, b_r = (xyz_min - self.aabb[0]) / self.units, (xyz_max - self.aabb[0]) / self.units
+        # print(new_aabb, self.aabb)
+
+        # print(t_l, b_r,self.alphaMask.alpha_volume.shape)
+        t_l, b_r = torch.round(torch.round(t_l)).long(), torch.round(b_r).long() + 1
+        b_r = torch.stack([b_r, self.gridSize]).amin(0)
+
+        for i in range(len(self.matModeSpace)):
+            mat_id_space_0, mat_id_space_1 = self.matModeSpace[i]
+            mat_id_time_0, mat_id_time_1 = self.matModeTime[i]
+
+            if self.density_plane_space[i].shape[1] == 0:
+                continue
+
+            self.density_plane_space[i] = torch.nn.Parameter(
+                self.density_plane_space[i].data[
+                    ...,
+                    t_l[mat_id_space_1] : b_r[mat_id_space_1],
+                    t_l[mat_id_space_0] : b_r[mat_id_space_0],
+                ]
+            )
+            self.density_plane_time[i] = torch.nn.Parameter(
+                self.density_plane_time[i].data[..., :, t_l[mat_id_time_0] : b_r[mat_id_time_0]]
+            )
+            self.density_line[i] = torch.nn.Parameter(
+
self.density_line[i].data[..., :, t_l[mat_id_time_0] : b_r[mat_id_time_0]] + ) + self.app_plane_space[i] = torch.nn.Parameter( + self.app_plane_space[i].data[ + ..., + t_l[mat_id_space_1] : b_r[mat_id_space_1], + t_l[mat_id_space_0] : b_r[mat_id_space_0], + ] + ) + self.app_plane_time[i] = torch.nn.Parameter( + self.app_plane_time[i].data[..., :, t_l[mat_id_time_0] : b_r[mat_id_time_0]] + ) + self.app_line[i] = torch.nn.Parameter( + self.app_line[i].data[..., :, t_l[mat_id_time_0] : b_r[mat_id_time_0]] + ) + + if not torch.all(self.alphaMask.gridSize == self.gridSize): + t_l_r, b_r_r = t_l / (self.gridSize - 1), (b_r - 1) / (self.gridSize - 1) + correct_aabb = torch.zeros_like(new_aabb) + correct_aabb[0] = (1 - t_l_r) * self.aabb[0] + t_l_r * self.aabb[1] + correct_aabb[1] = (1 - b_r_r) * self.aabb[0] + b_r_r * self.aabb[1] + print("aabb", new_aabb, "\ncorrect aabb", correct_aabb) + new_aabb = correct_aabb + + newSize = b_r - t_l + self.aabb = new_aabb + self.update_stepSize((newSize[0], newSize[1], newSize[2])) + + @torch.no_grad() + def getDenseAlpha(self, gridSize=None): + samples = torch.stack( + torch.meshgrid( + torch.linspace(0, 1, gridSize[0]), + torch.linspace(0, 1, gridSize[1]), + torch.linspace(0, 1, gridSize[2]), + ), + -1, + ).to(self.device) + dense_xyz = self.aabb[0] * (1 - samples) + self.aabb[1] * samples + + # dense_xyz = dense_xyz + # print(self.stepSize, self.distance_scale*self.aabbDiag) + alpha = torch.zeros_like(dense_xyz[..., 0]) + time_scale_factor = (self.total_num_frames - 1) / self.total_num_frames + + for t in np.linspace(0, 1, self.total_num_frames): + cur_alpha = torch.zeros_like(alpha) + times = torch.ones_like(dense_xyz[..., -1:]) * t + base_times = torch.round((times * time_scale_factor).clamp(0.0, self.num_keyframes - 1)) * ( + 1.0 / time_scale_factor + ) + time_offset = times - base_times + + for i in range(gridSize[0]): + cur_xyz = dense_xyz[i].view(-1, 3) + cur_base_times = base_times[i].view(-1, 1) + cur_times = times[i].view(-1, 1) + cur_time_offset = time_offset[i].view(-1, 1) + + cur_xyzt = torch.cat([cur_xyz, cur_base_times], -1) + cur_alpha[i] = self.compute_alpha(cur_xyzt, 0.01, times=cur_times, time_offset=cur_time_offset).view( + (gridSize[1], gridSize[2]) + ) + + alpha = torch.maximum(alpha, cur_alpha) + + return alpha, dense_xyz + + def normalize_time_coord(self, time): + return (time * self.time_scale_factor + self.time_pixel_offset) * 2 - 1 + + def compute_alpha(self, xyzt_locs, length=0.01, times=None, time_offset=None): + sigma = torch.zeros(xyzt_locs.shape[:-1], device=xyzt_locs.device) + + xyzt_sampled = torch.cat( + [ + self.normalize_coord(xyzt_locs[..., :3]), + self.normalize_time_coord(xyzt_locs[..., -1:]), + ], + dim=-1, + ) + sigma_feature = self.compute_densityfeature(xyzt_sampled) + sigma = self.feature2density( + sigma_feature, + { + "frames_per_keyframe": self.frames_per_keyframe, + "num_keyframes": self.num_keyframes, + "total_num_frames": self.total_num_frames, + "times": times, + "time_offset": time_offset, + "weights": torch.ones_like(times), + }, + ) + + alpha = 1 - torch.exp(-sigma * length).view(xyzt_locs.shape[:-1]) + + return alpha + + def forward(self, x, render_kwargs): + batch_size = x["viewdirs"].shape[0] + + # Positions + times + nSamples = x["points"].shape[-1] // 3 + xyzt_sampled = torch.cat( + [ + x["points"].view(batch_size, -1, 3), + x["base_times"].view(batch_size, -1, 1), + ], + -1, + ) + + # Distances + distances = x["distances"].view(batch_size, -1) + deltas = torch.cat( + [ + distances[..., 1:] - 
distances[..., :-1], + 1e10 * torch.ones_like(distances[:, :1]), + ], + dim=1, + ) + # deltas = torch.ones_like(deltas) * (1.0 / deltas.shape[1]) + + # deltas = torch.cat( + # [ + # distances[..., 0:1], + # distances[..., 1:] - distances[..., :-1], + # ], + # dim=1, + # ) + + # Times & viewdirs + times = x["times"].view(batch_size, -1, 1) + time_offset = x["time_offset"].view(batch_size, -1, 1) + viewdirs = x["viewdirs"].view(batch_size, nSamples, 3) + + # Weights + weights = x["weights"].view(batch_size, -1, 1) + + # Mask out + ray_valid = self.valid_mask(xyzt_sampled[..., :3]) & (distances > 0) + + # Filter + if self.apply_filter_weights and self.cur_iter >= self.filter_wait_iters: + weights = weights.view(batch_size, -1) + min_weight = torch.topk(weights, self.filter_max_samples, dim=-1, sorted=False)[0].min(-1)[0].unsqueeze(-1) + + ray_valid = ray_valid & (weights >= (min_weight - 1e-8)) & (weights > self.filter_weight_thresh) + + weights = weights.view(batch_size, -1, 1) + weights = torch.ones_like(weights) # TODO: maybe remove + else: + weights = torch.ones_like(weights) # TODO: maybe remove + pass + + if self.alphaMask is not None and False: + alphas = self.alphaMask.sample_alpha(xyzt_sampled[..., :3][ray_valid]) + alpha_mask = alphas > 0 + ray_invalid = ~ray_valid + ray_invalid[ray_valid] |= ~alpha_mask + ray_valid = ~ray_invalid + + # Get densities + xyzt_sampled = torch.cat( + [ + self.normalize_coord(xyzt_sampled[..., :3]), + self.normalize_time_coord(xyzt_sampled[..., -1:]), + ], + dim=-1, + ) + sigma = xyzt_sampled.new_zeros(xyzt_sampled.shape[:-1], device=xyzt_sampled.device) + + if ray_valid.any(): + sigma_feature = self.compute_densityfeature(xyzt_sampled[ray_valid]) + + # Convert to density + valid_sigma = self.feature2density( + sigma_feature, + { + "frames_per_keyframe": self.frames_per_keyframe, + "num_keyframes": self.num_keyframes, + "total_num_frames": self.total_num_frames, + "times": times[ray_valid], + "time_offset": time_offset[ray_valid], + "weights": weights[ray_valid], + }, + ) + + # Update valid + assert valid_sigma is not None + assert ray_valid is not None + + sigma[ray_valid] = valid_sigma + + alpha, weight, bg_weight = raw2alpha(sigma, deltas * self.distance_scale) + app_mask = weight > self.rayMarch_weight_thres + + # if len(self.update_AlphaMask_list) == 0 or self.cur_iter < self.update_AlphaMask_list[0]: + # app_mask = torch.ones_like(app_mask) + + # Get colors + rgb = xyzt_sampled.new_zeros( + (xyzt_sampled.shape[0], xyzt_sampled.shape[1], 3), + device=xyzt_sampled.device, + ) + + if app_mask.any(): + app_features = self.compute_appfeature(xyzt_sampled[app_mask]) + + ## Transform colors + # if 'color_scale' in x: + # color_scale = x['color_scale'].view(rgb.shape[0], rgb.shape[1], 3) + # color_shift = x['color_shift'].view(rgb.shape[0], rgb.shape[1], 3) + # app_features = scale_shift_color_all(app_features, color_scale, color_shift) + # elif 'color_transform' in x: + # color_transform = x['color_transform'].view(rgb.shape[0], rgb.shape[1], 9) + # color_shift = x['color_shift'].view(rgb.shape[0], rgb.shape[1], 3) + # app_features = transform_color_all(app_features, color_transform, color_shift) + + valid_rgbs = self.renderModule( + xyzt_sampled[app_mask], + viewdirs[app_mask], + app_features, + { + "frames_per_keyframe": self.frames_per_keyframe, + "num_keyframes": self.num_keyframes, + "total_num_frames": self.total_num_frames, + "times": times[app_mask], + "time_offset": time_offset[app_mask], + }, + ) + assert valid_rgbs is not None + assert app_mask 
is not None + rgb[app_mask] = valid_rgbs + + # Transform colors + if "color_scale" in x: + color_scale = x["color_scale"].view(rgb.shape[0], rgb.shape[1], 3) + color_shift = x["color_shift"].view(rgb.shape[0], rgb.shape[1], 3) + rgb = scale_shift_color_all(rgb, color_scale, color_shift) + elif "color_transform" in x: + color_transform = x["color_transform"].view(rgb.shape[0], rgb.shape[1], 9) + color_shift = x["color_shift"].view(rgb.shape[0], rgb.shape[1], 3) + rgb = transform_color_all(rgb, color_transform, color_shift) + + # Over composite + acc_map = torch.sum(weight, -1) + rgb_map = torch.sum(weight[:, :, None] * rgb, -2) + + # White background + if (self.white_bg or (self.training and torch.rand((1,)) < 0.5)) and not self.black_bg: + rgb_map = rgb_map + (1.0 - acc_map[:, None]) + + # Transform colors + if "color_scale_global" in x: + rgb_map = scale_shift_color_one(rgb, rgb_map, x) + elif "color_transform_global" in x: + rgb_map = transform_color_one(rgb, rgb_map, x) + + # Clamp and return + if not self.training: + rgb_map = rgb_map.clamp(0, 1) + + # Other fields + outputs = {"rgb": rgb_map} + + fields = render_kwargs.get("fields", []) + no_over_fields = render_kwargs.get("no_over_fields", []) + pred_weights_fields = render_kwargs.get("pred_weights_fields", []) + + if len(fields) == 0: + return outputs + + if len(pred_weights_fields) > 0: + pred_weights = alpha2weights(weights[..., 0]) + + for key in fields: + if key == "render_weights": + outputs[key] = weight + elif key in no_over_fields: + outputs[key] = x[key].view(batch_size, -1) + elif key in pred_weights_fields: + outputs[key] = torch.sum( + pred_weights[..., None] * x[key].view(batch_size, nSamples, -1), + -2, + ) + else: + outputs[key] = torch.sum( + weight[..., None] * x[key].view(batch_size, nSamples, -1), + -2, + ) + + return outputs + + +tensorf_hybrid_dict = { + "tensor_vm_split_hybrid": TensorVMKeyframeHybrid, +} diff --git a/nlf/nets/tensorf_no_sample.py b/nlf/nets/tensorf_no_sample.py new file mode 100644 index 0000000..982dc4c --- /dev/null +++ b/nlf/nets/tensorf_no_sample.py @@ -0,0 +1,261 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 Anpei Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import time +from typing import Dict + +import numpy as np +import torch +import torch.nn +import torch.nn.functional as F +from torch import autograd + +from utils.ray_utils import dot +from utils.sh_utils import eval_sh_bases +from utils.tensorf_utils import ( + N_to_reso, + alpha2weights, + cal_n_samples, + raw2alpha, + scale_shift_color_all, + scale_shift_color_one, + transform_color_all, + transform_color_one, +) + +from .tensorf_base import TensorVMSplit + + +class TensorVMNoSample(TensorVMSplit): + __constants__ = ["density_plane"] + + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__(in_channels, out_channels, cfg, **kwargs) + + if kwargs["system"].cfg.dataset.collection in ["bulldozer"]: + self.black_bg = 1 + + if kwargs["system"].cfg.dataset.name in ["blender"]: + self.white_bg = 1 + + def compute_densityfeature(self, xyz_sampled): + coordinate_plane = torch.stack( + ( + xyz_sampled[:, self.matMode[0]], + xyz_sampled[:, self.matMode[1]], + xyz_sampled[:, self.matMode[2]], + ) + ).view(3, -1, 1, 2) + coordinate_line = torch.stack( + ( + xyz_sampled[:, self.vecMode[0]], + xyz_sampled[:, self.vecMode[1]], + xyz_sampled[:, self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + sigma_feature = torch.zeros((xyz_sampled.shape[0],), device=xyz_sampled.device) + + for idx_plane, (plane, line) in enumerate(zip(self.density_plane, self.density_line)): + plane_coef_point = F.grid_sample(plane, coordinate_plane[[idx_plane]], align_corners=True).view( + -1, xyz_sampled.shape[0] + ) + line_coef_point = F.grid_sample(line, coordinate_line[[idx_plane]], align_corners=True).view( + -1, xyz_sampled.shape[0] + ) + sigma_feature = sigma_feature + torch.sum(plane_coef_point * line_coef_point, dim=0) + + return sigma_feature + + def feature2density(self, density_features): + if self.fea2denseAct == "softplus": + return F.softplus(density_features + self.density_shift) + elif self.fea2denseAct == "relu": + return F.relu(density_features) + elif self.fea2denseAct == "relu_abs": + return F.relu(torch.abs(density_features)) + + def compute_appfeature(self, xyz_sampled): + # plane + line basis + coordinate_plane = torch.stack( + ( + xyz_sampled[:, self.matMode[0]], + xyz_sampled[:, self.matMode[1]], + xyz_sampled[:, self.matMode[2]], + ) + ).view(3, -1, 1, 2) + coordinate_line = torch.stack( + ( + xyz_sampled[:, self.vecMode[0]], + xyz_sampled[:, self.vecMode[1]], + xyz_sampled[:, self.vecMode[2]], + ) + ) + coordinate_line = torch.stack((torch.zeros_like(coordinate_line), coordinate_line), dim=-1).view(3, -1, 1, 2) + + plane_coef_point, line_coef_point = [], [] + for idx_plane, (plane, line) in enumerate(zip(self.app_plane, self.app_line)): + plane_coef_point.append( + F.grid_sample(plane, coordinate_plane[[idx_plane]], align_corners=True).view(-1, xyz_sampled.shape[0]) + ) + line_coef_point.append( + F.grid_sample(line, coordinate_line[[idx_plane]], align_corners=True).view(-1, xyz_sampled.shape[0]) + ) + + plane_coef_point, line_coef_point = torch.cat(plane_coef_point), torch.cat(line_coef_point) + return self.basis_mat((plane_coef_point * line_coef_point).T) + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + # Batch size + batch_size = x["viewdirs"].shape[0] + + # Positions + nSamples = x["points"].shape[-1] // 3 + xyz_sampled = x["points"].view(batch_size, nSamples, 3) + + # Distances + distances = 
x["distances"].view(batch_size, -1) + deltas = torch.cat( + [ + distances[..., 1:] - distances[..., :-1], + 1e10 * torch.ones_like(distances[:, :1]), + ], + dim=1, + ) + + # Viewdirs + viewdirs = x["viewdirs"].view(batch_size, nSamples, 3) + + # Weights + weights = x["weights"].view(batch_size, -1, 1) + + if "weights_shift" in x: + weights_shift = x["weights_shift"].view(batch_size, -1, 1) + + # Mask out + ray_valid = self.valid_mask(xyz_sampled) & (distances > 0) + + # Filter + if self.apply_filter_weights and self.cur_iter >= self.filter_wait_iters: + weights = weights.view(batch_size, -1) + min_weight = torch.topk(weights, self.filter_max_samples, dim=-1, sorted=False)[0].min(-1)[0].unsqueeze(-1) + + ray_valid = ray_valid & (weights >= (min_weight - 1e-8)) & (weights > self.filter_weight_thresh) + + weights = weights.view(batch_size, -1, 1) + else: + pass + + if self.alphaMask is not None and False: + # if self.alphaMask is not None: + alphas = self.alphaMask.sample_alpha(xyz_sampled[ray_valid]) + alpha_mask = alphas > 0 + ray_invalid = ~ray_valid + ray_invalid[ray_valid] |= ~alpha_mask + ray_valid = ~ray_invalid + + # Get densities + xyz_sampled = self.normalize_coord(xyz_sampled) + sigma = xyz_sampled.new_zeros(xyz_sampled.shape[:-1], device=xyz_sampled.device) + + if ray_valid.any(): + sigma_feature = self.compute_densityfeature(xyz_sampled[ray_valid]) + + # Convert to density + sigma_feature = sigma_feature * weights[ray_valid].view(sigma_feature.shape[0]) + + if "weights_shift" in x: + sigma_feature = sigma_feature + weights_shift[ray_valid].view(sigma_feature.shape[0]) + + valid_sigma = self.feature2density(sigma_feature) + + # Update valid + assert valid_sigma is not None + assert ray_valid is not None + + sigma[ray_valid] = valid_sigma + + alpha, weight, bg_weight = raw2alpha(sigma, deltas * self.distance_scale) + app_mask = weight > self.rayMarch_weight_thres + + # Get colors + rgb = xyz_sampled.new_zeros((xyz_sampled.shape[0], xyz_sampled.shape[1], 3), device=xyz_sampled.device) + + if app_mask.any(): + app_features = self.compute_appfeature(xyz_sampled[app_mask]) + + valid_rgbs = self.renderModule(xyz_sampled[app_mask], viewdirs[app_mask], app_features, {}) + rgb = valid_rgbs.new_zeros( # TODO: maybe remove + (xyz_sampled.shape[0], xyz_sampled.shape[1], 3), device=xyz_sampled.device + ) + assert valid_rgbs is not None + assert app_mask is not None + rgb[app_mask] = valid_rgbs + + # Transform colors + if "color_scale" in x: + color_scale = x["color_scale"].view(rgb.shape[0], rgb.shape[1], 3) + color_shift = x["color_shift"].view(rgb.shape[0], rgb.shape[1], 3) + rgb = scale_shift_color_all(rgb, color_scale, color_shift) + elif "color_transform" in x: + color_transform = x["color_transform"].view(rgb.shape[0], rgb.shape[1], 9) + color_shift = x["color_shift"].view(rgb.shape[0], rgb.shape[1], 3) + rgb = transform_color_all(rgb, color_transform, color_shift) + + # Over composite + acc_map = torch.sum(weight, -1) + rgb_map = torch.sum(weight[:, :, None] * rgb, -2) + + # White background + if (self.white_bg or (self.training and torch.rand((1,)) < 0.5)) and not self.black_bg: + rgb_map = rgb_map + (1.0 - acc_map[:, None]) + + # Transform colors + if "color_scale_global" in x: + rgb_map = scale_shift_color_one(rgb, rgb_map, x) + elif "color_transform_global" in x: + rgb_map = transform_color_one(rgb, rgb_map, x) + + # Clamp and return + if not self.training: + rgb_map = rgb_map.clamp(0, 1) + + # Other fields + outputs = {"rgb": rgb_map} + + fields = render_kwargs.get("fields", []) + 
no_over_fields = render_kwargs.get("no_over_fields", []) + pred_weights_fields = render_kwargs.get("pred_weights_fields", []) + + if len(fields) == 0: + return outputs + + if len(pred_weights_fields) > 0: + pred_weights = alpha2weights(weights[..., 0]) + + for key in fields: + if key == "render_weights": + outputs[key] = weight + elif key in no_over_fields: + outputs[key] = x[key].view(batch_size, -1) + elif key in pred_weights_fields: + outputs[key] = torch.sum( + pred_weights[..., None] * x[key].view(batch_size, nSamples, -1), + -2, + ) + else: + outputs[key] = torch.sum( + weight[..., None] * x[key].view(batch_size, nSamples, -1), + -2, + ) + + return outputs + + +tensorf_no_sample_dict = { + "tensor_vm_split_no_sample": TensorVMNoSample, +} diff --git a/nlf/nets/tensorf_reflect.py b/nlf/nets/tensorf_reflect.py new file mode 100644 index 0000000..922dc97 --- /dev/null +++ b/nlf/nets/tensorf_reflect.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 Anpei Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import time +from typing import Dict + +import numpy as np +import torch +import torch.nn +import torch.nn.functional as F +from torch import autograd + +from utils.sh_utils import eval_sh_bases +from utils.tensorf_utils import N_to_reso, alpha2weights, cal_n_samples, raw2alpha + +from .tensorf_no_sample import TensorVMNoSample + + +class TensorVMReflect(TensorVMNoSample): + __constants__ = ["density_plane"] + + def __init__(self, in_channels, out_channels, cfg, **kwargs): + super().__init__(in_channels, out_channels, cfg, **kwargs) + + def compute_density_normal(self, points, weights): + has_grad = torch.is_grad_enabled() + points = points.view(-1, 3) + + # Calculate gradient with respect to points + with torch.enable_grad(): + points = points.requires_grad_(True) + + density = super().compute_densityfeature(points) + density = density * weights.view(density.shape[0]) + density = self.feature2density(density) + + normal = -autograd.grad( + density, + points, + torch.ones_like(density, device=points.device), + create_graph=has_grad, + retain_graph=has_grad, + only_inputs=True, + )[0] + + return density, torch.nn.functional.normalize(normal, dim=-1) + + def forward(self, x: Dict[str, torch.Tensor], render_kwargs: Dict[str, str]): + fields = render_kwargs.get("fields", []) + no_over_fields = render_kwargs.get("no_over_fields", []) + pred_weights_fields = render_kwargs.get("pred_weights_fields", []) + + # Batch size + batch_size = x["viewdirs"].shape[0] + + # Positions + nSamples = x["points"].shape[-1] // 3 + xyz_sampled = x["points"].view(batch_size, nSamples, 3) + + # Distances + distances = x["distances"].view(batch_size, -1) + deltas = torch.cat( + [ + distances[..., 1:] - distances[..., :-1], + 1e10 * torch.ones_like(distances[:, :1]), + ], + dim=1, + ) + # deltas = torch.cat( + # [ + # distances[..., 0:1], + # distances[..., 1:] - distances[..., :-1], + # ], + # dim=1, + # ) + + # Viewdirs + viewdirs = x["viewdirs"].view(batch_size, nSamples, 3) + + # Weights + weights = x["weights"].view(batch_size, -1, 1) + + # Mask out + ray_valid = self.valid_mask(xyz_sampled) & (distances > 0) + + if self.alphaMask is not None and False: + alphas = self.alphaMask.sample_alpha(xyz_sampled[ray_valid]) + alpha_mask = alphas > 0 + ray_invalid = ~ray_valid + ray_invalid[ray_valid] |= ~alpha_mask + ray_valid = 
~ray_invalid + + assert ray_valid is not None + + # Get densities + xyz_sampled = self.normalize_coord(xyz_sampled) + sigma = xyz_sampled.new_zeros(xyz_sampled.shape[:-1], device=xyz_sampled.device) + + if "render_normal" in fields: + render_normal = xyz_sampled.new_zeros(xyz_sampled.shape, device=xyz_sampled.device) + + if ray_valid.any(): + valid_sigma, valid_render_normal = self.compute_density_normal( + xyz_sampled[ray_valid], weights[ray_valid] + ) + + # Update valid + assert valid_render_normal is not None + assert valid_sigma is not None + + render_normal[ray_valid] = valid_render_normal + x["render_normal"] = render_normal + + sigma[ray_valid] = valid_sigma + else: + if ray_valid.any(): + sigma_feature = self.compute_densityfeature(xyz_sampled[ray_valid]) + sigma_feature = sigma_feature * weights[ray_valid].view(sigma_feature.shape[0]) + valid_sigma = self.feature2density(sigma_feature) + + # Update valid + assert valid_sigma is not None + + sigma[ray_valid] = valid_sigma + + alpha, weight, bg_weight = raw2alpha(sigma, deltas * self.distance_scale) + app_mask = weight > self.rayMarch_weight_thres + + # if len(self.update_AlphaMask_list) == 0 or self.cur_iter < self.update_AlphaMask_list[0]: + # app_mask = torch.ones_like(app_mask) + + # Get colors + rgb = xyz_sampled.new_zeros((xyz_sampled.shape[0], xyz_sampled.shape[1], 3), device=xyz_sampled.device) + + if app_mask.any(): + app_features = self.compute_appfeature(xyz_sampled[app_mask]) + valid_rgbs = self.renderModule(xyz_sampled[app_mask], viewdirs[app_mask], app_features, {}) + assert valid_rgbs is not None + assert app_mask is not None + rgb[app_mask] = valid_rgbs + + # Over composite + acc_map = torch.sum(weight, -1) + rgb_map = torch.sum(weight[:, :, None] * rgb, -2) + + # White background + if self.white_bg or (self.training and torch.rand((1,)) < 0.5): + rgb_map = rgb_map + (1.0 - acc_map[:, None]) + + # Clamp and return + rgb_map = rgb_map.clamp(0, 1) + + # Other fields + outputs = {"rgb": rgb_map} + + if len(fields) == 0: + return outputs + + if len(pred_weights_fields) > 0: + pred_weights = alpha2weights(weights[..., 0]) + + for key in fields: + if key == "render_weights": + outputs[key] = weight + elif key in no_over_fields: + outputs[key] = x[key].view(batch_size, -1) + elif key in pred_weights_fields: + outputs[key] = torch.sum( + pred_weights[..., None] * x[key].view(batch_size, nSamples, -1), + -2, + ) + else: + outputs[key] = torch.sum( + weight[..., None] * x[key].view(batch_size, nSamples, -1), + -2, + ) + + return outputs + + +tensorf_reflect_dict = { + "tensor_vm_split_reflect": TensorVMReflect, +} diff --git a/nlf/param.py b/nlf/param.py new file mode 100644 index 0000000..2065516 --- /dev/null +++ b/nlf/param.py @@ -0,0 +1,417 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import numpy as np +import torch +from torch import nn + +from utils.intersect_utils import intersect_axis_plane, intersect_cylinder, intersect_sphere + +from .contract import contract_dict + + +def identity(cfg, **kwargs): + def param(x): + return x + + return param + + +def take(cfg, **kwargs): + input_channels = cfg.input_channels + + def param(x): + return x[..., input_channels].view(*(x.shape[:-1] + (len(input_channels),))) + + return param + + +def position(cfg, **kwargs): + def pos(rays): + return rays[..., :3] + + return pos + + +def xy_param(cfg, *kwargs): + def param(rays): + rays = rays.view(rays.shape[0], -1, 6) + rays = torch.cat([rays[..., :2], rays[..., 3:5]], dim=-1) + return rays.view(rays.shape[0], -1) + + return param + + +def rays_param(cfg, **kwargs): + def param(rays): + rays = rays.view(rays.shape[0], -1, 6) + rays_o = rays[..., :3] + rays_d = torch.nn.functional.normalize(rays[..., 3:6] - rays_o, dim=-1) + rays = torch.cat([rays_o, rays_d], dim=-1) + return rays.view(rays.shape[0], -1) + + return param + + +class TwoPlaneParam(nn.Module): + def __init__(self, cfg, **kwargs): + + super().__init__() + + self.in_channels = cfg.in_channels if "in_channels" in cfg else 6 + self.out_channels = cfg.n_dims if "n_dims" in cfg else 4 + + self.st_multiplier = cfg.st_multiplier if "st_multiplier" in cfg else 1.0 + self.uv_multiplier = cfg.uv_multiplier if "uv_multiplier" in cfg else 1.0 + + self.near = cfg.near if "near" in cfg else -1.0 + self.far = cfg.far if "far" in cfg else 0.0 + + self.origin = torch.tensor(cfg.origin if "origin" in cfg else [0.0, 0.0, 0.0], device="cuda") + + # Local param + self.use_local_param = cfg.use_local_param if "use_local_param" in cfg else False + self.voxel_size = cfg.voxel_size if "voxel_size" in cfg else 1.0 + + def forward(self, rays): + # Offset z + rays_o, rays_d = rays[..., :3] - self.origin.unsqueeze(0), rays[..., 3:6] + + if self.use_local_param: + z_offset = torch.round(rays_o[..., 2:3] / self.voxel_size) * self.voxel_size + rays_o = rays_o - z_offset + + rays = torch.cat([rays_o, rays_d], -1) + + # Intersection distances + t1 = intersect_axis_plane(rays, self.near, 2) + + t2 = intersect_axis_plane(rays, self.far, 2) + + # Param rays + param_rays = torch.cat( + [ + rays[..., :2] + rays[..., 3:5] * t1.unsqueeze(-1), + rays[..., :2] + rays[..., 3:5] * t2.unsqueeze(-1), + ], + dim=-1, + ) + + return param_rays + + def set_iter(self, i): + self.cur_iter = i + + +def multi_plane_param(cfg, **kwargs): + # Intersect hyper-params + initial_z = cfg.initial_z if "initial_z" in cfg else -1.0 + end_z = cfg.end_z if "end_z" in cfg else 1.0 + z_channels = cfg.z_channels if "z_channels" in cfg else 8 + voxel_size = cfg.voxel_size if "voxel_size" in cfg else 1.0 + + depth_samples = torch.linspace(initial_z, end_z, z_channels, device="cuda") * voxel_size + + def param(rays): + t = intersect_axis_plane(rays[..., None, :], depth_samples[None], 2, exclude=False) + + param_rays = rays[..., None, :3] + rays[..., None, 3:6] * t.unsqueeze(-1) + + return param_rays.view(rays.shape[0], -1) + + return param + + +def calc_scale(r): + return 1.0 / torch.sqrt(((-r + 1) * (-r + 1) + r * r) + 1e-8) + + +def two_plane_matrix(cfg, **kwargs): + global_near = cfg.global_near if "global_near" in cfg else -1.0 + near = (cfg.near if "near" in cfg else 0.0) * cfg.voxel_size + far = (cfg.far if "far" in cfg else 1.0) * cfg.voxel_size + + def param(rays): + # Get near, far zs + start_z = rays[..., 2] + near_z = near + start_z + far_z = far + 
start_z + + # Intersect + isect_pts_1, _ = intersect_axis_plane(rays, near_z, 2, exclude=False) + + isect_pts_2, _ = intersect_axis_plane(rays, far_z, 2, exclude=False) + + # Scale factors + near_scale = calc_scale((near_z - global_near)) + far_scale = calc_scale((far_z - global_near)) + + param_rays = torch.cat( + [ + isect_pts_1[..., :2] * near_scale[..., None], + isect_pts_1[..., -1:], + isect_pts_2[..., :2] * far_scale[..., None], + isect_pts_2[..., -1:], + ], + dim=-1, + ) + + return param_rays + + return param + + +def two_plane_pos(cfg, **kwargs): + near = cfg.near if "near" in cfg else -1.0 + far = cfg.far if "far" in cfg else 0.0 + + pre_mult = 1.0 + post_mult = 1.0 + + if "voxel_size" in cfg: + near = cfg.near if "near" in cfg else -0.5 + far = cfg.far if "far" in cfg else 0.5 # noqa + + pre_mult = 1.0 / cfg.voxel_size + post_mult = cfg.voxel_size + + if "multiplier" in cfg: + near = cfg.near if "near" in cfg else 0.0 + far = cfg.far if "far" in cfg else 1.0 # noqa + + post_mult = cfg.multiplier + + def pos(rays): + rays = rays * pre_mult + + isect_pts, _ = intersect_axis_plane(rays, near, 2, exclude=False) + + return isect_pts * post_mult + + return pos + + +class PlueckerParam(nn.Module): + def __init__(self, cfg, **kwargs): + + super().__init__() + + self.in_channels = cfg.in_channels if "in_channels" in cfg else 6 + self.out_channels = cfg.n_dims if "n_dims" in cfg else 6 + + self.direction_multiplier = cfg.direction_multiplier if "direction_multiplier" in cfg else 1.0 + self.moment_multiplier = cfg.moment_multiplier if "moment_multiplier" in cfg else 1.0 + + self.origin = torch.tensor(cfg.origin if "origin" in cfg else [0.0, 0.0, 0.0], device="cuda") + + # Local param + self.use_local_param = cfg.use_local_param if "use_local_param" in cfg else False + self.voxel_size = torch.tensor(cfg.voxel_size if "voxel_size" in cfg else [1.0, 1.0, 1.0]).cuda() + + def forward(self, rays): + rays_o, rays_d = rays[..., :3] - self.origin.unsqueeze(0), rays[..., 3:6] + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + if self.use_local_param: + origin = torch.round(rays_o / self.voxel_size.unsqueeze(0)) * self.voxel_size.unsqueeze(0) + rays_o = rays_o - origin + + m = torch.cross(rays_o, rays_d, dim=-1) + return torch.cat([rays_d * self.direction_multiplier, m * self.moment_multiplier], dim=-1) + + def set_iter(self, i): + self.cur_iter = i + + +class ContractPointsParam(nn.Module): + def __init__(self, cfg, **kwargs): + + super().__init__() + + self.param = ray_param_dict[cfg.param.fn](cfg.param) + + self.in_channels = self.param.in_channels + self.out_channels = self.param.out_channels + + self.contract_fn = contract_dict[cfg.contract.type](cfg.contract, **kwargs) + self.contract_start_channel = cfg.contract_start_channel + self.contract_end_channel = cfg.contract_end_channel + + def forward(self, rays): + param_rays = self.param(rays) + + return torch.cat( + [ + param_rays[..., : self.contract_start_channel], + self.contract_fn.contract_points( + param_rays[..., self.contract_start_channel : self.contract_end_channel] + ), + param_rays[..., self.contract_end_channel :], + ], + -1, + ) + + def set_iter(self, i): + self.cur_iter = i + self.param.set_iter(i) + + +def pluecker_pos(cfg): + def pos(rays): + rays_o, rays_d = rays[..., :3], rays[..., 3:6] + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + m = torch.cross(rays_o, rays_d, dim=-1) + rays_o = torch.cross(rays_d, m, dim=-1) + + return rays_o + + return pos + + +def pluecker_pos_cylinder(cfg): + def 
pos(rays): + rays_o, rays_d = rays[..., :3], rays[..., 3:6] + rays_o = torch.cat([rays_o[..., 0:1], torch.zeros_like(rays[..., 1:2]), rays_o[..., 2:3]], -1) + rays_d = torch.cat([rays_d[..., 0:1], torch.zeros_like(rays[..., 1:2]), rays_d[..., 2:3]], -1) + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + m = torch.cross(rays_o, rays_d, dim=-1) + rays_o = torch.cross(rays_d, m, dim=-1) + + return rays_o + + return pos + + +def spherical_param(cfg, **kwargs): + def param(rays): + isect_pts = intersect_sphere(rays, cfg.radius) / cfg.radius + + return torch.cat([isect_pts, rays[..., 3:6]], dim=-1) + + return param + + +def two_cylinder_param(cfg, **kwargs): + origin = torch.tensor(cfg.origin if "origin" in cfg else [0.0, 0.0, 0.0], device="cuda") + near = cfg.near if "near" in cfg else 1.0 + far = cfg.far if "far" in cfg else 2.0 + + def param(rays): + isect_pts_1, _ = intersect_cylinder(rays, origin[None], near, sort=False) + isect_pts_2, _ = intersect_cylinder(rays, origin[None], far, sort=False) + param_rays = torch.cat([isect_pts_1, isect_pts_2], dim=-1) + + return param_rays + + return param + + +time_param_dict = {"identity": identity} + + +class RayPlusTime(nn.Module): + def __init__(self, cfg, **kwargs): + + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + self.ray_param_fn = ray_param_dict[cfg.ray_param.fn](cfg.ray_param) + self.time_param_fn = time_param_dict[cfg.time_param.fn](cfg.time_param) + + self.in_channels = cfg.in_channels if "in_channels" in cfg else 7 + self.out_channels = cfg.n_dims if "n_dims" in cfg else self.in_channels + + self.dummy_layer = nn.Linear(1, 1) + + def forward(self, x): + param_rays = self.ray_param_fn(x[..., :-1]) + param_times = self.time_param_fn(x[..., -1:]) + return torch.cat([param_rays, param_times], dim=-1) + + def set_iter(self, i): + self.cur_iter = i + + +class VoxelCenterParam(nn.Module): + def __init__(self, cfg, **kwargs): + + super().__init__() + + self.in_channels = cfg.in_channels if "in_channels" in cfg else 3 + self.out_channels = cfg.n_dims if "n_dims" in cfg else 3 + self.origin = torch.tensor(cfg.origin if "origin" in cfg else [0.0, 0.0, 0.0], device="cuda") + self.voxel_size = torch.tensor(cfg.voxel_size if "voxel_size" in cfg else [1.0, 1.0, 1.0]).cuda() + + def forward(self, x): + x = x - self.origin.unsqueeze(0) + origin = torch.round(x / self.voxel_size.unsqueeze(0)) * self.voxel_size.unsqueeze(0) + return origin + + def set_iter(self, i): + self.cur_iter = i + + +class ZSliceParam(nn.Module): + def __init__(self, cfg, **kwargs): + + super().__init__() + + self.in_channels = cfg.in_channels if "in_channels" in cfg else 1 + self.out_channels = cfg.n_dims if "n_dims" in cfg else 1 + self.voxel_size = cfg.voxel_size if "voxel_size" in cfg else 1.0 + + def forward(self, x): + return torch.round(x / self.voxel_size) * self.voxel_size + + def set_iter(self, i): + self.cur_iter = i + + +ray_param_dict = { + "identity": identity, + "take": take, + "pluecker": PlueckerParam, + "position": position, + "spherical": spherical_param, + "xy": xy_param, + "rays": rays_param, + "two_plane": TwoPlaneParam, + "multi_plane": multi_plane_param, + "two_plane_matrix": two_plane_matrix, + "two_cylinder": two_cylinder_param, + "ray_plus_time": RayPlusTime, + "voxel_center": VoxelCenterParam, + "z_slice": ZSliceParam, + "contract_points": ContractPointsParam, +} + + +ray_param_pos_dict = { + "pluecker": pluecker_pos, + "two_plane": two_plane_pos, +} + + +class 
RayParam(nn.Module): + def __init__(self, cfg, **kwargs): + + super().__init__() + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + + self.ray_param_fn = ray_param_dict[cfg.fn](cfg, **kwargs) + self.in_channels = cfg.in_channels if "in_channels" in cfg else 6 + self.out_channels = cfg.n_dims if "n_dims" in cfg else self.in_channels + + self.dummy_layer = nn.Linear(1, 1) + + def forward(self, x): + return self.ray_param_fn(x) + + def set_iter(self, i): + self.cur_iter = i diff --git a/nlf/pe.py b/nlf/pe.py new file mode 100644 index 0000000..f019c27 --- /dev/null +++ b/nlf/pe.py @@ -0,0 +1,423 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import numpy as np +import torch +from torch import nn + + +class IdentityPE(nn.Module): + def __init__(self, in_channels, *args, **kwargs): + + super().__init__() + + self.in_channels = in_channels + self.out_channels = in_channels + + def forward(self, x): + return x + + def set_iter(self, i): + self.cur_iter = i + + +class BasicPE(nn.Module): + def __init__( + self, + in_channels, + cfg, + ): + super().__init__() + + self.n_freqs = cfg.n_freqs + self.cur_iter = 0 + + self.funcs = [torch.sin, torch.cos] + self.freq_multiplier = cfg.freq_multiplier if "freq_multiplier" in cfg else 2.0 + self.freq_bands = (self.freq_multiplier ** torch.linspace(1, cfg.n_freqs, cfg.n_freqs)).cuda() + + self.in_channels = in_channels + self.out_channels = in_channels * (len(self.funcs) * cfg.n_freqs + 1) + + self.dummy_layer = nn.Linear(1, 1) + + def forward(self, x): + # Input shape + input_shape = x.shape + + # Reshape + x = x.view(-1, x.shape[-1]) + batch_size = x.shape[0] + + # Get PE + out = [x] + + if self.n_freqs > 0: + cur_x = (self.freq_bands[None, None] * x[..., None]).view(batch_size, -1) + out += [torch.sin(cur_x)] + out += [torch.cos(cur_x)] + + return torch.cat(out, -1) + + def set_iter(self, i): + self.cur_iter = i + + +class BasicWindowedPE(nn.Module): + def __init__( + self, + in_channels, + cfg, + ): + super().__init__() + + self.n_freqs = cfg.n_freqs + self.cur_iter = 0 + self.wait_iters = cfg.wait_iters + self.max_freq_iter = float(cfg.max_freq_iter) + self.exclude_identity = cfg.exclude_identity if "exclude_identity" in cfg else False + + self.funcs = [torch.sin, torch.cos] + self.freq_multiplier = cfg.freq_multiplier if "freq_multiplier" in cfg else 2.0 + self.freq_bands = self.freq_multiplier ** torch.linspace(1, cfg.n_freqs, cfg.n_freqs) + + self.in_channels = in_channels + if self.exclude_identity: + self.out_channels = in_channels * (len(self.funcs) * cfg.n_freqs) + else: + self.out_channels = in_channels * (len(self.funcs) * cfg.n_freqs + 1) + + self.dummy_layer = nn.Linear(1, 1) + + def weight(self, j): + if self.max_freq_iter == 0: + return 1.0 + elif self.cur_iter < self.wait_iters: + return 0.0 + elif self.cur_iter > self.max_freq_iter: + return 1.0 + + cur_iter = self.cur_iter - self.wait_iters + alpha = (cur_iter / self.max_freq_iter) * self.n_freqs + return (1.0 - np.cos(np.pi * np.clip(alpha - j, 0.0, 1.0))) / 2 + + def forward(self, x): + out = [] + + if not self.exclude_identity: + out += [x] + + for j, freq in enumerate(self.freq_bands): + for func in self.funcs: + out += [self.weight(j) * func(freq * x)] + + return torch.cat(out, -1) + + def set_iter(self, i): + self.cur_iter = i + + 
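+# Frequency windowing in these encodings follows a half-cosine ramp. For BasicWindowedPE
+# above, with cur_iter measured from wait_iters, band j receives the weight
+#
+#     alpha = (cur_iter / max_freq_iter) * n_freqs
+#     w_j   = (1 - cos(pi * clip(alpha - j, 0, 1))) / 2
+#
+# so each band fades smoothly from 0 to 1 and all bands are fully enabled roughly once
+# cur_iter reaches wait_iters + max_freq_iter. WindowedPE below uses the same ramp but
+# schedules each band over an explicit (start, end) pair from window_iters.
+
+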
+class WindowedPE(nn.Module): + def __init__( + self, + in_channels, + cfg, + ): + super().__init__() + + self.in_channels = in_channels + self.funcs = [torch.sin, torch.cos] + + self.cur_iter = 0 + self.wait_iters = cfg.wait_iters + self.max_freq_iter = float(cfg.max_freq_iter) + + # PE + self.n_freqs = cfg.n_freqs + self.freq_multiplier = cfg.freq_multiplier if "freq_multiplier" in cfg else 2.0 + self.freq_bands = self.freq_multiplier ** torch.linspace(1.0, cfg.n_freqs, cfg.n_freqs) + + # What to do about identity + self.base_multiplier = cfg.base_multiplier if "base_multiplier" in cfg else 1.0 + + self.ceil = cfg.ceil if "ceil" in cfg else False + self.exclude_identity = cfg.exclude_identity if "exclude_identity" in cfg else False + self.window_identity = 1 if "window_identity" in cfg and cfg.window_identity else 0 + + if self.exclude_identity: + self.out_channels = in_channels * (len(self.funcs) * cfg.n_freqs) + else: + self.out_channels = in_channels * (len(self.funcs) * cfg.n_freqs + 1) + + # Windowing + if self.max_freq_iter > 0 or "window_iters" in cfg: + self.window_after = self.max_freq_iter / self.n_freqs + + if "window_iters" in cfg: + self.window_iters = cfg.window_iters + self.max_freq_iter = np.max(cfg.window_iters) + elif self.window_identity != 0: + self.window_iters = [(self.wait_iters, self.window_after + self.wait_iters)] + [ + (self.window_after * i + self.wait_iters, self.window_after * (i + 1) + self.wait_iters) + for i in range(1, self.n_freqs + 1) + ] + self.max_freq_iter = (self.n_freqs + 1) * self.window_after + else: + self.window_iters = [ + (self.window_after * i + self.wait_iters, self.window_after * (i + 1) + self.wait_iters) + for i in range(0, self.n_freqs) + ] + + self.dummy_layer = nn.Linear(1, 1) + + def weight(self, j): + cur_iter = self.cur_iter - self.wait_iters + + if j < 0: + return 1.0 + elif cur_iter < 0: + return 0.0 + elif self.max_freq_iter == 0: + return 1.0 + elif self.cur_iter > self.max_freq_iter: + return 1.0 + elif (self.window_iters[j][1] - self.window_iters[j][0]) == 0: + if self.cur_iter >= self.window_iters[j][0]: + return 1.0 + else: + return 0.0 + + alpha = (cur_iter - self.window_iters[j][0]) / float(self.window_iters[j][1] - self.window_iters[j][0]) + + if self.ceil: + return np.ceil((1.0 - np.cos(np.pi * np.clip(alpha, 0.0, 1.0))) / 2) + else: + return (1.0 - np.cos(np.pi * np.clip(alpha, 0.0, 1.0))) / 2 + + def forward(self, x): + out = [] + + if not self.exclude_identity: + # out = [self.base_multiplier * self.weight(-1 + self.window_identity) * x] + out = [x] + + for j, freq in enumerate(self.freq_bands): + for func in self.funcs: + out += [self.weight(j + self.window_identity) * func(self.base_multiplier * freq * x)] + + return torch.cat(out, -1) + + def set_iter(self, i): + self.cur_iter = i + + +class SelectPE(nn.Module): + def __init__( + self, + in_channels, + cfg, + ): + super().__init__() + + self.start_channel = cfg.start_channel if "start_channel" in cfg else 0 + self.in_channels = in_channels - self.start_channel + self.select_channels = cfg.select_channels + self.discard = cfg.discard if "discard" in cfg else False + + self.pe = WindowedPE(self.select_channels, cfg) + + if self.discard: + self.out_channels = self.pe.out_channels + else: + self.out_channels = (self.in_channels - self.select_channels) + self.pe.out_channels + + def forward(self, x): + out_x = self.pe(x[..., self.start_channel : self.start_channel + self.select_channels]) + + if not self.discard: + out_x = torch.cat([out_x, x[..., self.select_channels 
:]], -1) + + return out_x + + def set_iter(self, i): + self.pe.set_iter(i) + + +class RandomPE(nn.Module): + def __init__( + self, + in_channels, + cfg, + ): + super().__init__() + + self.n_freqs = cfg.n_freqs + self.sigma = cfg.sigma + self.funcs = [torch.sin, torch.cos] + + self.in_channels = in_channels + self.out_channels = len(self.funcs) * cfg.n_freqs + + self.embedding_matrix = (torch.randn((self.n_freqs, self.in_channels)) * self.sigma).cuda() + + def forward(self, x): + # Convert to correct device + embedding_matrix = self.embedding_matrix.type_as(x) + + out = [] + raw = (embedding_matrix @ x.permute(1, 0)).permute(1, 0) + + for func in self.funcs: + out += [func(raw)] + + return torch.cat(out, -1) + + def set_iter(self, i): + self.cur_iter = i + + +class WindowedRandomPE(nn.Module): + def __init__( + self, + in_channels, + cfg, + ): + super().__init__() + + self.n_freqs = cfg.n_freqs + self.sigma = cfg.sigma + self.funcs = [torch.sin, torch.cos] + + self.in_channels = in_channels + self.out_channels = len(self.funcs) * cfg.n_freqs + self.in_channels + + # Embedding matrix + self.embedding_matrix = (torch.randn((self.in_channels, self.n_freqs)) * self.sigma).cuda() + + self.embedding_mag = torch.norm(self.embedding_matrix, dim=0) + sort_idx = torch.argsort(self.embedding_mag, dim=0) + + self.embedding_matrix = torch.gather(self.embedding_matrix, -1, sort_idx[None].repeat(self.in_channels, 1)) + + self.embedding_mag = torch.norm(self.embedding_matrix, dim=0) + + # Windowing + self.cur_iter = 0 + self.wait_iters = cfg.wait_iters + self.max_freq_iter = float(cfg.max_freq_iter) + + self.ceil = cfg.ceil if "ceil" in cfg else False + self.window_identity = 1 if "window_identity" in cfg and cfg.window_identity else 0 + + # Windowing + if self.max_freq_iter > 0 or "window_iters" in cfg: + self.window_after = self.max_freq_iter / self.n_freqs + + if "window_iters" in cfg: + self.window_iters = cfg.window_iters + self.max_freq_iter = np.max(cfg.window_iters) + elif self.window_identity != 0: + self.window_iters = [(self.wait_iters, self.window_after + self.wait_iters)] + [ + (self.window_after * i + self.wait_iters, self.window_after * (i + 1) + self.wait_iters) + for i in range(1, self.n_freqs + 1) + ] + self.max_freq_iter = (self.n_freqs + 1) * self.window_after + else: + self.window_iters = [ + (self.window_after * i + self.wait_iters, self.window_after * (i + 1) + self.wait_iters) + for i in range(0, self.n_freqs) + ] + + def weight(self, j): + cur_iter = self.cur_iter - self.wait_iters + + if cur_iter < 0: + return 0.0 + elif j < 0: + return 1.0 + elif self.max_freq_iter == 0: + return 1.0 + elif self.cur_iter > self.max_freq_iter: + return 1.0 + elif (self.window_iters[j][1] - self.window_iters[j][0]) == 0: + if self.cur_iter >= self.window_iters[j][0]: + return 1.0 + else: + return 0.0 + + alpha = (cur_iter - self.window_iters[j][0]) / float(self.window_iters[j][1] - self.window_iters[j][0]) + + if self.ceil: + return np.ceil((1.0 - np.cos(np.pi * np.clip(alpha, 0.0, 1.0))) / 2) + else: + return (1.0 - np.cos(np.pi * np.clip(alpha, 0.0, 1.0))) / 2 + + def forward(self, x): + # Convert to correct device + embedding_matrix = self.embedding_matrix.type_as(x) + + raw = x @ embedding_matrix + + out = [self.weight(-1 + self.window_identity) * x] + + for j in range(raw.shape[-1]): + for func in self.funcs: + out += [self.weight(j + self.window_identity) * func(raw[..., j : j + 1])] + + return torch.cat(out, -1) + + def set_iter(self, i): + self.cur_iter = i + + +class 
LearnablePE(nn.Module): + def __init__( + self, + in_channels, + cfg, + ): + super().__init__() + + self.n_freqs = cfg.n_freqs + self.sigma = cfg.sigma + self.funcs = [torch.sin, torch.cos] + + self.in_channels = in_channels + self.out_channels = len(self.funcs) * cfg.n_freqs + self.embedding_layer = nn.Linear(in_channels, cfg.n_freqs) + + self.embedding_matrix = (torch.randn((self.n_freqs, self.in_channels)) * self.sigma).cuda() + self.embedding_matrix = nn.Parameter(self.embedding_matrix, requires_grad=True) + + self.embedding_bias = (torch.randn((1, self.n_freqs)) * self.sigma).cuda() + self.embedding_bias = nn.Parameter(self.embedding_bias, requires_grad=True) + + def forward(self, x): + # Convert to correct device + embedding_matrix = self.embedding_matrix.type_as(x) + embedding_bias = self.embedding_bias.type_as(x) + + out = [] + raw = (embedding_matrix @ x.permute(1, 0)).permute(1, 0) + embedding_bias + + for func in self.funcs: + out += [func(raw)] + + return torch.cat(out, -1) + + def set_iter(self, i): + self.cur_iter = i + + +pe_dict = { + "basic": BasicPE, + "windowed": WindowedPE, + "identity": IdentityPE, + "random": RandomPE, + "windowed_random": WindowedRandomPE, + "learnable": LearnablePE, + "select": SelectPE, +} diff --git a/nlf/regularizers/__init__.py b/nlf/regularizers/__init__.py new file mode 100644 index 0000000..1930995 --- /dev/null +++ b/nlf/regularizers/__init__.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from .coarse import CoarseRegularizer +from .fourier import FourierRegularizer +from .geometry import FlowRegularizer, GeometryFeedbackRegularizer, GeometryRegularizer, RenderWeightRegularizer +from .point import PointRegularizer +from .ray_density import RayDensityRegularizer, SimpleRayDensityRegularizer +from .teacher import BlurryTeacherRegularizer, TeacherModelRegularizer, TeacherRegularizer +from .tensor import TensorTV +from .tensorf import TensoRF +from .voxel_sparsity import VoxelSparsityRegularizer +from .warp import WarpLevelSetRegularizer, WarpRegularizer + +regularizer_dict = { + "fourier": FourierRegularizer, + "coarse": CoarseRegularizer, + "teacher": TeacherRegularizer, + "teacher_model": TeacherModelRegularizer, + "blurry_teacher": BlurryTeacherRegularizer, + "voxel_sparsity": VoxelSparsityRegularizer, + "warp": WarpRegularizer, + "warp_level": WarpLevelSetRegularizer, + "point": PointRegularizer, + "geometry": GeometryRegularizer, + "geometry_feedback": GeometryFeedbackRegularizer, + "flow": FlowRegularizer, + "render_weight": RenderWeightRegularizer, + "tensor_tv": TensorTV, + "tensorf": TensoRF, + "ray_density": RayDensityRegularizer, + "simple_ray_density": SimpleRayDensityRegularizer, +} diff --git a/nlf/regularizers/base.py b/nlf/regularizers/base.py new file mode 100644 index 0000000..cd58287 --- /dev/null +++ b/nlf/regularizers/base.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import numpy as np +import torch +from torch import nn + +from losses import loss_dict +from nlf.rendering import render_chunked +from utils.ray_utils import get_weight_map + + +class BaseRegularizer(nn.Module): + def __init__(self, system, cfg): + super().__init__() + + self.cfg = cfg + self.net_chunk = cfg.net_chunk if "net_chunk" in cfg else 32768 + self.batch_size = cfg.batch_size if "batch_size" in cfg else 4096 + self.weight = cfg.weight if "weight" in cfg else 0.0 + + self.use_inp_freq = cfg.use_inp_freq if "use_inp_freq" in cfg else 0 + self.wait_iters = cfg.wait_iters if "wait_iters" in cfg else 0 + self.warmup_iters = cfg.warmup_iters if "warmup_iters" in cfg else 0 + self.stop_iters = cfg.stop_iters if "stop_iters" in cfg else float("inf") + + ## (Hack) Prevent from storing system variables + self.systems = [system] + + ## Losses + self.build_losses() + + def build_losses(self): + self.loss_fns = {} + + for key in self.cfg.keys(): + if "loss" in key: + loss_fn = loss_dict[self.cfg[key].type](self.cfg[key]) + self.loss_fns[key] = loss_fn + + if "weight_map" in key: + for attr in self.cfg[key].keys(): + if attr == "angle_std": + angle_std = float(np.radians(self.cfg[key].angle_std)) + self.cfg[key].angle_std = angle_std + + def warming_up(self): + return self.cur_iter < self.warmup_iters + + def _do_loss(self, name): + return ( + self.cfg[name].wait_iters != "inf" + and self.cur_iter >= self.cfg[name].wait_iters + and self.cfg[name].weight > 0 + ) + + def _loss_fn(self, name, *args): + return self.loss_fns[name](*args) * self.cfg[name].weight + + def get_system(self): + return self.systems[0] + + def get_dataset(self): + system = self.get_system() + + if "dataset" in self.cfg: + return system.trainer.datamodule.regularizer_datasets[self.cfg.type] + else: + return None + + def to_ndc(self, rays): + dataset = self.get_dataset() + return dataset.to_ndc(rays) + + def get_batch(self, train_batch, batch_idx, apply_ndc=False): + system = self.get_system() + dataset = self.get_dataset() + batch = {} + + use_inp = self.use_inp_freq == 0 or ( + (float(self.use_inp_freq) != float("inf")) and (batch_idx % self.use_inp_freq == 0) + ) + + if dataset is not None and not use_inp: + ## Use regularizer dataset + if "jitter" in self.cfg: + batch = dataset.get_batch(batch_idx, self.batch_size, self.cfg.jitter) + else: + batch = dataset.get_batch(batch_idx, self.batch_size, None) + + ## Convert to correct device + for k in batch.keys(): + batch[k] = batch[k].type_as(train_batch["coords"]) + + else: + ## Use training dataset + batch["coords"] = train_batch["coords"][: self.batch_size] + batch["rgb"] = train_batch["rgb"][: self.batch_size] + + ## Convert to correct device + for k in batch.keys(): + batch[k] = batch[k].type_as(train_batch["coords"]) + + return batch + + def forward(self, x): + return x + + def run_chunked(self, x): + return render_chunked(x, self, {}, chunk=self.cfg.ray_chunk) + + def get_weight_map( + self, + rays, + jitter_rays, + name, + ): + with torch.no_grad(): + return get_weight_map( + rays, + jitter_rays, + self.cfg[name], + softmax=False, + ) + + def _loss(self, batch, batch_idx): + return 0.0 + + def loss(self, batch, batch_results, batch_idx): + if self.cur_iter < 0: + return 0.0 + elif self.cur_iter >= self.stop_iters: + return 0.0 + + return self._loss(batch, batch_results, batch_idx) + + def loss_weight(self): + system = self.get_system() + + if isinstance(self.weight, float): + return self.weight + elif self.weight.type == 
"exponential_decay": + weight_num_iters = ( + self.weight.num_epochs + * len(system.trainer.datamodule.train_dataset) + // system.trainer.datamodule.cur_batch_size + ) + exponent = self.cur_iter / weight_num_iters + return self.weight.start * np.power(self.weight.decay, exponent) + else: + return self.weight + + def set_iter(self, i): + self.cur_iter = i - self.wait_iters + + def validation_video(self, batch, batch_idx): + return {} + + def validation_image(self, batch, batch_idx): + return {} + + @property + def render_kwargs(self): + return {} diff --git a/nlf/regularizers/coarse.py b/nlf/regularizers/coarse.py new file mode 100644 index 0000000..f31419e --- /dev/null +++ b/nlf/regularizers/coarse.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from losses import loss_dict + +from .base import BaseRegularizer + + +class CoarseRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + self.loss_fn = loss_dict[self.cfg.loss.type]() + + def _loss(self, train_batch, batch_results, batch_idx): + system = self.get_system() + + if self.cur_iter >= self.cfg.weight.stop_iters: + return 0.0 + + # Get inputs + rays = train_batch["coords"] + rgb = train_batch["rgb"] + + # Loss + results = system(rays, coarse=True) + pred_rgb = results["rgb"] + + loss = self.loss_fn(pred_rgb, rgb) + + print("Coarse loss:", loss) + + return loss diff --git a/nlf/regularizers/fourier.py b/nlf/regularizers/fourier.py new file mode 100644 index 0000000..dc79904 --- /dev/null +++ b/nlf/regularizers/fourier.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import torch + +from datasets.fourier import fft_rgb + +from .base import BaseRegularizer + + +class FourierRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + self.range = cfg.range + self.use_absolute = "complex" not in cfg.fourier_loss.type + + def loss(self, train_batch, batch_results, batch_idx): + system = self.get_system() + dataset = self.get_dataset() + + ## Get rays + all_rgb_fft = dataset.all_rgb_fft.to(train_batch["rays"].device) + + ## Query + rays = dataset.get_random_rays(self.cfg.range).type_as(train_batch["rays"]) + rgb = system(rays)["rgb"].view(1, system.img_wh[1], system.img_wh[0], 3) + + #### Losses #### + + all_losses = {loss: 0.0 for loss in self.loss_fns.keys()} + + if self._do_loss("fourier_loss"): + rgb_fft = fft_rgb(rgb) + + if self.use_absolute: + rgb_fft = torch.abs(rgb_fft) + all_rgb_fft = torch.abs(all_rgb_fft) + + all_losses["fourier_loss"] = self._loss_fn("fourier_loss", rgb_fft, all_rgb_fft) + + ## Total loss + total_loss = 0.0 + + for name in all_losses.keys(): + print(name + ":", all_losses[name]) + total_loss += all_losses[name] + + return total_loss diff --git a/nlf/regularizers/geometry.py b/nlf/regularizers/geometry.py new file mode 100644 index 0000000..fbb1e7b --- /dev/null +++ b/nlf/regularizers/geometry.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import torch +import torch.nn as nn + +from nlf.contract import contract_dict +from utils.ray_utils import dot, from_ndc, reflect + +from .base import BaseRegularizer + + +class GeometryRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + # Setup losses + self.cfg = cfg + + # Variables + self.fields = list(cfg.fields) + + # Origin + self.origin = torch.tensor(cfg.origin if "origin" in cfg else [0.0, 0.0, 0.0], device="cuda") + + # Contract function + if "contract" in cfg: + self.contract_fn = contract_dict[cfg.contract.type](cfg.contract, system=system) + else: + self.contract_fn = contract_dict["identity"]({}) + + # How many points to use + self.num_points = cfg.num_points if "num_points" in cfg else -1 + + def _loss(self, batch, outputs, batch_idx): + # Get coords + rays = batch["coords"] + rays = torch.clone(rays.view(-1, rays.shape[-1])) + + # Render points + pred_points = outputs[self.fields[0]] + pred_points = pred_points.view(pred_points.shape[0], -1, 3) + + pred_distance = outputs[self.fields[1]] + pred_distance = pred_distance.view(pred_points.shape[0], -1) + + # Get ground truth points + gt_depth = batch["depth"] + + rays_o, rays_d = rays[..., :3] - self.origin[None], rays[..., 3:6] + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + gt_points = self.contract_fn.contract_points(rays_o + gt_depth * rays_d) + + # Compute mask + mask = (gt_depth != 0.0) & (pred_distance != 0.0) + + # Loss + diff = torch.norm(pred_points - gt_points.unsqueeze(1), dim=-1) * mask.float() + + if self.num_points > 0: + diff = torch.sort(diff, dim=-1)[0][..., : self.num_points] + + loss = torch.mean(diff) + return loss + + @property + def render_kwargs(self): + return { + "fields": self.fields, + "no_over_fields": self.fields, + } + + +class GeometryFeedbackRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + # Setup losses + self.cfg = cfg + + # Variables + self.student_fields = list(cfg.student_fields) + self.teacher_fields = list(cfg.teacher_fields) + self.sizes = list(cfg.sizes) if "sizes" in cfg else [3 for s in self.student_fields] + self.weights = list(cfg.weights) if "weights" in cfg else [1.0 for s in self.student_fields] + + # How many points to use + self.num_points = cfg.num_points if "num_points" in cfg else -1 + + def _loss(self, batch, outputs, batch_idx): + # Get coords + rays = batch["coords"] + rays = torch.clone(rays.view(-1, rays.shape[-1])) + + # Weights + render_weights = outputs["render_weights"] + + # Total loss + total_loss = 0.0 + + for idx, loss_weight in enumerate(self.weights): + # Student outputs + student_points = outputs[self.student_fields[idx]] + student_points = student_points.view(student_points.shape[0], -1, 1, self.sizes[-1]) + + # Teacher outputs + teacher_points = outputs[self.teacher_fields[idx]] + + if not self.teacher_fields[idx] == "render_normal": + teacher_points = teacher_points.detach() + else: + pass + + teacher_points = teacher_points.view(teacher_points.shape[0], student_points.shape[1], -1, self.sizes[-1]) + + # Loss + if not self.teacher_fields[idx] == "render_normal": + cur_render_weights = render_weights.detach() + else: + cur_render_weights = render_weights.detach() + # cur_render_weights = render_weights + + cur_render_weights = 
cur_render_weights.view(cur_render_weights.shape[0], student_points.shape[1], -1) + + # Special case normal + if self.teacher_fields[idx] == "render_normal": + viewdirs = outputs["viewdirs"] + + diff = 1.0 - dot(student_points, teacher_points) + loss_match = (diff * cur_render_weights).sum((-2, -1)).mean() + + dot_dirs_normal = dot( + student_points.view(student_points.shape[0], -1, 3), + viewdirs.view(student_points.shape[0], -1, 3), + keepdim=True, + ) + loss_penalty = ( + ( + torch.square(torch.maximum(dot_dirs_normal, torch.zeros_like(dot_dirs_normal))) + * cur_render_weights + ) + .sum((-2, -1)) + .mean() + ) + + loss = loss_match * loss_weight[0] + loss_penalty * loss_weight[1] + else: + diff = torch.square(student_points - teacher_points).sum(-1) + # diff = torch.norm(student_points - teacher_points, dim=-1) + # diff = torch.abs(student_points - teacher_points).sum(-1) + diff = (diff * cur_render_weights).sum((-2, -1)) + + loss = torch.mean(diff) * loss_weight + + total_loss = total_loss + loss + + return total_loss + + @property + def render_kwargs(self): + return { + "fields": self.student_fields + self.teacher_fields + ["render_weights", "viewdirs"], + "no_over_fields": self.student_fields + self.teacher_fields + ["render_weights", "viewdirs"], + } + + +class FlowRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + # Setup losses + self.cfg = cfg + + # Variables + self.fields = list(cfg.fields) + + # Origin + self.origin = torch.tensor(cfg.origin if "origin" in cfg else [0.0, 0.0, 0.0], device="cuda") + + # Contract function + if "contract" in cfg: + self.contract_fn = contract_dict[cfg.contract.type](cfg.contract) + else: + self.contract_fn = contract_dict["identity"]({}) + + # How many points to use + self.num_points = cfg.num_points if "num_points" in cfg else -1 + + def _loss(self, batch, outputs, batch_idx): + # Get coords + rays = batch["coords"] + rays = torch.clone(rays.view(-1, rays.shape[-1])) + + # Render points and distance + pred_points_start = outputs[self.fields[0]] + pred_points_start = pred_points_start.view(pred_points_start.shape[0], -1, 3) + + pred_points_end = outputs[self.fields[1]] + pred_points_end = pred_points_end.view(pred_points_end.shape[0], -1, 3) + pred_points = torch.cat([pred_points_start, pred_points_end], -1) + + pred_distance = outputs[self.fields[2]] + pred_distance = pred_distance.view(pred_points_start.shape[0], -1) + + # Get ground truth flow + gt_flow = batch["flow"] + gt_depth = batch["depth"] + + rays_o, rays_d = rays[..., :3] - self.origin[None], rays[..., 3:6] + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + gt_world_points = rays_o + gt_depth * rays_d + + gt_points_start = self.contract_fn.contract_points(gt_world_points) + gt_points_end = self.contract_fn.contract_points(gt_world_points + gt_flow) + gt_points = torch.cat([gt_points_start, gt_points_end], -1) + + # Compute mask + mask = (gt_flow != 0.0).any(dim=-1, keepdim=True) & (gt_depth != 0.0) & (pred_distance != 0.0) + + # Loss + diff = torch.norm(pred_points - gt_points.unsqueeze(1), dim=-1) * mask.float() + + if self.num_points > 0: + diff = torch.sort(diff, dim=-1)[0][..., : self.num_points] + + loss = torch.mean(diff) + return loss + + @property + def render_kwargs(self): + return {"fields": self.fields, "no_over_fields": self.fields, "no_flow_jitter": True} + + +class RenderWeightRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + # Setup losses + self.cfg = cfg 
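+
+        # _loss below blends a prior that pulls the per-sample weights toward one (weighted by
+        # 1 - ease_weight()) with a term matching them to the detached rendered weights (weighted
+        # by ease_weight()), so the matching term takes over once cur_iter reaches window_iters.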
+ + # How many points to use + self.num_points = cfg.num_points if "num_points" in cfg else -1 + + # Ease iterations + self.window_iters = cfg.window_iters + + def ease_weight(self): + return min(max(self.cur_iter / float(self.window_iters), 0.0), 1.0) + + def _loss(self, batch, batch_results, batch_idx): + weights = batch_results["weights"] + render_weights = batch_results["render_weights"].view(*weights.shape).detach() + + w = self.ease_weight() + + if True: + sparsity_loss_0 = torch.mean(torch.abs(weights)) + sparsity_loss_1 = torch.mean(torch.abs(1.0 - weights)) * 0.1 + match_loss = torch.mean(torch.abs(weights - render_weights)) + # return (sparsity_loss_0 + match_loss) * w + sparsity_loss_1 * (1 - w) + return match_loss * w + sparsity_loss_1 * (1 - w) + else: + entropy_loss = torch.mean(-render_weights * torch.log(render_weights + 1e-8)) + return entropy_loss * w + + @property + def render_kwargs(self): + return { + "fields": ["weights", "render_weights"], + "no_over_fields": ["weights"], + } diff --git a/nlf/regularizers/point.py b/nlf/regularizers/point.py new file mode 100644 index 0000000..6514e83 --- /dev/null +++ b/nlf/regularizers/point.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import copy + +import torch +from omegaconf import OmegaConf # @manual //github/third-party/omry/omegaconf:omegaconf + +from losses import loss_dict + +from .base import BaseRegularizer + + +class PointRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + # Setup losses + self.loss_fn = loss_dict[self.cfg.loss.type]() + + def _loss(self, train_batch, batch_results, batch_idx): + #### Prepare #### + system = self.get_system() + + ## Batch + rays = train_batch["coords"] + + ## tform constraints + point_bias = system.render("embed_params", rays, return_bias=True)["params"] + point_bias = point_bias.view(-1, 3) + + loss = self.loss_fn(point_bias, torch.zeros_like(point_bias)) + + return loss diff --git a/nlf/regularizers/ray_density.py b/nlf/regularizers/ray_density.py new file mode 100644 index 0000000..f7c5f96 --- /dev/null +++ b/nlf/regularizers/ray_density.py @@ -0,0 +1,364 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import copy + +import numpy as np +import torch +import torch.nn.functional as F +from omegaconf import OmegaConf # @manual //github/third-party/omry/omegaconf:omegaconf + +from losses import loss_dict +from nlf.intersect import intersect_dict +from nlf.param import ray_param_dict, ray_param_pos_dict +from utils.ray_utils import ( + compute_sigma_angle, + compute_sigma_dot, + get_random_pixels, + get_ray_directions_from_pixels_K, + jitter_ray_directions, + jitter_ray_origins, +) + +from .base import BaseRegularizer + + +def sample_simplex(batch_size, n, device): + samples = torch.rand(batch_size, n, device=device) + samples = torch.cat([torch.zeros_like(samples[:, :1]), samples], dim=-1) + samples, _ = torch.sort(samples, dim=-1) + return samples[:, 1:] - samples[:, :-1] + + +class RayDensityRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + self.num_views_for_random = cfg.num_views_for_random + self.num_views_for_ray = cfg.num_views_for_ray + + self.extrapolate_freq = cfg.extrapolate_freq + self.extrapolate_scale = cfg.extrapolate_scale + self.batch_size = system.cfg.training.batch_size + + self.dataset = system.dm.train_dataset + self.all_poses = torch.tensor(self.dataset.poses).cuda().float() + self.all_centers = self.all_poses[:, :3, -1] + self.K = torch.tensor(self.dataset.K).cuda().float() + self.use_ndc = self.dataset.use_ndc + + # Perturb + self.use_jitter = cfg.use_jitter if "use_jitter" in cfg else False + self.jitter_pos_std = cfg.jitter.pos_std + self.jitter_dir_std = cfg.jitter.dir_std + + # Intersect fn + if system.is_subdivided: + model_cfg = system.cfg.model.ray + else: + model_cfg = system.cfg.model + + self.z_channels = model_cfg.embedding.z_channels + + isect_cfg = model_cfg.embedding.intersect + self.intersect_fn = intersect_dict[isect_cfg.type](self.z_channels, isect_cfg) + + # Sigma computation + self.angle_std = float(np.radians(self.cfg.angle_std)) if "angle_std" in cfg else -1.0 + self.dot_std = float(cfg.dot_std) if "dot_std" in cfg else -1.0 + self.angle_std = self.angle_std / self.dataset.num_images + self.dot_std = self.dot_std / self.dataset.num_images + + # Loss + self.loss_fn = loss_dict[self.cfg.loss.type](self.cfg.loss) + + def get_random_views(self, n_views, n_images): + return list(np.random.choice(np.arange(0, n_images), size=n_views, replace=False)) + + def _loss(self, train_batch, batch_results, batch_idx): + #### Prepare #### + system = self.get_system() + W, H = self.dataset.img_wh[0], self.dataset.img_wh[1] + + with torch.no_grad(): + # Subset of cameras + views_idx = torch.randint( + 0, self.dataset.num_images - 1, (self.batch_size * self.num_views_for_random,), device="cuda" + ) + anchor_poses = self.all_poses[views_idx].reshape(-1, 3, 4) + anchor_positions = self.all_centers[views_idx].reshape(-1, 3) + + # Generate random rays + pixels = get_random_pixels(self.batch_size * self.num_views_for_random, H, W, device="cuda") + anchor_directions = get_ray_directions_from_pixels_K(pixels, self.K, centered_pixels=True).float() + anchor_directions = (anchor_poses[:, :3, :3] @ anchor_directions.unsqueeze(-1)).squeeze(-1) + anchor_directions = torch.nn.functional.normalize(anchor_directions, dim=-1) + + # Reshape + anchor_positions = anchor_positions.view(self.batch_size, self.num_views_for_random, 3) + anchor_directions = anchor_directions.view(self.batch_size, self.num_views_for_random, 3) + + # Extrapolate (sometimes) + if (batch_idx % 3) == 1: + print("Extrapolating") + 
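+
+                # Rescale the anchor positions and view directions about their centroids by
+                # extrapolate_scale, so the random rays can also cover poses outside the span
+                # of the training cameras.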
+ # Offset positions + anchor_centroids = anchor_positions.mean(1).unsqueeze(1) + anchor_positions = (anchor_positions - anchor_centroids) * self.extrapolate_scale + anchor_centroids + + # Offset directions + anchor_dir_centroids = anchor_directions.mean(1).unsqueeze(1) + anchor_dir_centroids = torch.nn.functional.normalize(anchor_dir_centroids, dim=-1) + anchor_directions = ( + anchor_directions - anchor_dir_centroids + ) * self.extrapolate_scale + anchor_dir_centroids + anchor_directions = torch.nn.functional.normalize(anchor_directions, dim=-1) + + # Interpolate (sometimes) + if (batch_idx % 2) == 1: + print("Interpolating") + # Sample weights from unit simplex + weights = sample_simplex(self.batch_size, self.num_views_for_random, "cuda") + + # Interpolated positions, directions + anchor_positions = (weights.unsqueeze(-1) * anchor_positions).sum(1) + anchor_directions = torch.nn.functional.normalize( + (weights.unsqueeze(-1) * anchor_directions).sum(1), dim=-1 + ) + else: + print("Choosing position") + + # Grab first position, direction + anchor_positions = anchor_positions[:, 0] + anchor_directions = anchor_directions[:, 0] + + # Jitter + if self.use_jitter: + anchor_positions = ( + anchor_positions + torch.randn(anchor_positions.shape, device="cuda") * self.jitter_pos_std + ) + anchor_directions = ( + anchor_directions + torch.randn(anchor_directions.shape, device="cuda") * self.jitter_dir_std + ) + anchor_directions = torch.nn.functional.normalize(anchor_directions, dim=-1) + + # Rays + random_rays = torch.cat([anchor_positions, anchor_directions], dim=-1) + print("Rays shape:", random_rays.shape) + + # Get closest cameras to random rays + centers = self.all_centers[None].repeat(self.batch_size, 1, 1) + poses = self.all_poses[None].repeat(self.batch_size, 1, 1, 1) + + camera_dists = torch.linalg.norm(random_rays[..., None, :3] - centers, dim=-1) + sort_idx = torch.argsort(camera_dists, dim=-1) + + centers = centers.permute(0, 2, 1) + centers = torch.gather(centers, -1, sort_idx[:, None, :].repeat(1, 3, 1)) + centers = centers.permute(0, 2, 1)[:, : self.num_views_for_ray] + + poses = poses.permute(0, 2, 3, 1) + poses = torch.gather(poses, -1, sort_idx[:, None, None, :].repeat(1, 3, 4, 1)) + poses = poses.permute(0, 3, 1, 2)[:, : self.num_views_for_ray] + + # Get intersection points along ray + random_rays = random_rays.view(-1, 6) + z_vals = random_rays.new_zeros(random_rays.shape[0], self.z_channels, 1) + + if self.use_ndc: + random_rays_ndc = self.dataset.to_ndc(random_rays) + t_p = self.intersect_fn.intersect(random_rays_ndc, random_rays_ndc, z_vals) + + o_z = -self.dataset.near + t = (o_z / (1 - t_p) - o_z) / random_rays[..., 5, None] + t = t + (o_z - random_rays[..., None, 2]) / random_rays[..., None, 5] + else: + t = self.intersect_fn.intersect(random_rays, random_rays_ndc, z_vals) + + points = random_rays[..., None, :3] + t[..., None] * random_rays[..., None, 3:6] + + # TODO: Project points into cameras + # - Apply poses + # - Apply intrinsics + # - Find points outside of pixel bounds + + # Get directions + camera_points = points.unsqueeze(1) - centers.unsqueeze(-2) + dirs = torch.nn.functional.normalize(camera_points, dim=-1) + + # TODO: Automatically determine size of kernels for computing density + # - Unstrucured Lumigraph: Use max angle + # - Ours: Use angle standard deviation + # - Other: something more robust? MAD (median absolute deviation?) 
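+
+            # dirs holds, for each of the num_views_for_ray closest cameras, the unit direction from
+            # the camera center to every intersection point along the random ray. compute_sigma_angle
+            # converts the angular spread between these directions and the ray direction into a density
+            # estimate, which is squashed with a scaled sigmoid below and used as the regression target
+            # for the "embed_params" output.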
+ + # Compute sigma + h_sigma = compute_sigma_angle(random_rays[..., None, None, 3:6], dirs, angle_std=self.angle_std) + # h_sigma = compute_sigma_dot(random_rays[..., None, None, 3:6], dirs, dot_std=self.dot_std) + + h_sigma = (torch.sigmoid(h_sigma * 1e-1) - 0.5) * 2.0 + h_sigma[torch.isnan(h_sigma)] = 1 + + print("Sigma", h_sigma[0]) + + # Loss + random_rays = random_rays.view(-1, 6) + sigma = system.render("embed_params", random_rays)["params"] + sigma = sigma.view(*h_sigma.shape) + total_loss = self.loss_fn(h_sigma, sigma) + + return total_loss + + +class SimpleRayDensityRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + self.num_views_for_random = cfg.num_views_for_random + self.num_views_for_ray = cfg.num_views_for_ray + + self.extrapolate_freq = cfg.extrapolate_freq + self.extrapolate_scale = cfg.extrapolate_scale + self.batch_size = system.cfg.training.batch_size + + self.dataset = system.dm.train_dataset + self.all_poses = torch.tensor(self.dataset.poses).cuda().float() + self.all_centers = self.all_poses[:, :3, -1] + self.K = torch.tensor(self.dataset.K).cuda().float() + + self.use_ndc = self.dataset.use_ndc + + # Perturb + self.use_jitter = cfg.use_jitter if "use_jitter" in cfg else False + self.jitter_pos_std = cfg.jitter.pos_std + self.jitter_dir_std = cfg.jitter.dir_std + + # Intersect fn + if system.is_subdivided: + model_cfg = system.cfg.model.ray + else: + model_cfg = system.cfg.model + + self.z_channels = model_cfg.embedding.z_channels + + isect_cfg = model_cfg.embedding.intersect + self.intersect_fn = intersect_dict[isect_cfg.type](self.z_channels, isect_cfg) + + # Loss + self.loss_fn = loss_dict[self.cfg.loss.type](self.cfg.loss) + + def get_random_views(self, n_views, n_images): + return list(np.random.choice(np.arange(0, n_images), size=n_views, replace=False)) + + def _loss(self, train_batch, batch_results, batch_idx): + #### Prepare #### + system = self.get_system() + W, H = self.dataset.img_wh[0], self.dataset.img_wh[1] + + with torch.no_grad(): + # Subset of cameras + views_idx = torch.randint( + 0, self.dataset.num_images - 1, (self.batch_size * self.num_views_for_random,), device="cuda" + ) + anchor_poses = self.all_poses[views_idx].reshape(-1, 3, 4) + anchor_positions = self.all_centers[views_idx].reshape(-1, 3) + + # Generate random rays + pixels = get_random_pixels(self.batch_size * self.num_views_for_random, H, W, device="cuda") + anchor_directions = get_ray_directions_from_pixels_K(pixels, self.K, centered_pixels=True).float() + anchor_directions = (anchor_poses[:, :3, :3] @ anchor_directions.unsqueeze(-1)).squeeze(-1) + anchor_directions = torch.nn.functional.normalize(anchor_directions, dim=-1) + + # Reshape + anchor_positions = anchor_positions.view(self.batch_size, self.num_views_for_random, 3) + anchor_directions = anchor_directions.view(self.batch_size, self.num_views_for_random, 3) + + # Extrapolate (sometimes) + if (batch_idx % 3) == 0: + print("Extrapolating") + + # Offset positions + anchor_centroids = anchor_positions.mean(1).unsqueeze(1) + anchor_positions = (anchor_positions - anchor_centroids) * self.extrapolate_scale + anchor_centroids + + # Offset directions + anchor_dir_centroids = anchor_directions.mean(1).unsqueeze(1) + anchor_dir_centroids = torch.nn.functional.normalize(anchor_dir_centroids, dim=-1) + anchor_directions = ( + anchor_directions - anchor_dir_centroids + ) * self.extrapolate_scale + anchor_dir_centroids + anchor_directions = torch.nn.functional.normalize(anchor_directions, 
dim=-1) + + # Interpolate (sometimes) + if (batch_idx % 2) == 1: + print("Interpolating") + # Sample weights from unit simplex + weights = sample_simplex(self.batch_size, self.num_views_for_random, "cuda") + + # Interpolated positions, directions + anchor_positions = (weights.unsqueeze(-1) * anchor_positions).sum(1) + anchor_directions = torch.nn.functional.normalize( + (weights.unsqueeze(-1) * anchor_directions).sum(1), dim=-1 + ) + else: + print("Choosing position") + + # Grab first position, direction + anchor_positions = anchor_positions[:, 0] + anchor_directions = anchor_directions[:, 0] + + # Jitter + if self.use_jitter: + anchor_positions = ( + anchor_positions + torch.randn(anchor_positions.shape, device="cuda") * self.jitter_pos_std + ) + anchor_directions = ( + anchor_directions + torch.randn(anchor_directions.shape, device="cuda") * self.jitter_dir_std + ) + anchor_directions = torch.nn.functional.normalize(anchor_directions, dim=-1) + + # Rays + random_rays = torch.cat([anchor_positions, anchor_directions], dim=-1) + + if self.use_ndc: + random_rays = self.dataset.to_ndc(random_rays) + random_rays[..., :3] = random_rays[..., :3].clamp(-2, 2) + + # Predicted sigma + sigma = system.render("embed_params", random_rays)["params"] + + # Weight map + N = self.dataset.num_images + + if (batch_idx % 3) == 0: + # weights = 1 - torch.exp( + # -torch.square(random_rays[..., :2]).mean(-1) + # ) + # weights += 1 - torch.exp( + # -torch.square(random_rays[..., 3:5]).mean(-1) + # ) + # weights = 2 * weights.unsqueeze(-1) / N + + weights = ( + 4.0 + * ( + 1 + - torch.exp( + -torch.square(random_rays[..., :2]).mean(-1) + -torch.square(random_rays[..., 3:5]).mean(-1) + ) + ) + / N + ) + + # weights = 4.0 / N + else: + weights = 1.0 / N + + ## Total loss + sigma = sigma.view(self.batch_size, -1) + total_loss = self.loss_fn(sigma * weights, torch.ones_like(sigma) * weights) + print("Ray Density loss:", total_loss) + + return total_loss diff --git a/nlf/regularizers/teacher.py b/nlf/regularizers/teacher.py new file mode 100644 index 0000000..6af498f --- /dev/null +++ b/nlf/regularizers/teacher.py @@ -0,0 +1,252 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import os + +import torch +from kornia.filters import gaussian_blur2d +from omegaconf import DictConfig, OmegaConf + +from losses import loss_dict +from nlf.models import model_dict +from nlf.rendering import render_chunked, render_fn_dict +from utils.tensorf_utils import AlphaGridMask + +from .base import BaseRegularizer + + +class TeacherRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + self.loss_fn = loss_dict[self.cfg.loss.type]() + self.use_inp_freq = "inf" + + def _loss(self, train_batch, batch_results, batch_idx): + system = self.get_system() + + if self.cur_iter >= self.cfg.weight.stop_iters: + return 0.0 + + # Get inputs + dataset = self.get_dataset() + batch = dataset.get_batch(batch_idx, self.batch_size) + + rays = batch["coords"].type_as(train_batch["coords"]) + rgb = batch["rgb"].type_as(train_batch["rgb"]) + + # Loss + results = system(rays) + pred_rgb = results["rgb"] + + loss = self.loss_fn(pred_rgb, rgb) + + return loss + + +class BlurryTeacherRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + self.loss_fn = loss_dict[self.cfg.loss.type]() + self.use_inp_freq = "inf" + + self.patch_width = self.cfg.dataset.patch_width + self.blur_radius = self.cfg.blur_radius + self.batch_size = self.patch_width * self.patch_width + + def _loss(self, train_batch, batch_idx): + system = self.get_system() + + if self.cur_iter >= self.cfg.weight.stop_iters: + return 0.0 + + # Get inputs + dataset = self.get_dataset() + batch = dataset.get_batch(batch_idx, self.batch_size) + + rays = batch["coords"].type_as(train_batch["coords"]) + rgb = batch["rgb"].type_as(train_batch["rgb"]) + + # Run forward and blur + pred_rgb = system(rays)["rgb"] + pred_rgb = pred_rgb.view(-1, self.patch_width, self.patch_width, 3).permute(0, 3, 1, 2) + rgb = rgb.view(-1, self.patch_width, self.patch_width, 3).permute(0, 3, 1, 2) + + if self.blur_radius > 0: + blur_rgb = gaussian_blur2d( + pred_rgb, + (self.blur_radius * 2 + 1, self.blur_radius * 2 + 1), + (self.blur_radius / 3.0, self.blur_radius / 3.0), + ) + blur_rgb = blur_rgb[..., self.blur_radius : -self.blur_radius, self.blur_radius : -self.blur_radius] + rgb = rgb[..., self.blur_radius : -self.blur_radius, self.blur_radius : -self.blur_radius] + else: + blur_rgb = pred_rgb + + # Loss + return self.loss_fn(blur_rgb, rgb) + + +class TeacherModelRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + self.cfg = cfg + + self.model_ckpt_dir = os.path.expanduser(system.cfg.params.ckpt_dir) + self.model_ckpt_path = os.path.join(self.model_ckpt_dir, cfg.model_ckpt_path) + + # Create model + self.model_config = self.cfg.model + self.model_start_epoch = cfg.model_start_epoch + + model = model_dict[self.model_config.type](self.model_config, system=system) + + # Load from checkpoint + model = model.cuda().eval() + model_state_dict = torch.load(self.model_ckpt_path)["state_dict"] + self.load_state_dict_for_model(model, model_state_dict) + + # Set iteration + model.set_iter(system.cfg.training.iters_per_epoch * self.model_start_epoch) + + # List of models (do not save in subsequent checkpoints) + self.models = [model] + + # TODO: + # - incorporate dataset info + # - better ray generation + # - some debugging + + # Random ray generation + self.origin_range = torch.tensor( + cfg.origin_range if "origin_range" in cfg else [[-1.0, -1.0, -1.0], [1.0, 1.0, 1.0]] + ).cuda() + self.direction_range = 
torch.tensor( + cfg.direction_range if "direction_range" in cfg else [[-1.0, -1.0, -1.0], [1.0, 1.0, 1.0]] + ).cuda() + self.extra_range = torch.tensor(cfg.extra_range if "extra_range" in cfg else [[0.0], [0.0]]).cuda() + + self.use_ndc = cfg.use_ndc if "use_ndc" in cfg else False + self.convert_ndc = cfg.convert_ndc if "convert_ndc" in cfg else False + + def generate_random_rays(self, coords): + batch_size = coords.shape[0] + + origins = ( + torch.rand((batch_size, 3), device=coords.device) * (self.origin_range[1:2] - self.origin_range[0:1]) + + self.origin_range[0:1] + ) + + directions = ( + torch.rand((batch_size, 3), device=coords.device) * (self.direction_range[1:2] - self.direction_range[0:1]) + + self.direction_range[0:1] + ) + + if self.use_ndc: + # directions = (directions / directions[..., -1:]) * 2.0 + directions = torch.nn.functional.normalize(directions, dim=-1) + else: + directions = torch.nn.functional.normalize(directions, dim=-1) + + extras = ( + torch.rand((batch_size, self.extra_range.shape[-1]), device=coords.device) + * (self.extra_range[1:2] - self.extra_range[0:1]) + + self.extra_range[0:1] + ) + + return torch.cat([origins, directions, extras], dim=-1) + + def generate_random_rays_convex(self, coords): + batch_size = coords.shape[0] + + # Extras + rays = coords[..., :6] + extras = coords[..., 6:] + + # Collect + num_convex = 4 + + rand_idx = torch.randint( + low=0, high=batch_size, size=(batch_size * (num_convex - 1), 1), device=rays.device + ).repeat(1, 6) + + rand_rays = torch.gather(rays, 0, rand_idx) + rand_rays = rand_rays.reshape(batch_size, num_convex - 1, 6) + rand_rays = torch.cat([rays.unsqueeze(1), rand_rays], dim=1) + + # Convex combination + weights = torch.rand((rays.shape[0], num_convex), device=rays.device) + weights = weights / (weights.sum(1).unsqueeze(1) + 1e-8) + + # Valid rays + rays = (rays.unsqueeze(1) * weights.unsqueeze(-1)).sum(1) + rays_o = rays[..., 0:3] + rays_d = rays[..., 3:6] + + # NOTE: normalization affects distances in over-composite + if self.use_ndc: + rays_d = (rays_d / rays_d[..., -1:]) * 2.0 + else: + rays_d = torch.nn.functional.normalize(rays_d, dim=-1) + + return torch.cat([rays_o, rays_d, extras], -1) + + def _loss(self, batch, outputs, batch_idx): + system = self.get_system() + + # Teacher results + with torch.no_grad(): + # Generate random rays + coords = batch["coords"] + + # new_coords = self.generate_random_rays(coords) + new_coords = self.generate_random_rays_convex(coords) + # new_coords = coords + + # Run forward + teacher_results = self.models[0](new_coords, {}) + + # Model results + model_results = system(new_coords, **system.regularizer_render_kwargs) + weight = (teacher_results["rgb"] != 0.0).any(dim=-1, keepdim=True) + weight = torch.ones_like(weight) + + # Loss + image_loss = system.loss(model_results["rgb"] * weight, teacher_results["rgb"] * weight, **batch) + return image_loss + + def load_state_dict_for_model(self, model, state_dict, strict=False): + new_state_dict = {} + + # For loading subdivision variables (voxel grid, voxel size, etc.) 
# + alpha_aabb = None + alpha_volume = None + + for key in state_dict.keys(): + new_key = key.split("render_fn.model.")[-1] + new_state_dict[new_key] = state_dict[key] + + # Update size of tensor components + if "alpha_aabb" in key: + alpha_aabb = state_dict[key] + elif "alpha_volume" in key: + alpha_volume = state_dict[key] + elif "gridSize" in key: + model.color_model.net.gridSize = state_dict[key] + + model.color_model.net.init_svd_volume(model.color_model.net.gridSize[0], model.color_model.net.device) + + model.load_state_dict(new_state_dict, strict=False) + + # Update other grid-size-dependent variables + model.color_model.net.update_stepSize(model.color_model.net.gridSize.cpu()) + + # Update alpha mask + if alpha_volume is not None: + device = model.color_model.net.device + model.color_model.net.alphaMask = AlphaGridMask(device, alpha_aabb.to(device), alpha_volume.to(device)) diff --git a/nlf/regularizers/tensor.py b/nlf/regularizers/tensor.py new file mode 100644 index 0000000..eb0cbdd --- /dev/null +++ b/nlf/regularizers/tensor.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import copy + +import torch +from kornia import create_meshgrid +from omegaconf import OmegaConf # @manual //github/third-party/omry/omegaconf:omegaconf + +from losses import loss_dict + +from .base import BaseRegularizer + + +def roll_1(tensor): + return torch.cat( + [ + tensor[:, :1], + tensor[:, :-1], + ], + dim=1, + ) + + +def roll_2(tensor): + return torch.cat( + [ + tensor[:, :, :1], + tensor[:, :, :-1], + ], + dim=2, + ) + + +def roll_3(tensor): + return torch.cat( + [ + tensor[:, :, :, :1], + tensor[:, :, :, :-1], + ], + dim=3, + ) + + +def tv_loss(tensor, skip_row, skip_col): + if len(tensor.shape) == 4: + diff_z = torch.square(torch.roll(tensor, 1, dims=1) - tensor) + diff_y = torch.square(torch.roll(tensor, 1, dims=2) - tensor) + diff_x = torch.square(torch.roll(tensor, 1, dims=3) - tensor) + diff = diff_z + diff_y + diff_x + elif len(tensor.shape) == 3: + diff_y = torch.square(torch.roll(tensor, 1, dims=1) - tensor) + diff_x = torch.square(torch.roll(tensor, 1, dims=2) - tensor) + diff = diff_y + diff_x + + x = torch.linspace(0, tensor.shape[2] - 1, tensor.shape[2], dtype=torch.int32, device=tensor.device) + y = torch.linspace(0, tensor.shape[1] - 1, tensor.shape[1], dtype=torch.int32, device=tensor.device) + + rem_x = torch.remainder(x, skip_col) + rem_y = torch.remainder(y, skip_row) + + skip_x = ((rem_x == 0) | (rem_x == skip_col - 1)) & (skip_col != -1) + skip_y = ((rem_y == 0) | (rem_y == skip_row - 1)) & (skip_row != -1) + + skip = torch.stack(list(torch.meshgrid([skip_y, skip_x])), dim=-1) + skip = torch.any(skip, dim=-1, keepdim=False)[None] + + diff = diff * (~skip).float() + + return torch.mean(torch.sqrt(diff + 1e-8)) + + +class TensorTV(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + # Setup losses + self.use_tv = cfg.use_tv + self.opacity_weight = cfg.opacity_weight + self.color_weight = cfg.color_weight + + self.skip_row = cfg.skip_row if "skip_row" in cfg else -1 + self.skip_col = cfg.skip_col if "skip_col" in cfg else -1 + + def _loss(self, train_batch, batch_results, batch_idx): + #### Prepare #### + system = self.get_system() + + # Get tensors + if system.is_subdivided: + model = system.render_fn.model.ray_model.color_model + 
else: + model = system.render_fn.model.color_model + + loss = 0.0 + M = len(model.net.tensors) + + for tensor_prod in model.net.tensors: + # Calculate mean TV loss + num_opacity_basis = tensor_prod.num_opacity_basis + N = len(tensor_prod.tensors) + + for i in range(N): + tensor = tensor_prod.tensors[i].tensor + + # Color and appearance + color_tensor = tensor[:-num_opacity_basis] + opacity_tensor = tensor[-num_opacity_basis:] + + # TV loss + if self.use_tv: + loss += self.opacity_weight * tv_loss(opacity_tensor, self.skip_row, self.skip_col) / (N * M) + loss += self.color_weight * tv_loss(color_tensor, self.skip_row, self.skip_col) / (N * M) + else: + loss += self.opacity_weight * torch.mean(torch.abs(opacity_tensor)) / (N * M) + + # Return + return loss diff --git a/nlf/regularizers/tensorf.py b/nlf/regularizers/tensorf.py new file mode 100644 index 0000000..8084228 --- /dev/null +++ b/nlf/regularizers/tensorf.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import numpy as np +import torch +import torch.nn as nn + +from .base import BaseRegularizer + + +class TVLoss(nn.Module): + def __init__(self, TVLoss_weight=1): + super(TVLoss, self).__init__() + self.TVLoss_weight = TVLoss_weight + + def forward(self, x): + batch_size = x.size()[0] + h_x = x.size()[2] + w_x = x.size()[3] + + count_h = self._tensor_size(x[:, :, 1:, :]) + count_w = self._tensor_size(x[:, :, :, 1:]) + h_tv = torch.pow((x[:, :, 1:, :] - x[:, :, : h_x - 1, :]), 2).sum() + w_tv = torch.pow((x[:, :, :, 1:] - x[:, :, :, : w_x - 1]), 2).sum() + + return self.TVLoss_weight * 2 * (h_tv / count_h + w_tv / count_w) / batch_size + + def _tensor_size(self, t): + return t.size()[1] * t.size()[2] * t.size()[3] + + +class TensoRF(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + # Setup losses + self.cfg = cfg + self.tvreg = TVLoss() + + self.update_AlphaMask_list = cfg.update_AlphaMask_list + + self.lr_factor = self.cfg.lr_decay_target_ratio ** (1 / self.cfg.n_iters) + self.total_num_tv_iters = ( + self.cfg.total_num_tv_iters + if "total_num_tv_iters" in self.cfg + else int(np.round((np.log(1e-4) / np.log(self.cfg.lr_decay_target_ratio)) * self.cfg.n_iters)) + ) + + self.L1_reg_weight = self.cfg.L1_weight_initial + self.TV_weight_density = self.cfg.TV_weight_density + self.TV_weight_app = self.cfg.TV_weight_app + + def _loss(self, train_batch, batch_results, batch_idx): + #### Prepare #### + system = self.get_system() + + # Get tensors + if system.is_subdivided: + tensorf = system.render_fn.model.ray_model.color_model.net + else: + tensorf = system.render_fn.model.color_model.net + + total_loss = 0.0 + + # L1 loss + if self.L1_reg_weight > 0: + loss_reg_L1 = tensorf.density_L1() + total_loss += self.L1_reg_weight * loss_reg_L1 + + # Return if decayed TV enough + if self.cur_iter > self.total_num_tv_iters: + return total_loss + + # TV Loss + if self.TV_weight_density > 0: + self.TV_weight_density *= self.lr_factor + loss_tv = tensorf.TV_loss_density(self.tvreg) * self.cfg.TV_weight_density + total_loss = total_loss + loss_tv + + if self.TV_weight_app > 0: + self.TV_weight_app *= self.lr_factor + loss_tv = loss_tv + tensorf.TV_loss_app(self.tvreg) * self.cfg.TV_weight_app + total_loss = total_loss + loss_tv + + return total_loss + + def set_iter(self, iteration): + 
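        """Advance the iteration counter and, once the first alpha-mask update step
        is reached, switch the L1 weight from its initial value to the post-update
        value (this mirrors the TensoRF training schedule)."""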
super().set_iter(iteration) + + if len(self.update_AlphaMask_list) > 0 and self.cur_iter == self.update_AlphaMask_list[0]: + self.L1_reg_weight = self.cfg.L1_weight_rest + print("continuing L1_reg_weight", self.L1_reg_weight) diff --git a/nlf/regularizers/voxel_sparsity.py b/nlf/regularizers/voxel_sparsity.py new file mode 100644 index 0000000..ad4a0d7 --- /dev/null +++ b/nlf/regularizers/voxel_sparsity.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import torch + +from .base import BaseRegularizer + + +class VoxelSparsityRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + def get_subdivision(self): + system = self.get_system() + return system.subdivision + + def loss(self, batch, batch_results, batch_idx): + system = self.get_system() + pos_model = system.render_fn.model.pos_model + chunk = system.cfg.training.ray_chunk + subdivision = self.get_subdivision() + + points = subdivision.voxel_centers[0, : subdivision.num_voxels].cuda() + sampled_xyz = points.unsqueeze(1) + + sh = sampled_xyz.shape[:-1] # noqa + sampled_idx = torch.arange(points.size(0), device=points.device)[:, None].expand(*sampled_xyz.size()[:2]) + sampled_xyz, sampled_idx = sampled_xyz.reshape(-1, 3), sampled_idx.reshape(-1) + + ## Evaluate + B = sampled_xyz.shape[0] + out_chunks = [] + + for i in range(0, B, chunk): + # Get points, idx + cur_pts = sampled_xyz[i : i + chunk].unsqueeze(1) + cur_idx = sampled_idx[i : i + chunk].unsqueeze(1) + + cur_mask = cur_idx.eq(-1) + cur_idx[cur_mask] = 0 + + # Get codes + cur_codes = subdivision.get_vertex_codes(cur_pts, cur_idx, cur_mask) + + # Combine inputs + cur_inps = torch.cat([cur_pts, cur_codes], -1) + cur_inps = cur_inps.view(-1, cur_inps.shape[-1]) + out_chunks += [pos_model.pos_forward(cur_inps)] + + out = torch.cat(out_chunks, 0) + out = out[..., -1].view(-1) + + loss = -(out * torch.log(out + 1e-8) + (1 - out) * torch.log(1 - out + 1e-8)).mean() + print(out.max(), loss) + return loss diff --git a/nlf/regularizers/warp.py b/nlf/regularizers/warp.py new file mode 100644 index 0000000..bd6f4dd --- /dev/null +++ b/nlf/regularizers/warp.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import copy + +import torch +from omegaconf import OmegaConf # @manual //github/third-party/omry/omegaconf:omegaconf + +from losses import loss_dict +from nlf.param import ray_param_dict, ray_param_pos_dict + +from .base import BaseRegularizer + + +class WarpRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + # Setup parametrization + param_cfg = copy.deepcopy(cfg.param) + OmegaConf.set_struct(param_cfg, False) + + if system.is_subdivided: + system_param_cfg = system.cfg.model.ray.param + + for key in system_param_cfg.keys(): + param_cfg.__dict__[key] = system_param_cfg[key] + setattr(param_cfg, key, system_param_cfg[key]) + else: + system_param_cfg = system.cfg.model.param + + for key in system_param_cfg.keys(): + param_cfg.__dict__[key] = system_param_cfg[key] + setattr(param_cfg, key, system_param_cfg[key]) + + self.ray_param_fn = ray_param_dict[param_cfg.fn](param_cfg) + self.ray_param_pos_fn = ray_param_pos_dict[param_cfg.fn](param_cfg) + self.param_channels = self.cfg.param.n_dims + + # Setup losses + self.loss_fn = loss_dict[self.cfg.loss.type]() + self.use_inp_freq = cfg.use_inp_freq + + def _loss(self, train_batch, batch_results, batch_idx): + #### Prepare #### + system = self.get_system() + + ## Batch + batch = self.get_batch(train_batch, batch_idx) + rays = batch["coords"] + + ## tform constraints + raw = system.render("embed_params", rays)["value"] + out_channels = raw.shape[-1] // (self.param_channels + 1) + tform = raw[..., :out_channels].reshape(-1, out_channels, self.param_channels) + + _, S, _ = torch.svd(tform) + + loss = self.loss_fn(S[..., 2:], torch.zeros_like(S[..., 2:])) + + return loss + + +class WarpLevelSetRegularizer(BaseRegularizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + # Setup parametrization + param_cfg = copy.deepcopy(cfg.param) + OmegaConf.set_struct(param_cfg, False) + + if system.is_subdivided: + system_param_cfg = system.cfg.model.ray.param + + for key in system_param_cfg.keys(): + param_cfg.__dict__[key] = system_param_cfg[key] + setattr(param_cfg, key, system_param_cfg[key]) + else: + system_param_cfg = system.cfg.model.param + + for key in system_param_cfg.keys(): + param_cfg.__dict__[key] = system_param_cfg[key] + setattr(param_cfg, key, system_param_cfg[key]) + + self.ray_param_fn = ray_param_dict[param_cfg.fn](param_cfg) + self.ray_param_pos_fn = ray_param_pos_dict[param_cfg.fn](param_cfg) + self.param_channels = self.cfg.param.n_dims + + # Setup losses + self.svd_loss_fn = loss_dict[self.cfg.svd_loss.type]() + self.level_loss_fn = loss_dict[self.cfg.level_loss.type]() + self.use_inp_freq = cfg.use_inp_freq + + def _loss(self, train_batch, batch_results, batch_idx): + #### Prepare #### + system = self.get_system() + + ## Batch + batch = self.get_batch(train_batch, batch_idx) + rays = batch["coords"] + + rgb_channels = 4 if system.is_subdivided else 3 + + ## Get params + outputs = system.render("forward_all", rays, embed_params=True) + params = outputs["value"][..., rgb_channels:].view(-1, outputs["value"].shape[-1] - rgb_channels) + rgba = outputs["value"][..., :rgb_channels].view(-1, rgb_channels) + + out_channels = params.shape[-1] // (self.param_channels + 1) + tform = params[..., :-out_channels].reshape(-1, out_channels, self.param_channels) + bias = params[..., -out_channels:] + + ## Decompose params + U, S, V = torch.linalg.svd(tform) + + ## Jitter directions + if not system.is_subdivided: + param_rays = self.ray_param_fn(rays) + 
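        # For subdivided systems, the jitter is applied to the per-slice
        # intersection inputs produced by the subdivision instead of to the rays.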
else: + num_slices = outputs["isect_inps"].shape[1] + param_rays = self.ray_param_fn(outputs["isect_inps"][..., :6]) + param_rays = param_rays.view(-1, param_rays.shape[-1]) + + jitter = self.cfg.jitter + + jitter_dirs = torch.randn((V.shape[0], jitter.bundle_size, V.shape[-2] - 2, 1), device=V.device) * jitter.pos + jitter_dirs = (jitter_dirs * V[..., 2:, :].unsqueeze(1)).mean(-2) + jitter_dirs = jitter_dirs.view(-1, jitter_dirs.shape[-1]) + + ## Reshape all for comparison + param_rays = param_rays.unsqueeze(1).repeat(1, jitter.bundle_size, 1).view(-1, param_rays.shape[-1]) + + rays = rays.unsqueeze(1).repeat(1, jitter.bundle_size, 1).view(-1, rays.shape[-1]) + + tform = tform.reshape(tform.shape[0], -1) + tform = tform.unsqueeze(1).repeat(1, jitter.bundle_size, 1).view(-1, tform.shape[-1]) + + bias = bias.unsqueeze(1).repeat(1, jitter.bundle_size, 1).view(-1, bias.shape[-1]) + + ## Reshape for forward + if system.is_subdivided: + isect_codes = outputs["isect_inps"][..., 6:] + isect_codes = isect_codes.view(-1, isect_codes.shape[-1]) + isect_codes = isect_codes.unsqueeze(1).repeat(1, jitter.bundle_size, 1).view(-1, isect_codes.shape[-1]) + + isect_mask = outputs["isect_mask"] + isect_mask = isect_mask.view(-1) + isect_mask = isect_mask.unsqueeze(1).repeat(1, jitter.bundle_size).view(-1) + + ## Forward + jitter_rays = param_rays + jitter_dirs + + if not system.is_subdivided: + jitter_outputs = system.render( + "forward_all", jitter_rays, apply_ndc=False, no_param=True, embed_params=True + ) + + jitter_params = jitter_outputs["value"][..., rgb_channels:].view( + -1, jitter_outputs["value"].shape[-1] - rgb_channels + ) + jitter_rgba = jitter_outputs["value"][..., :rgb_channels].view(-1, rgb_channels) + else: + isect_inps = torch.cat( + [ + jitter_rays.view(-1, num_slices, jitter.bundle_size, jitter_rays.shape[-1]), + isect_codes.view(-1, num_slices, jitter.bundle_size, isect_codes.shape[-1]), + ], + -1, + ) + isect_inps = isect_inps.permute(0, 2, 1, 3).reshape(-1, num_slices, isect_inps.shape[-1]) + + isect_mask = isect_mask.view(-1, num_slices, jitter.bundle_size) + isect_mask = isect_mask.permute(0, 2, 1).reshape(-1, num_slices) + + jitter_outputs = system.render_fn.render( + "forward_all", None, no_param=True, isect_inps=isect_inps, isect_mask=isect_mask, embed_params=True + ) + + jitter_params = jitter_outputs["value"][..., rgb_channels:].view( + -1, jitter.bundle_size, num_slices, jitter_outputs["value"].shape[-1] - rgb_channels + ) + jitter_params = jitter_params.permute(0, 2, 1, 3).reshape(-1, jitter_params.shape[-1]) + + jitter_rgba = jitter_outputs["value"][..., :rgb_channels].view( + -1, jitter.bundle_size, num_slices, rgb_channels + ) + jitter_rgba = jitter_rgba.permute(0, 2, 1, 3).reshape(-1, jitter_rgba.shape[-1]) + + jitter_tform = jitter_params[..., :-out_channels] + jitter_bias = jitter_params[..., -out_channels:] + + #### Losses #### + + all_losses = {loss: 0.0 for loss in self.loss_fns.keys()} + + if self._do_loss("color_loss"): + all_losses["color_loss"] += self._loss_fn("color_loss", rgba, jitter_rgba) + + if self._do_loss("svd_loss"): + all_losses["svd_loss"] += self._loss_fn("svd_loss", S[..., 2:], torch.zeros_like(S[..., 2:])) + + if self._do_loss("level_loss"): + all_losses["level_loss"] += self._loss_fn("level_loss", jitter_tform, tform) + + all_losses["level_loss"] += self._loss_fn("level_loss", jitter_bias, bias) + + ## Total loss + total_loss = 0.0 + + for name in all_losses.keys(): + if batch_idx == 0: + print(name + ":", all_losses[name]) + + total_loss += 
all_losses[name] + + return total_loss diff --git a/nlf/rendering.py b/nlf/rendering.py new file mode 100644 index 0000000..794947d --- /dev/null +++ b/nlf/rendering.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from collections import defaultdict + +import numpy as np +import torch +from torch import nn + +from .nets import net_dict + + +class Render(nn.Module): + def __init__(self, model, subdivision, cfg, **kwargs): + super().__init__() + + if "net_chunk" in kwargs: + self.net_chunk = kwargs["net_chunk"] + else: + self.net_chunk = 32768 + + def _run(self, x, fn, **render_kwargs): + x = x.view(-1, x.shape[-1]) + out = fn(x, render_kwargs) + return out.view(-1, out.shape[-1]) + + def _run_multiple(self, x, fn, **render_kwargs): + x = x.view(-1, x.shape[-1]) + out = fn(x, render_kwargs) + + for key in out.keys(): + out[key] = out[key].view(-1, out[key].shape[-1]) + + return out + + def _run_chunked(self, x, fn, **render_kwargs): + x = x.view(-1, x.shape[-1]) + + # Chunked inference + B = x.shape[0] + out_chunks = [] + + for i in range(0, B, self.net_chunk): + out_chunks += [fn(x[i : i + self.net_chunk], render_kwargs)] + + out = torch.cat(out_chunks, 0) + return out.view(-1, out.shape[-1]) + + +class RenderLightfield(Render): + def __init__(self, model, subdivision, cfg, *args, **kwargs): + super().__init__(model, subdivision, cfg, **kwargs) + + self.model = model + + def forward(self, rays, **render_kwargs): + return self._run_multiple(rays, self.model, **render_kwargs) + + def embed(self, rays, **render_kwargs): + return self._run_multiple(rays, self.model.embed, **render_kwargs) + + def forward_multiple(self, rays, **render_kwargs): + return self._run_multiple(rays, self.model, **render_kwargs) + + +render_fn_dict = { + "lightfield": RenderLightfield, +} + + +def render_chunked( + rays, + render_fn, + render_kwargs, + chunk, +): + B = rays.shape[0] + results = defaultdict(list) + chunk_args = getattr(render_kwargs, "chunk_args", None) + + for i in range(0, B, chunk): + # Create render arguments + if chunk_args is None: + chunk_render_kwargs = render_kwargs + else: + chunk_render_kwargs = {} + + for k in render_kwargs.keys(): + # Chunk this argument + if k in chunk_args: + chunk_render_kwargs[k] = {} + + for j in render_kwargs[k]: + chunk_render_kwargs[k][j] = render_kwargs[k][j][i : i + chunk] + # Pass full argument + else: + chunk_render_kwargs[k] = render_kwargs[k] + + # Run forward in chunks + rendered_ray_chunks = render_fn(rays[i : i + chunk], **chunk_render_kwargs) + + # Accumulate chunks + for k, v in rendered_ray_chunks.items(): + results[k] += [v] + + # Concatenate chunks + for k, v in results.items(): + if isinstance(v[0], list): + if "weights" in k: + results[k] = v[0] + else: + results[k] = torch.cat([torch.stack(item, 0) for item in v], 1) + results[k] = [results[k][idx] for idx in range(results[k].shape[0])] + else: + results[k] = torch.cat(v, 0) + + return results diff --git a/nlf/subdivision.py b/nlf/subdivision.py new file mode 100644 index 0000000..a8cec40 --- /dev/null +++ b/nlf/subdivision.py @@ -0,0 +1,468 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F + +from utils.config_utils import lambda_config, replace_config +from utils.intersect_utils import intersect_axis_plane, intersect_sphere + +from .embedding import embedding_dict + + +class Subdivision(nn.Module): + def __init__(self, system, cfg): + super().__init__() + + self.cfg = cfg + self.no_reparam = False + + ## (Hack) Prevent from storing system variables + self.systems = [system] + + self.update_every = cfg.update_every if "update_every" in cfg else float("inf") + + if self.update_every == "inf": + self.update_every = float("inf") + + def get_system(self): + return self.systems[0] + + def get_dataset(self): + return self.systems[0].trainer.datamodule.train_dataset + + def process_intersect(self, rays, pts, idx): + pass + + def intersect(self, rays): + pass + + def forward(self, rays): + with torch.no_grad(): + isect_pts, isect_depth, isect_idx = self.intersect(rays) + + isect_rays, isect_centers = self.process_intersect(rays, isect_pts, isect_idx) + + return torch.cat([isect_rays, isect_centers], -1), isect_depth, isect_idx, isect_idx.eq(-1) + + def validation(self, rays, results): + system = self.get_system() + W = system.cur_wh[0] + H = system.cur_wh[1] + + depth = results["depth"].view(H, W, 1).cpu().numpy() + depth = depth.transpose(2, 0, 1) + + disp = 1 / depth + disp[depth == 0] = 0 + + disp = (disp - disp.min()) / (disp.max() - disp.min()) + depth = (depth - depth.min()) / (depth.max() - depth.min()) + + accum = results["accum"].view(H, W, 1).cpu().numpy() + accum = accum.transpose(2, 0, 1) + + return { + "depth": depth, + "disp": disp, + "accum": accum, + } + + def validation_video(self, rays, results): + outputs = self.validation(rays, results) + + return { + "videos/subdivision_depth": outputs["depth"], + "videos/subdivision_disp": outputs["disp"], + "videos/subdivision_accum": outputs["accum"], + } + + def validation_image(self, batch, batch_idx, results): + outputs = self.validation(batch["coords"], results) + + return { + "images/subdivision_depth": outputs["depth"], + "images/subdivision_disp": outputs["disp"], + "images/subdivision_accum": outputs["accum"], + } + + def update(self): + pass + + +class DepthSubdivision(Subdivision): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + self.near = cfg.near + self.far = cfg.far + + self.grid_depth = cfg.grid_depth + 1 + self.reparam = cfg.reparam if "reparam" in cfg else True + self.voxel_size = torch.tensor((self.far - self.near) / (self.grid_depth - 1)) + + def process_intersect(self, rays, pts, idx): + depths = torch.linspace(self.near, self.far, self.grid_depth, device=rays.device).float() + + # Reparametrize rays + rays = rays[..., :6].unsqueeze(1) + rays = rays * rays.new_ones(1, self.grid_depth, 1) + + if self.reparam: + pts[..., 2] = pts[..., 2] - depths[None] + + rays = torch.cat([pts, rays[..., 3:6]], -1) + + centers = torch.ones_like(rays[..., 0:1]) * depths[None, ..., None] + + return rays, centers + + def intersect(self, rays): + rays = rays[..., :6].unsqueeze(1) + rays = rays * rays.new_ones(1, self.grid_depth, 1) + + depths = torch.linspace(self.near, self.far, self.grid_depth, device=rays.device).float() + depths = depths.view(1, self.grid_depth) * depths.new_ones(rays.shape[0], 1) + + isect_pts, isect_depth = intersect_axis_plane(rays, depths, -1) + isect_depth = isect_depth[..., -1] + isect_idx = (isect_depth >= 0).long() + isect_idx[isect_depth < 0] = -1 + 
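        # The extra far plane (grid_depth was incremented by one in __init__) is
        # always flagged invalid; forward() turns the -1 entries into the mask.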
isect_idx[..., -1] = -1 + + return isect_pts, isect_depth, isect_idx + + +class DepthEmbeddingSubdivision(Subdivision): + def __init__(self, system, cfg, **kwargs): + + super().__init__(system, cfg) + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + + def set_z_channels(cfg, key): + cfg[key] = cfg[key] * self.grid_depth + + # Net + self.embedding_cfg = cfg.embedding + lambda_config(self.embedding_cfg, "z_channels", set_z_channels) + self.net = embedding_dict[self.embedding_cfg.type]( + 6, self.embedding_cfg, latent_dim=0, net_in_channels=None, group=self.group + ) + + # Subdivision + self.near = cfg.near + self.far = cfg.far + self.grid_depth = cfg.grid_depth + self.voxel_size = torch.tensor((self.far - self.near) / self.grid_depth) + + # Correct bounds + self.near += self.voxel_size / 2 + self.far -= self.voxel_size / 2 + + def forward(self, rays): + # Embedded points and indices + embed_rays = self.net(rays) + embed_rays = embed_rays.view(rays.shape[0], self.grid_depth + 1, -1) + + # Points, primitive indices, primitive codes + pts = embed_rays[..., :3] + idx = torch.ones_like(pts[..., -1]).long() + codes = torch.linspace(-1, 1, self.grid_depth + 1, device=rays.device).float()[None] * torch.ones_like( + pts[..., -1] + ) + + # Depth for sorting + depth = torch.norm(pts - rays[..., None, :3], dim=-1) + sort_idx = torch.argsort(depth, dim=-1) + sort_idx_pts = torch.stack([sort_idx, sort_idx, sort_idx], dim=1) + + # Sort all tensors + depth = torch.gather(depth, -1, sort_idx) + idx = torch.gather(idx, -1, sort_idx) + codes = torch.gather(codes, -1, sort_idx) + + pts = pts.permute(0, 2, 1) + pts = torch.gather(pts, -1, sort_idx_pts) + pts = pts.permute(0, 2, 1) + + # Get embed rays + pts = torch.cat([pts, pts[..., :1, :, :]], 1) + embed_rays = torch.cat([pts[..., :-1, :], pts[..., 1:, :]], -1) + + return torch.cat([embed_rays, codes], -1), depth, idx, idx.eq(-1) + + +class VoxelEmbeddingSubdivision(Subdivision): + def __init__(self, system, cfg, **kwargs): + super().__init__(system, cfg) + + self.group = cfg.group if "group" in cfg else (kwargs["group"] if "group" in kwargs else "embedding") + + self.min_point = torch.tensor(cfg.min_point).float().cuda() + self.max_point = torch.tensor(cfg.max_point).float().cuda() + + # Subdivision in depth dimension + self.continuous_code = cfg.continuous_code if "continuous_code" in cfg else False + self.no_reparam = cfg.no_reparam if "no_reparam" in cfg else True + self.no_voxel = cfg.no_voxel if "no_voxel" in cfg else True + self.no_voxel_reparam = cfg.no_voxel_reparam if "no_voxel_reparam" in cfg else self.no_voxel + self.grid_depth = cfg.grid_depth + self.depth_step = torch.tensor((self.max_point[-1] - self.min_point[-1]) / self.grid_depth).cuda() + + # Lateral subdivision + self.grid_width = cfg.grid_width + self.lat_step = torch.tensor((self.max_point[0] - self.min_point[0]) / self.grid_width) + self.voxel_size = self.depth_step + self.voxel_size_ = torch.stack([self.lat_step, self.lat_step, self.depth_step]).cuda() + + # Correct bounds + self.offset = torch.tensor([0.0, 0.0, 0.5]).float().cuda() + self.min_point += self.voxel_size_ * self.offset + self.max_point -= self.voxel_size_ * self.offset + self.near, self.far = self.min_point[-1], self.max_point[-1] + + # Embedding + def set_z_channels(cfg, key): + cfg[key] = cfg[key] * self.grid_depth + + self.embedding_cfg = cfg.embedding + lambda_config(self.embedding_cfg, "z_channels", set_z_channels) + self.net = 
embedding_dict[self.embedding_cfg.type]( + 6, self.embedding_cfg, latent_dim=0, net_in_channels=None, group=self.group + ) + + # Post embedding + self.post_embedding_cfg = cfg.post_embedding if "post_embedding" in cfg else None + + if self.post_embedding_cfg is not None: + lambda_config(self.post_embedding_cfg, "z_channels", set_z_channels) + self.post_in_channels = self.net.out_channels * 2 + + self.post_net = embedding_dict[self.post_embedding_cfg.type]( + self.post_in_channels, self.post_embedding_cfg, latent_dim=0, net_in_channels=None, group=self.group + ) + else: + self.post_net = None + + def forward(self, rays): + # Intersect layers + plane_depths = torch.linspace(self.near, self.far, self.grid_depth, device=rays.device).float() + plane_depths = torch.cat([plane_depths, 10000 * torch.ones_like(plane_depths[..., -1:])], -1) + plane_depths = plane_depths.view(1, self.grid_depth + 1) * plane_depths.new_ones(rays.shape[0], 1) + + isect_pts, isect_depth = intersect_axis_plane(rays[..., None, :], plane_depths, -1) + isect_depth = isect_depth[..., -1] + isect_idx = (isect_depth >= 0).long() + isect_idx[isect_depth < 0] = -1 + isect_idx[..., -1] = -1 + + # Voxel codes + voxel_centers = ( + torch.round((isect_pts - self.min_point[None, None]) / self.voxel_size_[None, None]) + + self.offset[None, None] + ) * self.voxel_size_[None, None] + self.min_point[None, None] + + if self.no_voxel_reparam: + voxel_centers[..., :2] = 0 + + # Embedded points + embed_rays = self.net(rays) + embed_pts = embed_rays.view( + rays.shape[0], + self.grid_depth, + -1, + 3, + ) + + # Local light field parameterization + pts = embed_pts[..., :3] + depths = torch.norm(pts - rays[..., None, None, :3], dim=-1).mean(-1) + + if self.no_reparam: + embed_rays = torch.cat([pts, rays[..., None, None, 3:6] * torch.ones_like(pts)], dim=-1) + else: + embed_rays = torch.cat( + [pts - voxel_centers[..., :-1, None, :], rays[..., None, None, 3:6] * torch.ones_like(pts)], dim=-1 + ) + + # Post embedding + if self.post_net is not None: + embed_rays = embed_rays.view(rays.shape[0], -1) + embed_rays = self.post_net(embed_rays) + + # Reshape and concat + embed_rays = embed_rays.view( + rays.shape[0], + self.grid_depth, + -1, + ) + embed_rays = torch.cat([embed_rays, embed_rays[..., :1, :]], 1) + depths = torch.cat([depths, depths[..., :1]], 1) + + # Center subdivisions at predicted point, use full point as "latent code" + if self.continuous_code: + voxel_centers[..., -1:] = embed_rays[..., 2:3].clone() + + if self.no_voxel: + voxel_centers = voxel_centers[..., -1:] + + embed_rays[..., 2] = 0 + embed_rays = torch.cat([embed_rays, voxel_centers], -1) + return embed_rays, depths, isect_idx, isect_idx.eq(-1) + + +class NeRFSubdivision(Subdivision): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + self.min_point = torch.tensor(cfg.min_point).float().cuda() + self.max_point = torch.tensor(cfg.max_point).float().cuda() + + # Subdivision in depth dimension + self.no_voxel = cfg.no_voxel if "no_voxel" in cfg else True + self.grid_depth = cfg.grid_depth + self.steps = cfg.steps + self.depth_step = torch.tensor((self.max_point[-1] - self.min_point[-1]) / self.grid_depth).cuda() + + # Lateral subdivision + self.grid_width = cfg.grid_width + self.lat_step = torch.tensor((self.max_point[0] - self.min_point[0]) / self.grid_width) + self.voxel_size = self.depth_step + self.voxel_size_ = torch.stack([self.lat_step, self.lat_step, self.depth_step]).cuda() + + # Correct bounds + self.offset = torch.tensor([0.0, 0.0, 
0.5]).float().cuda() + self.min_point += self.voxel_size_ * self.offset + self.max_point -= self.voxel_size_ * self.offset + self.near, self.far = self.min_point[-1], self.max_point[-1] + + # Embedding + + def forward(self, rays): + # Intersect layers + plane_depths = torch.linspace(self.near, self.far, self.steps, device=rays.device).float() + plane_depths = torch.cat([plane_depths, 10000 * torch.ones_like(plane_depths[..., -1:])], -1) + plane_depths = plane_depths.view(1, self.steps + 1) * plane_depths.new_ones(rays.shape[0], 1) + + isect_pts, isect_depth = intersect_axis_plane(rays[..., None, :], plane_depths, -1) + isect_depth = isect_depth[..., -1] + isect_idx = (isect_depth >= 0).long() + isect_idx[isect_depth < 0] = -1 + isect_idx[..., -1] = -1 + + # Voxel codes + voxel_centers = ( + torch.round((isect_pts - self.min_point[None, None]) / self.voxel_size_[None, None]) + + self.offset[None, None] + ) * self.voxel_size_[None, None] + self.min_point[None, None] + + # Embedded points + embed_pts = isect_pts[..., :-1, :] + + # Local light field parameterization + depths = torch.norm(embed_pts - rays[..., None, :3], dim=-1) + + # Use full point as "latent code" + if self.no_voxel: + voxel_centers[..., :2] = 0.0 + + embed_rays = torch.cat( + [embed_pts - voxel_centers[..., :-1, :], rays[..., None, 3:6] * torch.ones_like(embed_pts)], dim=-1 + ) + + # Reshape and concat + embed_rays = embed_rays.view( + rays.shape[0], + self.steps, + -1, + ) + embed_rays = torch.cat([embed_rays, embed_rays[..., :1, :]], 1) + depths = torch.cat([depths, depths[..., :1]], 1) + + embed_rays = torch.cat([embed_rays, isect_pts], -1) + return embed_rays, depths, isect_idx, isect_idx.eq(-1) + + +class RadialSubdivision(Subdivision): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + self.near = cfg.near + self.far = cfg.far + + if cfg.voxel_size is not None: + self.voxel_size = torch.tensor(cfg.voxel_size) + self.num_slices = int(np.round((self.far - self.near) / self.voxel_size)) + 1 + self.grid_depth = self.num_slices + else: + self.grid_depth = cfg.grid_depth + self.num_slices = cfg.grid_depth + self.voxel_size = torch.tensor((self.far - self.near) / (self.num_slices - 1)) + + # Depths + self.radii = torch.linspace(self.near, self.far, self.num_slices) + + def process_intersect(self, rays, pts, idx): + mask = idx.eq(-1) + idx[mask] = 0 + + radii = self.radii.view(1, self.num_slices).to(rays).repeat(rays.shape[0], 1) + radii = torch.gather(radii, -1, idx) + + idx[mask] = -1 + + isect_pts = rays[..., None, 0:3] / radii + pts = torch.where(mask.unsqueeze(-1) * mask.new_ones(1, 1, 3), pts, isect_pts) + rays = torch.cat([pts, rays[..., 3:6]], -1) + + return rays, radii + + def intersect(self, rays): + # Reparametrize rays + rays = rays[..., :6].unsqueeze(1).repeat(1, self.num_slices, 1) + + radii = self.radii.view(1, self.num_slices).to(rays.device).repeat(rays.shape[0], 1) + isect_pts = intersect_sphere(rays, radii) + isect_depth = torch.norm(rays[..., :3] - isect_pts, dim=-1) + + # Sort + sort_idx = torch.argsort(isect_depth, dim=-1) + sort_idx_pts = torch.stack([sort_idx, sort_idx, sort_idx], dim=1) + + isect_depth = torch.gather(isect_depth, -1, sort_idx) + isect_pts = isect_pts.permute(0, 2, 1) + isect_pts = torch.gather(isect_pts, -1, sort_idx_pts) + isect_pts = isect_pts.permute(0, 2, 1) + + isect_idx = torch.ones_like(isect_depth).long() + isect_idx[isect_depth < 0] = -1 + + return isect_pts, isect_depth, isect_idx + + +def voxels_from_bb(min_point, max_point, voxel_size): + steps = 
((max_point - min_point) / voxel_size).round().astype("int64") + 1 + x, y, z = [ + c.reshape(-1).astype("float32") + for c in np.meshgrid(np.arange(steps[0]), np.arange(steps[1]), np.arange(steps[2])) + ] + x = x * voxel_size + min_point[0] + y = y * voxel_size + min_point[1] + z = z * voxel_size + min_point[2] + + return np.stack([x, y, z]).T.astype("float32") + + +subdivision_dict = { + "depth": DepthSubdivision, + "depth_embed": DepthEmbeddingSubdivision, + "voxel_embed": VoxelEmbeddingSubdivision, + "nerf": NeRFSubdivision, + "radial": RadialSubdivision, +} diff --git a/nlf/visualizers/__init__.py b/nlf/visualizers/__init__.py new file mode 100644 index 0000000..2388f4c --- /dev/null +++ b/nlf/visualizers/__init__.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from .closest_view import ClosestViewVisualizer +from .embedding import EmbeddingVisualizer +from .epipolar import EPIVisualizer +from .focus import FocusVisualizer +from .tensor import TensorVisualizer + +visualizer_dict = { + "closest_view": ClosestViewVisualizer, + "embedding": EmbeddingVisualizer, + "epipolar": EPIVisualizer, + "focus": FocusVisualizer, + "tensor": TensorVisualizer, +} diff --git a/nlf/visualizers/base.py b/nlf/visualizers/base.py new file mode 100644 index 0000000..77491c6 --- /dev/null +++ b/nlf/visualizers/base.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from torch import nn + + +class BaseVisualizer(nn.Module): + def __init__(self, system, cfg): + super().__init__() + + self.cfg = cfg + + ## (Hack) Prevent from storing system variables + self.systems = [system] + + ## Run when testing + self.run_on_test = cfg.run_on_test if "run_on_test" in cfg else False + + def get_system(self): + return self.systems[0] + + def validation_video(self, batch, batch_idx): + return {} + + def validation_image(self, batch, batch_idx): + return {} + + @property + def render_kwargs(self): + return {} diff --git a/nlf/visualizers/closest_view.py b/nlf/visualizers/closest_view.py new file mode 100644 index 0000000..d894a07 --- /dev/null +++ b/nlf/visualizers/closest_view.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import numpy as np # noqa + +from .base import BaseVisualizer + + +class ClosestViewVisualizer(BaseVisualizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + def validation(self, batch): + system = self.get_system() + + if "pose" not in batch: + return {} + + if ( + "lightfield" not in system.trainer.datamodule.train_dataset.dataset_cfg + or system.trainer.datamodule.train_dataset.keyframe_step == -1 + ): + + if "time" in batch: + rgb = system.trainer.datamodule.train_dataset.get_closest_rgb(batch["pose"], batch["time"]).cpu() + else: + rgb = system.trainer.datamodule.train_dataset.get_closest_rgb(batch["pose"]).cpu() + + rgb = rgb.permute(2, 0, 1) + + return {"rgb": rgb} + else: + return {} + + def validation_video(self, batch, batch_idx): + temp_outputs = self.validation(batch) + outputs = {} + + for key in temp_outputs.keys(): + outputs[f"videos/closest_{key}"] = temp_outputs[key] + + return outputs + + def validation_image(self, batch, batch_idx): + return {} diff --git a/nlf/visualizers/embedding.py b/nlf/visualizers/embedding.py new file mode 100644 index 0000000..fb6d63f --- /dev/null +++ b/nlf/visualizers/embedding.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import torch + +from datasets.base import Base6DDataset +from datasets.lightfield import LightfieldDataset +from utils.ray_utils import get_epi_rays +from utils.visualization import get_warp_dimensions, visualize_warp + +from .base import BaseVisualizer + + +class EmbeddingVisualizer(BaseVisualizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + # Variables to visualize + self.fields = cfg.fields + self.data_fields = list(cfg.data_fields) if "data_fields" in cfg else [] + self.no_over_fields = list(cfg.no_over_fields) if "no_over_fields" in cfg else [] + self.pred_weights_fields = list(cfg.pred_weights_fields) if "pred_weights_fields" in cfg else [] + + # Vis dims + self.vis_dims = {} + + def validation(self, batch, batch_idx): + system = self.get_system() + W = system.cur_wh[0] + H = system.cur_wh[1] + + # Get coords + coords = batch["coords"] + coords = torch.clone(coords.view(-1, coords.shape[-1])) + + # Render fields + outputs = system.render( + "forward_multiple", + coords, + fields=self.fields.keys(), + no_over_fields=self.no_over_fields, + pred_weights_fields=self.pred_weights_fields, + ) + + # Data outputs + data_outputs = {} + + for key in self.data_fields: + data_outputs[key] = outputs[key].view(H, W, outputs[key].shape[-1]).cpu().numpy() + data_outputs[key] = data_outputs[key].transpose(2, 0, 1) + + # Visualize outputs + vis_outputs = {} + + for key in self.fields: + vis_outputs[key] = outputs[key].view(H * W, outputs[key].shape[-1]) + + # Get dimensions to visualize + if batch_idx == 0: + self.vis_dims[key] = get_warp_dimensions( + vis_outputs[key], W, H, k=min(vis_outputs[key].shape[-1], 3), **dict(self.fields[key]) + ) + + # Visualize + vis_outputs[key] = visualize_warp(vis_outputs[key], self.vis_dims[key], **dict(self.fields[key])) + + # Convert to numpy array + vis_outputs[key] = vis_outputs[key].view(H, W, vis_outputs[key].shape[-1]).cpu().numpy() + vis_outputs[key] = vis_outputs[key].transpose(2, 0, 1) + + # Return + return data_outputs, vis_outputs + + def validation_image(self, batch, batch_idx): + data_outputs, 
vis_outputs = self.validation(batch, batch_idx) + outputs = {} + + for key in data_outputs.keys(): + outputs[f"data/{key}"] = data_outputs[key] + + for key in vis_outputs.keys(): + outputs[f"images/embedding_{key}"] = vis_outputs[key] + + return outputs + + def validation_video(self, batch, batch_idx): + data_outputs, vis_outputs = self.validation(batch, batch_idx) + outputs = {} + + for key in vis_outputs.keys(): + outputs[f"videos/embedding_{key}"] = vis_outputs[key] + + return outputs diff --git a/nlf/visualizers/epipolar.py b/nlf/visualizers/epipolar.py new file mode 100644 index 0000000..1757695 --- /dev/null +++ b/nlf/visualizers/epipolar.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import torch + +from datasets.base import Base6DDataset +from datasets.lightfield import LightfieldDataset +from utils.ray_utils import get_epi_rays +from utils.visualization import get_warp_dimensions, visualize_warp + +from .base import BaseVisualizer + + +class EPIVisualizer(BaseVisualizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + # Setup parametrization + if system.is_subdivided: + param_cfg = system.cfg.model.ray.param + else: + param_cfg = system.cfg.model.param + + # Vars + self.v = cfg.v if "v" in cfg else None + self.t = cfg.v if "t" in cfg else None + self.H = cfg.H if "H" in cfg else None + + self.near = cfg.near if "near" in cfg else -1.0 + self.far = cfg.far if "far" in cfg else 0.0 + + if self.near is None: + if "near" in param_cfg: + self.near = param_cfg.near + else: + self.near = -1.0 + + if self.far is None: + if "far" in param_cfg: + self.far = param_cfg.far + else: + self.far = 0.0 + + if "st_scale" in cfg and cfg.st_scale is not None: + self.st_scale = cfg.st_scale + elif "lightfield" in system.cfg.dataset and "st_scale" in system.cfg.dataset.lightfield: + self.st_scale = system.cfg.dataset.lightfield.st_scale + else: + self.st_scale = 1.0 + + if "uv_scale" in cfg and cfg.uv_scale is not None: + self.uv_scale = cfg.uv_scale + elif "lightfield" in system.cfg.dataset and "uv_scale" in system.cfg.dataset.lightfield: + self.uv_scale = system.cfg.dataset.lightfield.uv_scale + else: + self.uv_scale = 1.0 + + def validation(self, batch, batch_idx): + if batch_idx > 0: + return + + system = self.get_system() + dataset = system.trainer.datamodule.train_dataset + W = system.cur_wh[0] + H = system.cur_wh[1] + + # Coordinates + if self.t is not None: + t = self.t + else: + t = 0 + + if self.v is not None: + v = self.v + else: + v = 0 + + if self.H is not None: + H = self.H + + ## Forward + outputs = {} + + # Ground truth EPI + if isinstance(dataset, LightfieldDataset) and dataset.keyframe_subsample == 1: + all_rgb = dataset.all_rgb.view(dataset.num_rows, dataset.num_cols, dataset.img_wh[1], dataset.img_wh[0], 3) + rgb = all_rgb[dataset.num_rows // 2, :, dataset.img_wh[1] // 2, :, :] + + rgb = rgb.view(rgb.shape[0], rgb.shape[1], 3).cpu() + rgb = rgb.permute(2, 0, 1) + outputs["gt"] = rgb + + # Generate EPI rays + rays = get_epi_rays( + W, v, H, t, dataset.aspect, st_scale=self.st_scale, uv_scale=self.uv_scale, near=self.near, far=self.far + ).type_as(batch["coords"]) + + # Add time + if isinstance(dataset, Base6DDataset): + rays = torch.cat([rays, torch.zeros_like(rays[..., :1])], dim=-1) + + # RGB + rgb = system(rays)["rgb"] + + if isinstance(rgb, 
list): + rgb = rgb[-1] + + rgb = rgb.view(H, W, 3).cpu() + rgb = rgb.permute(2, 0, 1) + + outputs["pred"] = rgb + + return outputs + + def validation_image(self, batch, batch_idx): + if batch_idx > 0: + return {} + + # Outputs + temp_outputs = self.validation(batch, batch_idx) + outputs = {} + + for key in temp_outputs.keys(): + outputs[f"images/epi_{key}"] = temp_outputs[key] + + return outputs diff --git a/nlf/visualizers/focus.py b/nlf/visualizers/focus.py new file mode 100644 index 0000000..04ad3e7 --- /dev/null +++ b/nlf/visualizers/focus.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import torch + +from utils.ray_utils import get_lightfield_rays + +from .base import BaseVisualizer + + +class FocusVisualizer(BaseVisualizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + self.s = cfg.s if "s" in cfg else None + self.t = cfg.t if "t" in cfg else None + + self.ds = cfg.ds if "ds" in cfg else None + self.dt = cfg.dt if "dt" in cfg else None + + self.near = cfg.near if "near" in cfg else -1.0 + self.far = cfg.far if "far" in cfg else 0.0 + self.focal = cfg.focal if "focal" in cfg else 0.0 + + self.n_freqs = system.cfg.model.color_pe.n_freqs + self.freq_bands = 2 ** torch.linspace(0, self.n_freqs - 1, self.n_freqs) + + if "st_scale" in cfg and cfg.st_scale is not None: + self.st_scale = cfg.st_scale + elif "lightfield" in system.cfg.dataset and "st_scale" in system.cfg.dataset.lightfield: + self.st_scale = system.cfg.dataset.lightfield.st_scale + else: + self.st_scale = 1.0 + + if "uv_scale" in cfg and cfg.uv_scale is not None: + self.uv_scale = cfg.uv_scale + else: + self.uv_scale = 1.0 + + def validation(self, batch, batch_idx): + if batch_idx > 0: + return + + system = self.get_system() + dataset = system.trainer.datamodule.train_dataset + + W = system.cur_wh[0] + H = system.cur_wh[1] + + param_channels = 4 + + ## Forward + outputs = {} + + # Generate image rays + if self.s is not None: + s = self.s + else: + s = 0.0 + + if self.t is not None: + t = self.t + else: + t = 0.0 + + rays = get_lightfield_rays( + W, H, s, t, dataset.aspect, st_scale=self.st_scale, uv_scale=self.uv_scale, near=self.near, far=self.far + ).type_as(batch["coords"]) + + # Cone + if self.ds is None: + ds = 1.0 + else: + ds = self.ds + + if self.dt is None: + dt = 1.0 + else: + dt = self.dt + + du = (self.focal - self.far) * ds / (self.far - self.near) + dv = (self.focal - self.far) * dt / (self.far - self.near) + + ds_vec = torch.zeros((1, param_channels, 1)).type_as(rays) + ds_vec[..., 0, :] = ds + ds_vec[..., 2, :] = du + + dt_vec = torch.zeros((1, param_channels, 1)).type_as(rays) + dt_vec[..., 1, :] = dt + dt_vec[..., 3, :] = dv + + # Warp + params = system.render("embed_params", rays)["params"] + out_channels = params.shape[-1] // (param_channels + 1) + tform = params[..., :-out_channels].reshape(-1, out_channels, param_channels) + tform = torch.nn.functional.normalize(tform, p=2.0, dim=-1) + + s_response = (tform @ ds_vec).squeeze(-1) / W + t_response = (tform @ dt_vec).squeeze(-1) / W + + max_response = torch.maximum( + torch.abs(s_response), + torch.abs(t_response), + ) + max_freq = 1.0 / max_response + + # Calculate weights + pe_weight = {} + + for j, freq in enumerate(self.freq_bands): + # weight = (max_freq / freq - 0.5) * 2 + weight = max_freq / freq + pe_weight[j] = 
torch.clamp(weight, torch.zeros_like(weight), torch.ones_like(weight)) + + print(pe_weight[0].shape, rays.shape) + print(pe_weight[len(self.freq_bands) - 1][0]) + + # RGB out of focus + rgb = system(rays, pe_weight=pe_weight, chunk_args=["pe_weight"])["rgb"] + if isinstance(rgb, list): + rgb = rgb[-1] + rgb = rgb.view(H, W, 3).cpu() + rgb = rgb.permute(2, 0, 1) + + outputs["rgb_cone"] = rgb + + # RGB in focus + rgb = system(rays)["rgb"] + if isinstance(rgb, list): + rgb = rgb[-1] + rgb = rgb.view(H, W, 3).cpu() + rgb = rgb.permute(2, 0, 1) + + outputs["rgb_ray"] = rgb + + return outputs + + def validation_image(self, batch, batch_idx): + if batch_idx > 0: + return {} + + # Outputs + temp_outputs = self.validation(batch, batch_idx) + outputs = {} + + for key in temp_outputs.keys(): + outputs[f"images/focus_{key}"] = temp_outputs[key] + + return outputs diff --git a/nlf/visualizers/tensor.py b/nlf/visualizers/tensor.py new file mode 100644 index 0000000..ee5c7f6 --- /dev/null +++ b/nlf/visualizers/tensor.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import torch + +from .base import BaseVisualizer + + +class TensorVisualizer(BaseVisualizer): + def __init__(self, system, cfg): + super().__init__(system, cfg) + + def validation(self, batch, batch_idx): + if batch_idx > 0: + return + + system = self.get_system() + W, H = batch["W"], batch["H"] + + outputs = {} + + # Render tensors (unwarped) + if system.is_subdivided: + tensors = system.render_fn.model.ray_model.color_model.net.tensors[0].tensors[0].tensor[:, ..., 0] + else: + tensors = system.render_fn.model.color_model.net.tensors[0].tensors[0].tensor[:, ..., 0] + + for i in range(tensors.shape[0]): + if len(tensors[i].shape) == 2: + rgb = torch.sigmoid(tensors[i].permute(1, 0)[None, ..., :3]).repeat(128, 1, 1).cpu() + rgb = rgb.permute(2, 0, 1) + outputs[f"rgb_unwarped_{i:03d}"] = rgb + elif len(tensors[i].shape) == 3: + rgb = torch.sigmoid(tensors[i].permute(1, 2, 0)[..., :3]).cpu() + rgb = rgb.permute(2, 0, 1) + outputs[f"rgb_unwarped_{i:03d}"] = rgb + + # Render tensors (layers) + if system.is_subdivided: + num_partitions = system.render_fn.model.ray_model.color_model.net.num_partitions + else: + num_partitions = system.render_fn.model.color_model.net.num_partitions + + for i in range(num_partitions): + coords, rgb, = ( + batch["coords"], + batch["rgb"], + ) + coords = torch.clone(coords.view(-1, coords.shape[-1])) + + results = system(coords, keep_tensor_partitions=[i]) + rgb = results["rgb"].view(H, W, 3).cpu().numpy() + rgb = rgb.transpose(2, 0, 1) + outputs[f"rgb_warped_{i:03d}"] = rgb + + return outputs + + def validation_image(self, batch, batch_idx): + if batch_idx > 0: + return {} + + # Outputs + temp_outputs = self.validation(batch, batch_idx) + outputs = {} + + for key in temp_outputs.keys(): + outputs[f"images/tensor_{key}"] = temp_outputs[key] + + return outputs diff --git a/scripts/input_pose.json b/scripts/input_pose.json new file mode 100644 index 0000000..730ee8b --- /dev/null +++ b/scripts/input_pose.json @@ -0,0 +1,26 @@ +[ + { + "time": 0.95, + "pose": [ + [0.99358621, -0.00245376, 0.11305054, 0.53649348], + [0.00578573, 0.99955817, -0.02915463, 0.11812715], + [-0.11292905, 0.02962171, 0.99316141, 0.29801934] + ] + }, + { + "time": 0.35, + "pose": [ + [0.99358621, -0.00245376, 0.11305054, 0.23649348], + 
[0.00578573, 0.99955817, -0.02915463, 0.51812715], + [-0.11292905, 0.02962171, 0.99316141, 0.29801934] + ] + }, + { + "time": 0.05, + "pose": [ + [0.99358621, -0.00245376, 0.11305054, 0.23649348], + [0.00578573, 0.99955817, -0.02915463, 0.11812715], + [-0.11292905, 0.02962171, 0.99316141, 0.59801934] + ] + } +] diff --git a/scripts/render_flames_given_pose.sh b/scripts/render_flames_given_pose.sh new file mode 100644 index 0000000..2a4f762 --- /dev/null +++ b/scripts/render_flames_given_pose.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +CUDA_VISIBLE_DEVICES=$1 python main.py experiment/dataset=immersive \ + experiment/training=immersive_tensorf \ + experiment.training.val_every=10 \ + experiment.training.test_every=10 \ + experiment.training.ckpt_every=10 \ + experiment.training.render_every=10 \ + experiment.training.num_epochs=10 \ + experiment/model=immersive_sphere \ + experiment.params.print_loss=True \ + experiment.dataset.collection=02_Flames \ + +experiment/regularizers/tensorf=tv_4000 \ + experiment.dataset.start_frame=50 \ + experiment.dataset.num_frames=10 \ + experiment.params.name=flames \ + experiment.params.save_results=True \ + experiment.training.num_iters=100 \ + experiment.training.num_epochs=1000 \ + experiment.params.render_only=True \ + experiment.params.input_pose=scripts/input_pose.json diff --git a/scripts/render_horse.sh b/scripts/render_horse.sh new file mode 100644 index 0000000..4a2f0e6 --- /dev/null +++ b/scripts/render_horse.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +CUDA_VISIBLE_DEVICES=$1 python main.py experiment/dataset=immersive \ + experiment/training=immersive_tensorf \ + experiment.training.val_every=10 \ + experiment.training.test_every=10 \ + experiment.training.ckpt_every=10 \ + experiment.training.render_every=10 \ + experiment.training.num_epochs=30 \ + experiment/model=immersive_sphere \ + experiment.params.print_loss=True \ + experiment.dataset.collection=05_Horse \ + +experiment/regularizers/tensorf=tv_4000 \ + experiment.dataset.start_frame=50 \ + experiment.dataset.num_frames=50 \ + experiment.params.name=horse \ + experiment.params.save_results=True \ + experiment.training.num_iters=100 \ + experiment.training.num_epochs=1000 \ + experiment.params.render_only=True diff --git a/scripts/render_horse_given_pose.sh b/scripts/render_horse_given_pose.sh new file mode 100644 index 0000000..0c77834 --- /dev/null +++ b/scripts/render_horse_given_pose.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +CUDA_VISIBLE_DEVICES=$1 python main.py experiment/dataset=immersive \ + experiment/training=immersive_tensorf \ + experiment.training.val_every=10 \ + experiment.training.test_every=10 \ + experiment.training.ckpt_every=10 \ + experiment.training.render_every=10 \ + experiment.training.num_epochs=30 \ + experiment/model=immersive_sphere \ + experiment.params.print_loss=True \ + experiment.dataset.collection=05_Horse \ + +experiment/regularizers/tensorf=tv_4000 \ + experiment.dataset.start_frame=50 \ + experiment.dataset.num_frames=50 \ + experiment.params.name=horse \ + experiment.params.save_results=True \ + experiment.training.num_iters=100 \ + experiment.training.num_epochs=1000 \ + experiment.params.render_only=True \ + experiment.params.input_pose=scripts/input_pose.json diff --git a/scripts/train_flames.sh b/scripts/train_flames.sh new file mode 100644 index 0000000..edcfa81 --- /dev/null +++ b/scripts/train_flames.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +CUDA_VISIBLE_DEVICES=$1 python main.py experiment/dataset=immersive \ + experiment/training=immersive_tensorf \ + experiment.training.val_every=10 \ + experiment.training.test_every=10 \ + experiment.training.ckpt_every=10 \ + experiment.training.render_every=10 \ + experiment.training.num_epochs=10 \ + experiment/model=immersive_sphere \ + experiment.params.print_loss=True \ + experiment.dataset.collection=02_Flames \ + +experiment/regularizers/tensorf=tv_4000 \ + experiment.dataset.start_frame=50 \ + experiment.dataset.num_frames=10 \ + experiment.params.name=flames diff --git a/scripts/train_horse.sh b/scripts/train_horse.sh new file mode 100644 index 0000000..87afe87 --- /dev/null +++ b/scripts/train_horse.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +CUDA_VISIBLE_DEVICES=$1 python main.py experiment/dataset=immersive \ + experiment/training=immersive_tensorf \ + experiment.training.val_every=10 \ + experiment.training.test_every=10 \ + experiment.training.ckpt_every=5 \ + experiment.training.render_every=10 \ + experiment.training.num_epochs=30 \ + experiment/model=immersive_sphere \ + experiment.params.print_loss=True \ + experiment.dataset.collection=05_Horse \ + +experiment/regularizers/tensorf=tv_4000 \ + experiment.dataset.start_frame=50 \ + experiment.dataset.num_frames=50 \ + experiment.params.name=horse diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..2ea22e3 --- /dev/null +++ b/utils/__init__.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 hawkey +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +from functools import partial + +import numpy as np +import torch +from torch.optim import SGD, Adam, RMSprop +from torch.optim.lr_scheduler import CosineAnnealingLR, LambdaLR, MultiStepLR + +from .warmup_scheduler import GradualWarmupScheduler + + +def no_init(cfg): + def init(m): + pass + + return init + + +def uniform_weights_init(cfg): + def init(m): + if isinstance(m, torch.nn.Linear): + torch.nn.init.uniform_(m.weight, a=-0.1, b=0.1) + torch.nn.init.uniform_(m.bias, a=-0.1, b=0.1) + + return init + + +def xavier_uniform_weights_init(cfg): + def init(m): + if isinstance(m, torch.nn.Linear): + torch.nn.init.xavier_uniform_(m.weight) + torch.nn.init.uniform_(m.bias, a=-0.01, b=0.01) + + return init + + +weight_init_dict = { + "none": no_init, + "uniform": uniform_weights_init, + "xavier_uniform": xavier_uniform_weights_init, +} + + +def to8b(x): + return (255 * np.clip(x, 0, 1)).astype(np.uint8) + + +def get_optimizer(hparams, models): + eps = 1e-8 + parameters = [] + + for model in models: + parameters += list(model.parameters()) + if hparams.optimizer == "sgd": + optimizer = SGD(parameters, lr=hparams.lr, momentum=hparams.momentum, weight_decay=hparams.weight_decay) + elif hparams.optimizer == "adam": + optimizer = Adam(parameters, lr=hparams.lr, eps=eps, weight_decay=hparams.weight_decay, betas=(0.9, 0.99)) + elif hparams.optimizer == "rmsprop": + optimizer = RMSprop( + parameters, + alpha=hparams.alpha, + momentum=hparams.momentum, + lr=hparams.lr, + eps=eps, + weight_decay=hparams.weight_decay, + ) + else: + raise ValueError("optimizer not recognized!") + + return optimizer + + +def exp_decay(decay_gamma, stop_epoch, decay_epoch, epoch): + if epoch > stop_epoch: + decay = 0.0 + else: + decay = decay_gamma ** (epoch / decay_epoch) + + return decay + + +def poly_exp_decay(num_epochs, poly_exp, epoch): + return (1 - epoch / num_epochs) ** poly_exp + + +def get_scheduler(hparams, optimizer, iters_per_epoch): + eps = 1e-8 + + if hparams.lr_scheduler == "exp": + scheduler = LambdaLR( + optimizer, + partial( + exp_decay, + hparams.decay_gamma, + hparams.stop_epoch if "stop_epoch" in hparams else float("inf"), + float(hparams.decay_epoch), + ), + ) + elif hparams.lr_scheduler == "steplr": + scheduler = MultiStepLR(optimizer, milestones=[hparams.decay_epoch], gamma=hparams.decay_gamma) + elif hparams.lr_scheduler == "cosine": + scheduler = CosineAnnealingLR(optimizer, T_max=hparams.num_epochs, eta_min=eps) + elif hparams.lr_scheduler == "poly": + scheduler = LambdaLR( + optimizer, + partial(poly_exp_decay, hparams.num_epochs, hparams.poly_exp), + ) + else: + raise ValueError("scheduler not recognized!") + + if hparams.warmup_epochs > 0: + scheduler = GradualWarmupScheduler( + optimizer, + multiplier=hparams.warmup_multiplier, + total_epoch=hparams.warmup_epochs, + after_scheduler=scheduler, + ) + + return scheduler + + +def get_learning_rate(optimizer): + for param_group in optimizer.param_groups: + return param_group["lr"] + + +def extract_model_state_dict(ckpt_path, model_name="model", prefixes_to_ignore=[]): # noqa + checkpoint = torch.load(ckpt_path, map_location=torch.device("cpu")) + checkpoint_ = {} + + if "state_dict" in checkpoint: # if it's a pytorch-lightning checkpoint + checkpoint = checkpoint["state_dict"] + + for k, v in checkpoint.items(): + if not k.startswith(model_name): + continue + + k = k[len(model_name) + 1 :] + + for prefix in prefixes_to_ignore: + if k.startswith(prefix): + print("ignore", k) + break + else: + checkpoint_[k] = v + + 
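    # Only keys under `model_name` survive, with that prefix stripped
    # (e.g. a hypothetical "model.net.0.weight" becomes "net.0.weight"); keys
    # matching any entry in `prefixes_to_ignore` are dropped via the for/else.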
return checkpoint_ + + +def load_ckpt(model, ckpt_path, model_name="model", prefixes_to_ignore=[]): # noqa + model_dict = model.state_dict() + checkpoint_ = extract_model_state_dict(ckpt_path, model_name, prefixes_to_ignore) + model_dict.update(checkpoint_) + model.load_state_dict(model_dict) diff --git a/utils/config_utils.py b/utils/config_utils.py new file mode 100644 index 0000000..e74e301 --- /dev/null +++ b/utils/config_utils.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 hawkey +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +# @manual //github/third-party/omry/omegaconf:omegaconf +from omegaconf import DictConfig + + +def format_config(cfg: DictConfig): + format_config_helper(cfg, cfg) + + +def format_config_helper(cfg, master_config: DictConfig): + if isinstance(cfg, DictConfig): + for key, _ in cfg.items(): + if isinstance(cfg[key], str): + cfg[key] = cfg[key].format(config=master_config) + else: + format_config_helper(cfg[key], master_config) + + +def replace_config(cfg, **kwargs): + if isinstance(cfg, DictConfig): + for key, _ in cfg.items(): + if key in kwargs.keys() and cfg[key] is None: + cfg[key] = kwargs[key] + else: + replace_config(cfg[key], **kwargs) + + +def lambda_config(cfg, find_key, fn): + if isinstance(cfg, DictConfig): + for key, _ in cfg.items(): + if key == find_key: + fn(cfg, key) + else: + lambda_config(cfg[key], find_key, fn) diff --git a/utils/flow_utils.py b/utils/flow_utils.py new file mode 100644 index 0000000..785cc99 --- /dev/null +++ b/utils/flow_utils.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 hawkey +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import torch + + +def get_base_time( + t, + flow_keyframes=-1, + total_frames=-1, + flow_scale=0.0, + jitter=False, +): + # Get time offset + if flow_keyframes > 0: + fac = flow_keyframes * (total_frames - 1) / total_frames + t = t * fac + + # Offset base time + if jitter and flow_scale > 0.0: + base_t = t + (torch.rand_like(t) * flow_scale - flow_scale / 2.0) + else: + base_t = t + + # Round + base_t = torch.round(base_t.clamp(0.0, flow_keyframes - 1.0) - 1e-5) * (1.0 / fac) + else: + base_t = torch.zeros_like(t) + + return base_t + + +def get_flow_and_time( + flow, + t, + flow_keyframes=-1, + flow_scale=0.0, + rigid_flow=False, + add_rand=False, + flow_activation=None, +): + # Get time offset + if flow_keyframes > 0: + # Offset base time + if add_rand and flow_scale > 0.0: + base_t = t + (torch.rand_like(t) * flow_scale - flow_scale / 2.0) / (flow_keyframes - 1) + else: + base_t = t + + # Round + base_t = torch.round(base_t * (flow_keyframes - 1)) / (flow_keyframes - 1) + else: + base_t = torch.zeros_like(t) + + base_t = base_t.clamp(0.0, 1.0) + + # Get flow and advect points + time_offset = (t - base_t)[..., None, :] + + if rigid_flow: + flow = flow_activation(flow * time_offset) + else: + flow = flow_activation(flow) * time_offset + + # Return + return flow, base_t diff --git a/utils/gui_utils.py b/utils/gui_utils.py new file mode 100644 index 0000000..dba5e80 --- /dev/null +++ b/utils/gui_utils.py @@ -0,0 +1,397 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# Copyright (c) 2022 hawkey
+#
+# This source code is licensed under the MIT license found in the
+# LICENSES folder in the root directory of this source tree.
+# SPDX-License-Identifier: MIT
+import math
+import time
+
+import cv2
+import dearpygui.dearpygui as dpg
+import numpy as np
+import torch
+from scipy.spatial.transform import Rotation as R
+
+
+class OrbitCamera:
+    def __init__(self, W, H, r=2, fovy=60):
+        self.W = W
+        self.H = H
+        self.radius = r  # camera distance from the look-at center
+        self.fovy = fovy  # vertical field of view, in degrees
+        self.center = np.array([0, 0, 0], dtype=np.float32)  # look-at point
+        self.rot = R.from_quat(
+            [1, 0, 0, 0]
+        )  # initial rotation: 180 degrees about x, i.e. [[1, 0, 0], [0, -1, 0], [0, 0, -1]] (to suit ngp convention)
+        self.up = np.array([0, 1, 0], dtype=np.float32)  # world up vector; must be normalized
+
+    # pose
+    @property
+    def pose(self):
+        # first move the camera back along z by the orbit radius
+        res = np.eye(4, dtype=np.float32)
+        res[2, 3] -= self.radius
+        # rotate
+        rot = np.eye(4, dtype=np.float32)
+        rot[:3, :3] = self.rot.as_matrix()
+        res = rot @ res
+
+        # translate
+        res[:3, 3] -= self.center
+
+        # convert pose convention: flip the camera y and z axes
+        res[..., 1] *= -1
+        res[..., 2] *= -1
+
+        # pose_pre = np.eye(4)
+        # pose_pre[1, 1] *= -1
+        # pose_pre[2, 2] *= -1
+        # res = pose_pre @ res @ pose_pre
+
+        return res
+
+    # intrinsics
+    @property
+    def intrinsics(self):
+        focal = self.H / (2 * np.tan(np.radians(self.fovy) / 2))
+        return np.array([focal, focal, self.W // 2, self.H // 2])
+
+    def orbit(self, dx, dy):
+        # rotate about the world up axis (horizontal drag) and the camera side axis (vertical drag)
+        side = self.rot.as_matrix()[:3, 0]  # first column of the rotation is the camera side (x) axis; already unit length
+        rotvec_x = self.up * np.radians(-0.1 * dx)
+        rotvec_y = side * np.radians(-0.1 * dy)
+        self.rot = R.from_rotvec(rotvec_x) * R.from_rotvec(rotvec_y) * self.rot
+
+    def scale(self, delta):
+        # self.radius *= 1.1 ** (-delta)
+        self.radius += delta * 0.25
+
+    def pan(self, dx, dy, dz=0):
+        # pan in the camera coordinate system (careful with the sensitivity!)
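+        # dx, dy, dz are screen-space drag deltas; rotating them by the current
+        # camera orientation keeps the pan parallel to the image plane.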
+ self.center += 0.0005 * self.rot.as_matrix()[:3, :3] @ np.array([dx, dy, dz]) + + +class NeRFGUI: + def __init__(self, system, W=800, H=800, radius=1.0, fovy=30, debug=True): + self.system = system + self.system.render_fn = self.system.render_fn.cuda() + self.system.render_fn.eval() + self.train_dataset = system.dm.train_dataset + + if "tarot" in self.system.cfg.dataset.collection and self.system.cfg.dataset.use_ndc: + fovy = 75 + radius = 0.0 + self.pan_factor = 0.25 + elif "tarot" in self.system.cfg.dataset.collection and not self.system.cfg.dataset.use_ndc: + fovy = 60 + radius = -1.0 + self.pan_factor = 0.1 + else: + self.pan_factor = 1.0 + + self.W = W + self.H = H + + self.cam = OrbitCamera(W, H, r=radius, fovy=fovy) + self.debug = debug + + self.training = False + self.step = 0 # training step + + self.render_buffer = np.zeros((self.W, self.H, 3), dtype=np.float32) + self.need_update = True # camera moved, should reset accumulation + self.mode = "image" # choose from ['image', 'depth'] + + self.dynamic_resolution = True + self.downscale = 1 + self.train_steps = 16 + + self.flip = "fabien" in self.system.cfg.dataset.collection or "tarot" in self.system.cfg.dataset.collection + self.transpose = "fabien" in self.system.cfg.dataset.collection + + self.pan_dx = 0 + self.pan_dy = 0 + + self.rot_dx = 0 + self.rot_dy = 0 + + dpg.create_context() # TODO: enable again + self.register_dpg() + self.test_step() + + def __del__(self): + # return + dpg.destroy_context() # TODO: enable again + + def prepare_buffer(self, outputs): + return outputs["image"] + + def test_step(self): + # TODO: seems we have to move data from GPU --> CPU --> GPU? + self.need_update = True + + loop_length = 2.0 + t = (time.time() % 2.0) / 2.0 + + if self.need_update: + starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True) + starter.record() + + # Width, height, intrinsics + W = min(int(self.W * self.downscale), self.W) + H = min(int(self.H * self.downscale), self.H) + + K = np.eye(3) + K[0, 0] = self.cam.intrinsics[0] * self.downscale + K[1, 1] = self.cam.intrinsics[1] * self.downscale + K[0, 2] = self.cam.intrinsics[2] * self.downscale + K[1, 2] = self.cam.intrinsics[3] * self.downscale + + # Get coords + num_frames = self.system.cfg.dataset.num_frames if "num_frames" in self.system.cfg.dataset else 2 + coords = self.train_dataset.get_coords_from_camera( + self.cam.pose, np.round(t * num_frames - 1) / (num_frames - 1), 0, K, W, H, "cuda" + ) + # coords = self.train_dataset.get_coords(0).to('cuda') + + # Run forward + rgb_output = self.system(coords)["rgb"].view(H, W, 3).cpu().numpy() + # rgb_output = self.system(coords)['rgb'].view(567, 1008, 3).cpu().numpy() + # rgb_output = self.system(coords)['rgb'].view(512, 512, 3).cpu().numpy() + + ender.record() + torch.cuda.synchronize() + t = starter.elapsed_time(ender) + + # Write out image (temporary) + # rgb_output = cv2.cvtColor(rgb_output, cv2.COLOR_BGR2RGB) + # cv2.imwrite('tmp.png', np.uint8(rgb_output * 255)) + + # update dynamic resolution + if self.dynamic_resolution: + # max allowed infer time per-frame is 200 ms + full_t = t / (self.downscale**2) + downscale = min(1, max(1 / 4, math.sqrt(200 / full_t))) + + if downscale > self.downscale * 1.2 or downscale < self.downscale * 0.8: + self.downscale = downscale + + if self.need_update: + + if self.transpose: + rgb_output = rgb_output.transpose(1, 0, 2) + + if self.flip: + rgb_output = np.flip(rgb_output, axis=0) + + self.render_buffer = 
np.ascontiguousarray(rgb_output).astype(np.float32) + + self.need_update = False + + print(f"{t:.4f}ms ({int(1000/t)} FPS)") + # return + + dpg.set_value("_log_infer_time", f"{t:.4f}ms ({int(1000/t)} FPS)") + dpg.set_value("_log_resolution", f"{int(self.downscale * self.W)}x{int(self.downscale * self.H)}") + dpg.set_value("_texture", self.render_buffer) + + def register_dpg(self): + # return + + ### register texture + + with dpg.texture_registry(show=False): + dpg.add_raw_texture(self.W, self.H, self.render_buffer, format=dpg.mvFormat_Float_rgb, tag="_texture") + + ### register window + + # the rendered image, as the primary window + with dpg.window(tag="_primary_window", width=self.W, height=self.H): + + # add the texture + dpg.add_image("_texture") + + dpg.set_primary_window("_primary_window", True) + + # control window + with dpg.window(label="Control", tag="_control_window", width=400, height=300): + + # button theme + with dpg.theme() as theme_button: + with dpg.theme_component(dpg.mvButton): + dpg.add_theme_color(dpg.mvThemeCol_Button, (23, 3, 18)) + dpg.add_theme_color(dpg.mvThemeCol_ButtonHovered, (51, 3, 47)) + dpg.add_theme_color(dpg.mvThemeCol_ButtonActive, (83, 18, 83)) + dpg.add_theme_style(dpg.mvStyleVar_FrameRounding, 5) + dpg.add_theme_style(dpg.mvStyleVar_FramePadding, 3, 3) + + # time + with dpg.group(horizontal=True): + dpg.add_text("Infer time: ") + dpg.add_text("no data", tag="_log_infer_time") + + # rendering options + with dpg.collapsing_header(label="Options", default_open=True): + + # dynamic rendering resolution + with dpg.group(horizontal=True): + + def callback_set_dynamic_resolution(sender, app_data): + if self.dynamic_resolution: + self.dynamic_resolution = False + self.downscale = 1 + else: + self.dynamic_resolution = True + + self.need_update = True + + dpg.add_checkbox( + label="dynamic resolution", + default_value=self.dynamic_resolution, + callback=callback_set_dynamic_resolution, + ) + dpg.add_text(f"{self.W}x{self.H}", tag="_log_resolution") + + # mode combo + def callback_change_mode(sender, app_data): + self.mode = app_data + self.need_update = True + + dpg.add_combo(("image", "depth"), label="mode", default_value=self.mode, callback=callback_change_mode) + + # fov slider + def callback_set_fovy(sender, app_data): + self.cam.fovy = app_data + self.need_update = True + + dpg.add_slider_int( + label="FoV (vertical)", + min_value=1, + max_value=120, + format="%d deg", + default_value=self.cam.fovy, + callback=callback_set_fovy, + ) + + # debug info + if self.debug: + with dpg.collapsing_header(label="Debug"): + # pose + dpg.add_separator() + dpg.add_text("Camera Pose:") + dpg.add_text(str(self.cam.pose), tag="_log_pose") + + ### register camera handler + + def callback_camera_drag_rotate(sender, app_data): + + if not dpg.is_item_focused("_primary_window"): + return + + dx = app_data[1] * 0.5 + dy = app_data[2] * 0.5 + + if self.transpose: + tmp = dx + dx = dy + dy = tmp + + if self.flip: + dy = -dy + + rot_dx = dx - self.rot_dx + rot_dy = dy - self.rot_dy + + self.cam.orbit(rot_dx, rot_dy) + self.need_update = True + + self.rot_dx = dx + self.rot_dy = dy + + if self.debug: + dpg.set_value("_log_pose", str(self.cam.pose)) + + def callback_camera_wheel_scale(sender, app_data): + + if not dpg.is_item_focused("_primary_window"): + return + + delta = app_data + + self.cam.scale(delta) + self.need_update = True + + if self.debug: + dpg.set_value("_log_pose", str(self.cam.pose)) + + def callback_camera_drag_pan(sender, app_data): + + if not 
dpg.is_item_focused("_primary_window"): + return + + # dx = app_data[1] * 4.5 + # dy = app_data[2] * 4.5 + dx = app_data[1] * 15.0 * self.pan_factor + dy = app_data[2] * 15.0 * self.pan_factor + + if self.transpose: + tmp = dx + dx = dy + dy = tmp + + if self.flip: + dx = -dx + + pan_dx = dx - self.pan_dx + pan_dy = dy - self.pan_dy + + self.cam.pan(pan_dx, pan_dy) + self.need_update = True + + self.pan_dx = dx + self.pan_dy = dy + + if self.debug: + dpg.set_value("_log_pose", str(self.cam.pose)) + + def callback_mouse_down(sender, app_data): + self.pan_dx = 0 + self.pan_dy = 0 + + self.rot_dx = 0 + self.rot_dy = 0 + + with dpg.handler_registry(): + dpg.add_mouse_click_handler(callback=callback_mouse_down) + dpg.add_mouse_drag_handler(button=dpg.mvMouseButton_Left, callback=callback_camera_drag_rotate) + dpg.add_mouse_wheel_handler(callback=callback_camera_wheel_scale) + dpg.add_mouse_drag_handler(button=dpg.mvMouseButton_Middle, callback=callback_camera_drag_pan) + + dpg.create_viewport(title="HyperReel", width=self.W, height=self.H, resizable=False) + + ### global theme + with dpg.theme() as theme_no_padding: + with dpg.theme_component(dpg.mvAll): + # set all padding to 0 to avoid scroll bar + dpg.add_theme_style(dpg.mvStyleVar_WindowPadding, 0, 0, category=dpg.mvThemeCat_Core) + dpg.add_theme_style(dpg.mvStyleVar_FramePadding, 0, 0, category=dpg.mvThemeCat_Core) + dpg.add_theme_style(dpg.mvStyleVar_CellPadding, 0, 0, category=dpg.mvThemeCat_Core) + + dpg.bind_item_theme("_primary_window", theme_no_padding) + + dpg.setup_dearpygui() + + # dpg.show_metrics() + + dpg.show_viewport() + + def render(self): + # while True: + # self.test_step() + + while dpg.is_dearpygui_running(): + self.test_step() + dpg.render_dearpygui_frame() diff --git a/utils/intersect_utils.py b/utils/intersect_utils.py new file mode 100644 index 0000000..82b6912 --- /dev/null +++ b/utils/intersect_utils.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 hawkey +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import numpy as np +import torch +import torch.nn.functional as F + + +def sort_z(z_vals, dim: int, descending: bool): + sort_idx = torch.argsort(z_vals, dim=dim, descending=descending) + z_vals = torch.gather(z_vals, -1, sort_idx) + + return z_vals, sort_idx + + +def sort_with(sort_idx, points): + points = points.permute(0, 2, 1) + sort_idx = sort_idx.unsqueeze(1).repeat(1, points.shape[1], 1) + points = torch.gather(points, -1, sort_idx) + return points.permute(0, 2, 1) + + +def dot(a, b, axis=-1): + return torch.sum(a * b, dim=axis) + + +def min_sphere_radius(rays, origin): + rays_o, rays_d = rays[..., :3] - origin, rays[..., 3:6] + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + m = torch.cross(rays_o, rays_d, dim=-1) + rays_o = torch.cross(rays_d, m, dim=-1) + return torch.linalg.norm(rays_o, dim=-1) + + +def min_cylinder_radius(rays, origin): + rays_o, rays_d = rays[..., 0:3] - origin, rays[..., 3:6] + rays_o = torch.cat([rays_o[..., 0:1], torch.zeros_like(rays[..., 1:2]), rays_o[..., 2:3]], -1) + rays_d = torch.cat([rays_d[..., 0:1], torch.zeros_like(rays[..., 1:2]), rays_d[..., 2:3]], -1) + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + m = torch.cross(rays_o, rays_d, dim=-1) + rays_o = torch.cross(rays_d, m, dim=-1) + return torch.linalg.norm(rays_o, dim=-1) + + +def intersect_sphere(rays, origin, radius, continuous=False): + rays_o, rays_d = rays[..., 0:3] - origin, rays[..., 3:6] + o = rays_o + d = rays_d + + dot_o_o = dot(o, o) + dot_d_d = dot(d, d) + dot_o_d = dot(o, d) + + a = dot_d_d + b = 2 * dot_o_d + c = dot_o_o - radius * radius + disc = b * b - 4 * a * c + + if continuous: + disc = torch.abs(disc) + else: + disc = torch.where(disc < 0, torch.zeros_like(disc), disc) + + t1 = (-b + torch.sqrt(disc + 1e-8)) / (2 * a) + t2 = (-b - torch.sqrt(disc + 1e-8)) / (2 * a) + + t1 = torch.where(disc <= 0, torch.zeros_like(t1), t1) + t2 = torch.where(disc <= 0, torch.zeros_like(t2), t2) + + t = torch.where((t2 < 0) | (radius < 0), t1, t2) + + return t + + +def intersect_cylinder(rays, origin, radius, continuous=False): + rays_o, rays_d = rays[..., 0:3] - origin, rays[..., 3:6] + o = torch.cat([rays_o[..., 0:1], rays_o[..., 2:3]], -1) + d = torch.cat([rays_d[..., 0:1], rays_d[..., 2:3]], -1) + + dot_o_o = dot(o, o) + dot_d_d = dot(d, d) + dot_o_d = dot(o, d) + + a = dot_d_d + b = 2 * dot_o_d + c = dot_o_o - radius * radius + disc = b * b - 4 * a * c + + if continuous: + disc = torch.abs(disc) + else: + disc = torch.where(disc < 0, torch.zeros_like(disc), disc) + + t1 = (-b + torch.sqrt(disc + 1e-8)) / (2 * a) + t2 = (-b - torch.sqrt(disc + 1e-8)) / (2 * a) + + t1 = torch.where(disc <= 0, torch.zeros_like(t1), t1) + t2 = torch.where(disc <= 0, torch.zeros_like(t2), t2) + + t = torch.where((t2 < 0) | (radius < 0), t1, t2) # TODO: Maybe change + + return t + + +def intersect_axis_plane( + rays, + val, + dim, + exclude=False, +): + + # Calculate intersection + rays_o, rays_d = rays[..., :3], rays[..., 3:6] + rays_d = torch.where(torch.abs(rays_d) < 1e-5, torch.ones_like(rays_d) * 1e12, rays_d) + + t = (val - rays_o[..., dim]) / rays_d[..., dim] + # t = torch.where( + # t < 1e-5, + # torch.zeros_like(t), + # t + # ) + + # Return + return t + + +def intersect_voxel_grid( + rays, + origin, + val, +): + + rays_o, rays_d = rays[..., :3] - origin, rays[..., 3:6] + + # Mask out invalid + rays_d = torch.where(torch.abs(rays_d) < 1e-5, torch.ones_like(rays_d) * 1e12, rays_d) + + # Calculate intersection + t = (val - rays_o) / 
rays_d + # t = torch.where( + # (t < 1e-5), + # torch.zeros_like(t), + # t + # ) + + # Reshape + t = t.view(t.shape[0], -1) + + # Return + return t + + +def intersect_max_axis_plane( + rays, + max_dir, + origin, + val, +): + + # Calculate intersection + rays_o, rays_d = rays[..., :3] - origin, rays[..., 3:6] + rays_d = torch.where(torch.abs(rays_d) < 1e-5, torch.ones_like(rays_d) * 1e12, rays_d) + + t = (val - rays_o) / rays_d + t = torch.where(t < 1e-8, torch.zeros_like(t), t) + t = torch.gather(t, 2, max_dir).reshape(t.shape[0], -1) + + # Reshape + t = t.view(t.shape[0], -1) + + # Return + return t + + +def intersect_plane( + rays, + normal, + distance, +): + # Calculate intersection + rays_o, rays_d = rays[..., :3], rays[..., 3:6] + o_dot_n = dot(rays_o, normal) + d_dot_n = dot(rays_d, normal) + d_dot_n = torch.where(torch.abs(d_dot_n) < 1e-5, torch.ones_like(d_dot_n) * 1e12, d_dot_n) + + t = (distance - o_dot_n) / (d_dot_n) + # t = torch.where( + # t < 1e-5, + # torch.zeros_like(t), + # t + # ) + + # Reshape + t = t.view(t.shape[0], -1) + + # Return + return t diff --git a/utils/pose_utils.py b/utils/pose_utils.py new file mode 100644 index 0000000..3f349ad --- /dev/null +++ b/utils/pose_utils.py @@ -0,0 +1,318 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 hawkey +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import numpy as np +import scipy.linalg + + +def normalize(v): + return v / np.linalg.norm(v) + + +def average_poses(poses): + # 1. Compute the center + center = poses[..., 3].mean(0) # (3) + + # 2. Compute the z axis + z = normalize(poses[..., 2].mean(0)) # (3) + + # 3. Compute axis y' (no need to normalize as it's not the final output) + y_ = poses[..., 1].mean(0) # (3) + + # 4. Compute the x axis + x = normalize(np.cross(y_, z)) # (3) + + # 5. 
Compute the y axis (as z and x are normalized, y is already of norm 1) + y = np.cross(z, x) # (3) + + R = np.stack([x, y, z], 1) + + center = center[..., None] + # center = (-R @ center[..., None]) + + pose_avg = np.concatenate([R, center], 1) # (3, 4) + + return pose_avg + + +def viewmatrix(z, up, pos): + vec2 = normalize(z) + vec1_avg = up + vec0 = normalize(np.cross(vec1_avg, vec2)) + vec1 = normalize(np.cross(vec2, vec0)) + m = np.stack([vec0, vec1, vec2, pos], 1) + return m + + +def center_poses(poses): + pose_avg = average_poses(poses) # (3, 4) + pose_avg_homo = np.eye(4) + pose_avg_homo[:3] = pose_avg + last_row = np.tile(np.array([0, 0, 0, 1]), (len(poses), 1, 1)) # (N_images, 1, 4) + poses_homo = np.concatenate([poses, last_row], 1) # (N_images, 4, 4) homogeneous coordinate + + poses_centered = np.linalg.inv(pose_avg_homo) @ poses_homo # (N_images, 4, 4) + poses_centered = poses_centered[:, :3] # (N_images, 3, 4) + + return poses_centered, np.linalg.inv(pose_avg_homo) + + +def center_poses_with(poses, train_poses, avg_pose=None): + if avg_pose is None: + pose_avg = average_poses(train_poses) # (3, 4) + pose_avg_homo = np.eye(4) + pose_avg_homo[:3] = pose_avg + inv_pose = np.linalg.inv(pose_avg_homo) + else: + inv_pose = np.copy(avg_pose) + + last_row = np.tile(np.array([0, 0, 0, 1]), (len(poses), 1, 1)) # (N_images, 1, 4) + poses_homo = np.concatenate([poses, last_row], 1) # (N_images, 4, 4) homogeneous coordinate + poses_centered = inv_pose @ poses_homo # (N_images, 4, 4) + poses_centered = poses_centered[:, :3] # (N_images, 3, 4) + + return poses_centered, inv_pose + + +def center_poses_with_rotation_only(poses, train_poses): + pose_avg = average_poses(train_poses) # (3, 4) + pose_avg_homo = np.eye(4) + pose_avg_homo[:3, :3] = pose_avg[:3, :3] + last_row = np.tile(np.array([0, 0, 0, 1]), (len(poses), 1, 1)) # (N_images, 1, 4) + poses_homo = np.concatenate([poses, last_row], 1) # (N_images, 4, 4) homogeneous coordinate + + poses_centered = np.linalg.inv(pose_avg_homo) @ poses_homo # (N_images, 4, 4) + poses_centered = poses_centered[:, :3] # (N_images, 3, 4) + + return poses_centered, np.linalg.inv(pose_avg_homo) + + +def center_poses_reference(poses): + pose_avg = average_poses(poses) # (3, 4) + pose_avg_homo = np.eye(4) + + pose_avg_homo[:3] = pose_avg + + last_row = np.tile(np.array([0, 0, 0, 1]), (len(poses), 1, 1)) # (N_images, 1, 4) + poses_homo = np.concatenate([poses, last_row], 1) # (N_images, 4, 4) homogeneous coordinate + + # Get reference + dists = np.sum(np.square(pose_avg[:3, 3] - poses[:, :3, 3]), -1) + reference_view_id = np.argmin(dists) + pose_avg_homo = poses_homo[reference_view_id] + + poses_centered = np.linalg.inv(pose_avg_homo) @ poses_homo # (N_images, 4, 4) + poses_centered = poses_centered[:, :3] # (N_images, 3, 4) + + return poses_centered, np.linalg.inv(pose_avg_homo) + + +def create_rotating_spiral_poses( + camera_offset, poses, pose_rad, spiral_rads, focal, theta_range, N=240, rots=4, flip=False +): + # Camera offset and up + camera_offset = np.array(camera_offset) + up = normalize(poses[:, :3, 1].sum(0)) + + # Radii in X, Y, Z + render_poses = [] + spiral_rads = np.array(list(spiral_rads) + [1.0]) + + # Pose, spiral angle + pose_thetas = np.linspace(np.pi * theta_range[0], np.pi * theta_range[1], N, endpoint=False) + + spiral_thetas = np.linspace(0, 2 * np.pi * rots, N, endpoint=False) + + # Create poses + for pose_theta, spiral_theta in zip(pose_thetas, spiral_thetas): + # Central cylindrical pose + pose_x, pose_z = ( + np.sin(pose_theta) * 
pose_rad, + -np.cos(pose_theta) * pose_rad, + ) + pose_y = 0 + + pose_center = np.array([pose_x, pose_y, pose_z]) + camera_offset + pose_forward = np.array([-pose_x, -pose_y, -pose_z]) + c2w = viewmatrix(pose_forward, up, pose_center) + + # Spiral pose + c = np.dot( + c2w[:3, :4], + np.array([np.cos(spiral_theta), -np.sin(spiral_theta), -np.sin(spiral_theta * 0.5), 1.0]) * spiral_rads, + ) + + z = normalize(c - np.dot(c2w[:3, :4], np.array([0, 0, -focal, 1.0]))) + render_poses.append(viewmatrix(z, up, c)) + + return render_poses + + +def create_spiral_poses(poses, rads, focal, N=120, flip=False): + c2w = average_poses(poses) + up = normalize(poses[:, :3, 1].sum(0)) + rots = 2 + + render_poses = [] + rads = np.array(list(rads) + [1.0]) + + for theta in np.linspace(0.0, 2.0 * np.pi * rots, N + 1)[:-1]: + c = np.dot(c2w[:3, :4], np.array([np.cos(theta), -np.sin(theta), -np.sin(theta * 0.5), 1.0]) * rads) + + if flip: + z = normalize(np.dot(c2w[:3, :4], np.array([0, 0, focal, 1.0])) - c) + else: + z = normalize(c - np.dot(c2w[:3, :4], np.array([0, 0, -focal, 1.0]))) + + render_poses.append(viewmatrix(z, up, c)) + + return render_poses + + +def create_spherical_poses(radius, n_poses=120): + def spherical_pose(theta, phi, radius): + def trans_t(t): + return np.array( + [ + [1, 0, 0, 0], + [0, 1, 0, -0.9 * t], + [0, 0, 1, t], + [0, 0, 0, 1], + ] + ) + + def rot_phi(phi): + return np.array( + [ + [1, 0, 0, 0], + [0, np.cos(phi), -np.sin(phi), 0], + [0, np.sin(phi), np.cos(phi), 0], + [0, 0, 0, 1], + ] + ) + + def rot_theta(th): + return np.array( + [ + [np.cos(th), 0, -np.sin(th), 0], + [0, 1, 0, 0], + [np.sin(th), 0, np.cos(th), 0], + [0, 0, 0, 1], + ] + ) + + c2w = rot_theta(theta) @ rot_phi(phi) @ trans_t(radius) + c2w = np.array([[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) @ c2w + return c2w[:3] + + spherical_poses = [] + + for th in np.linspace(0, 2 * np.pi, n_poses + 1)[:-1]: + spherical_poses += [spherical_pose(th, -np.pi / 5, radius)] # 36 degree view downwards + + return np.stack(spherical_poses, 0) + + +def correct_poses_bounds(poses, bounds, flip=True, use_train_pose=False, center=True, train_poses=None): + # Original poses has rotation in form "down right back", change to "right up back" + # See https://github.com/bmild/nerf/issues/34 + if flip: + poses = np.concatenate([poses[..., 1:2], -poses[..., :1], poses[..., 2:4]], -1) + + # See https://github.com/bmild/nerf/issues/34 + if train_poses is None: + near_original = bounds.min() + scale_factor = near_original * 0.75 # 0.75 is the default parameter + bounds /= scale_factor + poses[..., :3, 3] /= scale_factor + + # Recenter + if center: + if use_train_pose: + if train_poses is not None: + poses, ref_pose = center_poses_with(poses, train_poses) + else: + poses, ref_pose = center_poses_reference(poses) + else: + poses, ref_pose = center_poses(poses) + else: + ref_pose = poses[0] + + return poses, ref_pose, bounds + + +# Assumes centered poses +def get_bounding_sphere(poses): + dists = np.linalg.norm(poses[:, :3, -1], axis=-1) + return dists.max() + + +def get_bounding_box(poses): + min_x, max_x = poses[:, 0, -1].min(), poses[:, 0, -1].max() + min_y, max_y = poses[:, 1, -1].min(), poses[:, 1, -1].max() + min_z, max_z = poses[:, 2, -1].min(), poses[:, 2, -1].max() + + return [min_x, min_y, min_z, max_x, max_y, max_z] + + +def p34_to_44(p): + return np.concatenate([p, np.tile(np.reshape(np.eye(4)[-1, :], [1, 1, 4]), [p.shape[0], 1, 1])], 1) + + +def poses_to_twists(poses): + twists = [] + + for i in range(poses.shape[0]): + M 
= scipy.linalg.logm(poses[i]) + twist = np.stack( + [ + M[..., 2, 1], + M[..., 0, 2], + M[..., 1, 0], + M[..., 0, 3], + M[..., 1, 3], + M[..., 2, 3], + ], + axis=-1, + ) + twists.append(twist) + + return np.stack(twists, 0) + + +def twists_to_poses(twists): + poses = [] + + for i in range(twists.shape[0]): + twist = twists[i] + null = np.zeros_like(twist[..., 0]) + + M = np.stack( + [ + np.stack([null, twist[..., 2], -twist[..., 1], null], axis=-1), + np.stack([-twist[..., 2], null, twist[..., 0], null], axis=-1), + np.stack([twist[..., 1], -twist[..., 0], null, null], axis=-1), + np.stack([twist[..., 3], twist[..., 4], twist[..., 5], null], axis=-1), + ], + axis=-1, + ) + + poses.append(scipy.linalg.expm(M)) + + return np.stack(poses, 0) + + +def interpolate_poses(poses, supersample): + t = np.linspace(0, 1, supersample, endpoint=False).reshape(1, supersample, 1) + twists = poses_to_twists(p34_to_44(poses)) + + interp_twists = twists.reshape(-1, 1, twists.shape[-1]) + interp_twists = (1 - t) * interp_twists[:-1] + t * interp_twists[1:] + interp_twists = interp_twists.reshape(-1, twists.shape[-1]) + interp_twists = np.concatenate([interp_twists, np.tile(twists[-1:], [supersample, 1])], 0) + + return twists_to_poses(interp_twists)[:, :3, :4] diff --git a/utils/ray_utils.py b/utils/ray_utils.py new file mode 100644 index 0000000..9065c0f --- /dev/null +++ b/utils/ray_utils.py @@ -0,0 +1,340 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 hawkey +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import numpy as np +import torch +import torch.nn.functional as F +from kornia import create_meshgrid + + +def get_lightfield_rays( + U, + V, + s, + t, + aspect, + st_scale=1.0, + uv_scale=1.0, + near=-1, + far=0, + use_inf=False, + center_u=0.0, + center_v=0.0, +): + u = torch.linspace(-1, 1, U, dtype=torch.float32) + v = torch.linspace(1, -1, V, dtype=torch.float32) / aspect + + vu = list(torch.meshgrid([v, u])) + u = vu[1] * uv_scale + v = vu[0] * uv_scale + s = torch.ones_like(vu[1]) * s * st_scale + t = torch.ones_like(vu[0]) * t * st_scale + + rays = torch.stack( + [ + s, + t, + near * torch.ones_like(s), + u - s, + v - t, + (far - near) * torch.ones_like(s), + ], + axis=-1, + ).view(-1, 6) + + return torch.cat([rays[..., 0:3], torch.nn.functional.normalize(rays[..., 3:6], p=2.0, dim=-1)], -1) + + +def get_epi_rays( + U, + v, + S, + t, + aspect, + st_scale=1.0, + uv_scale=1.0, + near=-1, + far=0, + use_inf=False, + center_u=0.0, + center_v=0.0, +): + u = torch.linspace(-1, 1, U, dtype=torch.float32) + s = torch.linspace(-1, 1, S, dtype=torch.float32) / aspect + + su = list(torch.meshgrid([s, u])) + u = su[1] * uv_scale + v = torch.ones_like(su[0]) * v * uv_scale + s = su[0] * st_scale + t = torch.ones_like(su[0]) * t * st_scale + + rays = torch.stack( + [ + s, + t, + near * torch.ones_like(s), + u - s, + v - t, + (far - near) * torch.ones_like(s), + ], + axis=-1, + ).view(-1, 6) + + return torch.cat([rays[..., 0:3], torch.nn.functional.normalize(rays[..., 3:6], p=2.0, dim=-1)], -1) + + +def get_pixels_for_image(H, W, device="cpu"): + grid = create_meshgrid(H, W, normalized_coordinates=False, device=device)[0] + + return grid + + +def get_random_pixels(n_pixels, H, W, device="cpu"): + grid = torch.rand(n_pixels, 2, device=device) + + i, j = grid.unbind(-1) + grid[..., 0] = grid[..., 0] * (W - 1) + grid[..., 1] = grid[..., 1] * (H - 
1) + + return grid + + +def get_ray_directions_from_pixels_K(grid, K, centered_pixels=False, flipped=False): + i, j = grid.unbind(-1) + + offset_x = 0.5 if centered_pixels else 0.0 + offset_y = 0.5 if centered_pixels else 0.0 + + directions = torch.stack( + [ + (i - K[0, 2] + offset_x) / K[0, 0], + (-(j - K[1, 2] + offset_y) / K[1, 1]) if not flipped else (j - K[1, 2] + offset_y) / K[1, 1], + -torch.ones_like(i), + ], + -1, + ) + + return directions + + +def get_ray_directions_K(H, W, K, centered_pixels=False, flipped=False, device="cpu"): + grid = create_meshgrid(H, W, normalized_coordinates=False, device=device)[0] + return get_ray_directions_from_pixels_K(grid, K, centered_pixels, flipped=flipped) + + +def get_rays(directions, c2w, normalize=True): + # Implementation: https://github.com/kwea123/nerf_pl + + # Rotate ray directions from camera coordinate to the world coordinate + rays_d = directions @ c2w[:, :3].T # (H, W, 3) + if normalize: + rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + # The origin of all rays is the camera origin in world coordinate + rays_o = c2w[:, 3].expand(rays_d.shape) # (H, W, 3) + + rays_d = rays_d.view(-1, 3) + rays_o = rays_o.view(-1, 3) + + return rays_o, rays_d + + +def get_ndc_rays_fx_fy(H, W, fx, fy, near, rays): + rays_o, rays_d = rays[..., 0:3], rays[..., 3:6] + + # Shift ray origins to near plane + t = -(near + rays_o[..., 2]) / rays_d[..., 2] + rays_o = rays_o + t[..., None] * rays_d + + # o_z = -near + # (o_z / (1 - t') - o_z) / d_z + + # Store some intermediate homogeneous results + ox_oz = rays_o[..., 0] / rays_o[..., 2] + oy_oz = rays_o[..., 1] / rays_o[..., 2] + + # Projection + o0 = -1.0 / (W / (2.0 * fx)) * ox_oz + o1 = -1.0 / (H / (2.0 * fy)) * oy_oz + o2 = 1.0 + 2.0 * near / rays_o[..., 2] + + d0 = -1.0 / (W / (2.0 * fx)) * (rays_d[..., 0] / rays_d[..., 2] - ox_oz) + d1 = -1.0 / (H / (2.0 * fy)) * (rays_d[..., 1] / rays_d[..., 2] - oy_oz) + d2 = 1 - o2 + + rays_o = torch.stack([o0, o1, o2], -1) # (B, 3) + rays_d = torch.stack([d0, d1, d2], -1) # (B, 3) + # rays_d = torch.nn.functional.normalize(rays_d, p=2.0, dim=-1) + + return torch.cat([rays_o, rays_d], -1) + + +def sample_images_at_xy(images, xy_grid, H, W, mode="bilinear", padding_mode="border"): + batch_size = images.shape[0] + spatial_size = images.shape[1:-1] + + xy_grid = torch.clone(xy_grid.reshape(batch_size, -1, 1, 2)) + xy_grid[..., 0] = (xy_grid[..., 0] / (W - 1)) * 2 - 1 + xy_grid[..., 1] = (xy_grid[..., 1] / (H - 1)) * 2 - 1 + + images_sampled = torch.nn.functional.grid_sample( + images.permute(0, 3, 1, 2), + xy_grid, + align_corners=False, + mode=mode, + padding_mode=padding_mode, + ) + + return images_sampled.permute(0, 2, 3, 1).view(-1, images.shape[-1]) + + +def dot(a, b, axis=-1, keepdim=False): + return torch.sum(a * b, dim=axis, keepdim=keepdim) + + +def reflect(dirs, normal): + dir_dot_normal = dot(-dirs, normal, keepdim=True) * normal + return 2 * dir_dot_normal + dirs + + +def get_stats(rays): + return (rays.mean(0), rays.std(0)) + + +def get_weight_map(rays, jitter_rays, cfg, weights=None, softmax=True): + ray_dim = rays.shape[-1] // 2 + + # Angles + angles = torch.acos(torch.clip(dot(rays[..., ray_dim:], jitter_rays[..., ray_dim:]), -1 + 1e-8, 1 - 1e-8)).detach() + + # Distances + dists = torch.linalg.norm(rays[..., :ray_dim] - jitter_rays[..., :ray_dim], dim=-1).detach() + + # Weights + if weights is None: + weights = torch.zeros_like(angles) + + if softmax: + weights = torch.nn.functional.softmax( + 0.5 * -(torch.square(angles / cfg.angle_std) 
+ torch.square(dists / cfg.dist_std)) + weights, dim=0 + )[..., None] + else: + # print("Angle:", angles.max(), angles.mean(), cfg.angle_std) + # print("Dist:", dists.max(), dists.mean(), cfg.dist_std) + + weights = torch.exp( + 0.5 * -(torch.square(angles / cfg.angle_std) + torch.square(dists / cfg.dist_std)) + weights + )[..., None] + + # Normalization constant + constant = np.power(2 * np.pi * cfg.angle_std * cfg.angle_std, -1.0 / 2.0) * np.power( + 2 * np.pi * cfg.dist_std * cfg.dist_std, -1.0 / 2.0 + ) + + return weights / constant + + +def compute_sigma_angle(query_ray, rays, angle_std=-1): + # Angles + angles = torch.acos(torch.clip(dot(rays, query_ray), -1 + 1e-8, 1 - 1e-8)) + + # Calculate angle std + if angle_std < 0: + mean_ray = torch.nn.functional.normalize(rays.mean(1).unsqueeze(1), dim=-1) + mean_angles = torch.acos(torch.clip(dot(mean_ray, query_ray), -1 + 1e-8, 1 - 1e-8)) + + angle_std, _ = torch.median(torch.abs(mean_angles), dim=1, keepdim=True) + print(angle_std[0]) + c = torch.pow(2 * np.pi * angle_std * angle_std, -1.0 / 2.0) + else: + c = np.power(2 * np.pi * angle_std * angle_std, -1.0 / 2.0) + + # Weights + weights = torch.exp(0.5 * -(torch.square(angles / angle_std)))[..., None] + weights = c * weights.mean(1) + + return weights * c + + +def compute_sigma_dot(query_ray, rays, dot_std=-1): + # Dots + dots = torch.clip(dot(rays, query_ray), -1 + 1e-8, 1 - 1e-8) + + # Calculate dot std + if dot_std < 0: + mean_ray = torch.nn.functional.normalize(rays.mean(1).unsqueeze(1), dim=-1) + mean_dots = torch.clip(dot(mean_ray, query_ray), -1 + 1e-8, 1 - 1e-8) + + dot_std, _ = torch.median(torch.abs(1 - mean_dots), dim=1, keepdim=True) + print(dot_std[0]) + + c = torch.pow(2 * np.pi * dot_std * dot_std, -1.0 / 2.0) + else: + c = np.power(2 * np.pi * dot_std * dot_std, -1.0 / 2.0) + + # Weights + weights = torch.exp(0.5 * -(torch.square((1 - dots) / dot_std)))[..., None] + weights = c * weights.mean(1) + + return weights * c + + +def weighted_stats(rgb, weights): + weights_sum = weights.sum(0) + rgb_mean = (rgb * weights).sum(0) / weights_sum + rgb_mean = torch.where(weights_sum == 0, torch.zeros_like(rgb_mean), rgb_mean) + + diff = rgb - rgb_mean.unsqueeze(0) + rgb_var = (diff * diff * weights).sum(0) / weights_sum + rgb_var = torch.where(weights_sum == 0, torch.zeros_like(rgb_var), rgb_var) + + return rgb_mean, rgb_var + + +def jitter_ray_origins(rays, jitter): + ray_dim = 3 + + pos_rand = torch.randn((rays.shape[0], jitter.bundle_size, ray_dim), device=rays.device) * jitter.pos + + rays = rays.view(rays.shape[0], -1, rays.shape[-1]) + + if rays.shape[1] == 1: + rays = rays.repeat(1, jitter.bundle_size, 1) + + rays_o = rays[..., :ray_dim] + pos_rand.type_as(rays) + + return torch.cat([rays_o, rays[..., ray_dim:]], -1) + + +def jitter_ray_directions(rays, jitter): + ray_dim = 3 + + dir_rand = torch.randn((rays.shape[0], jitter.bundle_size, ray_dim), device=rays.device) * jitter.dir + + rays = rays.view(rays.shape[0], -1, rays.shape[-1]) + + if rays.shape[1] == 1: + rays = rays.repeat(1, jitter.bundle_size, 1) + + rays_d = rays[..., ray_dim : 2 * ray_dim] + dir_rand.type_as(rays) + rays_d = F.normalize(rays_d, dim=-1) + + return torch.cat([rays[..., :ray_dim], rays_d], -1) + + +def from_ndc(t_p, rays, near): + t = (near / (1 - t_p) - near) / rays[..., 5, None] + t = t + (near - rays[..., None, 2]) / rays[..., None, 5] + return t + + +def get_ray_density(sigma, ease_iters, cur_iter): + if cur_iter >= ease_iters: + return sigma + else: + w = min(max(float(ease_iters) / cur_iter, 
0.0), 1.0) + return sigma * w + (1 - w) diff --git a/utils/rotation_conversions.py b/utils/rotation_conversions.py new file mode 100644 index 0000000..6b17359 --- /dev/null +++ b/utils/rotation_conversions.py @@ -0,0 +1,568 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 hawkey +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from typing import Optional, Union + +import torch +import torch.nn.functional as F + +Device = Union[str, torch.device] + + +""" +The transformation matrices returned from the functions in this file assume +the points on which the transformation will be applied are column vectors. +i.e. the R matrix is structured as + + R = [ + [Rxx, Rxy, Rxz], + [Ryx, Ryy, Ryz], + [Rzx, Rzy, Rzz], + ] # (3, 3) + +This matrix can be applied to column vectors by post multiplication +by the points e.g. + + points = [[0], [1], [2]] # (3 x 1) xyz coordinates of a point + transformed_points = R * points + +To apply the same matrix to points which are row vectors, the R matrix +can be transposed and pre multiplied by the points: + +e.g. + points = [[0, 1, 2]] # (1 x 3) xyz coordinates of a point + transformed_points = points * R.transpose(1, 0) +""" + + +def quaternion_to_matrix(quaternions: torch.Tensor) -> torch.Tensor: + """ + Convert rotations given as quaternions to rotation matrices. + + Args: + quaternions: quaternions with real part first, + as tensor of shape (..., 4). + + Returns: + Rotation matrices as tensor of shape (..., 3, 3). + """ + r, i, j, k = torch.unbind(quaternions, -1) + # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`. + two_s = 2.0 / (quaternions * quaternions).sum(-1) + + o = torch.stack( + ( + 1 - two_s * (j * j + k * k), + two_s * (i * j - k * r), + two_s * (i * k + j * r), + two_s * (i * j + k * r), + 1 - two_s * (i * i + k * k), + two_s * (j * k - i * r), + two_s * (i * k - j * r), + two_s * (j * k + i * r), + 1 - two_s * (i * i + j * j), + ), + -1, + ) + return o.reshape(quaternions.shape[:-1] + (3, 3)) + + +def _copysign(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: + """ + Return a tensor where each element has the absolute value taken from the, + corresponding element of a, with sign taken from the corresponding + element of b. This is like the standard copysign floating-point operation, + but is not careful about negative 0 and NaN. + + Args: + a: source tensor. + b: tensor whose signs will be used, of the same shape as a. + + Returns: + Tensor of the same shape as a with the signs of b. + """ + signs_differ = (a < 0) != (b < 0) + return torch.where(signs_differ, -a, a) + + +def _sqrt_positive_part(x: torch.Tensor) -> torch.Tensor: + """ + Returns torch.sqrt(torch.max(0, x)) + but with a zero subgradient where x is 0. + """ + ret = torch.zeros_like(x) + positive_mask = x > 0 + ret[positive_mask] = torch.sqrt(x[positive_mask]) + return ret + + +def matrix_to_quaternion(matrix: torch.Tensor) -> torch.Tensor: + """ + Convert rotations given as rotation matrices to quaternions. + + Args: + matrix: Rotation matrices as tensor of shape (..., 3, 3). + + Returns: + quaternions with real part first, as tensor of shape (..., 4). 
+ """ + if matrix.size(-1) != 3 or matrix.size(-2) != 3: + raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.") + + batch_dim = matrix.shape[:-2] + m00, m01, m02, m10, m11, m12, m20, m21, m22 = torch.unbind(matrix.reshape(batch_dim + (9,)), dim=-1) + + q_abs = _sqrt_positive_part( + torch.stack( + [ + 1.0 + m00 + m11 + m22, + 1.0 + m00 - m11 - m22, + 1.0 - m00 + m11 - m22, + 1.0 - m00 - m11 + m22, + ], + dim=-1, + ) + ) + + # we produce the desired quaternion multiplied by each of r, i, j, k + quat_by_rijk = torch.stack( + [ + # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and + # `int`. + torch.stack([q_abs[..., 0] ** 2, m21 - m12, m02 - m20, m10 - m01], dim=-1), + # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and + # `int`. + torch.stack([m21 - m12, q_abs[..., 1] ** 2, m10 + m01, m02 + m20], dim=-1), + # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and + # `int`. + torch.stack([m02 - m20, m10 + m01, q_abs[..., 2] ** 2, m12 + m21], dim=-1), + # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and + # `int`. + torch.stack([m10 - m01, m20 + m02, m21 + m12, q_abs[..., 3] ** 2], dim=-1), + ], + dim=-2, + ) + + # We floor here at 0.1 but the exact level is not important; if q_abs is small, + # the candidate won't be picked. + flr = torch.tensor(0.1).to(dtype=q_abs.dtype, device=q_abs.device) + quat_candidates = quat_by_rijk / (2.0 * q_abs[..., None].max(flr)) + + # if not for numerical problems, quat_candidates[i] should be same (up to a sign), + # forall i; we pick the best-conditioned one (with the largest denominator) + + return quat_candidates[F.one_hot(q_abs.argmax(dim=-1), num_classes=4) > 0.5, :].reshape(batch_dim + (4,)) + + +def _axis_angle_rotation(axis: str, angle: torch.Tensor) -> torch.Tensor: + """ + Return the rotation matrices for one of the rotations about an axis + of which Euler angles describe, for each value of the angle given. + + Args: + axis: Axis label "X" or "Y or "Z". + angle: any shape tensor of Euler angles in radians + + Returns: + Rotation matrices as tensor of shape (..., 3, 3). + """ + + cos = torch.cos(angle) + sin = torch.sin(angle) + one = torch.ones_like(angle) + zero = torch.zeros_like(angle) + + if axis == "X": + R_flat = (one, zero, zero, zero, cos, -sin, zero, sin, cos) + elif axis == "Y": + R_flat = (cos, zero, sin, zero, one, zero, -sin, zero, cos) + elif axis == "Z": + R_flat = (cos, -sin, zero, sin, cos, zero, zero, zero, one) + else: + raise ValueError("letter must be either X, Y or Z.") + + return torch.stack(R_flat, -1).reshape(angle.shape + (3, 3)) + + +def euler_angles_to_matrix(euler_angles: torch.Tensor, convention: str) -> torch.Tensor: + """ + Convert rotations given as Euler angles in radians to rotation matrices. + + Args: + euler_angles: Euler angles in radians as tensor of shape (..., 3). + convention: Convention string of three uppercase letters from + {"X", "Y", and "Z"}. + + Returns: + Rotation matrices as tensor of shape (..., 3, 3). 
+ """ + if euler_angles.dim() == 0 or euler_angles.shape[-1] != 3: + raise ValueError("Invalid input euler angles.") + if len(convention) != 3: + raise ValueError("Convention must have 3 letters.") + if convention[1] in (convention[0], convention[2]): + raise ValueError(f"Invalid convention {convention}.") + for letter in convention: + if letter not in ("X", "Y", "Z"): + raise ValueError(f"Invalid letter {letter} in convention string.") + matrices = [_axis_angle_rotation(c, e) for c, e in zip(convention, torch.unbind(euler_angles, -1))] + # return functools.reduce(torch.matmul, matrices) + return torch.matmul(torch.matmul(matrices[0], matrices[1]), matrices[2]) + + +def _angle_from_tan(axis: str, other_axis: str, data, horizontal: bool, tait_bryan: bool) -> torch.Tensor: + """ + Extract the first or third Euler angle from the two members of + the matrix which are positive constant times its sine and cosine. + + Args: + axis: Axis label "X" or "Y or "Z" for the angle we are finding. + other_axis: Axis label "X" or "Y or "Z" for the middle axis in the + convention. + data: Rotation matrices as tensor of shape (..., 3, 3). + horizontal: Whether we are looking for the angle for the third axis, + which means the relevant entries are in the same row of the + rotation matrix. If not, they are in the same column. + tait_bryan: Whether the first and third axes in the convention differ. + + Returns: + Euler Angles in radians for each matrix in data as a tensor + of shape (...). + """ + + i1, i2 = {"X": (2, 1), "Y": (0, 2), "Z": (1, 0)}[axis] + if horizontal: + i2, i1 = i1, i2 + even = (axis + other_axis) in ["XY", "YZ", "ZX"] + if horizontal == even: + return torch.atan2(data[..., i1], data[..., i2]) + if tait_bryan: + return torch.atan2(-data[..., i2], data[..., i1]) + return torch.atan2(data[..., i2], -data[..., i1]) + + +def _index_from_letter(letter: str) -> int: + if letter == "X": + return 0 + if letter == "Y": + return 1 + if letter == "Z": + return 2 + raise ValueError("letter must be either X, Y or Z.") + + +def matrix_to_euler_angles(matrix: torch.Tensor, convention: str) -> torch.Tensor: + """ + Convert rotations given as rotation matrices to Euler angles in radians. + + Args: + matrix: Rotation matrices as tensor of shape (..., 3, 3). + convention: Convention string of three uppercase letters. + + Returns: + Euler angles in radians as tensor of shape (..., 3). + """ + if len(convention) != 3: + raise ValueError("Convention must have 3 letters.") + if convention[1] in (convention[0], convention[2]): + raise ValueError(f"Invalid convention {convention}.") + for letter in convention: + if letter not in ("X", "Y", "Z"): + raise ValueError(f"Invalid letter {letter} in convention string.") + if matrix.size(-1) != 3 or matrix.size(-2) != 3: + raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.") + i0 = _index_from_letter(convention[0]) + i2 = _index_from_letter(convention[2]) + tait_bryan = i0 != i2 + if tait_bryan: + central_angle = torch.asin(matrix[..., i0, i2] * (-1.0 if i0 - i2 in [-1, 2] else 1.0)) + else: + central_angle = torch.acos(matrix[..., i0, i0]) + + o = ( + _angle_from_tan(convention[0], convention[1], matrix[..., i2], False, tait_bryan), + central_angle, + _angle_from_tan(convention[2], convention[1], matrix[..., i0, :], True, tait_bryan), + ) + return torch.stack(o, -1) + + +def random_quaternions(n: int, dtype: Optional[torch.dtype] = None, device: Optional[Device] = None) -> torch.Tensor: + """ + Generate random quaternions representing rotations, + i.e. 
versors with nonnegative real part. + + Args: + n: Number of quaternions in a batch to return. + dtype: Type to return. + device: Desired device of returned tensor. Default: + uses the current device for the default tensor type. + + Returns: + Quaternions as tensor of shape (N, 4). + """ + if isinstance(device, str): + device = torch.device(device) + # pyre-fixme[6]: For 2nd param expected `dtype` but got `Optional[dtype]`. + o = torch.randn((n, 4), dtype=dtype, device=device) + s = (o * o).sum(1) + o = o / _copysign(torch.sqrt(s), o[:, 0])[:, None] + return o + + +def random_rotations(n: int, dtype: Optional[torch.dtype] = None, device: Optional[Device] = None) -> torch.Tensor: + """ + Generate random rotations as 3x3 rotation matrices. + + Args: + n: Number of rotation matrices in a batch to return. + dtype: Type to return. + device: Device of returned tensor. Default: if None, + uses the current device for the default tensor type. + + Returns: + Rotation matrices as tensor of shape (n, 3, 3). + """ + quaternions = random_quaternions(n, dtype=dtype, device=device) + return quaternion_to_matrix(quaternions) + + +def random_rotation(dtype: Optional[torch.dtype] = None, device: Optional[Device] = None) -> torch.Tensor: + """ + Generate a single random 3x3 rotation matrix. + + Args: + dtype: Type to return + device: Device of returned tensor. Default: if None, + uses the current device for the default tensor type + + Returns: + Rotation matrix as tensor of shape (3, 3). + """ + return random_rotations(1, dtype, device)[0] + + +def standardize_quaternion(quaternions: torch.Tensor) -> torch.Tensor: + """ + Convert a unit quaternion to a standard form: one in which the real + part is non negative. + + Args: + quaternions: Quaternions with real part first, + as tensor of shape (..., 4). + + Returns: + Standardized quaternions as tensor of shape (..., 4). + """ + return torch.where(quaternions[..., 0:1] < 0, -quaternions, quaternions) + + +def quaternion_raw_multiply(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: + """ + Multiply two quaternions. + Usual torch rules for broadcasting apply. + + Args: + a: Quaternions as tensor of shape (..., 4), real part first. + b: Quaternions as tensor of shape (..., 4), real part first. + + Returns: + The product of a and b, a tensor of quaternions shape (..., 4). + """ + aw, ax, ay, az = torch.unbind(a, -1) + bw, bx, by, bz = torch.unbind(b, -1) + ow = aw * bw - ax * bx - ay * by - az * bz + ox = aw * bx + ax * bw + ay * bz - az * by + oy = aw * by - ax * bz + ay * bw + az * bx + oz = aw * bz + ax * by - ay * bx + az * bw + return torch.stack((ow, ox, oy, oz), -1) + + +def quaternion_multiply(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: + """ + Multiply two quaternions representing rotations, returning the quaternion + representing their composition, i.e. the versor with nonnegative real part. + Usual torch rules for broadcasting apply. + + Args: + a: Quaternions as tensor of shape (..., 4), real part first. + b: Quaternions as tensor of shape (..., 4), real part first. + + Returns: + The product of a and b, a tensor of quaternions of shape (..., 4). + """ + ab = quaternion_raw_multiply(a, b) + return standardize_quaternion(ab) + + +def quaternion_invert(quaternion: torch.Tensor) -> torch.Tensor: + """ + Given a quaternion representing rotation, get the quaternion representing + its inverse. + + Args: + quaternion: Quaternions as tensor of shape (..., 4), with real part + first, which must be versors (unit quaternions). 
+ + Returns: + The inverse, a tensor of quaternions of shape (..., 4). + """ + + scaling = torch.tensor([1, -1, -1, -1], device=quaternion.device) + return quaternion * scaling + + +def quaternion_apply(quaternion: torch.Tensor, point: torch.Tensor) -> torch.Tensor: + """ + Apply the rotation given by a quaternion to a 3D point. + Usual torch rules for broadcasting apply. + + Args: + quaternion: Tensor of quaternions, real part first, of shape (..., 4). + point: Tensor of 3D points of shape (..., 3). + + Returns: + Tensor of rotated points of shape (..., 3). + """ + if point.size(-1) != 3: + raise ValueError(f"Points are not in 3D, {point.shape}.") + real_parts = point.new_zeros(point.shape[:-1] + (1,)) + point_as_quaternion = torch.cat((real_parts, point), -1) + out = quaternion_raw_multiply( + quaternion_raw_multiply(quaternion, point_as_quaternion), + quaternion_invert(quaternion), + ) + return out[..., 1:] + + +def axis_angle_to_matrix(axis_angle: torch.Tensor) -> torch.Tensor: + """ + Convert rotations given as axis/angle to rotation matrices. + + Args: + axis_angle: Rotations given as a vector in axis angle form, + as a tensor of shape (..., 3), where the magnitude is + the angle turned anticlockwise in radians around the + vector's direction. + + Returns: + Rotation matrices as tensor of shape (..., 3, 3). + """ + return quaternion_to_matrix(axis_angle_to_quaternion(axis_angle)) + + +def matrix_to_axis_angle(matrix: torch.Tensor) -> torch.Tensor: + """ + Convert rotations given as rotation matrices to axis/angle. + + Args: + matrix: Rotation matrices as tensor of shape (..., 3, 3). + + Returns: + Rotations given as a vector in axis angle form, as a tensor + of shape (..., 3), where the magnitude is the angle + turned anticlockwise in radians around the vector's + direction. + """ + return quaternion_to_axis_angle(matrix_to_quaternion(matrix)) + + +def axis_angle_to_quaternion(axis_angle: torch.Tensor) -> torch.Tensor: + """ + Convert rotations given as axis/angle to quaternions. + + Args: + axis_angle: Rotations given as a vector in axis angle form, + as a tensor of shape (..., 3), where the magnitude is + the angle turned anticlockwise in radians around the + vector's direction. + + Returns: + quaternions with real part first, as tensor of shape (..., 4). + """ + angles = torch.norm(axis_angle, p=2.0, dim=-1, keepdim=True) + half_angles = angles * 0.5 + eps = 1e-6 + small_angles = angles.abs() < eps + sin_half_angles_over_angles = torch.empty_like(angles) + sin_half_angles_over_angles[~small_angles] = torch.sin(half_angles[~small_angles]) / angles[~small_angles] + # for x small, sin(x/2) is about x/2 - (x/2)^3/6 + # so sin(x/2)/x is about 1/2 - (x*x)/48 + sin_half_angles_over_angles[small_angles] = 0.5 - (angles[small_angles] * angles[small_angles]) / 48 + quaternions = torch.cat([torch.cos(half_angles), axis_angle * sin_half_angles_over_angles], dim=-1) + return quaternions + + +def quaternion_to_axis_angle(quaternions: torch.Tensor) -> torch.Tensor: + """ + Convert rotations given as quaternions to axis/angle. + + Args: + quaternions: quaternions with real part first, + as tensor of shape (..., 4). + + Returns: + Rotations given as a vector in axis angle form, as a tensor + of shape (..., 3), where the magnitude is the angle + turned anticlockwise in radians around the vector's + direction. 
+ """ + norms = torch.norm(quaternions[..., 1:], p=2.0, dim=-1, keepdim=True) + half_angles = torch.atan2(norms, quaternions[..., :1]) + angles = 2 * half_angles + eps = 1e-6 + small_angles = angles.abs() < eps + sin_half_angles_over_angles = torch.empty_like(angles) + sin_half_angles_over_angles[~small_angles] = torch.sin(half_angles[~small_angles]) / angles[~small_angles] + # for x small, sin(x/2) is about x/2 - (x/2)^3/6 + # so sin(x/2)/x is about 1/2 - (x*x)/48 + sin_half_angles_over_angles[small_angles] = 0.5 - (angles[small_angles] * angles[small_angles]) / 48 + return quaternions[..., 1:] / sin_half_angles_over_angles + + +def rotation_6d_to_matrix(d6: torch.Tensor) -> torch.Tensor: + """ + Converts 6D rotation representation by Zhou et al. [1] to rotation matrix + using Gram--Schmidt orthogonalization per Section B of [1]. + Args: + d6: 6D rotation representation, of size (*, 6) + + Returns: + batch of rotation matrices of size (*, 3, 3) + + [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H. + On the Continuity of Rotation Representations in Neural Networks. + IEEE Conference on Computer Vision and Pattern Recognition, 2019. + Retrieved from http://arxiv.org/abs/1812.07035 + """ + + a1, a2 = d6[..., :3], d6[..., 3:] + b1 = F.normalize(a1, dim=-1) + b2 = a2 - (b1 * a2).sum(-1, keepdim=True) * b1 + b2 = F.normalize(b2, dim=-1) + b3 = torch.cross(b1, b2, dim=-1) + return torch.stack((b1, b2, b3), dim=-2) + + +def matrix_to_rotation_6d(matrix: torch.Tensor) -> torch.Tensor: + """ + Converts rotation matrices to 6D rotation representation by Zhou et al. [1] + by dropping the last row. Note that 6D representation is not unique. + Args: + matrix: batch of rotation matrices of size (*, 3, 3) + + Returns: + 6D rotation representation, of size (*, 6) + + [1] Zhou, Y., Barnes, C., Lu, J., Yang, J., & Li, H. + On the Continuity of Rotation Representations in Neural Networks. + IEEE Conference on Computer Vision and Pattern Recognition, 2019. + Retrieved from http://arxiv.org/abs/1812.07035 + """ + batch_dim = matrix.size()[:-2] + return matrix[..., :2, :].clone().reshape(batch_dim + (6,)) diff --git a/utils/sh_utils.py b/utils/sh_utils.py new file mode 100644 index 0000000..caf4411 --- /dev/null +++ b/utils/sh_utils.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 Anpei Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import torch + +################## sh function ################## +C0 = 0.28209479177387814 +C1 = 0.4886025119029199 +C2 = [1.0925484305920792, -1.0925484305920792, 0.31539156525252005, -1.0925484305920792, 0.5462742152960396] +C3 = [ + -0.5900435899266435, + 2.890611442640554, + -0.4570457994644658, + 0.3731763325901154, + -0.4570457994644658, + 1.445305721320277, + -0.5900435899266435, +] +C4 = [ + 2.5033429417967046, + -1.7701307697799304, + 0.9461746957575601, + -0.6690465435572892, + 0.10578554691520431, + -0.6690465435572892, + 0.47308734787878004, + -1.7701307697799304, + 0.6258357354491761, +] + + +def eval_sh(deg, sh, dirs): + """ + Evaluate spherical harmonics at unit directions + using hardcoded SH polynomials. + Works with torch/np/jnp. + ... Can be 0 or more batch dimensions. + :param deg: int SH max degree. 
Currently, 0-4 supported + :param sh: torch.Tensor SH coeffs (..., C, (max degree + 1) ** 2) + :param dirs: torch.Tensor unit directions (..., 3) + :return: (..., C) + """ + assert deg <= 4 and deg >= 0 + assert (deg + 1) ** 2 == sh.shape[-1] + C = sh.shape[-2] + + result = C0 * sh[..., 0] + if deg > 0: + x, y, z = dirs[..., 0:1], dirs[..., 1:2], dirs[..., 2:3] + result = result - C1 * y * sh[..., 1] + C1 * z * sh[..., 2] - C1 * x * sh[..., 3] + if deg > 1: + xx, yy, zz = x * x, y * y, z * z + xy, yz, xz = x * y, y * z, x * z + result = ( + result + + C2[0] * xy * sh[..., 4] + + C2[1] * yz * sh[..., 5] + + C2[2] * (2.0 * zz - xx - yy) * sh[..., 6] + + C2[3] * xz * sh[..., 7] + + C2[4] * (xx - yy) * sh[..., 8] + ) + + if deg > 2: + result = ( + result + + C3[0] * y * (3 * xx - yy) * sh[..., 9] + + C3[1] * xy * z * sh[..., 10] + + C3[2] * y * (4 * zz - xx - yy) * sh[..., 11] + + C3[3] * z * (2 * zz - 3 * xx - 3 * yy) * sh[..., 12] + + C3[4] * x * (4 * zz - xx - yy) * sh[..., 13] + + C3[5] * z * (xx - yy) * sh[..., 14] + + C3[6] * x * (xx - 3 * yy) * sh[..., 15] + ) + if deg > 3: + result = ( + result + + C4[0] * xy * (xx - yy) * sh[..., 16] + + C4[1] * yz * (3 * xx - yy) * sh[..., 17] + + C4[2] * xy * (7 * zz - 1) * sh[..., 18] + + C4[3] * yz * (7 * zz - 3) * sh[..., 19] + + C4[4] * (zz * (35 * zz - 30) + 3) * sh[..., 20] + + C4[5] * xz * (7 * zz - 3) * sh[..., 21] + + C4[6] * (xx - yy) * (7 * zz - 1) * sh[..., 22] + + C4[7] * xz * (xx - 3 * yy) * sh[..., 23] + + C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) * sh[..., 24] + ) + return result + + +def eval_sh_bases(deg, dirs): + """ + Evaluate spherical harmonics bases at unit directions, + without taking linear combination. + At each point, the final result may the be + obtained through simple multiplication. + :param deg: int SH max degree. Currently, 0-4 supported + :param dirs: torch.Tensor (..., 3) unit directions + :return: torch.Tensor (..., (deg+1) ** 2) + """ + assert deg <= 4 and deg >= 0 + result = torch.empty((*dirs.shape[:-1], (deg + 1) ** 2), dtype=dirs.dtype, device=dirs.device) + result[..., 0] = C0 + if deg > 0: + x, y, z = dirs.unbind(-1) + result[..., 1] = -C1 * y + result[..., 2] = C1 * z + result[..., 3] = -C1 * x + if deg > 1: + xx, yy, zz = x * x, y * y, z * z + xy, yz, xz = x * y, y * z, x * z + result[..., 4] = C2[0] * xy + result[..., 5] = C2[1] * yz + result[..., 6] = C2[2] * (2.0 * zz - xx - yy) + result[..., 7] = C2[3] * xz + result[..., 8] = C2[4] * (xx - yy) + + if deg > 2: + result[..., 9] = C3[0] * y * (3 * xx - yy) + result[..., 10] = C3[1] * xy * z + result[..., 11] = C3[2] * y * (4 * zz - xx - yy) + result[..., 12] = C3[3] * z * (2 * zz - 3 * xx - 3 * yy) + result[..., 13] = C3[4] * x * (4 * zz - xx - yy) + result[..., 14] = C3[5] * z * (xx - yy) + result[..., 15] = C3[6] * x * (xx - 3 * yy) + + if deg > 3: + result[..., 16] = C4[0] * xy * (xx - yy) + result[..., 17] = C4[1] * yz * (3 * xx - yy) + result[..., 18] = C4[2] * xy * (7 * zz - 1) + result[..., 19] = C4[3] * yz * (7 * zz - 3) + result[..., 20] = C4[4] * (zz * (35 * zz - 30) + 3) + result[..., 21] = C4[5] * xz * (7 * zz - 3) + result[..., 22] = C4[6] * (xx - yy) * (7 * zz - 1) + result[..., 23] = C4[7] * xz * (xx - 3 * yy) + result[..., 24] = C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) + return result diff --git a/utils/tensorf_utils.py b/utils/tensorf_utils.py new file mode 100644 index 0000000..cc242e8 --- /dev/null +++ b/utils/tensorf_utils.py @@ -0,0 +1,487 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. 
and affiliates. +# Copyright (c) 2022 Anpei Chen +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +import pdb + +import cv2 +import numpy as np +import torch +import torch.nn.functional as F +import torchvision.transforms as T +from PIL import Image + +from utils.ray_utils import dot +from utils.sh_utils import eval_sh_bases + +# import scipy.signal + + +mse2psnr = lambda x: -10.0 * torch.log(x) / torch.log(torch.Tensor([10.0])) + + +def visualize_depth_numpy(depth, minmax=None, cmap=cv2.COLORMAP_JET): + """ + depth: (H, W) + """ + + x = np.nan_to_num(depth) # change nan to 0 + if minmax is None: + mi = np.min(x[x > 0]) # get minimum positive depth (ignore background) + ma = np.max(x) + else: + mi, ma = minmax + + x = (x - mi) / (ma - mi + 1e-8) # normalize to 0~1 + x = (255 * x).astype(np.uint8) + x_ = cv2.applyColorMap(x, cmap) + return x_, [mi, ma] + + +def init_log(log, keys): + for key in keys: + log[key] = torch.tensor([0.0], dtype=float) + return log + + +def visualize_depth(depth, minmax=None, cmap=cv2.COLORMAP_JET): + """ + depth: (H, W) + """ + if type(depth) is not np.ndarray: + depth = depth.cpu().numpy() + + x = np.nan_to_num(depth) # change nan to 0 + if minmax is None: + mi = np.min(x[x > 0]) # get minimum positive depth (ignore background) + ma = np.max(x) + else: + mi, ma = minmax + + x = (x - mi) / (ma - mi + 1e-8) # normalize to 0~1 + x = (255 * x).astype(np.uint8) + x_ = Image.fromarray(cv2.applyColorMap(x, cmap)) + x_ = T.ToTensor()(x_) # (3, H, W) + return x_, [mi, ma] + + +def N_to_reso(n_voxels, bbox): + xyz_min, xyz_max = bbox + # pdb.set_trace() + voxel_size = ((xyz_max - xyz_min).prod() / n_voxels).pow(1 / 3) + return ((xyz_max - xyz_min) / voxel_size).long().tolist() + + +def cal_n_samples(reso, step_ratio=0.5): + return int(np.linalg.norm(reso) / step_ratio) + + +__LPIPS__ = {} + + +def init_lpips(net_name, device): + assert net_name in ["alex", "vgg"] + import lpips + + print(f"init_lpips: lpips_{net_name}") + return lpips.LPIPS(net=net_name, version="0.1").eval().to(device) + + +def rgb_lpips(np_gt, np_im, net_name, device): + if net_name not in __LPIPS__: + __LPIPS__[net_name] = init_lpips(net_name, device) + gt = torch.from_numpy(np_gt).permute([2, 0, 1]).contiguous().to(device) + im = torch.from_numpy(np_im).permute([2, 0, 1]).contiguous().to(device) + return __LPIPS__[net_name](gt, im, normalize=True).item() + + +def findItem(items, target): + for one in items: + if one[: len(target)] == target: + return one + return None + + +""" Evaluation metrics (ssim, lpips) +""" + + +def rgb_ssim(img0, img1, max_val, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03, return_map=False): + # Modified from https://github.com/google/mipnerf/blob/16e73dfdb52044dcceb47cda5243a686391a6e0f/internal/math.py#L58 + assert len(img0.shape) == 3 + assert img0.shape[-1] == 3 + assert img0.shape == img1.shape + + # Construct a 1D Gaussian blur filter. + hw = filter_size // 2 + shift = (2 * hw - filter_size + 1) / 2 + f_i = ((np.arange(filter_size) - hw + shift) / filter_sigma) ** 2 + filt = np.exp(-0.5 * f_i) + filt /= np.sum(filt) + + # Blur in x and y (faster than the 2D convolution). 
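+    # `scipy.signal` is needed by `convolve2d` below, but the module-level import near
+    # the top of this file is commented out, so import it locally here.
+    import scipy.signal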
+ def convolve2d(z, f): + return scipy.signal.convolve2d(z, f, mode="valid") + + filt_fn = lambda z: np.stack( + [convolve2d(convolve2d(z[..., i], filt[:, None]), filt[None, :]) for i in range(z.shape[-1])], -1 + ) + mu0 = filt_fn(img0) + mu1 = filt_fn(img1) + mu00 = mu0 * mu0 + mu11 = mu1 * mu1 + mu01 = mu0 * mu1 + sigma00 = filt_fn(img0**2) - mu00 + sigma11 = filt_fn(img1**2) - mu11 + sigma01 = filt_fn(img0 * img1) - mu01 + + # Clip the variances and covariances to valid values. + # Variance must be non-negative: + sigma00 = np.maximum(0.0, sigma00) + sigma11 = np.maximum(0.0, sigma11) + sigma01 = np.sign(sigma01) * np.minimum(np.sqrt(sigma00 * sigma11), np.abs(sigma01)) + c1 = (k1 * max_val) ** 2 + c2 = (k2 * max_val) ** 2 + numer = (2 * mu01 + c1) * (2 * sigma01 + c2) + denom = (mu00 + mu11 + c1) * (sigma00 + sigma11 + c2) + ssim_map = numer / denom + ssim = np.mean(ssim_map) + return ssim_map if return_map else ssim + + +import torch.nn as nn + + +class TVLoss(nn.Module): + def __init__(self, TVLoss_weight=1): + super(TVLoss, self).__init__() + self.TVLoss_weight = TVLoss_weight + + def forward(self, x): + batch_size = x.size()[0] + h_x = x.size()[2] + w_x = x.size()[3] + count_h = self._tensor_size(x[:, :, 1:, :]) + count_w = self._tensor_size(x[:, :, :, 1:]) + h_tv = torch.pow((x[:, :, 1:, :] - x[:, :, : h_x - 1, :]), 2).sum() + w_tv = torch.pow((x[:, :, :, 1:] - x[:, :, :, : w_x - 1]), 2).sum() + return self.TVLoss_weight * 2 * (h_tv / count_h + w_tv / count_w) / batch_size + + def _tensor_size(self, t): + return t.size()[1] * t.size()[2] * t.size()[3] + + +import plyfile + + +# import skimage.measure +def convert_sdf_samples_to_ply( + pytorch_3d_sdf_tensor, + ply_fp, + bbox, + level=0.5, + offset=None, + scale=None, +): + """ + Convert sdf samples to .ply + + :param pytorch_3d_sdf_tensor: a torch.FloatTensor of shape (n,n,n) + :voxel_grid_origin: a list of three floats: the bottom, left, down origin of the voxel grid + :voxel_size: float, the size of the voxels + :ply_fp: file object to save ply to + + This function adapted from: https://github.com/RobotLocomotion/spartan + """ + + numpy_3d_sdf_tensor = pytorch_3d_sdf_tensor.numpy() + voxel_size = list((bbox[1] - bbox[0]) / np.array(pytorch_3d_sdf_tensor.shape)) + + verts, faces, normals, values = skimage.measure.marching_cubes(numpy_3d_sdf_tensor, level=level, spacing=voxel_size) + faces = faces[..., ::-1] # inverse face orientation + + # transform from voxel coordinates to camera coordinates + # note x and y are flipped in the output of marching_cubes + mesh_points = np.zeros_like(verts) + mesh_points[:, 0] = bbox[0, 0] + verts[:, 0] + mesh_points[:, 1] = bbox[0, 1] + verts[:, 1] + mesh_points[:, 2] = bbox[0, 2] + verts[:, 2] + + # apply additional offset and scale + if scale is not None: + mesh_points = mesh_points / scale + if offset is not None: + mesh_points = mesh_points - offset + + # try writing to the ply file + + num_verts = verts.shape[0] + num_faces = faces.shape[0] + + verts_tuple = np.zeros((num_verts,), dtype=[("x", "f4"), ("y", "f4"), ("z", "f4")]) + + for i in range(0, num_verts): + verts_tuple[i] = tuple(mesh_points[i, :]) + + faces_building = [] + for i in range(0, num_faces): + faces_building.append(((faces[i, :].tolist(),))) + faces_tuple = np.array(faces_building, dtype=[("vertex_indices", "i4", (3,))]) + + el_verts = plyfile.PlyElement.describe(verts_tuple, "vertex") + el_faces = plyfile.PlyElement.describe(faces_tuple, "face") + + ply_data = plyfile.PlyData([el_verts, el_faces]) + ply_data.write(ply_fp) + 
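+# NOTE: convert_sdf_samples_to_ply() above calls skimage.measure.marching_cubes, so
+# scikit-image must be installed and the commented-out `import skimage.measure` above
+# must be enabled before the function is used. An illustrative call, with hypothetical
+# inputs (level=0.5 matches the default above):
+#
+#   sdf = torch.rand(128, 128, 128)
+#   bbox = np.array([[-1.0, -1.0, -1.0], [1.0, 1.0, 1.0]])
+#   with open("mesh.ply", "wb") as f:
+#       convert_sdf_samples_to_ply(sdf, f, bbox, level=0.5)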
+ +def positional_encoding(positions, freqs): + + freq_bands = (2 ** torch.arange(freqs).float()).to(positions.device) # (F,) + pts = (positions[..., None] * freq_bands).reshape( + positions.shape[:-1] + (freqs * positions.shape[-1],) + ) # (..., DF) + pts = torch.cat([torch.sin(pts), torch.cos(pts)], dim=-1) + return pts + + +def raw2alpha(sigma, dist): + alpha = 1.0 - torch.exp(-sigma * dist) + + T = torch.cumprod( + torch.cat([torch.ones(alpha.shape[0], 1).to(alpha.device), 1.0 - alpha + 1e-10], -1), + -1, + ) + + weights = alpha * T[:, :-1] # [N_rays, N_samples] + return alpha, weights, T[:, -1:] + + +def alpha2weights(alpha): + T = torch.cumprod( + torch.cat([torch.ones(alpha.shape[0], 1).to(alpha.device), 1.0 - alpha + 1e-10], -1), + -1, + ) + + weights = alpha * T[:, :-1] # [N_rays, N_samples] + return weights + + +def scale_shift_color_all(rgb, color_scale, color_shift): + color_scale = color_scale.view(*rgb.shape) + color_shift = color_shift.view(*rgb.shape) + + # print(color_shift.mean((0, 1))) + + return rgb * (color_scale + 1.0) + color_shift + + +def scale_shift_color_one(rgb, rgb_map, x): + color_scale = x["color_scale_global"].view(*rgb.shape)[:, 0, :] + color_shift = x["color_shift_global"].view(*rgb.shape)[:, 0, :] + + # print(color_shift.mean(0)) + + return rgb_map * (color_scale + 1.0) + color_shift + + +def transform_color_all(rgb, color_transform, color_shift): + color_transform = color_transform.view(rgb.shape[0], 3, 3) + color_shift = color_shift.view(*rgb.shape) + + rgb = torch.stack( + [ + rgb[..., 0] + dot(rgb, color_transform[..., 0, :]), + rgb[..., 1] + dot(rgb, color_transform[..., 1, :]), + rgb[..., 2] + dot(rgb, color_transform[..., 2, :]), + ], + -1, + ) + # rgb = torch.stack( + # [ + # dot(rgb, color_transform[..., 0, :]), + # dot(rgb, color_transform[..., 1, :]), + # dot(rgb, color_transform[..., 2, :]), + # ], + # -1 + # ) + + # print(color_transform.mean(0), color_shift.mean(0)) + + return rgb + color_shift + + +def transform_color_one(rgb, rgb_map, x): + color_transform = x["color_transform_global"].view(rgb.shape[0], -1, 3, 3)[:, 0, :, :] + color_shift = x["color_shift_global"].view(rgb.shape[0], -1, 3)[:, 0, :] + + rgb_map = torch.stack( + [ + rgb_map[..., 0] + dot(rgb_map, color_transform[..., 0, :]), + rgb_map[..., 1] + dot(rgb_map, color_transform[..., 1, :]), + rgb_map[..., 2] + dot(rgb_map, color_transform[..., 2, :]), + ], + -1, + ) + # rgb_map = torch.stack( + # [ + # dot(rgb_map, color_transform[..., 0, :]), + # dot(rgb_map, color_transform[..., 1, :]), + # dot(rgb_map, color_transform[..., 2, :]), + # ], + # -1 + # ) + + # print(color_transform.mean(0), color_shift.mean(0)) + + return rgb_map + color_shift + + +def SHRender(xyz_sampled, viewdirs, features, kwargs): + sh_mult = eval_sh_bases(2, viewdirs[..., :3])[:, None] + rgb_sh = features.view(-1, 3, sh_mult.shape[-1]) + rgb = torch.relu(torch.sum(sh_mult * rgb_sh, dim=-1) + 0.5) + return rgb + + +def RGBRender(xyz_sampled, viewdirs, features, kwargs): + rgb = features + return torch.sigmoid(rgb) + + +def RGBIdentityRender(xyz_sampled, viewdirs, features, kwargs): + rgb = features + return torch.abs(rgb + 0.5) + + +def RGBtLinearRender(xyz_sampled, viewdirs, features, kwargs): + # Coefficients + coeffs = features.view(-1, 3, 2) + + # Basis functions + t = kwargs["times"].view(-1, 1) + + basis = torch.cat( + [ + torch.ones_like(t), + t, + ], + dim=-1, + ) + + # RGB + rgb = torch.relu(torch.sum(basis.unsqueeze(1) * coeffs, dim=-1) + 0.5) + + return rgb + + +def RGBtFourierRender(xyz_sampled, 
viewdirs, features, kwargs): + frames_per_keyframe = kwargs["frames_per_keyframe"] + num_keyframes = kwargs["num_keyframes"] + total_num_frames = kwargs["total_num_frames"] + time_scale_factor = num_keyframes * (total_num_frames - 1) / (total_num_frames) + + # Coefficients + coeffs = features.view(-1, 3, frames_per_keyframe * 2 + 1) + + # Basis functions + time_offset = kwargs["time_offset"].view(-1, 1) * time_scale_factor + t = kwargs["times"].view(-1, 1) + + freqs = torch.linspace(0, frames_per_keyframe - 1, frames_per_keyframe, device=time_offset.device)[None] + basis = torch.cat( + [ + t, + torch.cos(time_offset * freqs * 2 * np.pi), + torch.sin(time_offset * freqs * 2 * np.pi), + ], + dim=-1, + ) + + # RGB + rgb = torch.relu(torch.sum(basis.unsqueeze(1) * coeffs, dim=-1) + 0.5) + + return rgb + + +def DensityRender(density_features, kwargs): + return density_features[..., 0] + + +def DensityLinearRender(density_features, kwargs): + # Coefficients + coeffs = density_features.view(-1, 1, 2) + + # Basis functions + t = kwargs["times"].view(-1, 1) + + basis = torch.cat( + [ + torch.ones_like(t), + t, + ], + dim=-1, + ) + + # Density + density = torch.sum(basis.unsqueeze(1) * coeffs, dim=-1)[..., 0] + + return density + + +def DensityFourierRender(density_features, kwargs): + frames_per_keyframe = kwargs["frames_per_keyframe"] + num_keyframes = kwargs["num_keyframes"] + total_num_frames = kwargs["total_num_frames"] + time_scale_factor = num_keyframes * (total_num_frames - 1) / (total_num_frames) + + # Coefficients + coeffs = density_features.view(-1, 1, frames_per_keyframe * 2 + 1) + + # Basis functions + time_offset = kwargs["time_offset"].view(-1, 1) * time_scale_factor + t = kwargs["times"].view(-1, 1) + + freqs = torch.linspace(0, frames_per_keyframe - 1, frames_per_keyframe, device=time_offset.device)[None] + basis = torch.cat( + [ + t, + torch.cos(time_offset * freqs * 2 * np.pi), + torch.sin(time_offset * freqs * 2 * np.pi), + ], + dim=-1, + ) + + # Density + density = torch.sum(basis.unsqueeze(1) * coeffs, dim=-1)[..., 0] + + return density + + +class AlphaGridMask(torch.nn.Module): + def __init__(self, device, aabb, alpha_volume): + super().__init__() + + self.opt_group = "color" + self.device = device + + self.register_buffer("alpha_aabb", aabb.to(self.device)) + self.register_buffer("alpha_volume", alpha_volume.view(1, 1, *alpha_volume.shape[-3:])) + + self.aabbSize = self.alpha_aabb[1] - self.alpha_aabb[0] + self.invgridSize = 1.0 / self.aabbSize * 2 + self.gridSize = torch.LongTensor([alpha_volume.shape[-1], alpha_volume.shape[-2], alpha_volume.shape[-3]]).to( + self.device + ) + + def sample_alpha(self, xyz_sampled): + xyz_sampled = self.normalize_coord(xyz_sampled) + alpha_vals = F.grid_sample(self.alpha_volume, xyz_sampled.view(1, -1, 1, 1, 3), align_corners=True).view(-1) + + return alpha_vals + + def normalize_coord(self, xyz_sampled): + return (xyz_sampled - self.alpha_aabb[0]) * self.invgridSize - 1 diff --git a/utils/visualization.py b/utils/visualization.py new file mode 100644 index 0000000..d28255d --- /dev/null +++ b/utils/visualization.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 hawkey +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. 
+# SPDX-License-Identifier: MIT +import numpy as np +import torch + + +def get_warp_dimensions(embedding, W, H, k=3, sort=False, **kwargs): + if sort: + embedding_std = torch.std(embedding, 0, True) + return list(torch.argsort(-embedding_std, axis=-1)[:k].cpu().numpy()) + else: + return list(range(0, embedding.shape[-1])) + + +def visualize_warp(embedding, warp_dims, use_abs=False, bounds=None, normalize=False, **kwargs): + + if embedding.shape[-1] > 1: + warp_vis = embedding[..., warp_dims] + else: + warp_vis = embedding + + if use_abs: + warp_vis = torch.abs(warp_vis) + + if bounds is not None and len(bounds) > 0: + bounds_min = torch.tensor(bounds[0], device=warp_vis.device).view(1, -1) + bounds_max = torch.tensor(bounds[1], device=warp_vis.device).view(1, -1) + warp_vis = (warp_vis - bounds_min) / (bounds_max - bounds_min) + + if normalize: + bounds_min = torch.min(warp_vis, dim=0)[0].view(1, -1) + bounds_max = torch.max(warp_vis, dim=0)[0].view(1, -1) + warp_vis = (warp_vis - bounds_min) / (bounds_max - bounds_min) + + return warp_vis.clamp(0, 1) diff --git a/utils/warmup_scheduler.py b/utils/warmup_scheduler.py new file mode 100644 index 0000000..de97030 --- /dev/null +++ b/utils/warmup_scheduler.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) 2022 hawkey +# +# This source code is licensed under the MIT license found in the +# LICENSES folder in the root directory of this source tree. +# SPDX-License-Identifier: MIT +from torch.optim.lr_scheduler import _LRScheduler + + +class GradualWarmupScheduler(_LRScheduler): + """Gradually warm-up(increasing) learning rate in optimizer. + Implementation: https://github.com/kwea123/nerf_pl + Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'. + Args: + optimizer (Optimizer): Wrapped optimizer. + multiplier: target learning rate = base lr * multiplier + total_epoch: target learning rate is reached at total_epoch, gradually + after_scheduler: after target_epoch, use this scheduler(eg. ReduceLROnPlateau) + """ + + def __init__(self, optimizer, multiplier, total_epoch, after_scheduler=None): + self.multiplier = multiplier + self.total_epoch = total_epoch + self.after_scheduler = after_scheduler + self.finished = False + + super().__init__(optimizer) + + def get_lr(self): + if self.last_epoch >= self.total_epoch: + if self.after_scheduler: + if not self.finished: + self.after_scheduler.base_lrs = [base_lr * self.multiplier for base_lr in self.base_lrs] + self.finished = True + + lrs = self.after_scheduler.get_lr() + else: + lrs = [base_lr * self.multiplier for base_lr in self.base_lrs] + else: + lrs = [ + base_lr * ((self.last_epoch + 1) / (self.total_epoch + 1)) * self.multiplier + for base_lr in self.base_lrs + ] + + return lrs + + def step(self, epoch=None, metrics=None): + if self.finished and self.after_scheduler: + self.get_lr() + + if epoch is None: + self.after_scheduler.step(None) + else: + self.after_scheduler.step(epoch - self.total_epoch) + else: + return super(GradualWarmupScheduler, self).step(epoch)
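+
+
+if __name__ == "__main__":
+    # Minimal usage sketch (illustrative only): linearly warm up the learning rate for the
+    # first 5 epochs, then hand off to a cosine schedule for the remaining epochs.
+    import torch
+
+    params = [torch.nn.Parameter(torch.zeros(1))]
+    optimizer = torch.optim.Adam(params, lr=5e-4)
+    cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=95)
+    scheduler = GradualWarmupScheduler(optimizer, multiplier=1.0, total_epoch=5, after_scheduler=cosine)
+
+    for epoch in range(100):
+        optimizer.step()  # a real loop would compute gradients before this
+        scheduler.step(epoch)
+        print(epoch, optimizer.param_groups[0]["lr"])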