Skip to content

cli

add(path, name)

Import user-defined function annotations into Lineapy.

This command copies the yaml file whose path is provided by the user into the user's .lineapy directory to allow Lineapy to manage it.

Source code in lineapy/cli/cli.py
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
# Registered as the "add" subcommand of the `annotations` click group.
@annotations.command("add")
@click.argument(
    "path",
    type=click.Path(dir_okay=False, path_type=pathlib.Path),
    # click invokes this callback on the parsed argument before `add` runs.
    callback=validate_annotations_path,
)
@click.option(
    "--name",
    "-n",
    default=None,
    help="What to name source. Input file name is used as default",
    type=str,
)
def add(path: pathlib.Path, name: Optional[str]):
    """
    Import user-defined function annotations into Lineapy.

    This command copies the yaml file whose path is provided by the user into the user's .lineapy directory to allow Lineapy to manage it.
    """

    # `name` is None when --name/-n is omitted (default=None); it is forwarded
    # unchanged and the actual import work is delegated to annotations_add.
    annotations_add(path, name)

annotations()

The annotate command can be used to import custom annotation sources into LineaPy. It can be used to add a source, list all sources, delete a source, and validate all sources.

Source code in lineapy/cli/cli.py
577
578
579
580
581
582
583
584
# Declares the "annotate" command group on the top-level `linea_cli` group.
@linea_cli.group("annotate")
def annotations():
    """
    The annotate command can be used to import custom annotation sources
    into LineaPy. It can be used to add a source, list all sources,
    delete a source, and validate all sources.
    """
    # The group callback does no work itself; it exists so subcommands can be
    # attached with @annotations.command(...).
    pass

benchmark(path, n, skip_baseline)

Benchmarks running the notebook at PATH with lineapy versus with pure Python. Runs each case N times, where N is set with the --n option.

Prints the length of each run, and some statistics if they are meaningfully different.

Source code in lineapy/cli/cli.py
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
@linea_cli.command()
@click.argument(
    "path",
    type=click.Path(dir_okay=False, path_type=pathlib.Path),
    callback=validate_benchmark_path,
)
@click.option(
    "--n",
    default=3,
    # Reject 0 and negative values up front: with no timed runs the duration
    # lists below stay empty and the mean / comparison steps cannot work.
    type=click.IntRange(min=1),
    help="Number of times to run each case.",
)
@click.option(
    "--skip-baseline",
    help="Only run with lineapy, skip benchmarking the baseline.",
    is_flag=True,
)
def benchmark(path: pathlib.Path, n: int, skip_baseline: bool):
    """
    Benchmarks running the notebook at PATH with lineapy versus with pure Python.
    Runs each case N times (see --n). The baseline gets one extra run up front
    whose timing is discarded.

    Prints the length of each run, and some statistics if they are meaningfully different.
    """
    console = Console()
    console.rule(f"[bold red]Benchmarking[/] {path}")

    with open(path) as f:
        notebook = nbformat.read(f, nbformat.NO_CONVERT)

    # Turn off tensorflow logging
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
    # Execute from the notebook's own directory (presumably so relative paths
    # used inside the notebook resolve; confirm against notebook usage).
    os.chdir(path.parent)

    # The same executor and the same in-memory notebook object are reused for
    # every run, baseline and lineapy alike.
    exec_proc = ExecutePreprocessor(timeout=None)

    if not skip_baseline:
        console.rule("[bold green]Running without lineapy")

        without_lineapy: List[float] = []
        with Progress() as progress:
            # n timed runs plus one leading run that is reported but discarded.
            task = progress.add_task("Executing...", total=n + 1)
            for i in range(n + 1):
                progress.advance(task)
                # Silence anything written to stdout/stderr during execution.
                with redirect_stdout(None), redirect_stderr(None):
                    start_time = perf_counter()
                    exec_proc.preprocess(notebook)
                    duration = perf_counter() - start_time
                first_run = i == 0
                progress.console.print(
                    f"{duration:.1f} seconds{' (discarding first run)' if first_run else '' }"
                )
                if not first_run:
                    without_lineapy.append(duration)
        rich.print(f"Mean: {mean(without_lineapy):.1f} seconds")

    # From here on IPYTHONDIR points at a profile that loads the lineapy
    # extension, so the following runs execute with lineapy enabled.
    setup_ipython_dir()
    with_lineapy: List[float] = []
    console.rule("[bold green]Running with lineapy")

    with Progress() as progress:
        task = progress.add_task("Executing...", total=n)
        for _ in range(n):
            progress.advance(task)
            with redirect_stdout(None), redirect_stderr(None):
                start_time = perf_counter()
                exec_proc.preprocess(notebook)
                duration = perf_counter() - start_time
            progress.console.print(f"{duration:.1f} seconds")
            with_lineapy.append(duration)
    rich.print(f"Mean: {mean(with_lineapy):.1f} seconds")

    if not skip_baseline:

        console.rule("[bold blue]Analyzing")

        # `without_lineapy` only exists when the baseline ran, hence the same
        # guard as above.
        change = distribution_change(
            without_lineapy, with_lineapy, confidence_interval=0.90
        )
        rich.print(f"Lineapy is {str(change)}")

delete(name)

Deletes imported annotation source.

Source code in lineapy/cli/cli.py
695
696
697
698
699
700
701
702
703
704
705
706
707
708
# Registered as the "delete" subcommand of the `annotations` click group.
@annotations.command("delete")
@click.option(
    "--name",
    "-n",
    # required=True: click rejects the invocation when --name/-n is missing,
    # so `name` is always provided by the time `delete` runs.
    required=True,
    help="Name of source to delete. Type `lineapy annotate list` to see all sources.",
    type=str,
)
def delete(name: str):
    """
    Deletes imported annotation source.
    """

    # All of the work is delegated to annotations_delete.
    annotations_delete(name)

file(path, artifact_name, artifact_value, visualize_slice)

Executes python at PATH, saves the value ARTIFACT_VALUE with name ARTIFACT_NAME, and prints the sliced code.

Source code in lineapy/cli/cli.py
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
@linea_cli.command()
@click.argument(
    "path",
    type=click.Path(exists=True, dir_okay=False, path_type=pathlib.Path),
)
@click.argument("artifact_name")
@click.argument("artifact_value", type=str)
@click.option(
    "--visualize-slice",
    type=click.Path(dir_okay=False, path_type=pathlib.Path),
    help="Create a visualization for the sliced code, save it to this path",
)
def file(
    path: pathlib.Path,
    artifact_name: str,
    artifact_value: str,
    visualize_slice: Optional[pathlib.Path],
):
    """
    Executes python at PATH, saves the value ARTIFACT_VALUE with name ARTIFACT_NAME, and prints the sliced code.
    """
    # Step 1: the program to trace is the user's script with the generated
    # save snippet appended at the end.
    script_source = path.read_text()
    script_source += generate_save_code(
        artifact_name, artifact_value, visualize_slice
    )

    # Step 2: trace the combined source as a SCRIPT session. Anything the
    # script writes to stdout is sent to stderr for the duration of the run.
    database = RelationalLineaDB.from_config(options)
    script_tracer = Tracer(database, SessionType.SCRIPT)
    with redirect_stdout(sys.stderr):
        transform(script_source, path, script_tracer)

    # Step 3: look the saved artifact up by name and rebuild the API-level
    # artifact from its ORM row so that its code slice can be emitted.
    # FIXME: weird indirection
    orm_row = database.get_artifactorm_by_name(artifact_name)
    sliced_artifact = LineaArtifact(
        db=database,
        _artifact_id=orm_row.id,
        _execution_id=orm_row.execution_id,
        _node_id=orm_row.node_id,
        _session_id=orm_row.node.session_id,
        _version=orm_row.version,  # type:ignore
        name=artifact_name,
        date_created=orm_row.date_created,  # type:ignore
    )
    sliced_code = sliced_artifact.get_code()
    logger.info(sliced_code)

init(output_file)

Create config file based on your desired output file path. If the file path is not specified, it will be at LINEAPY_HOME_DIR/CONFIG_FILE_NAME

For example,

lineapy --home-dir=/lineapy init

will generate a config file with home_dir='/lineapy'

Source code in lineapy/cli/cli.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
@linea_cli.command()
@click.option(
    "--output-file",
    type=click.Path(dir_okay=False, path_type=pathlib.Path),
    help="Output LineaPy config file",
)
def init(output_file: Optional[pathlib.Path]):
    """
    Create config file based on your desired output file path.
    If the file path is not specified, it will be at ``LINEAPY_HOME_DIR/CONFIG_FILE_NAME``

    For example,

        lineapy --home-dir=/lineapy init


    will generate a config file with ``home_dir='/lineapy'``
    """
    if output_file is None:
        output_file = pathlib.Path(options.home_dir).joinpath(CONFIG_FILE_NAME)

    # Build the payload before touching the file, so a failure here cannot
    # leave a truncated config behind. Unset (None) options are omitted and
    # every value is passed through str() so that values which are not
    # JSON-native can still be dumped.
    config = {k: str(v) for k, v in options.__dict__.items() if v is not None}

    with open(output_file, "w") as f:
        # Use the module logger like the other commands in this file. The
        # module-level logging.info() goes to the root logger and implicitly
        # calls logging.basicConfig() when the root logger has no handlers.
        logger.info(f"Writing LineaPy config file to {output_file}")
        json.dump(config, f, indent=4, sort_keys=True)

linea_cli(verbose, home_dir, database_url, artifact_storage_dir, customized_annotation_dir, do_not_track, logging_level, logging_file, mlflow_registry_uri, mlflow_tracking_uri, default_ml_models_storage_backend)

Pass all configuration to lineapy_config

Source code in lineapy/cli/cli.py
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# Root click group: every option below is a global option that is parsed
# before any subcommand runs.
@click.group()
@click.option(
    "--verbose",
    help="Print out logging for graph creation and execution.",
    is_flag=True,
)
@click.version_option(
    None,
    "--version",
    "-v",
    message="%(package)s %(version)s",
    help="Print the Lineapy version number and exit.",
)
@click.option(
    "--home-dir",
    type=click.Path(dir_okay=True, path_type=pathlib.Path),
    help="LineaPy home directory.",
)
@click.option(
    "--database-url",
    type=click.STRING,
    help="SQLAlchemy connection string for LineaPy database.",
)
@click.option(
    "--artifact-storage-dir",
    type=click.Path(dir_okay=True, path_type=pathlib.Path),
    help="LineaPy artifact directory.",
)
@click.option(
    "--customized-annotation-dir",
    type=click.Path(dir_okay=True, path_type=pathlib.Path),
    help="Customized annotation directory.",
)
@click.option(
    "--do-not-track",
    type=click.BOOL,
    help="Opt out for user analytics.",
)
@click.option(
    "--logging-level",
    type=click.Choice(
        # NOTE: relies on the private logging._nameToLevel mapping for the
        # list of accepted level names.
        list(logging._nameToLevel.keys()),
        case_sensitive=False,
    ),
    help="Logging level for LineaPy, overrides the --verbose flag if set.",
)
@click.option(
    "--logging-file",
    type=click.Path(dir_okay=False, path_type=pathlib.Path),
    help="Logging file",
)
@click.option(
    "--mlflow-registry-uri",
    type=click.STRING,
    help="MLFlow registry uri for ML models storage backend.",
)
@click.option(
    "--mlflow-tracking-uri",
    type=click.STRING,
    help="MLFlow tracking uri for ML models storage backend.",
)
@click.option(
    "--default-ml-models-storage-backend",
    type=click.Choice(
        # The accepted choices are the member *names* of the enum.
        [member.name for member in ARTIFACT_STORAGE_BACKEND],
        case_sensitive=False,
    ),
    help="Default storage backend for ML models",
)
def linea_cli(
    verbose: bool,
    home_dir: Optional[pathlib.Path],
    database_url: Optional[str],
    artifact_storage_dir: Optional[pathlib.Path],
    customized_annotation_dir: Optional[pathlib.Path],
    do_not_track: Optional[bool],
    logging_level: Optional[str],
    logging_file: Optional[pathlib.Path],
    mlflow_registry_uri: Optional[str],
    mlflow_tracking_uri: Optional[str],
    # NOTE(review): the click.Choice above is built from strings (member
    # names), so the value received here looks like a str rather than an
    # ARTIFACT_STORAGE_BACKEND member - confirm how options.set handles it.
    default_ml_models_storage_backend: Optional[ARTIFACT_STORAGE_BACKEND],
):
    """
    Pass all configuration to lineapy_config
    """
    # Snapshot the parameter names. This must stay the first statement of the
    # body: at this point locals() holds only the parameters, so any local
    # assigned earlier would be swept into `args` as well.
    args = [x for x in locals().keys()]

    # Set the logging env variable so it's passed to subprocesses, like creating a jupyter kernel
    # --verbose selects DEBUG; an explicit --logging-level is applied second
    # and therefore takes precedence.
    if verbose:
        options.set("logging_level", "DEBUG")
    if logging_level:
        options.set("logging_level", logging_level)

    # Logging is configured after the level has been decided above and before
    # the remaining options are applied.
    configure_logging()

    # Forward every CLI value that was actually supplied (not None) to
    # `options`, restricted to names that are already keys of
    # options.__dict__. Values are read back by name through locals().
    for arg in args:
        if arg in options.__dict__.keys() and locals().get(arg) is not None:
            options.set(arg, locals().get(arg))

list()

Lists full paths to all imported annotation sources.

Source code in lineapy/cli/cli.py
672
673
674
675
676
677
# Registered as the "list" subcommand of the `annotations` click group.
# NOTE: defining a module-level function called `list` rebinds that name for
# the whole module. Any `list(...)` call evaluated after this definition
# (including inside function bodies of this module) resolves to this click
# command instead of the builtin.
@annotations.command("list")
def list():
    """
    Lists full paths to all imported annotation sources.
    """
    # All of the work is delegated to annotations_list.
    annotations_list()

notebook(file, artifact_name, artifact_value, visualize_slice)

Executes the notebook FILE, saves the value ARTIFACT_VALUE with name ARTIFACT_NAME, and prints the sliced code.

For example, if your notebook has a dataframe with value df, then this will print the slice for it:

lineapy notebook my_notebook.ipynb my_df df

You can also reference side effect values, like file_system

lineapy notebook my_notebook.ipynb notebook_file_system lineapy.file_system
Source code in lineapy/cli/cli.py
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
@linea_cli.command()
@click.argument("file", type=click.File())
@click.argument("artifact_name")
@click.argument("artifact_value", type=str)
@click.option(
    "--visualize-slice",
    type=click.Path(dir_okay=False, path_type=pathlib.Path),
    help="Create a visualization for the sliced code, save it to this path",
)
def notebook(
    file: TextIOWrapper,
    artifact_name: str,
    artifact_value: str,
    visualize_slice: Optional[pathlib.Path],
):
    """
    Executes the notebook FILE, saves the value ARTIFACT_VALUE with name ARTIFACT_NAME, and prints the sliced code.

    For example, if your notebooks as dataframe with value `df`, then this will print the slice for it:

        lineapy notebook my_notebook.ipynb my_df df

    You can also reference side effect values, like `file_system`

        lineapy notebook my_notebook.ipynb notebook_file_system lineapy.file_system
    """
    logger.info("Creating in memory notebook")
    # Step 1: load the notebook and append one extra code cell holding the
    # generated save snippet. Only the in-memory copy is modified.
    nb = nbformat.read(file, nbformat.NO_CONVERT)
    cells = nb["cells"]
    cells.append(
        nbformat.v4.new_code_cell(
            generate_save_code(artifact_name, artifact_value, visualize_slice)
        )
    )

    # Step 2: execute the notebook after pointing IPython at a profile that
    # loads the lineapy extension.
    setup_ipython_dir()
    executor = ExecutePreprocessor(timeout=None)
    logger.info("Executing notebook")
    executor.preprocess(nb)

    # Step 3: fetch the saved artifact by name and emit its code slice.
    logger.info("Printing slice")
    # TODO: duplicated with `get` but no context set, should rewrite eventually
    # to not duplicate
    database = RelationalLineaDB.from_config(options)
    orm_row = database.get_artifactorm_by_name(artifact_name)
    # FIXME: mypy issue with SQLAlchemy, see https://github.com/python/typeshed/issues/974
    sliced_artifact = LineaArtifact(
        db=database,
        _artifact_id=orm_row.id,
        _execution_id=orm_row.execution_id,
        _node_id=orm_row.node_id,
        _session_id=orm_row.node.session_id,
        _version=orm_row.version,  # type: ignore
        name=artifact_name,
        date_created=orm_row.date_created,  # type: ignore
    )
    sliced_code = sliced_artifact.get_code()
    logger.info(sliced_code)

remove_annotations_file_extension(filename)

Remove '.annotations.yaml' or '.yaml'.

Source code in lineapy/cli/cli.py
600
601
602
603
604
605
606
607
608
609
def remove_annotations_file_extension(filename: str) -> str:
    """
    Return ``filename`` without a trailing '.annotations.yaml' or '.yaml'.

    All space characters are removed first. Then a trailing '.yaml' is
    stripped if present, followed by a trailing '.annotations' if present.
    """
    stripped = filename.replace(" ", "")
    # Order matters: '.yaml' is peeled off before '.annotations' is checked.
    for suffix in (".yaml", ".annotations"):
        stem, current_ext = os.path.splitext(stripped)
        stripped = stem if current_ext == suffix else stripped
    return stripped

setup_ipython_dir()

Set the ipython directory to include the lineapy extension.

If IPython config files exist, we copy them to the temp folder and append a line to add lineapy into extra_extensions. If they do not exist, we create new config files in the temp folder and add a line to specify extra_extensions.

Source code in lineapy/cli/cli.py
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
def setup_ipython_dir() -> None:
    """Point ``IPYTHONDIR`` at a fresh profile that loads the lineapy extension.

    A new temporary directory with a ``profile_default`` folder is created.
    For each of ``ipython_config.py`` and ``ipython_kernel_config.py``: if the
    file exists in the current IPython ``profile_default`` it is copied over
    and a line appending lineapy to ``extra_extensions`` is added to the copy;
    otherwise a new file that sets ``extra_extensions`` is written.
    """
    check_python_version()
    temp_ipython_dir = tempfile.mkdtemp()
    # The temporary directory gets a default profile of its own.
    new_profile = pathlib.Path(temp_ipython_dir) / "profile_default"
    new_profile.mkdir()

    # Appended to a copied config (leading newline keeps it on its own line).
    append_settings = (
        '\nc.InteractiveShellApp.extra_extensions.append("lineapy")'
    )
    # Written when there is no existing config to copy.
    write_settings = 'c.InteractiveShellApp.extra_extensions = ["lineapy"]'

    current_ipython_dir = pathlib.Path(IPython.paths.get_ipython_dir())
    user_profile = current_ipython_dir.joinpath("profile_default")

    for config_name in ("ipython_config.py", "ipython_kernel_config.py"):
        source = user_profile.joinpath(config_name)
        target = new_profile.joinpath(config_name)

        if not source.exists():
            logger.debug(
                f"No default {config_name} founded, create a new one."
            )
            target.write_text(write_settings)
            continue

        logger.debug(
            f"Default {config_name} founded, append setting to this one."
        )
        shutil.copy(source, target)
        with open(target, "a") as handle:
            handle.write(append_settings)

    # Switch the environment over only after both config files are in place.
    os.environ["IPYTHONDIR"] = temp_ipython_dir

Was this helpful?

Help us improve docs with your feedback!