Skip to content

taskgen

This taskgen file contains helper functions to create tasks.

get_allsessions_task_definition_graph(artifact_collection, pipeline_name)

get_allsessions_task_definition_graph returns a single task definition for the whole pipeline, along with a task graph containing only that task.

Source code in lineapy/plugins/taskgen.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def get_allsessions_task_definition_graph(
    artifact_collection: ArtifactCollection,
    pipeline_name: str,
) -> Tuple[Dict[str, TaskDefinition], TaskGraph]:
    """
    Build the task definition and task graph for a pipeline run as one task.

    The only task, ``run_all``, takes the artifact collection's input
    parameters as its user inputs. Its call block invokes
    ``run_all_sessions()`` on ``{pipeline_name}_module`` and binds the result
    to ``artifacts``, which is also the task's only return variable.

    Returns
    -------
    Tuple[Dict[str, TaskDefinition], TaskGraph]
        A one-entry mapping from ``"run_all"`` to its TaskDefinition, and a
        TaskGraph holding the single node ``run_all`` with no edges.
    """
    task_name = "run_all"

    # The call block is emitted with a four-space indent in front of it.
    call_block = (
        " " * 4 + f"artifacts = {pipeline_name}_module.run_all_sessions()"
    )

    run_all_task = TaskDefinition(
        function_name=task_name,
        user_input_variables=artifact_collection.input_parameters,
        loaded_input_variables=[],
        typing_blocks=[],
        pre_call_block="",
        call_block=call_block,
        post_call_block="",
        return_vars=["artifacts"],
        pipeline_name=pipeline_name,
    )
    return {task_name: run_all_task}, TaskGraph(nodes=[task_name], edges={})

get_artifact_task_definition_graph(artifact_collection, pipeline_name)

get_artifact_task_definition_graph returns a task definition for each artifact the pipeline produces, along with the inter-artifact task graph. This may include tasks that produce common variables that were not initially defined as artifacts.

Source code in lineapy/plugins/taskgen.py
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def get_artifact_task_definition_graph(
    artifact_collection: ArtifactCollection, pipeline_name: str
) -> Tuple[Dict[str, TaskDefinition], TaskGraph]:
    """
    Build one task definition per user-code node collection in the pipeline.

    This may include tasks that produce common variables that were not
    initially defined as artifacts.

    Each pipeline input parameter is attached as a user input to the first
    task (in sorted session order) whose input variables contain it. Later
    tasks that need the same variable receive it as a loaded input instead.

    Returns
    -------
    Tuple[Dict[str, TaskDefinition], TaskGraph]
        A mapping from each node collection's ``safename`` to its
        TaskDefinition, and the artifact collection's
        ``inter_artifact_taskgraph``.
    """
    task_definitions: Dict[str, TaskDefinition] = dict()
    # Input parameters that have not yet been claimed by any task.
    unused_input_parameters = set(artifact_collection.input_parameters)

    for session_artifacts in artifact_collection.sort_session_artifacts():
        # The spec is looked up from the session alone, so it is computed
        # once per session rather than once per node collection.
        session_input_parameters_spec = (
            BaseSessionWriter().get_session_input_parameters_spec(
                session_artifacts
            )
        )

        for nc in session_artifacts.usercode_nodecollections:
            # Sort for a deterministic ordering of the generated arguments.
            all_input_variables = sorted(nc.input_variables)
            artifact_user_input_variables = [
                var
                for var in all_input_variables
                if var in unused_input_parameters
            ]
            # Cast each user-supplied value to the type recorded in the spec.
            user_input_var_typing_block = [
                f"{var} = {session_input_parameters_spec[var].value_type}({var})"
                for var in artifact_user_input_variables
            ]
            # Mark these parameters as claimed so later tasks load them
            # instead of asking the user for them again.
            unused_input_parameters.difference_update(
                artifact_user_input_variables
            )
            claimed_by_this_task = set(artifact_user_input_variables)
            loaded_input_vars = [
                var
                for var in all_input_variables
                if var not in claimed_by_this_task
            ]
            function_call_block = (
                BaseSessionWriter().get_session_artifact_function_call_block(
                    nc,
                    source_module=f"{pipeline_name}_module",
                )
            )

            task_def: TaskDefinition = TaskDefinition(
                function_name=nc.safename,
                user_input_variables=artifact_user_input_variables,
                loaded_input_variables=loaded_input_vars,
                typing_blocks=user_input_var_typing_block,
                pre_call_block="",
                call_block=function_call_block,
                post_call_block="",
                return_vars=nc.return_variables,
                pipeline_name=pipeline_name,
            )
            task_definitions[nc.safename] = task_def

    # no remapping needed, inter_artifact_taskgraph already uses nc.safename
    task_graph = artifact_collection.inter_artifact_taskgraph

    return task_definitions, task_graph

get_noop_setup_task_definition(pipeline_name)

Returns a TaskDefinition that no-ops so that users can write their own setup tasks by replacing the setup call block.

This task should be used at the beginning of a pipeline.

Source code in lineapy/plugins/taskgen.py
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
def get_noop_setup_task_definition(pipeline_name):
    """
    Build a placeholder ``setup`` task whose call block is just ``pass``.

    Users can write their own setup task by replacing the call block.
    The task has no inputs, no typing blocks and no return variables.

    This task should be used at the beginning of a pipeline.
    """
    noop_setup_task = TaskDefinition(
        pipeline_name=pipeline_name,
        function_name="setup",
        # Inputs: none.
        user_input_variables=[],
        loaded_input_variables=[],
        typing_blocks=[],
        # Body: a bare no-op statement.
        pre_call_block="",
        call_block="pass",
        post_call_block="",
        # Outputs: none.
        return_vars=[],
    )
    return noop_setup_task

get_noop_teardown_task_definition(pipeline_name)

Returns a TaskDefinition that no-ops so that users can write their own teardown tasks by replacing the teardown call block.

This task should be used at the end of a pipeline.

Source code in lineapy/plugins/taskgen.py
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
def get_noop_teardown_task_definition(pipeline_name):
    """
    Build a placeholder ``teardown`` task whose call block is just ``pass``.

    Users can write their own teardown task by replacing the call block.
    The task has no inputs, no typing blocks and no return variables.

    This task should be used at the end of a pipeline.
    """
    noop_teardown_task = TaskDefinition(
        pipeline_name=pipeline_name,
        function_name="teardown",
        # Inputs: none.
        user_input_variables=[],
        loaded_input_variables=[],
        typing_blocks=[],
        # Body: a bare no-op statement.
        pre_call_block="",
        call_block="pass",
        post_call_block="",
        # Outputs: none.
        return_vars=[],
    )
    return noop_teardown_task

get_session_task_definition_graph(artifact_collection, pipeline_name)

get_session_task_definition_graph returns a task definition for each session in the pipeline, along with the inter-session task graph remapped to the task names.

Source code in lineapy/plugins/taskgen.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def get_session_task_definition_graph(
    artifact_collection: ArtifactCollection, pipeline_name: str
) -> Tuple[Dict[str, TaskDefinition], TaskGraph]:
    """
    Build one task definition per session in the pipeline.

    Each task calls the session's function on ``{pipeline_name}_module``,
    stores the result in ``artifacts`` and then unpacks one variable per
    ArtifactNodeCollection from it.

    Returns
    -------
    Tuple[Dict[str, TaskDefinition], TaskGraph]
        A mapping from each session's function name to its TaskDefinition,
        and the inter-session task graph with its nodes remapped from
        session ids to those function names.
    """
    definitions_by_task: Dict[str, TaskDefinition] = {}

    # session_id -> task name, used to relabel the session task graph
    task_name_by_session: Dict[str, str] = {}

    for session in artifact_collection.sort_session_artifacts():
        params_spec = BaseSessionWriter().get_session_input_parameters_spec(
            session
        )
        input_names = list(params_spec.keys())

        # One cast statement per input, using the type recorded in the spec.
        typing_lines = []
        for name in input_names:
            typing_lines.append(
                f"{name} = {params_spec[name].value_type}({name})"
            )

        session_call = BaseSessionWriter().get_session_function_callblock(
            session
        )
        # Call module's run session function and unpack the artifacts from it
        call_block = f"artifacts = {pipeline_name}_module.{session_call}\n"

        # Only artifact node collections are unpacked and returned.
        artifact_ncs = [
            nc
            for nc in session.usercode_nodecollections
            if isinstance(nc, ArtifactNodeCollection)
        ]
        unpack_lines = [
            f'{nc.safename} = artifacts["{nc.name}"]' for nc in artifact_ncs
        ]
        returned_names = [nc.safename for nc in artifact_ncs]

        task_name = BaseSessionWriter().get_session_function_name(session)

        definitions_by_task[task_name] = TaskDefinition(
            function_name=task_name,
            user_input_variables=input_names,
            loaded_input_variables=[],
            typing_blocks=typing_lines,
            pre_call_block="",
            call_block=call_block,
            post_call_block="\n".join(unpack_lines),
            return_vars=returned_names,
            pipeline_name=pipeline_name,
        )
        task_name_by_session[session.session_id] = task_name

    # avoid mapping in place here to not overwrite the artifact collection session taskgraph
    remapped_graph = artifact_collection.inter_session_taskgraph.remap_nodes(
        task_name_by_session
    )
    return (definitions_by_task, remapped_graph)

get_task_graph(artifact_collection, pipeline_name, task_breakdown)

get_task_graph returns a dictionary of TaskDefinitions together with the corresponding TaskGraph.

This function breaks down the artifact_collection into tasks based on the task_breakdown parameter. This will give the main bulk of tasks that should be included in a pipeline dag file.

Returns:

Type Description
Tuple[Dict[str, TaskDefinition], TaskGraph]

Returns a task_definitions dictionary, which maps a key corresponding to the task name to Linea's TaskDefinition object.

Specific framework implementations of PipelineWriters should serialize the TaskDefinition objects to match the format for pipeline arguments that is expected by that framework.

Source code in lineapy/plugins/taskgen.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def get_task_graph(
    artifact_collection: ArtifactCollection,
    pipeline_name: str,
    task_breakdown: DagTaskBreakdown,
) -> Tuple[Dict[str, TaskDefinition], TaskGraph]:
    """
    Break an artifact collection into tasks at the requested granularity.

    The work is delegated to the builder matching `task_breakdown`: one task
    for all sessions, one task per session, or one task per artifact. This
    gives the main bulk of tasks that should be included in a pipeline dag
    file.

    Returns
    -------
    Tuple[Dict[str, TaskDefinition], TaskGraph]
        A `task_definitions` dictionary, which maps a key corresponding to
        the task name to Linea's TaskDefinition object, and the TaskGraph
        produced by the selected builder.

    Raises
    ------
    ValueError
        If `task_breakdown` is not one of the supported granularities.

    Specific framework implementations of PipelineWriters should serialize the
    TaskDefinition objects to match the format for pipeline arguments that is
    expected by that framework.
    """
    # Checked in order with ==, mirroring an if/elif chain.
    builders = (
        (
            DagTaskBreakdown.TaskAllSessions,
            get_allsessions_task_definition_graph,
        ),
        (
            DagTaskBreakdown.TaskPerSession,
            get_session_task_definition_graph,
        ),
        (
            DagTaskBreakdown.TaskPerArtifact,
            get_artifact_task_definition_graph,
        ),
    )
    for breakdown, build_definition_graph in builders:
        if task_breakdown == breakdown:
            return build_definition_graph(artifact_collection, pipeline_name)

    raise ValueError(
        f"Task breakdown granularity {task_breakdown} is not currently supported."
    )

get_tmpdir_teardown_task_definition(pipeline_name)

Returns a TaskDefinition that is used to tear down a pipeline that uses pickle serialization to a temporary directory for inter-task communication.

This task should be used at the end of a pipeline.

Source code in lineapy/plugins/taskgen.py
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
def get_tmpdir_teardown_task_definition(pipeline_name):
    """
    Build the ``teardown`` task for pipelines that pass data between tasks
    by pickling to a temporary directory.

    The call block is rendered from the
    ``task/tmpdirpickle/task_teardown.jinja`` plugin template with the
    pipeline name. The task has no inputs and no return variables.

    This task should be used at the end of a pipeline.
    """
    teardown_template = load_plugin_template(
        "task/tmpdirpickle/task_teardown.jinja"
    )
    rendered_teardown = teardown_template.render(pipeline_name=pipeline_name)

    teardown_task = TaskDefinition(
        pipeline_name=pipeline_name,
        function_name="teardown",
        user_input_variables=[],
        loaded_input_variables=[],
        typing_blocks=[],
        pre_call_block="",
        call_block=rendered_teardown,
        post_call_block="",
        return_vars=[],
    )
    return teardown_task

get_tmpdirpickle_setup_task_definition(pipeline_name)

Returns a TaskDefinition that is used to set up a pipeline that uses pickle serialization to a temporary directory for inter-task communication.

This task should be used at the beginning of a pipeline.

Source code in lineapy/plugins/taskgen.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def get_tmpdirpickle_setup_task_definition(pipeline_name):
    """
    Build the ``setup`` task for pipelines that pass data between tasks by
    pickling to a temporary directory.

    The call block is rendered from the
    ``task/tmpdirpickle/task_setup.jinja`` plugin template with the pipeline
    name. The task has no inputs and no return variables.

    This task should be used at the beginning of a pipeline.
    """
    setup_template = load_plugin_template("task/tmpdirpickle/task_setup.jinja")
    rendered_setup = setup_template.render(pipeline_name=pipeline_name)

    setup_task = TaskDefinition(
        pipeline_name=pipeline_name,
        function_name="setup",
        user_input_variables=[],
        loaded_input_variables=[],
        typing_blocks=[],
        pre_call_block="",
        call_block=rendered_setup,
        post_call_block="",
        return_vars=[],
    )
    return setup_task

Was this helpful?

Help us improve docs with your feedback!