Source code for luigi.contrib.sparkey

# -*- coding: utf-8 -*-
#
# Copyright 2012-2015 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import luigi


class SparkeyExportTask(luigi.Task):
    """
    A luigi task that writes to a local sparkey log file.

    Subclasses should implement the requires and output methods. The output
    must be a luigi.LocalTarget.

    The resulting sparkey log file will contain one entry for every line in
    the input, mapping from the first value to a tab-separated list of the
    rest of the line.

    To generate a simple key-value index, yield "key", "value" pairs from the
    input(s) to this task.
    """

    # the separator used to split input lines
    separator = '\t'

    def __init__(self, *args, **kwargs):
        super(SparkeyExportTask, self).__init__(*args, **kwargs)

    def run(self):
        """Run the task by writing the sparkey log file."""
        self._write_sparkey_file()

    def _write_sparkey_file(self):
        """
        Convert the task input into a sparkey log file at the output path.

        Each input line is stripped of surrounding whitespace and split once
        on ``self.separator``; the first field becomes the key and the
        remainder becomes the value. The log is written to a temporary
        LocalTarget and moved to the output path only after every line has
        been written.

        :raises TypeError: if ``self.output()`` is not a luigi.LocalTarget.
        :raises ValueError: if a stripped line does not contain the separator
                            (the key/value unpacking fails).
        """
        # imported lazily so the module can be imported without sparkey
        import sparkey

        infile = self.input()
        outfile = self.output()
        if not isinstance(outfile, luigi.LocalTarget):
            raise TypeError("output must be a LocalTarget")

        # write job output to temporary sparkey file
        temp_output = luigi.LocalTarget(is_tmp=True)
        w = sparkey.LogWriter(temp_output.path)
        try:
            # the with-block closes the input handle even when a line is bad
            with infile.open('r') as lines:
                for line in lines:
                    k, v = line.strip().split(self.separator, 1)
                    w[k] = v
        finally:
            # always release the writer, including on the error path
            w.close()

        # move finished sparkey file to final destination
        temp_output.move(outfile.path)