Object

com.eharmony.aloha.models.vw.jni.multilabel

VwMultilabelModel

Related Doc: package multilabel

Permalink

object VwMultilabelModel extends VwMultlabelJsonCreator with VwMultilabelParamAugmentation

Created by ryan.deak on 9/29/17.

Linear Supertypes
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. VwMultilabelModel
  2. VwMultilabelParamAugmentation
  3. VwMultlabelJsonCreator
  4. VwMultilabelModelJson
  5. MultilabelModelJson
  6. ScalaJsonFormats
  7. SpecJson
  8. AnyRef
  9. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Type Members

  1. case class MultilabelData[K](modelType: String, modelId: ModelId, features: ListMap[String, Spec], numMissingThreshold: Option[Int], labelsInTrainingSet: Vector[K], labelsOfInterest: Option[String], underlying: JsObject) extends PluginInfo[K] with Product with Serializable

    Permalink

    AST for multi-label models.

    AST for multi-label models.

    labelsInTrainingSet

    The sequence of all labels encountered in training. It is important that this is a sequence (with the same order as the labels in the training set). This is because some algorithms may require indices based on the training data.

    labelsOfInterest

    a string representing a function that will be used to extract labels.

    underlying

    the underlying model that will be produced by a

    Attributes
    protected[this]
    Definition Classes
    MultilabelModelJson
  2. case class Plugin(type: String) extends Product with Serializable

    Permalink
    Attributes
    protected[this]
    Definition Classes
    MultilabelModelJson
  3. type VWNsCrossProdSet = Set[(Char, Char)]

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  4. type VWNsSet = Set[Char]

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. val CaptureGroupWithContent: Int

    Permalink

    This is the capture group containing the content when the regex has been padded with the pad function.

    This is the capture group containing the content when the regex has been padded with the pad function.

    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  5. implicit object FeatureSpecFormat extends JsonFormat[Spec]

    Permalink
    Attributes
    protected[this]
    Definition Classes
    SpecJson
  6. val Ignore: Regex

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  7. val IgnoreLinear: Regex

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  8. val UnrecoverableFlagSet: Set[String]

    Permalink

    VW Flags automatically resulting in an error.

    VW Flags automatically resulting in an error.

    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  9. def addParams(paramsAfterRemoved: String, namespaceNames: Set[String], oldIgnored: VWNsSet, oldIgnoredLinear: VWNsSet, oldInteractions: Set[String], labelNs: LabelNamespaces, numUniqueLabels: Int): String

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  10. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  11. def charsIn(r: Regex, chrSeq: CharSequence): VWNsSet

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  12. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  13. def createLabelInteractions(interactions: Set[String], ignored: VWNsSet, labelNs: LabelNamespaces, filter: (Int) ⇒ Boolean): Set[String]

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  14. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  15. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  16. def filterAndFlattenInteractions(is: Set[String], filter: (Int) ⇒ Boolean): VWNsSet

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  17. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  18. def firstCaptureGroups(vwParams: String, regex: Regex): Iterator[String]

    Permalink

    Find all of the regex matches and extract the first capture group from the match.

    Find all of the regex matches and extract the first capture group from the match.

    vwParams

    VW params passed to the updatedVwParams function.

    regex

    a regex with at least one capture group (this is unchecked).

    returns

    Iterator of the matches' first capture group.

    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  19. def flagsReferencingMissingNss(namespaceNames: Set[String], i: VWNsSet, il: VWNsSet, q: VWNsSet, c: VWNsSet, ho: VWNsSet): Map[String, VWNsSet]

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  20. def flagsReferencingMissingNss(namespaceNames: Set[String], i: VWNsSet, il: VWNsSet, is: Set[String]): Map[String, VWNsSet]

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  21. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  22. def handleClassCastException(orig: String, mod: String, ex: ClassCastException): VwParamError

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  23. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  24. def ignored(vwParams: String): VWNsSet

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  25. def ignoredLinear(vwParams: String): VWNsSet

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  26. def interactions(vwParams: String): Set[String]

    Permalink

    Get the set of interactions (encoded as Strings).

    Get the set of interactions (encoded as Strings). String length represents the interaction arity.

    vwParams

    VW params passed to the updatedVwParams function.

    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  27. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  28. def isQuiet(vwParams: String): Boolean

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  29. def json[K](datasetSpec: Vfs, binaryVwModel: Vfs, id: ModelIdentity, labelsInTrainingSet: Seq[K], labelsOfInterest: Option[String] = None, externalModel: Boolean = false, numMissingThreshold: Option[Int] = None)(implicit arg0: JsonWriter[K]): JsValue

    Permalink

    Create a JSON representation of an Aloha model.

    Create a JSON representation of an Aloha model.

    NOTE: Because of the inclusion of the unrestricted labelsOfInterest parameter, the JSON produced by this function is not guaranteed to result in a valid Aloha model. This is because no semantics are required by this function and so, the labelsOfInterest function specification cannot be validated.

    K

    the type of label or class.

    datasetSpec

    a location of a dataset specification.

    binaryVwModel

    a location of a VW binary model file.

    id

    a model ID.

    labelsInTrainingSet

    The sequence of all labels encountered in the training set used to produce the binaryVwModel. It is extremely important that this sequence has the same order as the sequence of labels used in the dataset creation process. Otherwise, the VW model might associate scores with an incorrect label.

    labelsOfInterest

    It is possible that a model is trained on a superset of the labels for which predictions can be made. If the labels at prediction time differ (or should be extracted from the input to the model), this function can provide that capability.

    externalModel

    whether the underlying binary VW model should remain as a separate file and be referenced by the Aloha model specification (true) or the binary model content should be embedded directly into the model (false). Keep in mind Aloha models must be smaller than 2 GB because they are decoded to Strings and Strings are indexed by signed 32-bit integers (which have a max value of 2^31 - 1).

    numMissingThreshold

    the number of missing features to tolerate before emitting a prediction failure.

    returns

    a JSON object.

    Definition Classes
    VwMultlabelJsonCreator
  30. implicit def listMapFormat[K, V](implicit arg0: JsonFormat[K], arg1: JsonFormat[V]): RootJsonFormat[ListMap[K, V]] { def write(m: scala.collection.immutable.ListMap[K,V]): spray.json.JsObject }

    Permalink
    Definition Classes
    ScalaJsonFormats
  31. implicit final def multilabelDataJsonFormat[K](implicit arg0: JsonFormat[K]): RootJsonFormat[MultilabelData[K]]

    Permalink
    Attributes
    protected[this]
    Definition Classes
    MultilabelModelJson
  32. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  33. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  34. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  35. def nssNotInNamespaceNames(nsNames: Set[String], sets: (String, VWNsSet)*): Map[String, VWNsSet]

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  36. implicit final val pluginJsonFormat: RootJsonFormat[Plugin]

    Permalink
    Attributes
    protected[this]
    Definition Classes
    MultilabelModelJson
  37. def removeParams(vwParams: String): String

    Permalink

    Remove flags (and options) for the flags listed in FlagsToRemove.

    Remove flags (and options) for the flags listed in FlagsToRemove.

    vwParams

    VW params passed to the updatedVwParams function.

    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  38. def replaceFileBasedFlags(updatedParams: String, flagsWithFiles: Set[String]): (String, Map[String, File])

    Permalink

    VW will actually update / replace files if files appear as options to flags.

    VW will actually update / replace files if files appear as options to flags. To overcome this, an attempt is made to detect flags referencing files and, if found, replace the files with temp files. These files should be deleted before exiting the main program.

    updatedParams

    the parameters after the updates.

    flagsWithFiles

    the set of flags whose options reference files.

    returns

    a tuple2 of the final string to try with VW for validation along with the mapping from flag to file that was used.

    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  39. final val specJsonFormat: RootJsonFormat[Spec]

    Permalink
    Attributes
    protected[this]
    Definition Classes
    SpecJson
  40. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  41. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  42. def unrecoverableFlags(vwParams: String): Set[String]

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  43. def updatedVwParams(vwParams: String, namespaceNames: Set[String], numUniqueLabels: Int): Either[VwParamError, String]

    Permalink

    Adds VW parameters to make the parameters work as an Aloha multilabel model.

    Adds VW parameters to make the parameters work as an Aloha multilabel model.

    The algorithm works as follows:

    1. Ensure the VW csoaa_ldf or wap_ldf reduction is specified in the supplied VW parameter list (with the appropriate option for the flag).
    2. Ensure that no "unrecoverable" flags appear in the supplied VW parameter list. See UnrecoverableFlagSet for flags whose appearance is considered "unrecoverable".
    3. Ensure that ignore and interaction flags (--ignore, --ignore_linear, -q, --quadratic, --cubic) do not refer to namespaces not supplied in the namespaceNames parameter.
    4. Attempt to determine namespace names that can be used for the labels. For more information on the label namespace resolution algorithm, see: com.eharmony.aloha.dataset.vw.multilabel.VwMultilabelRowCreator.determineLabelNamespaces.
    5. Remove flags and options found in FlagsToRemove.
    6. Add --noconstant and --csoaa_rank flags. --noconstant is added because per-label intercepts will be included and take the place of a single intercept. --csoaa_rank is added to make the VWLearner a VWActionScoresLearner.
    7. Create interactions between features and the label namespaces created above.
      1. If a namespace in namespaceNames appears as an option to VW's ignore_linear flag, do not create a quadratic interaction between that namespace and the label namespace.
      2. For each interaction term (-q, --quadratic, --cubic, --interactions), replace it with an interaction term also interacted with the label namespace. This increases the arity of the interaction by 1.
    8. Finally, change the flag options that reference files to point to temp files so that VW doesn't change the files. This may represent a problem if VW needs to read the file in the option because although it should exist, it will be empty.
    9. Let VW do any validations it can.

    Success Examples

    import com.eharmony.aloha.models.vw.jni.multilabel.VwMultilabelModel.updatedVwParams
    
    // This is a basic example. 'y' and 'Y' in the output are label
    // namespaces.  Notice all namespaces are quadratically interacted
    // with the label namespace.
    val uvw1 = updatedVwParams(
      "--csoaa_ldf mc",
      Set("a", "b", "c")
    )
    // Right("--csoaa_ldf mc --noconstant --csoaa_rank --ignore y " +
    //       "--ignore_linear abc -qYa -qYb -qYc")
    
    // Here since 'a' is in 'ignore_linear', no '-qYa' term appears
    // in the output.
    val uvw2 = updatedVwParams(
      "--csoaa_ldf mc --ignore_linear a -qbc",
      Set("a", "b", "c")
    )
    // Right("--csoaa_ldf mc --noconstant --csoaa_rank --ignore y " +
    //       "--ignore_linear abc -qYb -qYc --cubic Ybc")
    
    // 'a' is in 'ignore', so no terms with 'a' are emitted. 'b' is
    // in 'ignore_linear' so it does not occur in any quadratic
    // interactions in the output, but can appear in interaction
    // terms of higher arity like the cubic interaction.
    val uvw3 = updatedVwParams(
      "--csoaa_ldf mc --ignore a --ignore_linear b -qbc --cubic abc",
      Set("a", "b", "c")
    )
    //  Right("--csoaa_ldf mc --noconstant --csoaa_rank --ignore ay " +
    //        "--ignore_linear bc -qYc --cubic Ybc")

    Error Examples

    import com.eharmony.aloha.models.vw.jni.multilabel.VwMultilabelModel.updatedVwParams
    import com.eharmony.aloha.models.vw.jni.multilabel.{
      NotCsoaaOrWap,
      NamespaceError
    }
    
    assert( updatedVwParams("", Set())  ==  Left(NotCsoaaOrWap("")) )
    
    assert(
      updatedVwParams("--wap_ldf m -qaa", Set()) ==
      Left(NamespaceError("--wap_ldf m -qaa", Set(), Map("quadratic" -> Set('a'))))
    )
    
    assert(
      updatedVwParams(
        "--wap_ldf m --ignore_linear b --ignore a -qbb -qbd " +
                "--cubic bcd --interactions dde --interactions abcde",
        Set()
      ) ==
      Left(
        NamespaceError(
          "--wap_ldf m --ignore_linear b --ignore a -qbb -qbd --cubic bcd " +
            "--interactions dde --interactions abcde",
          Set(),
          Map(
            "ignore"        -> Set('a'),
            "ignore_linear" -> Set('b'),
            "quadratic"     -> Set('b', 'd'),
            "cubic"         -> Set('b', 'c', 'd', 'e'),
            "interactions"  -> Set('a', 'b', 'c', 'd', 'e')
          )
        )
      )
    )
    vwParams

    current VW parameters passed to the VW JNI

    namespaceNames

    it is assumed that namespaceNames is a superset of all of the namespaces referred to by any flags found in vwParams.

    numUniqueLabels

    the number of unique labels in the training set. This is used to calculate the appropriate VW ring_size parameter.

    Definition Classes
    VwMultilabelParamAugmentation
  44. def validateVwParams(orig: String, mod: String, finalPs: String, flagToFileMap: Map[String, File], addQuiet: Boolean): Either[VwParamError, String]

    Permalink
    Attributes
    protected
    Definition Classes
    VwMultilabelParamAugmentation
  45. implicit val vwMultilabelAstFormat: RootJsonFormat[VwMultilabelAst]

    Permalink
    Attributes
    protected[this]
    Definition Classes
    VwMultilabelModelJson
  46. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  47. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  48. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Inherited from VwMultlabelJsonCreator

Inherited from VwMultilabelModelJson

Inherited from MultilabelModelJson

Inherited from ScalaJsonFormats

Inherited from SpecJson

Inherited from AnyRef

Inherited from Any

Ungrouped