import * as React from "react";
import { Footer, ProsodySamples, PaperSummary } from "./components";
import { useAsDefaultTheme } from "../../providers/Theme";
import * as samples from "./ensembleProsodyPrediction";
import { H3, H4, Paragraph } from "../../components/Typography";
import { useGA } from "../../hooks/analytics/useGA";

/** Paper title shown in the summary header. */
const TITLE = "Ensemble Prosody Prediction for Expressive Speech Synthesis";

/**
 * Comma-separated author list shown under the title. A trailing `*` on a name
 * marks a contact author, as explained by the legend appended at the end.
 */
const AUTHORS =
  [
    "Tian Huey Teh*",
    "Vivian Hu*",
    "Devang S Ram Mohan",
    "Zack Hodari",
    "Christopher Wallis",
    "Tomás Gómez Ibarrondo",
    "Alexandra Torresquintero",
    "James Leoni",
    "Mark Gales",
    "Simon King"
  ].join(", ") + " (*: contact)";
/**
 * Summary card for the "Ensemble Prosody Prediction" paper.
 *
 * Wraps `PaperSummary` with this paper's title, author list and abstract, and
 * forwards `samplesLink` / `paperLink` to it unchanged. The paper page in this
 * file renders the summary without passing either link.
 *
 * NOTE(review): props are typed `any`; tighten once the prop types accepted by
 * `PaperSummary` are confirmed.
 */
export const EnsembleProsodyPredictionSummary = (props: any) => {
  const { samplesLink, paperLink } = props;

  return (
    <PaperSummary
      title={TITLE}
      authors={AUTHORS}
      samplesLink={samplesLink}
      paperLink={paperLink}
    >
      Generating expressive speech with rich and varied prosody continues to be a
      challenge for Text-to-Speech. Most efforts have focused on sophisticated
      neural architectures intended to better model the data distribution. Yet, in
      evaluations it is generally found that no single model is preferred for all
      input texts. This suggests an approach that has rarely been used before for
      Text-to-Speech: an ensemble of models.
      <br /> <br />
      We apply ensemble learning to prosody prediction. We construct simple
      ensembles of prosody predictors by varying either model architecture or
      model parameter values. To automatically select amongst the models in the
      ensemble when performing Text-to-Speech, we propose a novel, and
      computationally trivial, variance-based criterion.
      <br /> <br /> We demonstrate that even a small ensemble of prosody
      predictors yields useful diversity, which, combined with the proposed
      selection criterion, outperforms any individual model from the ensemble.
    </PaperSummary>
  );
};

/**
 * Audio-samples page for the "Ensemble Prosody Prediction" paper.
 *
 * On render it calls `useAsDefaultTheme("light")` and `useGA()`, then lays out:
 *   1. the paper summary (rendered without sample/paper links),
 *   2. the listener-preference samples,
 *   3. the AFP-F0 selection samples, split into cases where the criterion
 *      agrees and disagrees with the ORACLE choice,
 *   4. the shared footer,
 *   5. a global `.highlight:before` style rule.
 */
export const EnsembleProsodyPredictionPaper: React.FunctionComponent = () => {
  useAsDefaultTheme("light");
  useGA();
  return (
    <div className="mw8 ph4 center mt4">
      <EnsembleProsodyPredictionSummary />
      <H3>Listener preference</H3>
      <Paragraph>
        We conducted a listening test to measure preference for the individual
        models (CONV and RNN) in the ensemble using an A/B. We then created a
        human ORACLE by choosing the rendition of each utterance that was most
        preferred by listeners. <br /> <br /> Have a listen to some of the
        samples produced by each model. (We encourage readers to listen to each
        rendition before revealing the “answer”.) What do you think? Do you
        agree with the crowd favorite? Was your preference based on intonation
        or some other factor?
      </Paragraph>
      <ProsodySamples rows={samples.LISTENER_PREFERENCE} />

      <H3 classNameOverride="mt5">
        Selection using variance of predicted F0 (AFP-F0)
      </H3>
      <Paragraph>
        Using the F0 variance-based criterion proposed in our paper, we are able
        to predict listener preference more accurately than using just a single
        model. However, greater variance doesn’t always correspond to crowd
        preference.
        <br /> <br /> Below, we’ve shared some samples where the selection
        criterion agrees with the ORACLE choice and some where it does not.
        Which do you agree with?
      </Paragraph>
      <H4 classNameOverride="mt5">AFP-F0 agrees with ORACLE</H4>
      <ProsodySamples rows={samples.PROSODY_SELECTION_CORRECT} />
      <H4 classNameOverride="mt5">AFP-F0 disagrees with ORACLE</H4>
      <ProsodySamples rows={samples.PROSODY_SELECTION_INCORRECT} />

      <Footer />
      {/*
        Global underline bar for elements carrying the `highlight` class
        (presumably applied inside ProsodySamples; not visible in this file).
        Opacity is written as the unitless 0.5 rather than 50%: the two are
        equivalent, but the percentage form is a later CSS addition that older
        browsers reject and some CSS minifiers have mishandled.
      */}
      <style global jsx>
        {`
          .highlight:before {
            content: "";
            border-radius: 2px;
            width: 100%;
            height: 5px;
            opacity: 0.5;
            background: rgb(252, 78, 54);
            position: absolute;
            bottom: 0px;
          }
        `}
      </style>
    </div>
  );
};
