/**
 * Formats the ratio `numerator / denomenator` as a percentage string.
 *
 * @param numerator - Value being measured.
 * @param denomenator - Total the numerator is measured against.
 * @param precision - Number of digits after the decimal point, forwarded to
 *   `Number.prototype.toFixed`. Defaults to 0.
 * @returns The percentage without a `%` sign (e.g. `'25.00'`), or an empty
 *   string when the ratio is not a finite number (0 / 0, division by zero,
 *   or NaN / infinite inputs).
 */
export const getPercentageValue = (
  numerator: number,
  denomenator: number,
  precision: number = 0,
): string => {
  const percentage = (numerator / denomenator) * 100;

  // Checking only for NaN let `x / 0` through as the literal text 'Infinity';
  // any non-finite result has no meaningful percentage, so report it as empty.
  return Number.isFinite(percentage) ? percentage.toFixed(precision) : '';
};

/**
 * Renders each bucket as a `"<value> : <count>, "` display string.
 *
 * @param buckets - Array of `{ value, count }` entries; may be `null` or
 *   `undefined`.
 * @returns One formatted string per bucket, in input order. Returns an empty
 *   array when `buckets` is nullish so the declared `string[]` return type
 *   always holds.
 */
function getValueCounts(buckets: any): string[] {
  return (
    buckets?.map(
      (b: { value: string; count: number }) => `${b.value} : ${b.count}, `,
    ) ?? []
  );
}

/**
 * Splits raw job records into the collections used to display data-quality
 * results: a flat list of constraint violations, plus per-job numerical and
 * categorical feature statistics.
 *
 * Each job is parsed independently. A job whose JSON cannot be parsed (or
 * whose metrics payload has an unexpected shape) is skipped for the metrics
 * collections and reported through `error`; violations pushed for a job
 * before its failure are kept.
 *
 * @param metricsData - Job records carrying `constraintViolations` and
 *   `metrics` as JSON strings.
 * @returns
 *   - `violationsData`: one row per violation across all jobs.
 *   - `numericalData` / `categoricalData`: the set of feature names seen and
 *     one entry per successfully parsed job, keyed by feature name.
 *   - `error`: `null` when every job parsed; otherwise a single `Error` whose
 *     message lists every failing job, one per line.
 */
export const processMetricsData = (metricsData: ModelMetric[]) => {
  const violationsData: JobMetricViolation[] = [];
  const numericalData: {
    features: Set<string>;
    jobs: DataQualityNumericalMetricsDataJob[];
  } = { features: new Set(), jobs: [] };
  const categoricalData: {
    features: Set<string>;
    jobs: DataQualityCategoricalMetricsDataJob[];
  } = { features: new Set(), jobs: [] };
  // Collect every failure instead of overwriting, so no failing job is hidden.
  const parseErrors: string[] = [];

  metricsData.forEach((job) => {
    try {
      // Parse Constraint Violations

      // For each record (job), parse the JSON value of constraint violations
      // returned from SageMaker and build a collection of table rows.
      // This accumulates the detected violations across all parsed jobs.

      if (job.constraintViolations) {
        const violations: MetricViolation[] = JSON.parse(
          job.constraintViolations,
        ).violations;
        violations?.forEach((violation) => {
          violationsData.push({
            jobId: job.jobId,
            jobType: job.jobType,
            createdDate: job.createdDate,
            feature_name: violation.feature_name,
            constraint_check_type: violation.constraint_check_type,
            description: violation.description,
          });
        });
      }

      // Parse Metrics

      // For each record (job), parse the JSON value of metrics, which should
      // contain both numerical and categorical stats, and separate them into
      // two collections recording the job and each feature's values.

      if (job.metrics) {
        const jobMetrics: DataQualityMetrics = JSON.parse(job.metrics);

        // Identity fields shared by both per-job entries. `||` (not `??`) is
        // kept so empty values also fall through to the alternative field.
        // @ts-expect-error fallback field presumably missing from ModelMetric — TODO confirm
        const jobName = job.jobId || `${job.sharedModelVariationVersionId}`;
        // @ts-expect-error fallback field presumably missing from ModelMetric — TODO confirm
        const createdDate = job.createdDate || job.metricsCreatedDate;

        const numericalJob = {
          jobName,
          jobType: job.jobType,
          createdDate,
        } as DataQualityNumericalMetricsDataJob;
        const categoricalJob = {
          jobName,
          jobType: job.jobType,
          createdDate,
        } as DataQualityCategoricalMetricsDataJob;

        jobMetrics.features.forEach((feature) => {
          // Numerical Collection
          if (feature.numerical_statistics) {
            const stats = feature.numerical_statistics;
            const numPresent: number = stats.common.num_present;
            const numMissing: number = stats.common.num_missing;

            numericalData.features.add(feature.name);

            numericalJob[feature.name] = {
              missingPercentage: getPercentageValue(
                numMissing,
                numPresent + numMissing,
                4,
              ),
              mean: stats.mean,
              sum: stats.sum,
              stdDev: stats.std_dev,
              min: stats.min,
              max: stats.max,
            };

            // Categorical Collection
          } else if (feature.string_statistics) {
            const stats = feature.string_statistics;
            const numPresent: number = stats.common.num_present;
            const numMissing: number = stats.common.num_missing;
            const buckets = stats.distribution?.categorical?.buckets;

            categoricalData.features.add(feature.name);

            categoricalJob[feature.name] = {
              // The share of missing values is relative to the total number of
              // rows (present + missing), matching the numerical branch; the
              // distinct-value count is not a row total.
              missingPercentage: getPercentageValue(
                numMissing,
                numPresent + numMissing,
                4,
              ),
              valueCount: buckets ? getValueCounts(buckets) : [],
            };
          }
        });

        // Only reached when the whole metrics payload parsed without throwing.
        numericalData.jobs.push(numericalJob);
        categoricalData.jobs.push(categoricalJob);
      }
    } catch (error) {
      parseErrors.push(
        `Error parsing metrics for job ${job.jobId}. Error: ${
          (error as Error)?.message
        }`,
      );
    }
  });

  // With a single failure the message is exactly the per-job message above.
  const error: Error | null =
    parseErrors.length > 0 ? new Error(parseErrors.join('\n')) : null;

  return { violationsData, numericalData, categoricalData, error };
};

/**
 * Builds one chart data-set entry per selected feature from the numerical
 * per-job statistics.
 *
 * @param instances - Per-job entries keyed by feature name.
 * @param selectedFeatures - Feature names to build entries for.
 * @returns One `{ label, data }` entry per selected feature, in the order
 *   given. `data` holds the series `Mean`, `Sum`, `Min`, `Max`, `stdDev` and
 *   `missingPercentage`, each with one point per instance in input order. A
 *   job lacking the feature contributes `undefined` to the statistic series
 *   and `NaN` to `missingPercentage`.
 */
export const buildNumericalLineChartDataSet = (
  instances: DataQualityNumericalMetricsDataJob[],
  selectedFeatures: string[],
): { label: string; data: Record<string, number[]> }[] =>
  selectedFeatures.map((featureName) => {
    const series: Record<string, number[]> = {
      Mean: [],
      Sum: [],
      Min: [],
      Max: [],
      stdDev: [],
      missingPercentage: [],
    };

    for (const job of instances) {
      const stats = job[featureName];

      series['Mean'].push(stats?.mean);
      series['Sum'].push(stats?.sum);
      series['Min'].push(stats?.min);
      series['Max'].push(stats?.max);
      series['stdDev'].push(stats?.stdDev);
      // missingPercentage is stored as a string; charts need a number.
      series['missingPercentage'].push(Number(stats?.missingPercentage));
    }

    return { label: featureName, data: series };
  });

/**
 * Builds one chart data-set entry per selected feature from the categorical
 * per-job statistics.
 *
 * @param instances - Per-job entries keyed by feature name.
 * @param selectedFeatures - Feature names to build entries for.
 * @returns One `{ label, data }` entry per selected feature, in the order
 *   given. `data.missingPercentage` holds one numeric point per instance in
 *   input order; a job lacking the feature contributes `NaN`.
 */
export const buildCategoricalLineChartDataSet = (
  instances: DataQualityCategoricalMetricsDataJob[],
  selectedFeatures: string[],
): { label: string; data: Record<string, number[]> }[] =>
  selectedFeatures.map((featureName) => {
    // missingPercentage is stored as a string; charts need a number.
    const missingSeries: number[] = instances.map((job) =>
      Number(job[featureName]?.missingPercentage),
    );

    return { label: featureName, data: { missingPercentage: missingSeries } };
  });
