{
"title": "Principal Component Analysis (PCA) Mastery: 100 MCQs",
"description": "A comprehensive set of 100 multiple-choice questions designed to test and deepen your understanding of Principal Component Analysis (PCA), from basic concepts to advanced applications in dimensionality reduction and feature engineering.",
"questions": [
{
"id": 1,
"questionText": "What is the main goal of Principal Component Analysis (PCA)?",
"options": [
"To classify data into categories",
"To generate random features",
"To cluster data points",
"To reduce the dimensionality of a dataset while retaining most variance"
],
"correctAnswerIndex": 3,
"explanation": "PCA aims to reduce the number of variables in a dataset while preserving as much variability as possible."
},
{
"id": 2,
"questionText": "In PCA, what does a 'principal component' represent?",
"options": [
"The cluster center",
"An original feature in the dataset",
"A new uncorrelated feature that captures maximum variance",
"The mean of all features"
],
"correctAnswerIndex": 2,
"explanation": "Principal components are linear combinations of original features that are uncorrelated and ordered by the amount of variance they capture."
},
{
"id": 3,
"questionText": "What is the first step before applying PCA?",
"options": [
"Standardize or normalize the data",
"Train a classifier",
"Apply k-means clustering",
"Remove outliers only"
],
"correctAnswerIndex": 0,
"explanation": "Standardization ensures that features with different scales contribute equally to the principal components."
},
{
"id": 4,
"questionText": "Scenario: You have 10 features, but 90% of the variance is captured in 2 components. What can you do?",
"options": [
"Keep all 10 features",
"Apply dropout",
"Add more features",
"Reduce the dataset to 2 principal components"
],
"correctAnswerIndex": 3,
"explanation": "Reducing to 2 principal components retains most of the information while simplifying the dataset."
},
{
"id": 5,
"questionText": "Which mathematical technique is commonly used to compute PCA?",
"options": [
"Eigen decomposition of the covariance matrix",
"Linear regression",
"Gradient descent",
"k-Nearest Neighbors"
],
"correctAnswerIndex": 0,
"explanation": "PCA typically involves computing eigenvectors and eigenvalues of the covariance matrix to find principal components."
},
{
"id": 6,
"questionText": "What property do all principal components have?",
"options": [
"They are uncorrelated (orthogonal) to each other",
"They sum to zero",
"They are dependent on each other",
"They always have the same variance"
],
"correctAnswerIndex": 0,
"explanation": "Principal components are constructed to be orthogonal, ensuring no redundancy in the information they capture."
},
{
"id": 7,
"questionText": "Scenario: PCA applied on a dataset with features in different scales. What happens if you don’t standardize?",
"options": [
"The first component captures zero variance",
"PCA fails to compute",
"Features with larger scale dominate the principal components",
"Variance is automatically normalized"
],
"correctAnswerIndex": 2,
"explanation": "Without standardization, features with larger numeric ranges contribute more to variance, skewing PCA results."
},
{
"id": 8,
"questionText": "Which of the following can PCA NOT do?",
"options": [
"Remove correlated features",
"Reduce dimensionality",
"Improve classification accuracy directly",
"Visualize high-dimensional data"
],
"correctAnswerIndex": 2,
"explanation": "PCA is unsupervised and reduces dimensionality; it does not directly improve classification accuracy."
},
{
"id": 9,
"questionText": "Scenario: You apply PCA and find that the first principal component explains 70% variance, the second 20%, and the rest 10%. How many components would you keep to retain 90% variance?",
"options": [
"All components",
"One component",
"Two components",
"Three components"
],
"correctAnswerIndex": 2,
"explanation": "Adding the first two components (70% + 20%) captures 90% of the dataset variance."
},
{
"id": 10,
"questionText": "PCA is an example of which type of learning?",
"options": [
"Supervised learning",
"Unsupervised learning",
"Reinforcement learning",
"Semi-supervised learning"
],
"correctAnswerIndex": 1,
"explanation": "PCA does not use labels; it finds patterns based on feature correlations, making it unsupervised."
},
{
"id": 11,
"questionText": "Scenario: You perform PCA and transform your 5D data to 3D. What property is guaranteed?",
"options": [
"Classification accuracy improves",
"The 3 components capture the maximum possible variance in 3D",
"All data points remain equidistant",
"The original features are perfectly preserved"
],
"correctAnswerIndex": 1,
"explanation": "PCA selects components capturing maximum variance in the reduced dimensions, not necessarily preserving original distances."
},
{
"id": 12,
"questionText": "Which PCA component explains the least variance?",
"options": [
"All components explain equal variance",
"Any intermediate component",
"The last principal component",
"The first principal component"
],
"correctAnswerIndex": 2,
"explanation": "PCA orders components from highest to lowest variance."
},
{
"id": 13,
"questionText": "Scenario: Dataset is already perfectly uncorrelated. What effect does PCA have?",
"options": [
"PCA fails to compute",
"Principal components are the same as original features",
"PCA increases correlation",
"PCA reduces variance"
],
"correctAnswerIndex": 1,
"explanation": "If features are uncorrelated, PCA simply aligns components with original features without reducing dimensionality."
},
{
"id": 14,
"questionText": "Scenario: You want to visualize high-dimensional data in 2D. PCA is applied. What is the risk?",
"options": [
"Data labels change",
"All variance is retained",
"Some variance is lost",
"Original features are unchanged"
],
"correctAnswerIndex": 2,
"explanation": "Reducing dimensions to 2D inevitably loses some information (variance)."
},
{
"id": 15,
"questionText": "Which of the following matrices is symmetric and used in PCA?",
"options": [
"Distance matrix",
"Covariance matrix",
"Adjacency matrix",
"Correlation matrix"
],
"correctAnswerIndex": 1,
"explanation": "The covariance matrix is symmetric and serves as the basis for eigen decomposition in PCA."
},
{
"id": 16,
"questionText": "Scenario: You have highly correlated features. PCA is applied. Effect?",
"options": [
"Normalizes variance",
"Reduces redundancy by combining correlated features into fewer components",
"Removes labels",
"Increases correlation"
],
"correctAnswerIndex": 1,
"explanation": "PCA transforms correlated features into uncorrelated principal components, reducing redundancy."
},
{
"id": 17,
"questionText": "Scenario: You want to reduce noise in data. PCA helps by:",
"options": [
"Scaling the first component only",
"Adding more features",
"Increasing learning rate",
"Discarding components with low variance"
],
"correctAnswerIndex": 3,
"explanation": "Low-variance components often represent noise; removing them denoises the dataset."
},
{
"id": 18,
"questionText": "Scenario: Eigenvalues of a PCA covariance matrix are [4, 2, 1]. Which component explains the most variance?",
"options": [
"Second component",
"All equally",
"First component",
"Third component"
],
"correctAnswerIndex": 2,
"explanation": "Eigenvalues correspond to the variance captured; the largest eigenvalue indicates the component with most variance."
},
{
"id": 19,
"questionText": "Scenario: You apply PCA to a dataset and plot explained variance ratio. Purpose?",
"options": [
"Scale features",
"Compute correlation matrix",
"Determine how many components to keep",
"Train a classifier"
],
"correctAnswerIndex": 2,
"explanation": "Explained variance ratio helps decide how many components capture sufficient information."
},
{
"id": 20,
"questionText": "Which PCA component is always orthogonal to the first component?",
"options": [
"It may not be orthogonal",
"Third component",
"Second component",
"Last component only"
],
"correctAnswerIndex": 2,
"explanation": "By definition, each principal component is orthogonal to all previous components."
},
{
"id": 21,
"questionText": "Scenario: After PCA, some components have very small variance. Action?",
"options": [
"They must be retained",
"Scale them up",
"Add noise to them",
"They can be discarded"
],
"correctAnswerIndex": 3,
"explanation": "Components with negligible variance contribute little to data representation and can be removed."
},
{
"id": 22,
"questionText": "Scenario: PCA applied to non-linear data. Limitation?",
"options": [
"PCA generates labels",
"PCA cannot capture non-linear relationships",
"PCA increases variance",
"PCA overfits"
],
"correctAnswerIndex": 1,
"explanation": "Standard PCA is linear and cannot model complex non-linear structures; kernel PCA may be used instead."
},
{
"id": 23,
"questionText": "Scenario: PCA reduces features from 5D to 2D. Data reconstruction is approximate. Why?",
"options": [
"Labels change",
"Information is lost in discarded components",
"PCA adds noise",
"Variance is increased"
],
"correctAnswerIndex": 1,
"explanation": "Dimensionality reduction retains only top components, losing some original information."
},
{
"id": 24,
"questionText": "Which metric is used to measure how much variance is captured by selected components?",
"options": [
"Mean squared error",
"Correlation coefficient",
"Explained variance ratio",
"Euclidean distance"
],
"correctAnswerIndex": 2,
"explanation": "Explained variance ratio shows the proportion of total variance captured by each principal component."
},
{
"id": 25,
"questionText": "Scenario: PCA on standardized data vs. unstandardized data. Difference?",
"options": [
"Standardization reduces variance",
"Standardized data gives equal weight to all features",
"Unstandardized data improves variance capture",
"No difference"
],
"correctAnswerIndex": 1,
"explanation": "Standardization prevents features with large scales from dominating the PCA components."
},
{
"id": 26,
"questionText": "Scenario: Two features are perfectly correlated. PCA effect?",
"options": [
"Cannot perform PCA",
"One component captures the shared variance",
"Both components are kept equally",
"Variance becomes zero"
],
"correctAnswerIndex": 1,
"explanation": "PCA combines correlated features into a single principal component."
},
{
"id": 27,
"questionText": "Scenario: You want to visualize 3D data in 2D. PCA helps by:",
"options": [
"Adding more dimensions",
"Projecting onto top 2 principal components",
"Scaling features only",
"Generating new labels"
],
"correctAnswerIndex": 1,
"explanation": "Projection onto top principal components preserves as much variance as possible in reduced dimensions."
},
{
"id": 28,
"questionText": "Scenario: After PCA, you notice negative values in transformed features. Meaning?",
"options": [
"PCA failed",
"Original data must be negative",
"Data must be scaled again",
"Principal components can have negative and positive values"
],
"correctAnswerIndex": 3,
"explanation": "PCA components are linear combinations of original features, allowing both negative and positive values."
},
{
"id": 29,
"questionText": "Scenario: You apply PCA for feature selection. Best approach?",
"options": [
"Discard largest components",
"Select random components",
"Keep all features",
"Select top components explaining desired variance"
],
"correctAnswerIndex": 3,
"explanation": "Top principal components capture most variance and are most informative for feature selection."
},
{
"id": 30,
"questionText": "Scenario: PCA is applied to a dataset with 100 features. First 10 components explain 95% variance. Next step?",
"options": [
"Use all 100 features",
"Discard the 10 components",
"Add more features",
"Use 10 components for reduced dataset"
],
"correctAnswerIndex": 3,
"explanation": "Using the first 10 components retains 95% of information while reducing dimensionality significantly."
},
{
"id": 31,
"questionText": "Scenario: You apply PCA but some features dominate due to large variance. Solution?",
"options": [
"Standardize the features",
"Remove features with high variance",
"Apply k-means clustering",
"Reduce dataset size"
],
"correctAnswerIndex": 0,
"explanation": "Standardizing ensures all features contribute equally, preventing dominance of large-scale features."
},
{
"id": 32,
"questionText": "Scenario: PCA eigenvalues are [5, 2, 0.5, 0.1]. What does the smallest eigenvalue indicate?",
"options": [
"Largest variance",
"Component is most important",
"Least variance along that component",
"PCA failed"
],
"correctAnswerIndex": 2,
"explanation": "Eigenvalues represent variance along principal components; the smallest captures minimal variance."
},
{
"id": 33,
"questionText": "Scenario: Two components have similar eigenvalues. What to do?",
"options": [
"Discard one randomly",
"Keep both as they explain similar variance",
"Combine them manually",
"Always choose the first"
],
"correctAnswerIndex": 1,
"explanation": "Similar eigenvalues indicate both components carry significant information; both should be retained."
},
{
"id": 34,
"questionText": "Scenario: You want to analyze which original features contribute to a principal component. Technique?",
"options": [
"Check explained variance ratio only",
"Remove low variance features",
"Examine component loadings (eigenvectors)",
"Normalize the dataset"
],
"correctAnswerIndex": 2,
"explanation": "Loadings show the weight of each original feature in a principal component, indicating contribution."
},
{
"id": 35,
"questionText": "Scenario: PCA applied on correlation matrix vs covariance matrix. Difference?",
"options": [
"Correlation matrix standardizes features; covariance matrix uses original scale",
"No difference",
"Covariance matrix reduces variance",
"Correlation matrix increases variance"
],
"correctAnswerIndex": 0,
"explanation": "Correlation matrix accounts for differing scales by standardizing variables before computing PCA."
},
{
"id": 36,
"questionText": "Scenario: PCA is applied to noisy data. Effect of noise?",
"options": [
"Noise appears in low-variance components",
"Noise improves variance",
"Noise is amplified in all components",
"Noise disappears automatically"
],
"correctAnswerIndex": 0,
"explanation": "High-frequency noise often contributes little variance and is captured in later components, which can be discarded."
},
{
"id": 37,
"questionText": "Scenario: You want to visualize 4D data in 2D using PCA. Which components to use?",
"options": [
"Last 2 components",
"All 4 components",
"Top 2 principal components",
"Random 2 features"
],
"correctAnswerIndex": 2,
"explanation": "Top components retain most variance, providing the best 2D representation of high-dimensional data."
},
{
"id": 38,
"questionText": "Scenario: PCA applied on dataset with zero mean. Why mean-centering?",
"options": [
"Normalizes labels",
"Increases variance artificially",
"Reduces number of features",
"Ensures first component captures maximum variance from origin"
],
"correctAnswerIndex": 3,
"explanation": "Mean-centering removes bias and ensures principal components represent variance relative to the mean."
},
{
"id": 39,
"questionText": "Scenario: PCA applied but first component explains only 20% variance. Interpretation?",
"options": [
"First component is irrelevant",
"Data variance is spread across many components",
"Reduce dataset size",
"PCA failed"
],
"correctAnswerIndex": 1,
"explanation": "Low variance in first component indicates no single direction dominates; variance is more uniform across features."
},
{
"id": 40,
"questionText": "Scenario: You perform PCA on features measured in different units. Why important?",
"options": [
"To generate labels",
"To increase explained variance",
"To remove features",
"To prevent unit differences from skewing components"
],
"correctAnswerIndex": 3,
"explanation": "Standardization equalizes units, ensuring PCA reflects intrinsic data structure rather than measurement scale."
},
{
"id": 41,
"questionText": "Scenario: Projecting data back from reduced PCA components to original space. Effect?",
"options": [
"Perfect reconstruction always",
"Approximate reconstruction with some information loss",
"Increase variance",
"Remove correlations"
],
"correctAnswerIndex": 1,
"explanation": "Dimensionality reduction discards minor components, so reconstruction is approximate."
},
{
"id": 42,
"questionText": "Scenario: PCA on sparse high-dimensional data. Which method can help?",
"options": [
"Sparse PCA",
"Random projection",
"Feature scaling",
"Standard PCA only"
],
"correctAnswerIndex": 0,
"explanation": "Sparse PCA introduces sparsity constraints to handle high-dimensional data efficiently."
},
{
"id": 43,
"questionText": "Scenario: Kernel PCA vs standard PCA. Advantage?",
"options": [
"Kernel PCA reduces variance",
"Kernel PCA does not reduce dimensions",
"Kernel PCA captures non-linear patterns",
"Standard PCA is better for non-linear data"
],
"correctAnswerIndex": 2,
"explanation": "Kernel PCA uses kernel functions to capture non-linear relationships, unlike linear PCA."
},
{
"id": 44,
"questionText": "Scenario: PCA applied to dataset, but covariance matrix is singular. Cause?",
"options": [
"Data standardized",
"Number of features > number of samples",
"Explained variance too high",
"Too few features"
],
"correctAnswerIndex": 1,
"explanation": "A singular covariance matrix occurs when the data matrix has more features than samples, causing linear dependency."
},
{
"id": 45,
"questionText": "Scenario: After PCA, a component has zero eigenvalue. Meaning?",
"options": [
"Data is invalid",
"No variance along this component",
"PCA failed",
"Most important component"
],
"correctAnswerIndex": 1,
"explanation": "Zero eigenvalue indicates the component captures no variation in the dataset."
},
{
"id": 46,
"questionText": "Scenario: PCA used for feature reduction in regression. Benefit?",
"options": [
"Removes labels",
"Increases overfitting",
"Generates noise",
"Reduces multicollinearity and model complexity"
],
"correctAnswerIndex": 3,
"explanation": "PCA produces uncorrelated features, mitigating multicollinearity and simplifying models."
},
{
"id": 47,
"questionText": "Scenario: PCA shows first 3 components explain 85% variance. Choice?",
"options": [
"Keep 3 components for reduced dataset",
"Keep only first",
"Discard all 3",
"Keep all original features"
],
"correctAnswerIndex": 0,
"explanation": "Top components capturing majority variance are sufficient for dimensionality reduction."
},
{
"id": 48,
"questionText": "Scenario: PCA reduces dataset but downstream classifier performs worse. Likely reason?",
"options": [
"Important variance discarded in low components",
"Features were not standardized",
"Too few principal components retained",
"All of the above"
],
"correctAnswerIndex": 3,
"explanation": "Poor classifier performance can result from lost variance, insufficient components, or unstandardized features."
},
{
"id": 49,
"questionText": "Scenario: PCA on image dataset. First component represents lighting changes. Meaning?",
"options": [
"Reduce dataset size",
"Lighting has no effect",
"Largest variance is due to lighting, not object content",
"PCA failed"
],
"correctAnswerIndex": 2,
"explanation": "PCA captures directions of maximal variance; if lighting dominates, first component encodes lighting."
},
{
"id": 50,
"questionText": "Scenario: You need interpretability for principal components. Technique?",
"options": [
"Examine feature loadings",
"Discard low variance components",
"Use explained variance only",
"Standardize data"
],
"correctAnswerIndex": 0,
"explanation": "Loadings show contribution of each original feature to principal components, aiding interpretation."
},
{
"id": 51,
"questionText": "Scenario: PCA applied on time-series features. Issue?",
"options": [
"Eigenvalues become negative",
"Labels are affected",
"Variance is increased",
"Temporal structure may be lost"
],
"correctAnswerIndex": 3,
"explanation": "Standard PCA ignores sequence information; temporal relationships may not be preserved."
},
{
"id": 52,
"questionText": "Scenario: Dataset contains categorical variables. PCA requirement?",
"options": [
"Convert to numerical via encoding",
"Remove them",
"No change required",
"Use labels directly"
],
"correctAnswerIndex": 0,
"explanation": "PCA requires numeric input; categorical features must be encoded first (e.g., one-hot encoding)."
},
{
"id": 53,
"questionText": "Scenario: PCA reduces dimensions for clustering. Benefit?",
"options": [
"Increases data size",
"Reduces noise and speeds computation",
"Removes clusters",
"Generates labels"
],
"correctAnswerIndex": 1,
"explanation": "PCA simplifies data, removes redundant features, and accelerates clustering algorithms."
},
{
"id": 54,
"questionText": "Scenario: After PCA, components are used in regression. Advantage?",
"options": [
"Remove labels",
"Avoid multicollinearity and improve stability",
"Increases overfitting",
"Increase computation"
],
"correctAnswerIndex": 1,
"explanation": "Principal components are uncorrelated, reducing multicollinearity in regression."
},
{
"id": 55,
"questionText": "Scenario: PCA applied to normalized vs standardized features. Impact?",
"options": [
"No impact",
"Normalized features fail PCA",
"Standardization is crucial for unequal scales",
"Variance is reduced"
],
"correctAnswerIndex": 2,
"explanation": "Features with different scales must be standardized; normalization alone may not equalize contribution."
},
{
"id": 56,
"questionText": "Scenario: First principal component explains 40% variance, second 25%. What % variance is left?",
"options": [
"40%",
"35%",
"25%",
"65%"
],
"correctAnswerIndex": 1,
"explanation": "Remaining variance = 100% - (40% + 25%) = 35%."
},
{
"id": 57,
"questionText": "Scenario: PCA used on finance dataset. First component dominated by one stock. Meaning?",
"options": [
"Data should be reduced",
"This stock has highest variance in data",
"Stock is irrelevant",
"PCA failed"
],
"correctAnswerIndex": 1,
"explanation": "Principal components capture directions of maximum variance; one high-variance stock can dominate."
},
{
"id": 58,
"questionText": "Scenario: PCA applied on small dataset. Risk?",
"options": [
"PCA fails",
"Components become identical",
"Overfitting and noisy components",
"Variance increases"
],
"correctAnswerIndex": 2,
"explanation": "Small datasets may produce unstable covariance estimates, leading to noisy components."
},
{
"id": 59,
"questionText": "Scenario: You want to project new data using previously fitted PCA. Method?",
"options": [
"Cannot project new data",
"Recompute PCA",
"Multiply new data by learned component matrix",
"Use labels only"
],
"correctAnswerIndex": 2,
"explanation": "New data can be projected by applying the PCA transformation learned from training data."
},
{
"id": 60,
"questionText": "Scenario: PCA shows negative loadings. Meaning?",
"options": [
"Error in computation",
"Variance is negative",
"Feature removed",
"Feature negatively correlates with component"
],
"correctAnswerIndex": 3,
"explanation": "Negative loadings indicate the original feature moves in opposite direction to the component."
},
{
"id": 61,
"questionText": "Scenario: PCA applied to very high-dimensional genomic data. Challenge?",
"options": [
"Variance is too high",
"Cannot compute eigenvectors",
"Labels cannot be used",
"Covariance matrix may be singular or noisy"
],
"correctAnswerIndex": 3,
"explanation": "High dimensionality with few samples can make the covariance matrix singular and PCA unstable."
},
{
"id": 62,
"questionText": "Scenario: PCA on dataset with outliers. Effect?",
"options": [
"PCA removes outliers",
"Outliers may distort principal components",
"Outliers are ignored automatically",
"Variance increases uniformly"
],
"correctAnswerIndex": 1,
"explanation": "Outliers can dominate variance, affecting directions of principal components."
},
{
"id": 63,
"questionText": "Scenario: PCA applied for compression. Target explained variance?",
"options": [
"Keep all components",
"Keep only first component",
"Choose enough components to capture 90–95% variance",
"Discard top components"
],
"correctAnswerIndex": 2,
"explanation": "Selecting components that retain most variance ensures compression without losing significant information."
},
{
"id": 64,
"questionText": "Scenario: PCA reduces dataset but downstream classifier performs worse. Likely reason?",
"options": [
"PCA cannot be applied to numeric data",
"Too few principal components retained",
"All of the above",
"Features were not standardized",
"Important variance discarded in low components"
],
"correctAnswerIndex": 2,
"explanation": "Poor classifier performance can result from lost variance, insufficient components, or unstandardized features."
},
{
"id": 65,
"questionText": "Scenario: PCA used for exploratory analysis. Benefit?",
"options": [
"Removes labels",
"Increases dimensionality",
"Generates random features",
"Reveals patterns, clusters, and correlations"
],
"correctAnswerIndex": 3,
"explanation": "PCA simplifies data and highlights underlying patterns or groupings."
},
{
"id": 66,
"questionText": "Scenario: PCA reduces dataset from 50 to 10 features. Effect on storage?",
"options": [
"Significant reduction in storage and computation",
"Increases storage",
"No change",
"Removes labels"
],
"correctAnswerIndex": 0,
"explanation": "Fewer features reduce memory usage and accelerate computations."
},
{
"id": 67,
"questionText": "Scenario: PCA used on correlation matrix. Advantage?",
"options": [
"Reduces variance",
"Generates labels",
"Equalizes feature scales and emphasizes relative relationships",
"Removes low-variance features only"
],
"correctAnswerIndex": 2,
"explanation": "Correlation matrix ensures features with different units or scales contribute proportionally to PCA."
},
{
"id": 68,
"questionText": "Scenario: After PCA, some features have nearly zero loadings across components. Meaning?",
"options": [
"Variance is too high",
"These features contribute little variance and can be discarded",
"They are most important",
"PCA failed"
],
"correctAnswerIndex": 1,
"explanation": "Features with negligible loadings do not influence principal components significantly."
},
{
"id": 69,
"questionText": "Scenario: PCA applied to dataset with correlated noise. Effect?",
"options": [
"All variance captured by noise",
"Noise may form separate low-variance components",
"PCA fails",
"Noise dominates first component"
],
"correctAnswerIndex": 1,
"explanation": "Correlated noise often appears in later components with low variance."
},
{
"id": 70,
"questionText": "Scenario: You want to reduce dimensionality without losing much information. PCA strategy?",
"options": [
"Use all components",
"Keep enough components to capture desired variance (e.g., 90–95%)",
"Keep only first component",
"Discard components randomly"
],
"correctAnswerIndex": 1,
"explanation": "Selecting enough principal components ensures dimensionality reduction while retaining most data information."
},
{
"id": 71,
"questionText": "Scenario: Kernel PCA is used instead of standard PCA. Benefit?",
"options": [
"Reduces dimensionality linearly",
"Captures non-linear relationships in the data",
"Removes outliers automatically",
"Generates labels"
],
"correctAnswerIndex": 1,
"explanation": "Kernel PCA uses kernel functions to map data into higher-dimensional space to capture non-linear patterns."
},
{
"id": 72,
"questionText": "Scenario: Sparse PCA is applied on high-dimensional genomic data. Advantage?",
"options": [
"Improves label prediction automatically",
"Maximizes variance only",
"Generates components with few non-zero loadings for interpretability",
"Removes all correlations"
],
"correctAnswerIndex": 2,
"explanation": "Sparse PCA introduces sparsity constraints, creating components influenced by fewer original features for easier interpretation."
},
{
"id": 73,
"questionText": "Scenario: PCA is applied to compress image data. How to measure quality of compression?",
"options": [
"Variance ratio only",
"Correlation of first component with pixels",
"Reconstruction error (difference between original and reconstructed images)",
"Number of components kept"
],
"correctAnswerIndex": 2,
"explanation": "Reconstruction error quantifies information loss during dimensionality reduction, evaluating compression quality."
},
{
"id": 74,
"questionText": "Scenario: PCA applied to multicollinear financial features. Effect?",
"options": [
"Increases collinearity",
"Reduces multicollinearity by generating uncorrelated components",
"Removes variance",
"Generates labels"
],
"correctAnswerIndex": 1,
"explanation": "Principal components are orthogonal, addressing multicollinearity issues in regression or predictive models."
},
{
"id": 75,
"questionText": "Scenario: PCA applied on large sparse document-term matrix. Challenge?",
"options": [
"PCA cannot be applied",
"High dimensionality and sparsity require optimized algorithms",
"All features dominate equally",
"Variance becomes negative"
],
"correctAnswerIndex": 1,
"explanation": "Sparse high-dimensional data may need techniques like randomized PCA to efficiently compute components."
},
{
"id": 76,
"questionText": "Scenario: PCA applied to time-series data. Concern?",
"options": [
"Labels are changed",
"Temporal correlations may be ignored",
"PCA reduces samples",
"Variance increases"
],
"correctAnswerIndex": 1,
"explanation": "PCA does not account for order in sequences; temporal patterns may be lost."
},
{
"id": 77,
"questionText": "Scenario: PCA reduces 100 features to 5 components. Downstream model performance drops. Likely cause?",
"options": [
"All of the above",
"Data not standardized",
"Too few samples",
"Important low-variance features were discarded"
],
"correctAnswerIndex": 0,
"explanation": "Discarding low-variance features may remove predictive information; other preprocessing issues can also affect performance."
},
{
"id": 78,
"questionText": "Scenario: PCA is used for anomaly detection. Approach?",
"options": [
"Discard all components",
"Use first component only",
"Model normal data with top components and examine reconstruction error",
"Apply PCA on labels"
],
"correctAnswerIndex": 2,
"explanation": "Anomalies often lie in directions of low variance; reconstruction error from PCA can identify unusual data points."
},
{
"id": 79,
"questionText": "Scenario: You apply PCA on a dataset with missing values. Best approach?",
"options": [
"PCA fills missing values automatically",
"Ignore missing values",
"Impute missing values before PCA",
"Discard rows with missing values"
],
"correctAnswerIndex": 2,
"explanation": "PCA requires complete numerical data; missing values must be imputed or handled before applying PCA."
},
{
"id": 80,
"questionText": "Scenario: PCA applied and first component aligns with single feature. Interpretation?",
"options": [
"This feature dominates variance in the dataset",
"Component is irrelevant",
"All features are equally important",
"PCA failed"
],
"correctAnswerIndex": 0,
"explanation": "When a single feature dominates variance, the first principal component aligns closely with that feature."
},
{
"id": 81,
"questionText": "Scenario: You perform PCA and observe negative explained variance ratio for a component. Reason?",
"options": [
"Data was not mean-centered properly",
"Eigenvectors are invalid",
"Variance is negative",
"PCA cannot run on this data"
],
"correctAnswerIndex": 0,
"explanation": "Improper centering can lead to incorrect covariance matrix, causing negative variance calculations."
},
{
"id": 82,
"questionText": "Scenario: PCA applied on data with categorical features encoded as one-hot. Concern?",
"options": [
"Variance decreases automatically",
"Labels are affected",
"Components become identical",
"High dimensionality may lead to sparse components"
],
"correctAnswerIndex": 3,
"explanation": "One-hot encoding increases dimensions, producing sparse data; special handling or sparse PCA may be useful."
},
{
"id": 83,
"questionText": "Scenario: After PCA, you plot a biplot. Purpose?",
"options": [
"Scale data",
"Generate labels",
"Visualize principal components and feature contributions",
"Remove low-variance components"
],
"correctAnswerIndex": 2,
"explanation": "Biplots show both projected data points and how original features contribute to components."
},
{
"id": 84,
"questionText": "Scenario: PCA applied on standardized vs non-standardized data with different scales. Outcome?",
"options": [
"Standardization ensures fair contribution; non-standardized may bias components",
"Non-standardized data improves variance",
"No difference",
"Variance is reduced in standardized data"
],
"correctAnswerIndex": 0,
"explanation": "Features with large scales dominate components without standardization, skewing PCA results."
},
{
"id": 85,
"questionText": "Scenario: You want interpretable PCA components. Which approach?",
"options": [
"Keep all components",
"Sparse PCA or rotation methods like varimax",
"Use first component only",
"Discard low-variance features"
],
"correctAnswerIndex": 1,
"explanation": "Sparse PCA and rotation techniques improve interpretability by reducing the number of features contributing to each component."
},
{
"id": 86,
"questionText": "Scenario: PCA reduces dimensions for clustering. Benefit?",
"options": [
"Removes noise, reduces computation, highlights clusters",
"Increases dimensionality",
"Generates labels",
"Removes clusters"
],
"correctAnswerIndex": 0,
"explanation": "Reduced, de-noised features simplify clustering and often improve performance."
},
{
"id": 87,
"questionText": "Scenario: PCA eigenvectors are not unique. Reason?",
"options": [
"PCA failed",
"Covariance matrix invalid",
"Eigenvectors are unique up to sign; direction can flip",
"Variance negative"
],
"correctAnswerIndex": 2,
"explanation": "Eigenvectors can be multiplied by -1 without changing the subspace, so they are not unique in sign."
},
{
"id": 88,
"questionText": "Scenario: PCA applied to data where noise dominates variance. Risk?",
"options": [
"Components may represent noise rather than signal",
"Variance reduces",
"All information preserved",
"Components become identical"
],
"correctAnswerIndex": 0,
"explanation": "High-variance noise can dominate principal components, reducing meaningful representation of data."
},
{
"id": 89,
"questionText": "Scenario: PCA applied on dataset with many features having zero variance. Effect?",
"options": [
"These features are ignored in covariance computation",
"PCA fails",
"Variance increases",
"Components become identical"
],
"correctAnswerIndex": 0,
"explanation": "Zero-variance features do not contribute to covariance and do not affect PCA results."
},
{
"id": 90,
"questionText": "Scenario: PCA applied to compress hyperspectral image. Key consideration?",
"options": [
"Retain components capturing most spectral variance for accurate reconstruction",
"Discard high-variance components",
"Keep only first component",
"Generate labels automatically"
],
"correctAnswerIndex": 0,
"explanation": "Hyperspectral data has many correlated channels; top components capture essential information while reducing data size."
},
{
"id": 91,
"questionText": "Scenario: PCA applied and first component is dominated by outlier. Solution?",
"options": [
"Keep data as-is",
"Remove or mitigate outliers before PCA",
"Scale only first feature",
"Discard PCA entirely"
],
"correctAnswerIndex": 1,
"explanation": "Outliers can skew variance and principal directions; handling them ensures meaningful PCA components."
},
{
"id": 92,
"questionText": "Scenario: You need to project streaming data onto PCA components. Requirement?",
"options": [
"Project only first sample",
"Cannot apply PCA",
"Recompute PCA each time",
"Use incremental PCA or precomputed components"
],
"correctAnswerIndex": 3,
"explanation": "Incremental PCA allows efficient projection of new data without recomputing from scratch."
},
{
"id": 93,
"questionText": "Scenario: PCA reduces dimensionality but variance explained is too low. Solution?",
"options": [
"Remove first component",
"Normalize data again",
"Keep more components",
"Discard components"
],
"correctAnswerIndex": 2,
"explanation": "Increasing number of components ensures more variance is retained for downstream tasks."
},
{
"id": 94,
"questionText": "Scenario: PCA applied on correlated features with different scales. Effect if not standardized?",
"options": [
"Variance is evenly distributed",
"Components are orthogonal",
"PCA fails",
"Large-scale features dominate components"
],
"correctAnswerIndex": 3,
"explanation": "Without standardization, features with larger numeric ranges contribute more variance, skewing PCA results."
},
{
"id": 95,
"questionText": "Scenario: PCA used for dimensionality reduction prior to deep learning. Benefit?",
"options": [
"Generates labels",
"Increases overfitting",
"Reduces input size and noise, improving training efficiency",
"Removes all variance"
],
"correctAnswerIndex": 2,
"explanation": "PCA simplifies input features, removing redundant information and reducing computational load."
},
{
"id": 96,
"questionText": "Scenario: PCA applied but some components are highly correlated. Why unusual?",
"options": [
"Principal components should be orthogonal; correlation indicates an issue",
"Variance is low",
"Labels are influencing components",
"Expected in standard PCA"
],
"correctAnswerIndex": 0,
"explanation": "PCA produces orthogonal components; correlated components suggest computation or preprocessing errors."
},
{
"id": 97,
"questionText": "Scenario: PCA applied on multi-class dataset for visualization. Approach?",
"options": [
"Discard labels",
"Keep all features",
"Project onto top 2 or 3 components and color points by class",
"Use only first component"
],
"correctAnswerIndex": 2,
"explanation": "Low-dimensional projection allows visualization of class separation while preserving maximal variance."
},
{
"id": 98,
"questionText": "Scenario: PCA applied with top components explaining 80% variance. Downstream task requires 95%. Action?",
"options": [
"Normalize data again",
"Use only top components",
"Include additional components until 95% variance is captured",
"Discard low components"
],
"correctAnswerIndex": 2,
"explanation": "Selecting enough components ensures sufficient information is retained for downstream analysis."
},
{
"id": 99,
"questionText": "Scenario: PCA applied to text embeddings. Challenge?",
"options": [
"High dimensionality and sparsity require careful computation",
"Variance is negative",
"PCA fails automatically",
"Components lose meaning entirely"
],
"correctAnswerIndex": 0,
"explanation": "Text embeddings are often high-dimensional; PCA helps reduce size but may require optimized algorithms."
},
{
"id": 100,
"questionText": "Scenario: PCA used for feature selection. How to choose components?",
"options": [
"Choose random components",
"Discard high-variance components",
"Use only first component",
"Select components explaining desired variance threshold (e.g., 90–95%)"
],
"correctAnswerIndex": 3,
"explanation": "Selecting top components ensures maximal retained information while reducing dimensionality."
}
]
}