{ "title": "Linear Discriminant Analysis Mastery: 100 MCQs", "description": "A comprehensive set of 100 multiple-choice questions to test and deepen your understanding of Linear Discriminant Analysis (LDA), covering fundamental concepts, assumptions, discriminant functions, and applications in classification tasks.", "questions": [ { "id": 1, "questionText": "What is the primary goal of Linear Discriminant Analysis (LDA)?", "options": [ "To cluster unlabeled data", "To find correlations between variables", "To reduce the dimensionality of a dataset while retaining most variance", "To fit a regression line" ], "correctAnswerIndex": 2, "explanation": "LDA aims to reduce dimensionality and maximize separability between known classes by projecting data onto a lower-dimensional space." }, { "id": 2, "questionText": "LDA assumes that the data in each class:", "options": [ "Has equal number of samples per class", "Follows a Gaussian (normal) distribution", "Has no correlation between features", "Is uniformly distributed" ], "correctAnswerIndex": 1, "explanation": "LDA assumes normally distributed features for each class, which allows it to compute class-specific mean and covariance matrices for optimal separation." }, { "id": 3, "questionText": "In LDA, the discriminant function is used to:", "options": [ "Normalize the data", "Compute the correlation between features", "Reduce the number of features", "Assign a class label to a sample" ], "correctAnswerIndex": 3, "explanation": "Discriminant functions are computed for each class, and a sample is assigned to the class with the highest score." }, { "id": 4, "questionText": "Scenario: You have two classes in 3D space. LDA reduces the data to 1D. Why?", "options": [ "Because LDA always reduces to 1D", "To normalize the data", "To eliminate one feature randomly", "Because maximum class separability can be achieved in a line" ], "correctAnswerIndex": 3, "explanation": "LDA projects the data to a subspace that maximizes the ratio of between-class variance to within-class variance. For two classes, a 1D projection suffices." }, { "id": 5, "questionText": "LDA works best when classes:", "options": [ "Have distinct means and similar covariance matrices", "Are highly skewed", "Have identical data points", "Are non-Gaussian" ], "correctAnswerIndex": 0, "explanation": "LDA assumes Gaussian distributions with equal covariances; distinct means allow for better class separation." }, { "id": 6, "questionText": "Scenario: Two features are highly correlated. What is a likely effect on LDA?", "options": [ "May cause redundancy but LDA can still compute projection", "Algorithm fails", "Produces random results", "Features will be removed automatically" ], "correctAnswerIndex": 0, "explanation": "Highly correlated features can reduce LDA effectiveness slightly but the algorithm can still compute the optimal projection." }, { "id": 7, "questionText": "The number of linear discriminants in LDA is at most:", "options": [ "Number of samples", "Number of features minus one", "Number of features", "Number of classes minus one" ], "correctAnswerIndex": 3, "explanation": "For c classes, LDA can produce at most c−1 discriminant axes." }, { "id": 8, "questionText": "Scenario: LDA projection separates classes poorly. 
Possible reason?", "options": [ "Output dimension is 1D", "Too many features", "Classes have overlapping distributions or unequal covariances", "Data is normalized" ], "correctAnswerIndex": 2, "explanation": "If the assumptions of Gaussian distribution with equal covariance are violated, class separation by LDA can be poor." }, { "id": 9, "questionText": "LDA is commonly used for:", "options": [ "Classification tasks", "Clustering", "Regression tasks", "Time series forecasting" ], "correctAnswerIndex": 0, "explanation": "LDA is primarily a classification algorithm, although it can also be used for dimensionality reduction." }, { "id": 10, "questionText": "Scenario: You have 3 classes and 5 features. Maximum LDA dimensions?", "options": [ "5", "2", "3", "1" ], "correctAnswerIndex": 1, "explanation": "Maximum number of linear discriminants is c−1 = 3−1 = 2." }, { "id": 11, "questionText": "LDA computes projections by maximizing:", "options": [ "Sum of all variances", "Mean of features", "Ratio of between-class variance to within-class variance", "Correlation coefficient" ], "correctAnswerIndex": 2, "explanation": "The objective of LDA is to find a projection that increases class separability, which is measured by the ratio of between-class to within-class variance." }, { "id": 12, "questionText": "Scenario: You have highly overlapping classes. LDA output may be:", "options": [ "Perfect classification", "Normalized projections", "Poor classification", "Automatic clustering" ], "correctAnswerIndex": 2, "explanation": "When class distributions overlap significantly, LDA cannot separate them well, resulting in misclassification." }, { "id": 13, "questionText": "Which of these is an assumption of LDA?", "options": [ "Features are normally distributed per class", "Data has missing values", "Classes have different covariance matrices", "Data is categorical only" ], "correctAnswerIndex": 0, "explanation": "LDA assumes Gaussian features for each class to compute the optimal linear projection." }, { "id": 14, "questionText": "Scenario: Two features have vastly different scales. What to do before LDA?", "options": [ "Use log transform only", "Leave them as is", "Standardize or normalize features", "Remove the smaller scale feature" ], "correctAnswerIndex": 2, "explanation": "Scaling ensures that features contribute equally to the discriminant function." }, { "id": 15, "questionText": "LDA vs PCA: key difference?", "options": [ "Both are unsupervised", "LDA reduces features; PCA increases features", "Both require class labels", "LDA is supervised; PCA is unsupervised" ], "correctAnswerIndex": 3, "explanation": "PCA ignores class labels and maximizes variance; LDA uses class labels to maximize separability." }, { "id": 16, "questionText": "Scenario: You have two classes with very different variances. How does it affect LDA?", "options": [ "May reduce classification performance since LDA assumes equal covariance", "Automatically scales variances", "Classes will merge", "Does not affect LDA" ], "correctAnswerIndex": 0, "explanation": "LDA assumes equal covariance matrices for classes; large differences can reduce separability and classification accuracy." }, { "id": 17, "questionText": "LDA projects data onto a lower-dimensional space using:", "options": [ "Linear combinations of original features", "Polynomial transformations", "Kernel functions only", "Random feature selection" ], "correctAnswerIndex": 0, "explanation": "LDA computes linear combinations of features to maximize class separability." 
}, { "id": 18, "questionText": "Scenario: After applying LDA, one feature dominates the discriminant. Likely reason?", "options": [ "Feature is categorical", "Feature has much larger variance than others", "Random initialization", "Algorithm error" ], "correctAnswerIndex": 1, "explanation": "Features with larger variance can dominate projections if data is not scaled." }, { "id": 19, "questionText": "Number of discriminant axes depends on:", "options": [ "Number of samples", "Number of features minus one", "Number of zero-variance features", "Number of classes minus one" ], "correctAnswerIndex": 3, "explanation": "Maximum number of discriminants is c−1 for c classes." }, { "id": 20, "questionText": "Scenario: You have three classes with very similar means. LDA may:", "options": [ "Perfectly separate classes", "Remove features automatically", "Struggle to separate classes", "Convert data to 1D" ], "correctAnswerIndex": 2, "explanation": "When class means are very close, LDA finds it difficult to achieve good separability." }, { "id": 21, "questionText": "In LDA, 'within-class scatter matrix' represents:", "options": [ "Mean differences between classes", "Variability of samples within each class", "Distance to origin", "Covariance of features across all samples" ], "correctAnswerIndex": 1, "explanation": "Within-class scatter measures how data points of the same class vary around their mean." }, { "id": 22, "questionText": "In LDA, 'between-class scatter matrix' represents:", "options": [ "Within-class variance", "Random noise", "Sum of all features", "Variability of class means relative to overall mean" ], "correctAnswerIndex": 3, "explanation": "Between-class scatter captures how distinct the class centers are from the global mean." }, { "id": 23, "questionText": "Scenario: You want to classify 4 classes. Maximum LDA dimensions?", "options": [ "4", "1", "3", "5" ], "correctAnswerIndex": 2, "explanation": "For c classes, maximum discriminant axes = c−1 = 4−1 = 3." }, { "id": 24, "questionText": "LDA vs QDA: main difference?", "options": [ "LDA allows nonlinear separation; QDA is linear", "Both are unsupervised", "QDA allows different covariance matrices per class; LDA assumes equal", "QDA uses PCA internally" ], "correctAnswerIndex": 2, "explanation": "LDA assumes equal covariance matrices; QDA allows class-specific covariances for more flexible separation." }, { "id": 25, "questionText": "Scenario: You have 100 features and 3 classes. LDA may:", "options": [ "Reduce data to at most 2 dimensions", "Fail if features exceed 50", "Increase dimensionality", "Remove random features" ], "correctAnswerIndex": 0, "explanation": "Number of LDA axes = c−1 = 2, regardless of the number of features." }, { "id": 26, "questionText": "Scenario: LDA applied to two classes with equal means. Expected outcome?", "options": [ "Cannot separate classes", "Data transformed to zero", "Perfect separation", "Automatic feature selection" ], "correctAnswerIndex": 0, "explanation": "If class means are identical, LDA has no discriminative power to separate them." }, { "id": 27, "questionText": "LDA projection maximizes:", "options": [ "Sum of variances", "Correlation of features", "Between-class variance / within-class variance", "Eigenvectors only" ], "correctAnswerIndex": 2, "explanation": "LDA selects projections that maximize separability measured by the ratio of between-class to within-class variance." }, { "id": 28, "questionText": "Scenario: LDA applied to skewed data. 
Suggested preprocessing?", "options": [ "Use raw data", "Normalize or transform data to approximate Gaussian", "Reduce classes to 2", "Remove features randomly" ], "correctAnswerIndex": 1, "explanation": "LDA assumes Gaussian features; transforming skewed data helps satisfy assumptions." }, { "id": 29, "questionText": "LDA is mainly classified as:", "options": [ "Regression algorithm", "Reinforcement learning", "Supervised dimensionality reduction", "Unsupervised clustering" ], "correctAnswerIndex": 2, "explanation": "LDA uses class labels to reduce dimensions while preserving class separability." }, { "id": 30, "questionText": "Scenario: After LDA projection, some data points are misclassified. Likely reason?", "options": [ "Output dimension too high", "Overlapping distributions or violated assumptions", "Algorithm failure", "Features removed automatically" ], "correctAnswerIndex": 1, "explanation": "Misclassification occurs when class distributions overlap or LDA assumptions (normality, equal covariance) are violated." }, { "id": 31, "questionText": "Scenario: You have 3 classes of flowers with 4 features each. Before applying LDA, why is standardization important?", "options": [ "To ensure all features contribute equally to the discriminant function", "To increase between-class variance", "To reduce the number of classes automatically", "To make LDA nonlinear" ], "correctAnswerIndex": 0, "explanation": "Standardizing features ensures that features with larger scales do not dominate the linear discriminant function." }, { "id": 32, "questionText": "Scenario: You want to reduce 10-dimensional data to 2D using LDA for 3 classes. Maximum dimensions achievable?", "options": [ "1", "10", "2", "3" ], "correctAnswerIndex": 2, "explanation": "Maximum LDA dimensions = c−1 = 3−1 = 2." }, { "id": 33, "questionText": "Scenario: Two classes with similar means but different covariances. LDA may:", "options": [ "Reduce dimensionality to 1D", "Perfectly separate them", "Fail to separate classes effectively", "Automatically detect clusters" ], "correctAnswerIndex": 2, "explanation": "LDA assumes equal covariances; different covariances violate this assumption and may reduce separation." }, { "id": 34, "questionText": "Which matrix in LDA captures variance within each class?", "options": [ "Between-class scatter matrix", "Within-class scatter matrix", "Covariance of all features", "Correlation matrix" ], "correctAnswerIndex": 1, "explanation": "Within-class scatter matrix measures how samples vary within their respective classes." }, { "id": 35, "questionText": "Scenario: You applied LDA and classes still overlap. Possible remedies?", "options": [ "Reduce output dimension to 1", "Check for assumptions violations, consider QDA or kernel LDA", "Ignore overlapping points", "Remove classes randomly" ], "correctAnswerIndex": 1, "explanation": "If assumptions are violated, alternatives like QDA or kernel LDA can handle unequal covariances or nonlinear separation." }, { "id": 36, "questionText": "Scenario: LDA applied to customer data with categorical features. How to proceed?", "options": [ "Ignore categorical features", "Reduce classes to 2 only", "Use raw text", "Encode categorical features numerically before applying LDA" ], "correctAnswerIndex": 3, "explanation": "Categorical variables must be numerically encoded to be included in LDA." 
}, { "id": 37, "questionText": "LDA maximizes the ratio of:", "options": [ "Eigenvalue magnitude", "Between-class variance to within-class variance", "Correlation of features", "Within-class variance to total variance" ], "correctAnswerIndex": 1, "explanation": "This ratio ensures optimal class separability in the projected space." }, { "id": 38, "questionText": "Scenario: LDA projection of 4-class dataset onto 3D space. Number of axes used?", "options": [ "4", "2", "1", "3" ], "correctAnswerIndex": 3, "explanation": "For c classes, maximum LDA axes = c−1 = 4−1 = 3." }, { "id": 39, "questionText": "Scenario: You observe one feature dominates LDA axis. Solution?", "options": [ "Remove other features", "Standardize features", "Increase number of classes", "Reduce output dimension" ], "correctAnswerIndex": 1, "explanation": "Standardization ensures all features contribute equally to discriminant axes." }, { "id": 40, "questionText": "Scenario: After LDA, misclassification occurs. Likely causes?", "options": [ "Overlapping classes, unequal covariances, or noisy data", "LDA failure", "Too few features", "Too many classes" ], "correctAnswerIndex": 0, "explanation": "Violation of LDA assumptions or overlapping class distributions can cause misclassification." }, { "id": 41, "questionText": "Scenario: You want to visualize class separation after LDA. Suggested output dimensions?", "options": [ "At most c−1 dimensions", "Number of samples", "Equal to number of features", "1D only" ], "correctAnswerIndex": 0, "explanation": "LDA can produce at most c−1 discriminant axes for c classes, suitable for visualization." }, { "id": 42, "questionText": "Scenario: LDA is applied to a dataset with overlapping covariances. What is recommended?", "options": [ "Consider QDA or kernel LDA for nonlinear separation", "Reduce dataset size", "Ignore overlapping samples", "Use PCA instead" ], "correctAnswerIndex": 0, "explanation": "QDA allows different class covariances and kernel LDA can handle nonlinear separability." }, { "id": 43, "questionText": "Scenario: Dataset has 1000 features and 4 classes. How many LDA axes?", "options": [ "1000", "2", "3", "4" ], "correctAnswerIndex": 2, "explanation": "Maximum axes = c−1 = 4−1 = 3, independent of feature count." }, { "id": 44, "questionText": "Scenario: You want LDA for classification on skewed data. Recommended preprocessing?", "options": [ "Use raw skewed data", "Remove skewed features", "Normalize or transform data to approximate Gaussian", "Reduce number of classes" ], "correctAnswerIndex": 2, "explanation": "LDA assumes Gaussian distributions; transformation improves performance." }, { "id": 45, "questionText": "Scenario: You have missing values in features. LDA requires:", "options": [ "Leave them as NaN", "Ignore affected samples in testing only", "Random replacement with 0", "Imputation or removal before applying LDA" ], "correctAnswerIndex": 3, "explanation": "Missing values must be handled because LDA computations require complete data." }, { "id": 46, "questionText": "Scenario: LDA applied to two classes with equal means. Projection result?", "options": [ "Perfect separation", "No separation possible", "Random projection", "Automatic clustering" ], "correctAnswerIndex": 1, "explanation": "If class means are identical, discriminant axis cannot separate classes." }, { "id": 47, "questionText": "Scenario: High-dimensional data with many features but few samples. 
Risk in LDA?", "options": [ "Singular scatter matrices and overfitting", "Perfect separation", "Automatic feature removal", "No impact" ], "correctAnswerIndex": 0, "explanation": "Few samples relative to features can make within-class scatter matrix singular, causing numerical issues." }, { "id": 48, "questionText": "Scenario: You want nonlinear class separation. Standard LDA may:", "options": [ "Work perfectly", "Fail to separate classes; consider kernel LDA", "Reduce output dimension to 1D", "Automatically create new features" ], "correctAnswerIndex": 1, "explanation": "Standard LDA is linear; kernel LDA extends it for nonlinear boundaries." }, { "id": 49, "questionText": "Scenario: Two features are highly correlated. LDA may:", "options": [ "Merge features automatically", "Fail completely", "Ignore one feature", "Produce redundant axes but still work" ], "correctAnswerIndex": 3, "explanation": "Highly correlated features may not add new information, but LDA can still compute discriminants." }, { "id": 50, "questionText": "Scenario: You apply LDA and notice one class dominates projections. Likely cause?", "options": [ "Class imbalance in sample sizes", "Output dimension too high", "Too few classes", "Too many features" ], "correctAnswerIndex": 0, "explanation": "LDA assumes equal class priors; imbalance can bias the projection toward larger classes." }, { "id": 51, "questionText": "Scenario: You have 5 classes, LDA reduces data to 4D. You want to visualize in 2D. Recommended approach?", "options": [ "Reduce classes to 2", "Use PCA instead of LDA", "Select first two discriminant axes", "Randomly select features" ], "correctAnswerIndex": 2, "explanation": "Selecting the top discriminant axes allows visualization while preserving maximum class separability." }, { "id": 52, "questionText": "Scenario: Features have different units (e.g., meters and kilograms). LDA requires:", "options": [ "Remove large-unit features", "Convert all features to meters", "Leave features as is", "Standardization to make features comparable" ], "correctAnswerIndex": 3, "explanation": "Standardization ensures all features contribute proportionally to discriminant functions." }, { "id": 53, "questionText": "Scenario: LDA misclassifies samples near class boundaries. Likely reason?", "options": [ "Overlap in class distributions", "Output dimension too high", "Too few features", "Algorithm failure" ], "correctAnswerIndex": 0, "explanation": "LDA cannot perfectly classify points in overlapping regions." }, { "id": 54, "questionText": "Scenario: LDA applied to a dataset with nonlinear boundaries. Best alternative?", "options": [ "QDA", "Kernel LDA", "PCA", "Standard LDA" ], "correctAnswerIndex": 1, "explanation": "Kernel LDA handles nonlinear separations by mapping data to higher-dimensional space." }, { "id": 55, "questionText": "Scenario: You want to classify emails as spam or not. Why LDA may be suitable?", "options": [ "It ignores class labels", "It clusters data automatically", "It reduces dimensionality and maximizes separation of two known classes", "It predicts numeric values" ], "correctAnswerIndex": 2, "explanation": "LDA is supervised and can be used for two-class classification while reducing dimensionality." }, { "id": 56, "questionText": "Scenario: You have two classes with different priors. 
How to handle in LDA?", "options": [ "Use class priors in discriminant function", "Standardize features only", "Ignore class priors", "Reduce number of classes" ], "correctAnswerIndex": 0, "explanation": "Incorporating class priors prevents bias toward larger classes." }, { "id": 57, "questionText": "Scenario: LDA applied after PCA pre-reduction. Advantage?", "options": [ "Randomly removes features", "Automatically adds classes", "Reduces noise and computational cost while preserving discriminative features", "Increases variance" ], "correctAnswerIndex": 2, "explanation": "PCA pre-reduction simplifies LDA computations and can improve stability on high-dimensional data." }, { "id": 58, "questionText": "Scenario: LDA misclassifies samples at class edges. Likely cause?", "options": [ "Output dimension too low", "Too many features", "Algorithm error", "Overlap in distributions or violation of assumptions" ], "correctAnswerIndex": 3, "explanation": "Misclassification occurs where distributions overlap or assumptions are violated." }, { "id": 59, "questionText": "Scenario: LDA applied to gene expression data with thousands of features. Recommended step?", "options": [ "Use raw data directly", "Reduce classes to 2", "Remove high-expression genes only", "Dimensionality reduction with PCA before LDA" ], "correctAnswerIndex": 3, "explanation": "PCA reduces high-dimensional noise and makes LDA computation feasible." }, { "id": 60, "questionText": "Scenario: LDA projections differ between runs. Likely cause?", "options": [ "Random initialization in eigen decomposition", "Features not scaled", "Algorithm failure", "Too few classes" ], "correctAnswerIndex": 0, "explanation": "Random numerical processes in LDA can lead to slightly different projections across runs." }, { "id": 61, "questionText": "Scenario: Two features perfectly correlated. LDA result?", "options": [ "Cannot compute projection", "Redundant axis; still works", "Algorithm fails", "Removes one feature automatically" ], "correctAnswerIndex": 1, "explanation": "Perfect correlation introduces redundancy but LDA can still compute the discriminant axis." }, { "id": 62, "questionText": "Scenario: Class distributions are heavily skewed. LDA assumption?", "options": [ "Automatic transformation occurs", "Skewed data improves separation", "LDA ignores distribution", "Assumes Gaussian distributions; skewness can reduce accuracy" ], "correctAnswerIndex": 3, "explanation": "Skewed distributions violate the Gaussian assumption, potentially reducing LDA effectiveness." }, { "id": 63, "questionText": "Scenario: LDA output used for visualization. How to select axes?", "options": [ "Top discriminant axes based on eigenvalues", "All features", "First features only", "Random axes" ], "correctAnswerIndex": 0, "explanation": "Eigenvalues indicate discriminative power; top axes preserve maximum class separation." }, { "id": 64, "questionText": "Scenario: LDA applied on imbalanced dataset. How to improve performance?", "options": [ "Remove smaller classes", "Ignore imbalance", "Use class priors or resample data", "Reduce output dimensions" ], "correctAnswerIndex": 2, "explanation": "Incorporating priors or balancing data prevents bias toward larger classes." }, { "id": 65, "questionText": "Scenario: LDA applied to text data (TF-IDF vectors). 
Recommended preprocessing?", "options": [ "Use raw counts without scaling", "Remove class labels", "Feature scaling or normalization", "Randomly select words" ], "correctAnswerIndex": 2, "explanation": "Scaling ensures different term frequencies contribute proportionally to discriminant function." }, { "id": 66, "questionText": "Scenario: LDA for multi-class image classification. Which is true?", "options": [ "LDA increases feature dimensions", "Maximum axes = c−1; use top axes for visualization or classifier", "Automatically converts to binary classes", "Does not require labels" ], "correctAnswerIndex": 1, "explanation": "LDA produces at most c−1 axes; top axes can be used for classification or visualization." }, { "id": 67, "questionText": "Scenario: After LDA, two classes overlap. Which action helps?", "options": [ "Reduce output dimensions", "Check assumptions, consider kernel LDA or QDA", "Remove overlapping points", "Use PCA only" ], "correctAnswerIndex": 1, "explanation": "Violations of LDA assumptions may require alternative methods like kernel LDA or QDA." }, { "id": 68, "questionText": "Scenario: LDA applied to numeric data only. Why?", "options": [ "Categorical data works directly", "Algorithm ignores numeric values", "LDA requires numeric input for linear combination computation", "LDA converts numeric to categorical automatically" ], "correctAnswerIndex": 2, "explanation": "Linear combinations require numeric values; categorical features must be encoded numerically." }, { "id": 69, "questionText": "Scenario: LDA applied to overlapping classes. Performance metric?", "options": [ "Scatter plot only", "Variance only", "Eigenvalues only", "Classification accuracy, confusion matrix, or F1-score" ], "correctAnswerIndex": 3, "explanation": "Use standard classification metrics to evaluate LDA performance on overlapping classes." }, { "id": 70, "questionText": "Scenario: LDA applied on high-dimensional dataset with noisy features. Suggested approach?", "options": [ "Increase output dimensions", "Apply PCA before LDA to reduce noise and dimensionality", "Use raw data directly", "Remove labels" ], "correctAnswerIndex": 1, "explanation": "PCA helps reduce noise and computational complexity before applying LDA on high-dimensional data." }, { "id": 71, "questionText": "Scenario: You apply LDA on a dataset with 5 classes, but class 4 has only 2 samples. Likely issue?", "options": [ "Within-class scatter matrix may become singular, causing numerical instability", "LDA perfectly separates all classes", "Algorithm will automatically remove the class", "No impact since LDA ignores class size" ], "correctAnswerIndex": 0, "explanation": "Very few samples in a class can make the within-class scatter matrix singular, leading to computation problems." }, { "id": 72, "questionText": "Scenario: After LDA, two classes are misclassified even though they have distinct means. Possible reason?", "options": [ "Algorithm failed randomly", "Too many features", "Overlap in covariance structure violates LDA assumptions", "Output dimensions too high" ], "correctAnswerIndex": 2, "explanation": "Distinct means alone do not guarantee separation; LDA assumes equal covariance matrices across classes." }, { "id": 73, "questionText": "Scenario: You want to classify high-dimensional gene expression data with 3 classes. LDA fails. 
Recommended approach?", "options": [ "Use LDA directly without preprocessing", "Reduce the number of classes to 2", "Apply PCA first to reduce dimensionality, then LDA", "Remove high-variance genes only" ], "correctAnswerIndex": 2, "explanation": "High-dimensional data can lead to singular matrices; PCA reduces dimensionality and noise, stabilizing LDA." }, { "id": 74, "questionText": "Scenario: Two classes with similar means but different covariance. LDA vs QDA?", "options": [ "LDA is better since it assumes equal covariance", "Both perform equally", "QDA will perform better as it allows class-specific covariances", "Neither works" ], "correctAnswerIndex": 2, "explanation": "QDA handles unequal covariances, whereas LDA assumes equality, which may cause misclassification." }, { "id": 75, "questionText": "Scenario: You apply LDA to image data and notice one axis dominates classification. Likely cause?", "options": [ "Feature variance imbalance; need normalization", "Too many classes", "Algorithm failure", "Output dimension too low" ], "correctAnswerIndex": 0, "explanation": "Features with larger variance dominate projections if data is not scaled." }, { "id": 76, "questionText": "Scenario: Applying LDA to text classification with sparse TF-IDF features. Recommended preprocessing?", "options": [ "Dimensionality reduction (PCA or SVD) before LDA", "Use raw sparse data", "Remove class labels", "Randomly sample features" ], "correctAnswerIndex": 0, "explanation": "Sparse high-dimensional data can cause numerical instability; PCA or SVD reduces dimensions before LDA." }, { "id": 77, "questionText": "Scenario: After LDA, some minority class samples are misclassified. How to improve?", "options": [ "Ignore minority class", "Increase output dimensions", "Use class priors or resampling techniques", "Randomly merge classes" ], "correctAnswerIndex": 2, "explanation": "Incorporating class priors or oversampling minority class balances the discriminant function." }, { "id": 78, "questionText": "Scenario: Two classes perfectly linearly separable. How does LDA behave?", "options": [ "Cannot compute scatter matrices", "Fails since overlap is zero", "Finds the optimal linear projection maximizing separation", "Randomly assigns projections" ], "correctAnswerIndex": 2, "explanation": "LDA works best when linear separation exists; it identifies a projection that maximizes separation." }, { "id": 79, "questionText": "Scenario: You have four classes and high feature correlation. LDA produces redundant axes. Solution?", "options": [ "Use raw features", "Apply PCA before LDA to remove redundancy", "Randomly remove features", "Reduce number of classes" ], "correctAnswerIndex": 1, "explanation": "PCA can decorrelate features, reducing redundancy and improving LDA projections." }, { "id": 80, "questionText": "Scenario: LDA applied to dataset with skewed distributions. Result?", "options": [ "Reduced accuracy due to violated Gaussian assumption", "Perfect classification", "Automatic feature scaling", "Increased dimensionality" ], "correctAnswerIndex": 0, "explanation": "LDA assumes normal distribution; skewness violates this, affecting performance." }, { "id": 81, "questionText": "Scenario: LDA applied to two classes, but one feature is categorical with three levels. 
How to proceed?", "options": [ "Use raw categorical values", "Ignore the categorical feature", "Encode categorical feature numerically before LDA", "Remove class labels" ], "correctAnswerIndex": 2, "explanation": "Categorical features must be numerically encoded to compute linear combinations in LDA." }, { "id": 82, "questionText": "Scenario: Applying LDA to imbalanced dataset causes bias toward majority class. Fix?", "options": [ "Increase output dimensions", "Reduce features", "Use class priors or resample minority classes", "Ignore imbalance" ], "correctAnswerIndex": 2, "explanation": "Adjusting priors or balancing data prevents biased projections." }, { "id": 83, "questionText": "Scenario: You want nonlinear boundaries between classes. Standard LDA?", "options": [ "Fails; consider kernel LDA or QDA", "Reduces dimensions automatically", "Performs perfectly", "Removes overlapping points" ], "correctAnswerIndex": 0, "explanation": "Standard LDA is linear; kernel LDA extends it to nonlinear separations." }, { "id": 84, "questionText": "Scenario: After LDA, eigenvalues of some discriminant axes are near zero. Interpretation?", "options": [ "Remove features randomly", "Axis contributes little to class separation", "Algorithm failure", "Data has missing values" ], "correctAnswerIndex": 1, "explanation": "Low eigenvalue axes have low discriminative power and may be ignored." }, { "id": 85, "questionText": "Scenario: You have 10 classes and 1000 features. LDA reduces to 9D. You want 2D visualization. How?", "options": [ "Randomly select 2 axes", "Use PCA only", "Reduce classes to 2", "Select top 2 discriminant axes based on eigenvalues" ], "correctAnswerIndex": 3, "explanation": "Top eigenvalue axes preserve maximum class separation in reduced dimensions." }, { "id": 86, "questionText": "Scenario: LDA misclassifies boundary samples consistently. Cause?", "options": [ "Output dimension too high", "Too many features", "Overlap in class distributions", "Algorithm error" ], "correctAnswerIndex": 2, "explanation": "Misclassification occurs where class distributions overlap." }, { "id": 87, "questionText": "Scenario: High-dimensional LDA suffers from singular within-class scatter matrix. Solution?", "options": [ "Increase sample size only", "Apply PCA or regularization before LDA", "Remove features randomly", "Reduce number of classes" ], "correctAnswerIndex": 1, "explanation": "Dimensionality reduction or regularization stabilizes matrix inversion." }, { "id": 88, "questionText": "Scenario: After LDA, two classes overlap in projection. Next step?", "options": [ "Ignore problem", "Increase output dimension", "Remove overlapping points", "Check assumptions, consider kernel LDA or QDA" ], "correctAnswerIndex": 3, "explanation": "Alternative methods handle unequal covariance or nonlinear separability." }, { "id": 89, "questionText": "Scenario: LDA applied to numeric and binary features. Action?", "options": [ "Remove numeric features", "Apply PCA only", "Ignore binary features", "Standardize numeric features, encode binary features numerically" ], "correctAnswerIndex": 3, "explanation": "All features must be numeric to compute linear discriminant functions." }, { "id": 90, "questionText": "Scenario: LDA applied to a 3-class dataset. Number of discriminant axes?", "options": [ "Depends on features", "3", "2", "1" ], "correctAnswerIndex": 2, "explanation": "Maximum axes = c−1 = 3−1 = 2." }, { "id": 91, "questionText": "Scenario: LDA applied with noisy features. 
Best practice?", "options": [ "Use raw features", "Remove minority classes", "Increase output dimension", "Apply PCA or feature selection before LDA" ], "correctAnswerIndex": 3, "explanation": "Reducing noise and dimensionality improves LDA stability." }, { "id": 92, "questionText": "Scenario: Two classes with identical covariances and means. LDA outcome?", "options": [ "Perfect separation", "Cannot separate classes", "Random projections", "Removes features automatically" ], "correctAnswerIndex": 1, "explanation": "No difference in mean or covariance means LDA has no discriminative power." }, { "id": 93, "questionText": "Scenario: LDA applied on imbalanced dataset with rare class. How to handle?", "options": [ "Ignore imbalance", "Remove majority class", "Reduce number of classes", "Use class priors or oversample rare class" ], "correctAnswerIndex": 3, "explanation": "Balancing class sizes prevents biased discriminant functions." }, { "id": 94, "questionText": "Scenario: After LDA, projections of classes are not aligned with original features. Why?", "options": [ "Normalization failed", "Discriminant axes are linear combinations, not original features", "Algorithm error", "Random initialization" ], "correctAnswerIndex": 1, "explanation": "LDA axes are combinations of features to maximize separability." }, { "id": 95, "questionText": "Scenario: LDA applied to high-dimensional text dataset. Why PCA first?", "options": [ "Convert text to binary", "Remove labels", "Increase class separation automatically", "Reduce noise and dimensionality, improve numerical stability" ], "correctAnswerIndex": 3, "explanation": "High-dimensional sparse data can cause singular matrices; PCA helps." }, { "id": 96, "questionText": "Scenario: LDA applied to two classes with unequal priors. How to proceed?", "options": [ "Ignore priors", "Remove minority class", "Reduce output dimensions", "Incorporate priors in discriminant function" ], "correctAnswerIndex": 3, "explanation": "Incorporating priors prevents bias toward larger class." }, { "id": 97, "questionText": "Scenario: You want to visualize 5-class dataset in 2D using LDA. Max axes?", "options": [ "4 axes; select top 2 for visualization", "Depends on features", "5 axes", "2 axes only" ], "correctAnswerIndex": 0, "explanation": "Maximum axes = c−1 = 5−1 = 4; top 2 axes can be used for visualization." }, { "id": 98, "questionText": "Scenario: LDA misclassifies samples near overlapping region. Best evaluation metric?", "options": [ "Eigenvalue magnitude", "Scatter plot", "Variance only", "Confusion matrix, precision, recall, or F1-score" ], "correctAnswerIndex": 3, "explanation": "Classification metrics are needed to evaluate performance on overlapping regions." }, { "id": 99, "questionText": "Scenario: LDA applied on dataset with outliers. Recommended step?", "options": [ "Ignore outliers", "Increase number of classes", "Reduce output dimensions", "Detect and remove or transform outliers before LDA" ], "correctAnswerIndex": 3, "explanation": "Outliers can distort mean and covariance, affecting discriminant functions." }, { "id": 100, "questionText": "Scenario: LDA applied to 3-class dataset with 50 features. Within-class scatter matrix is singular. 
Cause?", "options": [ "Features too independent", "Number of features > number of samples per class", "Output dimension too high", "Algorithm error" ], "correctAnswerIndex": 1, "explanation": "When features exceed sample count, the within-class scatter matrix becomes singular, requiring dimensionality reduction or regularization." } ] }