| { | |
| "title": "K-Nearest Neighbors (KNN) Mastery: 100 MCQs", | |
| "description": "A comprehensive set of 100 multiple-choice questions focused entirely on K-Nearest Neighbors (KNN) — covering intuition, distance metrics, hyperparameter tuning, classification & regression behavior, curse of dimensionality, and real-world use cases.", | |
| "questions": [ | |
| { | |
| "id": 1, | |
| "questionText": "What is the core principle behind the KNN algorithm?", | |
| "options": [ | |
| "It builds a decision tree and splits data recursively.", | |
| "It constructs a probabilistic model using Bayes theorem.", | |
| "It predicts the label based on the majority class of k nearest data points.", | |
| "It reduces dimensionality using PCA." | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "KNN predicts the class based on voting from the k nearest neighbors in the training data." | |
| }, | |
| { | |
| "id": 2, | |
| "questionText": "KNN is considered which type of learning algorithm?", | |
| "options": [ | |
| "Eager learning", | |
| "Reinforcement learning", | |
| "Unsupervised learning", | |
| "Lazy learning" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "KNN is a lazy learner because it does not build a model during training; it only stores the data." | |
| }, | |
| { | |
| "id": 3, | |
| "questionText": "Which distance metric is most commonly used in KNN?", | |
| "options": [ | |
| "Cosine Similarity", | |
| "Manhattan Distance", | |
| "Jaccard Distance", | |
| "Euclidean Distance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Euclidean Distance (L2 norm) is the most common distance metric used in KNN." | |
| }, | |
| { | |
| "id": 4, | |
| "questionText": "KNN is mainly used for:", | |
| "options": [ | |
| "Only regression", | |
| "Only clustering", | |
| "Both classification and regression", | |
| "Only classification" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "KNN can perform both classification (class votes) and regression (mean of k nearest values)." | |
| }, | |
| { | |
| "id": 5, | |
| "questionText": "What happens if k is set to a very large value?", | |
| "options": [ | |
| "The model becomes faster and more accurate always.", | |
| "The model becomes overly generalized and biased.", | |
| "The model becomes highly sensitive to noise.", | |
| "The model becomes very overfitted." | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "A very large k considers too many neighbors and may smooth out genuine class boundaries, causing high bias." | |
| }, | |
| { | |
| "id": 6, | |
| "questionText": "In KNN, what does the parameter 'k' represent?", | |
| "options": [ | |
| "Number of features in the dataset", | |
| "Depth of the tree used internally", | |
| "Number of nearest neighbors considered", | |
| "Learning rate of the algorithm" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "'k' is the number of closest neighbors used to decide the predicted class or value." | |
| }, | |
| { | |
| "id": 7, | |
| "questionText": "Which of the following is true about KNN training phase?", | |
| "options": [ | |
| "It stores all training data without building a model", | |
| "It builds a model by computing centroids", | |
| "It generates decision boundaries explicitly", | |
| "It calculates feature importance scores" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KNN is a lazy learner; during training, it only stores the dataset for use at prediction time." | |
| }, | |
| { | |
| "id": 8, | |
| "questionText": "Which KNN variant can handle weighted voting?", | |
| "options": [ | |
| "Uniform KNN", | |
| "Decision Tree", | |
| "Random Forest", | |
| "Weighted KNN" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Weighted KNN gives closer neighbors higher influence while predicting the output." | |
| }, | |
| { | |
| "id": 9, | |
| "questionText": "Which of the following affects KNN performance the most?", | |
| "options": [ | |
| "Choice of distance metric", | |
| "Activation function", | |
| "Regularization parameter", | |
| "Number of epochs" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KNN relies on distance computations; the choice of distance metric (Euclidean, Manhattan, etc.) is critical." | |
| }, | |
| { | |
| "id": 10, | |
| "questionText": "What is the default distance metric for most KNN implementations?", | |
| "options": [ | |
| "Cosine similarity", | |
| "Manhattan distance", | |
| "Hamming distance", | |
| "Euclidean distance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Euclidean distance is most commonly used by default in KNN implementations." | |
| }, | |
| { | |
| "id": 11, | |
| "questionText": "How does KNN handle a new data point for prediction?", | |
| "options": [ | |
| "It updates its model parameters", | |
| "It finds k closest points in the training set and predicts based on them", | |
| "It generates random prediction", | |
| "It builds a regression line through neighbors" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "KNN predicts by looking at the nearest k training points and using majority vote (classification) or average (regression)." | |
| }, | |
| { | |
| "id": 12, | |
| "questionText": "What is the main drawback of KNN on large datasets?", | |
| "options": [ | |
| "Does not scale to many classes", | |
| "High training time", | |
| "Cannot handle missing values", | |
| "High prediction time" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "KNN stores all training data, so prediction involves computing distances to all points, which is slow for large datasets." | |
| }, | |
| { | |
| "id": 13, | |
| "questionText": "Which of the following is true about KNN and normalization?", | |
| "options": [ | |
| "Normalization is not required", | |
| "Normalization only applies to categorical data", | |
| "Normalization changes class labels", | |
| "Normalization improves distance-based predictions" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Since KNN uses distances, features with larger scales can dominate. Normalization ensures fair contribution from all features." | |
| }, | |
| { | |
| "id": 14, | |
| "questionText": "How does KNN behave in the presence of irrelevant features?", | |
| "options": [ | |
| "Features are automatically ignored", | |
| "Performance improves", | |
| "Performance drops", | |
| "Algorithm ignores them during prediction" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Irrelevant features can distort distance calculations and reduce KNN prediction accuracy." | |
| }, | |
| { | |
| "id": 15, | |
| "questionText": "What type of algorithm is KNN considered in terms of model structure?", | |
| "options": [ | |
| "Non-parametric", | |
| "Linear", | |
| "Probabilistic", | |
| "Parametric" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KNN is non-parametric because it does not assume a predefined form for the function mapping inputs to outputs." | |
| }, | |
| { | |
| "id": 16, | |
| "questionText": "Which K value is generally recommended to avoid overfitting in KNN?", | |
| "options": [ | |
| "k moderate value like sqrt(n)", | |
| "k = 1", | |
| "k very small", | |
| "k equal to dataset size" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "A moderate k like sqrt(n) balances bias and variance, preventing overfitting." | |
| }, | |
| { | |
| "id": 17, | |
| "questionText": "Which metric is suitable for categorical variables in KNN?", | |
| "options": [ | |
| "Minkowski distance", | |
| "Manhattan distance", | |
| "Euclidean distance", | |
| "Hamming distance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Hamming distance counts mismatches between categorical feature values." | |
| }, | |
| { | |
| "id": 18, | |
| "questionText": "Which of the following is NOT a type of KNN?", | |
| "options": [ | |
| "Weighted KNN", | |
| "Regression KNN", | |
| "Decision KNN", | |
| "Classification KNN" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "There is no 'Decision KNN'; KNN is mainly classification, regression, or weighted variant." | |
| }, | |
| { | |
| "id": 19, | |
| "questionText": "What is the effect of having two classes with very imbalanced sizes in KNN?", | |
| "options": [ | |
| "Minority class dominates predictions", | |
| "Majority class dominates predictions", | |
| "KNN automatically balances classes", | |
| "Minor impact on accuracy" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "KNN predictions are influenced by majority neighbors; imbalanced classes may bias the results." | |
| }, | |
| { | |
| "id": 20, | |
| "questionText": "What is the primary storage requirement for KNN?", | |
| "options": [ | |
| "Feature coefficients", | |
| "All training data points", | |
| "Decision thresholds", | |
| "Distance matrices precomputed" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "KNN requires storing all training data for distance comparisons at prediction time." | |
| }, | |
| { | |
| "id": 21, | |
| "questionText": "What does the term 'curse of dimensionality' refer to in KNN?", | |
| "options": [ | |
| "Overfitting in small datasets", | |
| "High computation time with too many neighbors", | |
| "Distances become less meaningful in high dimensions", | |
| "Underfitting in large datasets" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "As dimensions increase, data points become sparse and distance measures lose effectiveness, reducing KNN performance." | |
| }, | |
| { | |
| "id": 22, | |
| "questionText": "Which technique can speed up KNN on large datasets?", | |
| "options": [ | |
| "KD-Trees or Ball-Trees", | |
| "Using logistic regression instead", | |
| "Principal Component Analysis", | |
| "Random Forest preprocessing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KD-Trees and Ball-Trees organize data to quickly find nearest neighbors without computing all distances." | |
| }, | |
| { | |
| "id": 23, | |
| "questionText": "In KNN regression, how is the predicted value calculated?", | |
| "options": [ | |
| "Using linear regression on neighbors", | |
| "Using gradient descent", | |
| "Majority vote of nearest neighbors", | |
| "Average of nearest neighbors’ values" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "KNN regression predicts by taking the mean (or sometimes weighted mean) of the k nearest neighbors' values." | |
| }, | |
| { | |
| "id": 24, | |
| "questionText": "Which of the following is true about KNN decision boundary?", | |
| "options": [ | |
| "Always axis-aligned", | |
| "Always linear", | |
| "Depends on data distribution", | |
| "Always circular" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "KNN decision boundaries can be irregular and follow the shape of data; they are not restricted to linear forms." | |
| }, | |
| { | |
| "id": 25, | |
| "questionText": "Which method can improve KNN on high-dimensional data?", | |
| "options": [ | |
| "Increasing k to dataset size", | |
| "Feature selection", | |
| "Ignoring normalization", | |
| "Adding more neighbors" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Selecting relevant features reduces dimensionality, improving distance calculation reliability." | |
| }, | |
| { | |
| "id": 26, | |
| "questionText": "KNN cannot handle which of the following natively?", | |
| "options": [ | |
| "Large datasets efficiently", | |
| "Numeric features", | |
| "Categorical features", | |
| "Missing data directly" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "KNN cannot handle missing values without preprocessing (imputation or removal)." | |
| }, | |
| { | |
| "id": 27, | |
| "questionText": "How does KNN handle ties in classification voting?", | |
| "options": [ | |
| "Chooses randomly among tied classes", | |
| "Fails with an error", | |
| "Chooses the closest neighbor's class", | |
| "Always chooses class 0" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Many implementations break ties by selecting the class of the closest neighbor among the tied classes." | |
| }, | |
| { | |
| "id": 28, | |
| "questionText": "Which scenario would make KNN less suitable?", | |
| "options": [ | |
| "Low-dimensional small datasets", | |
| "High-dimensional large datasets", | |
| "Well-separated clusters", | |
| "Binary classification" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "In high-dimensional large datasets, KNN is slow and distances lose meaning, reducing accuracy." | |
| }, | |
| { | |
| "id": 29, | |
| "questionText": "What is the time complexity of a naive KNN prediction with n training points?", | |
| "options": [ | |
| "O(n^2)", | |
| "O(1)", | |
| "O(log n)", | |
| "O(n)" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Naive KNN computes distances to all n points for each prediction, giving O(n) complexity." | |
| }, | |
| { | |
| "id": 30, | |
| "questionText": "What preprocessing step can improve KNN accuracy?", | |
| "options": [ | |
| "Adding irrelevant features", | |
| "Removing the dependent variable", | |
| "Randomly shuffling the data", | |
| "Scaling features to similar range" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Scaling features ensures fair distance computation, preventing one feature from dominating due to larger numeric range." | |
| }, | |
| { | |
| "id": 31, | |
| "questionText": "What is the effect of increasing 'k' in KNN classification?", | |
| "options": [ | |
| "Decreases bias", | |
| "Reduces overfitting", | |
| "Increases sensitivity to noise", | |
| "Increases model variance" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "A larger k smooths out predictions, reducing overfitting and variance but increasing bias." | |
| }, | |
| { | |
| "id": 32, | |
| "questionText": "Which distance metric can be more robust to outliers in KNN?", | |
| "options": [ | |
| "Cosine similarity", | |
| "Minkowski distance", | |
| "Manhattan distance", | |
| "Euclidean distance" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Manhattan distance is less sensitive to large deviations in individual features than Euclidean distance." | |
| }, | |
| { | |
| "id": 33, | |
| "questionText": "How can KNN be modified for imbalanced datasets?", | |
| "options": [ | |
| "Use weighted voting based on distance", | |
| "Increase k to dataset size", | |
| "Normalize features only", | |
| "Remove minority class samples" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Weighted voting gives closer neighbors more influence, reducing bias toward the majority class." | |
| }, | |
| { | |
| "id": 34, | |
| "questionText": "Which method can reduce KNN prediction time for large datasets?", | |
| "options": [ | |
| "Dimensionality reduction like PCA", | |
| "Using random shuffling", | |
| "Increasing k", | |
| "Adding more features" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reducing the number of features with PCA lowers dimensionality, which speeds up distance computation." | |
| }, | |
| { | |
| "id": 35, | |
| "questionText": "Why might KNN fail in very high-dimensional spaces?", | |
| "options": [ | |
| "Overfitting to majority class", | |
| "Random initialization", | |
| "Learning rate too high", | |
| "Curse of dimensionality" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "In high dimensions, points become equidistant and neighbors are less meaningful, reducing accuracy." | |
| }, | |
| { | |
| "id": 36, | |
| "questionText": "What does weighted KNN regression use instead of simple averaging?", | |
| "options": [ | |
| "Distance-based weighting of neighbors", | |
| "Median value of neighbors", | |
| "Majority vote of neighbors", | |
| "Random selection of neighbors" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Weighted KNN regression assigns higher weights to closer neighbors when computing the predicted value." | |
| }, | |
| { | |
| "id": 37, | |
| "questionText": "Which technique is useful to handle categorical and numeric features together in KNN?", | |
| "options": [ | |
| "Ignore numeric features", | |
| "Convert categorical to numeric with one-hot encoding", | |
| "Normalize categorical features only", | |
| "Use majority voting only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "One-hot encoding transforms categorical features to numeric so that distance metrics can be applied." | |
| }, | |
| { | |
| "id": 38, | |
| "questionText": "In KNN, what is the effect of noisy features?", | |
| "options": [ | |
| "Does not affect performance", | |
| "Automatically removed", | |
| "Reduces accuracy", | |
| "Improves accuracy" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Noisy features distort distance calculations, reducing prediction accuracy." | |
| }, | |
| { | |
| "id": 39, | |
| "questionText": "Which of the following can help KNN generalize better?", | |
| "options": [ | |
| "Adding more irrelevant features", | |
| "Reducing k to 1", | |
| "Feature scaling and selection", | |
| "Increasing dataset size without preprocessing" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Scaling ensures fair distance comparison, and selecting relevant features removes noise, improving generalization." | |
| }, | |
| { | |
| "id": 40, | |
| "questionText": "What happens if k is even and there is a tie in classification?", | |
| "options": [ | |
| "Prediction fails with error", | |
| "Tie-breaking strategy is needed", | |
| "Algorithm automatically increments k", | |
| "Randomly ignores the new point" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "When k is even, ties may occur; most implementations have a tie-breaking rule like choosing the closest neighbor." | |
| }, | |
| { | |
| "id": 41, | |
| "questionText": "Which preprocessing step can improve KNN on text data represented by TF-IDF vectors?", | |
| "options": [ | |
| "L2 normalization", | |
| "Random shuffling", | |
| "Adding more terms", | |
| "Stop-word removal only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "L2 normalization ensures vectors are comparable in distance calculations for KNN." | |
| }, | |
| { | |
| "id": 42, | |
| "questionText": "Which of the following affects KNN accuracy most in practice?", | |
| "options": [ | |
| "Learning rate", | |
| "Random seed only", | |
| "Distance metric and k", | |
| "Number of trees" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Choice of k and distance metric strongly influence KNN performance." | |
| }, | |
| { | |
| "id": 43, | |
| "questionText": "In KNN regression, how can you reduce the impact of outliers?", | |
| "options": [ | |
| "Use simple mean without weighting", | |
| "Increase k to dataset size", | |
| "Ignore preprocessing", | |
| "Use weighted averaging based on distance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Weighting closer neighbors more heavily reduces the effect of distant outliers." | |
| }, | |
| { | |
| "id": 44, | |
| "questionText": "Which approach can make KNN faster on large datasets?", | |
| "options": [ | |
| "Increase k to max", | |
| "Add random noise to data", | |
| "KD-Tree, Ball-Tree, or approximate nearest neighbor search", | |
| "Use high-dimensional features" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Tree-based or approximate search structures reduce distance computations needed for prediction." | |
| }, | |
| { | |
| "id": 45, | |
| "questionText": "How does KNN handle multi-class classification?", | |
| "options": [ | |
| "By majority vote among neighbors", | |
| "Cannot handle multi-class", | |
| "By training separate binary classifiers", | |
| "Only predicts top two classes" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KNN counts votes among k neighbors for all classes and selects the class with the highest votes." | |
| }, | |
| { | |
| "id": 46, | |
| "questionText": "Which distance metric is suitable for high-dimensional sparse data?", | |
| "options": [ | |
| "Manhattan distance", | |
| "Euclidean distance", | |
| "Cosine similarity", | |
| "Hamming distance" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Cosine similarity works better for high-dimensional sparse vectors like TF-IDF representations." | |
| }, | |
| { | |
| "id": 47, | |
| "questionText": "What happens to KNN performance if features are not scaled?", | |
| "options": [ | |
| "Dominated by features with larger scales", | |
| "Performance improves automatically", | |
| "Distance calculation is unaffected", | |
| "Accuracy remains same always" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Features with larger numeric ranges dominate distance computation, skewing predictions." | |
| }, | |
| { | |
| "id": 48, | |
| "questionText": "How can KNN be adapted for regression with categorical features?", | |
| "options": [ | |
| "Encode categories numerically or use mixed distance metric", | |
| "Use Euclidean distance directly", | |
| "Remove categorical features", | |
| "Only predict the most frequent category" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Encoding categorical features allows KNN to compute distances effectively for regression tasks." | |
| }, | |
| { | |
| "id": 49, | |
| "questionText": "What is one common method to select an optimal k?", | |
| "options": [ | |
| "Maximizing feature count", | |
| "Using k=1 always", | |
| "Random selection", | |
| "Cross-validation" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Cross-validation evaluates different k values to choose the one yielding best performance." | |
| }, | |
| { | |
| "id": 50, | |
| "questionText": "Which factor can lead to overfitting in KNN?", | |
| "options": [ | |
| "Using fewer neighbors", | |
| "Too small k value", | |
| "Scaling features", | |
| "Using weighted distance" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "A very small k (like k=1) can fit to noise and outliers, causing overfitting." | |
| }, | |
| { | |
| "id": 51, | |
| "questionText": "In KNN, what is an advantage of using odd k values in binary classification?", | |
| "options": [ | |
| "Avoid ties in voting", | |
| "Reduce distance calculations", | |
| "Increase speed", | |
| "Improve scaling automatically" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Odd k values help prevent ties between two classes." | |
| }, | |
| { | |
| "id": 52, | |
| "questionText": "Which type of feature transformation is recommended for KNN?", | |
| "options": [ | |
| "Adding irrelevant features", | |
| "One-hot encoding only for numeric data", | |
| "Normalization or standardization", | |
| "Random shuffling of features" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Normalization ensures fair contribution of each feature to distance calculation." | |
| }, | |
| { | |
| "id": 53, | |
| "questionText": "Which of the following reduces KNN sensitivity to outliers?", | |
| "options": [ | |
| "Increase k to 1", | |
| "Use Euclidean distance only", | |
| "Remove normalization", | |
| "Weighted distance averaging" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Weighting neighbors based on distance gives closer points more influence, reducing outlier impact." | |
| }, | |
| { | |
| "id": 54, | |
| "questionText": "In KNN, what is the effect of adding irrelevant features?", | |
| "options": [ | |
| "Automatically removed", | |
| "Decreases accuracy", | |
| "Increases accuracy", | |
| "No effect" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Irrelevant features distort distance calculations, reducing prediction accuracy." | |
| }, | |
| { | |
| "id": 55, | |
| "questionText": "Which method can improve KNN performance in sparse datasets?", | |
| "options": [ | |
| "Ignore distance weighting", | |
| "Add noise to features", | |
| "Dimensionality reduction", | |
| "Increase k to dataset size" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Reducing dimensionality can make distance computations more meaningful in sparse datasets." | |
| }, | |
| { | |
| "id": 56, | |
| "questionText": "Which approach helps handle large-scale KNN efficiently?", | |
| "options": [ | |
| "Increasing k arbitrarily", | |
| "Scaling only", | |
| "Approximate nearest neighbor search", | |
| "Random shuffling" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Approximate nearest neighbor search reduces computational cost while giving nearly correct neighbors." | |
| }, | |
| { | |
| "id": 57, | |
| "questionText": "Which of the following is true for KNN regression prediction?", | |
| "options": [ | |
| "Weighted average based on neighbor distance", | |
| "Average of nearest neighbors’ values", | |
| "None of the above", | |
| "Both A and B" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "KNN regression can use simple or weighted averaging of neighbors’ values." | |
| }, | |
| { | |
| "id": 58, | |
| "questionText": "Which is a practical drawback of KNN in real-world systems?", | |
| "options": [ | |
| "Requires model training", | |
| "High prediction latency", | |
| "Automatically ignores irrelevant features", | |
| "Cannot handle numeric data" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "KNN computes distances at prediction time, leading to high latency for large datasets." | |
| }, | |
| { | |
| "id": 59, | |
| "questionText": "Which type of scaling preserves relative distances between points for KNN?", | |
| "options": [ | |
| "Min-Max scaling", | |
| "Log transformation only", | |
| "Adding random noise", | |
| "Shuffling features" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Min-Max or standardization scales features to similar ranges while preserving relative distances." | |
| }, | |
| { | |
| "id": 60, | |
| "questionText": "Which is a disadvantage of KNN compared to parametric models?", | |
| "options": [ | |
| "Requires fixed training", | |
| "Slower predictions for large datasets", | |
| "Cannot model non-linear boundaries", | |
| "Sensitive to overfitting only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "KNN stores all training data and computes distances, making predictions slower than parametric models." | |
| }, | |
| { | |
| "id": 61, | |
| "questionText": "How can KNN handle multi-label classification?", | |
| "options": [ | |
| "Uses separate KNN per label", | |
| "Cannot handle multi-label", | |
| "Predict all labels present in neighbors", | |
| "Only predicts one label" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "KNN can aggregate labels from neighbors and predict multiple labels per instance." | |
| }, | |
| { | |
| "id": 62, | |
| "questionText": "Which distance metric can handle mixed numeric and categorical data?", | |
| "options": [ | |
| "Gower distance", | |
| "Euclidean distance", | |
| "Cosine similarity", | |
| "Manhattan distance" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Gower distance can compute similarity for mixed numeric and categorical features." | |
| }, | |
| { | |
| "id": 63, | |
| "questionText": "What is one way to reduce memory usage in KNN for large datasets?", | |
| "options": [ | |
| "Use condensed nearest neighbor algorithms", | |
| "Ignore irrelevant features", | |
| "Increase k to dataset size", | |
| "Normalize only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Condensed nearest neighbor algorithms reduce stored points while maintaining accuracy." | |
| }, | |
| { | |
| "id": 64, | |
| "questionText": "Which approach helps improve KNN in imbalanced datasets?", | |
| "options": [ | |
| "Increase irrelevant features", | |
| "Use k=1 always", | |
| "Distance-weighted voting", | |
| "Ignore normalization" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Weighted voting gives closer points more influence, reducing bias toward majority class." | |
| }, | |
| { | |
| "id": 65, | |
| "questionText": "What is the effect of increasing feature dimensionality in KNN?", | |
| "options": [ | |
| "Computation decreases", | |
| "Feature importance is automatically computed", | |
| "Accuracy always improves", | |
| "Distances become less meaningful" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "High-dimensional spaces make points almost equidistant, reducing KNN effectiveness." | |
| }, | |
| { | |
| "id": 66, | |
| "questionText": "Which scenario can cause KNN to misclassify a data point?", | |
| "options": [ | |
| "Choosing odd k", | |
| "Using weighted voting", | |
| "Nearby points from other class dominate", | |
| "Normalization applied" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "If neighbors are closer from other classes, KNN may predict incorrectly." | |
| }, | |
| { | |
| "id": 67, | |
| "questionText": "Which strategy can improve KNN with very sparse datasets?", | |
| "options": [ | |
| "Add random features", | |
| "Ignore distance metric", | |
| "Dimensionality reduction", | |
| "Increase k arbitrarily" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Reducing dimensionality reduces sparsity and makes distances meaningful." | |
| }, | |
| { | |
| "id": 68, | |
| "questionText": "What is a good rule of thumb for selecting k?", | |
| "options": [ | |
| "k = 1 always", | |
| "k = n/2", | |
| "k = sqrt(n)", | |
| "k = number of features" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Using k = sqrt(n) balances bias and variance in most cases." | |
| }, | |
| { | |
| "id": 69, | |
| "questionText": "Which technique can speed up KNN predictions in high dimensions?", | |
| "options": [ | |
| "Approximate nearest neighbor algorithms", | |
| "Normalize only", | |
| "Random shuffling", | |
| "Increase k to max" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Approximate nearest neighbor search reduces computation while maintaining accuracy." | |
| }, | |
| { | |
| "id": 70, | |
| "questionText": "Which type of data preprocessing improves KNN performance?", | |
| "options": [ | |
| "Random shuffling only", | |
| "Ignoring categorical features", | |
| "Adding irrelevant features", | |
| "Feature scaling and selection" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Scaling ensures fair distance measurement, and selecting relevant features removes noise, improving predictions." | |
| }, | |
| { | |
| "id": 71, | |
| "questionText": "In a recommendation system using KNN, what could cause poor predictions?", | |
| "options": [ | |
| "High number of neighbors", | |
| "Sparse user-item interaction data", | |
| "Low-dimensional features", | |
| "Normalized data" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Sparse interaction matrices reduce neighbor similarity reliability, causing poor recommendations." | |
| }, | |
| { | |
| "id": 72, | |
| "questionText": "Which approach is suitable for reducing KNN latency in a real-time system?", | |
| "options": [ | |
| "Randomly select features", | |
| "Increase k to dataset size", | |
| "Normalize data only", | |
| "Approximate nearest neighbor search" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Approximate nearest neighbor algorithms provide fast predictions with minimal accuracy loss." | |
| }, | |
| { | |
| "id": 73, | |
| "questionText": "In high-dimensional gene expression data, KNN performance drops because:", | |
| "options": [ | |
| "Normalization causes data loss", | |
| "KNN overfits easily with large k", | |
| "Distances become less informative (curse of dimensionality)", | |
| "Minority classes dominate" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "High-dimensional data makes points nearly equidistant, reducing neighbor relevance and accuracy." | |
| }, | |
| { | |
| "id": 74, | |
| "questionText": "Scenario: A new customer profile is very different from existing customers. Which issue might KNN face?", | |
| "options": [ | |
| "Predicted class may be inaccurate due to no similar neighbors", | |
| "KNN will automatically ignore the profile", | |
| "Model overfits automatically", | |
| "KNN will generate a new class" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "If no close neighbors exist, KNN cannot provide reliable predictions." | |
| }, | |
| { | |
| "id": 75, | |
| "questionText": "What is the main challenge of KNN when deployed in high-frequency trading?", | |
| "options": [ | |
| "Weighted voting is not supported", | |
| "Overfitting to training set", | |
| "Distance metric fails for numeric data", | |
| "High prediction latency due to large datasets" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "KNN requires computing distances to all stored points, making it too slow for real-time predictions in trading." | |
| }, | |
| { | |
| "id": 76, | |
| "questionText": "Scenario: Two classes are very close in feature space but overlapping. Which KNN behavior is expected?", | |
| "options": [ | |
| "KNN ignores overlapping points", | |
| "Higher misclassification rate", | |
| "KNN increases k automatically", | |
| "Predictions are perfect" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "KNN struggles with overlapping classes as neighbors from the wrong class may dominate." | |
| }, | |
| { | |
| "id": 77, | |
| "questionText": "Which method improves KNN performance for very high-dimensional image embeddings?", | |
| "options": [ | |
| "Use raw pixel values directly", | |
| "Dimensionality reduction (PCA, t-SNE, or UMAP)", | |
| "Increase k to max", | |
| "Randomly shuffle features" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Reducing dimensions retains essential information and makes distances meaningful." | |
| }, | |
| { | |
| "id": 78, | |
| "questionText": "Scenario: A fraud detection system uses KNN. New types of fraud appear. What is the limitation?", | |
| "options": [ | |
| "KNN cannot detect unseen patterns without similar neighbors", | |
| "Prediction latency decreases", | |
| "KNN automatically adapts", | |
| "Accuracy improves with noise" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KNN relies on similarity to existing points, so unseen patterns are difficult to detect." | |
| }, | |
| { | |
| "id": 79, | |
| "questionText": "What is a limitation of KNN in large-scale recommendation systems?", | |
| "options": [ | |
| "Cannot handle numeric data", | |
| "Fails on binary features", | |
| "Overfits automatically", | |
| "Memory and computation intensive" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Storing all user-item interactions and computing distances is memory and CPU intensive." | |
| }, | |
| { | |
| "id": 80, | |
| "questionText": "Which approach is suitable for speeding up KNN with millions of samples?", | |
| "options": [ | |
| "Use weighted voting only", | |
| "Increase k to n", | |
| "Use approximate nearest neighbor libraries like FAISS or Annoy", | |
| "Normalize features only" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Approximate search libraries significantly reduce computation while maintaining near-optimal accuracy." | |
| }, | |
| { | |
| "id": 81, | |
| "questionText": "Scenario: In KNN, a feature has a huge numeric range. What problem arises?", | |
| "options": [ | |
| "Feature dominates distance, biasing prediction", | |
| "Weighted voting fails", | |
| "Prediction latency reduces", | |
| "Feature is ignored automatically" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Large-scale features dominate distance computation, skewing predictions unless scaled." | |
| }, | |
| { | |
| "id": 82, | |
| "questionText": "What is a strategy to handle missing values in KNN?", | |
| "options": [ | |
| "Impute missing values before computing distances", | |
| "Increase k to handle missing", | |
| "Ignore missing values automatically", | |
| "Remove all neighbors with missing values" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Missing values should be imputed (mean, median, or mode) to allow proper distance computation." | |
| }, | |
| { | |
| "id": 83, | |
| "questionText": "Scenario: In medical diagnosis using KNN, rare disease cases are underrepresented. Which is a solution?", | |
| "options": [ | |
| "Use raw unscaled features", | |
| "Ignore minority class", | |
| "Weighted voting or synthetic oversampling (SMOTE)", | |
| "Reduce k to 1" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Weighted voting or synthetic oversampling addresses imbalance and improves prediction of rare cases." | |
| }, | |
| { | |
| "id": 84, | |
| "questionText": "Which technique reduces distance computation in high-dimensional KNN?", | |
| "options": [ | |
| "Random shuffling", | |
| "Adding irrelevant features", | |
| "Dimensionality reduction", | |
| "Increasing k to n" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Reducing dimensions reduces number of calculations and improves neighbor relevance." | |
| }, | |
| { | |
| "id": 85, | |
| "questionText": "Scenario: In KNN regression, a few extreme neighbor values exist. What is the impact?", | |
| "options": [ | |
| "Prediction unaffected", | |
| "Predicted value may be skewed unless weighted", | |
| "Accuracy improves automatically", | |
| "KNN fails completely" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Outliers can bias the predicted mean; using weighted averaging mitigates this effect." | |
| }, | |
| { | |
| "id": 86, | |
| "questionText": "What is a benefit of KD-Tree in KNN?", | |
| "options": [ | |
| "Reduces neighbor search complexity in low dimensions", | |
| "Reduces bias of model", | |
| "Automatically scales features", | |
| "Increases training time significantly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KD-Tree allows efficient nearest neighbor search in low to moderate dimensions." | |
| }, | |
| { | |
| "id": 87, | |
| "questionText": "Scenario: KNN is applied on time-series data without preprocessing. What is a potential problem?", | |
| "options": [ | |
| "Outliers are automatically removed", | |
| "Accuracy automatically improves", | |
| "Distance metric ignores temporal order", | |
| "KNN predicts trends perfectly" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "KNN does not account for temporal order; raw time-series may not capture pattern similarity properly." | |
| }, | |
| { | |
| "id": 88, | |
| "questionText": "Which scenario illustrates KNN's limitation?", | |
| "options": [ | |
| "Balanced, low-dimensional data", | |
| "Normalized dataset", | |
| "A new point far from all existing points", | |
| "Few noise-free features" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "When a point is far from all neighbors, KNN cannot predict reliably." | |
| }, | |
| { | |
| "id": 89, | |
| "questionText": "Scenario: KNN used for text document classification with TF-IDF vectors. Which step is crucial?", | |
| "options": [ | |
| "Increase k to dataset size", | |
| "Adding irrelevant terms", | |
| "Ignore vector scaling", | |
| "L2 normalization to make distances comparable" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "TF-IDF vectors should be normalized to ensure fair distance computation." | |
| }, | |
| { | |
| "id": 90, | |
| "questionText": "Scenario: KNN struggles with overlapping clusters in feature space. What is a solution?", | |
| "options": [ | |
| "Use feature engineering to separate clusters", | |
| "Ignore scaling", | |
| "Increase k arbitrarily", | |
| "Remove minority points" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Engineering features that better separate classes improves KNN accuracy." | |
| }, | |
| { | |
| "id": 91, | |
| "questionText": "Which approach can improve KNN in very large datasets without losing much accuracy?", | |
| "options": [ | |
| "Use approximate nearest neighbor search", | |
| "Increase k to dataset size", | |
| "Ignore preprocessing", | |
| "Add random noise" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Approximate search reduces computation while keeping predictions close to exact KNN." | |
| }, | |
| { | |
| "id": 92, | |
| "questionText": "Scenario: Online KNN requires predictions every second. Challenge?", | |
| "options": [ | |
| "Cannot handle numeric data", | |
| "KNN scales features automatically", | |
| "High latency due to full distance computation", | |
| "Overfitting automatically" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Real-time prediction is slow because KNN computes distance to all points at query time." | |
| }, | |
| { | |
| "id": 93, | |
| "questionText": "Scenario: Multi-class KNN with imbalanced classes. What can improve fairness?", | |
| "options": [ | |
| "Use k=1 always", | |
| "Random shuffling", | |
| "Distance-weighted voting", | |
| "Ignore minority classes" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Weighted voting ensures closer neighbors have more influence, improving minority class predictions." | |
| }, | |
| { | |
| "id": 94, | |
| "questionText": "Scenario: A KNN model is deployed for anomaly detection. Limitation?", | |
| "options": [ | |
| "Rare anomalies may have no close neighbors", | |
| "Weighted KNN solves all issues", | |
| "Accuracy improves automatically", | |
| "Feature scaling is irrelevant" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "If anomalies are isolated, KNN cannot detect them due to lack of nearby points." | |
| }, | |
| { | |
| "id": 95, | |
| "questionText": "Scenario: In high-dimensional image retrieval, KNN prediction is slow. Solution?", | |
| "options": [ | |
| "Use raw pixel vectors", | |
| "Use approximate nearest neighbor algorithms like FAISS", | |
| "Increase k arbitrarily", | |
| "Ignore normalization" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Approximate algorithms reduce computation significantly while maintaining retrieval quality." | |
| }, | |
| { | |
| "id": 96, | |
| "questionText": "Which scenario can lead to KNN overfitting?", | |
| "options": [ | |
| "Very small k and noisy data", | |
| "Large k with clean data", | |
| "Normalized features", | |
| "Weighted voting" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Small k may fit noise and outliers, causing overfitting." | |
| }, | |
| { | |
| "id": 97, | |
| "questionText": "Scenario: KNN regression for house prices with outlier houses. Best approach?", | |
| "options": [ | |
| "Increase k arbitrarily", | |
| "Remove scaling", | |
| "Simple mean ignoring distances", | |
| "Weighted averaging by distance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Weighted averaging reduces outlier impact, giving closer neighbors more influence." | |
| }, | |
| { | |
| "id": 98, | |
| "questionText": "Scenario: KNN applied to large sparse matrix of user ratings. Challenge?", | |
| "options": [ | |
| "Distance metric fails", | |
| "High memory usage and computation", | |
| "Overfitting automatically", | |
| "Minority class ignored" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Sparse matrices require storing many zeros and computing many distances, which is expensive." | |
| }, | |
| { | |
| "id": 99, | |
| "questionText": "Scenario: Real-time KNN requires prediction in milliseconds. Solution?", | |
| "options": [ | |
| "Use weighted voting only", | |
| "Use approximate nearest neighbor search", | |
| "Increase k to n", | |
| "Ignore feature scaling" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Approximate methods like Annoy or FAISS significantly speed up prediction for large datasets." | |
| }, | |
| { | |
| "id": 100, | |
| "questionText": "Scenario: High-dimensional text KNN classification. Which step is crucial?", | |
| "options": [ | |
| "Ignore scaling", | |
| "Dimensionality reduction or normalization", | |
| "Add random features", | |
| "Use raw text vectors" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "High-dimensional text vectors suffer from the curse of dimensionality; normalization (like L2) or dimensionality reduction is needed to make distances meaningful." | |
| } | |
| ] | |
| } | |