{ "title": "K-Nearest Neighbors (KNN) Mastery: 100 MCQs", "description": "A comprehensive set of 100 multiple-choice questions focused entirely on K-Nearest Neighbors (KNN) — covering intuition, distance metrics, hyperparameter tuning, classification & regression behavior, curse of dimensionality, and real-world use cases.", "questions": [ { "id": 1, "questionText": "What is the core principle behind the KNN algorithm?", "options": [ "It builds a decision tree and splits data recursively.", "It constructs a probabilistic model using Bayes theorem.", "It predicts the label based on the majority class of k nearest data points.", "It reduces dimensionality using PCA." ], "correctAnswerIndex": 2, "explanation": "KNN predicts the class based on voting from the k nearest neighbors in the training data." }, { "id": 2, "questionText": "KNN is considered which type of learning algorithm?", "options": [ "Eager learning", "Reinforcement learning", "Unsupervised learning", "Lazy learning" ], "correctAnswerIndex": 3, "explanation": "KNN is a lazy learner because it does not build a model during training; it only stores the data." }, { "id": 3, "questionText": "Which distance metric is most commonly used in KNN?", "options": [ "Cosine Similarity", "Manhattan Distance", "Jaccard Distance", "Euclidean Distance" ], "correctAnswerIndex": 3, "explanation": "Euclidean Distance (L2 norm) is the most common distance metric used in KNN." }, { "id": 4, "questionText": "KNN is mainly used for:", "options": [ "Only regression", "Only clustering", "Both classification and regression", "Only classification" ], "correctAnswerIndex": 2, "explanation": "KNN can perform both classification (class votes) and regression (mean of k nearest values)." 
}, { "id": 5, "questionText": "What happens if k is set to a very large value?", "options": [ "The model becomes faster and more accurate always.", "The model becomes overly generalized and biased.", "The model becomes highly sensitive to noise.", "The model becomes very overfitted." ], "correctAnswerIndex": 1, "explanation": "A very large k considers too many neighbors and may smooth out genuine class boundaries, causing high bias." }, { "id": 6, "questionText": "In KNN, what does the parameter 'k' represent?", "options": [ "Number of features in the dataset", "Depth of the tree used internally", "Number of nearest neighbors considered", "Learning rate of the algorithm" ], "correctAnswerIndex": 2, "explanation": "'k' is the number of closest neighbors used to decide the predicted class or value." }, { "id": 7, "questionText": "Which of the following is true about KNN training phase?", "options": [ "It stores all training data without building a model", "It builds a model by computing centroids", "It generates decision boundaries explicitly", "It calculates feature importance scores" ], "correctAnswerIndex": 0, "explanation": "KNN is a lazy learner; during training, it only stores the dataset for use at prediction time." }, { "id": 8, "questionText": "Which KNN variant can handle weighted voting?", "options": [ "Uniform KNN", "Decision Tree", "Random Forest", "Weighted KNN" ], "correctAnswerIndex": 3, "explanation": "Weighted KNN gives closer neighbors higher influence while predicting the output." }, { "id": 9, "questionText": "Which of the following affects KNN performance the most?", "options": [ "Choice of distance metric", "Activation function", "Regularization parameter", "Number of epochs" ], "correctAnswerIndex": 0, "explanation": "KNN relies on distance computations; the choice of distance metric (Euclidean, Manhattan, etc.) is critical." 
}, { "id": 10, "questionText": "What is the default distance metric for most KNN implementations?", "options": [ "Cosine similarity", "Manhattan distance", "Hamming distance", "Euclidean distance" ], "correctAnswerIndex": 3, "explanation": "Euclidean distance is most commonly used by default in KNN implementations." }, { "id": 11, "questionText": "How does KNN handle a new data point for prediction?", "options": [ "It updates its model parameters", "It finds k closest points in the training set and predicts based on them", "It generates random prediction", "It builds a regression line through neighbors" ], "correctAnswerIndex": 1, "explanation": "KNN predicts by looking at the nearest k training points and using majority vote (classification) or average (regression)." }, { "id": 12, "questionText": "What is the main drawback of KNN on large datasets?", "options": [ "Does not scale to many classes", "High training time", "Cannot handle missing values", "High prediction time" ], "correctAnswerIndex": 3, "explanation": "KNN stores all training data, so prediction involves computing distances to all points, which is slow for large datasets." }, { "id": 13, "questionText": "Which of the following is true about KNN and normalization?", "options": [ "Normalization is not required", "Normalization only applies to categorical data", "Normalization changes class labels", "Normalization improves distance-based predictions" ], "correctAnswerIndex": 3, "explanation": "Since KNN uses distances, features with larger scales can dominate. Normalization ensures fair contribution from all features." }, { "id": 14, "questionText": "How does KNN behave in the presence of irrelevant features?", "options": [ "Features are automatically ignored", "Performance improves", "Performance drops", "Algorithm ignores them during prediction" ], "correctAnswerIndex": 2, "explanation": "Irrelevant features can distort distance calculations and reduce KNN prediction accuracy." 
}, { "id": 15, "questionText": "What type of algorithm is KNN considered in terms of model structure?", "options": [ "Non-parametric", "Linear", "Probabilistic", "Parametric" ], "correctAnswerIndex": 0, "explanation": "KNN is non-parametric because it does not assume a predefined form for the function mapping inputs to outputs." }, { "id": 16, "questionText": "Which k value is generally recommended to avoid overfitting in KNN?", "options": [ "k set to a moderate value, such as sqrt(n)", "k = 1", "k very small", "k equal to dataset size" ], "correctAnswerIndex": 0, "explanation": "A moderate k like sqrt(n) balances bias and variance, preventing overfitting." }, { "id": 17, "questionText": "Which metric is suitable for categorical variables in KNN?", "options": [ "Minkowski distance", "Manhattan distance", "Euclidean distance", "Hamming distance" ], "correctAnswerIndex": 3, "explanation": "Hamming distance counts mismatches between categorical feature values." }, { "id": 18, "questionText": "Which of the following is NOT a type of KNN?", "options": [ "Weighted KNN", "Regression KNN", "Decision KNN", "Classification KNN" ], "correctAnswerIndex": 2, "explanation": "There is no 'Decision KNN'; the standard variants are classification KNN, regression KNN, and weighted KNN." }, { "id": 19, "questionText": "What is the effect of having two classes with very imbalanced sizes in KNN?", "options": [ "Minority class dominates predictions", "Majority class dominates predictions", "KNN automatically balances classes", "Minor impact on accuracy" ], "correctAnswerIndex": 1, "explanation": "KNN predictions are influenced by majority neighbors; imbalanced classes may bias the results." }, { "id": 20, "questionText": "What is the primary storage requirement for KNN?", "options": [ "Feature coefficients", "All training data points", "Decision thresholds", "Distance matrices precomputed" ], "correctAnswerIndex": 1, "explanation": "KNN requires storing all training data for distance comparisons at prediction time." 
}, { "id": 21, "questionText": "What does the term 'curse of dimensionality' refer to in KNN?", "options": [ "Overfitting in small datasets", "High computation time with too many neighbors", "Distances become less meaningful in high dimensions", "Underfitting in large datasets" ], "correctAnswerIndex": 2, "explanation": "As dimensions increase, data points become sparse and distance measures lose effectiveness, reducing KNN performance." }, { "id": 22, "questionText": "Which technique can speed up KNN on large datasets?", "options": [ "KD-Trees or Ball-Trees", "Using logistic regression instead", "Principal Component Analysis", "Random Forest preprocessing" ], "correctAnswerIndex": 0, "explanation": "KD-Trees and Ball-Trees organize data to quickly find nearest neighbors without computing all distances." }, { "id": 23, "questionText": "In KNN regression, how is the predicted value calculated?", "options": [ "Using linear regression on neighbors", "Using gradient descent", "Majority vote of nearest neighbors", "Average of nearest neighbors’ values" ], "correctAnswerIndex": 3, "explanation": "KNN regression predicts by taking the mean (or sometimes weighted mean) of the k nearest neighbors' values." }, { "id": 24, "questionText": "Which of the following is true about KNN decision boundary?", "options": [ "Always axis-aligned", "Always linear", "Depends on data distribution", "Always circular" ], "correctAnswerIndex": 2, "explanation": "KNN decision boundaries can be irregular and follow the shape of data; they are not restricted to linear forms." }, { "id": 25, "questionText": "Which method can improve KNN on high-dimensional data?", "options": [ "Increasing k to dataset size", "Feature selection", "Ignoring normalization", "Adding more neighbors" ], "correctAnswerIndex": 1, "explanation": "Selecting relevant features reduces dimensionality, improving distance calculation reliability." 
}, { "id": 26, "questionText": "KNN cannot handle which of the following natively?", "options": [ "Large datasets efficiently", "Numeric features", "Categorical features", "Missing data directly" ], "correctAnswerIndex": 3, "explanation": "KNN cannot handle missing values without preprocessing (imputation or removal)." }, { "id": 27, "questionText": "How does KNN handle ties in classification voting?", "options": [ "Chooses randomly among tied classes", "Fails with an error", "Chooses the closest neighbor's class", "Always chooses class 0" ], "correctAnswerIndex": 2, "explanation": "Many implementations break ties by selecting the class of the closest neighbor among the tied classes." }, { "id": 28, "questionText": "Which scenario would make KNN less suitable?", "options": [ "Low-dimensional small datasets", "High-dimensional large datasets", "Well-separated clusters", "Binary classification" ], "correctAnswerIndex": 1, "explanation": "In high-dimensional large datasets, KNN is slow and distances lose meaning, reducing accuracy." }, { "id": 29, "questionText": "What is the time complexity of a naive KNN prediction with n training points?", "options": [ "O(n^2)", "O(1)", "O(log n)", "O(n)" ], "correctAnswerIndex": 3, "explanation": "Naive KNN computes distances to all n points for each prediction, giving O(n) complexity." }, { "id": 30, "questionText": "What preprocessing step can improve KNN accuracy?", "options": [ "Adding irrelevant features", "Removing the dependent variable", "Randomly shuffling the data", "Scaling features to similar range" ], "correctAnswerIndex": 3, "explanation": "Scaling features ensures fair distance computation, preventing one feature from dominating due to larger numeric range." 
}, { "id": 31, "questionText": "What is the effect of increasing 'k' in KNN classification?", "options": [ "Decreases bias", "Reduces overfitting", "Increases sensitivity to noise", "Increases model variance" ], "correctAnswerIndex": 1, "explanation": "A larger k smooths out predictions, reducing overfitting and variance but increasing bias." }, { "id": 32, "questionText": "Which distance metric can be more robust to outliers in KNN?", "options": [ "Cosine similarity", "Minkowski distance", "Manhattan distance", "Euclidean distance" ], "correctAnswerIndex": 2, "explanation": "Manhattan distance is less sensitive to large deviations in individual features than Euclidean distance." }, { "id": 33, "questionText": "How can KNN be modified for imbalanced datasets?", "options": [ "Use weighted voting based on distance", "Increase k to dataset size", "Normalize features only", "Remove minority class samples" ], "correctAnswerIndex": 0, "explanation": "Weighted voting gives closer neighbors more influence, reducing bias toward the majority class." }, { "id": 34, "questionText": "Which method can reduce KNN prediction time for large datasets?", "options": [ "Dimensionality reduction like PCA", "Using random shuffling", "Increasing k", "Adding more features" ], "correctAnswerIndex": 0, "explanation": "Reducing the number of features with PCA lowers dimensionality, which speeds up distance computation." }, { "id": 35, "questionText": "Why might KNN fail in very high-dimensional spaces?", "options": [ "Overfitting to majority class", "Random initialization", "Learning rate too high", "Curse of dimensionality" ], "correctAnswerIndex": 3, "explanation": "In high dimensions, points become equidistant and neighbors are less meaningful, reducing accuracy." 
}, { "id": 36, "questionText": "What does weighted KNN regression use instead of simple averaging?", "options": [ "Distance-based weighting of neighbors", "Median value of neighbors", "Majority vote of neighbors", "Random selection of neighbors" ], "correctAnswerIndex": 0, "explanation": "Weighted KNN regression assigns higher weights to closer neighbors when computing the predicted value." }, { "id": 37, "questionText": "Which technique is useful to handle categorical and numeric features together in KNN?", "options": [ "Ignore numeric features", "Convert categorical to numeric with one-hot encoding", "Normalize categorical features only", "Use majority voting only" ], "correctAnswerIndex": 1, "explanation": "One-hot encoding transforms categorical features to numeric so that distance metrics can be applied." }, { "id": 38, "questionText": "In KNN, what is the effect of noisy features?", "options": [ "Does not affect performance", "Automatically removed", "Reduces accuracy", "Improves accuracy" ], "correctAnswerIndex": 2, "explanation": "Noisy features distort distance calculations, reducing prediction accuracy." }, { "id": 39, "questionText": "Which of the following can help KNN generalize better?", "options": [ "Adding more irrelevant features", "Reducing k to 1", "Feature scaling and selection", "Increasing dataset size without preprocessing" ], "correctAnswerIndex": 2, "explanation": "Scaling ensures fair distance comparison, and selecting relevant features removes noise, improving generalization." }, { "id": 40, "questionText": "What happens if k is even and there is a tie in classification?", "options": [ "Prediction fails with error", "Tie-breaking strategy is needed", "Algorithm automatically increments k", "Randomly ignores the new point" ], "correctAnswerIndex": 1, "explanation": "When k is even, ties may occur; most implementations have a tie-breaking rule like choosing the closest neighbor." 
}, { "id": 41, "questionText": "Which preprocessing step can improve KNN on text data represented by TF-IDF vectors?", "options": [ "L2 normalization", "Random shuffling", "Adding more terms", "Stop-word removal only" ], "correctAnswerIndex": 0, "explanation": "L2 normalization ensures vectors are comparable in distance calculations for KNN." }, { "id": 42, "questionText": "Which of the following affects KNN accuracy most in practice?", "options": [ "Learning rate", "Random seed only", "Distance metric and k", "Number of trees" ], "correctAnswerIndex": 2, "explanation": "Choice of k and distance metric strongly influence KNN performance." }, { "id": 43, "questionText": "In KNN regression, how can you reduce the impact of outliers?", "options": [ "Use simple mean without weighting", "Increase k to dataset size", "Ignore preprocessing", "Use weighted averaging based on distance" ], "correctAnswerIndex": 3, "explanation": "Weighting closer neighbors more heavily reduces the effect of distant outliers." }, { "id": 44, "questionText": "Which approach can make KNN faster on large datasets?", "options": [ "Increase k to max", "Add random noise to data", "KD-Tree, Ball-Tree, or approximate nearest neighbor search", "Use high-dimensional features" ], "correctAnswerIndex": 2, "explanation": "Tree-based or approximate search structures reduce distance computations needed for prediction." }, { "id": 45, "questionText": "How does KNN handle multi-class classification?", "options": [ "By majority vote among neighbors", "Cannot handle multi-class", "By training separate binary classifiers", "Only predicts top two classes" ], "correctAnswerIndex": 0, "explanation": "KNN counts votes among k neighbors for all classes and selects the class with the highest votes." 
}, { "id": 46, "questionText": "Which distance metric is suitable for high-dimensional sparse data?", "options": [ "Manhattan distance", "Euclidean distance", "Cosine similarity", "Hamming distance" ], "correctAnswerIndex": 2, "explanation": "Cosine similarity works better for high-dimensional sparse vectors like TF-IDF representations." }, { "id": 47, "questionText": "What happens to KNN performance if features are not scaled?", "options": [ "Dominated by features with larger scales", "Performance improves automatically", "Distance calculation is unaffected", "Accuracy remains same always" ], "correctAnswerIndex": 0, "explanation": "Features with larger numeric ranges dominate distance computation, skewing predictions." }, { "id": 48, "questionText": "How can KNN be adapted for regression with categorical features?", "options": [ "Encode categories numerically or use mixed distance metric", "Use Euclidean distance directly", "Remove categorical features", "Only predict the most frequent category" ], "correctAnswerIndex": 0, "explanation": "Encoding categorical features allows KNN to compute distances effectively for regression tasks." }, { "id": 49, "questionText": "What is one common method to select an optimal k?", "options": [ "Maximizing feature count", "Using k=1 always", "Random selection", "Cross-validation" ], "correctAnswerIndex": 3, "explanation": "Cross-validation evaluates different k values to choose the one yielding best performance." }, { "id": 50, "questionText": "Which factor can lead to overfitting in KNN?", "options": [ "Using more neighbors", "Too small k value", "Scaling features", "Using weighted distance" ], "correctAnswerIndex": 1, "explanation": "A very small k (like k=1) can fit to noise and outliers, causing overfitting." 
}, { "id": 51, "questionText": "In KNN, what is an advantage of using odd k values in binary classification?", "options": [ "Avoid ties in voting", "Reduce distance calculations", "Increase speed", "Improve scaling automatically" ], "correctAnswerIndex": 0, "explanation": "Odd k values help prevent ties between two classes." }, { "id": 52, "questionText": "Which type of feature transformation is recommended for KNN?", "options": [ "Adding irrelevant features", "One-hot encoding only for numeric data", "Normalization or standardization", "Random shuffling of features" ], "correctAnswerIndex": 2, "explanation": "Normalization ensures fair contribution of each feature to distance calculation." }, { "id": 53, "questionText": "Which of the following reduces KNN sensitivity to outliers?", "options": [ "Decrease k to 1", "Use Euclidean distance only", "Remove normalization", "Weighted distance averaging" ], "correctAnswerIndex": 3, "explanation": "Weighting neighbors based on distance gives closer points more influence, reducing outlier impact." }, { "id": 54, "questionText": "In KNN, what is the effect of adding irrelevant features?", "options": [ "Automatically removed", "Decreases accuracy", "Increases accuracy", "No effect" ], "correctAnswerIndex": 1, "explanation": "Irrelevant features distort distance calculations, reducing prediction accuracy." }, { "id": 55, "questionText": "Which method can improve KNN performance in sparse datasets?", "options": [ "Ignore distance weighting", "Add noise to features", "Dimensionality reduction", "Increase k to dataset size" ], "correctAnswerIndex": 2, "explanation": "Reducing dimensionality can make distance computations more meaningful in sparse datasets." 
}, { "id": 56, "questionText": "Which approach helps handle large-scale KNN efficiently?", "options": [ "Increasing k arbitrarily", "Scaling only", "Approximate nearest neighbor search", "Random shuffling" ], "correctAnswerIndex": 2, "explanation": "Approximate nearest neighbor search reduces computational cost while giving nearly correct neighbors." }, { "id": 57, "questionText": "Which of the following is true for KNN regression prediction?", "options": [ "Weighted average based on neighbor distance", "Average of nearest neighbors’ values", "None of the above", "Both A and B" ], "correctAnswerIndex": 3, "explanation": "KNN regression can use simple or weighted averaging of neighbors’ values." }, { "id": 58, "questionText": "Which is a practical drawback of KNN in real-world systems?", "options": [ "Requires model training", "High prediction latency", "Automatically ignores irrelevant features", "Cannot handle numeric data" ], "correctAnswerIndex": 1, "explanation": "KNN computes distances at prediction time, leading to high latency for large datasets." }, { "id": 59, "questionText": "Which type of scaling preserves relative distances between points for KNN?", "options": [ "Min-Max scaling", "Log transformation only", "Adding random noise", "Shuffling features" ], "correctAnswerIndex": 0, "explanation": "Min-Max or standardization scales features to similar ranges while preserving relative distances." }, { "id": 60, "questionText": "Which is a disadvantage of KNN compared to parametric models?", "options": [ "Requires fixed training", "Slower predictions for large datasets", "Cannot model non-linear boundaries", "Sensitive to overfitting only" ], "correctAnswerIndex": 1, "explanation": "KNN stores all training data and computes distances, making predictions slower than parametric models." 
}, { "id": 61, "questionText": "How can KNN handle multi-label classification?", "options": [ "Uses separate KNN per label", "Cannot handle multi-label", "Predict all labels present in neighbors", "Only predicts one label" ], "correctAnswerIndex": 2, "explanation": "KNN can aggregate labels from neighbors and predict multiple labels per instance." }, { "id": 62, "questionText": "Which distance metric can handle mixed numeric and categorical data?", "options": [ "Gower distance", "Euclidean distance", "Cosine similarity", "Manhattan distance" ], "correctAnswerIndex": 0, "explanation": "Gower distance can compute similarity for mixed numeric and categorical features." }, { "id": 63, "questionText": "What is one way to reduce memory usage in KNN for large datasets?", "options": [ "Use condensed nearest neighbor algorithms", "Ignore irrelevant features", "Increase k to dataset size", "Normalize only" ], "correctAnswerIndex": 0, "explanation": "Condensed nearest neighbor algorithms reduce stored points while maintaining accuracy." }, { "id": 64, "questionText": "Which approach helps improve KNN in imbalanced datasets?", "options": [ "Increase irrelevant features", "Use k=1 always", "Distance-weighted voting", "Ignore normalization" ], "correctAnswerIndex": 2, "explanation": "Weighted voting gives closer points more influence, reducing bias toward majority class." }, { "id": 65, "questionText": "What is the effect of increasing feature dimensionality in KNN?", "options": [ "Computation decreases", "Feature importance is automatically computed", "Accuracy always improves", "Distances become less meaningful" ], "correctAnswerIndex": 3, "explanation": "High-dimensional spaces make points almost equidistant, reducing KNN effectiveness." 
}, { "id": 66, "questionText": "Which scenario can cause KNN to misclassify a data point?", "options": [ "Choosing odd k", "Using weighted voting", "Nearby points from other class dominate", "Normalization applied" ], "correctAnswerIndex": 2, "explanation": "If the nearest neighbors mostly belong to other classes, KNN may predict incorrectly." }, { "id": 67, "questionText": "Which strategy can improve KNN with very sparse datasets?", "options": [ "Add random features", "Ignore distance metric", "Dimensionality reduction", "Increase k arbitrarily" ], "correctAnswerIndex": 2, "explanation": "Reducing dimensionality reduces sparsity and makes distances meaningful." }, { "id": 68, "questionText": "What is a good rule of thumb for selecting k?", "options": [ "k = 1 always", "k = n/2", "k = sqrt(n)", "k = number of features" ], "correctAnswerIndex": 2, "explanation": "Using k = sqrt(n) balances bias and variance in most cases." }, { "id": 69, "questionText": "Which technique can speed up KNN predictions in high dimensions?", "options": [ "Approximate nearest neighbor algorithms", "Normalize only", "Random shuffling", "Increase k to max" ], "correctAnswerIndex": 0, "explanation": "Approximate nearest neighbor search reduces computation while maintaining accuracy." }, { "id": 70, "questionText": "Which type of data preprocessing improves KNN performance?", "options": [ "Random shuffling only", "Ignoring categorical features", "Adding irrelevant features", "Feature scaling and selection" ], "correctAnswerIndex": 3, "explanation": "Scaling ensures fair distance measurement, and selecting relevant features removes noise, improving predictions." 
}, { "id": 71, "questionText": "In a recommendation system using KNN, what could cause poor predictions?", "options": [ "High number of neighbors", "Sparse user-item interaction data", "Low-dimensional features", "Normalized data" ], "correctAnswerIndex": 1, "explanation": "Sparse interaction matrices reduce neighbor similarity reliability, causing poor recommendations." }, { "id": 72, "questionText": "Which approach is suitable for reducing KNN latency in a real-time system?", "options": [ "Randomly select features", "Increase k to dataset size", "Normalize data only", "Approximate nearest neighbor search" ], "correctAnswerIndex": 3, "explanation": "Approximate nearest neighbor algorithms provide fast predictions with minimal accuracy loss." }, { "id": 73, "questionText": "In high-dimensional gene expression data, KNN performance drops because:", "options": [ "Normalization causes data loss", "KNN overfits easily with large k", "Distances become less informative (curse of dimensionality)", "Minority classes dominate" ], "correctAnswerIndex": 2, "explanation": "High-dimensional data makes points nearly equidistant, reducing neighbor relevance and accuracy." }, { "id": 74, "questionText": "Scenario: A new customer profile is very different from existing customers. Which issue might KNN face?", "options": [ "Predicted class may be inaccurate due to no similar neighbors", "KNN will automatically ignore the profile", "Model overfits automatically", "KNN will generate a new class" ], "correctAnswerIndex": 0, "explanation": "If no close neighbors exist, KNN cannot provide reliable predictions." 
}, { "id": 75, "questionText": "What is the main challenge of KNN when deployed in high-frequency trading?", "options": [ "Weighted voting is not supported", "Overfitting to training set", "Distance metric fails for numeric data", "High prediction latency due to large datasets" ], "correctAnswerIndex": 3, "explanation": "KNN requires computing distances to all stored points, making it too slow for real-time predictions in trading." }, { "id": 76, "questionText": "Scenario: Two classes lie very close together in feature space and overlap. Which KNN behavior is expected?", "options": [ "KNN ignores overlapping points", "Higher misclassification rate", "KNN increases k automatically", "Predictions are perfect" ], "correctAnswerIndex": 1, "explanation": "KNN struggles with overlapping classes as neighbors from the wrong class may dominate." }, { "id": 77, "questionText": "Which method improves KNN performance for very high-dimensional image embeddings?", "options": [ "Use raw pixel values directly", "Dimensionality reduction (PCA, t-SNE, or UMAP)", "Increase k to max", "Randomly shuffle features" ], "correctAnswerIndex": 1, "explanation": "Reducing dimensions retains essential information and makes distances meaningful." }, { "id": 78, "questionText": "Scenario: A fraud detection system uses KNN. New types of fraud appear. What is the limitation?", "options": [ "KNN cannot detect unseen patterns without similar neighbors", "Prediction latency decreases", "KNN automatically adapts", "Accuracy improves with noise" ], "correctAnswerIndex": 0, "explanation": "KNN relies on similarity to existing points, so unseen patterns are difficult to detect." 
}, { "id": 79, "questionText": "What is a limitation of KNN in large-scale recommendation systems?", "options": [ "Cannot handle numeric data", "Fails on binary features", "Overfits automatically", "Memory and computation intensive" ], "correctAnswerIndex": 3, "explanation": "Storing all user-item interactions and computing distances is memory and CPU intensive." }, { "id": 80, "questionText": "Which approach is suitable for speeding up KNN with millions of samples?", "options": [ "Use weighted voting only", "Increase k to n", "Use approximate nearest neighbor libraries like FAISS or Annoy", "Normalize features only" ], "correctAnswerIndex": 2, "explanation": "Approximate search libraries significantly reduce computation while maintaining near-optimal accuracy." }, { "id": 81, "questionText": "Scenario: In KNN, a feature has a huge numeric range. What problem arises?", "options": [ "Feature dominates distance, biasing prediction", "Weighted voting fails", "Prediction latency reduces", "Feature is ignored automatically" ], "correctAnswerIndex": 0, "explanation": "Large-scale features dominate distance computation, skewing predictions unless scaled." }, { "id": 82, "questionText": "What is a strategy to handle missing values in KNN?", "options": [ "Impute missing values before computing distances", "Increase k to handle missing", "Ignore missing values automatically", "Remove all neighbors with missing values" ], "correctAnswerIndex": 0, "explanation": "Missing values should be imputed (mean, median, or mode) to allow proper distance computation." }, { "id": 83, "questionText": "Scenario: In medical diagnosis using KNN, rare disease cases are underrepresented. Which is a solution?", "options": [ "Use raw unscaled features", "Ignore minority class", "Weighted voting or synthetic oversampling (SMOTE)", "Reduce k to 1" ], "correctAnswerIndex": 2, "explanation": "Weighted voting or synthetic oversampling addresses imbalance and improves prediction of rare cases." 
}, { "id": 84, "questionText": "Which technique reduces distance computation in high-dimensional KNN?", "options": [ "Random shuffling", "Adding irrelevant features", "Dimensionality reduction", "Increasing k to n" ], "correctAnswerIndex": 2, "explanation": "Reducing dimensions reduces number of calculations and improves neighbor relevance." }, { "id": 85, "questionText": "Scenario: In KNN regression, a few extreme neighbor values exist. What is the impact?", "options": [ "Prediction unaffected", "Predicted value may be skewed unless weighted", "Accuracy improves automatically", "KNN fails completely" ], "correctAnswerIndex": 1, "explanation": "Outliers can bias the predicted mean; using weighted averaging mitigates this effect." }, { "id": 86, "questionText": "What is a benefit of KD-Tree in KNN?", "options": [ "Reduces neighbor search complexity in low dimensions", "Reduces bias of model", "Automatically scales features", "Increases training time significantly" ], "correctAnswerIndex": 0, "explanation": "KD-Tree allows efficient nearest neighbor search in low to moderate dimensions." }, { "id": 87, "questionText": "Scenario: KNN is applied on time-series data without preprocessing. What is a potential problem?", "options": [ "Outliers are automatically removed", "Accuracy automatically improves", "Distance metric ignores temporal order", "KNN predicts trends perfectly" ], "correctAnswerIndex": 2, "explanation": "KNN does not account for temporal order; raw time-series may not capture pattern similarity properly." }, { "id": 88, "questionText": "Which scenario illustrates KNN's limitation?", "options": [ "Balanced, low-dimensional data", "Normalized dataset", "A new point far from all existing points", "Few noise-free features" ], "correctAnswerIndex": 2, "explanation": "When a point is far from all neighbors, KNN cannot predict reliably." }, { "id": 89, "questionText": "Scenario: KNN used for text document classification with TF-IDF vectors. 
Which step is crucial?", "options": [ "Increase k to dataset size", "Adding irrelevant terms", "Ignore vector scaling", "L2 normalization to make distances comparable" ], "correctAnswerIndex": 3, "explanation": "TF-IDF vectors should be normalized to ensure fair distance computation." }, { "id": 90, "questionText": "Scenario: KNN struggles with overlapping clusters in feature space. What is a solution?", "options": [ "Use feature engineering to separate clusters", "Ignore scaling", "Increase k arbitrarily", "Remove minority points" ], "correctAnswerIndex": 0, "explanation": "Engineering features that better separate classes improves KNN accuracy." }, { "id": 91, "questionText": "Which approach can improve KNN in very large datasets without losing much accuracy?", "options": [ "Use approximate nearest neighbor search", "Increase k to dataset size", "Ignore preprocessing", "Add random noise" ], "correctAnswerIndex": 0, "explanation": "Approximate search reduces computation while keeping predictions close to exact KNN." }, { "id": 92, "questionText": "Scenario: Online KNN requires predictions every second. Challenge?", "options": [ "Cannot handle numeric data", "KNN scales features automatically", "High latency due to full distance computation", "Overfitting automatically" ], "correctAnswerIndex": 2, "explanation": "Real-time prediction is slow because KNN computes distance to all points at query time." }, { "id": 93, "questionText": "Scenario: Multi-class KNN with imbalanced classes. What can improve fairness?", "options": [ "Use k=1 always", "Random shuffling", "Distance-weighted voting", "Ignore minority classes" ], "correctAnswerIndex": 2, "explanation": "Weighted voting ensures closer neighbors have more influence, improving minority class predictions." }, { "id": 94, "questionText": "Scenario: A KNN model is deployed for anomaly detection. 
Limitation?", "options": [ "Rare anomalies may have no close neighbors", "Weighted KNN solves all issues", "Accuracy improves automatically", "Feature scaling is irrelevant" ], "correctAnswerIndex": 0, "explanation": "Rare, isolated anomalies have no similar labeled examples nearby, so their nearest neighbors are mostly normal points and a KNN classifier tends to label them as normal." }, { "id": 95, "questionText": "Scenario: In high-dimensional image retrieval, KNN prediction is slow. Solution?", "options": [ "Use raw pixel vectors", "Use approximate nearest neighbor algorithms like FAISS", "Increase k arbitrarily", "Ignore normalization" ], "correctAnswerIndex": 1, "explanation": "Approximate algorithms reduce computation significantly while maintaining retrieval quality." }, { "id": 96, "questionText": "Which scenario can lead to KNN overfitting?", "options": [ "Very small k and noisy data", "Large k with clean data", "Normalized features", "Weighted voting" ], "correctAnswerIndex": 0, "explanation": "Small k may fit noise and outliers, causing overfitting." }, { "id": 97, "questionText": "Scenario: KNN regression for house prices with outlier houses. Best approach?", "options": [ "Increase k arbitrarily", "Remove scaling", "Simple mean ignoring distances", "Weighted averaging by distance" ], "correctAnswerIndex": 3, "explanation": "Weighted averaging reduces outlier impact, giving closer neighbors more influence." }, { "id": 98, "questionText": "Scenario: KNN applied to large sparse matrix of user ratings. Challenge?", "options": [ "Distance metric fails", "High memory usage and computation", "Overfitting automatically", "Minority class ignored" ], "correctAnswerIndex": 1, "explanation": "Even with sparse storage, keeping every user's ratings in memory and computing distances over a very high-dimensional matrix at query time is expensive." }, { "id": 99, "questionText": "Scenario: Real-time KNN requires prediction in milliseconds. 
Solution?", "options": [ "Use weighted voting only", "Use approximate nearest neighbor search", "Increase k to n", "Ignore feature scaling" ], "correctAnswerIndex": 1, "explanation": "Approximate methods like Annoy or FAISS significantly speed up prediction for large datasets." }, { "id": 100, "questionText": "Scenario: High-dimensional text KNN classification. Which step is crucial?", "options": [ "Ignore scaling", "Dimensionality reduction or normalization", "Add random features", "Use raw text vectors" ], "correctAnswerIndex": 1, "explanation": "High-dimensional text vectors suffer from the curse of dimensionality; normalization (like L2) or dimensionality reduction is needed to make distances meaningful." } ] }