| { | |
| "title": "K-Nearest Neighbors (KNN) Mastery: 100 MCQs", | |
| "description": "A comprehensive set of 100 multiple-choice questions focused entirely on K-Nearest Neighbors (KNN) — covering intuition, distance metrics, hyperparameter tuning, classification & regression behavior, curse of dimensionality, and real-world use cases.", | |
| "questions": [ | |
| { | |
| "id": 1, | |
| "questionText": "What is the core principle behind the KNN algorithm?", | |
| "options": [ | |
| "It builds a decision tree and splits data recursively.", | |
| "It constructs a probabilistic model using Bayes theorem.", | |
| "It predicts the label based on the majority class of k nearest data points.", | |
| "It reduces dimensionality using PCA." | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "KNN predicts the class based on voting from the k nearest neighbors in the training data." | |
| }, | |
| { | |
| "id": 2, | |
| "questionText": "KNN is considered which type of learning algorithm?", | |
| "options": [ | |
| "Eager learning", | |
| "Reinforcement learning", | |
| "Unsupervised learning", | |
| "Lazy learning" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "KNN is a lazy learner because it does not build a model during training; it only stores the data." | |
| }, | |
| { | |
| "id": 3, | |
| "questionText": "Which distance metric is most commonly used in KNN?", | |
| "options": [ | |
| "Cosine Similarity", | |
| "Manhattan Distance", | |
| "Jaccard Distance", | |
| "Euclidean Distance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Euclidean Distance (L2 norm) is the most common distance metric used in KNN." | |
| }, | |
| { | |
| "id": 4, | |
| "questionText": "KNN is mainly used for:", | |
| "options": [ | |
| "Only regression", | |
| "Only clustering", | |
| "Both classification and regression", | |
| "Only classification" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "KNN can perform both classification (class votes) and regression (mean of k nearest values)." | |
| }, | |
| { | |
| "id": 5, | |
| "questionText": "What happens if k is set to a very large value?", | |
| "options": [ | |
| "The model becomes faster and more accurate always.", | |
| "The model becomes overly generalized and biased.", | |
| "The model becomes highly sensitive to noise.", | |
| "The model becomes very overfitted." | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "A very large k considers too many neighbors and may smooth out genuine class boundaries, causing high bias." | |
| }, | |
| { | |
| "id": 6, | |
| "questionText": "In KNN, what does the parameter 'k' represent?", | |
| "options": [ | |
| "Number of features in the dataset", | |
| "Depth of the tree used internally", | |
| "Number of nearest neighbors considered", | |
| "Learning rate of the algorithm" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "'k' is the number of closest neighbors used to decide the predicted class or value." | |
| }, | |
| { | |
| "id": 7, | |
| "questionText": "Which of the following is true about KNN training phase?", | |
| "options": [ | |
| "It stores all training data without building a model", | |
| "It builds a model by computing centroids", | |
| "It generates decision boundaries explicitly", | |
| "It calculates feature importance scores" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KNN is a lazy learner; during training, it only stores the dataset for use at prediction time." | |
| }, | |
| { | |
| "id": 8, | |
| "questionText": "Which KNN variant can handle weighted voting?", | |
| "options": [ | |
| "Uniform KNN", | |
| "Decision Tree", | |
| "Random Forest", | |
| "Weighted KNN" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Weighted KNN gives closer neighbors higher influence while predicting the output." | |
| }, | |
| { | |
| "id": 9, | |
| "questionText": "Which of the following affects KNN performance the most?", | |
| "options": [ | |
| "Choice of distance metric", | |
| "Activation function", | |
| "Regularization parameter", | |
| "Number of epochs" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KNN relies on distance computations; the choice of distance metric (Euclidean, Manhattan, etc.) is critical." | |
| }, | |
| { | |
| "id": 10, | |
| "questionText": "What is the default distance metric for most KNN implementations?", | |
| "options": [ | |
| "Cosine similarity", | |
| "Manhattan distance", | |
| "Hamming distance", | |
| "Euclidean distance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Euclidean distance is most commonly used by default in KNN implementations." | |
| }, | |
| { | |
| "id": 11, | |
| "questionText": "How does KNN handle a new data point for prediction?", | |
| "options": [ | |
| "It updates its model parameters", | |
| "It finds k closest points in the training set and predicts based on them", | |
| "It generates random prediction", | |
| "It builds a regression line through neighbors" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "KNN predicts by looking at the nearest k training points and using majority vote (classification) or average (regression)." | |
| }, | |
| { | |
| "id": 12, | |
| "questionText": "What is the main drawback of KNN on large datasets?", | |
| "options": [ | |
| "Does not scale to many classes", | |
| "High training time", | |
| "Cannot handle missing values", | |
| "High prediction time" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "KNN stores all training data, so prediction involves computing distances to all points, which is slow for large datasets." | |
| }, | |
| { | |
| "id": 13, | |
| "questionText": "Which of the following is true about KNN and normalization?", | |
| "options": [ | |
| "Normalization is not required", | |
| "Normalization only applies to categorical data", | |
| "Normalization changes class labels", | |
| "Normalization improves distance-based predictions" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Since KNN uses distances, features with larger scales can dominate. Normalization ensures fair contribution from all features." | |
| }, | |
| { | |
| "id": 14, | |
| "questionText": "How does KNN behave in the presence of irrelevant features?", | |
| "options": [ | |
| "Features are automatically ignored", | |
| "Performance improves", | |
| "Performance drops", | |
| "Algorithm ignores them during prediction" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Irrelevant features can distort distance calculations and reduce KNN prediction accuracy." | |
| }, | |
| { | |
| "id": 15, | |
| "questionText": "What type of algorithm is KNN considered in terms of model structure?", | |
| "options": [ | |
| "Non-parametric", | |
| "Linear", | |
| "Probabilistic", | |
| "Parametric" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KNN is non-parametric because it does not assume a predefined form for the function mapping inputs to outputs." | |
| }, | |
| { | |
| "id": 16, | |
| "questionText": "Which K value is generally recommended to avoid overfitting in KNN?", | |
| "options": [ | |
| "k moderate value like sqrt(n)", | |
| "k = 1", | |
| "k very small", | |
| "k equal to dataset size" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "A moderate k like sqrt(n) balances bias and variance, preventing overfitting." | |
| }, | |
| { | |
| "id": 17, | |
| "questionText": "Which metric is suitable for categorical variables in KNN?", | |
| "options": [ | |
| "Minkowski distance", | |
| "Manhattan distance", | |
| "Euclidean distance", | |
| "Hamming distance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Hamming distance counts mismatches between categorical feature values." | |
| }, | |
| { | |
| "id": 18, | |
| "questionText": "Which of the following is NOT a type of KNN?", | |
| "options": [ | |
| "Weighted KNN", | |
| "Regression KNN", | |
| "Decision KNN", | |
| "Classification KNN" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "There is no 'Decision KNN'; KNN is mainly classification, regression, or weighted variant." | |
| }, | |
| { | |
| "id": 19, | |
| "questionText": "What is the effect of having two classes with very imbalanced sizes in KNN?", | |
| "options": [ | |
| "Minority class dominates predictions", | |
| "Majority class dominates predictions", | |
| "KNN automatically balances classes", | |
| "Minor impact on accuracy" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "KNN predictions are influenced by majority neighbors; imbalanced classes may bias the results." | |
| }, | |
| { | |
| "id": 20, | |
| "questionText": "What is the primary storage requirement for KNN?", | |
| "options": [ | |
| "Feature coefficients", | |
| "All training data points", | |
| "Decision thresholds", | |
| "Distance matrices precomputed" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "KNN requires storing all training data for distance comparisons at prediction time." | |
| }, | |
| { | |
| "id": 21, | |
| "questionText": "What does the term 'curse of dimensionality' refer to in KNN?", | |
| "options": [ | |
| "Overfitting in small datasets", | |
| "High computation time with too many neighbors", | |
| "Distances become less meaningful in high dimensions", | |
| "Underfitting in large datasets" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "As dimensions increase, data points become sparse and distance measures lose effectiveness, reducing KNN performance." | |
| }, | |
| { | |
| "id": 22, | |
| "questionText": "Which technique can speed up KNN on large datasets?", | |
| "options": [ | |
| "KD-Trees or Ball-Trees", | |
| "Using logistic regression instead", | |
| "Principal Component Analysis", | |
| "Random Forest preprocessing" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KD-Trees and Ball-Trees organize data to quickly find nearest neighbors without computing all distances." | |
| }, | |
| { | |
| "id": 23, | |
| "questionText": "In KNN regression, how is the predicted value calculated?", | |
| "options": [ | |
| "Using linear regression on neighbors", | |
| "Using gradient descent", | |
| "Majority vote of nearest neighbors", | |
| "Average of nearest neighbors’ values" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "KNN regression predicts by taking the mean (or sometimes weighted mean) of the k nearest neighbors' values." | |
| }, | |
| { | |
| "id": 24, | |
| "questionText": "Which of the following is true about KNN decision boundary?", | |
| "options": [ | |
| "Always axis-aligned", | |
| "Always linear", | |
| "Depends on data distribution", | |
| "Always circular" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "KNN decision boundaries can be irregular and follow the shape of data; they are not restricted to linear forms." | |
| }, | |
| { | |
| "id": 25, | |
| "questionText": "Which method can improve KNN on high-dimensional data?", | |
| "options": [ | |
| "Increasing k to dataset size", | |
| "Feature selection", | |
| "Ignoring normalization", | |
| "Adding more neighbors" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Selecting relevant features reduces dimensionality, improving distance calculation reliability." | |
| }, | |
| { | |
| "id": 26, | |
| "questionText": "KNN cannot handle which of the following natively?", | |
| "options": [ | |
| "Large datasets efficiently", | |
| "Numeric features", | |
| "Categorical features", | |
| "Missing data directly" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "KNN cannot handle missing values without preprocessing (imputation or removal)." | |
| }, | |
| { | |
| "id": 27, | |
| "questionText": "How does KNN handle ties in classification voting?", | |
| "options": [ | |
| "Chooses randomly among tied classes", | |
| "Fails with an error", | |
| "Chooses the closest neighbor's class", | |
| "Always chooses class 0" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Many implementations break ties by selecting the class of the closest neighbor among the tied classes." | |
| }, | |
| { | |
| "id": 28, | |
| "questionText": "Which scenario would make KNN less suitable?", | |
| "options": [ | |
| "Low-dimensional small datasets", | |
| "High-dimensional large datasets", | |
| "Well-separated clusters", | |
| "Binary classification" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "In high-dimensional large datasets, KNN is slow and distances lose meaning, reducing accuracy." | |
| }, | |
| { | |
| "id": 29, | |
| "questionText": "What is the time complexity of a naive KNN prediction with n training points?", | |
| "options": [ | |
| "O(n^2)", | |
| "O(1)", | |
| "O(log n)", | |
| "O(n)" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Naive KNN computes distances to all n points for each prediction, giving O(n) complexity." | |
| }, | |
| { | |
| "id": 30, | |
| "questionText": "What preprocessing step can improve KNN accuracy?", | |
| "options": [ | |
| "Adding irrelevant features", | |
| "Removing the dependent variable", | |
| "Randomly shuffling the data", | |
| "Scaling features to similar range" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Scaling features ensures fair distance computation, preventing one feature from dominating due to larger numeric range." | |
| }, | |
| { | |
| "id": 31, | |
| "questionText": "What is the effect of increasing 'k' in KNN classification?", | |
| "options": [ | |
| "Decreases bias", | |
| "Reduces overfitting", | |
| "Increases sensitivity to noise", | |
| "Increases model variance" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "A larger k smooths out predictions, reducing overfitting and variance but increasing bias." | |
| }, | |
| { | |
| "id": 32, | |
| "questionText": "Which distance metric can be more robust to outliers in KNN?", | |
| "options": [ | |
| "Cosine similarity", | |
| "Minkowski distance", | |
| "Manhattan distance", | |
| "Euclidean distance" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Manhattan distance is less sensitive to large deviations in individual features than Euclidean distance." | |
| }, | |
| { | |
| "id": 33, | |
| "questionText": "How can KNN be modified for imbalanced datasets?", | |
| "options": [ | |
| "Use weighted voting based on distance", | |
| "Increase k to dataset size", | |
| "Normalize features only", | |
| "Remove minority class samples" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Weighted voting gives closer neighbors more influence, reducing bias toward the majority class." | |
| }, | |
| { | |
| "id": 34, | |
| "questionText": "Which method can reduce KNN prediction time for large datasets?", | |
| "options": [ | |
| "Dimensionality reduction like PCA", | |
| "Using random shuffling", | |
| "Increasing k", | |
| "Adding more features" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Reducing the number of features with PCA lowers dimensionality, which speeds up distance computation." | |
| }, | |
| { | |
| "id": 35, | |
| "questionText": "Why might KNN fail in very high-dimensional spaces?", | |
| "options": [ | |
| "Overfitting to majority class", | |
| "Random initialization", | |
| "Learning rate too high", | |
| "Curse of dimensionality" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "In high dimensions, points become equidistant and neighbors are less meaningful, reducing accuracy." | |
| }, | |
| { | |
| "id": 36, | |
| "questionText": "What does weighted KNN regression use instead of simple averaging?", | |
| "options": [ | |
| "Distance-based weighting of neighbors", | |
| "Median value of neighbors", | |
| "Majority vote of neighbors", | |
| "Random selection of neighbors" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Weighted KNN regression assigns higher weights to closer neighbors when computing the predicted value." | |
| }, | |
| { | |
| "id": 37, | |
| "questionText": "Which technique is useful to handle categorical and numeric features together in KNN?", | |
| "options": [ | |
| "Ignore numeric features", | |
| "Convert categorical to numeric with one-hot encoding", | |
| "Normalize categorical features only", | |
| "Use majority voting only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "One-hot encoding transforms categorical features to numeric so that distance metrics can be applied." | |
| }, | |
| { | |
| "id": 38, | |
| "questionText": "In KNN, what is the effect of noisy features?", | |
| "options": [ | |
| "Does not affect performance", | |
| "Automatically removed", | |
| "Reduces accuracy", | |
| "Improves accuracy" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Noisy features distort distance calculations, reducing prediction accuracy." | |
| }, | |
| { | |
| "id": 39, | |
| "questionText": "Which of the following can help KNN generalize better?", | |
| "options": [ | |
| "Adding more irrelevant features", | |
| "Reducing k to 1", | |
| "Feature scaling and selection", | |
| "Increasing dataset size without preprocessing" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Scaling ensures fair distance comparison, and selecting relevant features removes noise, improving generalization." | |
| }, | |
| { | |
| "id": 40, | |
| "questionText": "What happens if k is even and there is a tie in classification?", | |
| "options": [ | |
| "Prediction fails with error", | |
| "Tie-breaking strategy is needed", | |
| "Algorithm automatically increments k", | |
| "Randomly ignores the new point" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "When k is even, ties may occur; most implementations have a tie-breaking rule like choosing the closest neighbor." | |
| }, | |
| { | |
| "id": 41, | |
| "questionText": "Which preprocessing step can improve KNN on text data represented by TF-IDF vectors?", | |
| "options": [ | |
| "L2 normalization", | |
| "Random shuffling", | |
| "Adding more terms", | |
| "Stop-word removal only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "L2 normalization ensures vectors are comparable in distance calculations for KNN." | |
| }, | |
| { | |
| "id": 42, | |
| "questionText": "Which of the following affects KNN accuracy most in practice?", | |
| "options": [ | |
| "Learning rate", | |
| "Random seed only", | |
| "Distance metric and k", | |
| "Number of trees" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Choice of k and distance metric strongly influence KNN performance." | |
| }, | |
| { | |
| "id": 43, | |
| "questionText": "In KNN regression, how can you reduce the impact of outliers?", | |
| "options": [ | |
| "Use simple mean without weighting", | |
| "Increase k to dataset size", | |
| "Ignore preprocessing", | |
| "Use weighted averaging based on distance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Weighting closer neighbors more heavily reduces the effect of distant outliers." | |
| }, | |
| { | |
| "id": 44, | |
| "questionText": "Which approach can make KNN faster on large datasets?", | |
| "options": [ | |
| "Increase k to max", | |
| "Add random noise to data", | |
| "KD-Tree, Ball-Tree, or approximate nearest neighbor search", | |
| "Use high-dimensional features" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Tree-based or approximate search structures reduce distance computations needed for prediction." | |
| }, | |
| { | |
| "id": 45, | |
| "questionText": "How does KNN handle multi-class classification?", | |
| "options": [ | |
| "By majority vote among neighbors", | |
| "Cannot handle multi-class", | |
| "By training separate binary classifiers", | |
| "Only predicts top two classes" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KNN counts votes among k neighbors for all classes and selects the class with the highest votes." | |
| }, | |
| { | |
| "id": 46, | |
| "questionText": "Which distance metric is suitable for high-dimensional sparse data?", | |
| "options": [ | |
| "Manhattan distance", | |
| "Euclidean distance", | |
| "Cosine similarity", | |
| "Hamming distance" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Cosine similarity works better for high-dimensional sparse vectors like TF-IDF representations." | |
| }, | |
| { | |
| "id": 47, | |
| "questionText": "What happens to KNN performance if features are not scaled?", | |
| "options": [ | |
| "Dominated by features with larger scales", | |
| "Performance improves automatically", | |
| "Distance calculation is unaffected", | |
| "Accuracy remains same always" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Features with larger numeric ranges dominate distance computation, skewing predictions." | |
| }, | |
| { | |
| "id": 48, | |
| "questionText": "How can KNN be adapted for regression with categorical features?", | |
| "options": [ | |
| "Encode categories numerically or use mixed distance metric", | |
| "Use Euclidean distance directly", | |
| "Remove categorical features", | |
| "Only predict the most frequent category" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Encoding categorical features allows KNN to compute distances effectively for regression tasks." | |
| }, | |
| { | |
| "id": 49, | |
| "questionText": "What is one common method to select an optimal k?", | |
| "options": [ | |
| "Maximizing feature count", | |
| "Using k=1 always", | |
| "Random selection", | |
| "Cross-validation" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Cross-validation evaluates different k values to choose the one yielding best performance." | |
| }, | |
| { | |
| "id": 50, | |
| "questionText": "Which factor can lead to overfitting in KNN?", | |
| "options": [ | |
| "Using fewer neighbors", | |
| "Too small k value", | |
| "Scaling features", | |
| "Using weighted distance" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "A very small k (like k=1) can fit to noise and outliers, causing overfitting." | |
| }, | |
| { | |
| "id": 51, | |
| "questionText": "In KNN, what is an advantage of using odd k values in binary classification?", | |
| "options": [ | |
| "Avoid ties in voting", | |
| "Reduce distance calculations", | |
| "Increase speed", | |
| "Improve scaling automatically" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Odd k values help prevent ties between two classes." | |
| }, | |
| { | |
| "id": 52, | |
| "questionText": "Which type of feature transformation is recommended for KNN?", | |
| "options": [ | |
| "Adding irrelevant features", | |
| "One-hot encoding only for numeric data", | |
| "Normalization or standardization", | |
| "Random shuffling of features" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Normalization ensures fair contribution of each feature to distance calculation." | |
| }, | |
| { | |
| "id": 53, | |
| "questionText": "Which of the following reduces KNN sensitivity to outliers?", | |
| "options": [ | |
| "Increase k to 1", | |
| "Use Euclidean distance only", | |
| "Remove normalization", | |
| "Weighted distance averaging" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Weighting neighbors based on distance gives closer points more influence, reducing outlier impact." | |
| }, | |
| { | |
| "id": 54, | |
| "questionText": "In KNN, what is the effect of adding irrelevant features?", | |
| "options": [ | |
| "Automatically removed", | |
| "Decreases accuracy", | |
| "Increases accuracy", | |
| "No effect" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Irrelevant features distort distance calculations, reducing prediction accuracy." | |
| }, | |
| { | |
| "id": 55, | |
| "questionText": "Which method can improve KNN performance in sparse datasets?", | |
| "options": [ | |
| "Ignore distance weighting", | |
| "Add noise to features", | |
| "Dimensionality reduction", | |
| "Increase k to dataset size" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Reducing dimensionality can make distance computations more meaningful in sparse datasets." | |
| }, | |
| { | |
| "id": 56, | |
| "questionText": "Which approach helps handle large-scale KNN efficiently?", | |
| "options": [ | |
| "Increasing k arbitrarily", | |
| "Scaling only", | |
| "Approximate nearest neighbor search", | |
| "Random shuffling" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Approximate nearest neighbor search reduces computational cost while giving nearly correct neighbors." | |
| }, | |
| { | |
| "id": 57, | |
| "questionText": "Which of the following is true for KNN regression prediction?", | |
| "options": [ | |
| "Weighted average based on neighbor distance", | |
| "Average of nearest neighbors’ values", | |
| "None of the above", | |
| "Both A and B" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "KNN regression can use simple or weighted averaging of neighbors’ values." | |
| }, | |
| { | |
| "id": 58, | |
| "questionText": "Which is a practical drawback of KNN in real-world systems?", | |
| "options": [ | |
| "Requires model training", | |
| "High prediction latency", | |
| "Automatically ignores irrelevant features", | |
| "Cannot handle numeric data" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "KNN computes distances at prediction time, leading to high latency for large datasets." | |
| }, | |
| { | |
| "id": 59, | |
| "questionText": "Which type of scaling preserves relative distances between points for KNN?", | |
| "options": [ | |
| "Min-Max scaling", | |
| "Log transformation only", | |
| "Adding random noise", | |
| "Shuffling features" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Min-Max or standardization scales features to similar ranges while preserving relative distances." | |
| }, | |
| { | |
| "id": 60, | |
| "questionText": "Which is a disadvantage of KNN compared to parametric models?", | |
| "options": [ | |
| "Requires fixed training", | |
| "Slower predictions for large datasets", | |
| "Cannot model non-linear boundaries", | |
| "Sensitive to overfitting only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "KNN stores all training data and computes distances, making predictions slower than parametric models." | |
| }, | |
| { | |
| "id": 61, | |
| "questionText": "How can KNN handle multi-label classification?", | |
| "options": [ | |
| "Uses separate KNN per label", | |
| "Cannot handle multi-label", | |
| "Predict all labels present in neighbors", | |
| "Only predicts one label" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "KNN can aggregate labels from neighbors and predict multiple labels per instance." | |
| }, | |
| { | |
| "id": 62, | |
| "questionText": "Which distance metric can handle mixed numeric and categorical data?", | |
| "options": [ | |
| "Gower distance", | |
| "Euclidean distance", | |
| "Cosine similarity", | |
| "Manhattan distance" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Gower distance can compute similarity for mixed numeric and categorical features." | |
| }, | |
| { | |
| "id": 63, | |
| "questionText": "What is one way to reduce memory usage in KNN for large datasets?", | |
| "options": [ | |
| "Use condensed nearest neighbor algorithms", | |
| "Ignore irrelevant features", | |
| "Increase k to dataset size", | |
| "Normalize only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Condensed nearest neighbor algorithms reduce stored points while maintaining accuracy." | |
| }, | |
| { | |
| "id": 64, | |
| "questionText": "Which approach helps improve KNN in imbalanced datasets?", | |
| "options": [ | |
| "Increase irrelevant features", | |
| "Use k=1 always", | |
| "Distance-weighted voting", | |
| "Ignore normalization" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Weighted voting gives closer points more influence, reducing bias toward majority class." | |
| }, | |
| { | |
| "id": 65, | |
| "questionText": "What is the effect of increasing feature dimensionality in KNN?", | |
| "options": [ | |
| "Computation decreases", | |
| "Feature importance is automatically computed", | |
| "Accuracy always improves", | |
| "Distances become less meaningful" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "High-dimensional spaces make points almost equidistant, reducing KNN effectiveness." | |
| }, | |
| { | |
| "id": 66, | |
| "questionText": "Which scenario can cause KNN to misclassify a data point?", | |
| "options": [ | |
| "Choosing odd k", | |
| "Using weighted voting", | |
| "Nearby points from other class dominate", | |
| "Normalization applied" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "If neighbors are closer from other classes, KNN may predict incorrectly." | |
| }, | |
| { | |
| "id": 67, | |
| "questionText": "Which strategy can improve KNN with very sparse datasets?", | |
| "options": [ | |
| "Add random features", | |
| "Ignore distance metric", | |
| "Dimensionality reduction", | |
| "Increase k arbitrarily" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Reducing dimensionality reduces sparsity and makes distances meaningful." | |
| }, | |
| { | |
| "id": 68, | |
| "questionText": "What is a good rule of thumb for selecting k?", | |
| "options": [ | |
| "k = 1 always", | |
| "k = n/2", | |
| "k = sqrt(n)", | |
| "k = number of features" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Using k = sqrt(n) balances bias and variance in most cases." | |
| }, | |
| { | |
| "id": 69, | |
| "questionText": "Which technique can speed up KNN predictions in high dimensions?", | |
| "options": [ | |
| "Approximate nearest neighbor algorithms", | |
| "Normalize only", | |
| "Random shuffling", | |
| "Increase k to max" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Approximate nearest neighbor search reduces computation while maintaining accuracy." | |
| }, | |
| { | |
| "id": 70, | |
| "questionText": "Which type of data preprocessing improves KNN performance?", | |
| "options": [ | |
| "Random shuffling only", | |
| "Ignoring categorical features", | |
| "Adding irrelevant features", | |
| "Feature scaling and selection" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Scaling ensures fair distance measurement, and selecting relevant features removes noise, improving predictions." | |
| }, | |
| { | |
| "id": 71, | |
| "questionText": "In a recommendation system using KNN, what could cause poor predictions?", | |
| "options": [ | |
| "High number of neighbors", | |
| "Sparse user-item interaction data", | |
| "Low-dimensional features", | |
| "Normalized data" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Sparse interaction matrices reduce neighbor similarity reliability, causing poor recommendations." | |
| }, | |
| { | |
| "id": 72, | |
| "questionText": "Which approach is suitable for reducing KNN latency in a real-time system?", | |
| "options": [ | |
| "Randomly select features", | |
| "Increase k to dataset size", | |
| "Normalize data only", | |
| "Approximate nearest neighbor search" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Approximate nearest neighbor algorithms provide fast predictions with minimal accuracy loss." | |
| }, | |
| { | |
| "id": 73, | |
| "questionText": "In high-dimensional gene expression data, KNN performance drops because:", | |
| "options": [ | |
| "Normalization causes data loss", | |
| "KNN overfits easily with large k", | |
| "Distances become less informative (curse of dimensionality)", | |
| "Minority classes dominate" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "High-dimensional data makes points nearly equidistant, reducing neighbor relevance and accuracy." | |
| }, | |
| { | |
| "id": 74, | |
| "questionText": "Scenario: A new customer profile is very different from existing customers. Which issue might KNN face?", | |
| "options": [ | |
| "Predicted class may be inaccurate due to no similar neighbors", | |
| "KNN will automatically ignore the profile", | |
| "Model overfits automatically", | |
| "KNN will generate a new class" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "If no close neighbors exist, KNN cannot provide reliable predictions." | |
| }, | |
| { | |
| "id": 75, | |
| "questionText": "What is the main challenge of KNN when deployed in high-frequency trading?", | |
| "options": [ | |
| "Weighted voting is not supported", | |
| "Overfitting to training set", | |
| "Distance metric fails for numeric data", | |
| "High prediction latency due to large datasets" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "KNN requires computing distances to all stored points, making it too slow for real-time predictions in trading." | |
| }, | |
| { | |
| "id": 76, | |
| "questionText": "Scenario: Two classes are very close in feature space but overlapping. Which KNN behavior is expected?", | |
| "options": [ | |
| "KNN ignores overlapping points", | |
| "Higher misclassification rate", | |
| "KNN increases k automatically", | |
| "Predictions are perfect" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "KNN struggles with overlapping classes as neighbors from the wrong class may dominate." | |
| }, | |
| { | |
| "id": 77, | |
| "questionText": "Which method improves KNN performance for very high-dimensional image embeddings?", | |
| "options": [ | |
| "Use raw pixel values directly", | |
| "Dimensionality reduction (PCA, t-SNE, or UMAP)", | |
| "Increase k to max", | |
| "Randomly shuffle features" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Reducing dimensions retains essential information and makes distances meaningful." | |
| }, | |
| { | |
| "id": 78, | |
| "questionText": "Scenario: A fraud detection system uses KNN. New types of fraud appear. What is the limitation?", | |
| "options": [ | |
| "KNN cannot detect unseen patterns without similar neighbors", | |
| "Prediction latency decreases", | |
| "KNN automatically adapts", | |
| "Accuracy improves with noise" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KNN relies on similarity to existing points, so unseen patterns are difficult to detect." | |
| }, | |
| { | |
| "id": 79, | |
| "questionText": "What is a limitation of KNN in large-scale recommendation systems?", | |
| "options": [ | |
| "Cannot handle numeric data", | |
| "Fails on binary features", | |
| "Overfits automatically", | |
| "Memory and computation intensive" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Storing all user-item interactions and computing distances is memory and CPU intensive." | |
| }, | |
| { | |
| "id": 80, | |
| "questionText": "Which approach is suitable for speeding up KNN with millions of samples?", | |
| "options": [ | |
| "Use weighted voting only", | |
| "Increase k to n", | |
| "Use approximate nearest neighbor libraries like FAISS or Annoy", | |
| "Normalize features only" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Approximate search libraries significantly reduce computation while maintaining near-optimal accuracy." | |
| }, | |
| { | |
| "id": 81, | |
| "questionText": "Scenario: In KNN, a feature has a huge numeric range. What problem arises?", | |
| "options": [ | |
| "Feature dominates distance, biasing prediction", | |
| "Weighted voting fails", | |
| "Prediction latency reduces", | |
| "Feature is ignored automatically" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Large-scale features dominate distance computation, skewing predictions unless scaled." | |
| }, | |
| { | |
| "id": 82, | |
| "questionText": "What is a strategy to handle missing values in KNN?", | |
| "options": [ | |
| "Impute missing values before computing distances", | |
| "Increase k to handle missing", | |
| "Ignore missing values automatically", | |
| "Remove all neighbors with missing values" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Missing values should be imputed (mean, median, or mode) to allow proper distance computation." | |
| }, | |
| { | |
| "id": 83, | |
| "questionText": "Scenario: In medical diagnosis using KNN, rare disease cases are underrepresented. Which is a solution?", | |
| "options": [ | |
| "Use raw unscaled features", | |
| "Ignore minority class", | |
| "Weighted voting or synthetic oversampling (SMOTE)", | |
| "Reduce k to 1" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Weighted voting or synthetic oversampling addresses imbalance and improves prediction of rare cases." | |
| }, | |
| { | |
| "id": 84, | |
| "questionText": "Which technique reduces distance computation in high-dimensional KNN?", | |
| "options": [ | |
| "Random shuffling", | |
| "Adding irrelevant features", | |
| "Dimensionality reduction", | |
| "Increasing k to n" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Reducing dimensions reduces number of calculations and improves neighbor relevance." | |
| }, | |
| { | |
| "id": 85, | |
| "questionText": "Scenario: In KNN regression, a few extreme neighbor values exist. What is the impact?", | |
| "options": [ | |
| "Prediction unaffected", | |
| "Predicted value may be skewed unless weighted", | |
| "Accuracy improves automatically", | |
| "KNN fails completely" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Outliers can bias the predicted mean; using weighted averaging mitigates this effect." | |
| }, | |
| { | |
| "id": 86, | |
| "questionText": "What is a benefit of KD-Tree in KNN?", | |
| "options": [ | |
| "Reduces neighbor search complexity in low dimensions", | |
| "Reduces bias of model", | |
| "Automatically scales features", | |
| "Increases training time significantly" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "KD-Tree allows efficient nearest neighbor search in low to moderate dimensions." | |
| }, | |
| { | |
| "id": 87, | |
| "questionText": "Scenario: KNN is applied on time-series data without preprocessing. What is a potential problem?", | |
| "options": [ | |
| "Outliers are automatically removed", | |
| "Accuracy automatically improves", | |
| "Distance metric ignores temporal order", | |
| "KNN predicts trends perfectly" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "KNN does not account for temporal order; raw time-series may not capture pattern similarity properly." | |
| }, | |
| { | |
| "id": 88, | |
| "questionText": "Which scenario illustrates KNN's limitation?", | |
| "options": [ | |
| "Balanced, low-dimensional data", | |
| "Normalized dataset", | |
| "A new point far from all existing points", | |
| "Few noise-free features" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "When a point is far from all neighbors, KNN cannot predict reliably." | |
| }, | |
| { | |
| "id": 89, | |
| "questionText": "Scenario: KNN used for text document classification with TF-IDF vectors. Which step is crucial?", | |
| "options": [ | |
| "Increase k to dataset size", | |
| "Adding irrelevant terms", | |
| "Ignore vector scaling", | |
| "L2 normalization to make distances comparable" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "TF-IDF vectors should be normalized to ensure fair distance computation." | |
| }, | |
| { | |
| "id": 90, | |
| "questionText": "Scenario: KNN struggles with overlapping clusters in feature space. What is a solution?", | |
| "options": [ | |
| "Use feature engineering to separate clusters", | |
| "Ignore scaling", | |
| "Increase k arbitrarily", | |
| "Remove minority points" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Engineering features that better separate classes improves KNN accuracy." | |
| }, | |
| { | |
| "id": 91, | |
| "questionText": "Which approach can improve KNN in very large datasets without losing much accuracy?", | |
| "options": [ | |
| "Use approximate nearest neighbor search", | |
| "Increase k to dataset size", | |
| "Ignore preprocessing", | |
| "Add random noise" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Approximate search reduces computation while keeping predictions close to exact KNN." | |
| }, | |
| { | |
| "id": 92, | |
| "questionText": "Scenario: Online KNN requires predictions every second. Challenge?", | |
| "options": [ | |
| "Cannot handle numeric data", | |
| "KNN scales features automatically", | |
| "High latency due to full distance computation", | |
| "Overfitting automatically" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Real-time prediction is slow because KNN computes distance to all points at query time." | |
| }, | |
| { | |
| "id": 93, | |
| "questionText": "Scenario: Multi-class KNN with imbalanced classes. What can improve fairness?", | |
| "options": [ | |
| "Use k=1 always", | |
| "Random shuffling", | |
| "Distance-weighted voting", | |
| "Ignore minority classes" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Weighted voting ensures closer neighbors have more influence, improving minority class predictions." | |
| }, | |
| { | |
| "id": 94, | |
| "questionText": "Scenario: A KNN model is deployed for anomaly detection. Limitation?", | |
| "options": [ | |
| "Rare anomalies may have no close neighbors", | |
| "Weighted KNN solves all issues", | |
| "Accuracy improves automatically", | |
| "Feature scaling is irrelevant" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "If anomalies are isolated, KNN cannot detect them due to lack of nearby points." | |
| }, | |
| { | |
| "id": 95, | |
| "questionText": "Scenario: In high-dimensional image retrieval, KNN prediction is slow. Solution?", | |
| "options": [ | |
| "Use raw pixel vectors", | |
| "Use approximate nearest neighbor algorithms like FAISS", | |
| "Increase k arbitrarily", | |
| "Ignore normalization" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Approximate algorithms reduce computation significantly while maintaining retrieval quality." | |
| }, | |
| { | |
| "id": 96, | |
| "questionText": "Which scenario can lead to KNN overfitting?", | |
| "options": [ | |
| "Very small k and noisy data", | |
| "Large k with clean data", | |
| "Normalized features", | |
| "Weighted voting" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Small k may fit noise and outliers, causing overfitting." | |
| }, | |
| { | |
| "id": 97, | |
| "questionText": "Scenario: KNN regression for house prices with outlier houses. Best approach?", | |
| "options": [ | |
| "Increase k arbitrarily", | |
| "Remove scaling", | |
| "Simple mean ignoring distances", | |
| "Weighted averaging by distance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Weighted averaging reduces outlier impact, giving closer neighbors more influence." | |
| }, | |
| { | |
| "id": 98, | |
| "questionText": "Scenario: KNN applied to large sparse matrix of user ratings. Challenge?", | |
| "options": [ | |
| "Distance metric fails", | |
| "High memory usage and computation", | |
| "Overfitting automatically", | |
| "Minority class ignored" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Sparse matrices require storing many zeros and computing many distances, which is expensive." | |
| }, | |
| { | |
| "id": 99, | |
| "questionText": "Scenario: Real-time KNN requires prediction in milliseconds. Solution?", | |
| "options": [ | |
| "Use weighted voting only", | |
| "Use approximate nearest neighbor search", | |
| "Increase k to n", | |
| "Ignore feature scaling" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Approximate methods like Annoy or FAISS significantly speed up prediction for large datasets." | |
| }, | |
| { | |
| "id": 100, | |
| "questionText": "Scenario: High-dimensional text KNN classification. Which step is crucial?", | |
| "options": [ | |
| "Ignore scaling", | |
| "Dimensionality reduction or normalization", | |
| "Add random features", | |
| "Use raw text vectors" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "High-dimensional text vectors suffer from the curse of dimensionality; normalization (like L2) or dimensionality reduction is needed to make distances meaningful." | |
| } | |
| ] | |
| } | |