Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions fe/fe-core/src/main/java/org/apache/doris/system/Diagnoser.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.doris.catalog.Tablet;
import org.apache.doris.catalog.TabletInvertedIndex;
import org.apache.doris.catalog.TabletMeta;
import org.apache.doris.common.Config;

import com.google.common.collect.Lists;
import org.json.simple.JSONObject;
Expand Down Expand Up @@ -114,6 +115,7 @@ public static List<List<String>> diagnoseTablet(long tabletId) {
StringBuilder versionErr = new StringBuilder();
StringBuilder statusErr = new StringBuilder();
StringBuilder compactionErr = new StringBuilder();
boolean isCloudMode = Config.isCloudMode();
// For local mode, getCachedVisibleVersion returns the visible version.
// For cloud mode, the replica version is not updated.
long visibleVersion = partition.getCachedVisibleVersion();
Expand Down Expand Up @@ -143,20 +145,22 @@ public static List<List<String>> diagnoseTablet(long tabletId) {
+ replica.getBackendIdWithoutException() + " is not query available. ");
break;
}
if (be.diskExceedLimit()) {
if (!isCloudMode && be.diskExceedLimit()) {
backendErr.append("Backend " + replica.getBackendIdWithoutException() + " has no space left. ");
break;
}
} while (false);
// version
if (replica.getVersion() != visibleVersion) {
versionErr.append("Replica on backend " + replica.getBackendIdWithoutException() + "'s version ("
+ replica.getVersion() + ") does not equal"
+ " to partition visible version (" + visibleVersion + ")");
} else if (replica.getLastFailedVersion() != -1) {
versionErr.append("Replica on backend "
+ replica.getBackendIdWithoutException() + "'s last failed version is "
+ replica.getLastFailedVersion());
if (!isCloudMode) {
if (replica.getVersion() != visibleVersion) {
versionErr.append("Replica on backend " + replica.getBackendIdWithoutException() + "'s version ("
+ replica.getVersion() + ") does not equal"
+ " to partition visible version (" + visibleVersion + ")");
} else if (replica.getLastFailedVersion() != -1) {
versionErr.append("Replica on backend "
+ replica.getBackendIdWithoutException() + "'s last failed version is "
+ replica.getLastFailedVersion());
}
}
// status
if (!replica.isAlive() || replica.isUserDrop()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,14 @@

package org.apache.doris.clone;

import org.apache.doris.catalog.Database;
import org.apache.doris.catalog.DiskInfo;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.MaterializedIndex;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.Replica;
import org.apache.doris.catalog.Tablet;
import org.apache.doris.catalog.TabletInvertedIndex;
import org.apache.doris.common.Config;
import org.apache.doris.common.ExceptionChecker;
Expand Down Expand Up @@ -162,6 +167,98 @@ private static void updateReplicaVersionCount() {
Assert.assertTrue(result.get(11).get(1).contains("version count is too high"));
}

/**
 * Looks up the message recorded for a diagnosis item.
 *
 * <p>Each row is read as a pair: element 0 is the item name and element 1 is its message.
 * The first row whose name equals {@code item} wins.
 *
 * @param rows diagnosis rows to search
 * @param item name of the diagnosis item to look for
 * @return the message of the first matching row, or an empty string when no row matches
 */
private static String getDiagnosisInfo(List<List<String>> rows, String item) {
    for (List<String> entry : rows) {
        String entryName = entry.get(0);
        if (!item.equals(entryName)) {
            // Not the item we are after; keep scanning.
            continue;
        }
        return entry.get(1);
    }
    return "";
}

/**
 * Builds a {@code TDisk} copy of every disk currently reported by {@code backend.getDisks()}.
 *
 * <p>The test uses the result to put the backend's disks back after overriding them.
 *
 * @param backend backend whose disks are copied
 * @return a new map from root path to the {@code TDisk} copy of that disk
 */
private static Map<String, TDisk> copyBackendDisks(Backend backend) {
    Map<String, TDisk> snapshot = Maps.newHashMap();
    for (DiskInfo source : backend.getDisks().values()) {
        TDisk copy = new TDisk();
        copy.setRootPath(source.getRootPath());
        // Capacity figures are carried over unchanged.
        copy.setDiskTotalCapacity(source.getTotalCapacityB());
        copy.setDataUsedCapacity(source.getDataUsedCapacityB());
        copy.setTrashUsedCapacity(source.getTrashUsedCapacityB());
        copy.setDiskAvailableCapacity(source.getAvailableCapacityB());
        // The "used" flag is set exactly when the disk state is ONLINE.
        boolean online = source.getState() == DiskInfo.DiskState.ONLINE;
        copy.setUsed(online);
        copy.setPathHash(source.getPathHash());
        copy.setStorageMedium(source.getStorageMedium());
        snapshot.put(copy.getRootPath(), copy);
    }
    return snapshot;
}

/**
 * Builds a replacement disk map for {@code backend} in which every disk is reported as full.
 *
 * <p>Root path, path hash and storage medium are taken from the backend's current disks;
 * each disk gets a total capacity of 1, a data-used capacity of 1, no trash usage,
 * no available capacity, and the "used" flag set.
 *
 * @param backend backend whose current disks provide the identities
 * @return a new map from root path to the fabricated full {@code TDisk}
 */
private static Map<String, TDisk> buildExceedLimitDisks(Backend backend) {
    Map<String, TDisk> fullDisks = Maps.newHashMap();
    for (DiskInfo source : backend.getDisks().values()) {
        TDisk full = new TDisk();
        // Identity fields mirror the real disk.
        full.setRootPath(source.getRootPath());
        // One unit of capacity, fully consumed by data: nothing is left available.
        full.setDiskTotalCapacity(1L);
        full.setDataUsedCapacity(1L);
        full.setTrashUsedCapacity(0L);
        full.setDiskAvailableCapacity(0L);
        full.setUsed(true);
        full.setPathHash(source.getPathHash());
        full.setStorageMedium(source.getStorageMedium());
        fullDisks.put(full.getRootPath(), full);
    }
    return fullDisks;
}

/**
 * Checks that {@code Diagnoser.diagnoseTablet} reports a full disk and a replica version
 * mismatch by default, and reports "OK" for both items after {@code Config.cloud_unique_id}
 * is set (which this test relies on to switch the diagnoser into cloud mode).
 *
 * <p>The test creates a single-bucket table, picks the first replica of its first tablet,
 * replaces that replica's backend disks with full ones, and sets the replica version to a
 * value different from the partition's cached visible version. The disks, the replica
 * version and {@code Config.cloud_unique_id} are restored in the {@code finally} block.
 */
@Test
public void testDiagnoseTabletCloudModeSkipDiskAndVersionCheck() throws Exception {
    // Mask off the sign bit instead of using Math.abs: Math.abs(Integer.MIN_VALUE) is still
    // negative and would put a '-' into the unquoted table name.
    int tableSuffix = random.nextInt() & Integer.MAX_VALUE;
    String tableName = "tbl_diag_cloud_" + tableSuffix;
    String createStr = "create table test." + tableName + "\n"
            + "(k1 date, k2 int)\n"
            + "distributed by hash(k2) buckets 1\n"
            + "properties\n"
            + "(\n"
            + " \"replication_num\" = \"3\"\n"
            + ")";
    ExceptionChecker.expectThrowsNoException(() -> createTable(createStr));

    Database db = Env.getCurrentInternalCatalog().getDbNullable("test");
    Assert.assertNotNull(db);
    OlapTable table = (OlapTable) db.getTableNullable(tableName);
    Assert.assertNotNull(table);
    Partition partition = table.getAllPartitions().iterator().next();
    MaterializedIndex index = partition.getBaseIndex();
    Tablet tablet = index.getTablets().get(0);
    Replica replica = tablet.getReplicas().get(0);
    long tabletId = tablet.getId();
    long visibleVersion = partition.getCachedVisibleVersion();
    Backend backend = Env.getCurrentSystemInfo().getBackend(replica.getBackendIdWithoutException());
    Assert.assertNotNull(backend);

    // Remember everything the test is about to mutate so it can be put back afterwards.
    Map<String, TDisk> originalDisks = copyBackendDisks(backend);
    String originCloudUniqueId = Config.cloud_unique_id;
    long originalVersion = replica.getVersion();

    try {
        backend.updateDisks(buildExceedLimitDisks(backend));
        // Any version other than the visible one will do; step down instead of up at
        // Long.MAX_VALUE to avoid overflow.
        long mismatchVersion = visibleVersion == Long.MAX_VALUE ? visibleVersion - 1 : visibleVersion + 1;
        replica.adminUpdateVersionInfo(mismatchVersion, null, null, System.currentTimeMillis());

        // Before cloud_unique_id is changed: both problems must be reported.
        List<List<String>> localResult = Diagnoser.diagnoseTablet(tabletId);
        Assert.assertTrue(getDiagnosisInfo(localResult, "ReplicaBackendStatus").contains("has no space left"));
        Assert.assertTrue(getDiagnosisInfo(localResult, "ReplicaVersionStatus").contains("does not equal"));

        // After cloud_unique_id is set: the same state must be reported as healthy.
        Config.cloud_unique_id = "diagnose-tablet-cloud-mode-ut";
        List<List<String>> cloudResult = Diagnoser.diagnoseTablet(tabletId);
        Assert.assertEquals("OK", getDiagnosisInfo(cloudResult, "ReplicaBackendStatus"));
        Assert.assertEquals("OK", getDiagnosisInfo(cloudResult, "ReplicaVersionStatus"));
    } finally {
        // Restore the global config first so later tests never observe the test value.
        Config.cloud_unique_id = originCloudUniqueId;
        backend.updateDisks(originalDisks);
        replica.adminUpdateVersionInfo(originalVersion, null, null, System.currentTimeMillis());
    }
}

@Test
public void test() throws Exception {
// test colocate tablet repair
Expand Down
Loading